refactor: refactor dynamo deploy subfolder (#927)

403344e5 · hhzhang16 · GitHub · 99cd9d85 · 99cd9d85 · 99cd9d85
Unverified Commit 403344e5 authored May 06, 2025 by hhzhang16 Committed by GitHub May 06, 2025
20 changed files
--- a/deploy/Kubernetes/common/tests/run.ps1
+++ b/deploy/Kubernetes/common/tests/run.ps1
-# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-set-strictmode -version latest
-
-. "$(& git rev-parse --show-toplevel)/deploy/Kubernetes/_build/helm-test.ps1"
-
-$tests = @(
-  @{
-    name = 'basic'
-    expected = 0
-    matches = @(
-      @{
-        indent = 6
-        lines = @(
-          'labels:'
-          '  app: test'
-          '  app.kubernetes.io/component: test_dynamo_chart'
-          '  app.kubernetes.io/instance: test'
-          '  app.kubernetes.io/name: test_dynamo_chart'
-          '  app.kubernetes.io/part-of: dynamo'
-          '  app.kubernetes.io/version: "1.0.0"'
-          '  app.kubernetes.io/managed-by: Helm'
-          '  helm.sh/chart: "dynamo_component"'
-          '  helm.sh/version: "1.0.0"'
-        )
-      }
-      @{
-        indent = 8
-        lines = @(
-          '- containerPort: 8000'
-          '  name: health'
-        )
-      }
-      @{
-        indent = 8
-        lines = @(
-          '- containerPort: 9345'
-          '  name: request'
-        )
-      }
-      @{
-        indent = 8
-        lines = @(
-          '- containerPort: 443'
-          '  name: api'
-        )
-      }
-      @{
-        indent = 8
-        lines = @(
-          '- containerPort: 9347'
-          '  name: metrics'
-        )
-      }
-      @{
-        indent = 10
-        lines = @(
-          'limits:'
-          '  cpu: 4'
-          '  ephemeral-storage: 1Gi'
-          '  nvidia.com/gpu: 1'
-          '  memory: 16Gi'
-        )
-      }
-      @{
-        indent = 10
-        lines = @(
-          'requests:'
-          '  cpu: 4'
-          '  ephemeral-storage: 1Gi'
-          '  nvidia.com/gpu: 1'
-          '  memory: 16Gi'
-        )
-      }
-    )
-    options = @()
-    values = @(
-      'basic.yaml'
-    )
-  }
-  @{
-    name = "resource_gpu"
-    expected = 0
-    matches = @(
-      @{
-        indent = 14
-        lines = @(
-          '- key: nvidia.com/gpu'
-          '  operator: Exists'
-        )
-      }
-      @{
-        indent = 14
-        lines = @(
-          '- key: nvidia.com/gpu.product'
-          '  operator: In'
-          '  values:'
-          '  - a10g'
-        )
-      }
-      @{
-        indent = 10
-        lines = @(
-          'limits:'
-          '  cpu: 4'
-          '  ephemeral-storage: 1Gi'
-          '  nvidia.com/gpu: 2'
-          '  memory: 16Gi'
-        )
-      }
-      @{
-        indent = 10
-        lines = @(
-          'requests:'
-          '  cpu: 4'
-          '  ephemeral-storage: 1Gi'
-          '  nvidia.com/gpu: 2'
-          '  memory: 16Gi'
-        )
-      }
-    )
-    options = @()
-    values = @(
-      'basic.yaml'
-      'resource_gpu.yaml'
-    )
-  }
-  @{
-    name = 'invalid_values'
-    expected = 1
-    matches = @(
-      'Error: values don''t meet the specifications of the schema\(s\) in the following chart\(s\):'
-      @{
-        indent = 0
-        lines = @(
-          '- kubernetes.checks.liveness.successThreshold: Must validate one and only one schema (oneOf)'
-          '- kubernetes.checks.liveness.successThreshold: Must be greater than or equal to 1'
-        )
-      }
-      @{
-        indent = 0
-        lines = @(
-          '- kubernetes.checks.liveness.failureThreshold: Must validate one and only one schema (oneOf)'
-          '- kubernetes.checks.liveness.failureThreshold: Must be greater than or equal to 1'
-        )
-      }
-      @{
-        indent = 0
-        lines = @(
-          '- kubernetes.checks.liveness.initialDelaySeconds: Must validate one and only one schema (oneOf)'
-          '- kubernetes.checks.liveness.initialDelaySeconds: Invalid type. Expected: integer, given: number'
-        )
-      }
-      @{
-        indent = 0
-        lines = @(
-          '- kubernetes.checks.liveness.periodSeconds: Must validate one and only one schema (oneOf)'
-          '- kubernetes.checks.liveness.periodSeconds: Invalid type. Expected: integer, given: string'
-        )
-      }
-      @{
-        indent = 0
-        lines = @(
-          '- ports.health: Must validate one and only one schema (oneOf)'
-          '- ports.health: Must be less than or equal to 65535'
-        )
-      }
-      @{
-        indent = 0
-        lines = @(
-        '- ports.metrics: Must validate one and only one schema (oneOf)'
-        '- ports.metrics: Invalid type. Expected: integer, given: string'
-        )
-      }
-      @{
-        indent = 0
-        lines = @(
-        '- ports.request: Must validate one and only one schema (oneOf)'
-        '- ports.request: Must be greater than or equal to 1025'
-        )
-      }
-      @{
-        indent = 0
-        lines = @(
-          '- resources.cpu: Must validate one and only one schema (oneOf)'
-          '- resources.cpu: Must be greater than or equal to 1'
-        )
-      }
-    )
-    options = @()
-    values = @(
-      'basic.yaml'
-      'invalid_values.yaml'
-    )
-  }
-)
-
-$config = initialize_test $args $tests
-
-# Being w/ the state of not having passed.
-$is_pass = $false
-
-try {
-  $is_pass = $(test_helm_chart $config)
-}
-catch {
-  if (get_is_debug) {
-    throw $_
-  }
-
-  fatal_exit "$_"
-}
-
-# Clean up any NVBUILD environment variables left behind by the build.
-cleanup_after
-
-if (-not $is_pass) {
-  exit -1
-}
-
-exit 0
--- a/deploy/Kubernetes/pipeline/README.md
+++ b/deploy/Kubernetes/pipeline/README.md
-# deploy Dynamo pipeline on Kubernetes
-
-This is a proof of concept for a Helm chart to deploy services defined in a bento.yaml configuration.
-
-## Usage
-
-### Prerequisites
-
- make sure dynamo cli is installed
- make sure you have a docker image registry to which you can push and pull from k8s cluster
- set the imagePullSecrets in the values.yaml file
- navigate to the pipeline deployment directory by running:
-  ```bash
-  cd deploy/Kubernetes/pipeline
-  ```
- build and push the DYNAMO_IMAGE as described in the [main README](../../README.md#building-the-dynamo_image-base-image) to an image registry
- make sure the `nats` and `etcd` dependencies are installed (under the `dependencies` subdirectory). For more details, see [Installing Required Dependencies](../../../docs/guides/dynamo_deploy.md#installing-required-dependencies)
-
-### Setting up Image Pull Secrets
-
-Before deploying, you need to ensure your Kubernetes namespace has the appropriate image pull secret configured. The Helm chart uses `docker-imagepullsecret` by default.
-
-You can create this secret in your namespace using:
-```bash
-kubectl create secret docker-registry docker-imagepullsecret \
-    --docker-server=<registry-server> \
-    --docker-username=<username> \
-    --docker-password=<password> \
-    -n <namespace>
-```
-
-Alternatively, you can modify the `imagePullSecrets` section in `deploy/Kubernetes/pipeline/chart/values.yaml` to match your registry credentials.
-
-### Install the Helm chart
-
-```bash
-export DYNAMO_IMAGE=<dynamo_docker_image_name>
-./deploy.sh <docker_registry> <k8s_namespace> <path_to_dynamo_directory> <dynamo_identifier> [<dynamo_config_file>]
-
-# example: export DYNAMO_IMAGE=nvcr.io/nvidian/nim-llm-dev/dynamo-base-worker:0.0.1
-# example: ./deploy.sh nvcr.io/nvidian/nim-llm-dev my-namespace ../../../examples/hello_world/ hello_world:Frontend
-# example: ./deploy.sh nvcr.io/nvidian/nim-llm-dev my-namespace ../../../examples/llm graphs.disagg_router:Frontend ../../../examples/llm/configs/disagg_router.yaml
-```
-
-### Test the deployment
-
-```bash
-# Forward the service port to localhost
-kubectl -n <k8s_namespace> port-forward svc/hello-world-frontend 3000:80
-
-# In another terminal window, test the API endpoint
-curl -X 'POST' 'http://localhost:3000/generate' \
-    -H 'accept: text/event-stream' \
-    -H 'Content-Type: application/json' \
-    -d '{"text": "test"}'
-```
\ No newline at end of file
--- a/deploy/Kubernetes/test_helm_charts.py
+++ b/deploy/Kubernetes/test_helm_charts.py
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-import shutil
-import subprocess
-import sys
-
-import pytest
-
-
-def find_repository_root():
-    """Uses Git to find the repository's root path.
-
-    Returns:
-        str: Root path of the repository.
-    """
-    if shutil.which("git") is None:
-        pytest.skip('Required executable "git" not found.')
-
-    cmd_args = ["git", "rev-parse", "--show-toplevel"]
-
-    repository_root_path = subprocess.check_output(cmd_args).decode("utf-8")
-    repository_root_path = repository_root_path.strip()
-
-    return repository_root_path
-
-
-@pytest.mark.parametrize(
-    "component",
-    [
-        ("common"),
-    ],
-)
-def test_helm_chart(component):
-    """Executes the Helm chart test harness for specific tests.
-
-    Args:
-        component str: Folder under Kubernetes/ to find tests.
-    """
-    if shutil.which("pwsh") is None:
-        pytest.skip('Required executable "pwsh" not found.')
-
-    test_chart_path = os.path.join(
-        find_repository_root(),
-        "deploy",
-        "Kubernetes",
-        component,
-        "tests",
-        "run.ps1",
-    )
-
-    print()
-    print(f"Executing {test_chart_path}")
-
-    cmd_args = [
-        "pwsh",
-        "-c",
-        test_chart_path,
-        "test",
-        "-v:detailed",
-    ]
-
-    assert 0 == subprocess.run(cmd_args).returncode
-
-
-if __name__ == "__main__":
-    print(
-        "Error: This script is not indented to executed direct. "
-        "Instead use `pytest worker_tests.py` to execute it.",
-        file=sys.stderr,
-        flush=True,
-    )
-    exit(1)
--- a/deploy/README.md
+++ b/deploy/README.md
+<!--
+SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+SPDX-License-Identifier: Apache-2.0
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+-->
+
+# Dynamo Deployment Guide
+
+This directory contains all the necessary files and instructions for deploying Dynamo in various environments. Choose the deployment method that best suits your needs from the options described below.
+
+## Directory Structure
+
+```
+deploy/
+├── cloud/                    # Cloud deployment configurations and tools
+├── helm/                     # Helm charts for manual Kubernetes deployment
+├── metrics/                  # Monitoring and metrics configuration
+├── sdk/                      # Dynamo SDK and related tools
+└── README.md                 # This file
+```
+
+## Deployment Options
+
+### 1. 🚀 Dynamo Cloud Platform [PREFERRED]
+
+The Dynamo Cloud Platform provides a managed deployment experience with:
+- Automated infrastructure management
+- Built-in monitoring and metrics
+- Simplified deployment process via `dynamo deploy` CLI commands
+- Production-ready configurations
+- Managed NATS and etcd dependencies
+
+For detailed instructions, see:
+- [Dynamo Cloud Platform Guide](../docs/guides/dynamo_deploy/dynamo_cloud.md)
+- [Operator Deployment Guide](../docs/guides/dynamo_deploy/operator_deployment.md)
+
+### 2. Manual Deployment with Helm Charts
+
+For users who need more control over their deployments:
+- Full control over deployment parameters
+- Manual management of infrastructure
+- Customizable monitoring setup
+- Flexible configuration options
+- Manual management of NATS and etcd dependencies
+
+Documentation:
+- [Manual Helm Deployment Guide](../docs/guides/dynamo_deploy/manual_helm_deployment.md)
+- [Minikube Setup Guide](../docs/guides/dynamo_deploy/minikube.md)
+
+## Choosing the Right Deployment Method
+
+- **Dynamo Cloud Platform**: Best for most users, provides managed deployment with built-in monitoring
+  - See [Dynamo Cloud Platform Guide](../docs/guides/dynamo_deploy/dynamo_cloud.md)
+  - Recommended for production deployments
+  - Simplifies dependency management
+  - Provides infrastructure for user management
+
+- **Manual Helm Deployment**: For users who need full control over their deployment
+  - See [Manual Helm Deployment Guide](../docs/guides/dynamo_deploy/manual_helm_deployment.md)
+  - Suitable for custom deployments
+  - Requires manual management of dependencies
+  - Provides maximum flexibility for users
+
+## Example Deployments
+
+To help you get started, we provide several example deployments:
+
+### Hello World Example
+A basic example to learn Dynamo deployment: [Hello World Example](../examples/hello_world/README.md#deploying-to-and-running-the-example-in-kubernetes)
+- Shows how to deploy a simple three-service pipeline that processes text
+- Provides step-by-step instructions for building your service and testing with port forwarding
+- Includes sample output showing the text flow between services
+
+### LLM Examples
+Example for deploying LLM services: [LLM Example](../examples/llm/README.md#deploy-to-kubernetes)
+- Demonstrates deploying and making inference requests against LLM models
+- Includes examples for both aggregated and disaggregated serving
+- Provides detailed deployment steps and testing instructions
--- a/deploy/dynamo/README.md
+++ b/deploy/dynamo/README.md
@@ -15,29 +15,12 @@ See the License for the specific language governing permissions and
 limitations under the License.
 -->

-# Deploying Dynamo inference graphs to Kubernetes
+# Dynamo Cloud Platform

-## Deployment Paths in Dynamo
+This directory contains the infrastructure components required for the Dynamo cloud platform, which is used when deploying with the `dynamo deploy` CLI commands.

-Dynamo provides two distinct deployment paths, each serving different purposes:
+For detailed documentation on setting up and using the Dynamo Cloud Platform, please refer to:
+- [Dynamo Cloud Platform Guide](../../docs/guides/dynamo_deploy/dynamo_cloud.md)
+- [Operator Deployment Guide](../../docs/guides/dynamo_deploy/operator_deployment.md)

-1. **Dynamo Cloud Platform** (`deploy/dynamo/helm/`)
-   - Contains the infrastructure components required for the Dynamo cloud platform
-   - Used when deploying with the `dynamo deploy` CLI commands
-   - Provides a managed deployment experience
-   - This README focuses on setting up this platform infrastructure
-   - For Dynamo cloud installation instructions, see [Installing Dynamo Cloud](./helm/README.md), which walks through installing and configuring the Dynamo cloud components on your Kubernetes cluster.
-
-2. **Manual Deployment with Helm Charts** (`deploy/Kubernetes/`)
-   - Used for manually deploying inference graphs to Kubernetes
-   - Contains Helm charts and configurations for deploying individual inference pipelines
-   - Documentation:
-        - [Deploying Dynamo Inference Graphs to Kubernetes using Helm](../Kubernetes/pipeline/README.md)
-        - [Dynamo Deploy Guide](../../docs/guides/dynamo_deploy.md)
-
-Choose the appropriate deployment path based on your needs:
- Use `deploy/Kubernetes/` if you want to manually manage your inference graph deployments
- Use `deploy/dynamo/helm/` if you want to use the Dynamo cloud platform and CLI tools
-
-## Hello World example
-See [examples/hello_world/README.md#deploying-to-kubernetes-using-dynamo-cloud-and-dynamo-deploy-cli](../../examples/hello_world/README.md#deploying-to-kubernetes-using-dynamo-cloud-and-dynamo-deploy-cli)
\ No newline at end of file
+For a quick start example, see the [Hello World example: deploying to Kubernetes using Dynamo cloud and the `dynamo deploy` CLI](../../examples/hello_world/README.md#deploying-to-kubernetes-using-dynamo-cloud-and-dynamo-deploy-cli).
\ No newline at end of file
--- a/deploy/dynamo/api-store/.earthlyignore
+++ b/deploy/dynamo/api-store/.earthlyignore
--- a/deploy/dynamo/api-store/.env
+++ b/deploy/dynamo/api-store/.env
--- a/deploy/dynamo/api-store/Earthfile
+++ b/deploy/dynamo/api-store/Earthfile
--- a/deploy/dynamo/api-store/README.md
+++ b/deploy/dynamo/api-store/README.md
--- a/deploy/dynamo/api-store/ai_dynamo_store/.env
+++ b/deploy/dynamo/api-store/ai_dynamo_store/.env
--- a/deploy/dynamo/api-store/ai_dynamo_store/__init__.py
+++ b/deploy/dynamo/api-store/ai_dynamo_store/__init__.py
--- a/deploy/dynamo/api-store/ai_dynamo_store/api/__init__.py
+++ b/deploy/dynamo/api-store/ai_dynamo_store/api/__init__.py
--- a/deploy/dynamo/api-store/ai_dynamo_store/api/components.py
+++ b/deploy/dynamo/api-store/ai_dynamo_store/api/components.py
--- a/deploy/dynamo/api-store/ai_dynamo_store/api/deployments.py
+++ b/deploy/dynamo/api-store/ai_dynamo_store/api/deployments.py
--- a/deploy/dynamo/api-store/ai_dynamo_store/api/dynamo.py
+++ b/deploy/dynamo/api-store/ai_dynamo_store/api/dynamo.py
--- a/deploy/dynamo/api-store/ai_dynamo_store/api/health_check.py
+++ b/deploy/dynamo/api-store/ai_dynamo_store/api/health_check.py
--- a/deploy/dynamo/api-store/ai_dynamo_store/api/k8s.py
+++ b/deploy/dynamo/api-store/ai_dynamo_store/api/k8s.py
--- a/deploy/dynamo/api-store/ai_dynamo_store/api/model.py
+++ b/deploy/dynamo/api-store/ai_dynamo_store/api/model.py
--- a/deploy/dynamo/api-store/ai_dynamo_store/api/storage.py
+++ b/deploy/dynamo/api-store/ai_dynamo_store/api/storage.py
--- a/deploy/dynamo/api-store/ai_dynamo_store/api/test_deployments.py
+++ b/deploy/dynamo/api-store/ai_dynamo_store/api/test_deployments.py