refactor: move backend deploy, launch and slurm files from components to examples (#3849)

Signed-off-by: Anant Sharma <anants@nvidia.com>

refactor: move backend deploy, launch and slurm files from components to examples (#3849)
Signed-off-by: Anant Sharma <anants@nvidia.com>
8bd37c96 · Anant Sharma · GitHub · 78359046 · 8bd37c96 · 8bd37c96
Unverified Commit 8bd37c96 authored Oct 31, 2025 by Anant Sharma Committed by GitHub Oct 31, 2025
20 changed files
--- a/examples/deployments/GKE/vllm/disagg.yaml
+++ b/examples/deployments/GKE/vllm/disagg.yaml
@@ -28,7 +28,7 @@ spec:
          startupProbe:
            initialDelaySeconds: 180
          image: my-registry/vllm-runtime:my-tag
-          workingDir: /workspace/components/backends/vllm
+          workingDir: /workspace/examples/backends/vllm
          command:
            - /bin/sh
            - -c
@@ -50,7 +50,7 @@ spec:
      extraPodSpec:
        mainContainer:
          image: my-registry/vllm-runtime:my-tag
-          workingDir: /workspace/components/backends/vllm
+          workingDir: /workspace/examples/backends/vllm
          command:
            - /bin/sh
            - -c

--- a/launch/dynamo-run/src/main.rs
+++ b/launch/dynamo-run/src/main.rs
@@ -94,7 +94,7 @@ async fn wrapper(runtime: dynamo_runtime::Runtime) -> anyhow::Result<()> {
            "out" => {
                if val == "sglang" || val == "trtllm" || val == "vllm" {
                    tracing::error!(
-                        "To run the {val} engine please use the Python interface, see root README or look in directory `components/backends/`."
+                        "To run the {val} engine please use the Python interface, see root README or look in directory `examples/backends/`."
                    );
                    std::process::exit(1);
                }

--- a/lib/bindings/python/examples/metrics/README.md
+++ b/lib/bindings/python/examples/metrics/README.md
@@ -117,7 +117,7 @@ When you need to add or modify metrics in Method 1 (ForwardPassMetrics Pub/Sub v
   }
   ```

-4. **`components/backends/sglang/.../publisher.py`** - Update Python code to compute new metric:
+4. **`components/src/dynamo/sglang/publisher.py`** - Update Python code to compute new metric:
   ```python
   def collect_metrics():
       worker_stats = WorkerStats(
@@ -268,7 +268,7 @@ Dynamic Registration provides type hints (via `.pyi` stub files) for typed metri
 ```mermaid
 graph TB
    subgraph "Python Layer"
-        PY[Python Application<br/>components/backends/sglang/main.py]
+        PY[Python Application<br/>components/src/dynamo/sglang/main.py]
        style PY fill:#3776ab,color:#fff
    end


--- a/pyproject.toml
+++ b/pyproject.toml
@@ -154,7 +154,7 @@ addopts = [
    "--ignore-glob=components/src/dynamo/sglang/multimodal_utils/*",
    "--ignore-glob=components/src/dynamo/vllm/multimodal_utils/*",
    "--ignore-glob=components/src/dynamo/vllm/multimodal_handlers/*",
-    "--ignore-glob=components/backends/sglang/slurm_jobs/*",
+    "--ignore-glob=examples/backends/sglang/slurm_jobs/*",
    # FIXME: Get relative/generic blob paths to work here
 ]
 xfail_strict = true

--- a/recipes/deepseek-r1/sglang/disagg-16gpu/deploy.yaml
+++ b/recipes/deepseek-r1/sglang/disagg-16gpu/deploy.yaml
@@ -48,7 +48,7 @@ spec:
            timeoutSeconds: 10
            failureThreshold: 600
          image: my-registry/sglang-wideep-runtime:my-tag
-          workingDir: /workspace/components/backends/sglang
+          workingDir: /workspace/examples/backends/sglang
          command:
            - python3
            - -m
@@ -101,7 +101,7 @@ spec:
            timeoutSeconds: 10
            failureThreshold: 600
          image: my-registry/sglang-wideep-runtime:my-tag
-          workingDir: /workspace/components/backends/sglang
+          workingDir: /workspace/examples/backends/sglang
          command:
            - python3
            - -m

--- a/recipes/deepseek-r1/sglang/disagg-8gpu/deploy.yaml
+++ b/recipes/deepseek-r1/sglang/disagg-8gpu/deploy.yaml
@@ -46,7 +46,7 @@ spec:
            timeoutSeconds: 10
            failureThreshold: 600
          image: my-registry/sglang-wideep-runtime:my-tag
-          workingDir: /workspace/components/backends/sglang
+          workingDir: /workspace/examples/backends/sglang
          command:
            - python3
            - -m
@@ -95,7 +95,7 @@ spec:
            timeoutSeconds: 10
            failureThreshold: 600
          image: my-registry/sglang-wideep-runtime:my-tag
-          workingDir: /workspace/components/backends/sglang
+          workingDir: /workspace/examples/backends/sglang
          command:
            - python3
            - -m

--- a/recipes/gpt-oss-120b/trtllm/agg/deploy.yaml
+++ b/recipes/gpt-oss-120b/trtllm/agg/deploy.yaml
@@ -97,7 +97,7 @@ spec:
          - mountPath: /opt/dynamo/configs
            name: llm-config
            readOnly: true
-          workingDir: /workspace/components/backends/trtllm
+          workingDir: /workspace/examples/backends/trtllm
        volumes:
        - configMap:
            name: llm-config

--- a/recipes/llama-3-70b/vllm/agg/deploy.yaml
+++ b/recipes/llama-3-70b/vllm/agg/deploy.yaml
@@ -19,7 +19,7 @@ spec:
      extraPodSpec:
        mainContainer:
          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
-          workingDir: /workspace/components/backends/vllm
+          workingDir: /workspace/examples/backends/vllm
      replicas: 1
    VllmPrefillWorker:
      componentType: worker
@@ -43,7 +43,7 @@ spec:
          - /bin/sh
          - -c
          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
-          workingDir: /workspace/components/backends/vllm
+          workingDir: /workspace/examples/backends/vllm
      replicas: 1
      resources:
        limits:

--- a/recipes/llama-3-70b/vllm/disagg-multi-node/deploy.yaml
+++ b/recipes/llama-3-70b/vllm/disagg-multi-node/deploy.yaml
@@ -19,7 +19,7 @@ spec:
      extraPodSpec:
        mainContainer:
          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
-          workingDir: /workspace/components/backends/vllm
+          workingDir: /workspace/examples/backends/vllm
      replicas: 1
    VllmPrefillWorker:
      componentType: worker
@@ -43,7 +43,7 @@ spec:
          - /bin/sh
          - -c
          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
-          workingDir: /workspace/components/backends/vllm
+          workingDir: /workspace/examples/backends/vllm
      replicas: 1
      resources:
        limits:
@@ -72,7 +72,7 @@ spec:
          - /bin/sh
          - -c
          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
-          workingDir: /workspace/components/backends/vllm
+          workingDir: /workspace/examples/backends/vllm
      replicas: 1
      resources:
        limits:

--- a/recipes/llama-3-70b/vllm/disagg-single-node/deploy.yaml
+++ b/recipes/llama-3-70b/vllm/disagg-single-node/deploy.yaml
@@ -19,7 +19,7 @@ spec:
      extraPodSpec:
        mainContainer:
          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
-          workingDir: /workspace/components/backends/vllm
+          workingDir: /workspace/examples/backends/vllm
      replicas: 1
    VllmPrefillWorker:
      componentType: worker
@@ -53,7 +53,7 @@ spec:
          - /bin/sh
          - -c
          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
-          workingDir: /workspace/components/backends/vllm
+          workingDir: /workspace/examples/backends/vllm
      replicas: 2
      resources:
        limits:
@@ -92,7 +92,7 @@ spec:
          - /bin/sh
          - -c
          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
-          workingDir: /workspace/components/backends/vllm
+          workingDir: /workspace/examples/backends/vllm
      replicas: 1
      resources:
        limits:

--- a/recipes/qwen3-32b-fp8/trtllm/agg/deploy.yaml
+++ b/recipes/qwen3-32b-fp8/trtllm/agg/deploy.yaml
@@ -110,7 +110,7 @@ spec:
          - mountPath: /opt/dynamo/configs
            name: llm-config
            readOnly: true
-          workingDir: /workspace/components/backends/trtllm
+          workingDir: /workspace/examples/backends/trtllm
        volumes:
        - configMap:
            name: llm-config

--- a/recipes/qwen3-32b-fp8/trtllm/disagg/deploy.yaml
+++ b/recipes/qwen3-32b-fp8/trtllm/disagg/deploy.yaml
@@ -270,7 +270,7 @@ spec:
          - mountPath: /opt/dynamo/configs
            name: llm-config-prefill
            readOnly: true
-          workingDir: /workspace/components/backends/trtllm
+          workingDir: /workspace/examples/backends/trtllm
        volumes:
        - configMap:
            name: llm-config-prefill
@@ -330,7 +330,7 @@ spec:
          - mountPath: /opt/dynamo/configs
            name: llm-config-decode
            readOnly: true
-          workingDir: /workspace/components/backends/trtllm
+          workingDir: /workspace/examples/backends/trtllm
        volumes:
        - configMap:
            name: llm-config-decode

--- a/tests/fault_tolerance/deploy/scenarios.py
+++ b/tests/fault_tolerance/deploy/scenarios.py
@@ -256,8 +256,8 @@ def _create_deployments_for_backend(backend: str) -> Dict[str, DeploymentInfo]:

    # Define the yaml files for agg and disagg deployments
    yaml_files = {
-        "agg": f"components/backends/{backend}/deploy/agg.yaml",
-        "disagg": f"components/backends/{backend}/deploy/disagg.yaml",
+        "agg": f"examples/backends/{backend}/deploy/agg.yaml",
+        "disagg": f"examples/backends/{backend}/deploy/disagg.yaml",
    }

    # Define the different configurations to test

--- a/tests/fault_tolerance/deploy/templates/vllm/moe_agg.yaml
+++ b/tests/fault_tolerance/deploy/templates/vllm/moe_agg.yaml
@@ -49,7 +49,7 @@ spec:
        - name: nvcr-imagepullsecret
        mainContainer:
          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
-          workingDir: /workspace/components/backends/vllm
+          workingDir: /workspace/examples/backends/vllm
          command:
          - python3
          - -m

--- a/tests/fault_tolerance/deploy/templates/vllm/moe_disagg.yaml
+++ b/tests/fault_tolerance/deploy/templates/vllm/moe_disagg.yaml
@@ -52,7 +52,7 @@ spec:
        - name: nvcr-imagepullsecret
        mainContainer:
          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
-          workingDir: /workspace/components/backends/vllm
+          workingDir: /workspace/examples/backends/vllm
          command:
          - python3
          - -m
@@ -117,7 +117,7 @@ spec:
        - name: nvcr-imagepullsecret
        mainContainer:
          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
-          workingDir: /workspace/components/backends/vllm
+          workingDir: /workspace/examples/backends/vllm
          command:
          - python3
          - -m

--- a/tests/planner/README.md
+++ b/tests/planner/README.md
@@ -10,7 +10,7 @@ This directory contains comprehensive testing tools for validating the SLA plann
 The SLA planner monitors metrics every 60 seconds (default adjustment interval) and scales
 prefill/decode workers based on TTFT, ITL, and request patterns.

-To setup the environment, simply use the released docker images for any backends, or build your own docker image following the READMEs in `./components/backends/<vllm/sglang/trtllm>/README.md`, or follow the `Developing Locally` section in [README.md](../../README.md) to setup the environment locally. If using the local environment, make sure to install dependencies by running `UV_GIT_LFS=1 uv pip install --no-cache -r container/deps/requirements.txt`
+To set up the environment, simply use the released docker images for any backends, or build your own docker image following the READMEs in `./examples/backends/<vllm/sglang/trtllm>/README.md`, or follow the `Developing Locally` section in [README.md](../../README.md) to set up the environment locally. If using the local environment, make sure to install dependencies by running `UV_GIT_LFS=1 uv pip install --no-cache -r container/deps/requirements.txt`

 ## Pre-Requisite: Pre-Deployment Profiling Data

@@ -170,12 +170,12 @@ Test complete scaling behavior including Kubernetes deployment and load generati

 **Prepare the test deployment manifest:**

-The test requires modifying `components/backends/vllm/deploy/disagg_planner.yaml` with test-specific planner arguments:
+The test requires modifying `examples/backends/vllm/deploy/disagg_planner.yaml` with test-specific planner arguments:

 1. Copy the base deployment:

 ```bash
-cp components/backends/vllm/deploy/disagg_planner.yaml tests/planner/scaling/disagg_planner.yaml
+cp examples/backends/vllm/deploy/disagg_planner.yaml tests/planner/scaling/disagg_planner.yaml
 ```

 2. Edit `tests/planner/scaling/disagg_planner.yaml`. Ensure all services use the correct image. Modify the Planner service args:

--- a/tests/planner/perf_test_configs/agg_8b.yaml
+++ b/tests/planner/perf_test_configs/agg_8b.yaml
@@ -39,7 +39,7 @@ spec:
      extraPodSpec:
        mainContainer:
          image: my-registry/vllm-runtime:my-tag
-          workingDir: /workspace/components/backends/vllm
+          workingDir: /workspace/examples/backends/vllm
          command:
            - /bin/sh
            - -c
@@ -89,7 +89,7 @@ spec:
            periodSeconds: 10
            failureThreshold: 60
          image: my-registry/vllm-runtime:my-tag
-          workingDir: /workspace/components/backends/vllm
+          workingDir: /workspace/examples/backends/vllm
          command:
            - /bin/sh
            - -c

--- a/tests/planner/perf_test_configs/disagg_8b_2p2d.yaml
+++ b/tests/planner/perf_test_configs/disagg_8b_2p2d.yaml
@@ -39,7 +39,7 @@ spec:
      extraPodSpec:
        mainContainer:
          image: my-registry/vllm-runtime:my-tag
-          workingDir: /workspace/components/backends/vllm
+          workingDir: /workspace/examples/backends/vllm
          command:
            - /bin/sh
            - -c
@@ -89,7 +89,7 @@ spec:
            periodSeconds: 10
            failureThreshold: 60
          image: my-registry/vllm-runtime:my-tag
-          workingDir: /workspace/components/backends/vllm
+          workingDir: /workspace/examples/backends/vllm
          command:
            - /bin/sh
            - -c
@@ -139,7 +139,7 @@ spec:
            periodSeconds: 10
            failureThreshold: 60
          image: my-registry/vllm-runtime:my-tag
-          workingDir: /workspace/components/backends/vllm
+          workingDir: /workspace/examples/backends/vllm
          command:
            - /bin/sh
            - -c

--- a/tests/planner/perf_test_configs/disagg_8b_3p1d.yaml
+++ b/tests/planner/perf_test_configs/disagg_8b_3p1d.yaml
@@ -39,7 +39,7 @@ spec:
      extraPodSpec:
        mainContainer:
          image: my-registry/vllm-runtime:my-tag
-          workingDir: /workspace/components/backends/vllm
+          workingDir: /workspace/examples/backends/vllm
          command:
            - /bin/sh
            - -c
@@ -89,7 +89,7 @@ spec:
            periodSeconds: 10
            failureThreshold: 60
          image: my-registry/vllm-runtime:my-tag
-          workingDir: /workspace/components/backends/vllm
+          workingDir: /workspace/examples/backends/vllm
          command:
            - /bin/sh
            - -c
@@ -139,7 +139,7 @@ spec:
            periodSeconds: 10
            failureThreshold: 60
          image: my-registry/vllm-runtime:my-tag
-          workingDir: /workspace/components/backends/vllm
+          workingDir: /workspace/examples/backends/vllm
          command:
            - /bin/sh
            - -c

--- a/tests/planner/perf_test_configs/disagg_8b_planner.yaml
+++ b/tests/planner/perf_test_configs/disagg_8b_planner.yaml
@@ -42,7 +42,7 @@ spec:
      extraPodSpec:
        mainContainer:
          image: my-registry/vllm-runtime:my-tag
-          workingDir: /workspace/components/backends/vllm
+          workingDir: /workspace/examples/backends/vllm
          command:
            - /bin/sh
            - -c
@@ -139,7 +139,7 @@ spec:
            periodSeconds: 10
            failureThreshold: 60
          image: my-registry/vllm-runtime:my-tag
-          workingDir: /workspace/components/backends/vllm
+          workingDir: /workspace/examples/backends/vllm
          command:
            - python3
          args:
@@ -196,7 +196,7 @@ spec:
            periodSeconds: 10
            failureThreshold: 60
          image: my-registry/vllm-runtime:my-tag
-          workingDir: /workspace/components/backends/vllm
+          workingDir: /workspace/examples/backends/vllm
          command:
            - python3
          args: