Unverified Commit 8bd37c96 authored by Anant Sharma's avatar Anant Sharma Committed by GitHub
Browse files

refactor: move backend deploy, launch and slurm files from components to examples (#3849)


Signed-off-by: default avatarAnant Sharma <anants@nvidia.com>
parent 78359046
...@@ -28,7 +28,7 @@ spec: ...@@ -28,7 +28,7 @@ spec:
startupProbe: startupProbe:
initialDelaySeconds: 180 initialDelaySeconds: 180
image: my-registry/vllm-runtime:my-tag image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm workingDir: /workspace/examples/backends/vllm
command: command:
- /bin/sh - /bin/sh
- -c - -c
...@@ -50,7 +50,7 @@ spec: ...@@ -50,7 +50,7 @@ spec:
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: my-registry/vllm-runtime:my-tag image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm workingDir: /workspace/examples/backends/vllm
command: command:
- /bin/sh - /bin/sh
- -c - -c
......
...@@ -94,7 +94,7 @@ async fn wrapper(runtime: dynamo_runtime::Runtime) -> anyhow::Result<()> { ...@@ -94,7 +94,7 @@ async fn wrapper(runtime: dynamo_runtime::Runtime) -> anyhow::Result<()> {
"out" => { "out" => {
if val == "sglang" || val == "trtllm" || val == "vllm" { if val == "sglang" || val == "trtllm" || val == "vllm" {
tracing::error!( tracing::error!(
"To run the {val} engine please use the Python interface, see root README or look in directory `components/backends/`." "To run the {val} engine please use the Python interface, see root README or look in directory `examples/backends/`."
); );
std::process::exit(1); std::process::exit(1);
} }
......
...@@ -117,7 +117,7 @@ When you need to add or modify metrics in Method 1 (ForwardPassMetrics Pub/Sub v ...@@ -117,7 +117,7 @@ When you need to add or modify metrics in Method 1 (ForwardPassMetrics Pub/Sub v
} }
``` ```
4. **`components/backends/sglang/.../publisher.py`** - Update Python code to compute new metric: 4. **`components/src/dynamo/sglang/publisher.py`** - Update Python code to compute new metric:
```python ```python
def collect_metrics(): def collect_metrics():
worker_stats = WorkerStats( worker_stats = WorkerStats(
...@@ -268,7 +268,7 @@ Dynamic Registration provides type hints (via `.pyi` stub files) for typed metri ...@@ -268,7 +268,7 @@ Dynamic Registration provides type hints (via `.pyi` stub files) for typed metri
```mermaid ```mermaid
graph TB graph TB
subgraph "Python Layer" subgraph "Python Layer"
PY[Python Application<br/>components/backends/sglang/main.py] PY[Python Application<br/>components/src/dynamo/sglang/main.py]
style PY fill:#3776ab,color:#fff style PY fill:#3776ab,color:#fff
end end
......
...@@ -154,7 +154,7 @@ addopts = [ ...@@ -154,7 +154,7 @@ addopts = [
"--ignore-glob=components/src/dynamo/sglang/multimodal_utils/*", "--ignore-glob=components/src/dynamo/sglang/multimodal_utils/*",
"--ignore-glob=components/src/dynamo/vllm/multimodal_utils/*", "--ignore-glob=components/src/dynamo/vllm/multimodal_utils/*",
"--ignore-glob=components/src/dynamo/vllm/multimodal_handlers/*", "--ignore-glob=components/src/dynamo/vllm/multimodal_handlers/*",
"--ignore-glob=components/backends/sglang/slurm_jobs/*", "--ignore-glob=examples/backends/sglang/slurm_jobs/*",
# FIXME: Get relative/generic blob paths to work here # FIXME: Get relative/generic blob paths to work here
] ]
xfail_strict = true xfail_strict = true
......
...@@ -48,7 +48,7 @@ spec: ...@@ -48,7 +48,7 @@ spec:
timeoutSeconds: 10 timeoutSeconds: 10
failureThreshold: 600 failureThreshold: 600
image: my-registry/sglang-wideep-runtime:my-tag image: my-registry/sglang-wideep-runtime:my-tag
workingDir: /workspace/components/backends/sglang workingDir: /workspace/examples/backends/sglang
command: command:
- python3 - python3
- -m - -m
...@@ -101,7 +101,7 @@ spec: ...@@ -101,7 +101,7 @@ spec:
timeoutSeconds: 10 timeoutSeconds: 10
failureThreshold: 600 failureThreshold: 600
image: my-registry/sglang-wideep-runtime:my-tag image: my-registry/sglang-wideep-runtime:my-tag
workingDir: /workspace/components/backends/sglang workingDir: /workspace/examples/backends/sglang
command: command:
- python3 - python3
- -m - -m
......
...@@ -46,7 +46,7 @@ spec: ...@@ -46,7 +46,7 @@ spec:
timeoutSeconds: 10 timeoutSeconds: 10
failureThreshold: 600 failureThreshold: 600
image: my-registry/sglang-wideep-runtime:my-tag image: my-registry/sglang-wideep-runtime:my-tag
workingDir: /workspace/components/backends/sglang workingDir: /workspace/examples/backends/sglang
command: command:
- python3 - python3
- -m - -m
...@@ -95,7 +95,7 @@ spec: ...@@ -95,7 +95,7 @@ spec:
timeoutSeconds: 10 timeoutSeconds: 10
failureThreshold: 600 failureThreshold: 600
image: my-registry/sglang-wideep-runtime:my-tag image: my-registry/sglang-wideep-runtime:my-tag
workingDir: /workspace/components/backends/sglang workingDir: /workspace/examples/backends/sglang
command: command:
- python3 - python3
- -m - -m
......
...@@ -97,7 +97,7 @@ spec: ...@@ -97,7 +97,7 @@ spec:
- mountPath: /opt/dynamo/configs - mountPath: /opt/dynamo/configs
name: llm-config name: llm-config
readOnly: true readOnly: true
workingDir: /workspace/components/backends/trtllm workingDir: /workspace/examples/backends/trtllm
volumes: volumes:
- configMap: - configMap:
name: llm-config name: llm-config
......
...@@ -19,7 +19,7 @@ spec: ...@@ -19,7 +19,7 @@ spec:
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm workingDir: /workspace/examples/backends/vllm
replicas: 1 replicas: 1
VllmPrefillWorker: VllmPrefillWorker:
componentType: worker componentType: worker
...@@ -43,7 +43,7 @@ spec: ...@@ -43,7 +43,7 @@ spec:
- /bin/sh - /bin/sh
- -c - -c
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm workingDir: /workspace/examples/backends/vllm
replicas: 1 replicas: 1
resources: resources:
limits: limits:
......
...@@ -19,7 +19,7 @@ spec: ...@@ -19,7 +19,7 @@ spec:
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm workingDir: /workspace/examples/backends/vllm
replicas: 1 replicas: 1
VllmPrefillWorker: VllmPrefillWorker:
componentType: worker componentType: worker
...@@ -43,7 +43,7 @@ spec: ...@@ -43,7 +43,7 @@ spec:
- /bin/sh - /bin/sh
- -c - -c
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm workingDir: /workspace/examples/backends/vllm
replicas: 1 replicas: 1
resources: resources:
limits: limits:
...@@ -72,7 +72,7 @@ spec: ...@@ -72,7 +72,7 @@ spec:
- /bin/sh - /bin/sh
- -c - -c
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm workingDir: /workspace/examples/backends/vllm
replicas: 1 replicas: 1
resources: resources:
limits: limits:
......
...@@ -19,7 +19,7 @@ spec: ...@@ -19,7 +19,7 @@ spec:
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm workingDir: /workspace/examples/backends/vllm
replicas: 1 replicas: 1
VllmPrefillWorker: VllmPrefillWorker:
componentType: worker componentType: worker
...@@ -53,7 +53,7 @@ spec: ...@@ -53,7 +53,7 @@ spec:
- /bin/sh - /bin/sh
- -c - -c
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm workingDir: /workspace/examples/backends/vllm
replicas: 2 replicas: 2
resources: resources:
limits: limits:
...@@ -92,7 +92,7 @@ spec: ...@@ -92,7 +92,7 @@ spec:
- /bin/sh - /bin/sh
- -c - -c
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm workingDir: /workspace/examples/backends/vllm
replicas: 1 replicas: 1
resources: resources:
limits: limits:
......
...@@ -110,7 +110,7 @@ spec: ...@@ -110,7 +110,7 @@ spec:
- mountPath: /opt/dynamo/configs - mountPath: /opt/dynamo/configs
name: llm-config name: llm-config
readOnly: true readOnly: true
workingDir: /workspace/components/backends/trtllm workingDir: /workspace/examples/backends/trtllm
volumes: volumes:
- configMap: - configMap:
name: llm-config name: llm-config
......
...@@ -270,7 +270,7 @@ spec: ...@@ -270,7 +270,7 @@ spec:
- mountPath: /opt/dynamo/configs - mountPath: /opt/dynamo/configs
name: llm-config-prefill name: llm-config-prefill
readOnly: true readOnly: true
workingDir: /workspace/components/backends/trtllm workingDir: /workspace/examples/backends/trtllm
volumes: volumes:
- configMap: - configMap:
name: llm-config-prefill name: llm-config-prefill
...@@ -330,7 +330,7 @@ spec: ...@@ -330,7 +330,7 @@ spec:
- mountPath: /opt/dynamo/configs - mountPath: /opt/dynamo/configs
name: llm-config-decode name: llm-config-decode
readOnly: true readOnly: true
workingDir: /workspace/components/backends/trtllm workingDir: /workspace/examples/backends/trtllm
volumes: volumes:
- configMap: - configMap:
name: llm-config-decode name: llm-config-decode
......
...@@ -256,8 +256,8 @@ def _create_deployments_for_backend(backend: str) -> Dict[str, DeploymentInfo]: ...@@ -256,8 +256,8 @@ def _create_deployments_for_backend(backend: str) -> Dict[str, DeploymentInfo]:
# Define the yaml files for agg and disagg deployments # Define the yaml files for agg and disagg deployments
yaml_files = { yaml_files = {
"agg": f"components/backends/{backend}/deploy/agg.yaml", "agg": f"examples/backends/{backend}/deploy/agg.yaml",
"disagg": f"components/backends/{backend}/deploy/disagg.yaml", "disagg": f"examples/backends/{backend}/deploy/disagg.yaml",
} }
# Define the different configurations to test # Define the different configurations to test
......
...@@ -49,7 +49,7 @@ spec: ...@@ -49,7 +49,7 @@ spec:
- name: nvcr-imagepullsecret - name: nvcr-imagepullsecret
mainContainer: mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm workingDir: /workspace/examples/backends/vllm
command: command:
- python3 - python3
- -m - -m
......
...@@ -52,7 +52,7 @@ spec: ...@@ -52,7 +52,7 @@ spec:
- name: nvcr-imagepullsecret - name: nvcr-imagepullsecret
mainContainer: mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm workingDir: /workspace/examples/backends/vllm
command: command:
- python3 - python3
- -m - -m
...@@ -117,7 +117,7 @@ spec: ...@@ -117,7 +117,7 @@ spec:
- name: nvcr-imagepullsecret - name: nvcr-imagepullsecret
mainContainer: mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm workingDir: /workspace/examples/backends/vllm
command: command:
- python3 - python3
- -m - -m
......
...@@ -10,7 +10,7 @@ This directory contains comprehensive testing tools for validating the SLA plann ...@@ -10,7 +10,7 @@ This directory contains comprehensive testing tools for validating the SLA plann
The SLA planner monitors metrics every 60 seconds (default adjustment interval) and scales The SLA planner monitors metrics every 60 seconds (default adjustment interval) and scales
prefill/decode workers based on TTFT, ITL, and request patterns. prefill/decode workers based on TTFT, ITL, and request patterns.
To setup the environment, simply use the released docker images for any backends, or build your own docker image following the READMEs in `./components/backends/<vllm/sglang/trtllm>/README.md`, or follow the `Developing Locally` section in [README.md](../../README.md) to setup the environment locally. If using the local environment, make sure to install dependencies by running `UV_GIT_LFS=1 uv pip install --no-cache -r container/deps/requirements.txt` To setup the environment, simply use the released docker images for any backends, or build your own docker image following the READMEs in `./examples/backends/<vllm/sglang/trtllm>/README.md`, or follow the `Developing Locally` section in [README.md](../../README.md) to setup the environment locally. If using the local environment, make sure to install dependencies by running `UV_GIT_LFS=1 uv pip install --no-cache -r container/deps/requirements.txt`
## Pre-Requisite: Pre-Deployment Profiling Data ## Pre-Requisite: Pre-Deployment Profiling Data
...@@ -170,12 +170,12 @@ Test complete scaling behavior including Kubernetes deployment and load generati ...@@ -170,12 +170,12 @@ Test complete scaling behavior including Kubernetes deployment and load generati
**Prepare the test deployment manifest:** **Prepare the test deployment manifest:**
The test requires modifying `components/backends/vllm/deploy/disagg_planner.yaml` with test-specific planner arguments: The test requires modifying `examples/backends/vllm/deploy/disagg_planner.yaml` with test-specific planner arguments:
1. Copy the base deployment: 1. Copy the base deployment:
```bash ```bash
cp components/backends/vllm/deploy/disagg_planner.yaml tests/planner/scaling/disagg_planner.yaml cp examples/backends/vllm/deploy/disagg_planner.yaml tests/planner/scaling/disagg_planner.yaml
``` ```
2. Edit `tests/planner/scaling/disagg_planner.yaml`. Ensure all services use the correct image. Modify the Planner service args: 2. Edit `tests/planner/scaling/disagg_planner.yaml`. Ensure all services use the correct image. Modify the Planner service args:
......
...@@ -39,7 +39,7 @@ spec: ...@@ -39,7 +39,7 @@ spec:
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: my-registry/vllm-runtime:my-tag image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm workingDir: /workspace/examples/backends/vllm
command: command:
- /bin/sh - /bin/sh
- -c - -c
...@@ -89,7 +89,7 @@ spec: ...@@ -89,7 +89,7 @@ spec:
periodSeconds: 10 periodSeconds: 10
failureThreshold: 60 failureThreshold: 60
image: my-registry/vllm-runtime:my-tag image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm workingDir: /workspace/examples/backends/vllm
command: command:
- /bin/sh - /bin/sh
- -c - -c
......
...@@ -39,7 +39,7 @@ spec: ...@@ -39,7 +39,7 @@ spec:
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: my-registry/vllm-runtime:my-tag image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm workingDir: /workspace/examples/backends/vllm
command: command:
- /bin/sh - /bin/sh
- -c - -c
...@@ -89,7 +89,7 @@ spec: ...@@ -89,7 +89,7 @@ spec:
periodSeconds: 10 periodSeconds: 10
failureThreshold: 60 failureThreshold: 60
image: my-registry/vllm-runtime:my-tag image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm workingDir: /workspace/examples/backends/vllm
command: command:
- /bin/sh - /bin/sh
- -c - -c
...@@ -139,7 +139,7 @@ spec: ...@@ -139,7 +139,7 @@ spec:
periodSeconds: 10 periodSeconds: 10
failureThreshold: 60 failureThreshold: 60
image: my-registry/vllm-runtime:my-tag image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm workingDir: /workspace/examples/backends/vllm
command: command:
- /bin/sh - /bin/sh
- -c - -c
......
...@@ -39,7 +39,7 @@ spec: ...@@ -39,7 +39,7 @@ spec:
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: my-registry/vllm-runtime:my-tag image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm workingDir: /workspace/examples/backends/vllm
command: command:
- /bin/sh - /bin/sh
- -c - -c
...@@ -89,7 +89,7 @@ spec: ...@@ -89,7 +89,7 @@ spec:
periodSeconds: 10 periodSeconds: 10
failureThreshold: 60 failureThreshold: 60
image: my-registry/vllm-runtime:my-tag image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm workingDir: /workspace/examples/backends/vllm
command: command:
- /bin/sh - /bin/sh
- -c - -c
...@@ -139,7 +139,7 @@ spec: ...@@ -139,7 +139,7 @@ spec:
periodSeconds: 10 periodSeconds: 10
failureThreshold: 60 failureThreshold: 60
image: my-registry/vllm-runtime:my-tag image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm workingDir: /workspace/examples/backends/vllm
command: command:
- /bin/sh - /bin/sh
- -c - -c
......
...@@ -42,7 +42,7 @@ spec: ...@@ -42,7 +42,7 @@ spec:
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: my-registry/vllm-runtime:my-tag image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm workingDir: /workspace/examples/backends/vllm
command: command:
- /bin/sh - /bin/sh
- -c - -c
...@@ -139,7 +139,7 @@ spec: ...@@ -139,7 +139,7 @@ spec:
periodSeconds: 10 periodSeconds: 10
failureThreshold: 60 failureThreshold: 60
image: my-registry/vllm-runtime:my-tag image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm workingDir: /workspace/examples/backends/vllm
command: command:
- python3 - python3
args: args:
...@@ -196,7 +196,7 @@ spec: ...@@ -196,7 +196,7 @@ spec:
periodSeconds: 10 periodSeconds: 10
failureThreshold: 60 failureThreshold: 60
image: my-registry/vllm-runtime:my-tag image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm workingDir: /workspace/examples/backends/vllm
command: command:
- python3 - python3
args: args:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment