Commit 8bd37c96 (unverified), authored by Anant Sharma, committed by GitHub
Browse files

refactor: move backend deploy, launch and slurm files from components to examples (#3849)


Signed-off-by: Anant Sharma <anants@nvidia.com>
parent 78359046
......@@ -28,7 +28,7 @@ spec:
startupProbe:
initialDelaySeconds: 180
image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
workingDir: /workspace/examples/backends/vllm
command:
- /bin/sh
- -c
......@@ -50,7 +50,7 @@ spec:
extraPodSpec:
mainContainer:
image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
workingDir: /workspace/examples/backends/vllm
command:
- /bin/sh
- -c
......
......@@ -94,7 +94,7 @@ async fn wrapper(runtime: dynamo_runtime::Runtime) -> anyhow::Result<()> {
"out" => {
if val == "sglang" || val == "trtllm" || val == "vllm" {
tracing::error!(
"To run the {val} engine please use the Python interface, see root README or look in directory `components/backends/`."
"To run the {val} engine please use the Python interface, see root README or look in directory `examples/backends/`."
);
std::process::exit(1);
}
......
......@@ -117,7 +117,7 @@ When you need to add or modify metrics in Method 1 (ForwardPassMetrics Pub/Sub v
}
```
4. **`components/backends/sglang/.../publisher.py`** - Update Python code to compute new metric:
4. **`components/src/dynamo/sglang/publisher.py`** - Update Python code to compute new metric:
```python
def collect_metrics():
worker_stats = WorkerStats(
......@@ -268,7 +268,7 @@ Dynamic Registration provides type hints (via `.pyi` stub files) for typed metri
```mermaid
graph TB
subgraph "Python Layer"
PY[Python Application<br/>components/backends/sglang/main.py]
PY[Python Application<br/>components/src/dynamo/sglang/main.py]
style PY fill:#3776ab,color:#fff
end
......
......@@ -154,7 +154,7 @@ addopts = [
"--ignore-glob=components/src/dynamo/sglang/multimodal_utils/*",
"--ignore-glob=components/src/dynamo/vllm/multimodal_utils/*",
"--ignore-glob=components/src/dynamo/vllm/multimodal_handlers/*",
"--ignore-glob=components/backends/sglang/slurm_jobs/*",
"--ignore-glob=examples/backends/sglang/slurm_jobs/*",
# FIXME: Get relative/generic blob paths to work here
]
xfail_strict = true
......
......@@ -48,7 +48,7 @@ spec:
timeoutSeconds: 10
failureThreshold: 600
image: my-registry/sglang-wideep-runtime:my-tag
workingDir: /workspace/components/backends/sglang
workingDir: /workspace/examples/backends/sglang
command:
- python3
- -m
......@@ -101,7 +101,7 @@ spec:
timeoutSeconds: 10
failureThreshold: 600
image: my-registry/sglang-wideep-runtime:my-tag
workingDir: /workspace/components/backends/sglang
workingDir: /workspace/examples/backends/sglang
command:
- python3
- -m
......
......@@ -46,7 +46,7 @@ spec:
timeoutSeconds: 10
failureThreshold: 600
image: my-registry/sglang-wideep-runtime:my-tag
workingDir: /workspace/components/backends/sglang
workingDir: /workspace/examples/backends/sglang
command:
- python3
- -m
......@@ -95,7 +95,7 @@ spec:
timeoutSeconds: 10
failureThreshold: 600
image: my-registry/sglang-wideep-runtime:my-tag
workingDir: /workspace/components/backends/sglang
workingDir: /workspace/examples/backends/sglang
command:
- python3
- -m
......
......@@ -97,7 +97,7 @@ spec:
- mountPath: /opt/dynamo/configs
name: llm-config
readOnly: true
workingDir: /workspace/components/backends/trtllm
workingDir: /workspace/examples/backends/trtllm
volumes:
- configMap:
name: llm-config
......
......@@ -19,7 +19,7 @@ spec:
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
workingDir: /workspace/examples/backends/vllm
replicas: 1
VllmPrefillWorker:
componentType: worker
......@@ -43,7 +43,7 @@ spec:
- /bin/sh
- -c
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
workingDir: /workspace/examples/backends/vllm
replicas: 1
resources:
limits:
......
......@@ -19,7 +19,7 @@ spec:
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
workingDir: /workspace/examples/backends/vllm
replicas: 1
VllmPrefillWorker:
componentType: worker
......@@ -43,7 +43,7 @@ spec:
- /bin/sh
- -c
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
workingDir: /workspace/examples/backends/vllm
replicas: 1
resources:
limits:
......@@ -72,7 +72,7 @@ spec:
- /bin/sh
- -c
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
workingDir: /workspace/examples/backends/vllm
replicas: 1
resources:
limits:
......
......@@ -19,7 +19,7 @@ spec:
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
workingDir: /workspace/examples/backends/vllm
replicas: 1
VllmPrefillWorker:
componentType: worker
......@@ -53,7 +53,7 @@ spec:
- /bin/sh
- -c
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
workingDir: /workspace/examples/backends/vllm
replicas: 2
resources:
limits:
......@@ -92,7 +92,7 @@ spec:
- /bin/sh
- -c
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
workingDir: /workspace/examples/backends/vllm
replicas: 1
resources:
limits:
......
......@@ -110,7 +110,7 @@ spec:
- mountPath: /opt/dynamo/configs
name: llm-config
readOnly: true
workingDir: /workspace/components/backends/trtllm
workingDir: /workspace/examples/backends/trtllm
volumes:
- configMap:
name: llm-config
......
......@@ -270,7 +270,7 @@ spec:
- mountPath: /opt/dynamo/configs
name: llm-config-prefill
readOnly: true
workingDir: /workspace/components/backends/trtllm
workingDir: /workspace/examples/backends/trtllm
volumes:
- configMap:
name: llm-config-prefill
......@@ -330,7 +330,7 @@ spec:
- mountPath: /opt/dynamo/configs
name: llm-config-decode
readOnly: true
workingDir: /workspace/components/backends/trtllm
workingDir: /workspace/examples/backends/trtllm
volumes:
- configMap:
name: llm-config-decode
......
......@@ -256,8 +256,8 @@ def _create_deployments_for_backend(backend: str) -> Dict[str, DeploymentInfo]:
# Define the yaml files for agg and disagg deployments
yaml_files = {
"agg": f"components/backends/{backend}/deploy/agg.yaml",
"disagg": f"components/backends/{backend}/deploy/disagg.yaml",
"agg": f"examples/backends/{backend}/deploy/agg.yaml",
"disagg": f"examples/backends/{backend}/deploy/disagg.yaml",
}
# Define the different configurations to test
......
......@@ -49,7 +49,7 @@ spec:
- name: nvcr-imagepullsecret
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
workingDir: /workspace/examples/backends/vllm
command:
- python3
- -m
......
......@@ -52,7 +52,7 @@ spec:
- name: nvcr-imagepullsecret
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
workingDir: /workspace/examples/backends/vllm
command:
- python3
- -m
......@@ -117,7 +117,7 @@ spec:
- name: nvcr-imagepullsecret
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
workingDir: /workspace/examples/backends/vllm
command:
- python3
- -m
......
......@@ -10,7 +10,7 @@ This directory contains comprehensive testing tools for validating the SLA plann
The SLA planner monitors metrics every 60 seconds (default adjustment interval) and scales
prefill/decode workers based on TTFT, ITL, and request patterns.
To setup the environment, simply use the released docker images for any backends, or build your own docker image following the READMEs in `./components/backends/<vllm/sglang/trtllm>/README.md`, or follow the `Developing Locally` section in [README.md](../../README.md) to setup the environment locally. If using the local environment, make sure to install dependencies by running `UV_GIT_LFS=1 uv pip install --no-cache -r container/deps/requirements.txt`
To setup the environment, simply use the released docker images for any backends, or build your own docker image following the READMEs in `./examples/backends/<vllm/sglang/trtllm>/README.md`, or follow the `Developing Locally` section in [README.md](../../README.md) to setup the environment locally. If using the local environment, make sure to install dependencies by running `UV_GIT_LFS=1 uv pip install --no-cache -r container/deps/requirements.txt`
## Pre-Requisite: Pre-Deployment Profiling Data
......@@ -170,12 +170,12 @@ Test complete scaling behavior including Kubernetes deployment and load generati
**Prepare the test deployment manifest:**
The test requires modifying `components/backends/vllm/deploy/disagg_planner.yaml` with test-specific planner arguments:
The test requires modifying `examples/backends/vllm/deploy/disagg_planner.yaml` with test-specific planner arguments:
1. Copy the base deployment:
```bash
cp components/backends/vllm/deploy/disagg_planner.yaml tests/planner/scaling/disagg_planner.yaml
cp examples/backends/vllm/deploy/disagg_planner.yaml tests/planner/scaling/disagg_planner.yaml
```
2. Edit `tests/planner/scaling/disagg_planner.yaml`. Ensure all services use the correct image. Modify the Planner service args:
......
......@@ -39,7 +39,7 @@ spec:
extraPodSpec:
mainContainer:
image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
workingDir: /workspace/examples/backends/vllm
command:
- /bin/sh
- -c
......@@ -89,7 +89,7 @@ spec:
periodSeconds: 10
failureThreshold: 60
image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
workingDir: /workspace/examples/backends/vllm
command:
- /bin/sh
- -c
......
......@@ -39,7 +39,7 @@ spec:
extraPodSpec:
mainContainer:
image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
workingDir: /workspace/examples/backends/vllm
command:
- /bin/sh
- -c
......@@ -89,7 +89,7 @@ spec:
periodSeconds: 10
failureThreshold: 60
image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
workingDir: /workspace/examples/backends/vllm
command:
- /bin/sh
- -c
......@@ -139,7 +139,7 @@ spec:
periodSeconds: 10
failureThreshold: 60
image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
workingDir: /workspace/examples/backends/vllm
command:
- /bin/sh
- -c
......
......@@ -39,7 +39,7 @@ spec:
extraPodSpec:
mainContainer:
image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
workingDir: /workspace/examples/backends/vllm
command:
- /bin/sh
- -c
......@@ -89,7 +89,7 @@ spec:
periodSeconds: 10
failureThreshold: 60
image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
workingDir: /workspace/examples/backends/vllm
command:
- /bin/sh
- -c
......@@ -139,7 +139,7 @@ spec:
periodSeconds: 10
failureThreshold: 60
image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
workingDir: /workspace/examples/backends/vllm
command:
- /bin/sh
- -c
......
......@@ -42,7 +42,7 @@ spec:
extraPodSpec:
mainContainer:
image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
workingDir: /workspace/examples/backends/vllm
command:
- /bin/sh
- -c
......@@ -139,7 +139,7 @@ spec:
periodSeconds: 10
failureThreshold: 60
image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
workingDir: /workspace/examples/backends/vllm
command:
- python3
args:
......@@ -196,7 +196,7 @@ spec:
periodSeconds: 10
failureThreshold: 60
image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
workingDir: /workspace/examples/backends/vllm
command:
- python3
args:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment