Unverified Commit 7d2fc13e authored by Thomas Montfort, committed by GitHub
Browse files

fix: small planner manifest/doc fixes (#3129)


Signed-off-by: tmontfort <tmontfort@nvidia.com>
parent 5915a69b
...@@ -29,7 +29,7 @@ spec: ...@@ -29,7 +29,7 @@ spec:
command: ["python", "-m", "benchmarks.profiler.profile_sla"] command: ["python", "-m", "benchmarks.profiler.profile_sla"]
args: args:
- --config - --config
- /data/configs/disagg.yaml - ${DGD_CONFIG_FILE}
- --output-dir - --output-dir
- /data/profiling_results - /data/profiling_results
- --namespace - --namespace
......
...@@ -48,7 +48,7 @@ spec: ...@@ -48,7 +48,7 @@ spec:
pvc: pvc:
create: false create: false
name: dynamo-pvc # Must be pre-created before deployment and SLA profiler must have been run name: dynamo-pvc # Must be pre-created before deployment and SLA profiler must have been run
mountPoint: /data/profiling_results mountPoint: /data
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/sglang-runtime:hzhou-0811-1 image: nvcr.io/nvidian/nim-llm-dev/sglang-runtime:hzhou-0811-1
......
...@@ -66,7 +66,7 @@ spec: ...@@ -66,7 +66,7 @@ spec:
pvc: pvc:
create: false create: false
name: dynamo-pvc # Must be pre-created before deployment and SLA profiler must have been run name: dynamo-pvc # Must be pre-created before deployment and SLA profiler must have been run
mountPoint: /workspace/profiling_results mountPoint: /data
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidian/dynamo-dev/dynamo-trtllm-runtime:hzhou-0909-03 image: nvcr.io/nvidian/dynamo-dev/dynamo-trtllm-runtime:hzhou-0909-03
...@@ -82,7 +82,7 @@ spec: ...@@ -82,7 +82,7 @@ spec:
- --environment=kubernetes - --environment=kubernetes
- --backend=trtllm - --backend=trtllm
- --adjustment-interval=60 - --adjustment-interval=60
- --profile-results-dir=/workspace/profiling_results - --profile-results-dir=/data/profiling_results
- --prometheus-port=9085 - --prometheus-port=9085
Prometheus: # NOTE: this is set on Prometheus to ensure a service is created for the Prometheus component. This is a workaround and should be managed differently. Prometheus: # NOTE: this is set on Prometheus to ensure a service is created for the Prometheus component. This is a workaround and should be managed differently.
dynamoNamespace: trtllm-disagg-planner dynamoNamespace: trtllm-disagg-planner
......
...@@ -13,6 +13,8 @@ spec: ...@@ -13,6 +13,8 @@ spec:
value: '{"Prometheus":{"global":{"scrape_interval":"5s"},"scrape_configs":[{"job_name":"prometheus","static_configs":[{"targets":["localhost:9090"]}]},{"job_name":"frontend","static_configs":[{"targets":["vllm-disagg-planner-frontend:8000"]}]}]}}' value: '{"Prometheus":{"global":{"scrape_interval":"5s"},"scrape_configs":[{"job_name":"prometheus","static_configs":[{"targets":["localhost:9090"]}]},{"job_name":"frontend","static_configs":[{"targets":["vllm-disagg-planner-frontend:8000"]}]}]}}'
- name: DYNAMO_NAMESPACE - name: DYNAMO_NAMESPACE
value: "vllm-disagg-planner" value: "vllm-disagg-planner"
- name: PROMETHEUS_PORT
value: "8000"
services: services:
Frontend: Frontend:
dynamoNamespace: vllm-disagg-planner dynamoNamespace: vllm-disagg-planner
...@@ -48,7 +50,7 @@ spec: ...@@ -48,7 +50,7 @@ spec:
pvc: pvc:
create: false create: false
name: dynamo-pvc # Must be pre-created before deployment and SLA profiler must have been run name: dynamo-pvc # Must be pre-created before deployment and SLA profiler must have been run
mountPoint: /data/profiling_results mountPoint: /data
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1 image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
......
...@@ -70,6 +70,12 @@ log "Applying benchmarking manifests to namespace $NAMESPACE" ...@@ -70,6 +70,12 @@ log "Applying benchmarking manifests to namespace $NAMESPACE"
export NAMESPACE # ensure envsubst can see it export NAMESPACE # ensure envsubst can see it
for mf in "$(dirname "$0")/manifests"/*.yaml; do for mf in "$(dirname "$0")/manifests"/*.yaml; do
if [[ -f "$mf" ]]; then if [[ -f "$mf" ]]; then
# Skip pvc-access-pod.yaml as it's managed by inject_manifest.py
if [[ "$(basename "$mf")" == "pvc-access-pod.yaml" ]]; then
log "Skipping $mf (managed by inject_manifest.py)"
continue
fi
if command -v envsubst >/dev/null 2>&1; then if command -v envsubst >/dev/null 2>&1; then
envsubst < "$mf" | kubectl -n "$NAMESPACE" apply -f - envsubst < "$mf" | kubectl -n "$NAMESPACE" apply -f -
else else
......
...@@ -100,31 +100,6 @@ pip install -r deploy/utils/requirements.txt ...@@ -100,31 +100,6 @@ pip install -r deploy/utils/requirements.txt
Use the injector utility to place your DGD manifest into the PVC. The profiling job will read the path you specify. Use the injector utility to place your DGD manifest into the PVC. The profiling job will read the path you specify.
```bash
# Inject your disagg manifest
python3 -m deploy.utils.inject_manifest \
--namespace $NAMESPACE \
--src components/backends/vllm/deploy/disagg.yaml \
--dest /data/configs/disagg.yaml
# Set the docker image for the profiling job; any docker image that contains your script.
export DOCKER_IMAGE=nvcr.io/nvidia/dynamo:latest-vllm
```
### Configure container image (optional)
You have two options for configuring your profiling setup:
**Option A: Use pre-built image with custom config injection (recommended)**
Use the default pre-built image and inject custom configurations via PVC:
1. **Set the container image:**
```bash
export DOCKER_IMAGE=nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1 # or any existing image tag
```
2. **Inject your custom disagg configuration:**
```bash ```bash
# Use default disagg.yaml config # Use default disagg.yaml config
python3 -m deploy.utils.inject_manifest --namespace $NAMESPACE --src components/backends/vllm/deploy/disagg.yaml --dest /data/configs/disagg.yaml python3 -m deploy.utils.inject_manifest --namespace $NAMESPACE --src components/backends/vllm/deploy/disagg.yaml --dest /data/configs/disagg.yaml
...@@ -138,16 +113,6 @@ Use the default pre-built image and inject custom configurations via PVC: ...@@ -138,16 +113,6 @@ Use the default pre-built image and inject custom configurations via PVC:
> **Note**: All paths must start with `/data/` for security reasons. If you forget this prefix, the script will show a helpful error message with the correct path. > **Note**: All paths must start with `/data/` for security reasons. If you forget this prefix, the script will show a helpful error message with the correct path.
3. **Set the config path for the profiling job:**
```bash
export DGD_CONFIG_FILE=/workspace/profiling_results/disagg.yaml # or your custom path
```
This approach allows you to:
- Customize DGD configurations without rebuilding container images
- Test different model configurations easily
- Version control your DGD configs alongside your code
> **Important**: For profiling, disagg configs should be run with Grove disabled by adding the annotation `nvidia.com/enable-grove: "false"` to avoid alpha Grove status issues. > **Important**: For profiling, disagg configs should be run with Grove disabled by adding the annotation `nvidia.com/enable-grove: "false"` to avoid alpha Grove status issues.
**Step 2: Set SLA target** **Step 2: Set SLA target**
...@@ -173,13 +138,25 @@ spec: ...@@ -173,13 +138,25 @@ spec:
- <vllm/sglang> - <vllm/sglang>
``` ```
**Step 3: Run profiling (required)** **Step 3: Define the container image and config path**
1. **Set the container image:**
```bash
export DOCKER_IMAGE=nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1 # or any existing image tag (TODO: update to 0.5.0 upon release as profiling with 0.4.1 is broken)
```
2. **Set the config path for the profiling job:**
```bash
export DGD_CONFIG_FILE=/data/configs/disagg.yaml # should be the same path you set for --dest in Step 1
```
**Step 4: Run profiling (required)**
```bash ```bash
envsubst < benchmarks/profiler/deploy/profile_sla_job.yaml | kubectl apply -f - envsubst < benchmarks/profiler/deploy/profile_sla_job.yaml | kubectl apply -f -
``` ```
**Step 4: Wait for profiling to complete** **Step 5: Wait for profiling to complete**
```bash ```bash
kubectl get jobs -n $NAMESPACE kubectl get jobs -n $NAMESPACE
kubectl logs job/profile-sla -n $NAMESPACE kubectl logs job/profile-sla -n $NAMESPACE
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment