Unverified commit 7d2fc13e, authored by Thomas Montfort and committed by GitHub
Browse files

fix: small planner manifest/doc fixes (#3129)


Signed-off-by: tmontfort <tmontfort@nvidia.com>
parent 5915a69b
......@@ -29,7 +29,7 @@ spec:
command: ["python", "-m", "benchmarks.profiler.profile_sla"]
args:
- --config
- /data/configs/disagg.yaml
- ${DGD_CONFIG_FILE}
- --output-dir
- /data/profiling_results
- --namespace
......
......@@ -48,7 +48,7 @@ spec:
pvc:
create: false
name: dynamo-pvc # Must be pre-created before deployment, and the SLA profiler must already have been run
mountPoint: /data/profiling_results
mountPoint: /data
extraPodSpec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/sglang-runtime:hzhou-0811-1
......
......@@ -66,7 +66,7 @@ spec:
pvc:
create: false
name: dynamo-pvc # Must be pre-created before deployment, and the SLA profiler must already have been run
mountPoint: /workspace/profiling_results
mountPoint: /data
extraPodSpec:
mainContainer:
image: nvcr.io/nvidian/dynamo-dev/dynamo-trtllm-runtime:hzhou-0909-03
......@@ -82,7 +82,7 @@ spec:
- --environment=kubernetes
- --backend=trtllm
- --adjustment-interval=60
- --profile-results-dir=/workspace/profiling_results
- --profile-results-dir=/data/profiling_results
- --prometheus-port=9085
Prometheus: # NOTE: this is set on Prometheus to ensure a service is created for the Prometheus component. This is a workaround and should be managed differently.
dynamoNamespace: trtllm-disagg-planner
......
......@@ -13,6 +13,8 @@ spec:
value: '{"Prometheus":{"global":{"scrape_interval":"5s"},"scrape_configs":[{"job_name":"prometheus","static_configs":[{"targets":["localhost:9090"]}]},{"job_name":"frontend","static_configs":[{"targets":["vllm-disagg-planner-frontend:8000"]}]}]}}'
- name: DYNAMO_NAMESPACE
value: "vllm-disagg-planner"
- name: PROMETHEUS_PORT
value: "8000"
services:
Frontend:
dynamoNamespace: vllm-disagg-planner
......@@ -48,7 +50,7 @@ spec:
pvc:
create: false
name: dynamo-pvc # Must be pre-created before deployment, and the SLA profiler must already have been run
mountPoint: /data/profiling_results
mountPoint: /data
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
......
......@@ -70,6 +70,12 @@ log "Applying benchmarking manifests to namespace $NAMESPACE"
export NAMESPACE # ensure envsubst can see it
for mf in "$(dirname "$0")/manifests"/*.yaml; do
if [[ -f "$mf" ]]; then
# Skip pvc-access-pod.yaml as it's managed by inject_manifest.py
if [[ "$(basename "$mf")" == "pvc-access-pod.yaml" ]]; then
log "Skipping $mf (managed by inject_manifest.py)"
continue
fi
if command -v envsubst >/dev/null 2>&1; then
envsubst < "$mf" | kubectl -n "$NAMESPACE" apply -f -
else
......
......@@ -100,31 +100,6 @@ pip install -r deploy/utils/requirements.txt
Use the injector utility to place your DGD manifest into the PVC. The profiling job will read the path you specify.
```bash
# Inject your disagg manifest
python3 -m deploy.utils.inject_manifest \
--namespace $NAMESPACE \
--src components/backends/vllm/deploy/disagg.yaml \
--dest /data/configs/disagg.yaml
# Set the docker image for the profiling job; any docker image that contains your script.
export DOCKER_IMAGE=nvcr.io/nvidia/dynamo:latest-vllm
```
### Configure container image (optional)
You have two options for configuring your profiling setup:
**Option A: Use pre-built image with custom config injection (recommended)**
Use the default pre-built image and inject custom configurations via PVC:
1. **Set the container image:**
```bash
export DOCKER_IMAGE=nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1 # or any existing image tag
```
2. **Inject your custom disagg configuration:**
```bash
# Use default disagg.yaml config
python3 -m deploy.utils.inject_manifest --namespace $NAMESPACE --src components/backends/vllm/deploy/disagg.yaml --dest /data/configs/disagg.yaml
......@@ -138,16 +113,6 @@ Use the default pre-built image and inject custom configurations via PVC:
> **Note**: All paths must start with `/data/` for security reasons. If you forget this prefix, the script will show a helpful error message with the correct path.
3. **Set the config path for the profiling job:**
```bash
export DGD_CONFIG_FILE=/workspace/profiling_results/disagg.yaml # or your custom path
```
This approach allows you to:
- Customize DGD configurations without rebuilding container images
- Test different model configurations easily
- Version control your DGD configs alongside your code
> **Important**: For profiling, disagg configs should be run with Grove disabled by adding the annotation `nvidia.com/enable-grove: "false"` to avoid alpha Grove status issues.
**Step 2: Set SLA target**
......@@ -173,13 +138,25 @@ spec:
- <vllm/sglang>
```
**Step 3: Run profiling (required)**
**Step 3: Define the container image and config path**
1. **Set the container image:**
```bash
export DOCKER_IMAGE=nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1 # or any existing image tag (TODO: update to 0.5.0 upon release as profiling with 0.4.1 is broken)
```
2. **Set the config path for the profiling job:**
```bash
export DGD_CONFIG_FILE=/data/configs/disagg.yaml # should be the same path you set for --dest in Step 1
```
**Step 4: Run profiling (required)**
```bash
envsubst < benchmarks/profiler/deploy/profile_sla_job.yaml | kubectl apply -f -
```
**Step 4: Wait for profiling to complete**
**Step 5: Wait for profiling to complete**
```bash
kubectl get jobs -n $NAMESPACE
kubectl logs job/profile-sla -n $NAMESPACE
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment