fix: small planner manifest/doc fixes (#3129)

Signed-off-by: tmontfort <tmontfort@nvidia.com>

fix: small planner manifest/doc fixes (#3129)
Signed-off-by: tmontfort <tmontfort@nvidia.com>
7d2fc13e · Thomas Montfort · GitHub · 5915a69b · 7d2fc13e · 7d2fc13e
Unverified Commit 7d2fc13e authored Sep 19, 2025 by Thomas Montfort Committed by GitHub Sep 19, 2025
6 changed files
--- a/benchmarks/profiler/deploy/profile_sla_job.yaml
+++ b/benchmarks/profiler/deploy/profile_sla_job.yaml
@@ -29,7 +29,7 @@ spec:
        command: ["python", "-m", "benchmarks.profiler.profile_sla"]
        args:
          - --config
-          - /data/configs/disagg.yaml
+          - ${DGD_CONFIG_FILE}
          - --output-dir
          - /data/profiling_results
          - --namespace

--- a/components/backends/sglang/deploy/disagg_planner.yaml
+++ b/components/backends/sglang/deploy/disagg_planner.yaml
@@ -48,7 +48,7 @@ spec:
      pvc:
        create: false
        name: dynamo-pvc # Must be pre-created before deployment and SLA profiler must have been run
-        mountPoint: /data/profiling_results
+        mountPoint: /data
      extraPodSpec:
        mainContainer:
          image: nvcr.io/nvidian/nim-llm-dev/sglang-runtime:hzhou-0811-1

--- a/components/backends/trtllm/deploy/disagg_planner.yaml
+++ b/components/backends/trtllm/deploy/disagg_planner.yaml
@@ -66,7 +66,7 @@ spec:
      pvc:
        create: false
        name: dynamo-pvc # Must be pre-created before deployment and SLA profiler must have been run
-        mountPoint: /workspace/profiling_results
+        mountPoint: /data
      extraPodSpec:
        mainContainer:
          image: nvcr.io/nvidian/dynamo-dev/dynamo-trtllm-runtime:hzhou-0909-03
@@ -82,7 +82,7 @@ spec:
            - --environment=kubernetes
            - --backend=trtllm
            - --adjustment-interval=60
-            - --profile-results-dir=/workspace/profiling_results
+            - --profile-results-dir=/data/profiling_results
            - --prometheus-port=9085
    Prometheus: # NOTE: this is set on Prometheus to ensure a service is created for the Prometheus component. This is a workaround and should be managed differently.
      dynamoNamespace: trtllm-disagg-planner

--- a/components/backends/vllm/deploy/disagg_planner.yaml
+++ b/components/backends/vllm/deploy/disagg_planner.yaml
@@ -13,6 +13,8 @@ spec:
      value: '{"Prometheus":{"global":{"scrape_interval":"5s"},"scrape_configs":[{"job_name":"prometheus","static_configs":[{"targets":["localhost:9090"]}]},{"job_name":"frontend","static_configs":[{"targets":["vllm-disagg-planner-frontend:8000"]}]}]}}'
    - name: DYNAMO_NAMESPACE
      value: "vllm-disagg-planner"
+    - name: PROMETHEUS_PORT
+      value: "8000"
  services:
    Frontend:
      dynamoNamespace: vllm-disagg-planner
@@ -48,7 +50,7 @@ spec:
      pvc:
        create: false
        name: dynamo-pvc # Must be pre-created before deployment and SLA profiler must have been run
-        mountPoint: /data/profiling_results
+        mountPoint: /data
      extraPodSpec:
        mainContainer:
          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1

--- a/deploy/utils/setup_benchmarking_resources.sh
+++ b/deploy/utils/setup_benchmarking_resources.sh
@@ -70,6 +70,12 @@ log "Applying benchmarking manifests to namespace $NAMESPACE"
 export NAMESPACE  # ensure envsubst can see it
 for mf in "$(dirname "$0")/manifests"/*.yaml; do
  if [[ -f "$mf" ]]; then
+    # Skip pvc-access-pod.yaml as it's managed by inject_manifest.py
+    if [[ "$(basename "$mf")" == "pvc-access-pod.yaml" ]]; then
+      log "Skipping $mf (managed by inject_manifest.py)"
+      continue
+    fi
+
    if command -v envsubst >/dev/null 2>&1; then
      envsubst < "$mf" | kubectl -n "$NAMESPACE" apply -f -
    else

--- a/docs/benchmarks/pre_deployment_profiling.md
+++ b/docs/benchmarks/pre_deployment_profiling.md
@@ -100,31 +100,6 @@ pip install -r deploy/utils/requirements.txt

 Use the injector utility to place your DGD manifest into the PVC. The profiling job will read the path you specify.

-```bash
-# Inject your disagg manifest
-python3 -m deploy.utils.inject_manifest \
-  --namespace $NAMESPACE \
-  --src components/backends/vllm/deploy/disagg.yaml \
-  --dest /data/configs/disagg.yaml
-
-# Set the docker image for the profiling job; any docker image that contains your script.
-export DOCKER_IMAGE=nvcr.io/nvidia/dynamo:latest-vllm
-```
-
-### Configure container image (optional)
-
-You have two options for configuring your profiling setup:
-
-**Option A: Use pre-built image with custom config injection (recommended)**
-
-Use the default pre-built image and inject custom configurations via PVC:
-
-1. **Set the container image:**
-   ```bash
-   export DOCKER_IMAGE=nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1 # or any existing image tag
-   ```
-
-2. **Inject your custom disagg configuration:**
   ```bash
   # Use default disagg.yaml config
   python3 -m deploy.utils.inject_manifest --namespace $NAMESPACE --src components/backends/vllm/deploy/disagg.yaml --dest /data/configs/disagg.yaml
@@ -138,16 +113,6 @@ Use the default pre-built image and inject custom configurations via PVC:

   > **Note**: All paths must start with `/data/` for security reasons. If you forget this prefix, the script will show a helpful error message with the correct path.

-3. **Set the config path for the profiling job:**
-   ```bash
-   export DGD_CONFIG_FILE=/workspace/profiling_results/disagg.yaml # or your custom path
-   ```
-
-This approach allows you to:
- Customize DGD configurations without rebuilding container images
- Test different model configurations easily
- Version control your DGD configs alongside your code
-
 > **Important**: For profiling, disagg configs should be run with Grove disabled by adding the annotation `nvidia.com/enable-grove: "false"` to avoid alpha Grove status issues.

 **Step 2: Set SLA target**
@@ -173,13 +138,25 @@ spec:
            - <vllm/sglang>
 ```

-**Step 3: Run profiling (required)**
+**Step 3: Define the container image and config path**
+
+1. **Set the container image:**
+   ```bash
+   export DOCKER_IMAGE=nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1 # or any existing image tag (TODO: update to 0.5.0 upon release as profiling with 0.4.1 is broken)
+   ```
+
+2. **Set the config path for the profiling job:**
+   ```bash
+   export DGD_CONFIG_FILE=/data/configs/disagg.yaml # should be the same path you set for --dest in Step 1
+   ```
+
+**Step 4: Run profiling (required)**

 ```bash
 envsubst < benchmarks/profiler/deploy/profile_sla_job.yaml | kubectl apply -f -
 ```

-**Step 4: Wait for profiling to complete**
+**Step 5: Wait for profiling to complete**
 ```bash
 kubectl get jobs -n $NAMESPACE
 kubectl logs job/profile-sla -n $NAMESPACE