feat: update benchmarking and deploy utils (#2933)

Signed-off-by: Hannah Zhang <hannahz@nvidia.com>

feat: update benchmarking and deploy utils (#2933)
Signed-off-by: Hannah Zhang <hannahz@nvidia.com>
09c7b73c · hhzhang16 · GitHub · 7dd872ae · 09c7b73c · 09c7b73c
Unverified Commit 09c7b73c authored Sep 08, 2025 by hhzhang16 Committed by GitHub Sep 08, 2025
15 changed files
--- a/benchmarks/README.md
+++ b/benchmarks/README.md
@@ -61,6 +61,11 @@ The benchmarking framework supports:
 - Customizable concurrency levels (configurable via CONCURRENCIES env var), sequence lengths, and models
 - Automated performance plot generation with custom labels
+**Sequential GPU Usage:**
+- Models are deployed and benchmarked **sequentially**, not in parallel
+- Each deployment gets exclusive access to all available GPUs during its benchmark run
+- Ensures accurate performance measurements and fair comparison across configurations
 **Supported Backends:**
 - DynamoGraphDeployments
 - External HTTP endpoints (for comparison with non-Dynamo backends)

--- a/benchmarks/benchmark.sh
+++ b/benchmarks/benchmark.sh
@@ -11,7 +11,7 @@ DYNAMO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
 # Configuration - all set via command line arguments
 NAMESPACE=""
-MODEL="deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
+MODEL="Qwen/Qwen3-0.6B"
 ISL=2000
 STD=10
 OSL=256
@@ -46,7 +46,7 @@ REQUIRED:
 OPTIONS:
    -h, --help                    Show this help message
-    -m, --model MODEL             Model name for GenAI-Perf configuration and logging (default: deepseek-ai/DeepSeek-R1-Distill-Llama-8B)
+    -m, --model MODEL             Model name for GenAI-Perf configuration and logging (default: Qwen/Qwen3-0.6B)
                                  NOTE: This must match the model configured in your deployment manifests and the model deployed in any endpoints.
    -i, --isl LENGTH              Input sequence length (default: $ISL)
    -s, --std STDDEV              Input sequence standard deviation (default: $STD)

--- a/benchmarks/profiler/deploy/profile_sla_job.yaml
+++ b/benchmarks/profiler/deploy/profile_sla_job.yaml
@@ -29,9 +29,9 @@ spec:
        command: ["python", "-m", "benchmarks.profiler.profile_sla"]
        args:
          - --config
-          - /workspace/configs/disagg.yaml
+          - /data/configs/disagg.yaml
          - --output-dir
-          - /workspace/profiling_results
+          - /data/profiling_results
          - --namespace
          - ${NAMESPACE}
          - --backend
@@ -50,15 +50,10 @@ spec:
          - "20"
        volumeMounts:
          - name: output-volume
-            mountPath: /workspace/profiling_results
+            mountPath: /data
-          - name: configs
-            mountPath: /workspace/configs
      restartPolicy: Never
      volumes:
        - name: output-volume
          persistentVolumeClaim:
            claimName: dynamo-pvc
-        - name: configs
-          persistentVolumeClaim:
-            claimName: dynamo-pvc
  backoffLimit: 0
--- a/benchmarks/profiler/utils/__init__.py
+++ b/benchmarks/profiler/utils/__init__.py
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
--- a/benchmarks/utils/benchmark.py
+++ b/benchmarks/utils/benchmark.py
@@ -54,17 +54,17 @@ def main() -> int:
        help="Input in format <label>=<manifest_path_or_endpoint>. Can be specified multiple times for comparisons.",
    )
    parser.add_argument("--namespace", required=True, help="Kubernetes namespace")
-    parser.add_argument("--isl", type=int, default=200, help="Input sequence length")
+    parser.add_argument("--isl", type=int, default=2000, help="Input sequence length")
    parser.add_argument(
        "--std",
        type=int,
        default=10,
        help="Input sequence standard deviation",
    )
-    parser.add_argument("--osl", type=int, default=200, help="Output sequence length")
+    parser.add_argument("--osl", type=int, default=256, help="Output sequence length")
    parser.add_argument(
        "--model",
-        default="deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
+        default="Qwen/Qwen3-0.6B",
        help="Model name",
    )
    parser.add_argument(

--- a/components/backends/sglang/deploy/disagg_planner.yaml
+++ b/components/backends/sglang/deploy/disagg_planner.yaml
@@ -48,7 +48,7 @@ spec:
      pvc:
        create: false
        name: dynamo-pvc # Must be pre-created before deployment and SLA profiler must have been run
-        mountPoint: /workspace/profiling_results
+        mountPoint: /data/profiling_results
      extraPodSpec:
        mainContainer:
          image: nvcr.io/nvidian/nim-llm-dev/sglang-runtime:hzhou-0811-1
@@ -62,7 +62,7 @@ spec:
              --environment=kubernetes
              --backend=sglang
              --adjustment-interval=60
-              --profile-results-dir=/workspace/profiling_results
+              --profile-results-dir=/data/profiling_results
    Prometheus: # NOTE: this is set on Prometheus to ensure a service is created for the Prometheus component. This is a workaround and should be managed differently.
      dynamoNamespace: dynamo
      componentType: frontend

--- a/components/backends/vllm/deploy/disagg_planner.yaml
+++ b/components/backends/vllm/deploy/disagg_planner.yaml
@@ -48,7 +48,7 @@ spec:
      pvc:
        create: false
        name: dynamo-pvc # Must be pre-created before deployment and SLA profiler must have been run
-        mountPoint: /workspace/profiling_results
+        mountPoint: /data/profiling_results
      extraPodSpec:
        mainContainer:
          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
@@ -62,7 +62,7 @@ spec:
              --environment=kubernetes
              --backend=vllm
              --adjustment-interval=60
-              --profile-results-dir=/workspace/profiling_results
+              --profile-results-dir=/data/profiling_results
    Prometheus: # NOTE: this is set on Prometheus to ensure a service is created for the Prometheus component. This is a workaround and should be managed differently.
      dynamoNamespace: vllm-disagg-planner
      componentType: frontend

--- a/deploy/utils/README.md
+++ b/deploy/utils/README.md
@@ -88,23 +88,36 @@ These scripts interact with the Persistent Volume Claim (PVC) that stores config
 ```bash
 # The profiling job reads your DGD config from the PVC
-python3 deploy/utils/inject_manifest.py \
+# IMPORTANT: All paths must start with /data/ for security reasons
+python3 -m deploy.utils.inject_manifest \
  --namespace $NAMESPACE \
  --src ./my-disagg.yaml \
-  --dest /configs/disagg.yaml
+  --dest /data/configs/disagg.yaml
 ```
 **Download benchmark/profiling results:**
 ```bash
 # After benchmarking or profiling completes, download results
-python3 deploy/utils/download_pvc_results.py \
+python3 -m deploy.utils.download_pvc_results \
  --namespace $NAMESPACE \
  --output-dir ./pvc_files \
-  --folder /results \
+  --folder /data/results \
  --no-config   # optional: skip *.yaml/*.yml in the download
 ```
+#### Path Requirements
+**Important**: The PVC is mounted at `/data` in the access pod for security reasons. All destination paths must start with `/data/`.
+**Common path patterns:**
+- `/data/configs/` - Configuration files (DGD manifests)
+- `/data/results/` - Benchmark results
+- `/data/profiling_results/` - Profiling data
+- `/data/benchmarking/` - Benchmarking artifacts
+**User-friendly error messages**: If you forget the `/data/` prefix, the script will show a helpful error message with the correct path and example commands.
 #### Next Steps
 For complete benchmarking workflows:

--- a/deploy/utils/download_pvc_results.py
+++ b/deploy/utils/download_pvc_results.py
@@ -23,7 +23,7 @@ Creates an access pod, copies files, and exits. You can optionally exclude YAML
 Usage:
    python3 download_pvc_results.py --namespace <namespace> --output-dir <local_directory> \
-        --folder </absolute/folder/in/pvc> [--no-config]
+        --folder /data/<folder/in/pvc> [--no-config]
 """
 import argparse
@@ -36,7 +36,7 @@ try:
    from deploy.utils.kubernetes import (
        check_kubectl_access,
        cleanup_access_pod,
-        deploy_access_pod,
+        ensure_clean_access_pod,
        run_command,
    )
 except ModuleNotFoundError:
@@ -46,7 +46,7 @@ except ModuleNotFoundError:
    from deploy.utils.kubernetes import (
        check_kubectl_access,
        cleanup_access_pod,
-        deploy_access_pod,
+        ensure_clean_access_pod,
        run_command,
    )
@@ -182,11 +182,22 @@ def main():
    parser.add_argument(
        "--folder",
        required=True,
-        help="Absolute folder path in the PVC to download, e.g. /profiling_results or /benchmarking_results",
+        help="Absolute folder path in the PVC to download, must start with /data/, e.g. /data/profiling_results or /data/benchmarking_results",
    )
    args = parser.parse_args()
+    # Validate folder path starts with /data/
+    if not args.folder.startswith("/data/"):
+        print("❌ Error: Folder path must start with '/data/'")
+        print(f"   Provided: {args.folder}")
+        print("   Quick Fix: Add '/data/' prefix to your path")
+        print("   Examples:")
+        print("     /profiling_results → /data/profiling_results")
+        print("     /benchmarking_results → /data/benchmarking_results")
+        print("     /configs → /data/configs")
+        sys.exit(1)
    print("📥 PVC Results Download")
    print("=" * 40)
@@ -194,7 +205,7 @@ def main():
    check_kubectl_access(args.namespace)
    # Deploy access pod
-    pod_name = deploy_access_pod(args.namespace)
+    pod_name = ensure_clean_access_pod(args.namespace)
    try:
        # List and download files
        files = list_pvc_contents(args.namespace, pod_name, args.folder, args.no_config)

--- a/deploy/utils/inject_manifest.py
+++ b/deploy/utils/inject_manifest.py
@@ -21,12 +21,15 @@ Manifest Injection Script
 Copies any Kubernetes manifest file into the PVC for later use by jobs.
 Both the source manifest path and destination path in the PVC are required.
+IMPORTANT: The PVC is mounted at /data in the access pod for security reasons.
+All destination paths must start with '/data/'.
 Usage:
    python3 inject_manifest.py --namespace <namespace> --src <local_manifest.yaml> --dest <absolute_path_in_pvc>
 Examples:
-    python3 inject_manifest.py --namespace <ns> --src ./my-disagg.yaml --dest /configs/disagg.yaml
+    python3 inject_manifest.py --namespace <ns> --src ./disagg.yaml --dest /data/configs/disagg.yaml
-    python3 inject_manifest.py --namespace <ns> --src ./my-agg.yaml    --dest /configs/agg.yaml
+    python3 inject_manifest.py --namespace <ns> --src ./my-data.yaml    --dest /data/custom/path/data.yaml
 """
 import argparse
@@ -37,7 +40,7 @@ from deploy.utils.kubernetes import (
    PVC_ACCESS_POD_NAME,
    check_kubectl_access,
    cleanup_access_pod,
-    deploy_access_pod,
+    ensure_clean_access_pod,
    run_command,
 )
@@ -100,16 +103,39 @@ def main():
    parser.add_argument(
        "--dest",
        required=True,
-        help="Absolute target path in PVC (e.g., /profiling_results/agg.yaml)",
+        help="Absolute target path in PVC (must start with /data/, e.g., /data/configs/agg.yaml)",
    )
    args = parser.parse_args()
-    # Validate target_path to prevent directory traversal
+    # Validate target_path to prevent directory traversal and ensure it's within PVC
-    if not args.dest.startswith("/"):
+    if not args.dest.startswith("/data/"):
-        print(
+        print("=" * 60)
-            "ERROR: Target path must be an absolute path inside the PVC (start with '/')."
+        print("❌ ERROR: Invalid target path")
-        )
+        print("=" * 60)
+        print("The PVC is mounted at /data in the access pod.")
+        print("All paths must start with '/data/' for security reasons.")
+        print("")
+        print("💡 QUICK FIX:")
+        if args.dest.startswith("/"):
+            # Suggest the fix
+            suggested_path = f"/data{args.dest}"
+            print(f"  Change: {args.dest}")
+            print(f"  To:     {suggested_path}")
+            print("")
+            print("📝 Example commands:")
+            print("  python3 -m deploy.utils.inject_manifest \\")
+            print(f"    --namespace {args.namespace} \\")
+            print(f"    --src {args.src} \\")
+            print(f"    --dest {suggested_path}")
+        else:
+            print(f"  Use: /data/{args.dest.lstrip('/')}")
+        print("")
+        print("🔍 Common patterns:")
+        print("  /configs/file.yaml     → /data/configs/file.yaml")
+        print("  /results/data.yaml     → /data/results/data.yaml")
+        print("  /profiling_results/... → /data/profiling_results/...")
+        print("=" * 60)
        sys.exit(1)
    if ".." in args.dest:
@@ -123,7 +149,7 @@ def main():
    check_kubectl_access(args.namespace)
    # Deploy access pod
-    deploy_access_pod(args.namespace)
+    ensure_clean_access_pod(args.namespace)
    try:
        # Copy manifest
        copy_manifest(args.namespace, args.src, args.dest)

--- a/deploy/utils/kubernetes.py
+++ b/deploy/utils/kubernetes.py
@@ -22,7 +22,7 @@ PVC_ACCESS_POD_NAME = "pvc-access-pod"
 def run_command(
-    cmd: List[str], capture_output: bool = True
+    cmd: List[str], capture_output: bool = True, exit_on_error: bool = True
 ) -> subprocess.CompletedProcess:
    """Run a command and handle errors."""
    try:
@@ -37,7 +37,10 @@ def run_command(
            print(f"STDOUT: {e.stdout}")
        if e.stderr:
            print(f"STDERR: {e.stderr}")
-        sys.exit(1)
+        if exit_on_error:
+            sys.exit(1)
+        else:
+            raise
 def check_kubectl_access(namespace: str) -> None:
@@ -47,6 +50,55 @@ def check_kubectl_access(namespace: str) -> None:
    print("✓ kubectl access confirmed")
+def ensure_clean_access_pod(namespace: str) -> str:
+    """Delete any existing PVC access pod, then deploy a fresh one.
+
+    Removing a leftover pod first guarantees the new pod is created from the
+    current manifest instead of reusing one from an earlier run.
+
+    Args:
+        namespace: Kubernetes namespace in which the access pod lives.
+
+    Returns:
+        The pod name returned by ``deploy_access_pod``.
+
+    Raises:
+        Exception: Whatever ``deploy_access_pod`` raised, re-raised after
+            printing manual clean-up instructions.
+    """
+    try:
+        # check=False: a missing pod shows up as a non-zero return code here,
+        # not as an exception, so "pod not found" never reaches the handler.
+        result = subprocess.run(
+            [
+                "kubectl",
+                "get",
+                "pod",
+                PVC_ACCESS_POD_NAME,
+                "-n",
+                namespace,
+                "-o",
+                "jsonpath={.metadata.name}",
+            ],
+            capture_output=True,
+            text=True,
+            check=False,
+        )
+        if result.returncode == 0 and result.stdout.strip() == PVC_ACCESS_POD_NAME:
+            print(f"Found existing access pod '{PVC_ACCESS_POD_NAME}', deleting it...")
+            run_command(
+                [
+                    "kubectl",
+                    "delete",
+                    "pod",
+                    PVC_ACCESS_POD_NAME,
+                    "-n",
+                    namespace,
+                    "--ignore-not-found",
+                ],
+                capture_output=False,
+                exit_on_error=False,
+            )
+            print("✓ Existing access pod deleted")
+    except Exception as e:
+        # Stay best-effort (deployment is still attempted), but say so: a
+        # failed delete means the old pod may be reused instead of replaced.
+        print(f"Warning: Could not remove existing access pod: {e}")
+    try:
+        return deploy_access_pod(namespace)
+    except Exception as e:
+        print(f"Deployment failed: {e}")
+        # Print a copy-pasteable command with the real pod name and namespace.
+        print(
+            f"Pod left running for debugging. Use 'kubectl delete pod {PVC_ACCESS_POD_NAME} -n {namespace}' to clean up manually."
+        )
+        raise
 def deploy_access_pod(namespace: str) -> str:
    """Deploy the PVC access pod and return pod name."""
@@ -67,25 +119,19 @@ def deploy_access_pod(namespace: str) -> str:
            text=True,
            check=False,
        )
        if result.returncode == 0 and result.stdout.strip() == "Running":
            print(f"✓ Access pod '{PVC_ACCESS_POD_NAME}' already running")
            return PVC_ACCESS_POD_NAME
    except Exception:
-        # Pod doesn't exist or isn't running
+        pass  # Pod doesn't exist or isn't running
-        pass
    print(f"Deploying access pod '{PVC_ACCESS_POD_NAME}' in namespace '{namespace}'...")
-    # Get the directory where this script is located
+    pod_yaml_path = Path(__file__).parent / "manifests" / "pvc-access-pod.yaml"
-    script_dir = Path(__file__).parent
-    pod_yaml_path = script_dir / "manifests" / "pvc-access-pod.yaml"
    if not pod_yaml_path.exists():
        print(f"ERROR: Pod YAML not found at {pod_yaml_path}")
        sys.exit(1)
-    # Deploy the pod
    run_command(
        ["kubectl", "apply", "-f", str(pod_yaml_path), "-n", namespace],
        capture_output=False,
@@ -103,6 +149,7 @@ def deploy_access_pod(namespace: str) -> str:
            "--timeout=60s",
        ],
        capture_output=False,
+        exit_on_error=False,
    )
    print("✓ Access pod is ready")
    return PVC_ACCESS_POD_NAME
@@ -110,16 +157,20 @@ def deploy_access_pod(namespace: str) -> str:
 def cleanup_access_pod(namespace: str) -> None:
    print("Cleaning up access pod...")
-    run_command(
+    try:
-        [
+        run_command(
-            "kubectl",
+            [
-            "delete",
+                "kubectl",
-            "pod",
+                "delete",
-            PVC_ACCESS_POD_NAME,
+                "pod",
-            "-n",
+                PVC_ACCESS_POD_NAME,
-            namespace,
+                "-n",
-            "--ignore-not-found",
+                namespace,
-        ],
+                "--ignore-not-found",
-        capture_output=False,
+            ],
-    )
+            capture_output=False,
-    print("✓ Access pod deleted")
+            exit_on_error=False,
+        )
+        print("✓ Access pod deleted")
+    except Exception as e:
+        print(f"Warning: Failed to clean up access pod: {e}")
--- a/deploy/utils/manifests/pvc-access-pod.yaml
+++ b/deploy/utils/manifests/pvc-access-pod.yaml
@@ -26,7 +26,7 @@ spec:
        - ALL
    volumeMounts:
    - name: profiling-storage
-      mountPath: /profiling_results
+      mountPath: /data
    resources:
      requests:
        memory: "128Mi"

--- a/deploy/utils/setup_k8s_namespace.sh
+++ b/deploy/utils/setup_k8s_namespace.sh
@@ -60,7 +60,7 @@ Sets up Kubernetes namespace for Dynamo (one-time per namespace):
      * Installs/updates the operator Helm release using that image
      * If credentials (DOCKER_USERNAME/DOCKER_PASSWORD) are provided, creates/updates docker-imagepullsecret
      * If credentials are not provided, prompts interactively to create the pull secret
-  - Otherwise installs the operator using default image: nvcr.io/nvidia/ai-dynamo/kubernetes-operator:0.4.0
+  - Otherwise installs the operator using default image: nvcr.io/nvidia/ai-dynamo/kubernetes-operator:0.4.1
 Environment variables:
  NAMESPACE         Target Kubernetes namespace (default: default)
@@ -157,7 +157,7 @@ if [[ -n "$DOCKER_SERVER" && -n "$IMAGE_TAG" ]]; then
  fi
 else
  # Use default published image when custom not provided
-  DEFAULT_OPERATOR_IMAGE="nvcr.io/nvidia/ai-dynamo/kubernetes-operator:0.4.0"
+  DEFAULT_OPERATOR_IMAGE="nvcr.io/nvidia/ai-dynamo/kubernetes-operator:0.4.1"
  if ! command -v helm &>/dev/null; then warn "helm not found; skipping helm install"; else
    pushd "$REPO_ROOT/deploy/cloud/helm/platform" >/dev/null
    helm dep build

--- a/docs/benchmarks/benchmarking.md
+++ b/docs/benchmarks/benchmarking.md
@@ -33,7 +33,7 @@ The framework is a wrapper around `genai-perf` that:
 **Default sequence lengths**: Input: 2000 tokens, Output: 256 tokens (configurable with `--isl` and `--osl`)
-**Important**: The `--model` parameter configures GenAI-Perf for benchmarking and provides logging context. The actual model loaded is determined by your deployment manifests. Only one model can be benchmarked at a time across all inputs to ensure fair comparison. The default `--model` value in the benchmarking script is `deepseek-ai/DeepSeek-R1-Distill-Llama-8B`, but it must match the model in the manifest(s) and the model deployed at the endpoint(s).
+**Important**: The `--model` parameter configures GenAI-Perf for benchmarking and provides logging context. The actual model loaded is determined by your deployment manifests. Only one model can be benchmarked at a time across all inputs to ensure fair comparison. The default `--model` value in the benchmarking script is `Qwen/Qwen3-0.6B`, but it must match the model in the manifest(s) and the model deployed at the endpoint(s).
 ## Prerequisites
@@ -103,7 +103,7 @@ REQUIRED:
 OPTIONS:
  -h, --help                    Show help message and examples
-  -m, --model MODEL             Model name for GenAI-Perf configuration and logging (default: deepseek-ai/DeepSeek-R1-Distill-Llama-8B)
+  -m, --model MODEL             Model name for GenAI-Perf configuration and logging (default: Qwen/Qwen3-0.6B)
                                NOTE: This must match the model configured in your deployment manifests and endpoints
  -i, --isl LENGTH              Input sequence length (default: 2000)
  -s, --std STDDEV              Input sequence standard deviation (default: 10)
@@ -130,6 +130,23 @@ The script automatically:
 4. **Generates** comparison plots using your custom labels in `./benchmarks/results/plots/`
 5. **Cleans up** deployments when complete
+### GPU Resource Usage
+**Important**: Models are deployed and benchmarked **sequentially**, not in parallel. This means:
+- **One deployment at a time**: Each DynamoGraphDeployment is deployed, benchmarked, and cleaned up before the next one starts
+- **Full GPU access**: Each deployment gets exclusive access to all available GPUs during its benchmark run
+- **Resource isolation**: No resource conflicts between different deployment configurations
+- **Fair comparison**: Each configuration is tested under identical resource conditions
+This sequential approach ensures:
+- **Accurate performance measurements** without interference between deployments
+- **Consistent resource allocation** for fair comparison across different configurations
+- **Simplified resource management** without complex GPU scheduling
+- **Reliable cleanup** between benchmark runs
+If you need to benchmark multiple configurations simultaneously, consider using separate Kubernetes namespaces or running benchmarks on different clusters.
 ### Results Clearing Behavior
 **Important**: The benchmark script automatically clears the output directory before each run to ensure clean, reproducible results. This means:
@@ -155,7 +172,7 @@ For direct control over the benchmark workflow:
 ```bash
 # Endpoint benchmarking
 python3 -u -m benchmarks.utils.benchmark \
-   --endpoint "http://your-endpoint:8000" \
+   --input trtllm=http://your-endpoint:8000 \
   --namespace $NAMESPACE \
   --isl 2000 \
   --std 10 \

--- a/docs/benchmarks/pre_deployment_profiling.md
+++ b/docs/benchmarks/pre_deployment_profiling.md
@@ -24,6 +24,21 @@ We assume there is no piggy-backed prefill requests in the decode engine. Even i
 The script will first detect the number of available GPUs on the current nodes (multi-node engine not supported yet). Then, it will profile the prefill and decode performance with different TP sizes. For prefill, since there is no in-flight batching (assume isl is long enough to saturate the GPU), the script directly measures the TTFT for a request with given isl without kv-reusing. For decode, since the ITL (or iteration time) is relevant with how many requests are in-flight, the script will measure the ITL under different number of in-flight requests. The range of the number of in-flight requests is from 1 to the maximum number of requests that the kv cache of the engine can hold. To measure the ITL without being affected by piggy-backed prefill requests, the script will enable kv-reuse and warm up the engine by issuing the same prompts before measuring the ITL. Since the kv cache is sufficient for all the requests, it can hold the kv cache of the pre-computed prompts and skip the prefill phase when measuring the ITL.
+### GPU Resource Usage
+**Important**: Profiling tests different tensor parallelism (TP) configurations **sequentially**, not in parallel. This means:
+- **One TP configuration at a time**: Each tensor parallelism size (TP1, TP2, TP4, TP8, etc.) is tested individually
+- **Full GPU access**: Each TP configuration gets exclusive access to all available GPUs during its profiling run
+- **Resource isolation**: No interference between different TP configurations during testing
+- **Accurate measurements**: Each configuration is profiled under identical resource conditions
+This sequential approach ensures:
+- **Precise performance profiling** without resource conflicts
+- **Consistent GPU allocation** for fair comparison across TP sizes
+- **Reliable cleanup** between different TP configuration tests
+- **Accurate SLA compliance verification** for each configuration
 After the profiling finishes, two plots will be generated in the `output-dir`. For example, here are the profiling results for `examples/llm/configs/disagg.yaml`:
 ![Prefill Performance](../../docs/images/h100_prefill_performance.png)
@@ -90,7 +105,7 @@ Use the injector utility to place your DGD manifest into the PVC. The profiling
 python3 deploy/utils/inject_manifest.py \
  --namespace $NAMESPACE \
  --src components/backends/vllm/deploy/disagg.yaml \
-  --dest /configs/disagg.yaml
+  --dest /data/configs/disagg.yaml
 # Set the docker image for the profiling job; any docker image that contains your script.
 export DOCKER_IMAGE=nvcr.io/nvidia/dynamo:latest-vllm
@@ -112,15 +127,17 @@ Use the default pre-built image and inject custom configurations via PVC:
 2. **Inject your custom disagg configuration:**
   ```bash
   # Use default disagg.yaml config
-   python3 deploy/utils/inject_manifest.py --namespace $NAMESPACE --src components/backends/vllm/deploy/disagg.yaml --dest /configs/disagg.yaml
+   python3 deploy/utils/inject_manifest.py --namespace $NAMESPACE --src components/backends/vllm/deploy/disagg.yaml --dest /data/configs/disagg.yaml
   # Or use a custom disagg config file
-   python3 deploy/utils/inject_manifest.py --namespace $NAMESPACE --src my-custom-disagg.yaml --dest /configs/disagg.yaml
+   python3 deploy/utils/inject_manifest.py --namespace $NAMESPACE --src my-custom-disagg.yaml --dest /data/configs/disagg.yaml
   # Or specify a custom target path in the PVC
-   python3 deploy/utils/inject_manifest.py --namespace $NAMESPACE --src my-custom-disagg.yaml --dest /profiling_results/my-disagg.yaml
+   python3 deploy/utils/inject_manifest.py --namespace $NAMESPACE --src my-custom-disagg.yaml --dest /data/profiling_results/my-disagg.yaml
   ```
+   > **Note**: All paths must start with `/data/` for security reasons. If you forget this prefix, the script will show a helpful error message with the correct path.
 3. **Set the config path for the profiling job:**
   ```bash
-   export DGD_CONFIG_FILE=/workspace/profiling_results/disagg.yaml # or your custom path
+   export DGD_CONFIG_FILE=/data/configs/disagg.yaml # or your custom path
@@ -176,10 +193,10 @@ To download the results:
 ```bash
 # Download to directory
-python3 deploy/utils/download_pvc_results.py --namespace $NAMESPACE --output-dir ./results --folder /profiling_results
+python3 deploy/utils/download_pvc_results.py --namespace $NAMESPACE --output-dir ./results --folder /data/profiling_results
 # Download without any of the auto-created config.yaml files used in profiling
-python3 deploy/utils/download_pvc_results.py --namespace $NAMESPACE --output-dir ./results --folder /profiling_results --no-config
+python3 deploy/utils/download_pvc_results.py --namespace $NAMESPACE --output-dir ./results --folder /data/profiling_results --no-config
 ```
 The script will:
@@ -191,7 +208,7 @@ The script will:
 The profiling results directory contains the following structure:
 ```
-/workspace/profiling_results/
+/data/profiling_results/
 ├── prefill_performance.png                    # Main prefill performance plot
 ├── decode_performance.png                     # Main decode performance plot
 ├── prefill_tp1/                               # Individual TP profiling directories