feat: turn profiling k8s jobs into sample DGDR requests (#3864)

Signed-off-by: Hannah Zhang <hannahz@nvidia.com>
Signed-off-by: hongkuanz <hongkuanz@nvidia.com>
Signed-off-by: Hongkuan Zhou <tedzhouhk@gmail.com>
Co-authored-by: hongkuanz <hongkuanz@nvidia.com>
Co-authored-by: Hongkuan Zhou <tedzhouhk@gmail.com>

feat: turn profiling k8s jobs into sample DGDR requests (#3864)
Signed-off-by: Hannah Zhang <hannahz@nvidia.com>
Signed-off-by: hongkuanz <hongkuanz@nvidia.com>
Signed-off-by: Hongkuan Zhou <tedzhouhk@gmail.com>
Co-authored-by: hongkuanz <hongkuanz@nvidia.com>
Co-authored-by: Hongkuan Zhou <tedzhouhk@gmail.com>
6a84ffd3 · hhzhang16 · GitHub · 0d07e2c3 · 6a84ffd3 · 6a84ffd3
Unverified Commit 6a84ffd3 authored Oct 27, 2025 by hhzhang16 Committed by GitHub Oct 27, 2025
4 changed files
--- a/docs/planner/sla_planner_quickstart.md
+++ b/docs/planner/sla_planner_quickstart.md
--- a/tests/planner/README.md
+++ b/tests/planner/README.md
@@ -23,7 +23,7 @@ Use the pre-configured test deployment with sample profiling data, we provide th
 ### Option B: Use Your Own Profiling Results
-1. Run pre-deployment profiling for your specific setup. See the [pre-deployment profiling documentation](../../docs/benchmarks/pre_deployment_profiling.md) for detailed instructions.
+1. Run pre-deployment profiling for your specific setup. See the [pre-deployment profiling documentation](../../docs/benchmarks/sla_driven_profiling.md) for detailed instructions.
 ## Interpolator Testing

--- a/tests/profiler/test_profile_sla_aiconfigurator.py
+++ b/tests/profiler/test_profile_sla_aiconfigurator.py
@@ -27,6 +27,8 @@ class TestProfileSlaAiconfigurator:
    def trtllm_args(self):
        class Args:
            def __init__(self):
+                self.model = ""
+                self.dgd_image = ""
                self.backend = "trtllm"
                self.config = "components/backends/trtllm/deploy/disagg.yaml"
                self.output_dir = "/tmp/test_profiling_results"

--- a/tests/profiler/test_profile_sla_dryrun.py
+++ b/tests/profiler/test_profile_sla_dryrun.py
@@ -49,6 +49,8 @@ class TestProfileSLADryRun:
                self.config = "components/backends/vllm/deploy/disagg.yaml"
                self.output_dir = "/tmp/test_profiling_results"
                self.namespace = "test-namespace"
+                self.model = ""
+                self.dgd_image = ""
                self.min_num_gpus_per_engine = 1
                self.max_num_gpus_per_engine = 8
                self.skip_existing_results = False
@@ -83,6 +85,8 @@ class TestProfileSLADryRun:
                self.config = "components/backends/sglang/deploy/disagg.yaml"
                self.output_dir = "/tmp/test_profiling_results"
                self.namespace = "test-namespace"
+                self.model = ""
+                self.dgd_image = ""
                self.min_num_gpus_per_engine = 1
                self.max_num_gpus_per_engine = 8
                self.skip_existing_results = False
@@ -131,6 +135,8 @@ class TestProfileSLADryRun:
                self.config = "components/backends/trtllm/deploy/disagg.yaml"
                self.output_dir = "/tmp/test_profiling_results"
                self.namespace = "test-namespace"
+                self.model = ""
+                self.dgd_image = ""
                self.min_num_gpus_per_engine = 1
                self.max_num_gpus_per_engine = 8
                self.skip_existing_results = False
@@ -172,6 +178,8 @@ class TestProfileSLADryRun:
                self.config = "recipes/deepseek-r1/sglang/disagg-16gpu/deploy.yaml"
                self.output_dir = "/tmp/test_profiling_results"
                self.namespace = "test-namespace"
+                self.model = ""
+                self.dgd_image = ""
                self.min_num_gpus_per_engine = 8
                self.max_num_gpus_per_engine = 32
                self.skip_existing_results = False
@@ -233,6 +241,7 @@ class TestProfileSLADryRun:
                self.output_dir = "/tmp/test_profiling_results"
                self.namespace = "test-namespace"
                self.model = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"  # Specify model for autogen
+                self.dgd_image = ""
                self.min_num_gpus_per_engine = 0  # Will be auto-generated
                self.max_num_gpus_per_engine = 0  # Will be auto-generated
                self.skip_existing_results = False
@@ -294,6 +303,7 @@ class TestProfileSLADryRun:
                self.output_dir = "/tmp/test_profiling_results"
                self.namespace = "test-namespace"
                self.model = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"  # Specify model for autogen
+                self.dgd_image = ""
                self.min_num_gpus_per_engine = 0  # Will be auto-generated
                self.max_num_gpus_per_engine = 0  # Will be auto-generated
                self.skip_existing_results = False
@@ -355,6 +365,7 @@ class TestProfileSLADryRun:
                self.output_dir = "/tmp/test_profiling_results"
                self.namespace = "test-namespace"
                self.model = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"  # Specify model for autogen
+                self.dgd_image = ""
                self.min_num_gpus_per_engine = 0  # Will be auto-generated
                self.max_num_gpus_per_engine = 0  # Will be auto-generated
                self.skip_existing_results = False