Unverified commit 6a84ffd3, authored by hhzhang16 and committed by GitHub
Browse files

feat: turn profiling k8s jobs into sample DGDR requests (#3864)


Signed-off-by: Hannah Zhang <hannahz@nvidia.com>
Signed-off-by: hongkuanz <hongkuanz@nvidia.com>
Signed-off-by: Hongkuan Zhou <tedzhouhk@gmail.com>
Co-authored-by: hongkuanz <hongkuanz@nvidia.com>
Co-authored-by: Hongkuan Zhou <tedzhouhk@gmail.com>
parent 0d07e2c3
This diff is collapsed.
...@@ -23,7 +23,7 @@ Use the pre-configured test deployment with sample profiling data, we provide th ...@@ -23,7 +23,7 @@ Use the pre-configured test deployment with sample profiling data, we provide th
### Option B: Use Your Own Profiling Results ### Option B: Use Your Own Profiling Results
1. Run pre-deployment profiling for your specific setup. See the [pre-deployment profiling documentation](../../docs/benchmarks/pre_deployment_profiling.md) for detailed instructions. 1. Run pre-deployment profiling for your specific setup. See the [pre-deployment profiling documentation](../../docs/benchmarks/sla_driven_profiling.md) for detailed instructions.
## Interpolator Testing ## Interpolator Testing
......
...@@ -27,6 +27,8 @@ class TestProfileSlaAiconfigurator: ...@@ -27,6 +27,8 @@ class TestProfileSlaAiconfigurator:
def trtllm_args(self): def trtllm_args(self):
class Args: class Args:
def __init__(self): def __init__(self):
self.model = ""
self.dgd_image = ""
self.backend = "trtllm" self.backend = "trtllm"
self.config = "components/backends/trtllm/deploy/disagg.yaml" self.config = "components/backends/trtllm/deploy/disagg.yaml"
self.output_dir = "/tmp/test_profiling_results" self.output_dir = "/tmp/test_profiling_results"
......
...@@ -49,6 +49,8 @@ class TestProfileSLADryRun: ...@@ -49,6 +49,8 @@ class TestProfileSLADryRun:
self.config = "components/backends/vllm/deploy/disagg.yaml" self.config = "components/backends/vllm/deploy/disagg.yaml"
self.output_dir = "/tmp/test_profiling_results" self.output_dir = "/tmp/test_profiling_results"
self.namespace = "test-namespace" self.namespace = "test-namespace"
self.model = ""
self.dgd_image = ""
self.min_num_gpus_per_engine = 1 self.min_num_gpus_per_engine = 1
self.max_num_gpus_per_engine = 8 self.max_num_gpus_per_engine = 8
self.skip_existing_results = False self.skip_existing_results = False
...@@ -83,6 +85,8 @@ class TestProfileSLADryRun: ...@@ -83,6 +85,8 @@ class TestProfileSLADryRun:
self.config = "components/backends/sglang/deploy/disagg.yaml" self.config = "components/backends/sglang/deploy/disagg.yaml"
self.output_dir = "/tmp/test_profiling_results" self.output_dir = "/tmp/test_profiling_results"
self.namespace = "test-namespace" self.namespace = "test-namespace"
self.model = ""
self.dgd_image = ""
self.min_num_gpus_per_engine = 1 self.min_num_gpus_per_engine = 1
self.max_num_gpus_per_engine = 8 self.max_num_gpus_per_engine = 8
self.skip_existing_results = False self.skip_existing_results = False
...@@ -131,6 +135,8 @@ class TestProfileSLADryRun: ...@@ -131,6 +135,8 @@ class TestProfileSLADryRun:
self.config = "components/backends/trtllm/deploy/disagg.yaml" self.config = "components/backends/trtllm/deploy/disagg.yaml"
self.output_dir = "/tmp/test_profiling_results" self.output_dir = "/tmp/test_profiling_results"
self.namespace = "test-namespace" self.namespace = "test-namespace"
self.model = ""
self.dgd_image = ""
self.min_num_gpus_per_engine = 1 self.min_num_gpus_per_engine = 1
self.max_num_gpus_per_engine = 8 self.max_num_gpus_per_engine = 8
self.skip_existing_results = False self.skip_existing_results = False
...@@ -172,6 +178,8 @@ class TestProfileSLADryRun: ...@@ -172,6 +178,8 @@ class TestProfileSLADryRun:
self.config = "recipes/deepseek-r1/sglang/disagg-16gpu/deploy.yaml" self.config = "recipes/deepseek-r1/sglang/disagg-16gpu/deploy.yaml"
self.output_dir = "/tmp/test_profiling_results" self.output_dir = "/tmp/test_profiling_results"
self.namespace = "test-namespace" self.namespace = "test-namespace"
self.model = ""
self.dgd_image = ""
self.min_num_gpus_per_engine = 8 self.min_num_gpus_per_engine = 8
self.max_num_gpus_per_engine = 32 self.max_num_gpus_per_engine = 32
self.skip_existing_results = False self.skip_existing_results = False
...@@ -233,6 +241,7 @@ class TestProfileSLADryRun: ...@@ -233,6 +241,7 @@ class TestProfileSLADryRun:
self.output_dir = "/tmp/test_profiling_results" self.output_dir = "/tmp/test_profiling_results"
self.namespace = "test-namespace" self.namespace = "test-namespace"
self.model = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B" # Specify model for autogen self.model = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B" # Specify model for autogen
self.dgd_image = ""
self.min_num_gpus_per_engine = 0 # Will be auto-generated self.min_num_gpus_per_engine = 0 # Will be auto-generated
self.max_num_gpus_per_engine = 0 # Will be auto-generated self.max_num_gpus_per_engine = 0 # Will be auto-generated
self.skip_existing_results = False self.skip_existing_results = False
...@@ -294,6 +303,7 @@ class TestProfileSLADryRun: ...@@ -294,6 +303,7 @@ class TestProfileSLADryRun:
self.output_dir = "/tmp/test_profiling_results" self.output_dir = "/tmp/test_profiling_results"
self.namespace = "test-namespace" self.namespace = "test-namespace"
self.model = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B" # Specify model for autogen self.model = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B" # Specify model for autogen
self.dgd_image = ""
self.min_num_gpus_per_engine = 0 # Will be auto-generated self.min_num_gpus_per_engine = 0 # Will be auto-generated
self.max_num_gpus_per_engine = 0 # Will be auto-generated self.max_num_gpus_per_engine = 0 # Will be auto-generated
self.skip_existing_results = False self.skip_existing_results = False
...@@ -355,6 +365,7 @@ class TestProfileSLADryRun: ...@@ -355,6 +365,7 @@ class TestProfileSLADryRun:
self.output_dir = "/tmp/test_profiling_results" self.output_dir = "/tmp/test_profiling_results"
self.namespace = "test-namespace" self.namespace = "test-namespace"
self.model = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B" # Specify model for autogen self.model = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B" # Specify model for autogen
self.dgd_image = ""
self.min_num_gpus_per_engine = 0 # Will be auto-generated self.min_num_gpus_per_engine = 0 # Will be auto-generated
self.max_num_gpus_per_engine = 0 # Will be auto-generated self.max_num_gpus_per_engine = 0 # Will be auto-generated
self.skip_existing_results = False self.skip_existing_results = False
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment