Unverified commit fa6a7f94, authored by hhzhang16, committed by GitHub
Browse files

feat: remove PVC logic from profiler planner (#4210)


Signed-off-by: Hannah Zhang <hannahz@nvidia.com>
Signed-off-by: hongkuanz <hongkuanz@nvidia.com>
Co-authored-by: hongkuanz <hongkuanz@nvidia.com>
parent 6f708832
......@@ -35,11 +35,6 @@ console_handler.setFormatter(formatter)
logger.addHandler(console_handler)
class VolumeMount(BaseModel):
name: str = "dynamo-pvc"
mountPoint: str = "/data"
class Container(BaseModel):
image: Optional[str] = None
workingDir: Optional[str] = None
......@@ -71,15 +66,8 @@ class Services(BaseModel):
model_config = {"extra": "allow"}
class PVCConfig(BaseModel):
name: str = "dynamo-pvc"
create: Optional[bool] = False
model_config = {"extra": "allow"}
class Spec(BaseModel):
services: dict[str, Service]
pvcs: Optional[list[PVCConfig]] = None
model_config = {"extra": "allow"}
......@@ -99,11 +87,15 @@ class MultinodeConfig(BaseModel):
class DgdPlannerServiceConfig(BaseModel):
"""Planner service configuration.
The planner reads profiling data from a ConfigMap (planner-profile-data)
that is automatically created and mounted by the profiler; it has no PVC dependencies.
"""
dynamoNamespace: str = "dynamo" # placeholder
componentType: str = "planner"
replicas: int = 1
# Do not attach PVC; we'll mount a ConfigMap for planner data instead.
volumeMounts: list[VolumeMount] = []
extraPodSpec: PodSpec = PodSpec(
mainContainer=Container(
image="my-registry/dynamo-runtime:my-tag", # placeholder
......
......@@ -206,7 +206,7 @@ def generate_dgd_config_with_planner(
mc_mounts.append(
{
"name": "planner-profile-data",
"mountPath": cm_mount_path,
"mountPoint": cm_mount_path,
"readOnly": True,
}
)
......
......@@ -202,17 +202,8 @@ data:
EOF
sed 's/^/ /' {{.OutputPath}}/{{.OutputFile}} >> /tmp/cm.yaml
# Add profiling data directories to ConfigMap for long-term storage
# Find all interpolation directories and add their raw_data.npz files
for dir in {{.OutputPath}}/*/interpolation; do
if [ -d "$dir" ]; then
dirname=$(basename $(dirname "$dir"))
if [ -f "$dir/raw_data.npz" ]; then
echo " ${dirname}_raw_data.npz: |" >> /tmp/cm.yaml
base64 "$dir/raw_data.npz" | sed 's/^/ /' >> /tmp/cm.yaml
fi
fi
done
# Note: Profiling data (raw_data.npz converted to JSON) is included in the
# generated DGD YAML as a separate ConfigMap by the profiler, no need to add it here
kubectl apply -f /tmp/cm.yaml
echo "Saved profiling output to ConfigMap {{.ConfigMapName}}"
......@@ -405,6 +396,19 @@ func (r *DynamoGraphDeploymentRequestReconciler) handleProfilingState(ctx contex
// Record spec generation event
r.Recorder.Event(dgdr, corev1.EventTypeNormal, EventReasonSpecGenerated, MessageSpecGenerated)
// Create additional resources (ConfigMaps) immediately after profiling
// This ensures that the `planner-profile-data` ConfigMap is available for both auto and manual deployment
targetNamespace := dgdr.Namespace
if dgdr.Spec.DeploymentOverrides != nil && dgdr.Spec.DeploymentOverrides.Namespace != "" {
targetNamespace = dgdr.Spec.DeploymentOverrides.Namespace
}
if err := r.createAdditionalResources(ctx, dgdr, targetNamespace); err != nil {
logger.Error(err, "Failed to create additional resources after profiling")
// Don't fail the DGDR, just log the error - ConfigMaps can be created manually
r.Recorder.Event(dgdr, corev1.EventTypeWarning, "ConfigMapCreationFailed",
fmt.Sprintf("Failed to create ConfigMaps from profiling output: %v", err))
}
// If autoApply is enabled, transition to Deploying state
if dgdr.Spec.AutoApply {
logger.Info("AutoApply enabled, transitioning to Deploying state")
......@@ -479,20 +483,6 @@ func (r *DynamoGraphDeploymentRequestReconciler) handleDeployingState(ctx contex
// Check if we need to create DGD
if dgdr.Status.Deployment == nil || !dgdr.Status.Deployment.Created {
// Determine target namespace for deployment
targetNamespace := dgdr.Namespace
if dgdr.Spec.DeploymentOverrides != nil && dgdr.Spec.DeploymentOverrides.Namespace != "" {
targetNamespace = dgdr.Spec.DeploymentOverrides.Namespace
}
// Deploy additional resources (ConfigMaps) from the profiling output first
if err := r.createAdditionalResources(ctx, dgdr, targetNamespace); err != nil {
logger.Error(err, "Failed to create additional resources")
r.Recorder.Event(dgdr, corev1.EventTypeWarning, MessageDeploymentCreationFailed,
fmt.Sprintf("Failed to create additional resources: %v", err))
return ctrl.Result{}, err
}
return r.createDGD(ctx, dgdr)
}
......@@ -1094,13 +1084,12 @@ func (r *DynamoGraphDeploymentRequestReconciler) createProfilingJob(ctx context.
}},
}
// Build volumes - use dynamo-pvc for profiling output so data persists for the Planner
// Build volumes - use emptyDir for profiling output
// The sidecar saves all needed data to ConfigMaps, so persistence is not needed
volumes := []corev1.Volume{{
Name: VolumeNameProfilingOutput,
VolumeSource: corev1.VolumeSource{
PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{
ClaimName: "dynamo-pvc",
},
EmptyDir: &corev1.EmptyDirVolumeSource{},
},
}}
......
......@@ -291,14 +291,12 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
Expect(job.Spec.Template.Spec.Containers[0].Name).Should(Equal(ContainerNameProfiler))
Expect(job.Spec.Template.Spec.Containers[1].Name).Should(Equal(ContainerNameOutputCopier))
// Verify PVC volume mount
// Verify emptyDir volume (not PVC)
Expect(job.Spec.Template.Spec.Volumes).Should(ContainElement(
corev1.Volume{
Name: VolumeNameProfilingOutput,
VolumeSource: corev1.VolumeSource{
PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{
ClaimName: "dynamo-pvc",
},
EmptyDir: &corev1.EmptyDirVolumeSource{},
},
},
))
......
......@@ -17,7 +17,7 @@ This includes:
- `setup_benchmarking_resources.sh` — Sets up benchmarking and profiling resources in your existing Dynamo namespace
- `manifests/`
- `pvc.yaml` — PVC `dynamo-pvc` for storing profiler results and configurations
- `pvc.yaml` — PVC `dynamo-pvc`
- `pvc-access-pod.yaml` — short‑lived pod for copying profiler results from the PVC
- `kubernetes.py` — helper used by tooling to apply/read resources (e.g., access pod for PVC access)
- `dynamo_deployment.py` — utilities for working with DynamoGraphDeployment resources
......@@ -108,15 +108,21 @@ kubectl cp $NAMESPACE/pvc-access-pod:/data/results ./benchmarks/results
kubectl cp $NAMESPACE/pvc-access-pod:/data/results/benchmark-name ./benchmarks/results/benchmark-name
```
**Download profiling results (optional, for local inspection):**
**Inspect profiling results (optional):**
```bash
# Optional: Download profiling data for local analysis
# The planner reads directly from the PVC, so this is only needed for inspection
kubectl cp $NAMESPACE/pvc-access-pod:/data ./profiling_data
# View the generated DGD configuration from profiling
kubectl get configmap dgdr-output-<dgdr-name> -n $NAMESPACE -o yaml
# View the planner profiling data (JSON format)
kubectl get configmap planner-profile-data -n $NAMESPACE -o yaml
```
> **Note on Profiling Results**: When using DGDR (DynamoGraphDeploymentRequest) for SLA-driven profiling, profiling data is stored in `/data/` on the PVC. The planner component reads this data directly from the PVC, so downloading is **optional** - only needed if you want to inspect the profiling results locally (e.g., view performance plots, check configurations).
> **Note on Profiling Results**: When using DGDR (DynamoGraphDeploymentRequest) for SLA-driven profiling, profiling data is automatically stored in ConfigMaps:
> - `dgdr-output-<dgdr-name>`: Contains the generated DynamoGraphDeployment YAML
> - `planner-profile-data`: Contains profiling performance data in JSON format for the planner
>
> The planner component reads this data directly from the mounted ConfigMap, so no PVC is needed.
#### Cleanup Access Pod
......@@ -131,7 +137,6 @@ kubectl delete pod pvc-access-pod -n $NAMESPACE
**Common path patterns in the PVC:**
- `/data/configs/` - Configuration files (DGD manifests)
- `/data/results/` - Benchmark results (for download after benchmarking jobs)
- `/data/` - Profiling data (used directly by planner, typically not downloaded)
- `/data/benchmarking/` - Benchmarking artifacts
#### Next Steps
......
......@@ -58,7 +58,6 @@ The Dynamo Operator watches for DGDRs and automatically:
Before creating a DGDR, ensure:
- **Dynamo platform installed** with the operator running (see [Installation Guide](/docs/kubernetes/installation_guide.md))
- **[kube-prometheus-stack](/docs/kubernetes/observability/metrics.md) installed and running** (required for SLA planner)
- **Profiling PVC created** (see [Benchmarking Resource Setup](/deploy/utils/README.md#benchmarking-resource-setup#BenchmarkingResourceSetup))
- **Image pull secrets configured** if using private registries (typically `nvcr-imagepullsecret` for NVIDIA images)
- **Sufficient GPU resources** available in your cluster for profiling
- **Runtime images available** that contain both profiler and runtime components
......@@ -360,41 +359,42 @@ spec:
Then manually extract and apply the generated DGD:
```bash
# Extract generated config
kubectl get dgdr sla-aic -n $NAMESPACE -o jsonpath='{.status.generatedConfig}' > my-dgd.yaml
# Extract generated DGD from DGDR status
kubectl get dgdr sla-aic -n $NAMESPACE -o jsonpath='{.status.generatedDeployment}' | kubectl apply -f -
# Review and modify if needed
vi my-dgd.yaml
# Or save to file first for review/modification
kubectl get dgdr sla-aic -n $NAMESPACE -o jsonpath='{.status.generatedDeployment}' > my-dgd.yaml
# Deploy manually
vi my-dgd.yaml
kubectl apply -f my-dgd.yaml -n $NAMESPACE
```
The generated DGD includes optimized configurations and the SLA planner component.
The generated DGD includes optimized configurations and the SLA planner component. The required `planner-profile-data` ConfigMap is automatically created when profiling completes, so the DGD will deploy successfully.
#### Option 2: Use Standalone Planner Templates (Advanced)
For advanced use cases, you can manually deploy using the standalone planner templates in `examples/backends/*/deploy/disagg_planner.yaml`:
```bash
# After profiling completes, profiling data is stored on the PVC at /data
# After profiling completes, profiling data is automatically stored in ConfigMaps
# OPTIONAL: Download profiling results for local inspection
# Create access pod (skip this step if access pod is already running)
kubectl apply -f deploy/utils/manifests/pvc-access-pod.yaml -n $NAMESPACE
kubectl wait --for=condition=Ready pod/pvc-access-pod -n $NAMESPACE --timeout=60s
# OPTIONAL: Inspect profiling results stored in ConfigMaps
# View the generated DGD configuration
kubectl get configmap dgdr-output-<dgdr-name> -n $NAMESPACE -o yaml
# Download the data
kubectl cp $NAMESPACE/pvc-access-pod:/data ./profiling_data
# View the planner profiling data (JSON format)
kubectl get configmap planner-profile-data -n $NAMESPACE -o yaml
# Cleanup
kubectl delete pod pvc-access-pod -n $NAMESPACE
# Update the PROMETHEUS_ENDPOINT environment variable in the planner template
# to match your cluster's Prometheus service location (see comments in the template)
# Update backend planner manifest as needed, then deploy
kubectl apply -f examples/backends/<backend>/deploy/disagg_planner.yaml -n $NAMESPACE
```
> **Note**: The standalone templates are provided as examples and may need customization for your model and requirements. The DGDR-generated configuration (Option 1) is recommended as it's automatically tuned to your profiling results and SLA targets.
>
> **Important - Prometheus Configuration**: The planner queries Prometheus to get frontend request metrics for scaling decisions. If you see errors like "Failed to resolve prometheus service", ensure the `PROMETHEUS_ENDPOINT` environment variable in your planner configuration correctly points to your Prometheus service. See the comments in the example templates for details.
### Relationship to DynamoGraphDeployment (DGD)
......
......@@ -6,9 +6,6 @@ kind: DynamoGraphDeployment
metadata:
name: sglang-disagg-planner
spec:
pvcs:
- name: dynamo-pvc
create: false # Must be pre-created before deployment and SLA profiler must have been run
services:
Frontend:
dynamoNamespace: dynamo
......@@ -22,9 +19,6 @@ spec:
envFromSecret: hf-token-secret
componentType: planner
replicas: 1
volumeMounts:
- name: dynamo-pvc
mountPoint: /data
extraPodSpec:
mainContainer:
image: my-registry/sglang-runtime:my-tag
......@@ -37,7 +31,17 @@ spec:
- --environment=kubernetes
- --backend=sglang
- --adjustment-interval=60
- --profile-results-dir=/data
- --profile-results-dir=/workspace/profiling_results
volumeMounts:
- name: planner-profile-data
mountPath: /workspace/profiling_results
readOnly: true
volumes:
- name: planner-profile-data
configMap:
# Must be created by the profiler before deployment
# See docs/planner/sla_planner_quickstart.md for more details
name: planner-profile-data
decode:
dynamoNamespace: dynamo
envFromSecret: hf-token-secret
......
......@@ -6,9 +6,6 @@ kind: DynamoGraphDeployment
metadata:
name: trtllm-disagg-planner
spec:
pvcs:
- name: dynamo-pvc
create: false
services:
Frontend:
dynamoNamespace: trtllm-disagg-planner
......@@ -39,9 +36,6 @@ spec:
envFromSecret: hf-token-secret
componentType: planner
replicas: 1
volumeMounts:
- name: dynamo-pvc # Must be pre-created before deployment and SLA profiler must have been run
mountPoint: /data
extraPodSpec:
mainContainer:
image: my-registry/trtllm-runtime:my-tag
......@@ -57,8 +51,18 @@ spec:
- --environment=kubernetes
- --backend=trtllm
- --adjustment-interval=60
- --profile-results-dir=/data
- --profile-results-dir=/workspace/profiling_results
- --prometheus-port=9085
volumeMounts:
- name: planner-profile-data
mountPath: /workspace/profiling_results
readOnly: true
volumes:
- name: planner-profile-data
configMap:
# Must be created by the profiler before deployment
# See docs/planner/sla_planner_quickstart.md for more details
name: planner-profile-data
TRTLLMDecodeWorker:
dynamoNamespace: trtllm-disagg-planner
envFromSecret: hf-token-secret
......
......@@ -99,7 +99,7 @@ We have public images available on [NGC Catalog](https://catalog.ngc.nvidia.com/
### Pre-Deployment Profiling (SLA Planner Only)
If using the SLA Planner deployment (`disagg_planner.yaml`), follow the [pre-deployment profiling guide](../../../../docs/benchmarks/sla_driven_profiling.md) to run pre-deployment profiling. The results will be saved to the `dynamo-pvc` PVC and queried by the SLA Planner.
If using the SLA Planner deployment (`disagg_planner.yaml`), follow the [pre-deployment profiling guide](../../../../docs/benchmarks/sla_driven_profiling.md) to run pre-deployment profiling.
## Usage
......
......@@ -6,9 +6,6 @@ kind: DynamoGraphDeployment
metadata:
name: vllm-disagg-planner
spec:
pvcs:
- name: dynamo-pvc
create: false # Must be pre-created before deployment and SLA profiler must have been run
services:
Frontend:
dynamoNamespace: vllm-disagg-planner
......@@ -21,9 +18,6 @@ spec:
dynamoNamespace: vllm-disagg-planner
componentType: planner
replicas: 1
volumeMounts:
- name: dynamo-pvc
mountPoint: /data
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
......@@ -36,7 +30,17 @@ spec:
- --environment=kubernetes
- --backend=vllm
- --adjustment-interval=60
- --profile-results-dir=/data
- --profile-results-dir=/workspace/profiling_results
volumeMounts:
- name: planner-profile-data
mountPath: /workspace/profiling_results
readOnly: true
volumes:
- name: planner-profile-data
configMap:
# Must be created by the profiler before deployment
# See docs/planner/sla_planner_quickstart.md for more details
name: planner-profile-data
VllmDecodeWorker:
dynamoNamespace: vllm-disagg-planner
envFromSecret: hf-token-secret
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment