"lib/runtime/vscode:/vscode.git/clone" did not exist on "3fd0ab3d74c9dc9e185580eb0a863f3c24465a67"
Unverified commit a7b703bd, authored by hhzhang16, committed by GitHub
Browse files

fix: profiler sidecar and other SLA-driven autodeployment fixes (#3932)


Signed-off-by: Hannah Zhang <hannahz@nvidia.com>
parent 3998fdcb
......@@ -12,7 +12,7 @@ spec:
# ProfilingConfig maps directly to the profile_sla.py config format
profilingConfig:
profilerImage: "nvcr.io/nvidian/dynamo-dev/vllm-runtime:dep-540.5"
profilerImage: "nvcr.io/nvidian/dynamo-dev/vllm-runtime:dep-554.0"
config:
# Sweep/profiling configuration
sweep:
......@@ -31,8 +31,7 @@ spec:
# Deployment overrides for the auto-created DGD
deploymentOverrides:
workersImage: "nvcr.io/nvidian/dynamo-dev/trtllm-runtime:dep-540.5"
workersImage: "nvcr.io/nvidian/dynamo-dev/trtllm-runtime:dep-554.0"
# Automatically create DynamoGraphDeployment after profiling
autoApply: true
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# DynamoGraphDeploymentRequest for standard online profiling
# Converted from profile_sla_job.yaml
# DynamoGraphDeploymentRequest for online profiling (actual deployment testing)
apiVersion: nvidia.com/v1alpha1
kind: DynamoGraphDeploymentRequest
metadata:
......@@ -13,12 +12,11 @@ spec:
# ProfilingConfig maps directly to the profile_sla.py config format
profilingConfig:
profilerImage: "nvcr.io/nvidian/dynamo-dev/vllm-runtime:dep-540.5"
profilerImage: "nvcr.io/nvidian/dynamo-dev/vllm-runtime:dep-554.0"
config:
# Sweep/profiling configuration
sweep:
skip_existing_results: true
# Standard online profiling (not using AI Configurator)
# Online profiling mode (real deployment testing)
use_ai_configurator: false
# SLA targets for profiling
......@@ -30,8 +28,7 @@ spec:
# Deployment overrides for the auto-created DGD
deploymentOverrides:
workersImage: "nvcr.io/nvidian/dynamo-dev/vllm-runtime:dep-540.5"
workersImage: "nvcr.io/nvidian/dynamo-dev/vllm-runtime:dep-554.0"
# Automatically create DynamoGraphDeployment after profiling
autoApply: true
......@@ -44,6 +44,10 @@ def auto_generate_search_space(args: argparse.Namespace) -> None:
logger.info(f"Updating model in DGD config file to {args.model}")
config = config_modifier.update_model(config, args.model)
if args.dgd_image:
logger.info(f"Updating DGD image to {args.dgd_image}")
config = config_modifier.update_image(config, args.dgd_image)
config_fn = f"{args.output_dir}/disagg_config.yaml"
logger.info(f"Saving generated disagg DGD config for profiling to {config_fn}")
os.makedirs(args.output_dir, exist_ok=True)
......
......@@ -48,7 +48,6 @@ spec:
# Sweep/profiling configuration
sweep:
skip_existing_results: true # Skip configurations that already have results
prefill_interpolation_granularity: 16 # Samples for TTFT interpolation
decode_interpolation_granularity: 6 # Samples for ITL interpolation
......
......@@ -159,7 +159,24 @@ const (
const sidecarScriptTemplate = `
set -e
set -o pipefail
# Wait for the profiler container to complete, not just for the file to exist
# This ensures we capture the final config, not intermediate results
echo "Waiting for profiler to complete..."
while true; do
# Check if profiler container has finished (either Completed or Error state)
# Use kubectl to check the pod's container status
STATUS=$(kubectl get pod $HOSTNAME -n {{.Namespace}} -o jsonpath='{.status.containerStatuses[?(@.name=="profiler")].state}' 2>/dev/null || echo "")
if echo "$STATUS" | grep -q "terminated"; then
echo "Profiler container has terminated"
break
fi
sleep 5
done
# Now wait for the output file to exist
echo "Waiting for output file {{.OutputPath}}/{{.OutputFile}}..."
while [ ! -f {{.OutputPath}}/{{.OutputFile}} ]; do sleep 2; done
echo "Output file found, creating ConfigMap..."
# Start building ConfigMap YAML with DGD spec
cat >/tmp/cm.yaml <<EOF
......
......@@ -345,7 +345,6 @@ spec:
sweep:
use_ai_configurator: false
skip_existing_results: false
deploymentOverrides:
workersImage: "nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.6.1"
......
......@@ -324,7 +324,6 @@ profilingConfig:
# Profiling sweep settings (optional)
sweep:
skip_existing_results: false
force_rerun: false
```
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment