"lib/bindings/python/vscode:/vscode.git/clone" did not exist on "18d9d1fa4f8c6f82ddc0ecf4a55fc20238393a12"
Unverified Commit 659122ed authored by hhzhang16, committed by GitHub
Browse files

feat: add Planner schema to DGDR and Profiler input (#6463)


Signed-off-by: Hannah Zhang <hannahz@nvidia.com>
Co-authored-by: Sertaç Özercan <852750+sozercan@users.noreply.github.com>
parent 0f47842a
......@@ -126,7 +126,7 @@ jobs:
working-directory: ./deploy/operator
run: |
python -m pip install --upgrade pip
python -m pip install "pydantic>=2,<3" "black==23.1.0"
python -m pip install "pydantic>=2,<3" "black==23.1.0" "pyyaml>=6.0"
- name: Check for uncommitted changes
shell: bash
working-directory: ./deploy/operator
......
......@@ -41,9 +41,9 @@ class PlannerConfig(BaseModel):
with defaults sourced from SLAPlannerDefaults.
"""
plannerPreDeploymentSweeping: Optional[PlannerPreDeploymentSweepMode] = Field(
pre_deployment_sweeping_mode: Optional[PlannerPreDeploymentSweepMode] = Field(
default=PlannerPreDeploymentSweepMode.Rapid,
description='PlannerPreDeploymentSweeping controls pre-deployment sweeping mode for planner in-depth profiling. "none" means no pre-deployment sweep (only load-based scaling). "rapid" uses AI Configurator to simulate engine performance. "thorough" uses real GPUs to measure engine performance (takes several hours).',
description='Controls pre-deployment sweeping mode for planner in-depth profiling. "none" means no pre-deployment sweep (only load-based scaling). "rapid" uses AI Configurator to simulate engine performance. "thorough" uses real GPUs to measure engine performance (takes several hours).',
)
environment: Literal[
......
{
"$defs": {
"PlannerPreDeploymentSweepMode": {
"enum": [
"none",
"rapid",
"thorough"
],
"title": "PlannerPreDeploymentSweepMode",
"type": "string"
}
},
"description": "Pydantic configuration for the Dynamo Planner.\n\nReplaces the argparse-based CLI. All fields mirror the former CLI flags\nwith defaults sourced from SLAPlannerDefaults.",
"properties": {
"plannerPreDeploymentSweeping": {
"anyOf": [
{
"$ref": "#/$defs/PlannerPreDeploymentSweepMode"
},
{
"type": "null"
}
],
"default": "rapid",
"description": "PlannerPreDeploymentSweeping controls pre-deployment sweeping mode for planner in-depth profiling. \"none\" means no pre-deployment sweep (only load-based scaling). \"rapid\" uses AI Configurator to simulate engine performance. \"thorough\" uses real GPUs to measure engine performance (takes several hours)."
},
"environment": {
"default": "kubernetes",
"enum": [
"kubernetes",
"virtual",
"global-planner"
],
"title": "Environment",
"type": "string"
},
"namespace": {
"title": "Namespace",
"type": "string"
},
"backend": {
"default": "vllm",
"enum": [
"vllm",
"sglang",
"trtllm",
"mocker"
],
"title": "Backend",
"type": "string"
},
"mode": {
"default": "disagg",
"enum": [
"disagg",
"prefill",
"decode",
"agg"
],
"title": "Mode",
"type": "string"
},
"no_operation": {
"default": false,
"title": "No Operation",
"type": "boolean"
},
"log_dir": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"title": "Log Dir"
},
"throughput_adjustment_interval": {
"default": 180,
"title": "Throughput Adjustment Interval",
"type": "integer"
},
"max_gpu_budget": {
"default": 8,
"title": "Max Gpu Budget",
"type": "integer"
},
"min_endpoint": {
"default": 1,
"title": "Min Endpoint",
"type": "integer"
},
"decode_engine_num_gpu": {
"anyOf": [
{
"type": "integer"
},
{
"type": "null"
}
],
"default": null,
"title": "Decode Engine Num Gpu"
},
"prefill_engine_num_gpu": {
"anyOf": [
{
"type": "integer"
},
{
"type": "null"
}
],
"default": null,
"title": "Prefill Engine Num Gpu"
},
"profile_results_dir": {
"default": "profiling_results",
"title": "Profile Results Dir",
"type": "string"
},
"ttft": {
"default": 500.0,
"title": "Ttft",
"type": "number"
},
"itl": {
"default": 50.0,
"title": "Itl",
"type": "number"
},
"load_predictor": {
"default": "arima",
"title": "Load Predictor",
"type": "string"
},
"load_predictor_log1p": {
"default": false,
"title": "Load Predictor Log1P",
"type": "boolean"
},
"prophet_window_size": {
"default": 50,
"title": "Prophet Window Size",
"type": "integer"
},
"load_predictor_warmup_trace": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"title": "Load Predictor Warmup Trace"
},
"kalman_q_level": {
"default": 1.0,
"title": "Kalman Q Level",
"type": "number"
},
"kalman_q_trend": {
"default": 0.1,
"title": "Kalman Q Trend",
"type": "number"
},
"kalman_r": {
"default": 10.0,
"title": "Kalman R",
"type": "number"
},
"kalman_min_points": {
"default": 5,
"title": "Kalman Min Points",
"type": "integer"
},
"metric_pulling_prometheus_endpoint": {
"title": "Metric Pulling Prometheus Endpoint",
"type": "string"
},
"metric_reporting_prometheus_port": {
"title": "Metric Reporting Prometheus Port",
"type": "integer"
},
"no_correction": {
"default": false,
"title": "No Correction",
"type": "boolean"
},
"model_name": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"title": "Model Name"
},
"global_planner_namespace": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"title": "Global Planner Namespace"
},
"enable_throughput_scaling": {
"default": true,
"title": "Enable Throughput Scaling",
"type": "boolean"
},
"enable_load_scaling": {
"default": false,
"title": "Enable Load Scaling",
"type": "boolean"
},
"load_router_metrics_url": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"title": "Load Router Metrics Url"
},
"load_adjustment_interval": {
"default": 5,
"title": "Load Adjustment Interval",
"type": "integer"
},
"load_learning_window": {
"default": 50,
"title": "Load Learning Window",
"type": "integer"
},
"load_scaling_down_sensitivity": {
"default": 80,
"title": "Load Scaling Down Sensitivity",
"type": "integer"
},
"load_metric_samples": {
"default": 10,
"title": "Load Metric Samples",
"type": "integer"
},
"load_min_observations": {
"default": 5,
"title": "Load Min Observations",
"type": "integer"
}
},
"title": "PlannerConfig",
"type": "object"
}
\ No newline at end of file
......@@ -26,6 +26,12 @@ from typing import Any, Dict, List, Optional
from pydantic import BaseModel, Field, model_validator
# Import canonical planner types - do NOT redefine them here.
from dynamo.planner.utils.planner_config import ( # noqa: F401 (re-exported)
PlannerConfig,
PlannerPreDeploymentSweepMode,
)
class DGDRPhase(str, Enum):
Pending = "Pending"
......@@ -63,12 +69,6 @@ class BackendType(str, Enum):
Vllm = "vllm"
class PlannerPreDeploymentSweepMode(str, Enum):
None_ = "none"
Rapid = "rapid"
Thorough = "thorough"
class WorkloadSpec(BaseModel):
"""WorkloadSpec defines the workload characteristics for SLA-based profiling."""
......@@ -161,22 +161,6 @@ class OverridesSpec(BaseModel):
)
class PlannerSpec(BaseModel):
"""PlannerSpec configures the SLA planner for autoscaling in the generated DGD."""
enabled: Optional[bool] = Field(
default=None, description="Enabled indicates whether the planner is enabled."
)
plannerPreDeploymentSweeping: Optional[PlannerPreDeploymentSweepMode] = Field(
default=None,
description='PlannerPreDeploymentSweeping controls pre-deployment sweeping mode for planner in-depth profiling. "none" means no pre-deployment sweep (only load-based scaling). "rapid" uses AI Configurator to simulate engine performance. "thorough" uses real GPUs to measure engine performance (takes several hours).',
)
plannerArgsList: Optional[List[str]] = Field(
default=None,
description="PlannerArgsList is a list of additional planner arguments.",
)
class MockerSpec(BaseModel):
"""MockerSpec configures the simulated (mocker) backend."""
......@@ -198,9 +182,9 @@ class KVRouterSpec(BaseModel):
class FeaturesSpec(BaseModel):
"""FeaturesSpec controls optional Dynamo platform features in the generated deployment."""
planner: Optional[PlannerSpec] = Field(
planner: Optional[PlannerConfig] = Field(
default=None,
description="Planner configures the SLA planner for autoscaling in the generated DGD.",
description="Planner is the raw SLA planner configuration passed to the planner service. Its schema is defined by dynamo.planner.utils.planner_config.PlannerConfig. Go treats this as opaque bytes; the Planner service validates it at startup. The presence of this field (non-null) enables the planner in the generated DGD.",
)
mocker: Optional[MockerSpec] = Field(
default=None,
......
......@@ -563,33 +563,13 @@ spec:
type: boolean
type: object
planner:
description: Planner configures the SLA planner for autoscaling in the generated DGD.
properties:
enabled:
description: Enabled indicates whether the planner is enabled.
type: boolean
plannerArgsList:
description: PlannerArgsList is a list of additional planner arguments.
items:
type: string
type: array
plannerPreDeploymentSweeping:
allOf:
- enum:
- none
- rapid
- thorough
- enum:
- none
- rapid
- thorough
description: |-
PlannerPreDeploymentSweeping controls pre-deployment sweeping mode for planner in-depth profiling.
"none" means no pre-deployment sweep (only load-based scaling).
"rapid" uses AI Configurator to simulate engine performance.
"thorough" uses real GPUs to measure engine performance (takes several hours).
type: string
description: |-
Planner is the raw SLA planner configuration passed to the planner service.
Its schema is defined by dynamo.planner.utils.planner_config.PlannerConfig.
Go treats this as opaque bytes; the Planner service validates it at startup.
The presence of this field (non-null) enables the planner in the generated DGD.
type: object
x-kubernetes-preserve-unknown-fields: true
type: object
hardware:
description: |-
......
# Image URL to use all building/pushing image targets
IMG ?= controller:latest
# ENVTEST_K8S_VERSION refers to the version of kubebuilder assets to be downloaded by envtest binary.
ENVTEST_K8S_VERSION = 1.29.0
......@@ -124,7 +125,7 @@ generate-pydantic: ## Generate Python Pydantic models from v1beta1 Go types (req
@echo "Generating Pydantic models from v1beta1 DGDR types..."
@python3 api/scripts/generate_pydantic_from_go.py
@echo "Running Pydantic validation tests..."
@python3 api/scripts/test_pydantic_models.py
@python3 api/scripts/validate_pydantic_models.py
.PHONY: fmt
fmt: ## Run go fmt against code.
......
......@@ -30,6 +30,24 @@ from dataclasses import dataclass, field
from pathlib import Path
from typing import List, Optional, Tuple
# Types that are IMPORTED into the generated module rather than re-emitted.
# Maps Go type name → (Python import path, Python name, always_import).
#   * Python import path: module used in the generated `from <module> import (...)`.
#   * Python name: identifier imported from that module.
#   * always_import=True: emit the import regardless of whether the type appears in
#     the parsed structs/enums (e.g. types used only as field overrides, never as
#     standalone Go types). With False, the import is emitted only when a parsed
#     struct or enum has the Go type name.
# Parsed enums whose name is a key of this table are skipped during enum generation,
# since the imported definition takes their place.
# Planner-specific types are the canonical hand-written source of truth.
_IMPORT_OVERRIDES: dict[str, tuple[str, str, bool]] = {
"PlannerPreDeploymentSweepMode": (
"dynamo.planner.utils.planner_config",
"PlannerPreDeploymentSweepMode",
True,
),
"PlannerConfig": (
"dynamo.planner.utils.planner_config",
"PlannerConfig",
True,
),
}
# Per-struct docstring overrides for cases where the Python docstring should differ
# from the Go comment (e.g. Python-specific mutual-exclusivity documentation).
_STRUCT_DOCSTRINGS: dict = {
......@@ -64,6 +82,14 @@ _STRUCT_EXTRAS: dict = {
""",
}
# Per-field Python type overrides. Maps (StructName, json_field_name) → Python type string.
# Used when the Go type (e.g. *runtime.RawExtension) should map to a richer Python type
# rather than the generic Dict[str, Any].
# The override string is used as-is for the field's type annotation, and the field is
# treated as optional exactly when the string starts with "Optional[".
# NOTE(review): any name used in an override string presumably has to be importable in
# the generated module (PlannerConfig is also listed in _IMPORT_OVERRIDES) — confirm
# when adding new entries.
_FIELD_TYPE_OVERRIDES: dict[tuple[str, str], str] = {
# FeaturesSpec.planner is opaque in Go but strongly typed in Python.
("FeaturesSpec", "planner"): "Optional[PlannerConfig]",
}
_SPDX_HEADER = """\
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
......@@ -101,7 +127,10 @@ def _resolve_repo_root(start: Path) -> Path:
if (p / "go.mod").exists():
return p
p = p.parent
return start
raise RuntimeError(
f"Could not locate repository root from {start}. "
"Ensure the script is run inside the dynamo repository."
)
@dataclass
......@@ -154,6 +183,8 @@ class GoToPydanticConverter:
"runtime.RawExtension": "Dict[str, Any]",
"batchv1.JobSpec": "Dict[str, Any]",
"corev1.ResourceRequirements": "Dict[str, Any]",
"corev1.Toleration": "Dict[str, Any]",
"apiextensionsv1.JSON": "Any",
}
def __init__(self):
......@@ -447,8 +478,29 @@ class GoToPydanticConverter:
"",
]
# Generate enums first
# Emit import statements for overridden types, grouped by module
import_groups: dict[str, list[str]] = {}
for go_name, (mod, py_name, always_import) in _IMPORT_OVERRIDES.items():
in_enums = any(e.name == go_name for e in self.enums)
in_structs = any(s.name == go_name for s in self.structs)
if always_import or in_enums or in_structs:
import_groups.setdefault(mod, []).append(py_name)
for mod in sorted(import_groups):
names = sorted(import_groups[mod])
lines.append(
"# Import canonical planner types - do NOT redefine them here."
)
lines.append(f"from {mod} import ( # noqa: F401 (re-exported)")
for n in names:
lines.append(f" {n},")
lines.append(")")
lines.append("")
# Generate enums first (skip ones that are imported)
for enum in self.enums:
if enum.name in _IMPORT_OVERRIDES:
continue # imported above
lines.append("")
if enum.comment:
lines.append(f"# {enum.comment}")
......@@ -490,9 +542,16 @@ class GoToPydanticConverter:
effective_optional = go_field.is_optional and (
go_field.is_pointer or go_field.default is None
)
python_type = self._go_type_to_python(
go_field.go_type, go_field.is_pointer, effective_optional
)
override_key = (struct.name, go_field.name)
if override_key in _FIELD_TYPE_OVERRIDES:
python_type = _FIELD_TYPE_OVERRIDES[override_key]
# Derive effective_optional from the override string itself so
# default=None is emitted iff the type is actually Optional.
effective_optional = python_type.startswith("Optional[")
else:
python_type = self._go_type_to_python(
go_field.go_type, go_field.is_pointer, effective_optional
)
field_def = f" {go_field.name}: {python_type}"
......@@ -571,6 +630,16 @@ def main():
args = parser.parse_args()
# In the operator Docker build the context is deploy/operator/ only — components/src
# is not copied in. The generated file is already committed, so skip regeneration.
components_src = repo_root / "components" / "src"
if not components_src.exists():
print(
f"Note: {components_src} not found (operator-only build context). "
"Skipping Pydantic generation; using committed dgdr_v1beta1_types.py."
)
return 0
if not args.input.exists():
print(f"Error: Input file not found: {args.input}")
return 1
......
......@@ -21,6 +21,7 @@ Validates that the generated Pydantic models can be imported and used correctly.
import subprocess
import sys
import types
from pathlib import Path
......@@ -46,8 +47,50 @@ def _repo_root() -> Path:
return start
_components_src = _repo_root() / "components" / "src"
# In the operator Docker build the context is deploy/operator/ only — components/src
# is not copied in. The generated files are already committed, so skip validation.
if not _components_src.exists():
print(
f"Note: {_components_src} not found (operator-only build context). "
"Skipping Pydantic validation tests."
)
sys.exit(0)
# Add the components src to path so we can import the generated models
sys.path.insert(0, str(_repo_root() / "components" / "src"))
sys.path.insert(0, str(_components_src))
# ---------------------------------------------------------------------------
# Stub dynamo.runtime.logging and bypass the heavy dynamo.planner.__init__
# before importing any dynamo module.
#
# dynamo itself must be a namespace-like package (has __path__) so that
# Python's import machinery can traverse down to dynamo.profiler from the
# filesystem. dynamo.planner is pre-registered as a stub to skip its heavy
# __init__.py, while still allowing dynamo.planner.utils.* to load normally.
# ---------------------------------------------------------------------------
_dynamo_path = str(_components_src / "dynamo")
_planner_path = str(_components_src / "dynamo" / "planner")
if "dynamo" not in sys.modules:
_dynamo_mod = types.ModuleType("dynamo")
_dynamo_mod.__path__ = [_dynamo_path] # type: ignore[attr-defined]
_dynamo_mod.__package__ = "dynamo"
sys.modules["dynamo"] = _dynamo_mod
if "dynamo.runtime" not in sys.modules:
_runtime_mod = types.ModuleType("dynamo.runtime")
sys.modules["dynamo.runtime"] = _runtime_mod
_logging_mod = types.ModuleType("dynamo.runtime.logging")
_logging_mod.configure_dynamo_logging = lambda *args, **kwargs: None # type: ignore[attr-defined]
sys.modules["dynamo.runtime.logging"] = _logging_mod
_planner_mod = types.ModuleType("dynamo.planner")
_planner_mod.__path__ = [_planner_path] # type: ignore[attr-defined]
_planner_mod.__package__ = "dynamo.planner"
sys.modules["dynamo.planner"] = _planner_mod
import pydantic # noqa: E402
......@@ -61,8 +104,8 @@ from dynamo.profiler.utils.dgdr_v1beta1_types import ( # noqa: E402
MockerSpec,
ModelCacheSpec,
OptimizationType,
PlannerConfig,
PlannerPreDeploymentSweepMode,
PlannerSpec,
ProfilingPhase,
SearchStrategy,
SLASpec,
......@@ -105,7 +148,7 @@ def test_full_dgdr():
pvcModelPath="llama-3.1-405b",
),
features=FeaturesSpec(
planner=PlannerSpec(enabled=True),
planner=PlannerConfig(enable_load_scaling=False),
mocker=MockerSpec(enabled=False),
),
searchStrategy=SearchStrategy.Rapid,
......@@ -120,7 +163,7 @@ def test_full_dgdr():
assert spec.sla.itl == 10.0
assert spec.modelCache.pvcName == "model-cache"
assert spec.modelCache.pvcModelPath == "llama-3.1-405b"
assert spec.features.planner.enabled is True
assert isinstance(spec.features.planner, PlannerConfig)
assert spec.features.mocker.enabled is False
print("✓ Full DGDR spec validation passed")
......
......@@ -72,10 +72,16 @@
// Spec.DeploymentOverrides.{Name, nvidia.com/dgdr-deployment-overrides
// Namespace,Labels,Annotations}
//
// Planner config (opaque JSON object, decoded and re-encoded under blob["planner"]; empty objects are dropped):
//
// v1beta1 blob key
// ────────────────────────────────────────── ──────────────────────────────────────
// Features.Planner (*runtime.RawExtension) planner.* (JSON fields written directly)
//
// v1beta1-only fields with no v1alpha1 equivalent (omitted / TODO):
//
// Hardware.*, Workload.{Concurrency,RequestRate}, SLA.{E2ELatency,OptimizationType},
// Features.{Planner.*,KVRouter}, SearchStrategy
// Features.{KVRouter}, SearchStrategy
//
// # Status field mapping
//
......@@ -146,6 +152,7 @@ import (
batchv1 "k8s.io/api/batch/v1"
corev1 "k8s.io/api/core/v1"
apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
"k8s.io/apimachinery/pkg/runtime"
"sigs.k8s.io/controller-runtime/pkg/conversion"
)
......@@ -239,6 +246,7 @@ func convertDGDRSpecTo(src *DynamoGraphDeploymentRequestSpec, dst *v1beta1.Dynam
}
applySLAAndWorkloadFromBlob(blob, dst)
applyModelCacheFromBlob(blob, dst)
applyPlannerFromBlob(blob, dst)
setAnnotation(dstObj, annDGDRProfilingConfig, string(src.ProfilingConfig.Config.Raw))
}
......@@ -426,6 +434,12 @@ func convertDGDRSpecFrom(src *v1beta1.DynamoGraphDeploymentRequestSpec, dst *Dyn
}
mergeModelCacheIntoBlob(src.ModelCache, blob)
}
if src.Features != nil && src.Features.Planner != nil {
if blob == nil {
blob = make(map[string]interface{})
}
mergePlannerIntoBlob(src.Features.Planner, blob)
}
if blob != nil {
if data, err := json.Marshal(blob); err == nil {
dst.ProfilingConfig.Config = &apiextensionsv1.JSON{Raw: data}
......@@ -491,6 +505,39 @@ func mergeModelCacheIntoBlob(mc *v1beta1.ModelCacheSpec, blob map[string]interfa
}
}
// mergePlannerIntoBlob copies the v1beta1 planner configuration into the
// v1alpha1 profiling blob under the "planner" key.
//
// The RawExtension bytes are decoded as a JSON object and the decoded map is
// stored in blob["planner"]. Nothing is written (and no error is reported)
// when planner is nil, has no raw bytes, does not decode to a JSON object, or
// decodes to an empty object; in those cases blob is left untouched.
//
// NOTE(review): an empty object ("{}") is dropped here, so a planner that is
// enabled purely by presence would not survive conversion — confirm this is
// intended.
func mergePlannerIntoBlob(planner *runtime.RawExtension, blob map[string]interface{}) {
	if planner != nil && planner.Raw != nil {
		var decoded map[string]interface{}
		decodeErr := json.Unmarshal(planner.Raw, &decoded)
		// Best-effort: malformed or empty planner JSON is silently skipped.
		if decodeErr == nil && len(decoded) > 0 {
			blob["planner"] = decoded
		}
	}
}
// applyPlannerFromBlob reads blob["planner"] and, when it is a non-empty JSON
// object, re-encodes it and stores the bytes in dst.Features.Planner.
//
// dst.Features is allocated on demand. The function is a no-op when the key is
// absent, the value is not a JSON object, the object is empty, or it cannot be
// marshalled back to JSON.
//
// NOTE(review): an empty "planner" object is ignored, so Features.Planner stays
// nil in that case — confirm this matches the "presence enables the planner"
// contract of the v1beta1 field.
func applyPlannerFromBlob(blob map[string]interface{}, dst *v1beta1.DynamoGraphDeploymentRequestSpec) {
	// A missing key yields a nil interface, for which the type assertion
	// reports false, so one check covers both "absent" and "wrong type".
	section, isObject := blob["planner"].(map[string]interface{})
	if !isObject || len(section) == 0 {
		return
	}

	encoded, encodeErr := json.Marshal(section)
	if encodeErr != nil {
		return
	}

	features := dst.Features
	if features == nil {
		features = &v1beta1.FeaturesSpec{}
		dst.Features = features
	}
	features.Planner = &runtime.RawExtension{Raw: encoded}
}
// restoreAnnotationFields restores v1alpha1 spec fields that were annotation-preserved
// during ConvertTo: ConfigMapRef, OutputPVC, and DeploymentOverrides.
func restoreAnnotationFields(srcObj *v1beta1.DynamoGraphDeploymentRequest, dst *DynamoGraphDeploymentRequestSpec) {
......
......@@ -21,6 +21,9 @@ import (
"encoding/json"
"testing"
"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
v1beta1 "github.com/ai-dynamo/dynamo/deploy/operator/api/v1beta1"
apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
......@@ -43,6 +46,9 @@ func newV1alpha1DGDR() *DynamoGraphDeploymentRequest {
"pvcMountPath": "/data/model",
},
},
"planner": map[string]interface{}{
"enable_load_scaling": false,
},
"extra_key": "preserved",
}
blobRaw, _ := json.Marshal(profilingBlob)
......@@ -94,6 +100,7 @@ func newV1beta1DGDR() *v1beta1.DynamoGraphDeploymentRequest {
osl := int32(256)
rawDGD, _ := json.Marshal(map[string]interface{}{"apiVersion": "nvidia.com/v1alpha1", "kind": "DynamoGraphDeployment"})
rawPlanner, _ := json.Marshal(map[string]interface{}{"enable_load_scaling": false})
return &v1beta1.DynamoGraphDeploymentRequest{
ObjectMeta: metav1.ObjectMeta{
......@@ -119,7 +126,8 @@ func newV1beta1DGDR() *v1beta1.DynamoGraphDeploymentRequest {
PVCMountPath: "/models",
},
Features: &v1beta1.FeaturesSpec{
Mocker: &v1beta1.MockerSpec{Enabled: true},
Mocker: &v1beta1.MockerSpec{Enabled: true},
Planner: &runtime.RawExtension{Raw: rawPlanner},
},
},
Status: v1beta1.DynamoGraphDeploymentRequestStatus{
......@@ -269,44 +277,9 @@ func TestAlpha1RoundTrip(t *testing.T) {
}
// --- Spec checks ---
if restored.Spec.Model != original.Spec.Model {
t.Errorf("Spec.Model: got %q, want %q", restored.Spec.Model, original.Spec.Model)
}
if restored.Spec.Backend != original.Spec.Backend {
t.Errorf("Spec.Backend: got %q, want %q", restored.Spec.Backend, original.Spec.Backend)
}
if restored.Spec.AutoApply != original.Spec.AutoApply {
t.Errorf("Spec.AutoApply: got %v, want %v", restored.Spec.AutoApply, original.Spec.AutoApply)
}
if restored.Spec.UseMocker != original.Spec.UseMocker {
t.Errorf("Spec.UseMocker: got %v, want %v", restored.Spec.UseMocker, original.Spec.UseMocker)
}
if restored.Spec.ProfilingConfig.ProfilerImage != original.Spec.ProfilingConfig.ProfilerImage {
t.Errorf("ProfilingConfig.ProfilerImage: got %q, want %q", restored.Spec.ProfilingConfig.ProfilerImage, original.Spec.ProfilingConfig.ProfilerImage)
}
if restored.Spec.ProfilingConfig.OutputPVC != original.Spec.ProfilingConfig.OutputPVC {
t.Errorf("ProfilingConfig.OutputPVC: got %q, want %q", restored.Spec.ProfilingConfig.OutputPVC, original.Spec.ProfilingConfig.OutputPVC)
}
// ConfigMapRef round-trip
if restored.Spec.ProfilingConfig.ConfigMapRef == nil {
t.Fatal("ProfilingConfig.ConfigMapRef is nil after round-trip")
}
if restored.Spec.ProfilingConfig.ConfigMapRef.Name != original.Spec.ProfilingConfig.ConfigMapRef.Name {
t.Errorf("ConfigMapRef.Name: got %q, want %q", restored.Spec.ProfilingConfig.ConfigMapRef.Name, original.Spec.ProfilingConfig.ConfigMapRef.Name)
}
// EnableGPUDiscovery round-trip
if restored.Spec.EnableGPUDiscovery == nil || !*restored.Spec.EnableGPUDiscovery {
t.Error("Spec.EnableGPUDiscovery: expected true after round-trip")
}
// DeploymentOverrides round-trip
if restored.Spec.DeploymentOverrides == nil {
t.Fatal("Spec.DeploymentOverrides is nil after round-trip")
}
if restored.Spec.DeploymentOverrides.Name != original.Spec.DeploymentOverrides.Name {
t.Errorf("DeploymentOverrides.Name: got %q, want %q", restored.Spec.DeploymentOverrides.Name, original.Spec.DeploymentOverrides.Name)
// ProfilingConfig.Config (raw JSON blob) is verified separately below.
if diff := cmp.Diff(original.Spec, restored.Spec, cmpopts.IgnoreFields(ProfilingConfigSpec{}, "Config")); diff != "" {
t.Errorf("Spec mismatch after round-trip (-want +got):\n%s", diff)
}
// JSON blob round-trip: SLA fields re-emerge in ProfilingConfig.Config
......@@ -331,28 +304,18 @@ func TestAlpha1RoundTrip(t *testing.T) {
if blob["extra_key"] != "preserved" {
t.Errorf("extra_key: got %v, want %q", blob["extra_key"], "preserved")
}
// --- Status checks ---
if restored.Status.State != original.Status.State {
t.Errorf("Status.State: got %q, want %q", restored.Status.State, original.Status.State)
// Planner round-trip via applyPlannerFromBlob / mergePlannerIntoBlob
plannerMap, _ := blob["planner"].(map[string]interface{})
if plannerMap == nil {
t.Fatal("planner key missing in restored JSON blob")
}
if restored.Status.ObservedGeneration != original.Status.ObservedGeneration {
t.Errorf("Status.ObservedGeneration: got %d, want %d", restored.Status.ObservedGeneration, original.Status.ObservedGeneration)
if plannerMap["enable_load_scaling"] != false {
t.Errorf("planner.enable_load_scaling: got %v, want false", plannerMap["enable_load_scaling"])
}
if restored.Status.Backend != original.Status.Backend {
t.Errorf("Status.Backend: got %q, want %q", restored.Status.Backend, original.Status.Backend)
}
if restored.Status.ProfilingResults != original.Status.ProfilingResults {
t.Errorf("Status.ProfilingResults: got %q, want %q", restored.Status.ProfilingResults, original.Status.ProfilingResults)
}
if restored.Status.Deployment == nil {
t.Fatal("Status.Deployment is nil after round-trip")
}
if restored.Status.Deployment.Name != original.Status.Deployment.Name {
t.Errorf("Status.Deployment.Name: got %q, want %q", restored.Status.Deployment.Name, original.Status.Deployment.Name)
}
if restored.Status.Deployment.Created != original.Status.Deployment.Created {
t.Errorf("Status.Deployment.Created: got %v, want %v", restored.Status.Deployment.Created, original.Status.Deployment.Created)
// --- Status checks ---
if diff := cmp.Diff(original.Status, restored.Status); diff != "" {
t.Errorf("Status mismatch after round-trip (-want +got):\n%s", diff)
}
}
......@@ -373,79 +336,17 @@ func TestHubRoundTrip(t *testing.T) {
}
// --- Spec checks ---
if restored.Spec.Model != original.Spec.Model {
t.Errorf("Spec.Model: got %q, want %q", restored.Spec.Model, original.Spec.Model)
}
if restored.Spec.Backend != original.Spec.Backend {
t.Errorf("Spec.Backend: got %q, want %q", restored.Spec.Backend, original.Spec.Backend)
}
if restored.Spec.AutoApply != original.Spec.AutoApply {
t.Errorf("Spec.AutoApply: got %v, want %v", restored.Spec.AutoApply, original.Spec.AutoApply)
}
if restored.Spec.Image != original.Spec.Image {
t.Errorf("Spec.Image: got %q, want %q", restored.Spec.Image, original.Spec.Image)
}
// UseMocker round-trip via Features.Mocker.Enabled
if restored.Spec.Features == nil || restored.Spec.Features.Mocker == nil {
t.Fatal("Spec.Features.Mocker is nil after round-trip")
}
if restored.Spec.Features.Mocker.Enabled != original.Spec.Features.Mocker.Enabled {
t.Errorf("Features.Mocker.Enabled: got %v, want %v", restored.Spec.Features.Mocker.Enabled, original.Spec.Features.Mocker.Enabled)
}
// SLA round-trip via JSON blob
if restored.Spec.SLA == nil {
t.Fatal("Spec.SLA is nil after round-trip")
}
if restored.Spec.SLA.TTFT == nil || *restored.Spec.SLA.TTFT != *original.Spec.SLA.TTFT {
t.Errorf("SLA.TTFT: got %v, want %v", restored.Spec.SLA.TTFT, original.Spec.SLA.TTFT)
}
if restored.Spec.SLA.ITL == nil || *restored.Spec.SLA.ITL != *original.Spec.SLA.ITL {
t.Errorf("SLA.ITL: got %v, want %v", restored.Spec.SLA.ITL, original.Spec.SLA.ITL)
}
// Workload round-trip via JSON blob
if restored.Spec.Workload == nil {
t.Fatal("Spec.Workload is nil after round-trip")
}
if restored.Spec.Workload.ISL == nil || *restored.Spec.Workload.ISL != *original.Spec.Workload.ISL {
t.Errorf("Workload.ISL: got %v, want %v", restored.Spec.Workload.ISL, original.Spec.Workload.ISL)
}
if restored.Spec.Workload.OSL == nil || *restored.Spec.Workload.OSL != *original.Spec.Workload.OSL {
t.Errorf("Workload.OSL: got %v, want %v", restored.Spec.Workload.OSL, original.Spec.Workload.OSL)
}
// ModelCache round-trip via JSON blob
if restored.Spec.ModelCache == nil {
t.Fatal("Spec.ModelCache is nil after round-trip")
}
if restored.Spec.ModelCache.PVCName != original.Spec.ModelCache.PVCName {
t.Errorf("ModelCache.PVCName: got %q, want %q", restored.Spec.ModelCache.PVCName, original.Spec.ModelCache.PVCName)
}
if restored.Spec.ModelCache.PVCModelPath != original.Spec.ModelCache.PVCModelPath {
t.Errorf("ModelCache.PVCModelPath: got %q, want %q", restored.Spec.ModelCache.PVCModelPath, original.Spec.ModelCache.PVCModelPath)
}
if restored.Spec.ModelCache.PVCMountPath != original.Spec.ModelCache.PVCMountPath {
t.Errorf("ModelCache.PVCMountPath: got %q, want %q", restored.Spec.ModelCache.PVCMountPath, original.Spec.ModelCache.PVCMountPath)
if diff := cmp.Diff(original.Spec, restored.Spec); diff != "" {
t.Errorf("Spec mismatch after round-trip (-want +got):\n%s", diff)
}
// --- Status checks ---
// Deployed → Ready (lossy: v1alpha1 has no "Deployed" state; maps to "Ready")
// then on the way back Ready→Ready
// Phase is intentionally lossy: DGDRPhaseDeployed → Ready → Ready
if restored.Status.Phase != v1beta1.DGDRPhaseReady {
t.Errorf("Status.Phase: got %q, want %q (Deployed→Ready is lossy)", restored.Status.Phase, v1beta1.DGDRPhaseReady)
}
if restored.Status.ObservedGeneration != original.Status.ObservedGeneration {
t.Errorf("Status.ObservedGeneration: got %d, want %d", restored.Status.ObservedGeneration, original.Status.ObservedGeneration)
}
// DGDName round-trip
if restored.Status.DGDName != original.Status.DGDName {
t.Errorf("Status.DGDName: got %q, want %q", restored.Status.DGDName, original.Status.DGDName)
}
// ProfilingJobName round-trip via annotation
if restored.Status.ProfilingJobName != original.Status.ProfilingJobName {
t.Errorf("Status.ProfilingJobName: got %q, want %q", restored.Status.ProfilingJobName, original.Status.ProfilingJobName)
if diff := cmp.Diff(original.Status, restored.Status, cmpopts.IgnoreFields(v1beta1.DynamoGraphDeploymentRequestStatus{}, "Phase")); diff != "" {
t.Errorf("Status mismatch after round-trip (-want +got):\n%s", diff)
}
// GeneratedDeployment round-trip via ProfilingResults.SelectedConfig
if restored.Status.ProfilingResults == nil || restored.Status.ProfilingResults.SelectedConfig == nil {
......
......@@ -286,35 +286,6 @@ type OverridesSpec struct {
DGD *runtime.RawExtension `json:"dgd,omitempty"`
}
// PlannerPreDeploymentSweepMode controls pre-deployment sweeping thoroughness for planner profiling.
// +kubebuilder:validation:Enum=none;rapid;thorough
type PlannerPreDeploymentSweepMode string
const (
PlannerPreDeploymentSweepModeNone PlannerPreDeploymentSweepMode = "none"
PlannerPreDeploymentSweepModeRapid PlannerPreDeploymentSweepMode = "rapid"
PlannerPreDeploymentSweepModeThorough PlannerPreDeploymentSweepMode = "thorough"
)
// PlannerSpec configures the SLA planner for autoscaling in the generated DGD.
type PlannerSpec struct {
	// Enabled indicates whether the planner is enabled.
	// +optional
	Enabled bool `json:"enabled,omitempty"`

	// NOTE: no field-level Enum marker below. The allowed values are already
	// enforced by the Enum marker on the PlannerPreDeploymentSweepMode type;
	// repeating it on the field makes the generated CRD emit the same enum
	// twice under allOf.

	// PlannerPreDeploymentSweeping controls pre-deployment sweeping mode for planner in-depth profiling.
	// "none" means no pre-deployment sweep (only load-based scaling).
	// "rapid" uses AI Configurator to simulate engine performance.
	// "thorough" uses real GPUs to measure engine performance (takes several hours).
	// +optional
	PlannerPreDeploymentSweeping *PlannerPreDeploymentSweepMode `json:"plannerPreDeploymentSweeping,omitempty"`

	// PlannerArgsList is a list of additional planner arguments.
	// +optional
	PlannerArgsList []string `json:"plannerArgsList,omitempty"`
}
// MockerSpec configures the simulated (mocker) backend.
type MockerSpec struct {
// Enabled indicates whether to deploy mocker workers instead of real inference workers.
......@@ -333,9 +304,14 @@ type KVRouterSpec struct {
// FeaturesSpec controls optional Dynamo platform features in the generated deployment.
type FeaturesSpec struct {
// Planner configures the SLA planner for autoscaling in the generated DGD.
// Planner is the raw SLA planner configuration passed to the planner service.
// Its schema is defined by dynamo.planner.utils.planner_config.PlannerConfig.
// Go treats this as opaque bytes; the Planner service validates it at startup.
// The presence of this field (non-null) enables the planner in the generated DGD.
// +optional
Planner *PlannerSpec `json:"planner,omitempty"`
// +kubebuilder:pruning:PreserveUnknownFields
// +kubebuilder:validation:Type=object
Planner *runtime.RawExtension `json:"planner,omitempty"`
// TODO: KVRouter support is not yet implemented in the operator.
// KVRouter *KVRouterSpec `json:"kvRouter,omitempty"`
......
......@@ -209,7 +209,7 @@ func (in *FeaturesSpec) DeepCopyInto(out *FeaturesSpec) {
*out = *in
if in.Planner != nil {
in, out := &in.Planner, &out.Planner
*out = new(PlannerSpec)
*out = new(runtime.RawExtension)
(*in).DeepCopyInto(*out)
}
if in.Mocker != nil {
......@@ -345,31 +345,6 @@ func (in *ParetoConfig) DeepCopy() *ParetoConfig {
return out
}
// DeepCopyInto is an autogenerated deepcopy function. It writes a copy of the
// receiver into out, allocating fresh storage for the pointer and slice fields
// so that out shares no memory with in. in must be non-nil.
func (in *PlannerSpec) DeepCopyInto(out *PlannerSpec) {
	// Shallow copy first; reference-typed fields are re-allocated below.
	*out = *in
	if in.PlannerPreDeploymentSweeping != nil {
		out.PlannerPreDeploymentSweeping = new(PlannerPreDeploymentSweepMode)
		*out.PlannerPreDeploymentSweeping = *in.PlannerPreDeploymentSweeping
	}
	if in.PlannerArgsList != nil {
		out.PlannerArgsList = make([]string, len(in.PlannerArgsList))
		copy(out.PlannerArgsList, in.PlannerArgsList)
	}
}
// DeepCopy is an autogenerated deepcopy function. It returns a newly allocated
// PlannerSpec populated via DeepCopyInto, or nil when the receiver is nil.
func (in *PlannerSpec) DeepCopy() *PlannerSpec {
	if in != nil {
		clone := &PlannerSpec{}
		in.DeepCopyInto(clone)
		return clone
	}
	return nil
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ProfilingResultsStatus) DeepCopyInto(out *ProfilingResultsStatus) {
*out = *in
......
......@@ -563,33 +563,13 @@ spec:
type: boolean
type: object
planner:
description: Planner configures the SLA planner for autoscaling in the generated DGD.
properties:
enabled:
description: Enabled indicates whether the planner is enabled.
type: boolean
plannerArgsList:
description: PlannerArgsList is a list of additional planner arguments.
items:
type: string
type: array
plannerPreDeploymentSweeping:
allOf:
- enum:
- none
- rapid
- thorough
- enum:
- none
- rapid
- thorough
description: |-
PlannerPreDeploymentSweeping controls pre-deployment sweeping mode for planner in-depth profiling.
"none" means no pre-deployment sweep (only load-based scaling).
"rapid" uses AI Configurator to simulate engine performance.
"thorough" uses real GPUs to measure engine performance (takes several hours).
type: string
description: |-
Planner is the raw SLA planner configuration passed to the planner service.
Its schema is defined by dynamo.planner.utils.planner_config.PlannerConfig.
Go treats this as opaque bytes; the Planner service validates it at startup.
The presence of this field (non-null) enables the planner in the generated DGD.
type: object
x-kubernetes-preserve-unknown-fields: true
type: object
hardware:
description: |-
......
......@@ -1373,7 +1373,7 @@ _Appears in:_
| Field | Description | Default | Validation |
| --- | --- | --- | --- |
| `planner` _[PlannerSpec](#plannerspec)_ | Planner configures the SLA planner for autoscaling in the generated DGD. | | Optional: \{\} <br /> |
| `planner` _[RawExtension](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#rawextension-runtime-pkg)_ | Planner is the raw SLA planner configuration passed to the planner service.<br />Its schema is defined by dynamo.planner.utils.planner_config.PlannerConfig.<br />Go treats this as opaque bytes; the Planner service validates it at startup.<br />The presence of this field (non-null) enables the planner in the generated DGD. | | Type: object <br />Optional: \{\} <br /> |
| `mocker` _[MockerSpec](#mockerspec)_ | Mocker configures the simulated (mocker) backend for testing without GPUs. | | Optional: \{\} <br /> |
......@@ -1485,43 +1485,6 @@ _Appears in:_
| `config` _[RawExtension](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#rawextension-runtime-pkg)_ | Config is the full deployment configuration for this Pareto point. | | Type: object <br /> |
#### PlannerPreDeploymentSweepMode
_Underlying type:_ _string_
PlannerPreDeploymentSweepMode controls pre-deployment sweeping thoroughness for planner profiling.
_Validation:_
- Enum: [none rapid thorough]
_Appears in:_
- [PlannerSpec](#plannerspec)
| Field | Description |
| --- | --- |
| `none` | |
| `rapid` | |
| `thorough` | |
#### PlannerSpec
PlannerSpec configures the SLA planner for autoscaling in the generated DGD.
_Appears in:_
- [FeaturesSpec](#featuresspec)
| Field | Description | Default | Validation |
| --- | --- | --- | --- |
| `enabled` _boolean_ | Enabled indicates whether the planner is enabled. | | Optional: \{\} <br /> |
| `plannerPreDeploymentSweeping` _[PlannerPreDeploymentSweepMode](#plannerpredeploymentsweepmode)_ | PlannerPreDeploymentSweeping controls pre-deployment sweeping mode for planner in-depth profiling.<br />"none" means no pre-deployment sweep (only load-based scaling).<br />"rapid" uses AI Configurator to simulate engine performance.<br />"thorough" uses real GPUs to measure engine performance (takes several hours). | | Enum: [none rapid thorough] <br />Optional: \{\} <br /> |
| `plannerArgsList` _string array_ | PlannerArgsList is a list of additional planner arguments. | | Optional: \{\} <br /> |
#### ProfilingPhase
_Underlying type:_ _string_
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment