"...ssh:/git@developer.sourcefind.cn:2222/OpenDAS/dynamo.git" did not exist on "3205e7dbdf291f2f1d25b405abf661838a699102"
Unverified Commit 659122ed authored by hhzhang16's avatar hhzhang16 Committed by GitHub
Browse files

feat: add Planner schema to DGDR and Profiler input (#6463)


Signed-off-by: Hannah Zhang <hannahz@nvidia.com>
Co-authored-by: Sertaç Özercan <852750+sozercan@users.noreply.github.com>
parent 0f47842a
...@@ -126,7 +126,7 @@ jobs: ...@@ -126,7 +126,7 @@ jobs:
working-directory: ./deploy/operator working-directory: ./deploy/operator
run: | run: |
python -m pip install --upgrade pip python -m pip install --upgrade pip
python -m pip install "pydantic>=2,<3" "black==23.1.0" python -m pip install "pydantic>=2,<3" "black==23.1.0" "pyyaml>=6.0"
- name: Check for uncommitted changes - name: Check for uncommitted changes
shell: bash shell: bash
working-directory: ./deploy/operator working-directory: ./deploy/operator
......
...@@ -41,9 +41,9 @@ class PlannerConfig(BaseModel): ...@@ -41,9 +41,9 @@ class PlannerConfig(BaseModel):
with defaults sourced from SLAPlannerDefaults. with defaults sourced from SLAPlannerDefaults.
""" """
plannerPreDeploymentSweeping: Optional[PlannerPreDeploymentSweepMode] = Field( pre_deployment_sweeping_mode: Optional[PlannerPreDeploymentSweepMode] = Field(
default=PlannerPreDeploymentSweepMode.Rapid, default=PlannerPreDeploymentSweepMode.Rapid,
description='PlannerPreDeploymentSweeping controls pre-deployment sweeping mode for planner in-depth profiling. "none" means no pre-deployment sweep (only load-based scaling). "rapid" uses AI Configurator to simulate engine performance. "thorough" uses real GPUs to measure engine performance (takes several hours).', description='Controls pre-deployment sweeping mode for planner in-depth profiling. "none" means no pre-deployment sweep (only load-based scaling). "rapid" uses AI Configurator to simulate engine performance. "thorough" uses real GPUs to measure engine performance (takes several hours).',
) )
environment: Literal[ environment: Literal[
......
{
"$defs": {
"PlannerPreDeploymentSweepMode": {
"enum": [
"none",
"rapid",
"thorough"
],
"title": "PlannerPreDeploymentSweepMode",
"type": "string"
}
},
"description": "Pydantic configuration for the Dynamo Planner.\n\nReplaces the argparse-based CLI. All fields mirror the former CLI flags\nwith defaults sourced from SLAPlannerDefaults.",
"properties": {
"plannerPreDeploymentSweeping": {
"anyOf": [
{
"$ref": "#/$defs/PlannerPreDeploymentSweepMode"
},
{
"type": "null"
}
],
"default": "rapid",
"description": "PlannerPreDeploymentSweeping controls pre-deployment sweeping mode for planner in-depth profiling. \"none\" means no pre-deployment sweep (only load-based scaling). \"rapid\" uses AI Configurator to simulate engine performance. \"thorough\" uses real GPUs to measure engine performance (takes several hours)."
},
"environment": {
"default": "kubernetes",
"enum": [
"kubernetes",
"virtual",
"global-planner"
],
"title": "Environment",
"type": "string"
},
"namespace": {
"title": "Namespace",
"type": "string"
},
"backend": {
"default": "vllm",
"enum": [
"vllm",
"sglang",
"trtllm",
"mocker"
],
"title": "Backend",
"type": "string"
},
"mode": {
"default": "disagg",
"enum": [
"disagg",
"prefill",
"decode",
"agg"
],
"title": "Mode",
"type": "string"
},
"no_operation": {
"default": false,
"title": "No Operation",
"type": "boolean"
},
"log_dir": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"title": "Log Dir"
},
"throughput_adjustment_interval": {
"default": 180,
"title": "Throughput Adjustment Interval",
"type": "integer"
},
"max_gpu_budget": {
"default": 8,
"title": "Max Gpu Budget",
"type": "integer"
},
"min_endpoint": {
"default": 1,
"title": "Min Endpoint",
"type": "integer"
},
"decode_engine_num_gpu": {
"anyOf": [
{
"type": "integer"
},
{
"type": "null"
}
],
"default": null,
"title": "Decode Engine Num Gpu"
},
"prefill_engine_num_gpu": {
"anyOf": [
{
"type": "integer"
},
{
"type": "null"
}
],
"default": null,
"title": "Prefill Engine Num Gpu"
},
"profile_results_dir": {
"default": "profiling_results",
"title": "Profile Results Dir",
"type": "string"
},
"ttft": {
"default": 500.0,
"title": "Ttft",
"type": "number"
},
"itl": {
"default": 50.0,
"title": "Itl",
"type": "number"
},
"load_predictor": {
"default": "arima",
"title": "Load Predictor",
"type": "string"
},
"load_predictor_log1p": {
"default": false,
"title": "Load Predictor Log1P",
"type": "boolean"
},
"prophet_window_size": {
"default": 50,
"title": "Prophet Window Size",
"type": "integer"
},
"load_predictor_warmup_trace": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"title": "Load Predictor Warmup Trace"
},
"kalman_q_level": {
"default": 1.0,
"title": "Kalman Q Level",
"type": "number"
},
"kalman_q_trend": {
"default": 0.1,
"title": "Kalman Q Trend",
"type": "number"
},
"kalman_r": {
"default": 10.0,
"title": "Kalman R",
"type": "number"
},
"kalman_min_points": {
"default": 5,
"title": "Kalman Min Points",
"type": "integer"
},
"metric_pulling_prometheus_endpoint": {
"title": "Metric Pulling Prometheus Endpoint",
"type": "string"
},
"metric_reporting_prometheus_port": {
"title": "Metric Reporting Prometheus Port",
"type": "integer"
},
"no_correction": {
"default": false,
"title": "No Correction",
"type": "boolean"
},
"model_name": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"title": "Model Name"
},
"global_planner_namespace": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"title": "Global Planner Namespace"
},
"enable_throughput_scaling": {
"default": true,
"title": "Enable Throughput Scaling",
"type": "boolean"
},
"enable_load_scaling": {
"default": false,
"title": "Enable Load Scaling",
"type": "boolean"
},
"load_router_metrics_url": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"title": "Load Router Metrics Url"
},
"load_adjustment_interval": {
"default": 5,
"title": "Load Adjustment Interval",
"type": "integer"
},
"load_learning_window": {
"default": 50,
"title": "Load Learning Window",
"type": "integer"
},
"load_scaling_down_sensitivity": {
"default": 80,
"title": "Load Scaling Down Sensitivity",
"type": "integer"
},
"load_metric_samples": {
"default": 10,
"title": "Load Metric Samples",
"type": "integer"
},
"load_min_observations": {
"default": 5,
"title": "Load Min Observations",
"type": "integer"
}
},
"title": "PlannerConfig",
"type": "object"
}
\ No newline at end of file
...@@ -26,6 +26,12 @@ from typing import Any, Dict, List, Optional ...@@ -26,6 +26,12 @@ from typing import Any, Dict, List, Optional
from pydantic import BaseModel, Field, model_validator from pydantic import BaseModel, Field, model_validator
# Import canonical planner types - do NOT redefine them here.
from dynamo.planner.utils.planner_config import ( # noqa: F401 (re-exported)
PlannerConfig,
PlannerPreDeploymentSweepMode,
)
class DGDRPhase(str, Enum): class DGDRPhase(str, Enum):
Pending = "Pending" Pending = "Pending"
...@@ -63,12 +69,6 @@ class BackendType(str, Enum): ...@@ -63,12 +69,6 @@ class BackendType(str, Enum):
Vllm = "vllm" Vllm = "vllm"
class PlannerPreDeploymentSweepMode(str, Enum):
None_ = "none"
Rapid = "rapid"
Thorough = "thorough"
class WorkloadSpec(BaseModel): class WorkloadSpec(BaseModel):
"""WorkloadSpec defines the workload characteristics for SLA-based profiling.""" """WorkloadSpec defines the workload characteristics for SLA-based profiling."""
...@@ -161,22 +161,6 @@ class OverridesSpec(BaseModel): ...@@ -161,22 +161,6 @@ class OverridesSpec(BaseModel):
) )
class PlannerSpec(BaseModel):
"""PlannerSpec configures the SLA planner for autoscaling in the generated DGD."""
enabled: Optional[bool] = Field(
default=None, description="Enabled indicates whether the planner is enabled."
)
plannerPreDeploymentSweeping: Optional[PlannerPreDeploymentSweepMode] = Field(
default=None,
description='PlannerPreDeploymentSweeping controls pre-deployment sweeping mode for planner in-depth profiling. "none" means no pre-deployment sweep (only load-based scaling). "rapid" uses AI Configurator to simulate engine performance. "thorough" uses real GPUs to measure engine performance (takes several hours).',
)
plannerArgsList: Optional[List[str]] = Field(
default=None,
description="PlannerArgsList is a list of additional planner arguments.",
)
class MockerSpec(BaseModel): class MockerSpec(BaseModel):
"""MockerSpec configures the simulated (mocker) backend.""" """MockerSpec configures the simulated (mocker) backend."""
...@@ -198,9 +182,9 @@ class KVRouterSpec(BaseModel): ...@@ -198,9 +182,9 @@ class KVRouterSpec(BaseModel):
class FeaturesSpec(BaseModel): class FeaturesSpec(BaseModel):
"""FeaturesSpec controls optional Dynamo platform features in the generated deployment.""" """FeaturesSpec controls optional Dynamo platform features in the generated deployment."""
planner: Optional[PlannerSpec] = Field( planner: Optional[PlannerConfig] = Field(
default=None, default=None,
description="Planner configures the SLA planner for autoscaling in the generated DGD.", description="Planner is the raw SLA planner configuration passed to the planner service. Its schema is defined by dynamo.planner.utils.planner_config.PlannerConfig. Go treats this as opaque bytes; the Planner service validates it at startup. The presence of this field (non-null) enables the planner in the generated DGD.",
) )
mocker: Optional[MockerSpec] = Field( mocker: Optional[MockerSpec] = Field(
default=None, default=None,
......
...@@ -563,33 +563,13 @@ spec: ...@@ -563,33 +563,13 @@ spec:
type: boolean type: boolean
type: object type: object
planner: planner:
description: Planner configures the SLA planner for autoscaling in the generated DGD. description: |-
properties: Planner is the raw SLA planner configuration passed to the planner service.
enabled: Its schema is defined by dynamo.planner.utils.planner_config.PlannerConfig.
description: Enabled indicates whether the planner is enabled. Go treats this as opaque bytes; the Planner service validates it at startup.
type: boolean The presence of this field (non-null) enables the planner in the generated DGD.
plannerArgsList:
description: PlannerArgsList is a list of additional planner arguments.
items:
type: string
type: array
plannerPreDeploymentSweeping:
allOf:
- enum:
- none
- rapid
- thorough
- enum:
- none
- rapid
- thorough
description: |-
PlannerPreDeploymentSweeping controls pre-deployment sweeping mode for planner in-depth profiling.
"none" means no pre-deployment sweep (only load-based scaling).
"rapid" uses AI Configurator to simulate engine performance.
"thorough" uses real GPUs to measure engine performance (takes several hours).
type: string
type: object type: object
x-kubernetes-preserve-unknown-fields: true
type: object type: object
hardware: hardware:
description: |- description: |-
......
# Image URL to use all building/pushing image targets # Image URL to use all building/pushing image targets
IMG ?= controller:latest IMG ?= controller:latest
# ENVTEST_K8S_VERSION refers to the version of kubebuilder assets to be downloaded by envtest binary. # ENVTEST_K8S_VERSION refers to the version of kubebuilder assets to be downloaded by envtest binary.
ENVTEST_K8S_VERSION = 1.29.0 ENVTEST_K8S_VERSION = 1.29.0
...@@ -124,7 +125,7 @@ generate-pydantic: ## Generate Python Pydantic models from v1beta1 Go types (req ...@@ -124,7 +125,7 @@ generate-pydantic: ## Generate Python Pydantic models from v1beta1 Go types (req
@echo "Generating Pydantic models from v1beta1 DGDR types..." @echo "Generating Pydantic models from v1beta1 DGDR types..."
@python3 api/scripts/generate_pydantic_from_go.py @python3 api/scripts/generate_pydantic_from_go.py
@echo "Running Pydantic validation tests..." @echo "Running Pydantic validation tests..."
@python3 api/scripts/test_pydantic_models.py @python3 api/scripts/validate_pydantic_models.py
.PHONY: fmt .PHONY: fmt
fmt: ## Run go fmt against code. fmt: ## Run go fmt against code.
......
...@@ -30,6 +30,24 @@ from dataclasses import dataclass, field ...@@ -30,6 +30,24 @@ from dataclasses import dataclass, field
from pathlib import Path from pathlib import Path
from typing import List, Optional, Tuple from typing import List, Optional, Tuple
# Go type names whose Python counterparts are imported from an existing module
# instead of being re-emitted into the generated file.
# Each entry maps the Go type name to (Python import path, Python name, always_import).
# With always_import=True the import is emitted whether or not the type shows up in
# the parsed structs/enums (e.g. a type that is only used as a field override and
# never exists as a standalone Go type).
# The planner types are hand-written and are the canonical source of truth.
_IMPORT_OVERRIDES: dict[str, tuple[str, str, bool]] = {
    type_name: ("dynamo.planner.utils.planner_config", type_name, True)
    for type_name in ("PlannerPreDeploymentSweepMode", "PlannerConfig")
}
# Per-struct docstring overrides for cases where the Python docstring should differ # Per-struct docstring overrides for cases where the Python docstring should differ
# from the Go comment (e.g. Python-specific mutual-exclusivity documentation). # from the Go comment (e.g. Python-specific mutual-exclusivity documentation).
_STRUCT_DOCSTRINGS: dict = { _STRUCT_DOCSTRINGS: dict = {
...@@ -64,6 +82,14 @@ _STRUCT_EXTRAS: dict = { ...@@ -64,6 +82,14 @@ _STRUCT_EXTRAS: dict = {
""", """,
} }
# Per-field Python type overrides. Maps (StructName, json_field_name) → Python type string.
# Used when the Go type (e.g. *runtime.RawExtension) should map to a richer Python type
# rather than the generic Dict[str, Any].
# The value is emitted verbatim as the field annotation. The generator treats the field
# as optional (and emits default=None) only when the string starts with "Optional[",
# so optional overrides must be spelled with that exact prefix.
_FIELD_TYPE_OVERRIDES: dict[tuple[str, str], str] = {
# FeaturesSpec.planner is opaque in Go but strongly typed in Python.
("FeaturesSpec", "planner"): "Optional[PlannerConfig]",
}
_SPDX_HEADER = """\ _SPDX_HEADER = """\
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
...@@ -101,7 +127,10 @@ def _resolve_repo_root(start: Path) -> Path: ...@@ -101,7 +127,10 @@ def _resolve_repo_root(start: Path) -> Path:
if (p / "go.mod").exists(): if (p / "go.mod").exists():
return p return p
p = p.parent p = p.parent
return start raise RuntimeError(
f"Could not locate repository root from {start}. "
"Ensure the script is run inside the dynamo repository."
)
@dataclass @dataclass
...@@ -154,6 +183,8 @@ class GoToPydanticConverter: ...@@ -154,6 +183,8 @@ class GoToPydanticConverter:
"runtime.RawExtension": "Dict[str, Any]", "runtime.RawExtension": "Dict[str, Any]",
"batchv1.JobSpec": "Dict[str, Any]", "batchv1.JobSpec": "Dict[str, Any]",
"corev1.ResourceRequirements": "Dict[str, Any]", "corev1.ResourceRequirements": "Dict[str, Any]",
"corev1.Toleration": "Dict[str, Any]",
"apiextensionsv1.JSON": "Any",
} }
def __init__(self): def __init__(self):
...@@ -447,8 +478,29 @@ class GoToPydanticConverter: ...@@ -447,8 +478,29 @@ class GoToPydanticConverter:
"", "",
] ]
# Generate enums first # Emit import statements for overridden types, grouped by module
import_groups: dict[str, list[str]] = {}
for go_name, (mod, py_name, always_import) in _IMPORT_OVERRIDES.items():
in_enums = any(e.name == go_name for e in self.enums)
in_structs = any(s.name == go_name for s in self.structs)
if always_import or in_enums or in_structs:
import_groups.setdefault(mod, []).append(py_name)
for mod in sorted(import_groups):
names = sorted(import_groups[mod])
lines.append(
"# Import canonical planner types - do NOT redefine them here."
)
lines.append(f"from {mod} import ( # noqa: F401 (re-exported)")
for n in names:
lines.append(f" {n},")
lines.append(")")
lines.append("")
# Generate enums first (skip ones that are imported)
for enum in self.enums: for enum in self.enums:
if enum.name in _IMPORT_OVERRIDES:
continue # imported above
lines.append("") lines.append("")
if enum.comment: if enum.comment:
lines.append(f"# {enum.comment}") lines.append(f"# {enum.comment}")
...@@ -490,9 +542,16 @@ class GoToPydanticConverter: ...@@ -490,9 +542,16 @@ class GoToPydanticConverter:
effective_optional = go_field.is_optional and ( effective_optional = go_field.is_optional and (
go_field.is_pointer or go_field.default is None go_field.is_pointer or go_field.default is None
) )
python_type = self._go_type_to_python( override_key = (struct.name, go_field.name)
go_field.go_type, go_field.is_pointer, effective_optional if override_key in _FIELD_TYPE_OVERRIDES:
) python_type = _FIELD_TYPE_OVERRIDES[override_key]
# Derive effective_optional from the override string itself so
# default=None is emitted iff the type is actually Optional.
effective_optional = python_type.startswith("Optional[")
else:
python_type = self._go_type_to_python(
go_field.go_type, go_field.is_pointer, effective_optional
)
field_def = f" {go_field.name}: {python_type}" field_def = f" {go_field.name}: {python_type}"
...@@ -571,6 +630,16 @@ def main(): ...@@ -571,6 +630,16 @@ def main():
args = parser.parse_args() args = parser.parse_args()
# In the operator Docker build the context is deploy/operator/ only — components/src
# is not copied in. The generated file is already committed, so skip regeneration.
components_src = repo_root / "components" / "src"
if not components_src.exists():
print(
f"Note: {components_src} not found (operator-only build context). "
"Skipping Pydantic generation; using committed dgdr_v1beta1_types.py."
)
return 0
if not args.input.exists(): if not args.input.exists():
print(f"Error: Input file not found: {args.input}") print(f"Error: Input file not found: {args.input}")
return 1 return 1
......
...@@ -21,6 +21,7 @@ Validates that the generated Pydantic models can be imported and used correctly. ...@@ -21,6 +21,7 @@ Validates that the generated Pydantic models can be imported and used correctly.
import subprocess import subprocess
import sys import sys
import types
from pathlib import Path from pathlib import Path
...@@ -46,8 +47,50 @@ def _repo_root() -> Path: ...@@ -46,8 +47,50 @@ def _repo_root() -> Path:
return start return start
_components_src = _repo_root() / "components" / "src"
# In the operator Docker build the context is deploy/operator/ only — components/src
# is not copied in. The generated files are already committed, so skip validation.
if not _components_src.exists():
print(
f"Note: {_components_src} not found (operator-only build context). "
"Skipping Pydantic validation tests."
)
sys.exit(0)
# Add the components src to path so we can import the generated models # Add the components src to path so we can import the generated models
sys.path.insert(0, str(_repo_root() / "components" / "src")) sys.path.insert(0, str(_components_src))
# ---------------------------------------------------------------------------
# Stub dynamo.runtime.logging and bypass the heavy dynamo.planner.__init__
# before importing any dynamo module.
#
# dynamo itself must be a namespace-like package (has __path__) so that
# Python's import machinery can traverse down to dynamo.profiler from the
# filesystem. dynamo.planner is pre-registered as a stub to skip its heavy
# __init__.py, while still allowing dynamo.planner.utils.* to load normally.
# ---------------------------------------------------------------------------
_dynamo_path = str(_components_src / "dynamo")
_planner_path = str(_components_src / "dynamo" / "planner")
if "dynamo" not in sys.modules:
_dynamo_mod = types.ModuleType("dynamo")
_dynamo_mod.__path__ = [_dynamo_path] # type: ignore[attr-defined]
_dynamo_mod.__package__ = "dynamo"
sys.modules["dynamo"] = _dynamo_mod
if "dynamo.runtime" not in sys.modules:
_runtime_mod = types.ModuleType("dynamo.runtime")
sys.modules["dynamo.runtime"] = _runtime_mod
_logging_mod = types.ModuleType("dynamo.runtime.logging")
_logging_mod.configure_dynamo_logging = lambda *args, **kwargs: None # type: ignore[attr-defined]
sys.modules["dynamo.runtime.logging"] = _logging_mod
_planner_mod = types.ModuleType("dynamo.planner")
_planner_mod.__path__ = [_planner_path] # type: ignore[attr-defined]
_planner_mod.__package__ = "dynamo.planner"
sys.modules["dynamo.planner"] = _planner_mod
import pydantic # noqa: E402 import pydantic # noqa: E402
...@@ -61,8 +104,8 @@ from dynamo.profiler.utils.dgdr_v1beta1_types import ( # noqa: E402 ...@@ -61,8 +104,8 @@ from dynamo.profiler.utils.dgdr_v1beta1_types import ( # noqa: E402
MockerSpec, MockerSpec,
ModelCacheSpec, ModelCacheSpec,
OptimizationType, OptimizationType,
PlannerConfig,
PlannerPreDeploymentSweepMode, PlannerPreDeploymentSweepMode,
PlannerSpec,
ProfilingPhase, ProfilingPhase,
SearchStrategy, SearchStrategy,
SLASpec, SLASpec,
...@@ -105,7 +148,7 @@ def test_full_dgdr(): ...@@ -105,7 +148,7 @@ def test_full_dgdr():
pvcModelPath="llama-3.1-405b", pvcModelPath="llama-3.1-405b",
), ),
features=FeaturesSpec( features=FeaturesSpec(
planner=PlannerSpec(enabled=True), planner=PlannerConfig(enable_load_scaling=False),
mocker=MockerSpec(enabled=False), mocker=MockerSpec(enabled=False),
), ),
searchStrategy=SearchStrategy.Rapid, searchStrategy=SearchStrategy.Rapid,
...@@ -120,7 +163,7 @@ def test_full_dgdr(): ...@@ -120,7 +163,7 @@ def test_full_dgdr():
assert spec.sla.itl == 10.0 assert spec.sla.itl == 10.0
assert spec.modelCache.pvcName == "model-cache" assert spec.modelCache.pvcName == "model-cache"
assert spec.modelCache.pvcModelPath == "llama-3.1-405b" assert spec.modelCache.pvcModelPath == "llama-3.1-405b"
assert spec.features.planner.enabled is True assert isinstance(spec.features.planner, PlannerConfig)
assert spec.features.mocker.enabled is False assert spec.features.mocker.enabled is False
print("✓ Full DGDR spec validation passed") print("✓ Full DGDR spec validation passed")
......
...@@ -72,10 +72,16 @@ ...@@ -72,10 +72,16 @@
// Spec.DeploymentOverrides.{Name, nvidia.com/dgdr-deployment-overrides // Spec.DeploymentOverrides.{Name, nvidia.com/dgdr-deployment-overrides
// Namespace,Labels,Annotations} // Namespace,Labels,Annotations}
// //
// Planner config (opaque blob stored verbatim under blob["planner"]):
//
// v1beta1 blob key
// ────────────────────────────────────────── ──────────────────────────────────────
// Features.Planner (*runtime.RawExtension) planner.* (JSON fields written directly)
//
// v1beta1-only fields with no v1alpha1 equivalent (omitted / TODO): // v1beta1-only fields with no v1alpha1 equivalent (omitted / TODO):
// //
// Hardware.*, Workload.{Concurrency,RequestRate}, SLA.{E2ELatency,OptimizationType}, // Hardware.*, Workload.{Concurrency,RequestRate}, SLA.{E2ELatency,OptimizationType},
// Features.{Planner.*,KVRouter}, SearchStrategy // Features.{KVRouter}, SearchStrategy
// //
// # Status field mapping // # Status field mapping
// //
...@@ -146,6 +152,7 @@ import ( ...@@ -146,6 +152,7 @@ import (
batchv1 "k8s.io/api/batch/v1" batchv1 "k8s.io/api/batch/v1"
corev1 "k8s.io/api/core/v1" corev1 "k8s.io/api/core/v1"
apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
"k8s.io/apimachinery/pkg/runtime"
"sigs.k8s.io/controller-runtime/pkg/conversion" "sigs.k8s.io/controller-runtime/pkg/conversion"
) )
...@@ -239,6 +246,7 @@ func convertDGDRSpecTo(src *DynamoGraphDeploymentRequestSpec, dst *v1beta1.Dynam ...@@ -239,6 +246,7 @@ func convertDGDRSpecTo(src *DynamoGraphDeploymentRequestSpec, dst *v1beta1.Dynam
} }
applySLAAndWorkloadFromBlob(blob, dst) applySLAAndWorkloadFromBlob(blob, dst)
applyModelCacheFromBlob(blob, dst) applyModelCacheFromBlob(blob, dst)
applyPlannerFromBlob(blob, dst)
setAnnotation(dstObj, annDGDRProfilingConfig, string(src.ProfilingConfig.Config.Raw)) setAnnotation(dstObj, annDGDRProfilingConfig, string(src.ProfilingConfig.Config.Raw))
} }
...@@ -426,6 +434,12 @@ func convertDGDRSpecFrom(src *v1beta1.DynamoGraphDeploymentRequestSpec, dst *Dyn ...@@ -426,6 +434,12 @@ func convertDGDRSpecFrom(src *v1beta1.DynamoGraphDeploymentRequestSpec, dst *Dyn
} }
mergeModelCacheIntoBlob(src.ModelCache, blob) mergeModelCacheIntoBlob(src.ModelCache, blob)
} }
if src.Features != nil && src.Features.Planner != nil {
if blob == nil {
blob = make(map[string]interface{})
}
mergePlannerIntoBlob(src.Features.Planner, blob)
}
if blob != nil { if blob != nil {
if data, err := json.Marshal(blob); err == nil { if data, err := json.Marshal(blob); err == nil {
dst.ProfilingConfig.Config = &apiextensionsv1.JSON{Raw: data} dst.ProfilingConfig.Config = &apiextensionsv1.JSON{Raw: data}
...@@ -491,6 +505,39 @@ func mergeModelCacheIntoBlob(mc *v1beta1.ModelCacheSpec, blob map[string]interfa ...@@ -491,6 +505,39 @@ func mergeModelCacheIntoBlob(mc *v1beta1.ModelCacheSpec, blob map[string]interfa
} }
} }
// mergePlannerIntoBlob copies the v1beta1 planner configuration into
// blob["planner"] so that it survives conversion to v1alpha1.
//
// planner.Raw is expected to hold a JSON object (the PlannerConfig document,
// which Go treats as opaque). It is decoded into a generic map and stored as a
// nested object rather than as an embedded string.
//
// Nothing is written when planner or its Raw bytes are nil, when blob is nil
// (assigning into a nil map would panic), or when Raw does not decode to a JSON
// object (malformed JSON, a non-object value, or JSON null). An empty object
// ("{}") IS written: the presence of the planner field is what enables the
// planner, so a defaults-only configuration must not be dropped on round-trip.
func mergePlannerIntoBlob(planner *runtime.RawExtension, blob map[string]interface{}) {
	if planner == nil || planner.Raw == nil || blob == nil {
		return
	}
	var plannerMap map[string]interface{}
	// json.Unmarshal leaves the map nil for JSON null and allocates a non-nil
	// (possibly empty) map for any JSON object, so a nil check separates
	// "no config" from "empty config".
	if err := json.Unmarshal(planner.Raw, &plannerMap); err != nil || plannerMap == nil {
		return
	}
	blob["planner"] = plannerMap
}

// applyPlannerFromBlob is the inverse of mergePlannerIntoBlob: it reads
// blob["planner"] and, when it holds a JSON object, stores its serialized form
// in dst.Features.Planner, allocating dst.Features if necessary.
//
// dst is left untouched when the key is absent, is not an object, or cannot be
// re-marshaled. An empty object is preserved as "{}" so that a planner enabled
// with default settings is not lost when converting back to v1beta1.
func applyPlannerFromBlob(blob map[string]interface{}, dst *v1beta1.DynamoGraphDeploymentRequestSpec) {
	// The comma-ok assertion is false for a missing key and for a JSON null,
	// both of which surface as a nil interface value.
	plannerMap, ok := blob["planner"].(map[string]interface{})
	if !ok || plannerMap == nil {
		return
	}
	raw, err := json.Marshal(plannerMap)
	if err != nil {
		return
	}
	if dst.Features == nil {
		dst.Features = &v1beta1.FeaturesSpec{}
	}
	dst.Features.Planner = &runtime.RawExtension{Raw: raw}
}
// restoreAnnotationFields restores v1alpha1 spec fields that were annotation-preserved // restoreAnnotationFields restores v1alpha1 spec fields that were annotation-preserved
// during ConvertTo: ConfigMapRef, OutputPVC, and DeploymentOverrides. // during ConvertTo: ConfigMapRef, OutputPVC, and DeploymentOverrides.
func restoreAnnotationFields(srcObj *v1beta1.DynamoGraphDeploymentRequest, dst *DynamoGraphDeploymentRequestSpec) { func restoreAnnotationFields(srcObj *v1beta1.DynamoGraphDeploymentRequest, dst *DynamoGraphDeploymentRequestSpec) {
......
...@@ -21,6 +21,9 @@ import ( ...@@ -21,6 +21,9 @@ import (
"encoding/json" "encoding/json"
"testing" "testing"
"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
v1beta1 "github.com/ai-dynamo/dynamo/deploy/operator/api/v1beta1" v1beta1 "github.com/ai-dynamo/dynamo/deploy/operator/api/v1beta1"
apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
...@@ -43,6 +46,9 @@ func newV1alpha1DGDR() *DynamoGraphDeploymentRequest { ...@@ -43,6 +46,9 @@ func newV1alpha1DGDR() *DynamoGraphDeploymentRequest {
"pvcMountPath": "/data/model", "pvcMountPath": "/data/model",
}, },
}, },
"planner": map[string]interface{}{
"enable_load_scaling": false,
},
"extra_key": "preserved", "extra_key": "preserved",
} }
blobRaw, _ := json.Marshal(profilingBlob) blobRaw, _ := json.Marshal(profilingBlob)
...@@ -94,6 +100,7 @@ func newV1beta1DGDR() *v1beta1.DynamoGraphDeploymentRequest { ...@@ -94,6 +100,7 @@ func newV1beta1DGDR() *v1beta1.DynamoGraphDeploymentRequest {
osl := int32(256) osl := int32(256)
rawDGD, _ := json.Marshal(map[string]interface{}{"apiVersion": "nvidia.com/v1alpha1", "kind": "DynamoGraphDeployment"}) rawDGD, _ := json.Marshal(map[string]interface{}{"apiVersion": "nvidia.com/v1alpha1", "kind": "DynamoGraphDeployment"})
rawPlanner, _ := json.Marshal(map[string]interface{}{"enable_load_scaling": false})
return &v1beta1.DynamoGraphDeploymentRequest{ return &v1beta1.DynamoGraphDeploymentRequest{
ObjectMeta: metav1.ObjectMeta{ ObjectMeta: metav1.ObjectMeta{
...@@ -119,7 +126,8 @@ func newV1beta1DGDR() *v1beta1.DynamoGraphDeploymentRequest { ...@@ -119,7 +126,8 @@ func newV1beta1DGDR() *v1beta1.DynamoGraphDeploymentRequest {
PVCMountPath: "/models", PVCMountPath: "/models",
}, },
Features: &v1beta1.FeaturesSpec{ Features: &v1beta1.FeaturesSpec{
Mocker: &v1beta1.MockerSpec{Enabled: true}, Mocker: &v1beta1.MockerSpec{Enabled: true},
Planner: &runtime.RawExtension{Raw: rawPlanner},
}, },
}, },
Status: v1beta1.DynamoGraphDeploymentRequestStatus{ Status: v1beta1.DynamoGraphDeploymentRequestStatus{
...@@ -269,44 +277,9 @@ func TestAlpha1RoundTrip(t *testing.T) { ...@@ -269,44 +277,9 @@ func TestAlpha1RoundTrip(t *testing.T) {
} }
// --- Spec checks --- // --- Spec checks ---
if restored.Spec.Model != original.Spec.Model { // ProfilingConfig.Config (raw JSON blob) is verified separately below.
t.Errorf("Spec.Model: got %q, want %q", restored.Spec.Model, original.Spec.Model) if diff := cmp.Diff(original.Spec, restored.Spec, cmpopts.IgnoreFields(ProfilingConfigSpec{}, "Config")); diff != "" {
} t.Errorf("Spec mismatch after round-trip (-want +got):\n%s", diff)
if restored.Spec.Backend != original.Spec.Backend {
t.Errorf("Spec.Backend: got %q, want %q", restored.Spec.Backend, original.Spec.Backend)
}
if restored.Spec.AutoApply != original.Spec.AutoApply {
t.Errorf("Spec.AutoApply: got %v, want %v", restored.Spec.AutoApply, original.Spec.AutoApply)
}
if restored.Spec.UseMocker != original.Spec.UseMocker {
t.Errorf("Spec.UseMocker: got %v, want %v", restored.Spec.UseMocker, original.Spec.UseMocker)
}
if restored.Spec.ProfilingConfig.ProfilerImage != original.Spec.ProfilingConfig.ProfilerImage {
t.Errorf("ProfilingConfig.ProfilerImage: got %q, want %q", restored.Spec.ProfilingConfig.ProfilerImage, original.Spec.ProfilingConfig.ProfilerImage)
}
if restored.Spec.ProfilingConfig.OutputPVC != original.Spec.ProfilingConfig.OutputPVC {
t.Errorf("ProfilingConfig.OutputPVC: got %q, want %q", restored.Spec.ProfilingConfig.OutputPVC, original.Spec.ProfilingConfig.OutputPVC)
}
// ConfigMapRef round-trip
if restored.Spec.ProfilingConfig.ConfigMapRef == nil {
t.Fatal("ProfilingConfig.ConfigMapRef is nil after round-trip")
}
if restored.Spec.ProfilingConfig.ConfigMapRef.Name != original.Spec.ProfilingConfig.ConfigMapRef.Name {
t.Errorf("ConfigMapRef.Name: got %q, want %q", restored.Spec.ProfilingConfig.ConfigMapRef.Name, original.Spec.ProfilingConfig.ConfigMapRef.Name)
}
// EnableGPUDiscovery round-trip
if restored.Spec.EnableGPUDiscovery == nil || !*restored.Spec.EnableGPUDiscovery {
t.Error("Spec.EnableGPUDiscovery: expected true after round-trip")
}
// DeploymentOverrides round-trip
if restored.Spec.DeploymentOverrides == nil {
t.Fatal("Spec.DeploymentOverrides is nil after round-trip")
}
if restored.Spec.DeploymentOverrides.Name != original.Spec.DeploymentOverrides.Name {
t.Errorf("DeploymentOverrides.Name: got %q, want %q", restored.Spec.DeploymentOverrides.Name, original.Spec.DeploymentOverrides.Name)
} }
// JSON blob round-trip: SLA fields re-emerge in ProfilingConfig.Config // JSON blob round-trip: SLA fields re-emerge in ProfilingConfig.Config
...@@ -331,28 +304,18 @@ func TestAlpha1RoundTrip(t *testing.T) { ...@@ -331,28 +304,18 @@ func TestAlpha1RoundTrip(t *testing.T) {
if blob["extra_key"] != "preserved" { if blob["extra_key"] != "preserved" {
t.Errorf("extra_key: got %v, want %q", blob["extra_key"], "preserved") t.Errorf("extra_key: got %v, want %q", blob["extra_key"], "preserved")
} }
// Planner round-trip via applyPlannerFromBlob / mergePlannerIntoBlob
// --- Status checks --- plannerMap, _ := blob["planner"].(map[string]interface{})
if restored.Status.State != original.Status.State { if plannerMap == nil {
t.Errorf("Status.State: got %q, want %q", restored.Status.State, original.Status.State) t.Fatal("planner key missing in restored JSON blob")
} }
if restored.Status.ObservedGeneration != original.Status.ObservedGeneration { if plannerMap["enable_load_scaling"] != false {
t.Errorf("Status.ObservedGeneration: got %d, want %d", restored.Status.ObservedGeneration, original.Status.ObservedGeneration) t.Errorf("planner.enable_load_scaling: got %v, want false", plannerMap["enable_load_scaling"])
} }
if restored.Status.Backend != original.Status.Backend {
t.Errorf("Status.Backend: got %q, want %q", restored.Status.Backend, original.Status.Backend) // --- Status checks ---
} if diff := cmp.Diff(original.Status, restored.Status); diff != "" {
if restored.Status.ProfilingResults != original.Status.ProfilingResults { t.Errorf("Status mismatch after round-trip (-want +got):\n%s", diff)
t.Errorf("Status.ProfilingResults: got %q, want %q", restored.Status.ProfilingResults, original.Status.ProfilingResults)
}
if restored.Status.Deployment == nil {
t.Fatal("Status.Deployment is nil after round-trip")
}
if restored.Status.Deployment.Name != original.Status.Deployment.Name {
t.Errorf("Status.Deployment.Name: got %q, want %q", restored.Status.Deployment.Name, original.Status.Deployment.Name)
}
if restored.Status.Deployment.Created != original.Status.Deployment.Created {
t.Errorf("Status.Deployment.Created: got %v, want %v", restored.Status.Deployment.Created, original.Status.Deployment.Created)
} }
} }
...@@ -373,79 +336,17 @@ func TestHubRoundTrip(t *testing.T) { ...@@ -373,79 +336,17 @@ func TestHubRoundTrip(t *testing.T) {
} }
// --- Spec checks --- // --- Spec checks ---
if restored.Spec.Model != original.Spec.Model { if diff := cmp.Diff(original.Spec, restored.Spec); diff != "" {
t.Errorf("Spec.Model: got %q, want %q", restored.Spec.Model, original.Spec.Model) t.Errorf("Spec mismatch after round-trip (-want +got):\n%s", diff)
}
if restored.Spec.Backend != original.Spec.Backend {
t.Errorf("Spec.Backend: got %q, want %q", restored.Spec.Backend, original.Spec.Backend)
}
if restored.Spec.AutoApply != original.Spec.AutoApply {
t.Errorf("Spec.AutoApply: got %v, want %v", restored.Spec.AutoApply, original.Spec.AutoApply)
}
if restored.Spec.Image != original.Spec.Image {
t.Errorf("Spec.Image: got %q, want %q", restored.Spec.Image, original.Spec.Image)
}
// UseMocker round-trip via Features.Mocker.Enabled
if restored.Spec.Features == nil || restored.Spec.Features.Mocker == nil {
t.Fatal("Spec.Features.Mocker is nil after round-trip")
}
if restored.Spec.Features.Mocker.Enabled != original.Spec.Features.Mocker.Enabled {
t.Errorf("Features.Mocker.Enabled: got %v, want %v", restored.Spec.Features.Mocker.Enabled, original.Spec.Features.Mocker.Enabled)
}
// SLA round-trip via JSON blob
if restored.Spec.SLA == nil {
t.Fatal("Spec.SLA is nil after round-trip")
}
if restored.Spec.SLA.TTFT == nil || *restored.Spec.SLA.TTFT != *original.Spec.SLA.TTFT {
t.Errorf("SLA.TTFT: got %v, want %v", restored.Spec.SLA.TTFT, original.Spec.SLA.TTFT)
}
if restored.Spec.SLA.ITL == nil || *restored.Spec.SLA.ITL != *original.Spec.SLA.ITL {
t.Errorf("SLA.ITL: got %v, want %v", restored.Spec.SLA.ITL, original.Spec.SLA.ITL)
}
// Workload round-trip via JSON blob
if restored.Spec.Workload == nil {
t.Fatal("Spec.Workload is nil after round-trip")
}
if restored.Spec.Workload.ISL == nil || *restored.Spec.Workload.ISL != *original.Spec.Workload.ISL {
t.Errorf("Workload.ISL: got %v, want %v", restored.Spec.Workload.ISL, original.Spec.Workload.ISL)
}
if restored.Spec.Workload.OSL == nil || *restored.Spec.Workload.OSL != *original.Spec.Workload.OSL {
t.Errorf("Workload.OSL: got %v, want %v", restored.Spec.Workload.OSL, original.Spec.Workload.OSL)
}
// ModelCache round-trip via JSON blob
if restored.Spec.ModelCache == nil {
t.Fatal("Spec.ModelCache is nil after round-trip")
}
if restored.Spec.ModelCache.PVCName != original.Spec.ModelCache.PVCName {
t.Errorf("ModelCache.PVCName: got %q, want %q", restored.Spec.ModelCache.PVCName, original.Spec.ModelCache.PVCName)
}
if restored.Spec.ModelCache.PVCModelPath != original.Spec.ModelCache.PVCModelPath {
t.Errorf("ModelCache.PVCModelPath: got %q, want %q", restored.Spec.ModelCache.PVCModelPath, original.Spec.ModelCache.PVCModelPath)
}
if restored.Spec.ModelCache.PVCMountPath != original.Spec.ModelCache.PVCMountPath {
t.Errorf("ModelCache.PVCMountPath: got %q, want %q", restored.Spec.ModelCache.PVCMountPath, original.Spec.ModelCache.PVCMountPath)
} }
// --- Status checks --- // --- Status checks ---
// Deployed → Ready (lossy: v1alpha1 has no "Deployed" state; maps to "Ready") // Phase is intentionally lossy: DGDRPhaseDeployed → Ready → Ready
// then on the way back Ready→Ready
if restored.Status.Phase != v1beta1.DGDRPhaseReady { if restored.Status.Phase != v1beta1.DGDRPhaseReady {
t.Errorf("Status.Phase: got %q, want %q (Deployed→Ready is lossy)", restored.Status.Phase, v1beta1.DGDRPhaseReady) t.Errorf("Status.Phase: got %q, want %q (Deployed→Ready is lossy)", restored.Status.Phase, v1beta1.DGDRPhaseReady)
} }
if restored.Status.ObservedGeneration != original.Status.ObservedGeneration { if diff := cmp.Diff(original.Status, restored.Status, cmpopts.IgnoreFields(v1beta1.DynamoGraphDeploymentRequestStatus{}, "Phase")); diff != "" {
t.Errorf("Status.ObservedGeneration: got %d, want %d", restored.Status.ObservedGeneration, original.Status.ObservedGeneration) t.Errorf("Status mismatch after round-trip (-want +got):\n%s", diff)
}
// DGDName round-trip
if restored.Status.DGDName != original.Status.DGDName {
t.Errorf("Status.DGDName: got %q, want %q", restored.Status.DGDName, original.Status.DGDName)
}
// ProfilingJobName round-trip via annotation
if restored.Status.ProfilingJobName != original.Status.ProfilingJobName {
t.Errorf("Status.ProfilingJobName: got %q, want %q", restored.Status.ProfilingJobName, original.Status.ProfilingJobName)
} }
// GeneratedDeployment round-trip via ProfilingResults.SelectedConfig // GeneratedDeployment round-trip via ProfilingResults.SelectedConfig
if restored.Status.ProfilingResults == nil || restored.Status.ProfilingResults.SelectedConfig == nil { if restored.Status.ProfilingResults == nil || restored.Status.ProfilingResults.SelectedConfig == nil {
......
...@@ -286,35 +286,6 @@ type OverridesSpec struct { ...@@ -286,35 +286,6 @@ type OverridesSpec struct {
DGD *runtime.RawExtension `json:"dgd,omitempty"` DGD *runtime.RawExtension `json:"dgd,omitempty"`
} }
// PlannerPreDeploymentSweepMode controls pre-deployment sweeping thoroughness for planner profiling.
// +kubebuilder:validation:Enum=none;rapid;thorough
type PlannerPreDeploymentSweepMode string
const (
PlannerPreDeploymentSweepModeNone PlannerPreDeploymentSweepMode = "none"
PlannerPreDeploymentSweepModeRapid PlannerPreDeploymentSweepMode = "rapid"
PlannerPreDeploymentSweepModeThorough PlannerPreDeploymentSweepMode = "thorough"
)
// PlannerSpec configures the SLA planner for autoscaling in the generated DGD.
type PlannerSpec struct {
// Enabled indicates whether the planner is enabled.
// +optional
Enabled bool `json:"enabled,omitempty"`
// PlannerPreDeploymentSweeping controls pre-deployment sweeping mode for planner in-depth profiling.
// "none" means no pre-deployment sweep (only load-based scaling).
// "rapid" uses AI Configurator to simulate engine performance.
// "thorough" uses real GPUs to measure engine performance (takes several hours).
// +optional
// +kubebuilder:validation:Enum=none;rapid;thorough
PlannerPreDeploymentSweeping *PlannerPreDeploymentSweepMode `json:"plannerPreDeploymentSweeping,omitempty"`
// PlannerArgsList is a list of additional planner arguments.
// +optional
PlannerArgsList []string `json:"plannerArgsList,omitempty"`
}
// MockerSpec configures the simulated (mocker) backend. // MockerSpec configures the simulated (mocker) backend.
type MockerSpec struct { type MockerSpec struct {
// Enabled indicates whether to deploy mocker workers instead of real inference workers. // Enabled indicates whether to deploy mocker workers instead of real inference workers.
...@@ -333,9 +304,14 @@ type KVRouterSpec struct { ...@@ -333,9 +304,14 @@ type KVRouterSpec struct {
// FeaturesSpec controls optional Dynamo platform features in the generated deployment. // FeaturesSpec controls optional Dynamo platform features in the generated deployment.
type FeaturesSpec struct { type FeaturesSpec struct {
// Planner configures the SLA planner for autoscaling in the generated DGD. // Planner is the raw SLA planner configuration passed to the planner service.
// Its schema is defined by dynamo.planner.utils.planner_config.PlannerConfig.
// Go treats this as opaque bytes; the Planner service validates it at startup.
// The presence of this field (non-null) enables the planner in the generated DGD.
// +optional // +optional
Planner *PlannerSpec `json:"planner,omitempty"` // +kubebuilder:pruning:PreserveUnknownFields
// +kubebuilder:validation:Type=object
Planner *runtime.RawExtension `json:"planner,omitempty"`
// TODO: KVRouter support is not yet implemented in the operator. // TODO: KVRouter support is not yet implemented in the operator.
// KVRouter *KVRouterSpec `json:"kvRouter,omitempty"` // KVRouter *KVRouterSpec `json:"kvRouter,omitempty"`
......
...@@ -209,7 +209,7 @@ func (in *FeaturesSpec) DeepCopyInto(out *FeaturesSpec) { ...@@ -209,7 +209,7 @@ func (in *FeaturesSpec) DeepCopyInto(out *FeaturesSpec) {
*out = *in *out = *in
if in.Planner != nil { if in.Planner != nil {
in, out := &in.Planner, &out.Planner in, out := &in.Planner, &out.Planner
*out = new(PlannerSpec) *out = new(runtime.RawExtension)
(*in).DeepCopyInto(*out) (*in).DeepCopyInto(*out)
} }
if in.Mocker != nil { if in.Mocker != nil {
...@@ -345,31 +345,6 @@ func (in *ParetoConfig) DeepCopy() *ParetoConfig { ...@@ -345,31 +345,6 @@ func (in *ParetoConfig) DeepCopy() *ParetoConfig {
return out return out
} }
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *PlannerSpec) DeepCopyInto(out *PlannerSpec) {
*out = *in
if in.PlannerPreDeploymentSweeping != nil {
in, out := &in.PlannerPreDeploymentSweeping, &out.PlannerPreDeploymentSweeping
*out = new(PlannerPreDeploymentSweepMode)
**out = **in
}
if in.PlannerArgsList != nil {
in, out := &in.PlannerArgsList, &out.PlannerArgsList
*out = make([]string, len(*in))
copy(*out, *in)
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PlannerSpec.
func (in *PlannerSpec) DeepCopy() *PlannerSpec {
if in == nil {
return nil
}
out := new(PlannerSpec)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ProfilingResultsStatus) DeepCopyInto(out *ProfilingResultsStatus) { func (in *ProfilingResultsStatus) DeepCopyInto(out *ProfilingResultsStatus) {
*out = *in *out = *in
......
...@@ -563,33 +563,13 @@ spec: ...@@ -563,33 +563,13 @@ spec:
type: boolean type: boolean
type: object type: object
planner: planner:
description: Planner configures the SLA planner for autoscaling in the generated DGD. description: |-
properties: Planner is the raw SLA planner configuration passed to the planner service.
enabled: Its schema is defined by dynamo.planner.utils.planner_config.PlannerConfig.
description: Enabled indicates whether the planner is enabled. Go treats this as opaque bytes; the Planner service validates it at startup.
type: boolean The presence of this field (non-null) enables the planner in the generated DGD.
plannerArgsList:
description: PlannerArgsList is a list of additional planner arguments.
items:
type: string
type: array
plannerPreDeploymentSweeping:
allOf:
- enum:
- none
- rapid
- thorough
- enum:
- none
- rapid
- thorough
description: |-
PlannerPreDeploymentSweeping controls pre-deployment sweeping mode for planner in-depth profiling.
"none" means no pre-deployment sweep (only load-based scaling).
"rapid" uses AI Configurator to simulate engine performance.
"thorough" uses real GPUs to measure engine performance (takes several hours).
type: string
type: object type: object
x-kubernetes-preserve-unknown-fields: true
type: object type: object
hardware: hardware:
description: |- description: |-
......
...@@ -1373,7 +1373,7 @@ _Appears in:_ ...@@ -1373,7 +1373,7 @@ _Appears in:_
| Field | Description | Default | Validation | | Field | Description | Default | Validation |
| --- | --- | --- | --- | | --- | --- | --- | --- |
| `planner` _[PlannerSpec](#plannerspec)_ | Planner configures the SLA planner for autoscaling in the generated DGD. | | Optional: \{\} <br /> | | `planner` _[RawExtension](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#rawextension-runtime-pkg)_ | Planner is the raw SLA planner configuration passed to the planner service.<br />Its schema is defined by dynamo.planner.utils.planner_config.PlannerConfig.<br />Go treats this as opaque bytes; the Planner service validates it at startup.<br />The presence of this field (non-null) enables the planner in the generated DGD. | | Type: object <br />Optional: \{\} <br /> |
| `mocker` _[MockerSpec](#mockerspec)_ | Mocker configures the simulated (mocker) backend for testing without GPUs. | | Optional: \{\} <br /> | | `mocker` _[MockerSpec](#mockerspec)_ | Mocker configures the simulated (mocker) backend for testing without GPUs. | | Optional: \{\} <br /> |
...@@ -1485,43 +1485,6 @@ _Appears in:_ ...@@ -1485,43 +1485,6 @@ _Appears in:_
| `config` _[RawExtension](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#rawextension-runtime-pkg)_ | Config is the full deployment configuration for this Pareto point. | | Type: object <br /> | | `config` _[RawExtension](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#rawextension-runtime-pkg)_ | Config is the full deployment configuration for this Pareto point. | | Type: object <br /> |
#### PlannerPreDeploymentSweepMode
_Underlying type:_ _string_
PlannerPreDeploymentSweepMode controls pre-deployment sweeping thoroughness for planner profiling.
_Validation:_
- Enum: [none rapid thorough]
_Appears in:_
- [PlannerSpec](#plannerspec)
| Field | Description |
| --- | --- |
| `none` | |
| `rapid` | |
| `thorough` | |
#### PlannerSpec
PlannerSpec configures the SLA planner for autoscaling in the generated DGD.
_Appears in:_
- [FeaturesSpec](#featuresspec)
| Field | Description | Default | Validation |
| --- | --- | --- | --- |
| `enabled` _boolean_ | Enabled indicates whether the planner is enabled. | | Optional: \{\} <br /> |
| `plannerPreDeploymentSweeping` _[PlannerPreDeploymentSweepMode](#plannerpredeploymentsweepmode)_ | PlannerPreDeploymentSweeping controls pre-deployment sweeping mode for planner in-depth profiling.<br />"none" means no pre-deployment sweep (only load-based scaling).<br />"rapid" uses AI Configurator to simulate engine performance.<br />"thorough" uses real GPUs to measure engine performance (takes several hours). | | Enum: [none rapid thorough] <br />Optional: \{\} <br /> |
| `plannerArgsList` _string array_ | PlannerArgsList is a list of additional planner arguments. | | Optional: \{\} <br /> |
#### ProfilingPhase #### ProfilingPhase
_Underlying type:_ _string_ _Underlying type:_ _string_
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment