feat: hide optimizationType (#7160)

Signed-off-by: Hannah Zhang <hannahz@nvidia.com>

feat: hide optimizationType (#7160)
Signed-off-by: Hannah Zhang <hannahz@nvidia.com>
611e856d · hhzhang16 · GitHub · 012236ee · 611e856d · 611e856d
Unverified Commit 611e856d authored Mar 10, 2026 by hhzhang16 Committed by GitHub Mar 11, 2026
8 changed files
--- a/components/src/dynamo/profiler/utils/dgdr_v1beta1_types.py
+++ b/components/src/dynamo/profiler/utils/dgdr_v1beta1_types.py
@@ -52,11 +52,6 @@ class ProfilingPhase(str, Enum):
    Done = "Done"


-class OptimizationType(str, Enum):
-    Latency = "latency"
-    Throughput = "throughput"
-
-
 class SearchStrategy(str, Enum):
    Rapid = "rapid"
    Thorough = "thorough"
@@ -101,16 +96,11 @@ class WorkloadSpec(BaseModel):
 class SLASpec(BaseModel):
    """Service-level agreement targets.

-    Provide exactly one of:
+    Provide one of:

    - ``ttft`` + ``itl``: explicit latency targets (default: 2000 ms / 30 ms)
-    - ``e2eLatency``: end-to-end latency target
-    - ``optimizationType``: high-level objective without explicit numeric targets"""
+    - ``e2eLatency``: end-to-end latency target (mutually exclusive with ttft/itl)"""

-    optimizationType: Optional[OptimizationType] = Field(
-        default=None,
-        description="OptimizationType controls the profiling optimization strategy. Use when explicit SLA targets (ttft+itl or e2eLatency) are not known.",
-    )
    ttft: Optional[float] = Field(
        default=2000,
        description="TTFT is the Time To First Token target in milliseconds.",
@@ -125,20 +115,17 @@ class SLASpec(BaseModel):

    @model_validator(mode="after")
    def _validate_sla_options(self) -> "SLASpec":
-        """Ensure at most one SLA mode is active."""
+        """Ensure e2eLatency and ttft/itl are not both provided."""
        has_e2e = self.e2eLatency is not None
-        has_opt = self.optimizationType is not None
        ttft_itl_touched = (
            "ttft" in self.model_fields_set or "itl" in self.model_fields_set
        )
-        has_ttft_itl = (self.ttft is not None and self.itl is not None) and (
-            ttft_itl_touched or (not has_e2e and not has_opt)
-        )
-        options_count = sum([has_ttft_itl, has_e2e, has_opt])
-        if options_count > 1:
+        has_ttft_itl = (
+            self.ttft is not None or self.itl is not None
+        ) and ttft_itl_touched
+        if has_e2e and has_ttft_itl:
            raise ValueError(
-                "SLA must specify exactly one of: (ttft and itl), e2eLatency, "
-                "or optimizationType — not multiple."
+                "SLA must specify either (ttft and itl) or e2eLatency, not both."
            )
        if (self.ttft is not None) != (self.itl is not None):
            raise ValueError("ttft and itl must both be provided together.")

--- a/deploy/helm/charts/platform/components/operator/crds/nvidia.com_dynamographdeploymentrequests.yaml
+++ b/deploy/helm/charts/platform/components/operator/crds/nvidia.com_dynamographdeploymentrequests.yaml
@@ -9078,18 +9078,6 @@ spec:
                    itl:
                      description: ITL is the Inter-Token Latency target in milliseconds.
                      type: number
-                    optimizationType:
-                      allOf:
-                        - enum:
-                            - latency
-                            - throughput
-                        - enum:
-                            - latency
-                            - throughput
-                      description: |-
-                        OptimizationType controls the profiling optimization strategy.
-                        Use when explicit SLA targets (ttft+itl or e2eLatency) are not known.
-                      type: string
                    ttft:
                      description: TTFT is the Time To First Token target in milliseconds.
                      type: number

--- a/deploy/operator/api/scripts/generate_pydantic_from_go.py
+++ b/deploy/operator/api/scripts/generate_pydantic_from_go.py
@@ -53,10 +53,9 @@ _IMPORT_OVERRIDES: dict[str, tuple[str, str, bool]] = {
 _STRUCT_DOCSTRINGS: dict = {
    "SLASpec": (
        "Service-level agreement targets.\n\n"
-        "    Provide exactly one of:\n\n"
+        "    Provide one of:\n\n"
        "    - ``ttft`` + ``itl``: explicit latency targets (default: 2000 ms / 30 ms)\n"
-        "    - ``e2eLatency``: end-to-end latency target\n"
-        "    - ``optimizationType``: high-level objective without explicit numeric targets"
+        "    - ``e2eLatency``: end-to-end latency target (mutually exclusive with ttft/itl)"
    ),
 }

@@ -66,18 +65,15 @@ _STRUCT_EXTRAS: dict = {
    "SLASpec": """\
    @model_validator(mode="after")
    def _validate_sla_options(self) -> "SLASpec":
-        \"\"\"Ensure at most one SLA mode is active.\"\"\"
+        \"\"\"Ensure e2eLatency and ttft/itl are not both provided.\"\"\"
        has_e2e = self.e2eLatency is not None
-        has_opt = self.optimizationType is not None
-        ttft_itl_touched = "ttft" in self.model_fields_set or "itl" in self.model_fields_set
-        has_ttft_itl = (self.ttft is not None and self.itl is not None) and (
-            ttft_itl_touched or (not has_e2e and not has_opt)
+        ttft_itl_touched = (
+            "ttft" in self.model_fields_set or "itl" in self.model_fields_set
        )
-        options_count = sum([has_ttft_itl, has_e2e, has_opt])
-        if options_count > 1:
+        has_ttft_itl = (self.ttft is not None or self.itl is not None) and ttft_itl_touched
+        if has_e2e and has_ttft_itl:
            raise ValueError(
-                "SLA must specify exactly one of: (ttft and itl), e2eLatency, "
-                "or optimizationType \u2014 not multiple."
+                "SLA must specify either (ttft and itl) or e2eLatency, not both."
            )
        if (self.ttft is not None) != (self.itl is not None):
            raise ValueError("ttft and itl must both be provided together.")

--- a/deploy/operator/api/scripts/validate_pydantic_models.py
+++ b/deploy/operator/api/scripts/validate_pydantic_models.py
@@ -103,7 +103,6 @@ from dynamo.profiler.utils.dgdr_v1beta1_types import (  # noqa: E402
    FeaturesSpec,
    MockerSpec,
    ModelCacheSpec,
-    OptimizationType,
    PlannerConfig,
    PlannerPreDeploymentSweepMode,
    ProfilingPhase,
@@ -175,7 +174,6 @@ def test_sla_defaults_and_validation():
    assert sla.ttft == 2000.0
    assert sla.itl == 30.0
    assert sla.e2eLatency is None
-    assert sla.optimizationType is None
    print("✓ SLASpec defaults correct")

    # explicit ttft+itl mode: OK
@@ -184,13 +182,6 @@ def test_sla_defaults_and_validation():
    # e2eLatency mode: OK (null out ttft/itl)
    SLASpec(ttft=None, itl=None, e2eLatency=500.0)

-    # optimizationType mode: OK (null out ttft/itl)
-    SLASpec(ttft=None, itl=None, optimizationType=OptimizationType.Throughput)
-
-    # optimizationType mode: OK without explicitly nulling defaults (TC-2.5 / TC-2.6)
-    SLASpec(optimizationType=OptimizationType.Throughput)
-    SLASpec(optimizationType=OptimizationType.Latency)
-
    # e2eLatency mode: OK without explicitly nulling defaults
    SLASpec(e2eLatency=500.0)

@@ -230,10 +221,6 @@ def test_enums():
    assert ProfilingPhase.Initializing == "Initializing"
    assert ProfilingPhase.SweepingPrefill == "SweepingPrefill"

-    # OptimizationType — TitleCase from Go const names
-    assert OptimizationType.Latency == "latency"
-    assert OptimizationType.Throughput == "throughput"
-
    # SearchStrategy — TitleCase from Go const names
    assert SearchStrategy.Rapid == "rapid"
    assert SearchStrategy.Thorough == "thorough"

--- a/deploy/operator/api/v1alpha1/dynamographdeploymentrequest_conversion.go
+++ b/deploy/operator/api/v1alpha1/dynamographdeploymentrequest_conversion.go
@@ -80,7 +80,7 @@
 //
 // v1beta1-only fields with no v1alpha1 equivalent (omitted / TODO):
 //
-//	Hardware.*, Workload.{Concurrency,RequestRate}, SLA.{E2ELatency,OptimizationType},
+//	Hardware.*, Workload.{Concurrency,RequestRate}, SLA.{E2ELatency},
 //	Features.{KVRouter}, SearchStrategy
 //
 // # Status field mapping

--- a/deploy/operator/api/v1beta1/dynamographdeploymentrequest_types.go
+++ b/deploy/operator/api/v1beta1/dynamographdeploymentrequest_types.go
@@ -165,15 +165,6 @@ const (
 	ProfilingReasonJobCreationFailed = "JobCreationFailed"
 )

-// OptimizationType specifies the profiling optimization strategy.
-// +kubebuilder:validation:Enum=latency;throughput
-type OptimizationType string
-
-const (
-	OptimizationTypeLatency    OptimizationType = "latency"
-	OptimizationTypeThroughput OptimizationType = "throughput"
-)
-
 // SearchStrategy controls the profiling search depth.
 // +kubebuilder:validation:Enum=rapid;thorough
 type SearchStrategy string
@@ -231,14 +222,7 @@ type WorkloadSpec struct {
 }

 // SLASpec defines the service-level agreement targets for profiling optimization.
-// Exactly one mode should be active: ttft+itl (default), e2eLatency, or optimizationType.
 type SLASpec struct {
-	// OptimizationType controls the profiling optimization strategy.
-	// Use when explicit SLA targets (ttft+itl or e2eLatency) are not known.
-	// +optional
-	// +kubebuilder:validation:Enum=latency;throughput
-	OptimizationType OptimizationType `json:"optimizationType,omitempty"`
-
 	// TTFT is the Time To First Token target in milliseconds.
 	// +optional
 	// +python-default=2000

--- a/deploy/operator/config/crd/bases/nvidia.com_dynamographdeploymentrequests.yaml
+++ b/deploy/operator/config/crd/bases/nvidia.com_dynamographdeploymentrequests.yaml
@@ -9078,18 +9078,6 @@ spec:
                    itl:
                      description: ITL is the Inter-Token Latency target in milliseconds.
                      type: number
-                    optimizationType:
-                      allOf:
-                        - enum:
-                            - latency
-                            - throughput
-                        - enum:
-                            - latency
-                            - throughput
-                      description: |-
-                        OptimizationType controls the profiling optimization strategy.
-                        Use when explicit SLA targets (ttft+itl or e2eLatency) are not known.
-                      type: string
                    ttft:
                      description: TTFT is the Time To First Token target in milliseconds.
                      type: number

--- a/docs/kubernetes/api-reference.md
+++ b/docs/kubernetes/api-reference.md
@@ -1480,24 +1480,6 @@ _Appears in:_
 | `pvcMountPath` _string_ | PVCMountPath is the mount path for the PVC inside the container. | /opt/model-cache | Optional: \{\} <br /> |


-#### OptimizationType
-
-_Underlying type:_ _string_
-
-OptimizationType specifies the profiling optimization strategy.
-
-_Validation:_
- Enum: [latency throughput]
-
-_Appears in:_
- [SLASpec](#slaspec)
-
-| Field | Description |
-| --- | --- |
-| `latency` |  |
-| `throughput` |  |
-
-
 #### OverridesSpec


@@ -1579,7 +1561,6 @@ _Appears in:_


 SLASpec defines the service-level agreement targets for profiling optimization.
-Exactly one mode should be active: ttft+itl (default), e2eLatency, or optimizationType.



@@ -1588,7 +1569,6 @@ _Appears in:_

 | Field | Description | Default | Validation |
 | --- | --- | --- | --- |
-| `optimizationType` _[OptimizationType](#optimizationtype)_ | OptimizationType controls the profiling optimization strategy.<br />Use when explicit SLA targets (ttft+itl or e2eLatency) are not known. |  | Enum: [latency throughput] <br />Optional: \{\} <br /> |
 | `ttft` _float_ | TTFT is the Time To First Token target in milliseconds. |  | Optional: \{\} <br /> |
 | `itl` _float_ | ITL is the Inter-Token Latency target in milliseconds. |  | Optional: \{\} <br /> |
 | `e2eLatency` _float_ | E2ELatency is the target end-to-end request latency in milliseconds.<br />Alternative to specifying TTFT + ITL. |  | Optional: \{\} <br /> |