Unverified commit 611e856d, authored by hhzhang16 and committed by GitHub
Browse files

feat: hide optimizationType (#7160)


Signed-off-by: Hannah Zhang <hannahz@nvidia.com>
parent 012236ee
......@@ -52,11 +52,6 @@ class ProfilingPhase(str, Enum):
Done = "Done"
class OptimizationType(str, Enum):
Latency = "latency"
Throughput = "throughput"
class SearchStrategy(str, Enum):
Rapid = "rapid"
Thorough = "thorough"
......@@ -101,16 +96,11 @@ class WorkloadSpec(BaseModel):
class SLASpec(BaseModel):
"""Service-level agreement targets.
Provide exactly one of:
Provide one of:
- ``ttft`` + ``itl``: explicit latency targets (default: 2000 ms / 30 ms)
- ``e2eLatency``: end-to-end latency target
- ``optimizationType``: high-level objective without explicit numeric targets"""
- ``e2eLatency``: end-to-end latency target (mutually exclusive with ttft/itl)"""
optimizationType: Optional[OptimizationType] = Field(
default=None,
description="OptimizationType controls the profiling optimization strategy. Use when explicit SLA targets (ttft+itl or e2eLatency) are not known.",
)
ttft: Optional[float] = Field(
default=2000,
description="TTFT is the Time To First Token target in milliseconds.",
......@@ -125,20 +115,17 @@ class SLASpec(BaseModel):
@model_validator(mode="after")
def _validate_sla_options(self) -> "SLASpec":
"""Ensure at most one SLA mode is active."""
"""Ensure e2eLatency and ttft/itl are not both provided."""
has_e2e = self.e2eLatency is not None
has_opt = self.optimizationType is not None
ttft_itl_touched = (
"ttft" in self.model_fields_set or "itl" in self.model_fields_set
)
has_ttft_itl = (self.ttft is not None and self.itl is not None) and (
ttft_itl_touched or (not has_e2e and not has_opt)
)
options_count = sum([has_ttft_itl, has_e2e, has_opt])
if options_count > 1:
has_ttft_itl = (
self.ttft is not None or self.itl is not None
) and ttft_itl_touched
if has_e2e and has_ttft_itl:
raise ValueError(
"SLA must specify exactly one of: (ttft and itl), e2eLatency, "
"or optimizationType — not multiple."
"SLA must specify either (ttft and itl) or e2eLatency, not both."
)
if (self.ttft is not None) != (self.itl is not None):
raise ValueError("ttft and itl must both be provided together.")
......
......@@ -9078,18 +9078,6 @@ spec:
itl:
description: ITL is the Inter-Token Latency target in milliseconds.
type: number
optimizationType:
allOf:
- enum:
- latency
- throughput
- enum:
- latency
- throughput
description: |-
OptimizationType controls the profiling optimization strategy.
Use when explicit SLA targets (ttft+itl or e2eLatency) are not known.
type: string
ttft:
description: TTFT is the Time To First Token target in milliseconds.
type: number
......
......@@ -53,10 +53,9 @@ _IMPORT_OVERRIDES: dict[str, tuple[str, str, bool]] = {
_STRUCT_DOCSTRINGS: dict = {
"SLASpec": (
"Service-level agreement targets.\n\n"
" Provide exactly one of:\n\n"
" Provide one of:\n\n"
" - ``ttft`` + ``itl``: explicit latency targets (default: 2000 ms / 30 ms)\n"
" - ``e2eLatency``: end-to-end latency target\n"
" - ``optimizationType``: high-level objective without explicit numeric targets"
" - ``e2eLatency``: end-to-end latency target (mutually exclusive with ttft/itl)"
),
}
......@@ -66,18 +65,15 @@ _STRUCT_EXTRAS: dict = {
"SLASpec": """\
@model_validator(mode="after")
def _validate_sla_options(self) -> "SLASpec":
\"\"\"Ensure at most one SLA mode is active.\"\"\"
\"\"\"Ensure e2eLatency and ttft/itl are not both provided.\"\"\"
has_e2e = self.e2eLatency is not None
has_opt = self.optimizationType is not None
ttft_itl_touched = "ttft" in self.model_fields_set or "itl" in self.model_fields_set
has_ttft_itl = (self.ttft is not None and self.itl is not None) and (
ttft_itl_touched or (not has_e2e and not has_opt)
ttft_itl_touched = (
"ttft" in self.model_fields_set or "itl" in self.model_fields_set
)
options_count = sum([has_ttft_itl, has_e2e, has_opt])
if options_count > 1:
has_ttft_itl = (self.ttft is not None or self.itl is not None) and ttft_itl_touched
if has_e2e and has_ttft_itl:
raise ValueError(
"SLA must specify exactly one of: (ttft and itl), e2eLatency, "
"or optimizationType \u2014 not multiple."
"SLA must specify either (ttft and itl) or e2eLatency, not both."
)
if (self.ttft is not None) != (self.itl is not None):
raise ValueError("ttft and itl must both be provided together.")
......
......@@ -103,7 +103,6 @@ from dynamo.profiler.utils.dgdr_v1beta1_types import ( # noqa: E402
FeaturesSpec,
MockerSpec,
ModelCacheSpec,
OptimizationType,
PlannerConfig,
PlannerPreDeploymentSweepMode,
ProfilingPhase,
......@@ -175,7 +174,6 @@ def test_sla_defaults_and_validation():
assert sla.ttft == 2000.0
assert sla.itl == 30.0
assert sla.e2eLatency is None
assert sla.optimizationType is None
print("✓ SLASpec defaults correct")
# explicit ttft+itl mode: OK
......@@ -184,13 +182,6 @@ def test_sla_defaults_and_validation():
# e2eLatency mode: OK (null out ttft/itl)
SLASpec(ttft=None, itl=None, e2eLatency=500.0)
# optimizationType mode: OK (null out ttft/itl)
SLASpec(ttft=None, itl=None, optimizationType=OptimizationType.Throughput)
# optimizationType mode: OK without explicitly nulling defaults (TC-2.5 / TC-2.6)
SLASpec(optimizationType=OptimizationType.Throughput)
SLASpec(optimizationType=OptimizationType.Latency)
# e2eLatency mode: OK without explicitly nulling defaults
SLASpec(e2eLatency=500.0)
......@@ -230,10 +221,6 @@ def test_enums():
assert ProfilingPhase.Initializing == "Initializing"
assert ProfilingPhase.SweepingPrefill == "SweepingPrefill"
# OptimizationType — TitleCase from Go const names
assert OptimizationType.Latency == "latency"
assert OptimizationType.Throughput == "throughput"
# SearchStrategy — TitleCase from Go const names
assert SearchStrategy.Rapid == "rapid"
assert SearchStrategy.Thorough == "thorough"
......
......@@ -80,7 +80,7 @@
//
// v1beta1-only fields with no v1alpha1 equivalent (omitted / TODO):
//
// Hardware.*, Workload.{Concurrency,RequestRate}, SLA.{E2ELatency,OptimizationType},
// Hardware.*, Workload.{Concurrency,RequestRate}, SLA.{E2ELatency},
// Features.{KVRouter}, SearchStrategy
//
// # Status field mapping
......
......@@ -165,15 +165,6 @@ const (
ProfilingReasonJobCreationFailed = "JobCreationFailed"
)
// OptimizationType specifies the profiling optimization strategy.
// +kubebuilder:validation:Enum=latency;throughput
type OptimizationType string
const (
OptimizationTypeLatency OptimizationType = "latency"
OptimizationTypeThroughput OptimizationType = "throughput"
)
// SearchStrategy controls the profiling search depth.
// +kubebuilder:validation:Enum=rapid;thorough
type SearchStrategy string
......@@ -231,14 +222,7 @@ type WorkloadSpec struct {
}
// SLASpec defines the service-level agreement targets for profiling optimization.
// Exactly one mode should be active: ttft+itl (default), e2eLatency, or optimizationType.
type SLASpec struct {
// OptimizationType controls the profiling optimization strategy.
// Use when explicit SLA targets (ttft+itl or e2eLatency) are not known.
// +optional
// +kubebuilder:validation:Enum=latency;throughput
OptimizationType OptimizationType `json:"optimizationType,omitempty"`
// TTFT is the Time To First Token target in milliseconds.
// +optional
// +python-default=2000
......
......@@ -9078,18 +9078,6 @@ spec:
itl:
description: ITL is the Inter-Token Latency target in milliseconds.
type: number
optimizationType:
allOf:
- enum:
- latency
- throughput
- enum:
- latency
- throughput
description: |-
OptimizationType controls the profiling optimization strategy.
Use when explicit SLA targets (ttft+itl or e2eLatency) are not known.
type: string
ttft:
description: TTFT is the Time To First Token target in milliseconds.
type: number
......
......@@ -1480,24 +1480,6 @@ _Appears in:_
| `pvcMountPath` _string_ | PVCMountPath is the mount path for the PVC inside the container. | /opt/model-cache | Optional: \{\} <br /> |
#### OptimizationType
_Underlying type:_ _string_
OptimizationType specifies the profiling optimization strategy.
_Validation:_
- Enum: [latency throughput]
_Appears in:_
- [SLASpec](#slaspec)
| Field | Description |
| --- | --- |
| `latency` | |
| `throughput` | |
#### OverridesSpec
......@@ -1579,7 +1561,6 @@ _Appears in:_
SLASpec defines the service-level agreement targets for profiling optimization.
Exactly one mode should be active: ttft+itl (default), e2eLatency, or optimizationType.
......@@ -1588,7 +1569,6 @@ _Appears in:_
| Field | Description | Default | Validation |
| --- | --- | --- | --- |
| `optimizationType` _[OptimizationType](#optimizationtype)_ | OptimizationType controls the profiling optimization strategy.<br />Use when explicit SLA targets (ttft+itl or e2eLatency) are not known. | | Enum: [latency throughput] <br />Optional: \{\} <br /> |
| `ttft` _float_ | TTFT is the Time To First Token target in milliseconds. | | Optional: \{\} <br /> |
| `itl` _float_ | ITL is the Inter-Token Latency target in milliseconds. | | Optional: \{\} <br /> |
| `e2eLatency` _float_ | E2ELatency is the target end-to-end request latency in milliseconds.<br />Alternative to specifying TTFT + ITL. | | Optional: \{\} <br /> |
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment