@@ -62,6 +62,15 @@ class SearchStrategy(str, Enum):
...
@@ -62,6 +62,15 @@ class SearchStrategy(str, Enum):
Thorough="thorough"
Thorough="thorough"
class GPUSKUType(str, Enum):
    """Closed set of GPU SKU identifiers accepted by ``HardwareSpec.gpuSku``.

    Each member is also a ``str`` whose value is the lowercase, snake_case
    identifier used on the wire (for example ``"h100_sxm"``). Lookup by value
    is exact and case-sensitive; any other string raises ``ValueError``.
    """

    GB200SXM = "gb200_sxm"
    H200SXM = "h200_sxm"
    H100SXM = "h100_sxm"
    B200SXM = "b200_sxm"
    A100SXM = "a100_sxm"
    L40S = "l40s"
classBackendType(str,Enum):
classBackendType(str,Enum):
Auto="auto"
Auto="auto"
Sglang="sglang"
Sglang="sglang"
...
@@ -200,9 +209,9 @@ class FeaturesSpec(BaseModel):
...
@@ -200,9 +209,9 @@ class FeaturesSpec(BaseModel):
classHardwareSpec(BaseModel):
classHardwareSpec(BaseModel):
"""HardwareSpec describes the hardware resources available for profiling and deployment. These fields are typically auto-filled by the operator from cluster discovery."""
"""HardwareSpec describes the hardware resources available for profiling and deployment. These fields are typically auto-filled by the operator from cluster discovery."""
gpuSku:Optional[str]=Field(
gpuSku:Optional[GPUSKUType]=Field(
default=None,
default=None,
description='GPUSKU is the GPU SKU identifier (e.g., "H100_SXM", "A100_80GB").',
description="GPUSKU is the AIC hardware system identifier for the GPU. When omitted, the operator auto-detects this via InferHardwareSystem from cluster GPU node labels.",
)
)
vramMb:Optional[float]=Field(
vramMb:Optional[float]=Field(
default=None,description="VRAMMB is the VRAM per GPU in MiB."
default=None,description="VRAMMB is the VRAM per GPU in MiB."
| `gpuSku` _string_ | GPUSKU is the GPU SKU identifier (e.g., "H100_SXM", "A100_80GB"). | | Optional: \{\} <br /> |
| `gpuSku` _[GPUSKUType](#gpuskutype)_ | GPUSKU is the AIC hardware system identifier for the GPU.<br />When omitted, the operator auto-detects this via InferHardwareSystem from cluster GPU node labels. | | Enum: [gb200_sxm h200_sxm h100_sxm b200_sxm a100_sxm l40s] <br />Optional: \{\} <br /> |
| `vramMb` _float_ | VRAMMB is the VRAM per GPU in MiB. | | Optional: \{\} <br /> |
| `vramMb` _float_ | VRAMMB is the VRAM per GPU in MiB. | | Optional: \{\} <br /> |
| `totalGpus` _integer_ | TotalGPUs is the total number of GPUs available in the cluster. | | Optional: \{\} <br /> |
| `totalGpus` _integer_ | TotalGPUs is the total number of GPUs available in the cluster. | | Optional: \{\} <br /> |
| `numGpusPerNode` _integer_ | NumGPUsPerNode is the number of GPUs per node. | | Optional: \{\} <br /> |
| `numGpusPerNode` _integer_ | NumGPUsPerNode is the number of GPUs per node. | | Optional: \{\} <br /> |