Unverified Commit ebc61637 authored by hhzhang16's avatar hhzhang16 Committed by GitHub
Browse files

feat: Add v1beta1 DGDR API with conversion framework (#6352)


Signed-off-by: default avatarJont828 <jt572@cornell.edu>
Signed-off-by: default avatarHongkuan Zhou <hongkuanz@nvidia.com>
Signed-off-by: default avatarHannah Zhang <hannahz@nvidia.com>
Co-authored-by: default avatarJont828 <jt572@cornell.edu>
parent 7bbacce1
......@@ -117,6 +117,16 @@ jobs:
uses: actions/setup-go@44694675825211faa026b3c33043df3e48a5fa00 # v6.0.0
with:
go-version: '1.25'
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.11"
- name: Install Python dependencies for operator codegen
shell: bash
working-directory: ./deploy/operator
run: |
python -m pip install --upgrade pip
python -m pip install "pydantic>=2,<3" "black==23.1.0"
- name: Check for uncommitted changes
shell: bash
working-directory: ./deploy/operator
......
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Auto-generated Pydantic models from v1beta1 DGDR Go types.
Generated by: deploy/operator/api/scripts/generate_pydantic_from_go.py
Source: deploy/operator/api/v1beta1/dynamographdeploymentrequest_types.go
DO NOT EDIT MANUALLY - regenerate using the script.
"""
from enum import Enum
from typing import Any, Dict, List, Optional
from pydantic import BaseModel, Field, model_validator
class DGDRPhase(str, Enum):
Pending = "Pending"
Profiling = "Profiling"
Ready = "Ready"
Deploying = "Deploying"
Deployed = "Deployed"
Failed = "Failed"
class ProfilingPhase(str, Enum):
Initializing = "Initializing"
SweepingPrefill = "SweepingPrefill"
SweepingDecode = "SweepingDecode"
SelectingConfig = "SelectingConfig"
BuildingCurves = "BuildingCurves"
GeneratingDGD = "GeneratingDGD"
Done = "Done"
class OptimizationType(str, Enum):
Latency = "latency"
Throughput = "throughput"
class SearchStrategy(str, Enum):
Rapid = "rapid"
Thorough = "thorough"
class BackendType(str, Enum):
Auto = "auto"
Sglang = "sglang"
Trtllm = "trtllm"
Vllm = "vllm"
class PlannerPreDeploymentSweepMode(str, Enum):
None_ = "none"
Rapid = "rapid"
Thorough = "thorough"
class WorkloadSpec(BaseModel):
"""WorkloadSpec defines the workload characteristics for SLA-based profiling."""
isl: Optional[int] = Field(
default=4000, description="ISL is the Input Sequence Length (number of tokens)."
)
osl: Optional[int] = Field(
default=1000,
description="OSL is the Output Sequence Length (number of tokens).",
)
concurrency: Optional[float] = Field(
default=None,
description="Concurrency is the target concurrency level. Required (or RequestRate) when the planner is disabled.",
)
requestRate: Optional[float] = Field(
default=None,
description="RequestRate is the target request rate (req/s). Required (or Concurrency) when the planner is disabled.",
)
class SLASpec(BaseModel):
"""Service-level agreement targets.
Provide exactly one of:
- ``ttft`` + ``itl``: explicit latency targets (default: 2000 ms / 30 ms)
- ``e2eLatency``: end-to-end latency target
- ``optimizationType``: high-level objective without explicit numeric targets"""
optimizationType: Optional[OptimizationType] = Field(
default=None,
description="OptimizationType controls the profiling optimization strategy. Use when explicit SLA targets (ttft+itl or e2eLatency) are not known.",
)
ttft: Optional[float] = Field(
default=2000,
description="TTFT is the Time To First Token target in milliseconds.",
)
itl: Optional[float] = Field(
default=30, description="ITL is the Inter-Token Latency target in milliseconds."
)
e2eLatency: Optional[float] = Field(
default=None,
description="E2ELatency is the target end-to-end request latency in milliseconds. Alternative to specifying TTFT + ITL.",
)
@model_validator(mode="after")
def _validate_sla_options(self) -> "SLASpec":
"""Ensure at most one SLA mode is active."""
has_ttft_itl = self.ttft is not None and self.itl is not None
has_e2e = self.e2eLatency is not None
has_opt = self.optimizationType is not None
options_count = sum([has_ttft_itl, has_e2e, has_opt])
if options_count > 1:
raise ValueError(
"SLA must specify exactly one of: (ttft and itl), e2eLatency, "
"or optimizationType — not multiple."
)
if (self.ttft is not None) != (self.itl is not None):
raise ValueError("ttft and itl must both be provided together.")
return self
class ModelCacheSpec(BaseModel):
"""ModelCacheSpec references a PVC containing pre-downloaded model weights."""
pvcName: Optional[str] = Field(
default=None,
description="PVCName is the name of the PersistentVolumeClaim containing model weights. The PVC must exist in the same namespace as the DGDR.",
)
pvcModelPath: Optional[str] = Field(
default=None,
description='PVCModelPath is the path to the model checkpoint directory within the PVC (e.g. "deepseek-r1" or "models/Llama-3.1-405B-FP8").',
)
pvcMountPath: str = Field(
default="/opt/model-cache",
description="PVCMountPath is the mount path for the PVC inside the container.",
)
class OverridesSpec(BaseModel):
"""OverridesSpec allows customizing the profiling job and the generated DynamoGraphDeployment."""
profilingJob: Optional[Dict[str, Any]] = Field(
default=None,
description="ProfilingJob allows overriding the profiling Job specification. Fields set here are merged into the controller-generated Job spec.",
)
dgd: Optional[Dict[str, Any]] = Field(
default=None,
description="DGD allows providing a full or partial nvidia.com/v1alpha1 DynamoGraphDeployment to use as the base for the generated deployment. Fields from profiling results are merged on top. Use this to override backend worker images. The field is stored as a raw embedded resource rather than a typed *v1alpha1.DynamoGraphDeployment to avoid a circular import: v1alpha1 already imports v1beta1 as the conversion hub and Go does not allow import cycles. The EmbeddedResource marker tells the API server to validate that the value is a well-formed Kubernetes object (has apiVersion/kind), but does not enforce that it is specifically a DynamoGraphDeployment. Full type validation (correct apiVersion, kind, and field schema) is performed by the controller during reconciliation. TODO(future MR): add webhook admission validation for the DGD field type.",
)
class PlannerSpec(BaseModel):
"""PlannerSpec configures the SLA planner for autoscaling in the generated DGD."""
enabled: Optional[bool] = Field(
default=None, description="Enabled indicates whether the planner is enabled."
)
plannerPreDeploymentSweeping: Optional[PlannerPreDeploymentSweepMode] = Field(
default=None,
description='PlannerPreDeploymentSweeping controls pre-deployment sweeping mode for planner in-depth profiling. "none" means no pre-deployment sweep (only load-based scaling). "rapid" uses AI Configurator to simulate engine performance. "thorough" uses real GPUs to measure engine performance (takes several hours).',
)
plannerArgsList: Optional[List[str]] = Field(
default=None,
description="PlannerArgsList is a list of additional planner arguments.",
)
class MockerSpec(BaseModel):
"""MockerSpec configures the simulated (mocker) backend."""
enabled: Optional[bool] = Field(
default=None,
description="Enabled indicates whether to deploy mocker workers instead of real inference workers. Useful for large-scale testing without GPUs.",
)
class KVRouterSpec(BaseModel):
"""KVRouterSpec configures KV-cache-aware routing."""
enabled: Optional[bool] = Field(
default=None,
description="Enabled indicates whether to enable KV-cache-aware routing in the generated DGD. KV routing optimizes request scheduling based on KV cache locality.",
)
class FeaturesSpec(BaseModel):
"""FeaturesSpec controls optional Dynamo platform features in the generated deployment."""
planner: Optional[PlannerSpec] = Field(
default=None,
description="Planner configures the SLA planner for autoscaling in the generated DGD.",
)
mocker: Optional[MockerSpec] = Field(
default=None,
description="Mocker configures the simulated (mocker) backend for testing without GPUs.",
)
class HardwareSpec(BaseModel):
"""HardwareSpec describes the hardware resources available for profiling and deployment. These fields are typically auto-filled by the operator from cluster discovery."""
gpuSku: Optional[str] = Field(
default=None,
description='GPUSKU is the GPU SKU identifier (e.g., "H100_SXM", "A100_80GB").',
)
vramMb: Optional[float] = Field(
default=None, description="VRAMMB is the VRAM per GPU in MiB."
)
totalGpus: Optional[int] = Field(
default=None,
description="TotalGPUs is the total number of GPUs available in the cluster.",
)
numGpusPerNode: Optional[int] = Field(
default=None, description="NumGPUsPerNode is the number of GPUs per node."
)
class DynamoGraphDeploymentRequestSpec(BaseModel):
"""DynamoGraphDeploymentRequestSpec defines the desired state of a DynamoGraphDeploymentRequest. Only the Model field is required; all other fields are optional and have sensible defaults."""
model: str = Field(
description='Model specifies the model to deploy (e.g., "Qwen/Qwen3-0.6B", "meta-llama/Llama-3-70b"). Can be a HuggingFace ID or a private model name.'
)
backend: BackendType = Field(
default="auto",
description="Backend specifies the inference backend to use for profiling and deployment.",
)
image: Optional[str] = Field(
default=None,
description='Image is the container image reference for the profiling job (frontend image). Example: "nvcr.io/nvidia/dynamo-runtime:latest" TODO: In a future MR, the operator will derive the backend inference image from the backend type automatically; backend images can be overridden via overrides.dgd.',
)
modelCache: Optional[ModelCacheSpec] = Field(
default=None,
description="ModelCache provides optional PVC configuration for pre-downloaded model weights. When provided, weights are loaded from the PVC instead of downloading from HuggingFace.",
)
hardware: Optional[HardwareSpec] = Field(
default=None,
description="Hardware describes the hardware resources available for profiling and deployment. Typically auto-filled by the operator from cluster discovery.",
)
workload: Optional[WorkloadSpec] = Field(
default=None,
description="Workload defines the expected workload characteristics for SLA-based profiling.",
)
sla: Optional[SLASpec] = Field(
default=None,
description="SLA defines service-level agreement targets that drive profiling optimization.",
)
overrides: Optional[OverridesSpec] = Field(
default=None,
description="Overrides allows customizing the profiling job and the generated DynamoGraphDeployment.",
)
features: Optional[FeaturesSpec] = Field(
default=None,
description="Features controls optional Dynamo platform features in the generated deployment.",
)
searchStrategy: SearchStrategy = Field(
default="rapid",
description='SearchStrategy controls the profiling search depth. "rapid" performs a fast sweep; "thorough" explores more configurations.',
)
autoApply: bool = Field(
default=True,
description="AutoApply indicates whether to automatically create a DynamoGraphDeployment after profiling completes. If false, the generated spec is stored in status for manual review and application.",
)
class ParetoConfig(BaseModel):
"""ParetoConfig represents a single Pareto-optimal deployment configuration discovered during profiling."""
config: Dict[str, Any] = Field(
description="Config is the full deployment configuration for this Pareto point."
)
class ProfilingResultsStatus(BaseModel):
"""ProfilingResultsStatus contains the output of the profiling process."""
pareto: Optional[List[ParetoConfig]] = Field(
default=None,
description="Pareto is the list of Pareto-optimal deployment configurations discovered during profiling. Each entry represents a different cost/performance trade-off.",
)
selectedConfig: Optional[Dict[str, Any]] = Field(
default=None,
description="SelectedConfig is the recommended configuration chosen by the profiler based on the SLA targets. This is the configuration used for deployment when autoApply is true.",
)
class DeploymentInfoStatus(BaseModel):
"""DeploymentInfoStatus tracks the state of the deployed DynamoGraphDeployment."""
replicas: Optional[int] = Field(
default=None, description="Replicas is the desired number of replicas."
)
availableReplicas: Optional[int] = Field(
default=None,
description="AvailableReplicas is the number of replicas that are available and ready.",
)
class DynamoGraphDeploymentRequestStatus(BaseModel):
"""DynamoGraphDeploymentRequestStatus represents the observed state of a DynamoGraphDeploymentRequest."""
phase: Optional[DGDRPhase] = Field(
default=None,
description="Phase is the high-level lifecycle phase of the deployment request.",
)
profilingPhase: Optional[ProfilingPhase] = Field(
default=None,
description='ProfilingPhase indicates the current sub-phase of the profiling pipeline. Only meaningful when Phase is "Profiling". Cleared when profiling completes or fails.',
)
dgdName: Optional[str] = Field(
default=None,
description="DGDName is the name of the generated or created DynamoGraphDeployment.",
)
profilingJobName: Optional[str] = Field(
default=None,
description="ProfilingJobName is the name of the Kubernetes Job running the profiler.",
)
profilingResults: Optional[ProfilingResultsStatus] = Field(
default=None,
description="ProfilingResults contains the output of the profiling process including Pareto-optimal configurations and the selected deployment configuration.",
)
deploymentInfo: Optional[DeploymentInfoStatus] = Field(
default=None,
description="DeploymentInfo tracks the state of the deployed DynamoGraphDeployment. Populated when a DGD has been created (either via autoApply or manually).",
)
observedGeneration: Optional[int] = Field(
default=None,
description="ObservedGeneration is the most recent generation observed by the controller.",
)
class DynamoGraphDeploymentRequest(BaseModel):
"""DynamoGraphDeploymentRequest is the Schema for the dynamographdeploymentrequests API. It provides a simplified, SLA-driven interface for deploying inference models on Dynamo. Users specify a model and optional performance targets; the controller handles profiling, configuration selection, and deployment."""
spec: Optional[DynamoGraphDeploymentRequestSpec] = Field(
default=None,
description="Spec defines the desired state for this deployment request.",
)
status: Optional[DynamoGraphDeploymentRequestStatus] = Field(
default=None,
description="Status reflects the current observed state of this deployment request.",
)
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -106,6 +106,10 @@ rules:
resourceNames: ["{{ include "dynamo-operator.fullname" . }}-mutating"]
{{- end }}
verbs: ["get", "patch"]
- apiGroups: ["apiextensions.k8s.io"]
resources: ["customresourcedefinitions"]
resourceNames: ["dynamographdeploymentrequests.nvidia.com"]
verbs: ["get", "patch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
......@@ -253,6 +257,29 @@ spec:
}
]"
echo "📝 Patching DGDR CRD spec.conversion..."
# Set the full conversion block (strategy + service + caBundle) in one merge patch.
# This is idempotent: safe to run on install and upgrade.
kubectl patch crd dynamographdeploymentrequests.nvidia.com \
--type merge --patch "{
\"spec\": {
\"conversion\": {
\"strategy\": \"Webhook\",
\"webhook\": {
\"clientConfig\": {
\"service\": {
\"name\": \"{{ include "dynamo-operator.fullname" . }}-webhook-service\",
\"namespace\": \"{{ .Release.Namespace }}\",
\"path\": \"/convert\"
},
\"caBundle\": \"${CA_BUNDLE}\"
},
\"conversionReviewVersions\": [\"v1\"]
}
}
}
}"
echo "✅ CA bundle injected successfully!"
echo "🎉 Webhook configuration complete!"
securityContext:
......
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
{{- if and .Values.webhook.enabled .Values.webhook.certManager.enabled }}
---
# ServiceAccount for the CRD conversion patch job
apiVersion: v1
kind: ServiceAccount
metadata:
name: {{ include "dynamo-operator.fullname" . }}-crd-conversion-patch
namespace: {{ .Release.Namespace }}
labels:
app.kubernetes.io/component: webhook
app.kubernetes.io/created-by: dynamo-operator
app.kubernetes.io/part-of: dynamo-operator
{{- include "dynamo-operator.labels" . | nindent 4 }}
annotations:
"helm.sh/hook": post-install,post-upgrade
"helm.sh/hook-weight": "1"
"helm.sh/hook-delete-policy": before-hook-creation
---
# ClusterRole to patch the DGDR CRD
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: {{ include "dynamo-operator.fullname" . }}-crd-conversion-patch
labels:
app.kubernetes.io/component: webhook
app.kubernetes.io/created-by: dynamo-operator
app.kubernetes.io/part-of: dynamo-operator
{{- include "dynamo-operator.labels" . | nindent 4 }}
annotations:
"helm.sh/hook": post-install,post-upgrade
"helm.sh/hook-weight": "1"
"helm.sh/hook-delete-policy": before-hook-creation
rules:
- apiGroups: ["apiextensions.k8s.io"]
resources: ["customresourcedefinitions"]
resourceNames: ["dynamographdeploymentrequests.nvidia.com"]
verbs: ["get", "patch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: {{ include "dynamo-operator.fullname" . }}-crd-conversion-patch
labels:
app.kubernetes.io/component: webhook
app.kubernetes.io/created-by: dynamo-operator
app.kubernetes.io/part-of: dynamo-operator
{{- include "dynamo-operator.labels" . | nindent 4 }}
annotations:
"helm.sh/hook": post-install,post-upgrade
"helm.sh/hook-weight": "1"
"helm.sh/hook-delete-policy": before-hook-creation
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: {{ include "dynamo-operator.fullname" . }}-crd-conversion-patch
subjects:
- kind: ServiceAccount
name: {{ include "dynamo-operator.fullname" . }}-crd-conversion-patch
namespace: {{ .Release.Namespace }}
---
# Job to patch the DGDR CRD with spec.conversion and cert-manager CA injection annotation.
# The cert-manager ca-injector then watches the CRD annotation and keeps the caBundle
# in spec.conversion.webhook.clientConfig.caBundle up to date automatically.
apiVersion: batch/v1
kind: Job
metadata:
name: {{ include "dynamo-operator.fullname" . }}-crd-conversion-patch-{{ .Release.Revision }}
namespace: {{ .Release.Namespace }}
labels:
app.kubernetes.io/component: webhook
app.kubernetes.io/created-by: dynamo-operator
app.kubernetes.io/part-of: dynamo-operator
{{- include "dynamo-operator.labels" . | nindent 4 }}
annotations:
"helm.sh/hook": post-install,post-upgrade
"helm.sh/hook-weight": "2"
"helm.sh/hook-delete-policy": before-hook-creation
spec:
backoffLimit: 5
template:
metadata:
name: {{ include "dynamo-operator.fullname" . }}-crd-conversion-patch
labels:
app.kubernetes.io/component: webhook
app.kubernetes.io/created-by: dynamo-operator
app.kubernetes.io/part-of: dynamo-operator
{{- include "dynamo-operator.labels" . | nindent 8 }}
spec:
serviceAccountName: {{ include "dynamo-operator.fullname" . }}-crd-conversion-patch
restartPolicy: OnFailure
{{- with .Values.controllerManager.tolerations }}
tolerations:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.controllerManager.affinity }}
affinity:
{{- toYaml . | nindent 8 }}
{{- end }}
containers:
- name: crd-conversion-patch
image: {{ .Values.webhook.certGenerator.image.repository }}:{{ .Values.webhook.certGenerator.image.tag }}
imagePullPolicy: {{ .Values.webhook.certGenerator.image.pullPolicy }}
command:
- /bin/bash
- -c
- |
set -e
echo "📝 Patching DGDR CRD with spec.conversion and cert-manager CA injection annotation..."
# Set spec.conversion (caBundle left empty — cert-manager ca-injector fills it in
# automatically once it sees the cert-manager.io/inject-ca-from annotation below).
kubectl patch crd dynamographdeploymentrequests.nvidia.com \
--type merge --patch '{
"spec": {
"conversion": {
"strategy": "Webhook",
"webhook": {
"clientConfig": {
"service": {
"name": "{{ include "dynamo-operator.fullname" . }}-webhook-service",
"namespace": "{{ .Release.Namespace }}",
"path": "/convert"
}
},
"conversionReviewVersions": ["v1"]
}
}
}
}'
# Add the cert-manager annotation so the ca-injector keeps the caBundle current.
kubectl annotate crd dynamographdeploymentrequests.nvidia.com \
--overwrite \
cert-manager.io/inject-ca-from="{{ .Release.Namespace }}/{{ include "dynamo-operator.fullname" . }}-serving-cert"
echo "✅ DGDR CRD conversion webhook configured."
echo " cert-manager ca-injector will populate the caBundle automatically."
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 1001
{{- end }}
......@@ -13,8 +13,9 @@ ARG TARGETARCH
RUN echo "Building for ${TARGETOS}/${TARGETARCH}"
# Install common dependencies
RUN apt-get update && apt-get install -y --no-install-recommends git && apt-get clean && rm -rf /var/lib/apt/lists/*
# Install common dependencies (python3-pip needed for generate-pydantic target)
RUN apt-get update && apt-get install -y --no-install-recommends git python3-pip && apt-get clean && rm -rf /var/lib/apt/lists/* \
&& pip3 install --no-cache-dir pydantic --break-system-packages
WORKDIR /workspace
......
......@@ -141,9 +141,17 @@ manifests: controller-gen ensure-yq ## Generate WebhookConfiguration, ClusterRol
done
.PHONY: generate
generate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations.
generate: controller-gen generate-pydantic ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations.
$(CONTROLLER_GEN) object:headerFile="hack/boilerplate.go.txt_" paths="./..."
.PHONY: generate-pydantic
generate-pydantic: ## Generate Python Pydantic models from v1beta1 Go types (requires Python 3 + pydantic)
@python3 -c "import pydantic" 2>/dev/null || { echo "Error: pydantic not found. Install with: pip install pydantic"; exit 1; }
@echo "Generating Pydantic models from v1beta1 DGDR types..."
@python3 api/scripts/generate_pydantic_from_go.py
@echo "Running Pydantic validation tests..."
@python3 api/scripts/test_pydantic_models.py
.PHONY: fmt
fmt: ## Run go fmt against code.
go fmt ./...
......@@ -330,6 +338,9 @@ generate-api-docs: crd-ref-docs ## Generate API reference documentation from CRD
cat docs/header.md ./docs/api_reference.md docs/footer.md > ../../docs/pages/kubernetes/api-reference.md
rm ./docs/api_reference.md
@echo "✅ Concatenated header.md, api_reference.md, and footer.md"
# Fix duplicate anchors: crd-ref-docs generates identical anchors for same-named types
# across API versions; prepend "v1beta1 " to affected v1beta1 headings and links.
python3 docs/fix-api-anchors.py ../../docs/pages/kubernetes/api-reference.md
.PHONY: coverage
coverage: test
......
......@@ -6,7 +6,7 @@ domain: nvidia.com
layout:
- go.kubebuilder.io/v4
projectName: dynamo-kubernetes-operator
repo: github.com/ai-dynamo/dynamo/deploy/operator/api/v1alpha1
repo: github.com/ai-dynamo/dynamo/deploy/operator
resources:
- api:
crdVersion: v1
......
#!/usr/bin/env python3
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Script to convert Go struct definitions from v1beta1 DGDR types to Python Pydantic models.
This script parses the Go file containing Kubernetes CRD type definitions and generates
corresponding Pydantic models that can be used in Python code (e.g., in the profiler).
Usage:
python generate_pydantic_from_go.py [--input INPUT_FILE] [--output OUTPUT_FILE]
"""
import argparse
import re
import subprocess
from dataclasses import dataclass, field
from pathlib import Path
from typing import List, Optional, Tuple
# Per-struct docstring overrides for cases where the Python docstring should differ
# from the Go comment (e.g. Python-specific mutual-exclusivity documentation).
_STRUCT_DOCSTRINGS: dict = {
"SLASpec": (
"Service-level agreement targets.\n\n"
" Provide exactly one of:\n\n"
" - ``ttft`` + ``itl``: explicit latency targets (default: 2000 ms / 30 ms)\n"
" - ``e2eLatency``: end-to-end latency target\n"
" - ``optimizationType``: high-level objective without explicit numeric targets"
),
}
# Extra Python code (validators, etc.) appended after the generated field list for
# specific structs. Cannot be expressed as Go kubebuilder markers.
_STRUCT_EXTRAS: dict = {
"SLASpec": """\
@model_validator(mode="after")
def _validate_sla_options(self) -> "SLASpec":
\"\"\"Ensure at most one SLA mode is active.\"\"\"
has_ttft_itl = self.ttft is not None and self.itl is not None
has_e2e = self.e2eLatency is not None
has_opt = self.optimizationType is not None
options_count = sum([has_ttft_itl, has_e2e, has_opt])
if options_count > 1:
raise ValueError(
"SLA must specify exactly one of: (ttft and itl), e2eLatency, "
"or optimizationType \u2014 not multiple."
)
if (self.ttft is not None) != (self.itl is not None):
raise ValueError("ttft and itl must both be provided together.")
return self
""",
}
_SPDX_HEADER = """\
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.\
"""
def _resolve_repo_root(start: Path) -> Path:
"""Return the repository root via git, falling back to go.mod traversal."""
try:
result = subprocess.run(
["git", "rev-parse", "--show-toplevel"],
capture_output=True,
text=True,
check=True,
cwd=start,
)
return Path(result.stdout.strip())
except (subprocess.CalledProcessError, FileNotFoundError):
pass
# Fallback: walk up until we find go.mod
p = start
while p != p.parent:
if (p / "go.mod").exists():
return p
p = p.parent
return start
@dataclass
class GoField:
"""Represents a field in a Go struct"""
name: str
go_type: str
json_tag: str
comment: str
is_optional: bool
is_pointer: bool
default: Optional[str] = field(default=None)
@dataclass
class GoStruct:
"""Represents a Go struct definition"""
name: str
fields: List[GoField]
comment: str
@dataclass
class GoEnum:
"""Represents a Go enum (const block)"""
name: str
base_type: str
values: List[Tuple[str, str]] # (const_name, const_value)
comment: str
class GoToPydanticConverter:
"""Converts Go structs to Pydantic models"""
# Type mapping from Go to Python
# RUF012: use a plain dict (not ClassVar) — instantiated once per instance
TYPE_MAP: dict = {
"string": "str",
"int": "int",
"int32": "int",
"int64": "int",
"float64": "float",
"bool": "bool",
"metav1.ObjectMeta": "Dict[str, Any]", # Simplified
"metav1.TypeMeta": "Dict[str, Any]", # Simplified
"metav1.Condition": "Dict[str, Any]", # Simplified
"runtime.RawExtension": "Dict[str, Any]",
"batchv1.JobSpec": "Dict[str, Any]",
"corev1.ResourceRequirements": "Dict[str, Any]",
}
def __init__(self):
self.structs: List[GoStruct] = []
self.enums: List[GoEnum] = []
def parse_go_file(self, file_path: Path) -> None:
"""Parse Go file and extract struct and enum definitions"""
content = file_path.read_text()
# Extract enum definitions (const blocks with string type)
self._parse_enums(content)
# Extract struct definitions
self._parse_structs(content)
def _parse_enums(self, content: str) -> None:
"""Parse Go const blocks that define enum types"""
# Pattern: // Comment\n// +kubebuilder:validation:Enum=val1;val2\ntype Name string
enum_pattern = r"(?://.*\n)*// \+kubebuilder:validation:Enum=([^\n]+)\ntype\s+(\w+)\s+string"
for match in re.finditer(enum_pattern, content):
enum_values_str = match.group(1)
enum_name = match.group(2)
# Extract comment — stop at blank // lines (avoid lifecycle steps etc.)
lines_before = content[: match.start()].split("\n")
comment_lines = []
for line in reversed(lines_before[-10:]): # Look at last 10 lines
stripped = line.strip()
if stripped == "//":
break # blank comment line — stop collecting
if stripped.startswith("//") and "kubebuilder" not in stripped:
comment_lines.insert(0, stripped.lstrip("/ ").strip())
elif stripped and not stripped.startswith("//"):
break
# Parse enum values from kubebuilder annotation (fallback)
enum_values_fallback = [v.strip() for v in enum_values_str.split(";")]
# Try to find individual const definitions for this enum type.
# We search line-by-line rather than requiring a contiguous block
# (blocks may contain comments between entries or mix multiple types).
individual_pattern = (
rf"(?m)^\s*{enum_name}(\w+)\s+{enum_name}\s+=\s+\"([^\"]+)\""
)
values: List[Tuple[str, str]] = []
for m in re.finditer(individual_pattern, content):
const_name = m.group(1)
const_value = m.group(2)
# Sanitise Python reserved words
if const_name in (
"None",
"True",
"False",
"pass",
"return",
"class",
"def",
):
const_name = f"{const_name}_"
values.append((const_name, const_value))
if not values:
# Fallback: derive names from kubebuilder annotation values
values = [
(v.upper().replace("-", "_"), v) for v in enum_values_fallback
]
self.enums.append(
GoEnum(
name=enum_name,
base_type="string",
values=values,
comment=" ".join(comment_lines),
)
)
def _parse_structs(self, content: str) -> None:
"""Parse Go struct definitions"""
struct_pattern = r"type\s+(\w+)\s+struct\s+\{"
for match in re.finditer(struct_pattern, content):
struct_name = match.group(1)
start_pos = match.end()
# Find matching closing brace by counting braces
brace_count = 1
pos = start_pos
while brace_count > 0 and pos < len(content):
if content[pos] == "{":
brace_count += 1
elif content[pos] == "}":
brace_count -= 1
pos += 1
struct_body = content[start_pos : pos - 1]
# Skip List types
if struct_name.endswith("List"):
continue
# Extract the struct docstring using a forward scan.
#
# A backward scan fails when kubebuilder markers appear between the
# description and the type declaration (e.g. DynamoGraphDeploymentRequest),
# because the blank // separator between the description and later content
# (lifecycle steps, markers) causes an early stop before any text is collected.
#
# Algorithm:
# 1. Walk backward to find the start of the contiguous comment block.
# 2. Walk forward from that start, skipping kubebuilder lines, and collect
# lines until the first blank // (end of the first paragraph).
lines_before = content[: match.start()].split("\n")
# Step 1: find the index just after the last non-comment line before the block
comment_start_idx = 0
for i, line in enumerate(reversed(lines_before)):
stripped = line.strip()
if stripped and not stripped.startswith("//"):
comment_start_idx = len(lines_before) - i
break
# Step 2: forward scan — collect the first paragraph, skip markers
comment_lines: List[str] = []
for line in lines_before[comment_start_idx:]:
stripped = line.strip()
if stripped == "//":
if comment_lines: # blank line ends the first paragraph
break
elif (
stripped.startswith("//")
and "kubebuilder" not in stripped
and "EDIT THIS FILE" not in stripped
):
comment_lines.append(stripped.lstrip("/ ").strip())
# kubebuilder markers and empty lines: skip, keep scanning
# Parse fields
fields = self._parse_struct_fields(struct_body)
self.structs.append(
GoStruct(
name=struct_name, fields=fields, comment=" ".join(comment_lines)
)
)
def _parse_struct_fields(self, struct_body: str) -> List[GoField]:
"""Parse fields from struct body"""
fields = []
lines = struct_body.strip().split("\n")
i = 0
while i < len(lines):
line = lines[i].strip()
# Skip empty lines
if not line:
i += 1
continue
# Collect consecutive comment lines (including single-line comments)
comment_lines: List[str] = []
default_value: Optional[str] = None
while line.startswith("//"):
# Capture kubebuilder:default (affects both Go API and Python default)
kb_match = re.search(r"\+kubebuilder:default=(\S+)", line)
# Capture +python-default (Python-only; does not affect the Go/k8s API)
py_match = re.search(r"\+python-default=(\S+)", line)
if kb_match:
default_value = kb_match.group(1)
elif py_match:
default_value = py_match.group(1)
elif (
"kubebuilder" not in line
and "+optional" not in line.lower()
and "+python-default" not in line
):
comment_lines.append(line.lstrip("/ ").strip())
i += 1
if i >= len(lines):
break
line = lines[i].strip()
# Now line should be a field definition or empty
if not line:
i += 1
continue
# Pattern: FieldName type `json:"jsonName,omitempty"`
field_pattern = r'(\w+)\s+([\w\.\*\[\]]+)\s+`json:"([^"]+)"`'
match = re.match(field_pattern, line)
if match:
field_type = match.group(2)
json_tag_full = match.group(3)
json_parts = json_tag_full.split(",")
json_name = json_parts[0]
is_optional = "omitempty" in json_parts or ",inline" in json_tag_full
is_pointer = field_type.startswith("*")
if is_pointer:
field_type = field_type[1:]
# Skip inline fields (metav1.TypeMeta, metav1.ObjectMeta)
if ",inline" in json_tag_full:
i += 1
continue
fields.append(
GoField(
name=json_name,
go_type=field_type,
json_tag=json_name,
comment=" ".join(comment_lines),
is_optional=is_optional,
is_pointer=is_pointer,
default=default_value,
)
)
i += 1
return fields
def _go_type_to_python(
self, go_type: str, is_pointer: bool, is_optional: bool
) -> str:
"""Convert Go type to Python type hint"""
# Handle array types
if go_type.startswith("[]"):
inner_type = go_type[2:]
python_inner = self._go_type_to_python(inner_type, False, False)
result = f"List[{python_inner}]"
if is_optional:
return f"Optional[{result}]"
return result
# Handle map types
if go_type.startswith("map["):
map_match = re.match(r"map\[(\w+)\](.+)", go_type)
if map_match:
key_type = self.TYPE_MAP.get(map_match.group(1), "str")
val_type = self._go_type_to_python(map_match.group(2), False, False)
result = f"Dict[{key_type}, {val_type}]"
if is_optional:
return f"Optional[{result}]"
return result
# Check if it's a known enum
for enum in self.enums:
if go_type == enum.name:
if is_pointer or is_optional:
return f"Optional[{enum.name}]"
return enum.name
# Check if it's a struct we're defining
struct_names = [s.name for s in self.structs]
if go_type in struct_names:
if is_pointer or is_optional:
return f"Optional[{go_type}]"
return go_type
# Use type map
python_type = self.TYPE_MAP.get(go_type, go_type)
if is_pointer or is_optional:
return f"Optional[{python_type}]"
return python_type
def generate_pydantic(self) -> str:
"""Generate Pydantic models from parsed structs"""
lines = [
_SPDX_HEADER,
'"""',
"Auto-generated Pydantic models from v1beta1 DGDR Go types.",
"",
"Generated by: deploy/operator/api/scripts/generate_pydantic_from_go.py",
"Source: deploy/operator/api/v1beta1/dynamographdeploymentrequest_types.go",
"",
"DO NOT EDIT MANUALLY - regenerate using the script.",
'"""',
"",
"from enum import Enum",
"from typing import Any, Dict, List, Optional",
"",
"from pydantic import BaseModel, Field, model_validator",
"",
]
# Generate enums first
for enum in self.enums:
lines.append("")
if enum.comment:
lines.append(f"# {enum.comment}")
lines.append(f"class {enum.name}(str, Enum):")
if not enum.values:
lines.append(" pass")
else:
for const_name, const_value in enum.values:
lines.append(f' {const_name} = "{const_value}"')
# Generate struct models
for struct in self.structs:
lines.append("")
lines.append("")
lines.append(f"class {struct.name}(BaseModel):")
# Add docstring — prefer an explicit override, fall back to Go comment
docstring = _STRUCT_DOCSTRINGS.get(struct.name) or struct.comment
if docstring:
if "\n" in docstring:
lines.append(f' """{docstring}"""')
else:
lines.append(f' """{docstring}"""')
lines.append("")
if not struct.fields:
lines.append(" pass")
continue
# Generate fields
for go_field in struct.fields:
# Optional-wrapping rules:
# - Non-pointer + default: NOT Optional. The API server (kubebuilder:default)
# or the Python default ensures the field always has a value.
# - Pointer + default: keep Optional. The pointer can still be nil in the
# API object (e.g. +python-default is Python-layer only); callers may
# explicitly pass None.
# - No default: follow is_optional (omitempty → Optional).
effective_optional = go_field.is_optional and (
go_field.is_pointer or go_field.default is None
)
python_type = self._go_type_to_python(
go_field.go_type, go_field.is_pointer, effective_optional
)
field_def = f" {go_field.name}: {python_type}"
if (
go_field.comment
or effective_optional
or go_field.default is not None
):
field_args = []
if go_field.default is not None:
# Emit the default from kubebuilder annotation
raw = go_field.default
# Strip surrounding Go-style double quotes if present
if raw.startswith('"') and raw.endswith('"'):
raw = raw[1:-1]
# Try to cast to int/float/bool; otherwise keep as string
if raw.lower() == "true":
field_args.append("default=True")
elif raw.lower() == "false":
field_args.append("default=False")
else:
try:
int(raw)
field_args.append(f"default={raw}")
except ValueError:
try:
float(raw)
field_args.append(f"default={raw}")
except ValueError:
field_args.append(f'default="{raw}"')
elif effective_optional:
field_args.append("default=None")
if go_field.comment:
comment_escaped = go_field.comment.replace('"', '\\"')
field_args.append(f'description="{comment_escaped}"')
field_def += f' = Field({", ".join(field_args)})'
lines.append(field_def)
# Append any struct-specific extras (validators, etc.)
extra = _STRUCT_EXTRAS.get(struct.name)
if extra:
lines.append("")
for extra_line in extra.splitlines():
lines.append(extra_line)
return "\n".join(lines)
def main():
script_dir = Path(__file__).parent.resolve()
repo_root = _resolve_repo_root(script_dir)
parser = argparse.ArgumentParser(
description="Convert Go DGDR types to Python Pydantic models"
)
parser.add_argument(
"--input",
type=Path,
default=script_dir.parent / "v1beta1" / "dynamographdeploymentrequest_types.go",
help="Input Go file path",
)
parser.add_argument(
"--output",
type=Path,
default=repo_root
/ "components"
/ "src"
/ "dynamo"
/ "profiler"
/ "utils"
/ "dgdr_v1beta1_types.py",
help="Output Python file path",
)
args = parser.parse_args()
if not args.input.exists():
print(f"Error: Input file not found: {args.input}")
return 1
args.output.parent.mkdir(parents=True, exist_ok=True)
print(f"Parsing Go types from: {args.input}")
converter = GoToPydanticConverter()
converter.parse_go_file(args.input)
print(f"Found {len(converter.enums)} enums and {len(converter.structs)} structs")
pydantic_code = converter.generate_pydantic()
args.output.write_text(pydantic_code + "\n")
print(f"Generated Pydantic models at: {args.output}")
# Format with black to match the project linter style
try:
subprocess.run(
["black", "--line-length=88", "--quiet", str(args.output)],
check=True,
)
print(f"Formatted with black: {args.output}")
except FileNotFoundError:
print("Warning: black not found; output may not match linter formatting")
except subprocess.CalledProcessError as e:
print(f"Warning: black formatting failed: {e}")
return 0
if __name__ == "__main__":
exit(main())
#!/usr/bin/env python3
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Test script for v1beta1 Pydantic models.
Validates that the generated Pydantic models can be imported and used correctly.
"""
import subprocess
import sys
from pathlib import Path
def _repo_root() -> Path:
start = Path(__file__).parent
try:
result = subprocess.run(
["git", "rev-parse", "--show-toplevel"],
capture_output=True,
text=True,
check=True,
cwd=start,
)
return Path(result.stdout.strip())
except (subprocess.CalledProcessError, FileNotFoundError):
pass
# Fallback: walk up until we find go.mod (same logic as generate_pydantic_from_go.py)
p = start
while p != p.parent:
if (p / "go.mod").exists():
return p
p = p.parent
return start
# Add the components src to path so we can import the generated models
sys.path.insert(0, str(_repo_root() / "components" / "src"))
import pydantic # noqa: E402
from dynamo.profiler.utils.dgdr_v1beta1_types import ( # noqa: E402
BackendType,
DeploymentInfoStatus,
DGDRPhase,
DynamoGraphDeploymentRequestSpec,
DynamoGraphDeploymentRequestStatus,
FeaturesSpec,
MockerSpec,
ModelCacheSpec,
OptimizationType,
PlannerPreDeploymentSweepMode,
PlannerSpec,
ProfilingPhase,
SearchStrategy,
SLASpec,
WorkloadSpec,
)
print("✓ Successfully imported all Pydantic models")
def test_simple_dgdr():
"""Test creating a simple DGDR (minimal spec)"""
spec = DynamoGraphDeploymentRequestSpec(
model="Qwen/Qwen3-32B",
)
print("✓ Created simple DGDR spec")
assert spec.model == "Qwen/Qwen3-32B"
assert spec.backend == BackendType.Auto # kubebuilder:default=auto
assert spec.autoApply is True # kubebuilder:default=true
print("✓ Simple DGDR spec validation passed")
def test_full_dgdr():
"""Test creating a full DGDR with all fields"""
spec = DynamoGraphDeploymentRequestSpec(
model="meta-llama/Llama-3.1-405B",
backend=BackendType.Vllm,
image="nvcr.io/nvidia/dynamo-runtime:latest",
workload=WorkloadSpec(
isl=1024,
osl=512,
concurrency=10.0,
),
sla=SLASpec(
ttft=100.0,
itl=10.0,
),
modelCache=ModelCacheSpec(
pvcName="model-cache",
pvcModelPath="llama-3.1-405b",
),
features=FeaturesSpec(
planner=PlannerSpec(enabled=True),
mocker=MockerSpec(enabled=False),
),
searchStrategy=SearchStrategy.Rapid,
autoApply=True,
)
print("✓ Created full DGDR spec")
assert spec.model == "meta-llama/Llama-3.1-405B"
assert spec.backend == BackendType.Vllm
assert spec.workload.isl == 1024
assert spec.sla.ttft == 100.0
assert spec.sla.itl == 10.0
assert spec.modelCache.pvcName == "model-cache"
assert spec.modelCache.pvcModelPath == "llama-3.1-405b"
assert spec.features.planner.enabled is True
assert spec.features.mocker.enabled is False
print("✓ Full DGDR spec validation passed")
def test_sla_defaults_and_validation():
"""Test SLASpec defaults and mutual-exclusivity validator"""
# Default mode: ttft + itl with python-defaults
sla = SLASpec()
assert sla.ttft == 2000.0
assert sla.itl == 30.0
assert sla.e2eLatency is None
assert sla.optimizationType is None
print("✓ SLASpec defaults correct")
# explicit ttft+itl mode: OK
SLASpec(ttft=100.0, itl=10.0)
# e2eLatency mode: OK (null out ttft/itl)
SLASpec(ttft=None, itl=None, e2eLatency=500.0)
# optimizationType mode: OK (null out ttft/itl)
SLASpec(ttft=None, itl=None, optimizationType=OptimizationType.Throughput)
# mixing modes should raise
try:
SLASpec(ttft=100.0, itl=10.0, e2eLatency=500.0)
raise AssertionError("expected ValidationError for mixed SLA modes")
except pydantic.ValidationError:
pass
# ttft without itl should raise
try:
SLASpec(itl=None, ttft=100.0)
raise AssertionError("expected ValidationError for ttft without itl")
except pydantic.ValidationError:
pass
print("✓ SLASpec validation correct")
def test_workload_defaults():
"""Test WorkloadSpec kubebuilder defaults"""
w = WorkloadSpec()
assert w.isl == 4000
assert w.osl == 1000
print("✓ WorkloadSpec defaults correct")
def test_enums():
"""Test enum values"""
# DGDRPhase — TitleCase suffix from Go const names
assert DGDRPhase.Pending == "Pending"
assert DGDRPhase.Profiling == "Profiling"
assert DGDRPhase.Deployed == "Deployed"
# ProfilingPhase — TitleCase suffix from Go const names
assert ProfilingPhase.Initializing == "Initializing"
assert ProfilingPhase.SweepingPrefill == "SweepingPrefill"
# OptimizationType — TitleCase from Go const names
assert OptimizationType.Latency == "latency"
assert OptimizationType.Throughput == "throughput"
# SearchStrategy — TitleCase from Go const names
assert SearchStrategy.Rapid == "rapid"
assert SearchStrategy.Thorough == "thorough"
# BackendType — mixed case from Go const names
assert BackendType.Auto == "auto"
assert BackendType.Vllm == "vllm"
# PlannerPreDeploymentSweepMode (None → None_ to avoid Python keyword clash)
assert PlannerPreDeploymentSweepMode.None_ == "none"
assert PlannerPreDeploymentSweepMode.Rapid == "rapid"
print("✓ All enum values validated")
def test_status_models():
"""Test status model creation"""
status = DynamoGraphDeploymentRequestStatus(
phase=DGDRPhase.Profiling,
profilingPhase=ProfilingPhase.SweepingPrefill,
dgdName="test-dgd",
profilingJobName="test-profiling-job",
deploymentInfo=DeploymentInfoStatus(
replicas=3,
availableReplicas=2,
),
)
print("✓ Created DGDR status")
assert status.phase == DGDRPhase.Profiling
assert status.profilingPhase == ProfilingPhase.SweepingPrefill
assert status.deploymentInfo.replicas == 3
print("✓ DGDR status validation passed")
def main():
"""Run all tests"""
print("\n" + "=" * 60)
print("Testing v1beta1 Pydantic Models")
print("=" * 60 + "\n")
test_simple_dgdr()
test_full_dgdr()
test_sla_defaults_and_validation()
test_workload_defaults()
test_enums()
test_status_models()
print("\n" + "=" * 60)
print("All tests passed! ✓")
print("=" * 60 + "\n")
return 0
if __name__ == "__main__":
sys.exit(main())
/*
* SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Conversion between v1alpha1 and v1beta1 DynamoGraphDeploymentRequest (DGDR).
//
// The two API versions have fundamentally different shapes: v1alpha1 stores SLA,
// workload, and model-cache configuration inside an opaque JSON blob
// (ProfilingConfig.Config), while v1beta1 breaks these out into typed structs.
// This file bridges the two representations.
//
// # Spec field mapping
//
// 1:1 simple mappings (value copied, path or wrapper type differs):
//
// v1alpha1 v1beta1
// ────────────────────────────────────────── ──────────────────────────────────────
// Spec.Model (string) Spec.Model (string)
// Spec.Backend (string) Spec.Backend (BackendType)
// Spec.AutoApply (bool) Spec.AutoApply (bool)
// Spec.UseMocker (bool) Spec.Features.Mocker.Enabled (bool)
// Spec.ProfilingConfig.ProfilerImage Spec.Image (string)
// Spec.DeploymentOverrides.WorkersImage (no v1beta1 equivalent yet — TODO: overrides.dgd)
//
// JSON blob → structured fields (parsed/reconstructed on each trip):
//
// Blob key path v1beta1 field
// ────────────────────────────────────────── ──────────────────────────────────────
// sla.ttft Spec.SLA.TTFT (*float64)
// sla.itl Spec.SLA.ITL (*float64)
// sla.isl Spec.Workload.ISL (*int32)
// sla.osl Spec.Workload.OSL (*int32)
// deployment.modelCache.pvcName Spec.ModelCache.PVCName (string)
// deployment.modelCache.modelPathInPvc Spec.ModelCache.PVCModelPath (string)
// deployment.modelCache.pvcMountPath Spec.ModelCache.PVCMountPath (string)
//
// The full JSON blob is also preserved as the annotation
// nvidia.com/dgdr-profiling-config so that unknown keys survive the round-trip.
// On ConvertFrom the blob is loaded from the annotation first, then the
// structured v1beta1 fields are written on top (structured fields win).
//
// Structural reshaping (same data, different container type):
//
// v1alpha1 v1beta1
// ────────────────────────────────────────── ──────────────────────────────────────
// ProfilingConfig.Resources Overrides.ProfilingJob.Template.
// (*corev1.ResourceRequirements) Spec.Containers[0].Resources
// ProfilingConfig.Tolerations Overrides.ProfilingJob.Template.
// ([]corev1.Toleration) Spec.Tolerations
//
// Annotation-only (no v1beta1 equivalent; round-tripped via ObjectMeta annotations):
//
// v1alpha1 field Annotation key
// ────────────────────────────────────────── ──────────────────────────────────────
// Spec.EnableGPUDiscovery nvidia.com/dgdr-enable-gpu-discovery
// Spec.ProfilingConfig.ConfigMapRef nvidia.com/dgdr-config-map-ref
// Spec.ProfilingConfig.OutputPVC nvidia.com/dgdr-output-pvc
// Spec.ProfilingConfig.Config (full blob) nvidia.com/dgdr-profiling-config
// Spec.DeploymentOverrides.{Name, nvidia.com/dgdr-deployment-overrides
// Namespace,Labels,Annotations}
//
// v1beta1-only fields with no v1alpha1 equivalent (omitted / TODO):
//
// Hardware.*, Workload.{Concurrency,RequestRate}, SLA.{E2ELatency,OptimizationType},
// Features.{Planner.*,KVRouter}, SearchStrategy
//
// # Status field mapping
//
// 1:1 simple mappings:
//
// v1alpha1 v1beta1
// ────────────────────────────────────────── ──────────────────────────────────────
// Status.ObservedGeneration Status.ObservedGeneration
// Status.Conditions Status.Conditions
// Status.GeneratedDeployment Status.ProfilingResults.SelectedConfig
// Status.Deployment.Name Status.DGDName
//
// State ↔ Phase (many-to-one, context-dependent):
//
// v1alpha1 State → v1beta1 Phase Notes
// ─────────────────────── ───────────────── ─────────────────────────────────
// "" / "Pending" Pending
// "Profiling" Profiling
// "Ready" Ready or Deployed Deployed if Deployment.Created
// "Deploying" Deploying
// "DeploymentDeleted" Ready lossy
// "Failed" Failed
//
// v1beta1 Phase → v1alpha1 State
// ─────────────────────── ─────────────────
// Pending "Pending"
// Profiling "Profiling"
// Ready "Ready"
// Deploying "Deploying"
// Deployed "Ready" lossy
// Failed "Failed"
//
// Annotation-only status fields (no v1beta1 equivalent):
//
// v1alpha1 field Annotation key
// ────────────────────────────────────────── ──────────────────────────────────────
// Status.Backend nvidia.com/dgdr-status-backend
// Status.ProfilingResults (string ref, nvidia.com/dgdr-profiling-results
// e.g. "configmap/<name>") (not the v1beta1 struct — see below)
// Status.Deployment.{Namespace,State, nvidia.com/dgdr-deployment-status
// Created} (JSON-encoded; Name maps to DGDName)
//
// Note on ProfilingResults naming collision: the two versions both have a field
// called "ProfilingResults" but with entirely different types. v1alpha1 has a
// plain string (a configmap reference). v1beta1 has a struct with Pareto and
// SelectedConfig. The string is annotation-preserved; the struct's
// SelectedConfig maps from v1alpha1 GeneratedDeployment (see 1:1 table above).
//
// Note: v1alpha1 DeploymentStatus{Name,Namespace,State,Created} and v1beta1
// DeploymentInfoStatus{Replicas,AvailableReplicas} share no common fields —
// they track different aspects of the DGD. Only Deployment.Name ↔ DGDName
// overlaps. The rest of DeploymentStatus is round-tripped via annotation;
// DeploymentInfoStatus has no v1alpha1 source and is left empty.
//
// v1beta1-only status fields with no v1alpha1 equivalent (omitted / TODO):
//
// Status.DeploymentInfo.{Replicas,AvailableReplicas},
// Status.ProfilingPhase, Status.ProfilingJobName,
// Status.ProfilingResults.Pareto
package v1alpha1
import (
"encoding/json"
"fmt"
v1beta1 "github.com/ai-dynamo/dynamo/deploy/operator/api/v1beta1"
batchv1 "k8s.io/api/batch/v1"
corev1 "k8s.io/api/core/v1"
apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
"sigs.k8s.io/controller-runtime/pkg/conversion"
)
// Annotation keys used to round-trip v1alpha1 fields that have no v1beta1 equivalent.
const (
annDGDRConfigMapRef = "nvidia.com/dgdr-config-map-ref"
annDGDROutputPVC = "nvidia.com/dgdr-output-pvc"
annDGDREnableGPUDisc = "nvidia.com/dgdr-enable-gpu-discovery"
annDGDRDeployOverrides = "nvidia.com/dgdr-deployment-overrides"
annDGDRProfilingConfig = "nvidia.com/dgdr-profiling-config"
annDGDRStatusBackend = "nvidia.com/dgdr-status-backend"
annDGDRProfilingResults = "nvidia.com/dgdr-profiling-results"
annDGDRDeploymentStatus = "nvidia.com/dgdr-deployment-status"
annDGDRProfilingJobName = "nvidia.com/dgdr-profiling-job-name"
)
// ConvertTo converts this DynamoGraphDeploymentRequest (v1alpha1) to the Hub version (v1beta1).
func (src *DynamoGraphDeploymentRequest) ConvertTo(dstRaw conversion.Hub) error {
dst, ok := dstRaw.(*v1beta1.DynamoGraphDeploymentRequest)
if !ok {
return fmt.Errorf("expected *v1beta1.DynamoGraphDeploymentRequest but got %T", dstRaw)
}
dst.ObjectMeta = src.ObjectMeta
if err := convertDGDRSpecTo(&src.Spec, &dst.Spec, dst); err != nil {
return err
}
convertDGDRStatusTo(&src.Status, &dst.Status, dst)
return nil
}
// ConvertFrom converts from the Hub version (v1beta1) to this DynamoGraphDeploymentRequest (v1alpha1).
func (dst *DynamoGraphDeploymentRequest) ConvertFrom(srcRaw conversion.Hub) error {
src, ok := srcRaw.(*v1beta1.DynamoGraphDeploymentRequest)
if !ok {
return fmt.Errorf("expected *v1beta1.DynamoGraphDeploymentRequest but got %T", srcRaw)
}
dst.ObjectMeta = src.ObjectMeta
convertDGDRSpecFrom(&src.Spec, &dst.Spec, src)
convertDGDRStatusFrom(&src.Status, &dst.Status, src)
// ProfilingJobName — no v1alpha1 status field; store as annotation for round-trip
if src.Status.ProfilingJobName != "" {
if dst.Annotations == nil {
dst.Annotations = make(map[string]string)
}
dst.Annotations[annDGDRProfilingJobName] = src.Status.ProfilingJobName
}
return nil
}
// setAnnotation initialises the annotation map if needed and sets a key.
func setAnnotation(obj *v1beta1.DynamoGraphDeploymentRequest, key, value string) {
if obj.Annotations == nil {
obj.Annotations = make(map[string]string)
}
obj.Annotations[key] = value
}
// convertDGDRSpecTo converts the v1alpha1 Spec into the v1beta1 Spec.
func convertDGDRSpecTo(src *DynamoGraphDeploymentRequestSpec, dst *v1beta1.DynamoGraphDeploymentRequestSpec, dstObj *v1beta1.DynamoGraphDeploymentRequest) error {
dst.Model = src.Model
dst.AutoApply = src.AutoApply
if src.Backend != "" {
dst.Backend = v1beta1.BackendType(src.Backend)
}
if src.DeploymentOverrides != nil && src.DeploymentOverrides.WorkersImage != "" {
dst.Image = src.DeploymentOverrides.WorkersImage
}
if src.UseMocker {
if dst.Features == nil {
dst.Features = &v1beta1.FeaturesSpec{}
}
dst.Features.Mocker = &v1beta1.MockerSpec{Enabled: true}
}
if src.EnableGPUDiscovery != nil && *src.EnableGPUDiscovery {
setAnnotation(dstObj, annDGDREnableGPUDisc, "true")
}
if src.ProfilingConfig.Config != nil && src.ProfilingConfig.Config.Raw != nil {
var blob map[string]interface{}
if err := json.Unmarshal(src.ProfilingConfig.Config.Raw, &blob); err != nil {
return fmt.Errorf("failed to parse ProfilingConfig.Config: %w", err)
}
applySLAAndWorkloadFromBlob(blob, dst)
applyModelCacheFromBlob(blob, dst)
setAnnotation(dstObj, annDGDRProfilingConfig, string(src.ProfilingConfig.Config.Raw))
}
// ProfilerImage → Image (the profiler runs in the frontend image)
// TODO: In a future MR, backend inference images will be managed separately via overrides.dgd.
if src.ProfilingConfig.ProfilerImage != "" {
dst.Image = src.ProfilingConfig.ProfilerImage
}
if src.ProfilingConfig.ConfigMapRef != nil {
if data, err := json.Marshal(src.ProfilingConfig.ConfigMapRef); err == nil {
setAnnotation(dstObj, annDGDRConfigMapRef, string(data))
}
}
if src.ProfilingConfig.OutputPVC != "" {
setAnnotation(dstObj, annDGDROutputPVC, src.ProfilingConfig.OutputPVC)
}
convertProfilingResourcesToOverrides(&src.ProfilingConfig, dst)
convertDeploymentOverridesToAnnotation(src.DeploymentOverrides, dstObj)
return nil
}
// applySLAAndWorkloadFromBlob extracts SLA and Workload fields from the v1alpha1 JSON blob.
// Both are nested under blob["sla"] in the v1alpha1 schema.
func applySLAAndWorkloadFromBlob(blob map[string]interface{}, dst *v1beta1.DynamoGraphDeploymentRequestSpec) {
slaRaw, ok := blob["sla"]
if !ok {
return
}
slaMap, ok := slaRaw.(map[string]interface{})
if !ok {
return
}
if dst.SLA == nil {
dst.SLA = &v1beta1.SLASpec{}
}
if v, ok := slaMap["ttft"].(float64); ok {
dst.SLA.TTFT = &v
}
if v, ok := slaMap["itl"].(float64); ok {
dst.SLA.ITL = &v
}
if v, ok := slaMap["isl"].(float64); ok {
if dst.Workload == nil {
dst.Workload = &v1beta1.WorkloadSpec{}
}
isl := int32(v)
dst.Workload.ISL = &isl
}
if v, ok := slaMap["osl"].(float64); ok {
if dst.Workload == nil {
dst.Workload = &v1beta1.WorkloadSpec{}
}
osl := int32(v)
dst.Workload.OSL = &osl
}
}
// applyModelCacheFromBlob extracts ModelCache from blob["deployment"]["modelCache"].
func applyModelCacheFromBlob(blob map[string]interface{}, dst *v1beta1.DynamoGraphDeploymentRequestSpec) {
deployRaw, ok := blob["deployment"]
if !ok {
return
}
deployMap, ok := deployRaw.(map[string]interface{})
if !ok {
return
}
mcRaw, ok := deployMap["modelCache"]
if !ok {
return
}
mcMap, ok := mcRaw.(map[string]interface{})
if !ok {
return
}
mc := &v1beta1.ModelCacheSpec{}
if v, ok := mcMap["pvcName"].(string); ok {
mc.PVCName = v
}
if v, ok := mcMap["modelPathInPvc"].(string); ok {
mc.PVCModelPath = v
}
if v, ok := mcMap["pvcMountPath"].(string); ok {
mc.PVCMountPath = v
}
dst.ModelCache = mc
}
// convertProfilingResourcesToOverrides maps ProfilingConfig Resources and Tolerations
// into the v1beta1 Overrides.ProfilingJob pod spec.
func convertProfilingResourcesToOverrides(src *ProfilingConfigSpec, dst *v1beta1.DynamoGraphDeploymentRequestSpec) {
if src.Resources == nil && len(src.Tolerations) == 0 {
return
}
if dst.Overrides == nil {
dst.Overrides = &v1beta1.OverridesSpec{}
}
if dst.Overrides.ProfilingJob == nil {
dst.Overrides.ProfilingJob = &batchv1.JobSpec{
Template: corev1.PodTemplateSpec{
Spec: corev1.PodSpec{},
},
}
}
podSpec := &dst.Overrides.ProfilingJob.Template.Spec
if src.Resources != nil {
if len(podSpec.Containers) == 0 {
podSpec.Containers = []corev1.Container{{}}
}
podSpec.Containers[0].Resources = *src.Resources
}
if len(src.Tolerations) > 0 {
podSpec.Tolerations = src.Tolerations
}
}
// convertDeploymentOverridesToAnnotation serialises the DeploymentOverrides metadata fields
// (Name, Namespace, Labels, Annotations) into an annotation for round-trip.
// WorkersImage is handled separately via dst.Image.
func convertDeploymentOverridesToAnnotation(src *DeploymentOverridesSpec, dstObj *v1beta1.DynamoGraphDeploymentRequest) {
if src == nil {
return
}
overrides := struct {
Name string `json:"name,omitempty"`
Namespace string `json:"namespace,omitempty"`
Labels map[string]string `json:"labels,omitempty"`
Annotations map[string]string `json:"annotations,omitempty"`
}{
Name: src.Name,
Namespace: src.Namespace,
Labels: src.Labels,
Annotations: src.Annotations,
}
if overrides.Name == "" && overrides.Namespace == "" && len(overrides.Labels) == 0 && len(overrides.Annotations) == 0 {
return
}
if data, err := json.Marshal(overrides); err == nil {
setAnnotation(dstObj, annDGDRDeployOverrides, string(data))
}
}
// convertDGDRSpecFrom converts the v1beta1 Spec back into the v1alpha1 Spec.
func convertDGDRSpecFrom(src *v1beta1.DynamoGraphDeploymentRequestSpec, dst *DynamoGraphDeploymentRequestSpec, srcObj *v1beta1.DynamoGraphDeploymentRequest) {
dst.Model = src.Model
dst.AutoApply = src.AutoApply
if src.Backend != "" {
dst.Backend = string(src.Backend)
}
if src.Features != nil && src.Features.Mocker != nil {
dst.UseMocker = src.Features.Mocker.Enabled
}
if srcObj.Annotations != nil {
if v, ok := srcObj.Annotations[annDGDREnableGPUDisc]; ok && v == "true" {
trueVal := true
dst.EnableGPUDiscovery = &trueVal
}
}
// Reconstruct the JSON blob: start from the round-trip annotation (preserves unknown
// keys), then overwrite with structured v1beta1 fields (structured fields win).
var blob map[string]interface{}
if srcObj.Annotations != nil {
if rawBlob, ok := srcObj.Annotations[annDGDRProfilingConfig]; ok && rawBlob != "" {
_ = json.Unmarshal([]byte(rawBlob), &blob) // best-effort
}
}
if src.SLA != nil || src.Workload != nil {
if blob == nil {
blob = make(map[string]interface{})
}
mergeSLAWorkloadIntoBlob(src, blob)
}
if src.ModelCache != nil {
if blob == nil {
blob = make(map[string]interface{})
}
mergeModelCacheIntoBlob(src.ModelCache, blob)
}
if blob != nil {
if data, err := json.Marshal(blob); err == nil {
dst.ProfilingConfig.Config = &apiextensionsv1.JSON{Raw: data}
}
}
// Image → ProfilerImage (round-trip; see ConvertTo for rationale)
// TODO: In a future MR, backend images will come from overrides.dgd; worker image
// (v1alpha1 DeploymentOverrides.WorkersImage) has no v1beta1 equivalent yet.
if src.Image != "" {
dst.ProfilingConfig.ProfilerImage = src.Image
}
restoreAnnotationFields(srcObj, dst)
restoreProfilingJobResources(src, dst)
}
// mergeSLAWorkloadIntoBlob writes SLA and Workload structured fields back into the JSON blob,
// overwriting any existing values for those keys.
func mergeSLAWorkloadIntoBlob(src *v1beta1.DynamoGraphDeploymentRequestSpec, blob map[string]interface{}) {
slaMap, _ := blob["sla"].(map[string]interface{})
if slaMap == nil {
slaMap = make(map[string]interface{})
}
if src.SLA != nil {
if src.SLA.TTFT != nil {
slaMap["ttft"] = *src.SLA.TTFT
}
if src.SLA.ITL != nil {
slaMap["itl"] = *src.SLA.ITL
}
}
if src.Workload != nil {
if src.Workload.ISL != nil {
slaMap["isl"] = float64(*src.Workload.ISL)
}
if src.Workload.OSL != nil {
slaMap["osl"] = float64(*src.Workload.OSL)
}
}
blob["sla"] = slaMap
}
// mergeModelCacheIntoBlob writes ModelCache structured fields back into blob["deployment"]["modelCache"].
func mergeModelCacheIntoBlob(mc *v1beta1.ModelCacheSpec, blob map[string]interface{}) {
deployMap, _ := blob["deployment"].(map[string]interface{})
if deployMap == nil {
deployMap = make(map[string]interface{})
}
mcMap := make(map[string]interface{})
if mc.PVCName != "" {
mcMap["pvcName"] = mc.PVCName
}
if mc.PVCModelPath != "" {
mcMap["modelPathInPvc"] = mc.PVCModelPath
}
if mc.PVCMountPath != "" {
mcMap["pvcMountPath"] = mc.PVCMountPath
}
if len(mcMap) > 0 {
deployMap["modelCache"] = mcMap
blob["deployment"] = deployMap
}
}
// restoreAnnotationFields restores v1alpha1 spec fields that were annotation-preserved
// during ConvertTo: ConfigMapRef, OutputPVC, and DeploymentOverrides.
func restoreAnnotationFields(srcObj *v1beta1.DynamoGraphDeploymentRequest, dst *DynamoGraphDeploymentRequestSpec) {
if srcObj.Annotations == nil {
return
}
if v, ok := srcObj.Annotations[annDGDRConfigMapRef]; ok && v != "" {
var ref ConfigMapKeySelector
if err := json.Unmarshal([]byte(v), &ref); err == nil {
dst.ProfilingConfig.ConfigMapRef = &ref
}
}
if v, ok := srcObj.Annotations[annDGDROutputPVC]; ok {
dst.ProfilingConfig.OutputPVC = v
}
if v, ok := srcObj.Annotations[annDGDRDeployOverrides]; ok && v != "" {
var overrides struct {
Name string `json:"name,omitempty"`
Namespace string `json:"namespace,omitempty"`
Labels map[string]string `json:"labels,omitempty"`
Annotations map[string]string `json:"annotations,omitempty"`
}
if err := json.Unmarshal([]byte(v), &overrides); err == nil {
if dst.DeploymentOverrides == nil {
dst.DeploymentOverrides = &DeploymentOverridesSpec{}
}
dst.DeploymentOverrides.Name = overrides.Name
dst.DeploymentOverrides.Namespace = overrides.Namespace
dst.DeploymentOverrides.Labels = overrides.Labels
dst.DeploymentOverrides.Annotations = overrides.Annotations
}
}
}
// restoreProfilingJobResources restores Resources and Tolerations from
// v1beta1 Overrides.ProfilingJob back into v1alpha1 ProfilingConfig.
func restoreProfilingJobResources(src *v1beta1.DynamoGraphDeploymentRequestSpec, dst *DynamoGraphDeploymentRequestSpec) {
if src.Overrides == nil || src.Overrides.ProfilingJob == nil {
return
}
podSpec := &src.Overrides.ProfilingJob.Template.Spec
if len(podSpec.Containers) > 0 {
res := podSpec.Containers[0].Resources
if len(res.Requests) > 0 || len(res.Limits) > 0 {
dst.ProfilingConfig.Resources = &res
}
}
if len(podSpec.Tolerations) > 0 {
dst.ProfilingConfig.Tolerations = podSpec.Tolerations
}
}
// convertDGDRStatusTo converts the v1alpha1 Status into the v1beta1 Status.
func convertDGDRStatusTo(src *DynamoGraphDeploymentRequestStatus, dst *v1beta1.DynamoGraphDeploymentRequestStatus, dstObj *v1beta1.DynamoGraphDeploymentRequest) {
dst.Phase = dgdrStateToPhase(string(src.State), src.Deployment)
dst.ObservedGeneration = src.ObservedGeneration
dst.Conditions = src.Conditions
if src.Backend != "" {
setAnnotation(dstObj, annDGDRStatusBackend, src.Backend)
}
if src.ProfilingResults != "" {
setAnnotation(dstObj, annDGDRProfilingResults, src.ProfilingResults)
}
if src.GeneratedDeployment != nil {
if dst.ProfilingResults == nil {
dst.ProfilingResults = &v1beta1.ProfilingResultsStatus{}
}
dst.ProfilingResults.SelectedConfig = src.GeneratedDeployment
}
if src.Deployment != nil {
dst.DGDName = src.Deployment.Name
if data, err := json.Marshal(src.Deployment); err == nil {
setAnnotation(dstObj, annDGDRDeploymentStatus, string(data))
}
}
// ProfilingJobName — read from annotation (stored during ConvertFrom for round-trip)
if ann, ok := dstObj.Annotations[annDGDRProfilingJobName]; ok && ann != "" {
dst.ProfilingJobName = ann
}
}
// convertDGDRStatusFrom converts the v1beta1 Status back into the v1alpha1 Status.
func convertDGDRStatusFrom(src *v1beta1.DynamoGraphDeploymentRequestStatus, dst *DynamoGraphDeploymentRequestStatus, srcObj *v1beta1.DynamoGraphDeploymentRequest) {
dst.State = DGDRState(dgdrPhaseToState(src.Phase))
dst.ObservedGeneration = src.ObservedGeneration
dst.Conditions = src.Conditions
if srcObj.Annotations != nil {
if v, ok := srcObj.Annotations[annDGDRStatusBackend]; ok {
dst.Backend = v
}
if v, ok := srcObj.Annotations[annDGDRProfilingResults]; ok {
dst.ProfilingResults = v
}
}
if src.ProfilingResults != nil && src.ProfilingResults.SelectedConfig != nil {
dst.GeneratedDeployment = src.ProfilingResults.SelectedConfig
}
if srcObj.Annotations != nil {
if v, ok := srcObj.Annotations[annDGDRDeploymentStatus]; ok && v != "" {
var depStatus DeploymentStatus
if err := json.Unmarshal([]byte(v), &depStatus); err == nil {
dst.Deployment = &depStatus
}
}
}
// If no annotation but we have DGDName, create a minimal deployment status.
// Created is left false so the v1alpha1 controller does not skip re-creating the DGD.
if dst.Deployment == nil && src.DGDName != "" {
dst.Deployment = &DeploymentStatus{
Name: src.DGDName,
Created: false,
}
}
}
// dgdrStateToPhase maps v1alpha1 state strings to v1beta1 DGDRPhase.
func dgdrStateToPhase(state string, deployment *DeploymentStatus) v1beta1.DGDRPhase {
switch state {
case "", string(DGDRStatePending):
return v1beta1.DGDRPhasePending
case string(DGDRStateProfiling):
return v1beta1.DGDRPhaseProfiling
case string(DGDRStateReady):
// If there is a deployment that was created, it means we are actually Deployed
if deployment != nil && deployment.Created {
return v1beta1.DGDRPhaseDeployed
}
return v1beta1.DGDRPhaseReady
case string(DGDRStateDeploying):
return v1beta1.DGDRPhaseDeploying
case string(DGDRStateDeploymentDeleted):
return v1beta1.DGDRPhaseReady
case string(DGDRStateFailed):
return v1beta1.DGDRPhaseFailed
default:
return v1beta1.DGDRPhasePending
}
}
// dgdrPhaseToState maps v1beta1 DGDRPhase to v1alpha1 state strings.
func dgdrPhaseToState(phase v1beta1.DGDRPhase) string {
switch phase {
case v1beta1.DGDRPhasePending:
return string(DGDRStatePending)
case v1beta1.DGDRPhaseProfiling:
return string(DGDRStateProfiling)
case v1beta1.DGDRPhaseReady:
return string(DGDRStateReady)
case v1beta1.DGDRPhaseDeploying:
return string(DGDRStateDeploying)
case v1beta1.DGDRPhaseDeployed:
return string(DGDRStateReady) // lossy
case v1beta1.DGDRPhaseFailed:
return string(DGDRStateFailed)
default:
return string(DGDRStatePending)
}
}
/*
* SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package v1alpha1
import (
"encoding/json"
"testing"
v1beta1 "github.com/ai-dynamo/dynamo/deploy/operator/api/v1beta1"
apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
runtime "k8s.io/apimachinery/pkg/runtime"
)
// newV1alpha1DGDR builds a fully-populated v1alpha1 DGDR for use in tests.
func newV1alpha1DGDR() *DynamoGraphDeploymentRequest {
profilingBlob := map[string]interface{}{
"sla": map[string]interface{}{
"ttft": float64(500),
"itl": float64(20),
"isl": float64(2048),
"osl": float64(512),
},
"deployment": map[string]interface{}{
"modelCache": map[string]interface{}{
"pvcName": "model-pvc",
"modelPathInPvc": "llama-3",
"pvcMountPath": "/data/model",
},
},
"extra_key": "preserved",
}
blobRaw, _ := json.Marshal(profilingBlob)
trueVal := true
return &DynamoGraphDeploymentRequest{
ObjectMeta: metav1.ObjectMeta{
Name: "test-dgdr",
Namespace: "default",
},
Spec: DynamoGraphDeploymentRequestSpec{
Model: "meta-llama/Llama-3.1-8B",
Backend: "vllm",
AutoApply: true,
UseMocker: true,
ProfilingConfig: ProfilingConfigSpec{
ProfilerImage: "nvcr.io/nvidia/dynamo:latest",
OutputPVC: "output-pvc",
Config: &apiextensionsv1.JSON{Raw: blobRaw},
ConfigMapRef: &ConfigMapKeySelector{Name: "base-config", Key: "disagg.yaml"},
},
EnableGPUDiscovery: &trueVal,
DeploymentOverrides: &DeploymentOverridesSpec{
Name: "my-dgd",
Namespace: "prod",
Labels: map[string]string{"team": "ml"},
},
},
Status: DynamoGraphDeploymentRequestStatus{
State: DGDRStateProfiling,
Backend: "vllm",
ObservedGeneration: 3,
ProfilingResults: "configmap/profiling-cm",
Deployment: &DeploymentStatus{
Name: "my-dgd",
Namespace: "prod",
State: "initializing",
Created: true,
},
},
}
}
// newV1beta1DGDR builds a fully-populated v1beta1 DGDR for use in tests.
func newV1beta1DGDR() *v1beta1.DynamoGraphDeploymentRequest {
ttft := float64(300)
itl := float64(15)
isl := int32(1024)
osl := int32(256)
rawDGD, _ := json.Marshal(map[string]interface{}{"apiVersion": "nvidia.com/v1alpha1", "kind": "DynamoGraphDeployment"})
return &v1beta1.DynamoGraphDeploymentRequest{
ObjectMeta: metav1.ObjectMeta{
Name: "hub-dgdr",
Namespace: "default",
},
Spec: v1beta1.DynamoGraphDeploymentRequestSpec{
Model: "Qwen/Qwen3-32B",
Backend: v1beta1.BackendTypeVllm,
AutoApply: false,
Image: "nvcr.io/nvidia/dynamo:0.3.2",
SLA: &v1beta1.SLASpec{
TTFT: &ttft,
ITL: &itl,
},
Workload: &v1beta1.WorkloadSpec{
ISL: &isl,
OSL: &osl,
},
ModelCache: &v1beta1.ModelCacheSpec{
PVCName: "qwen-pvc",
PVCModelPath: "qwen3-32b",
PVCMountPath: "/models",
},
Features: &v1beta1.FeaturesSpec{
Mocker: &v1beta1.MockerSpec{Enabled: true},
},
},
Status: v1beta1.DynamoGraphDeploymentRequestStatus{
Phase: v1beta1.DGDRPhaseDeployed,
ObservedGeneration: 2,
DGDName: "hub-dgd",
ProfilingJobName: "profiling-job-1",
ProfilingResults: &v1beta1.ProfilingResultsStatus{
SelectedConfig: &runtime.RawExtension{Raw: rawDGD},
},
},
}
}
// TestConvertTo_SpecFields verifies that key v1alpha1 spec fields land in the correct v1beta1 locations.
func TestConvertTo_SpecFields(t *testing.T) {
src := newV1alpha1DGDR()
dst := &v1beta1.DynamoGraphDeploymentRequest{}
if err := src.ConvertTo(dst); err != nil {
t.Fatalf("ConvertTo() error = %v", err)
}
// Simple 1:1 fields
if dst.Spec.Model != src.Spec.Model {
t.Errorf("Model: got %q, want %q", dst.Spec.Model, src.Spec.Model)
}
if string(dst.Spec.Backend) != src.Spec.Backend {
t.Errorf("Backend: got %q, want %q", dst.Spec.Backend, src.Spec.Backend)
}
if dst.Spec.AutoApply != src.Spec.AutoApply {
t.Errorf("AutoApply: got %v, want %v", dst.Spec.AutoApply, src.Spec.AutoApply)
}
// ProfilerImage → Image
if dst.Spec.Image != src.Spec.ProfilingConfig.ProfilerImage {
t.Errorf("Image: got %q, want %q", dst.Spec.Image, src.Spec.ProfilingConfig.ProfilerImage)
}
// UseMocker → Features.Mocker.Enabled
if dst.Spec.Features == nil || dst.Spec.Features.Mocker == nil {
t.Fatal("Features.Mocker is nil")
}
if !dst.Spec.Features.Mocker.Enabled {
t.Error("Features.Mocker.Enabled: got false, want true")
}
// SLA from JSON blob
if dst.Spec.SLA == nil {
t.Fatal("SLA is nil")
}
if dst.Spec.SLA.TTFT == nil || *dst.Spec.SLA.TTFT != 500 {
t.Errorf("SLA.TTFT: got %v, want 500", dst.Spec.SLA.TTFT)
}
if dst.Spec.SLA.ITL == nil || *dst.Spec.SLA.ITL != 20 {
t.Errorf("SLA.ITL: got %v, want 20", dst.Spec.SLA.ITL)
}
// Workload from JSON blob
if dst.Spec.Workload == nil {
t.Fatal("Workload is nil")
}
if dst.Spec.Workload.ISL == nil || *dst.Spec.Workload.ISL != 2048 {
t.Errorf("Workload.ISL: got %v, want 2048", dst.Spec.Workload.ISL)
}
if dst.Spec.Workload.OSL == nil || *dst.Spec.Workload.OSL != 512 {
t.Errorf("Workload.OSL: got %v, want 512", dst.Spec.Workload.OSL)
}
// ModelCache from JSON blob
if dst.Spec.ModelCache == nil {
t.Fatal("ModelCache is nil")
}
if dst.Spec.ModelCache.PVCName != "model-pvc" {
t.Errorf("ModelCache.PVCName: got %q, want %q", dst.Spec.ModelCache.PVCName, "model-pvc")
}
if dst.Spec.ModelCache.PVCModelPath != "llama-3" {
t.Errorf("ModelCache.PVCModelPath: got %q, want %q", dst.Spec.ModelCache.PVCModelPath, "llama-3")
}
if dst.Spec.ModelCache.PVCMountPath != "/data/model" {
t.Errorf("ModelCache.PVCMountPath: got %q, want %q", dst.Spec.ModelCache.PVCMountPath, "/data/model")
}
// EnableGPUDiscovery → annotation
if dst.Annotations[annDGDREnableGPUDisc] != "true" {
t.Errorf("annDGDREnableGPUDisc annotation: got %q, want %q", dst.Annotations[annDGDREnableGPUDisc], "true")
}
// OutputPVC → annotation
if dst.Annotations[annDGDROutputPVC] != "output-pvc" {
t.Errorf("annDGDROutputPVC annotation: got %q, want %q", dst.Annotations[annDGDROutputPVC], "output-pvc")
}
// DeploymentOverrides → annotation
if dst.Annotations[annDGDRDeployOverrides] == "" {
t.Error("annDGDRDeployOverrides annotation is empty")
}
}
// TestConvertTo_StatusFields verifies that key v1alpha1 status fields land in the correct v1beta1 locations.
func TestConvertTo_StatusFields(t *testing.T) {
src := newV1alpha1DGDR()
dst := &v1beta1.DynamoGraphDeploymentRequest{}
if err := src.ConvertTo(dst); err != nil {
t.Fatalf("ConvertTo() error = %v", err)
}
// Profiling state → Profiling phase
if dst.Status.Phase != v1beta1.DGDRPhaseProfiling {
t.Errorf("Status.Phase: got %q, want %q", dst.Status.Phase, v1beta1.DGDRPhaseProfiling)
}
if dst.Status.ObservedGeneration != 3 {
t.Errorf("Status.ObservedGeneration: got %d, want 3", dst.Status.ObservedGeneration)
}
// Deployment.Name → DGDName
if dst.Status.DGDName != "my-dgd" {
t.Errorf("Status.DGDName: got %q, want %q", dst.Status.DGDName, "my-dgd")
}
// Backend → annotation
if dst.Annotations[annDGDRStatusBackend] != "vllm" {
t.Errorf("annDGDRStatusBackend annotation: got %q, want %q", dst.Annotations[annDGDRStatusBackend], "vllm")
}
// ProfilingResults → annotation
if dst.Annotations[annDGDRProfilingResults] != "configmap/profiling-cm" {
t.Errorf("annDGDRProfilingResults annotation: got %q, want %q", dst.Annotations[annDGDRProfilingResults], "configmap/profiling-cm")
}
}
// TestAlpha1RoundTrip verifies v1alpha1 → v1beta1 → v1alpha1 preserves all round-tripped fields.
func TestAlpha1RoundTrip(t *testing.T) {
original := newV1alpha1DGDR()
// Step 1: v1alpha1 → v1beta1
hub := &v1beta1.DynamoGraphDeploymentRequest{}
if err := original.ConvertTo(hub); err != nil {
t.Fatalf("ConvertTo() error = %v", err)
}
// Step 2: v1beta1 → v1alpha1
restored := &DynamoGraphDeploymentRequest{}
if err := restored.ConvertFrom(hub); err != nil {
t.Fatalf("ConvertFrom() error = %v", err)
}
// --- Spec checks ---
if restored.Spec.Model != original.Spec.Model {
t.Errorf("Spec.Model: got %q, want %q", restored.Spec.Model, original.Spec.Model)
}
if restored.Spec.Backend != original.Spec.Backend {
t.Errorf("Spec.Backend: got %q, want %q", restored.Spec.Backend, original.Spec.Backend)
}
if restored.Spec.AutoApply != original.Spec.AutoApply {
t.Errorf("Spec.AutoApply: got %v, want %v", restored.Spec.AutoApply, original.Spec.AutoApply)
}
if restored.Spec.UseMocker != original.Spec.UseMocker {
t.Errorf("Spec.UseMocker: got %v, want %v", restored.Spec.UseMocker, original.Spec.UseMocker)
}
if restored.Spec.ProfilingConfig.ProfilerImage != original.Spec.ProfilingConfig.ProfilerImage {
t.Errorf("ProfilingConfig.ProfilerImage: got %q, want %q", restored.Spec.ProfilingConfig.ProfilerImage, original.Spec.ProfilingConfig.ProfilerImage)
}
if restored.Spec.ProfilingConfig.OutputPVC != original.Spec.ProfilingConfig.OutputPVC {
t.Errorf("ProfilingConfig.OutputPVC: got %q, want %q", restored.Spec.ProfilingConfig.OutputPVC, original.Spec.ProfilingConfig.OutputPVC)
}
// ConfigMapRef round-trip
if restored.Spec.ProfilingConfig.ConfigMapRef == nil {
t.Fatal("ProfilingConfig.ConfigMapRef is nil after round-trip")
}
if restored.Spec.ProfilingConfig.ConfigMapRef.Name != original.Spec.ProfilingConfig.ConfigMapRef.Name {
t.Errorf("ConfigMapRef.Name: got %q, want %q", restored.Spec.ProfilingConfig.ConfigMapRef.Name, original.Spec.ProfilingConfig.ConfigMapRef.Name)
}
// EnableGPUDiscovery round-trip
if restored.Spec.EnableGPUDiscovery == nil || !*restored.Spec.EnableGPUDiscovery {
t.Error("Spec.EnableGPUDiscovery: expected true after round-trip")
}
// DeploymentOverrides round-trip
if restored.Spec.DeploymentOverrides == nil {
t.Fatal("Spec.DeploymentOverrides is nil after round-trip")
}
if restored.Spec.DeploymentOverrides.Name != original.Spec.DeploymentOverrides.Name {
t.Errorf("DeploymentOverrides.Name: got %q, want %q", restored.Spec.DeploymentOverrides.Name, original.Spec.DeploymentOverrides.Name)
}
// JSON blob round-trip: SLA fields re-emerge in ProfilingConfig.Config
if restored.Spec.ProfilingConfig.Config == nil {
t.Fatal("ProfilingConfig.Config is nil after round-trip")
}
var blob map[string]interface{}
if err := json.Unmarshal(restored.Spec.ProfilingConfig.Config.Raw, &blob); err != nil {
t.Fatalf("failed to unmarshal restored ProfilingConfig.Config: %v", err)
}
slaMap, _ := blob["sla"].(map[string]interface{})
if slaMap == nil {
t.Fatal("sla key missing in restored JSON blob")
}
if slaMap["ttft"] != float64(500) {
t.Errorf("blob sla.ttft: got %v, want 500", slaMap["ttft"])
}
if slaMap["isl"] != float64(2048) {
t.Errorf("blob sla.isl: got %v, want 2048", slaMap["isl"])
}
// Verify unknown keys are preserved via the annotation round-trip
if blob["extra_key"] != "preserved" {
t.Errorf("extra_key: got %v, want %q", blob["extra_key"], "preserved")
}
// --- Status checks ---
if restored.Status.State != original.Status.State {
t.Errorf("Status.State: got %q, want %q", restored.Status.State, original.Status.State)
}
if restored.Status.ObservedGeneration != original.Status.ObservedGeneration {
t.Errorf("Status.ObservedGeneration: got %d, want %d", restored.Status.ObservedGeneration, original.Status.ObservedGeneration)
}
if restored.Status.Backend != original.Status.Backend {
t.Errorf("Status.Backend: got %q, want %q", restored.Status.Backend, original.Status.Backend)
}
if restored.Status.ProfilingResults != original.Status.ProfilingResults {
t.Errorf("Status.ProfilingResults: got %q, want %q", restored.Status.ProfilingResults, original.Status.ProfilingResults)
}
if restored.Status.Deployment == nil {
t.Fatal("Status.Deployment is nil after round-trip")
}
if restored.Status.Deployment.Name != original.Status.Deployment.Name {
t.Errorf("Status.Deployment.Name: got %q, want %q", restored.Status.Deployment.Name, original.Status.Deployment.Name)
}
if restored.Status.Deployment.Created != original.Status.Deployment.Created {
t.Errorf("Status.Deployment.Created: got %v, want %v", restored.Status.Deployment.Created, original.Status.Deployment.Created)
}
}
// TestHubRoundTrip verifies v1beta1 → v1alpha1 → v1beta1 preserves all round-tripped fields.
func TestHubRoundTrip(t *testing.T) {
original := newV1beta1DGDR()
// Step 1: v1beta1 → v1alpha1
spoke := &DynamoGraphDeploymentRequest{}
if err := spoke.ConvertFrom(original); err != nil {
t.Fatalf("ConvertFrom() error = %v", err)
}
// Step 2: v1alpha1 → v1beta1
restored := &v1beta1.DynamoGraphDeploymentRequest{}
if err := spoke.ConvertTo(restored); err != nil {
t.Fatalf("ConvertTo() error = %v", err)
}
// --- Spec checks ---
if restored.Spec.Model != original.Spec.Model {
t.Errorf("Spec.Model: got %q, want %q", restored.Spec.Model, original.Spec.Model)
}
if restored.Spec.Backend != original.Spec.Backend {
t.Errorf("Spec.Backend: got %q, want %q", restored.Spec.Backend, original.Spec.Backend)
}
if restored.Spec.AutoApply != original.Spec.AutoApply {
t.Errorf("Spec.AutoApply: got %v, want %v", restored.Spec.AutoApply, original.Spec.AutoApply)
}
if restored.Spec.Image != original.Spec.Image {
t.Errorf("Spec.Image: got %q, want %q", restored.Spec.Image, original.Spec.Image)
}
// UseMocker round-trip via Features.Mocker.Enabled
if restored.Spec.Features == nil || restored.Spec.Features.Mocker == nil {
t.Fatal("Spec.Features.Mocker is nil after round-trip")
}
if restored.Spec.Features.Mocker.Enabled != original.Spec.Features.Mocker.Enabled {
t.Errorf("Features.Mocker.Enabled: got %v, want %v", restored.Spec.Features.Mocker.Enabled, original.Spec.Features.Mocker.Enabled)
}
// SLA round-trip via JSON blob
if restored.Spec.SLA == nil {
t.Fatal("Spec.SLA is nil after round-trip")
}
if restored.Spec.SLA.TTFT == nil || *restored.Spec.SLA.TTFT != *original.Spec.SLA.TTFT {
t.Errorf("SLA.TTFT: got %v, want %v", restored.Spec.SLA.TTFT, original.Spec.SLA.TTFT)
}
if restored.Spec.SLA.ITL == nil || *restored.Spec.SLA.ITL != *original.Spec.SLA.ITL {
t.Errorf("SLA.ITL: got %v, want %v", restored.Spec.SLA.ITL, original.Spec.SLA.ITL)
}
// Workload round-trip via JSON blob
if restored.Spec.Workload == nil {
t.Fatal("Spec.Workload is nil after round-trip")
}
if restored.Spec.Workload.ISL == nil || *restored.Spec.Workload.ISL != *original.Spec.Workload.ISL {
t.Errorf("Workload.ISL: got %v, want %v", restored.Spec.Workload.ISL, original.Spec.Workload.ISL)
}
if restored.Spec.Workload.OSL == nil || *restored.Spec.Workload.OSL != *original.Spec.Workload.OSL {
t.Errorf("Workload.OSL: got %v, want %v", restored.Spec.Workload.OSL, original.Spec.Workload.OSL)
}
// ModelCache round-trip via JSON blob
if restored.Spec.ModelCache == nil {
t.Fatal("Spec.ModelCache is nil after round-trip")
}
if restored.Spec.ModelCache.PVCName != original.Spec.ModelCache.PVCName {
t.Errorf("ModelCache.PVCName: got %q, want %q", restored.Spec.ModelCache.PVCName, original.Spec.ModelCache.PVCName)
}
if restored.Spec.ModelCache.PVCModelPath != original.Spec.ModelCache.PVCModelPath {
t.Errorf("ModelCache.PVCModelPath: got %q, want %q", restored.Spec.ModelCache.PVCModelPath, original.Spec.ModelCache.PVCModelPath)
}
if restored.Spec.ModelCache.PVCMountPath != original.Spec.ModelCache.PVCMountPath {
t.Errorf("ModelCache.PVCMountPath: got %q, want %q", restored.Spec.ModelCache.PVCMountPath, original.Spec.ModelCache.PVCMountPath)
}
// --- Status checks ---
// Deployed → Ready (lossy: v1alpha1 has no "Deployed" state; maps to "Ready")
// then on the way back Ready→Ready
if restored.Status.Phase != v1beta1.DGDRPhaseReady {
t.Errorf("Status.Phase: got %q, want %q (Deployed→Ready is lossy)", restored.Status.Phase, v1beta1.DGDRPhaseReady)
}
if restored.Status.ObservedGeneration != original.Status.ObservedGeneration {
t.Errorf("Status.ObservedGeneration: got %d, want %d", restored.Status.ObservedGeneration, original.Status.ObservedGeneration)
}
// DGDName round-trip
if restored.Status.DGDName != original.Status.DGDName {
t.Errorf("Status.DGDName: got %q, want %q", restored.Status.DGDName, original.Status.DGDName)
}
// ProfilingJobName round-trip via annotation
if restored.Status.ProfilingJobName != original.Status.ProfilingJobName {
t.Errorf("Status.ProfilingJobName: got %q, want %q", restored.Status.ProfilingJobName, original.Status.ProfilingJobName)
}
// GeneratedDeployment round-trip via ProfilingResults.SelectedConfig
if restored.Status.ProfilingResults == nil || restored.Status.ProfilingResults.SelectedConfig == nil {
t.Fatal("Status.ProfilingResults.SelectedConfig is nil after round-trip")
}
}
// TestConvertTo_InvalidProfilingConfigJSON verifies that malformed JSON in ProfilingConfig.Config
// returns an error rather than silently producing an incomplete conversion.
func TestConvertTo_InvalidProfilingConfigJSON(t *testing.T) {
src := newV1alpha1DGDR()
src.Spec.ProfilingConfig.Config = &apiextensionsv1.JSON{Raw: []byte(`{not valid json`)}
dst := &v1beta1.DynamoGraphDeploymentRequest{}
err := src.ConvertTo(dst)
if err == nil {
t.Fatal("ConvertTo() expected error for invalid JSON, got nil")
}
}
......@@ -273,9 +273,15 @@ type DynamoGraphDeploymentRequestStatus struct {
// The spec becomes immutable once profiling starts. Users must delete and recreate
// the DGDR to modify configuration after this point.
//
// DEPRECATION NOTICE: v1alpha1 DynamoGraphDeploymentRequest is deprecated.
// Please migrate to nvidia.com/v1beta1 DynamoGraphDeploymentRequest.
// v1alpha1 will be removed in a future release.
//
// +kubebuilder:object:root=true
// +kubebuilder:subresource:status
// +kubebuilder:storageversion
// +kubebuilder:resource:shortName=dgdr
// +kubebuilder:deprecatedversion:warning="nvidia.com/v1alpha1 DynamoGraphDeploymentRequest is deprecated; use nvidia.com/v1beta1 DynamoGraphDeploymentRequest"
// +kubebuilder:printcolumn:name="Model",type=string,JSONPath=`.spec.model`
// +kubebuilder:printcolumn:name="Backend",type=string,JSONPath=`.status.backend`
// +kubebuilder:printcolumn:name="State",type=string,JSONPath=`.status.state`
......
/*
* SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package v1beta1
import (
"sigs.k8s.io/controller-runtime/pkg/conversion"
)
// Hub marks v1beta1 as the conversion hub for all DGDR versions.
// All other versions (v1alpha1, future versions) convert through v1beta1.
func (*DynamoGraphDeploymentRequest) Hub() {}
// Ensure v1beta1 implements the Hub interface.
var _ conversion.Hub = &DynamoGraphDeploymentRequest{}
/*
* SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package v1beta1
import (
batchv1 "k8s.io/api/batch/v1"
apimeta "k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
runtime "k8s.io/apimachinery/pkg/runtime"
)
// DGDRPhase represents the lifecycle phase of a DynamoGraphDeploymentRequest.
// +kubebuilder:validation:Enum=Pending;Profiling;Ready;Deploying;Deployed;Failed
type DGDRPhase string
const (
DGDRPhasePending DGDRPhase = "Pending"
DGDRPhaseProfiling DGDRPhase = "Profiling"
DGDRPhaseReady DGDRPhase = "Ready"
DGDRPhaseDeploying DGDRPhase = "Deploying"
DGDRPhaseDeployed DGDRPhase = "Deployed"
DGDRPhaseFailed DGDRPhase = "Failed"
// Condition types
// ConditionTypeSucceeded is the aggregate condition for the DGDR lifecycle.
// True = pipeline completed successfully; False = in progress or failed.
// Reason and Message reflect the current stage or error.
ConditionTypeSucceeded = "Succeeded"
ConditionTypeValidation = "Validation"
ConditionTypeProfiling = "Profiling"
ConditionTypeSpecGenerated = "SpecGenerated"
ConditionTypeDeploymentReady = "DeploymentReady"
// Event reasons
EventReasonInitialized = "Initialized"
EventReasonValidationFailed = "ValidationFailed"
EventReasonProfilingJobCreated = "ProfilingJobCreated"
EventReasonProfilingJobFailed = "ProfilingJobFailed"
EventReasonAIConfiguratorFailed = "AIConfiguratorFailed"
EventReasonSpecGenerated = "SpecGenerated"
EventReasonSpecChangeRejected = "SpecChangeRejected"
EventReasonDeploymentCreated = "DeploymentCreated"
EventReasonDeploymentReady = "DeploymentReady"
EventReasonDeploymentDegraded = "DeploymentDegraded"
EventReasonDeploymentDeleted = "DeploymentDeleted"
// Label keys
LabelApp = "app"
LabelDGDR = "dgdr"
LabelDGDRName = "dgdr.nvidia.com/name"
LabelDGDRNamespace = "dgdr.nvidia.com/namespace"
LabelManagedBy = "nvidia.com/managed-by"
// Label values
LabelValueDynamoProfiler = "dynamo-profiler"
LabelValueAICProfiler = "aic-profiler"
LabelValueDynamoOperator = "dynamo-operator"
)
// ProfilingPhase represents a sub-phase within the profiling pipeline.
// When the DGDR Phase is "Profiling", this value indicates which step
// of the profiling pipeline is currently executing.
// +kubebuilder:validation:Enum=Initializing;SweepingPrefill;SweepingDecode;SelectingConfig;BuildingCurves;GeneratingDGD;Done
type ProfilingPhase string
const (
// Profiler is loading the DGD template, detecting GPU hardware,
// and resolving the model architecture from HuggingFace.
ProfilingPhaseInitializing ProfilingPhase = "Initializing"
// Sweeping parallelization strategies (TP/TEP/DEP) across GPU counts
// for prefill, measuring TTFT at each configuration.
ProfilingPhaseSweepingPrefill ProfilingPhase = "SweepingPrefill"
// Sweeping parallelization strategies and concurrency levels
// for decode, measuring ITL at each configuration.
ProfilingPhaseSweepingDecode ProfilingPhase = "SweepingDecode"
// Filtering results against SLA targets and selecting the most
// cost-efficient configuration that meets TTFT/ITL requirements.
ProfilingPhaseSelectingConfig ProfilingPhase = "SelectingConfig"
// Building detailed interpolation curves (ISL→TTFT for prefill,
// KV-usage×context-length→ITL for decode) using the selected configs.
ProfilingPhaseBuildingCurves ProfilingPhase = "BuildingCurves"
// Packaging profiling data into a ConfigMap and generating
// the final DGD YAML with planner integration.
ProfilingPhaseGeneratingDGD ProfilingPhase = "GeneratingDGD"
// Profiling pipeline finished successfully.
ProfilingPhaseDone ProfilingPhase = "Done"
)
// Profiling condition Reasons.
//
// Hybrid A+D approach: the status.profilingPhase field is the canonical source
// of the current profiling sub-phase, while the Profiling condition's Reason
// mirrors the phase for kubectl-describe readability. On failure, the Reason
// is set to "<Phase>Failed" to encode both the phase and the error in one field.
const (
// ProfilingReasonInitializing indicates the profiler is loading the DGD template,
// detecting GPU hardware, and resolving the model architecture.
ProfilingReasonInitializing = "Initializing"
// ProfilingReasonSweepingPrefill indicates the profiler is sweeping parallelization
// strategies (TP/TEP/DEP) across GPU counts for prefill, measuring TTFT.
ProfilingReasonSweepingPrefill = "SweepingPrefill"
// ProfilingReasonSweepingDecode indicates the profiler is sweeping parallelization
// strategies and concurrency levels for decode, measuring ITL.
ProfilingReasonSweepingDecode = "SweepingDecode"
// ProfilingReasonSelectingConfig indicates the profiler is filtering results against
// SLA targets and selecting the most cost-efficient configuration.
ProfilingReasonSelectingConfig = "SelectingConfig"
// ProfilingReasonBuildingCurves indicates the profiler is building interpolation
// curves (ISL→TTFT, KV-usage×context-length→ITL) for planner integration.
ProfilingReasonBuildingCurves = "BuildingCurves"
// ProfilingReasonGeneratingDGD indicates the profiler is packaging data into a
// ConfigMap and generating the final DGD YAML.
ProfilingReasonGeneratingDGD = "GeneratingDGD"
// ProfilingReasonInitializingFailed indicates the initialization phase failed.
ProfilingReasonInitializingFailed = "InitializingFailed"
// ProfilingReasonSweepingPrefillFailed indicates the prefill sweep phase failed.
ProfilingReasonSweepingPrefillFailed = "SweepingPrefillFailed"
// ProfilingReasonSweepingDecodeFailed indicates the decode sweep phase failed.
ProfilingReasonSweepingDecodeFailed = "SweepingDecodeFailed"
// ProfilingReasonSelectingConfigFailed indicates the config selection phase failed.
ProfilingReasonSelectingConfigFailed = "SelectingConfigFailed"
// ProfilingReasonBuildingCurvesFailed indicates the curve-building phase failed.
ProfilingReasonBuildingCurvesFailed = "BuildingCurvesFailed"
// ProfilingReasonGeneratingDGDFailed indicates the DGD generation phase failed.
ProfilingReasonGeneratingDGDFailed = "GeneratingDGDFailed"
// ProfilingReasonCompleted indicates the profiling pipeline finished successfully.
ProfilingReasonCompleted = "Completed"
// ProfilingReasonJobCreationFailed indicates the Kubernetes Job for profiling
// could not be created.
ProfilingReasonJobCreationFailed = "JobCreationFailed"
)
// OptimizationType specifies the profiling optimization strategy.
// +kubebuilder:validation:Enum=latency;throughput
type OptimizationType string
const (
OptimizationTypeLatency OptimizationType = "latency"
OptimizationTypeThroughput OptimizationType = "throughput"
)
// SearchStrategy controls the profiling search depth.
// +kubebuilder:validation:Enum=rapid;thorough
type SearchStrategy string
const (
SearchStrategyRapid SearchStrategy = "rapid"
SearchStrategyThorough SearchStrategy = "thorough"
)
// BackendType specifies the inference backend.
// +kubebuilder:validation:Enum=auto;sglang;trtllm;vllm
type BackendType string
const (
BackendTypeAuto BackendType = "auto"
BackendTypeSglang BackendType = "sglang"
BackendTypeTrtllm BackendType = "trtllm"
BackendTypeVllm BackendType = "vllm"
)
// WorkloadSpec defines the workload characteristics for SLA-based profiling.
type WorkloadSpec struct {
// ISL is the Input Sequence Length (number of tokens).
// +optional
// +kubebuilder:default=4000
ISL *int32 `json:"isl,omitempty"`
// OSL is the Output Sequence Length (number of tokens).
// +optional
// +kubebuilder:default=1000
OSL *int32 `json:"osl,omitempty"`
// Concurrency is the target concurrency level.
// Required (or RequestRate) when the planner is disabled.
// +optional
Concurrency *float64 `json:"concurrency,omitempty"`
// RequestRate is the target request rate (req/s).
// Required (or Concurrency) when the planner is disabled.
// +optional
RequestRate *float64 `json:"requestRate,omitempty"`
}
// SLASpec defines the service-level agreement targets for profiling optimization.
// Exactly one mode should be active: ttft+itl (default), e2eLatency, or optimizationType.
type SLASpec struct {
// OptimizationType controls the profiling optimization strategy.
// Use when explicit SLA targets (ttft+itl or e2eLatency) are not known.
// +optional
// +kubebuilder:validation:Enum=latency;throughput
OptimizationType OptimizationType `json:"optimizationType,omitempty"`
// TTFT is the Time To First Token target in milliseconds.
// +optional
// +python-default=2000
TTFT *float64 `json:"ttft,omitempty"`
// ITL is the Inter-Token Latency target in milliseconds.
// +optional
// +python-default=30
ITL *float64 `json:"itl,omitempty"`
// E2ELatency is the target end-to-end request latency in milliseconds.
// Alternative to specifying TTFT + ITL.
// +optional
E2ELatency *float64 `json:"e2eLatency,omitempty"`
}
// ModelCacheSpec references a PVC containing pre-downloaded model weights.
type ModelCacheSpec struct {
// PVCName is the name of the PersistentVolumeClaim containing model weights.
// The PVC must exist in the same namespace as the DGDR.
// +optional
PVCName string `json:"pvcName,omitempty"`
// PVCModelPath is the path to the model checkpoint directory within the PVC
// (e.g. "deepseek-r1" or "models/Llama-3.1-405B-FP8").
// +optional
PVCModelPath string `json:"pvcModelPath,omitempty"`
// PVCMountPath is the mount path for the PVC inside the container.
// +optional
// +kubebuilder:default="/opt/model-cache"
PVCMountPath string `json:"pvcMountPath,omitempty"`
}
// OverridesSpec allows customizing the profiling job and the generated DynamoGraphDeployment.
type OverridesSpec struct {
// ProfilingJob allows overriding the profiling Job specification.
// Fields set here are merged into the controller-generated Job spec.
// +optional
ProfilingJob *batchv1.JobSpec `json:"profilingJob,omitempty"`
// DGD allows providing a full or partial nvidia.com/v1alpha1 DynamoGraphDeployment
// to use as the base for the generated deployment. Fields from profiling results
// are merged on top. Use this to override backend worker images.
//
// The field is stored as a raw embedded resource rather than a typed
// *v1alpha1.DynamoGraphDeployment to avoid a circular import: v1alpha1 already
// imports v1beta1 as the conversion hub and Go does not allow import cycles.
//
// The EmbeddedResource marker tells the API server to validate that the value is a
// well-formed Kubernetes object (has apiVersion/kind), but does not enforce that it
// is specifically a DynamoGraphDeployment. Full type validation (correct apiVersion,
// kind, and field schema) is performed by the controller during reconciliation.
// TODO(future MR): add webhook admission validation for the DGD field type.
// +optional
// +kubebuilder:pruning:PreserveUnknownFields
// +kubebuilder:validation:EmbeddedResource
DGD *runtime.RawExtension `json:"dgd,omitempty"`
}
// PlannerPreDeploymentSweepMode controls pre-deployment sweeping thoroughness for planner profiling.
// +kubebuilder:validation:Enum=none;rapid;thorough
type PlannerPreDeploymentSweepMode string
const (
PlannerPreDeploymentSweepModeNone PlannerPreDeploymentSweepMode = "none"
PlannerPreDeploymentSweepModeRapid PlannerPreDeploymentSweepMode = "rapid"
PlannerPreDeploymentSweepModeThorough PlannerPreDeploymentSweepMode = "thorough"
)
// PlannerSpec configures the SLA planner for autoscaling in the generated DGD.
type PlannerSpec struct {
// Enabled indicates whether the planner is enabled.
// +optional
Enabled bool `json:"enabled,omitempty"`
// PlannerPreDeploymentSweeping controls pre-deployment sweeping mode for planner in-depth profiling.
// "none" means no pre-deployment sweep (only load-based scaling).
// "rapid" uses AI Configurator to simulate engine performance.
// "thorough" uses real GPUs to measure engine performance (takes several hours).
// +optional
// +kubebuilder:validation:Enum=none;rapid;thorough
PlannerPreDeploymentSweeping *PlannerPreDeploymentSweepMode `json:"plannerPreDeploymentSweeping,omitempty"`
// PlannerArgsList is a list of additional planner arguments.
// +optional
PlannerArgsList []string `json:"plannerArgsList,omitempty"`
}
// MockerSpec configures the simulated (mocker) backend.
type MockerSpec struct {
// Enabled indicates whether to deploy mocker workers instead of real inference workers.
// Useful for large-scale testing without GPUs.
// +optional
Enabled bool `json:"enabled,omitempty"`
}
// KVRouterSpec configures KV-cache-aware routing.
type KVRouterSpec struct {
// Enabled indicates whether to enable KV-cache-aware routing in the generated DGD.
// KV routing optimizes request scheduling based on KV cache locality.
// +optional
Enabled bool `json:"enabled,omitempty"`
}
// FeaturesSpec controls optional Dynamo platform features in the generated deployment.
type FeaturesSpec struct {
// Planner configures the SLA planner for autoscaling in the generated DGD.
// +optional
Planner *PlannerSpec `json:"planner,omitempty"`
// TODO: KVRouter support is not yet implemented in the operator.
// KVRouter *KVRouterSpec `json:"kvRouter,omitempty"`
// Mocker configures the simulated (mocker) backend for testing without GPUs.
// +optional
Mocker *MockerSpec `json:"mocker,omitempty"`
}
// HardwareSpec describes the hardware resources available for profiling and deployment.
// These fields are typically auto-filled by the operator from cluster discovery.
type HardwareSpec struct {
// GPUSKU is the GPU SKU identifier (e.g., "H100_SXM", "A100_80GB").
// +optional
GPUSKU string `json:"gpuSku,omitempty"`
// VRAMMB is the VRAM per GPU in MiB.
// +optional
VRAMMB *float64 `json:"vramMb,omitempty"`
// TotalGPUs is the total number of GPUs available in the cluster.
// +optional
TotalGPUs *int32 `json:"totalGpus,omitempty"`
// NumGPUsPerNode is the number of GPUs per node.
// +optional
NumGPUsPerNode *int32 `json:"numGpusPerNode,omitempty"`
}
// DynamoGraphDeploymentRequestSpec defines the desired state of a DynamoGraphDeploymentRequest.
// Only the Model field is required; all other fields are optional and have sensible defaults.
type DynamoGraphDeploymentRequestSpec struct {
// Model specifies the model to deploy (e.g., "Qwen/Qwen3-0.6B", "meta-llama/Llama-3-70b").
// Can be a HuggingFace ID or a private model name.
// +kubebuilder:validation:Required
// +kubebuilder:validation:MinLength=1
Model string `json:"model"`
// Backend specifies the inference backend to use for profiling and deployment.
// +optional
// +kubebuilder:default=auto
// +kubebuilder:validation:Enum=auto;sglang;trtllm;vllm
Backend BackendType `json:"backend,omitempty"`
// Image is the container image reference for the profiling job (frontend image).
// Example: "nvcr.io/nvidia/dynamo-runtime:latest"
// TODO: In a future MR, the operator will derive the backend inference image from the
// backend type automatically; backend images can be overridden via overrides.dgd.
// +optional
Image string `json:"image,omitempty"`
// ModelCache provides optional PVC configuration for pre-downloaded model weights.
// When provided, weights are loaded from the PVC instead of downloading from HuggingFace.
// +optional
ModelCache *ModelCacheSpec `json:"modelCache,omitempty"`
// Hardware describes the hardware resources available for profiling and deployment.
// Typically auto-filled by the operator from cluster discovery.
// +optional
Hardware *HardwareSpec `json:"hardware,omitempty"`
// Workload defines the expected workload characteristics for SLA-based profiling.
// +optional
Workload *WorkloadSpec `json:"workload,omitempty"`
// SLA defines service-level agreement targets that drive profiling optimization.
// +optional
SLA *SLASpec `json:"sla,omitempty"`
// Overrides allows customizing the profiling job and the generated DynamoGraphDeployment.
// +optional
Overrides *OverridesSpec `json:"overrides,omitempty"`
// Features controls optional Dynamo platform features in the generated deployment.
// +optional
Features *FeaturesSpec `json:"features,omitempty"`
// SearchStrategy controls the profiling search depth.
// "rapid" performs a fast sweep; "thorough" explores more configurations.
// +optional
// +kubebuilder:default=rapid
// +kubebuilder:validation:Enum=rapid;thorough
SearchStrategy SearchStrategy `json:"searchStrategy,omitempty"`
// AutoApply indicates whether to automatically create a DynamoGraphDeployment
// after profiling completes. If false, the generated spec is stored in status
// for manual review and application.
// +optional
// +kubebuilder:default=true
AutoApply bool `json:"autoApply,omitempty"`
}
// ParetoConfig represents a single Pareto-optimal deployment configuration
// discovered during profiling.
type ParetoConfig struct {
// Config is the full deployment configuration for this Pareto point.
// +kubebuilder:pruning:PreserveUnknownFields
// +kubebuilder:validation:Type=object
Config runtime.RawExtension `json:"config"`
}
// ProfilingResultsStatus contains the output of the profiling process.
type ProfilingResultsStatus struct {
// Pareto is the list of Pareto-optimal deployment configurations discovered during profiling.
// Each entry represents a different cost/performance trade-off.
// +optional
Pareto []ParetoConfig `json:"pareto,omitempty"`
// SelectedConfig is the recommended configuration chosen by the profiler
// based on the SLA targets. This is the configuration used for deployment
// when autoApply is true.
// +optional
// +kubebuilder:pruning:PreserveUnknownFields
// +kubebuilder:validation:Type=object
SelectedConfig *runtime.RawExtension `json:"selectedConfig,omitempty"`
}
// DeploymentInfoStatus tracks the state of the deployed DynamoGraphDeployment.
type DeploymentInfoStatus struct {
// Replicas is the desired number of replicas.
// +optional
Replicas *int32 `json:"replicas,omitempty"`
// AvailableReplicas is the number of replicas that are available and ready.
// +optional
AvailableReplicas *int32 `json:"availableReplicas,omitempty"`
}
// DynamoGraphDeploymentRequestStatus represents the observed state of a DynamoGraphDeploymentRequest.
type DynamoGraphDeploymentRequestStatus struct {
// Phase is the high-level lifecycle phase of the deployment request.
// +optional
Phase DGDRPhase `json:"phase,omitempty"`
// ProfilingPhase indicates the current sub-phase of the profiling pipeline.
// Only meaningful when Phase is "Profiling". Cleared when profiling completes or fails.
// +optional
ProfilingPhase ProfilingPhase `json:"profilingPhase,omitempty"`
// DGDName is the name of the generated or created DynamoGraphDeployment.
// +optional
DGDName string `json:"dgdName,omitempty"`
// ProfilingJobName is the name of the Kubernetes Job running the profiler.
// +optional
ProfilingJobName string `json:"profilingJobName,omitempty"`
// Conditions contains the latest observed conditions of the deployment request.
// Standard condition types include: Validated, ProfilingComplete, DeploymentReady.
// +optional
// +listType=map
// +listMapKey=type
Conditions []metav1.Condition `json:"conditions,omitempty" patchStrategy:"merge" patchMergeKey:"type"`
// ProfilingResults contains the output of the profiling process including
// Pareto-optimal configurations and the selected deployment configuration.
// +optional
ProfilingResults *ProfilingResultsStatus `json:"profilingResults,omitempty"`
// DeploymentInfo tracks the state of the deployed DynamoGraphDeployment.
// Populated when a DGD has been created (either via autoApply or manually).
// +optional
DeploymentInfo *DeploymentInfoStatus `json:"deploymentInfo,omitempty"`
// ObservedGeneration is the most recent generation observed by the controller.
// +optional
ObservedGeneration int64 `json:"observedGeneration,omitempty"`
}
// DynamoGraphDeploymentRequest is the Schema for the dynamographdeploymentrequests API.
// It provides a simplified, SLA-driven interface for deploying inference models on Dynamo.
// Users specify a model and optional performance targets; the controller handles profiling,
// configuration selection, and deployment.
//
// Lifecycle:
// 1. Pending: Spec validated, preparing for profiling
// 2. Profiling: Profiling job is running to discover optimal configurations
// 3. Ready: Profiling complete, generated DGD spec available in status
// 4. Deploying: DGD is being created and rolled out (when autoApply=true)
// 5. Deployed: DGD is running and healthy
// 6. Failed: An unrecoverable error occurred
//
// +kubebuilder:object:root=true
// +kubebuilder:subresource:status
// +kubebuilder:resource:shortName=dgdr
// +kubebuilder:printcolumn:name="Model",type=string,JSONPath=`.spec.model`
// +kubebuilder:printcolumn:name="Backend",type=string,JSONPath=`.spec.backend`
// +kubebuilder:printcolumn:name="Phase",type=string,JSONPath=`.status.phase`
// +kubebuilder:printcolumn:name="Profiling",type=string,JSONPath=`.status.profilingPhase`
// +kubebuilder:printcolumn:name="Reason",type=string,JSONPath=`.status.conditions[?(@.type=="Succeeded")].reason`,priority=1
// +kubebuilder:printcolumn:name="Message",type=string,JSONPath=`.status.conditions[?(@.type=="Succeeded")].message`,priority=1
// +kubebuilder:printcolumn:name="DGD",type=string,JSONPath=`.status.dgdName`
// +kubebuilder:printcolumn:name="Age",type="date",JSONPath=".metadata.creationTimestamp"
type DynamoGraphDeploymentRequest struct {
metav1.TypeMeta `json:",inline"`
metav1.ObjectMeta `json:"metadata,omitempty"`
// Spec defines the desired state for this deployment request.
Spec DynamoGraphDeploymentRequestSpec `json:"spec,omitempty"`
// Status reflects the current observed state of this deployment request.
Status DynamoGraphDeploymentRequestStatus `json:"status,omitempty"`
}
// +kubebuilder:object:root=true
// DynamoGraphDeploymentRequestList contains a list of DynamoGraphDeploymentRequest resources.
type DynamoGraphDeploymentRequestList struct {
metav1.TypeMeta `json:",inline"`
metav1.ListMeta `json:"metadata,omitempty"`
Items []DynamoGraphDeploymentRequest `json:"items"`
}
func init() {
SchemeBuilder.Register(&DynamoGraphDeploymentRequest{}, &DynamoGraphDeploymentRequestList{})
}
// SetPhase updates the Phase field in the DGDR status.
func (d *DynamoGraphDeploymentRequest) SetPhase(phase DGDRPhase) {
d.Status.Phase = phase
}
// GetPhase returns the current lifecycle phase.
func (d *DynamoGraphDeploymentRequest) GetPhase() DGDRPhase {
return d.Status.Phase
}
// SetProfilingPhase updates the profiling sub-phase.
func (d *DynamoGraphDeploymentRequest) SetProfilingPhase(phase ProfilingPhase) {
d.Status.ProfilingPhase = phase
}
// ClearProfilingPhase resets the profiling sub-phase (e.g., on completion or failure).
func (d *DynamoGraphDeploymentRequest) ClearProfilingPhase() {
d.Status.ProfilingPhase = ""
}
// AddStatusCondition adds or updates a condition in the status.
// Uses apimeta.SetStatusCondition to correctly preserve LastTransitionTime.
func (d *DynamoGraphDeploymentRequest) AddStatusCondition(condition metav1.Condition) {
apimeta.SetStatusCondition(&d.Status.Conditions, condition)
}
/*
* SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Package v1beta1 contains API Schema definitions for the nvidia.com v1beta1 API group.
// +kubebuilder:object:generate=true
// +groupName=nvidia.com
package v1beta1
import (
"k8s.io/apimachinery/pkg/runtime/schema"
"sigs.k8s.io/controller-runtime/pkg/scheme"
)
var (
// GroupVersion is group version used to register these objects
GroupVersion = schema.GroupVersion{Group: "nvidia.com", Version: "v1beta1"}
// SchemeBuilder is used to add go types to the GroupVersionKind scheme
SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion}
// AddToScheme adds the types in this group-version to the given scheme.
AddToScheme = SchemeBuilder.AddToScheme
)
//go:build !ignore_autogenerated
/*
SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
SPDX-License-Identifier: Apache-2.0
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
/*
Copyright 2024.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Code generated by controller-gen. DO NOT EDIT.
package v1beta1
import (
"k8s.io/api/batch/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
)
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *DeploymentInfoStatus) DeepCopyInto(out *DeploymentInfoStatus) {
*out = *in
if in.Replicas != nil {
in, out := &in.Replicas, &out.Replicas
*out = new(int32)
**out = **in
}
if in.AvailableReplicas != nil {
in, out := &in.AvailableReplicas, &out.AvailableReplicas
*out = new(int32)
**out = **in
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DeploymentInfoStatus.
func (in *DeploymentInfoStatus) DeepCopy() *DeploymentInfoStatus {
if in == nil {
return nil
}
out := new(DeploymentInfoStatus)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *DynamoGraphDeploymentRequest) DeepCopyInto(out *DynamoGraphDeploymentRequest) {
*out = *in
out.TypeMeta = in.TypeMeta
in.ObjectMeta.DeepCopyInto(&out.ObjectMeta)
in.Spec.DeepCopyInto(&out.Spec)
in.Status.DeepCopyInto(&out.Status)
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DynamoGraphDeploymentRequest.
func (in *DynamoGraphDeploymentRequest) DeepCopy() *DynamoGraphDeploymentRequest {
if in == nil {
return nil
}
out := new(DynamoGraphDeploymentRequest)
in.DeepCopyInto(out)
return out
}
// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
func (in *DynamoGraphDeploymentRequest) DeepCopyObject() runtime.Object {
if c := in.DeepCopy(); c != nil {
return c
}
return nil
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *DynamoGraphDeploymentRequestList) DeepCopyInto(out *DynamoGraphDeploymentRequestList) {
*out = *in
out.TypeMeta = in.TypeMeta
in.ListMeta.DeepCopyInto(&out.ListMeta)
if in.Items != nil {
in, out := &in.Items, &out.Items
*out = make([]DynamoGraphDeploymentRequest, len(*in))
for i := range *in {
(*in)[i].DeepCopyInto(&(*out)[i])
}
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DynamoGraphDeploymentRequestList.
func (in *DynamoGraphDeploymentRequestList) DeepCopy() *DynamoGraphDeploymentRequestList {
if in == nil {
return nil
}
out := new(DynamoGraphDeploymentRequestList)
in.DeepCopyInto(out)
return out
}
// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
func (in *DynamoGraphDeploymentRequestList) DeepCopyObject() runtime.Object {
if c := in.DeepCopy(); c != nil {
return c
}
return nil
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *DynamoGraphDeploymentRequestSpec) DeepCopyInto(out *DynamoGraphDeploymentRequestSpec) {
*out = *in
if in.ModelCache != nil {
in, out := &in.ModelCache, &out.ModelCache
*out = new(ModelCacheSpec)
**out = **in
}
if in.Hardware != nil {
in, out := &in.Hardware, &out.Hardware
*out = new(HardwareSpec)
(*in).DeepCopyInto(*out)
}
if in.Workload != nil {
in, out := &in.Workload, &out.Workload
*out = new(WorkloadSpec)
(*in).DeepCopyInto(*out)
}
if in.SLA != nil {
in, out := &in.SLA, &out.SLA
*out = new(SLASpec)
(*in).DeepCopyInto(*out)
}
if in.Overrides != nil {
in, out := &in.Overrides, &out.Overrides
*out = new(OverridesSpec)
(*in).DeepCopyInto(*out)
}
if in.Features != nil {
in, out := &in.Features, &out.Features
*out = new(FeaturesSpec)
(*in).DeepCopyInto(*out)
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DynamoGraphDeploymentRequestSpec.
func (in *DynamoGraphDeploymentRequestSpec) DeepCopy() *DynamoGraphDeploymentRequestSpec {
if in == nil {
return nil
}
out := new(DynamoGraphDeploymentRequestSpec)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *DynamoGraphDeploymentRequestStatus) DeepCopyInto(out *DynamoGraphDeploymentRequestStatus) {
*out = *in
if in.Conditions != nil {
in, out := &in.Conditions, &out.Conditions
*out = make([]metav1.Condition, len(*in))
for i := range *in {
(*in)[i].DeepCopyInto(&(*out)[i])
}
}
if in.ProfilingResults != nil {
in, out := &in.ProfilingResults, &out.ProfilingResults
*out = new(ProfilingResultsStatus)
(*in).DeepCopyInto(*out)
}
if in.DeploymentInfo != nil {
in, out := &in.DeploymentInfo, &out.DeploymentInfo
*out = new(DeploymentInfoStatus)
(*in).DeepCopyInto(*out)
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DynamoGraphDeploymentRequestStatus.
func (in *DynamoGraphDeploymentRequestStatus) DeepCopy() *DynamoGraphDeploymentRequestStatus {
if in == nil {
return nil
}
out := new(DynamoGraphDeploymentRequestStatus)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *FeaturesSpec) DeepCopyInto(out *FeaturesSpec) {
*out = *in
if in.Planner != nil {
in, out := &in.Planner, &out.Planner
*out = new(PlannerSpec)
(*in).DeepCopyInto(*out)
}
if in.Mocker != nil {
in, out := &in.Mocker, &out.Mocker
*out = new(MockerSpec)
**out = **in
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FeaturesSpec.
func (in *FeaturesSpec) DeepCopy() *FeaturesSpec {
if in == nil {
return nil
}
out := new(FeaturesSpec)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *HardwareSpec) DeepCopyInto(out *HardwareSpec) {
*out = *in
if in.VRAMMB != nil {
in, out := &in.VRAMMB, &out.VRAMMB
*out = new(float64)
**out = **in
}
if in.TotalGPUs != nil {
in, out := &in.TotalGPUs, &out.TotalGPUs
*out = new(int32)
**out = **in
}
if in.NumGPUsPerNode != nil {
in, out := &in.NumGPUsPerNode, &out.NumGPUsPerNode
*out = new(int32)
**out = **in
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HardwareSpec.
func (in *HardwareSpec) DeepCopy() *HardwareSpec {
if in == nil {
return nil
}
out := new(HardwareSpec)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *KVRouterSpec) DeepCopyInto(out *KVRouterSpec) {
*out = *in
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KVRouterSpec.
func (in *KVRouterSpec) DeepCopy() *KVRouterSpec {
if in == nil {
return nil
}
out := new(KVRouterSpec)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *MockerSpec) DeepCopyInto(out *MockerSpec) {
*out = *in
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MockerSpec.
func (in *MockerSpec) DeepCopy() *MockerSpec {
if in == nil {
return nil
}
out := new(MockerSpec)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ModelCacheSpec) DeepCopyInto(out *ModelCacheSpec) {
*out = *in
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ModelCacheSpec.
func (in *ModelCacheSpec) DeepCopy() *ModelCacheSpec {
if in == nil {
return nil
}
out := new(ModelCacheSpec)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *OverridesSpec) DeepCopyInto(out *OverridesSpec) {
*out = *in
if in.ProfilingJob != nil {
in, out := &in.ProfilingJob, &out.ProfilingJob
*out = new(v1.JobSpec)
(*in).DeepCopyInto(*out)
}
if in.DGD != nil {
in, out := &in.DGD, &out.DGD
*out = new(runtime.RawExtension)
(*in).DeepCopyInto(*out)
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new OverridesSpec.
func (in *OverridesSpec) DeepCopy() *OverridesSpec {
if in == nil {
return nil
}
out := new(OverridesSpec)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ParetoConfig) DeepCopyInto(out *ParetoConfig) {
*out = *in
in.Config.DeepCopyInto(&out.Config)
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ParetoConfig.
func (in *ParetoConfig) DeepCopy() *ParetoConfig {
if in == nil {
return nil
}
out := new(ParetoConfig)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *PlannerSpec) DeepCopyInto(out *PlannerSpec) {
*out = *in
if in.PlannerPreDeploymentSweeping != nil {
in, out := &in.PlannerPreDeploymentSweeping, &out.PlannerPreDeploymentSweeping
*out = new(PlannerPreDeploymentSweepMode)
**out = **in
}
if in.PlannerArgsList != nil {
in, out := &in.PlannerArgsList, &out.PlannerArgsList
*out = make([]string, len(*in))
copy(*out, *in)
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PlannerSpec.
func (in *PlannerSpec) DeepCopy() *PlannerSpec {
if in == nil {
return nil
}
out := new(PlannerSpec)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ProfilingResultsStatus) DeepCopyInto(out *ProfilingResultsStatus) {
*out = *in
if in.Pareto != nil {
in, out := &in.Pareto, &out.Pareto
*out = make([]ParetoConfig, len(*in))
for i := range *in {
(*in)[i].DeepCopyInto(&(*out)[i])
}
}
if in.SelectedConfig != nil {
in, out := &in.SelectedConfig, &out.SelectedConfig
*out = new(runtime.RawExtension)
(*in).DeepCopyInto(*out)
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ProfilingResultsStatus.
func (in *ProfilingResultsStatus) DeepCopy() *ProfilingResultsStatus {
if in == nil {
return nil
}
out := new(ProfilingResultsStatus)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *SLASpec) DeepCopyInto(out *SLASpec) {
*out = *in
if in.TTFT != nil {
in, out := &in.TTFT, &out.TTFT
*out = new(float64)
**out = **in
}
if in.ITL != nil {
in, out := &in.ITL, &out.ITL
*out = new(float64)
**out = **in
}
if in.E2ELatency != nil {
in, out := &in.E2ELatency, &out.E2ELatency
*out = new(float64)
**out = **in
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SLASpec.
func (in *SLASpec) DeepCopy() *SLASpec {
if in == nil {
return nil
}
out := new(SLASpec)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *WorkloadSpec) DeepCopyInto(out *WorkloadSpec) {
*out = *in
if in.ISL != nil {
in, out := &in.ISL, &out.ISL
*out = new(int32)
**out = **in
}
if in.OSL != nil {
in, out := &in.OSL, &out.OSL
*out = new(int32)
**out = **in
}
if in.Concurrency != nil {
in, out := &in.Concurrency, &out.Concurrency
*out = new(float64)
**out = **in
}
if in.RequestRate != nil {
in, out := &in.RequestRate, &out.RequestRate
*out = new(float64)
**out = **in
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new WorkloadSpec.
func (in *WorkloadSpec) DeepCopy() *WorkloadSpec {
if in == nil {
return nil
}
out := new(WorkloadSpec)
in.DeepCopyInto(out)
return out
}
......@@ -56,6 +56,7 @@ import (
semver "github.com/Masterminds/semver/v3"
nvidiacomv1alpha1 "github.com/ai-dynamo/dynamo/deploy/operator/api/v1alpha1"
nvidiacomv1beta1 "github.com/ai-dynamo/dynamo/deploy/operator/api/v1beta1"
"github.com/ai-dynamo/dynamo/deploy/operator/internal/consts"
"github.com/ai-dynamo/dynamo/deploy/operator/internal/controller"
commonController "github.com/ai-dynamo/dynamo/deploy/operator/internal/controller_common"
......@@ -123,6 +124,7 @@ func init() {
utilruntime.Must(istioclientsetscheme.AddToScheme(scheme))
utilruntime.Must(gaiev1.Install(scheme))
utilruntime.Must(nvidiacomv1beta1.AddToScheme(scheme))
//+kubebuilder:scaffold:scheme
}
......@@ -744,6 +746,13 @@ func main() {
os.Exit(1)
}
if err = ctrl.NewWebhookManagedBy(mgr).
For(&nvidiacomv1alpha1.DynamoGraphDeploymentRequest{}).
Complete(); err != nil {
setupLog.Error(err, "unable to register conversion webhook", "webhook", "DynamoGraphDeploymentRequest-conversion")
os.Exit(1)
}
setupLog.Info("Validation webhooks registered successfully")
// Register defaulting (mutating) webhook handlers
......
This source diff could not be displayed because it is too large. You can view the blob instead.
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Post-process api-reference.md to deduplicate anchors between v1alpha1 and v1beta1.
crd-ref-docs generates anchors solely from type names, so types that exist in both
API versions get identical anchors (e.g. #dynamographdeploymentrequest). In standard
Markdown renderers the first occurrence wins, meaning v1beta1 links resolve to the
v1alpha1 section. This script prepends "v1beta1 " to the affected headings in the
v1beta1 section and updates all intra-section links to match the new anchors.
"""
import re
import sys
if len(sys.argv) != 2:
print(f"Usage: {sys.argv[0]} <api-reference.md>", file=sys.stderr)
sys.exit(1)
path = sys.argv[1]
content = open(path).read()
marker = "## nvidia.com/v1beta1"
idx = content.find(marker)
if idx == -1:
print("Warning: v1beta1 section not found, skipping anchor fix", file=sys.stderr)
sys.exit(0)
alpha_part = content[:idx]
beta_part = content[idx:]
# Types whose names collide between v1alpha1 and v1beta1.
# Add to this list if future versions introduce additional same-named types.
duplicate_types = [
"DynamoGraphDeploymentRequest",
"DynamoGraphDeploymentRequestSpec",
"DynamoGraphDeploymentRequestStatus",
]
for t in duplicate_types:
anchor = t.lower()
# Rename section headings: #### TypeName → #### v1beta1 TypeName
beta_part = re.sub(
r"(####\s+)" + re.escape(t) + r"(\s*$)",
r"\1v1beta1 " + t + r"\2",
beta_part,
flags=re.MULTILINE,
)
# Update markdown links: (#anchor) → (#v1beta1-anchor)
beta_part = beta_part.replace(f"(#{anchor})", f"(#v1beta1-{anchor})")
open(path, "w").write(alpha_part + beta_part)
print(f"✅ Fixed duplicate anchors in {path}")
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment