common.go

/*
 * SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package v1alpha1

import (
	"encoding/json"

	autoscalingv2 "k8s.io/api/autoscaling/v2"
	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
)

// +kubebuilder:validation:XValidation:rule="!has(self.create) || self.create == false || (has(self.size) && has(self.storageClass) && has(self.volumeAccessMode))",message="When create is true, size, storageClass, and volumeAccessMode are required"
type PVC struct {
	// Create indicates to create a new PVC
	Create *bool `json:"create,omitempty"`
	// Name is the name of the PVC
	// +kubebuilder:validation:Required
	Name *string `json:"name,omitempty"`
	// StorageClass to be used for PVC creation. Required when create is true.
	StorageClass string `json:"storageClass,omitempty"`
	// Size of the volume in Gi, used during PVC creation. Required when create is true.
	Size resource.Quantity `json:"size,omitempty"`
	// VolumeAccessMode is the volume access mode of the PVC. Required when create is true.
	VolumeAccessMode corev1.PersistentVolumeAccessMode `json:"volumeAccessMode,omitempty"`
}

// VolumeMount references a PVC defined at the top level for volumes to be mounted by the component
type VolumeMount struct {
	// Name references a PVC name defined in the top-level PVCs map
	// +kubebuilder:validation:Required
	Name string `json:"name,omitempty"`
	// MountPoint specifies where to mount the volume.
	// If useAsCompilationCache is true and mountPoint is not specified,
	// a backend-specific default will be used.
	MountPoint string `json:"mountPoint,omitempty"`
	// UseAsCompilationCache indicates this volume should be used as a compilation cache.
	// When true, backend-specific environment variables will be set and default mount points may be used.
	// +kubebuilder:default=false
	UseAsCompilationCache bool `json:"useAsCompilationCache,omitempty"`
}

// Deprecated: This field is deprecated and ignored. Use DynamoGraphDeploymentScalingAdapter
// with HPA, KEDA, or Planner for autoscaling instead. See docs/kubernetes/autoscaling.md
// for migration guidance. This field will be removed in a future API version.
type Autoscaling struct {
	// Deprecated: This field is ignored.
	Enabled bool `json:"enabled,omitempty"`
	// Deprecated: This field is ignored.
	MinReplicas int `json:"minReplicas,omitempty"`
	// Deprecated: This field is ignored.
	MaxReplicas int `json:"maxReplicas,omitempty"`
	// Deprecated: This field is ignored.
	Behavior *autoscalingv2.HorizontalPodAutoscalerBehavior `json:"behavior,omitempty"`
	// Deprecated: This field is ignored.
	Metrics []autoscalingv2.MetricSpec `json:"metrics,omitempty"`
}

// +kubebuilder:validation:XValidation:rule="!(has(self.disabled) && self.disabled && has(self.size))",message="sharedMemory.size must not be set when sharedMemory.disabled is true"
type SharedMemorySpec struct {
	Disabled bool              `json:"disabled,omitempty"`
	Size     resource.Quantity `json:"size,omitempty"`
}

type ResourceItem struct {
	// CPU specifies the CPU resource request/limit (e.g., "1000m", "2")
	CPU string `json:"cpu,omitempty"`
	// Memory specifies the memory resource request/limit (e.g., "4Gi", "8Gi")
	Memory string `json:"memory,omitempty"`
	// GPU indicates the number of GPUs to request.
	// Total number of GPUs is NumberOfNodes * GPU in case of multinode deployment.
	GPU string `json:"gpu,omitempty"`
	// GPUType can specify a custom GPU type, e.g. "gpu.intel.com/xe"
	// By default if not specified, the GPU type is "nvidia.com/gpu"
	GPUType string `json:"gpuType,omitempty"`
	// Custom specifies additional custom resource requests/limits
	Custom map[string]string `json:"custom,omitempty"`
}

// Resources defines requested and limits for a component, including CPU, memory,
// GPUs/devices, and any runtime-specific resources.
type Resources struct {
	// Requests specifies the minimum resources required by the component
	Requests *ResourceItem `json:"requests,omitempty"`
	// Limits specifies the maximum resources allowed for the component
	Limits *ResourceItem `json:"limits,omitempty"`
	// Claims specifies resource claims for dynamic resource allocation
	Claims []corev1.ResourceClaim `json:"claims,omitempty"`
}

type DeploymentTargetHPAConf struct {
	CPU         *int32  `json:"cpu,omitempty"`
	GPU         *int32  `json:"gpu,omitempty"`
	Memory      *string `json:"memory,omitempty"`
	QPS         *int64  `json:"qps,omitempty"`
	MinReplicas *int32  `json:"min_replicas,omitempty"`
	MaxReplicas *int32  `json:"max_replicas,omitempty"`
}

type LabelItemSchema struct {
	Key   string `json:"key"`
	Value string `json:"value"`
}

type ExtraPodMetadata struct {
	Annotations map[string]string `json:"annotations,omitempty"`
	Labels      map[string]string `json:"labels,omitempty"`
}

type ExtraPodSpec struct {
	*corev1.PodSpec `json:",inline"`
	MainContainer   *corev1.Container `json:"mainContainer,omitempty"`
}

// MarshalJSON implements json.Marshaler for ExtraPodSpec.
//
// corev1.PodSpec.Containers is declared without omitempty, so a nil slice
// serializes as "containers": null.  The CRD structural schema defines
// containers as type: array and rejects null.  This custom marshaller shadows
// the Containers field with an omitempty-tagged copy so that nil/empty
// Containers are omitted from the JSON output entirely.
func (e ExtraPodSpec) MarshalJSON() ([]byte, error) {
	// Type alias strips methods from corev1.PodSpec, preventing infinite
	// recursion through any MarshalJSON defined on PodSpec.
	type PodSpecAlias corev1.PodSpec

	aux := struct {
		*PodSpecAlias `json:",inline"`
		Containers    []corev1.Container `json:"containers,omitempty"`
		MainContainer *corev1.Container  `json:"mainContainer,omitempty"`
	}{}

	if e.PodSpec != nil {
		a := PodSpecAlias(*e.PodSpec)
		aux.PodSpecAlias = &a
		aux.Containers = e.PodSpec.Containers
	}
	aux.MainContainer = e.MainContainer

	return json.Marshal(aux)
}

// GPUMemoryServiceMode selects the GMS deployment topology.
type GPUMemoryServiceMode string

const (
	// GMSModeIntraPod runs GMS as a sidecar within the same pod.
	GMSModeIntraPod GPUMemoryServiceMode = "intraPod"
	// GMSModeInterPod runs GMS as a separate weight server pod and one or more
	// engine pods per rank, sharing GPUs via DRA ResourceClaims and a shared
	// hostPath volume for UDS sockets. Only valid on FailoverSpec; the
	// GPUMemoryServiceSpec sidecar always runs in intraPod mode.
	GMSModeInterPod GPUMemoryServiceMode = "interPod"
)

// GPUMemoryServiceSpec configures the GPU Memory Service (GMS) sidecar for a worker component.
// When enabled, the operator injects a GMS sidecar that provides shared GPU memory access
// via DRA (Dynamic Resource Allocation). The sidecar runs two GMS processes per GPU
// (weights + kv_cache) and communicates with the main container over UDS sockets.
type GPUMemoryServiceSpec struct {
	// Enabled activates the GMS sidecar. GPU resources on the main container
	// are replaced with a DRA ResourceClaim for shared GPU access.
	Enabled bool `json:"enabled"`
	// Mode selects the GMS deployment topology.
	// +kubebuilder:default=intraPod
	// +kubebuilder:validation:Enum=intraPod;interPod
	// +optional
	Mode GPUMemoryServiceMode `json:"mode,omitempty"`
	// DeviceClassName is the DRA DeviceClass to request GPUs from.
	// +kubebuilder:default="gpu.nvidia.com"
	// +optional
	DeviceClassName string `json:"deviceClassName,omitempty"`
}

// FailoverSpec configures active-passive failover for a worker component.
// For intraPod mode: requires gpuMemoryService.enabled; the main container is cloned
// into engine containers (active + standby) within the same pod.
// For interPod mode: the operator creates a dedicated GMS weight server pod and
// multiple engine pods per rank that share GPUs via DRA resource claims.
type FailoverSpec struct {
	// Enabled activates failover mode.
	Enabled bool `json:"enabled"`
	// Mode selects the failover deployment topology.
	// intraPod: engine containers run within the same pod (requires gpuMemoryService.enabled).
	// interPod: a dedicated GMS weight server pod + engine pods per rank (requires Grove).
	// +kubebuilder:default=intraPod
	// +kubebuilder:validation:Enum=intraPod;interPod
	// +optional
	Mode GPUMemoryServiceMode `json:"mode,omitempty"`
	// NumShadows is the number of shadow (standby) engine pods per rank.
	// Total engine pods per rank = NumShadows + 1 (1 primary + NumShadows shadows).
	//
	// NumShadows is only meaningful for mode=interPod; intraPod uses a fixed
	// 1 primary + 1 shadow sidecar layout and any value other than 1 is
	// rejected at admission time.
	// +kubebuilder:default=1
	// +kubebuilder:validation:Minimum=1
	// +optional
	NumShadows int32 `json:"numShadows,omitempty"`
}

// ScalingAdapter configures whether a service uses the DynamoGraphDeploymentScalingAdapter
// for replica management. When enabled, the DGDSA owns the replicas field and
// external autoscalers (HPA, KEDA, Planner) can control scaling via the Scale subresource.
type ScalingAdapter struct {
	// Enabled indicates whether the ScalingAdapter should be enabled for this service.
	// When true, a DGDSA is created and owns the replicas field.
	// When false (default), no DGDSA is created and replicas can be modified directly in the DGD.
	// +optional
	// +kubebuilder:default=false
	Enabled bool `json:"enabled,omitempty"`
}

// CheckpointMode defines how checkpoint creation is handled
// +kubebuilder:validation:Enum=Auto;Manual
type CheckpointMode string

const (
	// CheckpointModeAuto means the DGD controller will automatically create a Checkpoint CR
	CheckpointModeAuto CheckpointMode = "Auto"
	// CheckpointModeManual means the user must create the Checkpoint CR themselves
	CheckpointModeManual CheckpointMode = "Manual"
)

// ServiceCheckpointConfig configures checkpointing for a DGD service
// +kubebuilder:validation:XValidation:rule="!self.enabled || (has(self.checkpointRef) && size(self.checkpointRef) > 0) || (has(self.identity) && has(self.identity.model) && has(self.identity.backendFramework))",message="When enabled, either checkpointRef or both identity.model and identity.backendFramework must be specified"
type ServiceCheckpointConfig struct {
	// Enabled indicates whether checkpointing is enabled for this service
	// +optional
	// +kubebuilder:default=false
	Enabled bool `json:"enabled,omitempty"`

	// Mode defines how checkpoint creation is handled
	// - Auto: DGD controller creates Checkpoint CR automatically
	// - Manual: User must create Checkpoint CR
	// +optional
	// +kubebuilder:default=Auto
	Mode CheckpointMode `json:"mode,omitempty"`

	// CheckpointRef references an existing DynamoCheckpoint CR by metadata.name.
	// If specified, this service's Identity is ignored and the referenced checkpoint is used directly.
	// +optional
	CheckpointRef *string `json:"checkpointRef,omitempty"`

	// Identity defines the checkpoint identity for hash computation
	// Used when Mode is Auto or when looking up existing checkpoints
	// Required when checkpointRef is not specified
	// +optional
	Identity *DynamoCheckpointIdentity `json:"identity,omitempty"`
}