Unverified Commit 1a5016b0 authored by Thomas Montfort, committed by GitHub
Browse files

feat: add subComponentType in DGD API and uptake in planner (#3200)


Signed-off-by: tmontfort <tmontfort@nvidia.com>
Signed-off-by: hongkuanz <hongkuanz@nvidia.com>
Co-authored-by: hongkuanz <hongkuanz@nvidia.com>
parent 13156361
...@@ -216,20 +216,6 @@ COPY --from=dynamo_base /usr/local/bin/etcd/ /usr/local/bin/etcd/ ...@@ -216,20 +216,6 @@ COPY --from=dynamo_base /usr/local/bin/etcd/ /usr/local/bin/etcd/
# Add ETCD and CUDA binaries to PATH so cicc and other CUDA tools are accessible # Add ETCD and CUDA binaries to PATH so cicc and other CUDA tools are accessible
ENV PATH=/usr/local/bin/etcd/:/usr/local/cuda/nvvm/bin:$PATH ENV PATH=/usr/local/bin/etcd/:/usr/local/cuda/nvvm/bin:$PATH
# Install prometheus
ARG PROM_VERSION=3.4.1
RUN ARCH=$(dpkg --print-architecture) && \
case "$ARCH" in \
amd64) PLATFORM=linux-amd64 ;; \
arm64) PLATFORM=linux-arm64 ;; \
*) echo "Unsupported architecture: $ARCH" && exit 1 ;; \
esac && \
curl -fsSL --retry 5 --retry-delay 5 "https://github.com/prometheus/prometheus/releases/download/v${PROM_VERSION}/prometheus-${PROM_VERSION}.${PLATFORM}.tar.gz" \
| tar -xz -C /tmp && \
mv "/tmp/prometheus-${PROM_VERSION}.${PLATFORM}/prometheus" /usr/local/bin/ && \
chmod +x /usr/local/bin/prometheus && \
rm -rf "/tmp/prometheus-${PROM_VERSION}.${PLATFORM}"
# Copy UCX from dev image as plugin for NIXL # Copy UCX from dev image as plugin for NIXL
# Copy NIXL source from dev image # Copy NIXL source from dev image
# Copy dynamo wheels for gitlab artifacts # Copy dynamo wheels for gitlab artifacts
......
...@@ -683,7 +683,7 @@ spec: ...@@ -683,7 +683,7 @@ spec:
Typically corresponds to a component defined in the packaged Dynamo artifacts. Typically corresponds to a component defined in the packaged Dynamo artifacts.
type: string type: string
dynamoNamespace: dynamoNamespace:
description: dynamo namespace of the service (allows to override the dynamo namespace of the service defined in annotations inside the dynamo archive) description: Dynamo namespace of the service (allows to override the Dynamo namespace of the service defined in annotations inside the Dynamo archive)
type: string type: string
dynamoTag: dynamoTag:
description: 'contains the tag of the DynamoComponent: for example, "my_package:MyService"' description: 'contains the tag of the DynamoComponent: for example, "my_package:MyService"'
...@@ -816,8 +816,9 @@ spec: ...@@ -816,8 +816,9 @@ spec:
type: object type: object
extraPodSpec: extraPodSpec:
description: |- description: |-
ExtraPodSpec merges additional fields into the generated PodSpec for advanced ExtraPodSpec allows to override the main pod spec configuration.
customization (tolerations, node selectors, affinity, etc.). It is a k8s standard PodSpec. It also contains a MainContainer (standard k8s Container) field
that allows overriding the main container configuration.
properties: properties:
activeDeadlineSeconds: activeDeadlineSeconds:
description: |- description: |-
...@@ -10239,7 +10240,7 @@ spec: ...@@ -10239,7 +10240,7 @@ spec:
type: object type: object
type: object type: object
serviceName: serviceName:
description: contains the name of the component description: The name of the component
type: string type: string
sharedMemory: sharedMemory:
description: SharedMemory controls the tmpfs mounted at /dev/shm (enable/disable and size). description: SharedMemory controls the tmpfs mounted at /dev/shm (enable/disable and size).
...@@ -10253,6 +10254,9 @@ spec: ...@@ -10253,6 +10254,9 @@ spec:
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true x-kubernetes-int-or-string: true
type: object type: object
subComponentType:
description: SubComponentType indicates the sub-role of this component (for example, "prefill").
type: string
type: object type: object
status: status:
description: Status reflects the current observed state of the component deployment. description: Status reflects the current observed state of the component deployment.
......
...@@ -785,7 +785,7 @@ spec: ...@@ -785,7 +785,7 @@ spec:
description: ComponentType indicates the role of this component (for example, "main"). description: ComponentType indicates the role of this component (for example, "main").
type: string type: string
dynamoNamespace: dynamoNamespace:
description: dynamo namespace of the service (allows to override the dynamo namespace of the service defined in annotations inside the dynamo archive) description: Dynamo namespace of the service (allows to override the Dynamo namespace of the service defined in annotations inside the Dynamo archive)
type: string type: string
envFromSecret: envFromSecret:
description: |- description: |-
...@@ -915,8 +915,9 @@ spec: ...@@ -915,8 +915,9 @@ spec:
type: object type: object
extraPodSpec: extraPodSpec:
description: |- description: |-
ExtraPodSpec merges additional fields into the generated PodSpec for advanced ExtraPodSpec allows to override the main pod spec configuration.
customization (tolerations, node selectors, affinity, etc.). It is a k8s standard PodSpec. It also contains a MainContainer (standard k8s Container) field
that allows overriding the main container configuration.
properties: properties:
activeDeadlineSeconds: activeDeadlineSeconds:
description: |- description: |-
...@@ -10338,7 +10339,7 @@ spec: ...@@ -10338,7 +10339,7 @@ spec:
type: object type: object
type: object type: object
serviceName: serviceName:
description: contains the name of the component description: The name of the component
type: string type: string
sharedMemory: sharedMemory:
description: SharedMemory controls the tmpfs mounted at /dev/shm (enable/disable and size). description: SharedMemory controls the tmpfs mounted at /dev/shm (enable/disable and size).
...@@ -10352,6 +10353,9 @@ spec: ...@@ -10352,6 +10353,9 @@ spec:
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true x-kubernetes-int-or-string: true
type: object type: object
subComponentType:
description: SubComponentType indicates the sub-role of this component (for example, "prefill").
type: string
type: object type: object
description: |- description: |-
Services allows per-service overrides of the component deployment settings. Services allows per-service overrides of the component deployment settings.
......
...@@ -27,9 +27,14 @@ spec: ...@@ -27,9 +27,14 @@ spec:
any: true any: true
{{- end }} {{- end }}
podMetricsEndpoints: podMetricsEndpoints:
- interval: 30s - interval: 5s
path: /metrics path: /metrics
port: http port: http
relabelings:
- action: replace
sourceLabels:
- __meta_kubernetes_pod_label_nvidia_com_dynamo_namespace
targetLabel: dynamo_namespace
selector: selector:
matchLabels: matchLabels:
nvidia.com/dynamo-component-type: frontend nvidia.com/dynamo-component-type: frontend
...@@ -49,7 +54,7 @@ spec: ...@@ -49,7 +54,7 @@ spec:
any: true any: true
{{- end }} {{- end }}
podMetricsEndpoints: podMetricsEndpoints:
- interval: 30s - interval: 5s
path: /metrics path: /metrics
port: system port: system
selector: selector:
...@@ -71,7 +76,7 @@ spec: ...@@ -71,7 +76,7 @@ spec:
any: true any: true
{{- end }} {{- end }}
podMetricsEndpoints: podMetricsEndpoints:
- interval: 30s - interval: 5s
path: /metrics path: /metrics
port: metrics port: metrics
selector: selector:
......
...@@ -73,6 +73,9 @@ type DynamoComponentDeploymentSharedSpec struct { ...@@ -73,6 +73,9 @@ type DynamoComponentDeploymentSharedSpec struct {
// ComponentType indicates the role of this component (for example, "main"). // ComponentType indicates the role of this component (for example, "main").
ComponentType string `json:"componentType,omitempty"` ComponentType string `json:"componentType,omitempty"`
// SubComponentType indicates the sub-role of this component (for example, "prefill").
SubComponentType string `json:"subComponentType,omitempty"`
// Dynamo namespace of the service (allows to override the Dynamo namespace of the service defined in annotations inside the Dynamo archive) // Dynamo namespace of the service (allows to override the Dynamo namespace of the service defined in annotations inside the Dynamo archive)
DynamoNamespace *string `json:"dynamoNamespace,omitempty"` DynamoNamespace *string `json:"dynamoNamespace,omitempty"`
......
...@@ -683,7 +683,7 @@ spec: ...@@ -683,7 +683,7 @@ spec:
Typically corresponds to a component defined in the packaged Dynamo artifacts. Typically corresponds to a component defined in the packaged Dynamo artifacts.
type: string type: string
dynamoNamespace: dynamoNamespace:
description: dynamo namespace of the service (allows to override the dynamo namespace of the service defined in annotations inside the dynamo archive) description: Dynamo namespace of the service (allows to override the Dynamo namespace of the service defined in annotations inside the Dynamo archive)
type: string type: string
dynamoTag: dynamoTag:
description: 'contains the tag of the DynamoComponent: for example, "my_package:MyService"' description: 'contains the tag of the DynamoComponent: for example, "my_package:MyService"'
...@@ -816,8 +816,9 @@ spec: ...@@ -816,8 +816,9 @@ spec:
type: object type: object
extraPodSpec: extraPodSpec:
description: |- description: |-
ExtraPodSpec merges additional fields into the generated PodSpec for advanced ExtraPodSpec allows to override the main pod spec configuration.
customization (tolerations, node selectors, affinity, etc.). It is a k8s standard PodSpec. It also contains a MainContainer (standard k8s Container) field
that allows overriding the main container configuration.
properties: properties:
activeDeadlineSeconds: activeDeadlineSeconds:
description: |- description: |-
...@@ -10239,7 +10240,7 @@ spec: ...@@ -10239,7 +10240,7 @@ spec:
type: object type: object
type: object type: object
serviceName: serviceName:
description: contains the name of the component description: The name of the component
type: string type: string
sharedMemory: sharedMemory:
description: SharedMemory controls the tmpfs mounted at /dev/shm (enable/disable and size). description: SharedMemory controls the tmpfs mounted at /dev/shm (enable/disable and size).
...@@ -10253,6 +10254,9 @@ spec: ...@@ -10253,6 +10254,9 @@ spec:
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true x-kubernetes-int-or-string: true
type: object type: object
subComponentType:
description: SubComponentType indicates the sub-role of this component (for example, "prefill").
type: string
type: object type: object
status: status:
description: Status reflects the current observed state of the component deployment. description: Status reflects the current observed state of the component deployment.
......
...@@ -785,7 +785,7 @@ spec: ...@@ -785,7 +785,7 @@ spec:
description: ComponentType indicates the role of this component (for example, "main"). description: ComponentType indicates the role of this component (for example, "main").
type: string type: string
dynamoNamespace: dynamoNamespace:
description: dynamo namespace of the service (allows to override the dynamo namespace of the service defined in annotations inside the dynamo archive) description: Dynamo namespace of the service (allows to override the Dynamo namespace of the service defined in annotations inside the Dynamo archive)
type: string type: string
envFromSecret: envFromSecret:
description: |- description: |-
...@@ -915,8 +915,9 @@ spec: ...@@ -915,8 +915,9 @@ spec:
type: object type: object
extraPodSpec: extraPodSpec:
description: |- description: |-
ExtraPodSpec merges additional fields into the generated PodSpec for advanced ExtraPodSpec allows to override the main pod spec configuration.
customization (tolerations, node selectors, affinity, etc.). It is a k8s standard PodSpec. It also contains a MainContainer (standard k8s Container) field
that allows overriding the main container configuration.
properties: properties:
activeDeadlineSeconds: activeDeadlineSeconds:
description: |- description: |-
...@@ -10338,7 +10339,7 @@ spec: ...@@ -10338,7 +10339,7 @@ spec:
type: object type: object
type: object type: object
serviceName: serviceName:
description: contains the name of the component description: The name of the component
type: string type: string
sharedMemory: sharedMemory:
description: SharedMemory controls the tmpfs mounted at /dev/shm (enable/disable and size). description: SharedMemory controls the tmpfs mounted at /dev/shm (enable/disable and size).
...@@ -10352,6 +10353,9 @@ spec: ...@@ -10352,6 +10353,9 @@ spec:
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true x-kubernetes-int-or-string: true
type: object type: object
subComponentType:
description: SubComponentType indicates the sub-role of this component (for example, "prefill").
type: string
type: object type: object
description: |- description: |-
Services allows per-service overrides of the component deployment settings. Services allows per-service overrides of the component deployment settings.
......
...@@ -37,6 +37,7 @@ const ( ...@@ -37,6 +37,7 @@ const (
KubeLabelDynamoNamespace = "nvidia.com/dynamo-namespace" KubeLabelDynamoNamespace = "nvidia.com/dynamo-namespace"
KubeLabelDynamoDeploymentTargetType = "nvidia.com/dynamo-deployment-target-type" KubeLabelDynamoDeploymentTargetType = "nvidia.com/dynamo-deployment-target-type"
KubeLabelDynamoComponentType = "nvidia.com/dynamo-component-type" KubeLabelDynamoComponentType = "nvidia.com/dynamo-component-type"
KubeLabelDynamoSubComponentType = "nvidia.com/dynamo-sub-component-type"
KubeLabelValueFalse = "false" KubeLabelValueFalse = "false"
KubeLabelValueTrue = "true" KubeLabelValueTrue = "true"
......
...@@ -1154,6 +1154,10 @@ func (r *DynamoComponentDeploymentReconciler) generatePodTemplateSpec(ctx contex ...@@ -1154,6 +1154,10 @@ func (r *DynamoComponentDeploymentReconciler) generatePodTemplateSpec(ctx contex
podLabels[commonconsts.KubeLabelDynamoComponentType] = opt.dynamoComponentDeployment.Spec.ComponentType podLabels[commonconsts.KubeLabelDynamoComponentType] = opt.dynamoComponentDeployment.Spec.ComponentType
} }
if opt.dynamoComponentDeployment.Spec.SubComponentType != "" {
podLabels[commonconsts.KubeLabelDynamoSubComponentType] = opt.dynamoComponentDeployment.Spec.SubComponentType
}
podAnnotations := make(map[string]string) podAnnotations := make(map[string]string)
kubeName := r.getKubeName(opt.dynamoComponentDeployment, opt.isStealingTrafficDebugModeEnabled) kubeName := r.getKubeName(opt.dynamoComponentDeployment, opt.isStealingTrafficDebugModeEnabled)
......
...@@ -698,9 +698,10 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing. ...@@ -698,9 +698,10 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
Value: "test_value_from_dynamo_component_deployment_spec", Value: "test_value_from_dynamo_component_deployment_spec",
}, },
}, },
ComponentType: string(commonconsts.ComponentTypeWorker), ComponentType: string(commonconsts.ComponentTypeWorker),
ServiceName: "test-lws-deploy-service", SubComponentType: "test-sub-component",
DynamoNamespace: &[]string{"default"}[0], ServiceName: "test-lws-deploy-service",
DynamoNamespace: &[]string{"default"}[0],
Multinode: &v1alpha1.MultinodeSpec{ Multinode: &v1alpha1.MultinodeSpec{
NodeCount: 2, NodeCount: 2,
}, },
...@@ -783,6 +784,7 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing. ...@@ -783,6 +784,7 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
"role": "leader", "role": "leader",
"nvidia.com/label1": "label1", "nvidia.com/label1": "label1",
commonconsts.KubeLabelDynamoComponentType: commonconsts.ComponentTypeWorker, commonconsts.KubeLabelDynamoComponentType: commonconsts.ComponentTypeWorker,
commonconsts.KubeLabelDynamoSubComponentType: "test-sub-component",
commonconsts.KubeLabelDynamoGraphDeploymentName: "", commonconsts.KubeLabelDynamoGraphDeploymentName: "",
}, },
Annotations: map[string]string{ Annotations: map[string]string{
...@@ -893,6 +895,7 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing. ...@@ -893,6 +895,7 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
"role": "worker", "role": "worker",
"nvidia.com/label1": "label1", "nvidia.com/label1": "label1",
commonconsts.KubeLabelDynamoComponentType: commonconsts.ComponentTypeWorker, commonconsts.KubeLabelDynamoComponentType: commonconsts.ComponentTypeWorker,
commonconsts.KubeLabelDynamoSubComponentType: "test-sub-component",
commonconsts.KubeLabelDynamoGraphDeploymentName: "", commonconsts.KubeLabelDynamoGraphDeploymentName: "",
}, },
Annotations: map[string]string{ Annotations: map[string]string{
......
...@@ -100,10 +100,6 @@ func (r *DynamoGraphDeploymentReconciler) Reconcile(ctx context.Context, req ctr ...@@ -100,10 +100,6 @@ func (r *DynamoGraphDeploymentReconciler) Reconcile(ctx context.Context, req ctr
if err = r.Get(ctx, req.NamespacedName, dynamoDeployment); err != nil { if err = r.Get(ctx, req.NamespacedName, dynamoDeployment); err != nil {
return ctrl.Result{}, client.IgnoreNotFound(err) return ctrl.Result{}, client.IgnoreNotFound(err)
} }
if err != nil {
// not found, nothing to do
return ctrl.Result{}, nil
}
defer func() { defer func() {
if err != nil { if err != nil {
...@@ -129,7 +125,7 @@ func (r *DynamoGraphDeploymentReconciler) Reconcile(ctx context.Context, req ctr ...@@ -129,7 +125,7 @@ func (r *DynamoGraphDeploymentReconciler) Reconcile(ctx context.Context, req ctr
err = r.Status().Update(ctx, dynamoDeployment) err = r.Status().Update(ctx, dynamoDeployment)
if err != nil { if err != nil {
logger.Error(err, "Unable to update the CRD status", "crd", req.NamespacedName) logger.Error(err, "Unable to update the CRD status", "crd", req.NamespacedName, "state", state, "reason", reason, "message", message)
} }
logger.Info("Reconciliation done") logger.Info("Reconciliation done")
}() }()
......
...@@ -992,6 +992,9 @@ func generateLabels(component *v1alpha1.DynamoComponentDeploymentOverridesSpec, ...@@ -992,6 +992,9 @@ func generateLabels(component *v1alpha1.DynamoComponentDeploymentOverridesSpec,
if component.ComponentType != "" { if component.ComponentType != "" {
labels[commonconsts.KubeLabelDynamoComponentType] = component.ComponentType labels[commonconsts.KubeLabelDynamoComponentType] = component.ComponentType
} }
if component.SubComponentType != "" {
labels[commonconsts.KubeLabelDynamoSubComponentType] = component.SubComponentType
}
setMetricsLabels(labels, dynamoDeployment) setMetricsLabels(labels, dynamoDeployment)
if component.Labels != nil { if component.Labels != nil {
err := mergo.Merge(&labels, component.Labels, mergo.WithOverride) err := mergo.Merge(&labels, component.Labels, mergo.WithOverride)
......
...@@ -62,9 +62,10 @@ func TestGenerateDynamoComponentsDeployments(t *testing.T) { ...@@ -62,9 +62,10 @@ func TestGenerateDynamoComponentsDeployments(t *testing.T) {
Services: map[string]*v1alpha1.DynamoComponentDeploymentOverridesSpec{ Services: map[string]*v1alpha1.DynamoComponentDeploymentOverridesSpec{
"service1": { "service1": {
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{ DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
DynamoNamespace: &[]string{"default"}[0], DynamoNamespace: &[]string{"default"}[0],
ComponentType: "frontend", ComponentType: "frontend",
Replicas: &[]int32{3}[0], SubComponentType: "test-sub-component",
Replicas: &[]int32{3}[0],
Resources: &common.Resources{ Resources: &common.Resources{
Requests: &common.ResourceItem{ Requests: &common.ResourceItem{
CPU: "1", CPU: "1",
...@@ -106,10 +107,11 @@ func TestGenerateDynamoComponentsDeployments(t *testing.T) { ...@@ -106,10 +107,11 @@ func TestGenerateDynamoComponentsDeployments(t *testing.T) {
}, },
Spec: v1alpha1.DynamoComponentDeploymentSpec{ Spec: v1alpha1.DynamoComponentDeploymentSpec{
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{ DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
ServiceName: "service1", ServiceName: "service1",
DynamoNamespace: &[]string{"default"}[0], DynamoNamespace: &[]string{"default"}[0],
ComponentType: "frontend", ComponentType: "frontend",
Replicas: &[]int32{3}[0], SubComponentType: "test-sub-component",
Replicas: &[]int32{3}[0],
Resources: &common.Resources{ Resources: &common.Resources{
Requests: &common.ResourceItem{ Requests: &common.ResourceItem{
CPU: "1", CPU: "1",
...@@ -1088,7 +1090,8 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) { ...@@ -1088,7 +1090,8 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) {
Services: map[string]*v1alpha1.DynamoComponentDeploymentOverridesSpec{ Services: map[string]*v1alpha1.DynamoComponentDeploymentOverridesSpec{
"Frontend": { "Frontend": {
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{ DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
ComponentType: "frontend", // Frontend component ComponentType: "frontend", // Frontend component
SubComponentType: "test-sub-component",
ExtraPodMetadata: &common.ExtraPodMetadata{ ExtraPodMetadata: &common.ExtraPodMetadata{
Annotations: map[string]string{ Annotations: map[string]string{
"nvidia.com/annotation1": "annotation1", "nvidia.com/annotation1": "annotation1",
...@@ -1240,6 +1243,7 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) { ...@@ -1240,6 +1243,7 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) {
commonconsts.KubeLabelDynamoSelector: "test-dynamo-graph-deployment-frontend", commonconsts.KubeLabelDynamoSelector: "test-dynamo-graph-deployment-frontend",
commonconsts.KubeLabelMetricsEnabled: commonconsts.KubeLabelValueTrue, commonconsts.KubeLabelMetricsEnabled: commonconsts.KubeLabelValueTrue,
commonconsts.KubeLabelDynamoComponentType: commonconsts.ComponentTypeFrontend, commonconsts.KubeLabelDynamoComponentType: commonconsts.ComponentTypeFrontend,
commonconsts.KubeLabelDynamoSubComponentType: "test-sub-component",
commonconsts.KubeLabelDynamoGraphDeploymentName: "test-dynamo-graph-deployment", commonconsts.KubeLabelDynamoGraphDeploymentName: "test-dynamo-graph-deployment",
"nvidia.com/label1": "label1", "nvidia.com/label1": "label1",
"nvidia.com/label2": "label2", "nvidia.com/label2": "label2",
...@@ -1642,8 +1646,9 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) { ...@@ -1642,8 +1646,9 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) {
"nvidia.com/label2": "label2", "nvidia.com/label2": "label2",
}, },
}, },
Replicas: &[]int32{5}[0], Replicas: &[]int32{5}[0],
ComponentType: commonconsts.ComponentTypeWorker, ComponentType: commonconsts.ComponentTypeWorker,
SubComponentType: "test-sub-component",
ExtraPodSpec: &common.ExtraPodSpec{ ExtraPodSpec: &common.ExtraPodSpec{
MainContainer: &corev1.Container{ MainContainer: &corev1.Container{
Image: "worker-image", Image: "worker-image",
...@@ -1767,6 +1772,7 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) { ...@@ -1767,6 +1772,7 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) {
Name: "worker-ldr", Name: "worker-ldr",
Labels: map[string]string{ Labels: map[string]string{
commonconsts.KubeLabelDynamoComponentType: commonconsts.ComponentTypeWorker, commonconsts.KubeLabelDynamoComponentType: commonconsts.ComponentTypeWorker,
commonconsts.KubeLabelDynamoSubComponentType: "test-sub-component",
commonconsts.KubeLabelMetricsEnabled: commonconsts.KubeLabelValueTrue, commonconsts.KubeLabelMetricsEnabled: commonconsts.KubeLabelValueTrue,
commonconsts.KubeLabelDynamoSelector: "test-dynamo-graph-deployment-worker-ldr", commonconsts.KubeLabelDynamoSelector: "test-dynamo-graph-deployment-worker-ldr",
commonconsts.KubeLabelDynamoGraphDeploymentName: "test-dynamo-graph-deployment", commonconsts.KubeLabelDynamoGraphDeploymentName: "test-dynamo-graph-deployment",
...@@ -1917,6 +1923,7 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) { ...@@ -1917,6 +1923,7 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) {
Name: "worker-wkr", Name: "worker-wkr",
Labels: map[string]string{ Labels: map[string]string{
commonconsts.KubeLabelDynamoComponentType: commonconsts.ComponentTypeWorker, commonconsts.KubeLabelDynamoComponentType: commonconsts.ComponentTypeWorker,
commonconsts.KubeLabelDynamoSubComponentType: "test-sub-component",
commonconsts.KubeLabelMetricsEnabled: commonconsts.KubeLabelValueTrue, commonconsts.KubeLabelMetricsEnabled: commonconsts.KubeLabelValueTrue,
commonconsts.KubeLabelDynamoSelector: "test-dynamo-graph-deployment-worker-wkr", commonconsts.KubeLabelDynamoSelector: "test-dynamo-graph-deployment-worker-wkr",
commonconsts.KubeLabelDynamoGraphDeploymentName: "test-dynamo-graph-deployment", commonconsts.KubeLabelDynamoGraphDeploymentName: "test-dynamo-graph-deployment",
......
...@@ -14,7 +14,12 @@ spec: ...@@ -14,7 +14,12 @@ spec:
podMetricsEndpoints: podMetricsEndpoints:
- port: http - port: http
path: /metrics path: /metrics
interval: 2s interval: 5s
relabelings:
- action: replace
sourceLabels:
- __meta_kubernetes_pod_label_nvidia_com_dynamo_namespace
targetLabel: dynamo_namespace
namespaceSelector: namespaceSelector:
matchNames: matchNames:
- ${NAMESPACE} - ${NAMESPACE}
...@@ -14,7 +14,7 @@ spec: ...@@ -14,7 +14,7 @@ spec:
podMetricsEndpoints: podMetricsEndpoints:
- port: metrics - port: metrics
path: /metrics path: /metrics
interval: 2s interval: 5s
namespaceSelector: namespaceSelector:
matchNames: matchNames:
- $NAMESPACE - $NAMESPACE
\ No newline at end of file
...@@ -14,7 +14,7 @@ spec: ...@@ -14,7 +14,7 @@ spec:
podMetricsEndpoints: podMetricsEndpoints:
- port: system - port: system
path: /metrics path: /metrics
interval: 2s interval: 5s
namespaceSelector: namespaceSelector:
matchNames: matchNames:
- ${NAMESPACE} - ${NAMESPACE}
...@@ -9,10 +9,12 @@ Quick deployment guide for the disaggregated planner with automatic scaling. ...@@ -9,10 +9,12 @@ Quick deployment guide for the disaggregated planner with automatic scaling.
**Components:** **Components:**
- **Frontend**: Serves requests and exposes `/metrics` - **Frontend**: Serves requests and exposes `/metrics`
- **Prometheus**: Scrapes frontend metrics every 5 seconds - **Prometheus**: Scrapes frontend metrics every adjustment interval
- **Planner**: Queries Prometheus and adjusts worker scaling every 60 seconds - **Planner**: Queries Prometheus and adjusts worker scaling every adjustment interval
- **Workers**: prefill and backend workers handle inference - **Workers**: prefill and backend workers handle inference
The adjustment interval can be defined in the planner manifest as an argument. The default interval value can be found in this [file](/components/planner/src/dynamo/planner/defaults.py).
```mermaid ```mermaid
flowchart LR flowchart LR
Frontend --"/metrics"--> Prometheus Frontend --"/metrics"--> Prometheus
...@@ -25,6 +27,7 @@ flowchart LR ...@@ -25,6 +27,7 @@ flowchart LR
- Kubernetes cluster with GPU nodes - Kubernetes cluster with GPU nodes
- [Pre-Deployment Profiling](/docs/benchmarks/pre_deployment_profiling.md) completed and its results saved to `dynamo-pvc` PVC. - [Pre-Deployment Profiling](/docs/benchmarks/pre_deployment_profiling.md) completed and its results saved to `dynamo-pvc` PVC.
- Prefill and decode workers use the best parallelization mapping suggested by the pre-deployment profiling script. - Prefill and decode workers use the best parallelization mapping suggested by the pre-deployment profiling script.
- [kube-prometheus-stack](/docs/kubernetes/metrics.md) installed and running.
> [!NOTE] > [!NOTE]
> **Important**: The profiling that occurs before Planner deployment requires additional Kubernetes manifests (ServiceAccount, Role, RoleBinding, PVC) that are not included in standard Dynamo deployments. Apply these manifests in the same namespace as `$NAMESPACE`. For a complete setup, start with the [Quick Start guide](/deploy/utils/README.md#quick-start), which provides a fully encapsulated deployment including all required manifests. > **Important**: The profiling that occurs before Planner deployment requires additional Kubernetes manifests (ServiceAccount, Role, RoleBinding, PVC) that are not included in standard Dynamo deployments. Apply these manifests in the same namespace as `$NAMESPACE`. For a complete setup, start with the [Quick Start guide](/deploy/utils/README.md#quick-start), which provides a fully encapsulated deployment including all required manifests.
...@@ -50,7 +53,6 @@ Expected pods (all should be `1/1 Running`): ...@@ -50,7 +53,6 @@ Expected pods (all should be `1/1 Running`):
``` ```
# For vLLM: # For vLLM:
vllm-disagg-planner-frontend-* 1/1 Running vllm-disagg-planner-frontend-* 1/1 Running
vllm-disagg-planner-prometheus-* 1/1 Running
vllm-disagg-planner-planner-* 1/1 Running vllm-disagg-planner-planner-* 1/1 Running
vllm-disagg-planner-backend-* 1/1 Running vllm-disagg-planner-backend-* 1/1 Running
vllm-disagg-planner-prefill-* 1/1 Running vllm-disagg-planner-prefill-* 1/1 Running
...@@ -103,8 +105,8 @@ kubectl logs -n $NAMESPACE deployment/vllm-disagg-planner-planner --tail=10 ...@@ -103,8 +105,8 @@ kubectl logs -n $NAMESPACE deployment/vllm-disagg-planner-planner --tail=10
**Connection Issues:** **Connection Issues:**
```bash ```bash
# Verify Prometheus is accessible (runs on port 8000) # Verify Prometheus is accessible
kubectl port-forward -n $NAMESPACE deployment/vllm-disagg-planner-prometheus 9090:8000 kubectl port-forward svc/prometheus-kube-prometheus-prometheus -n monitoring 9090:9090
curl "http://localhost:9090/api/v1/query?query=up" curl "http://localhost:9090/api/v1/query?query=up"
``` ```
...@@ -119,3 +121,11 @@ curl http://localhost:8000/metrics | grep nv_llm_http_service ...@@ -119,3 +121,11 @@ curl http://localhost:8000/metrics | grep nv_llm_http_service
- Large models can take 10+ minutes to initialize - Large models can take 10+ minutes to initialize
- Check worker logs: `kubectl logs -n $NAMESPACE deployment/vllm-disagg-planner-backend` - Check worker logs: `kubectl logs -n $NAMESPACE deployment/vllm-disagg-planner-backend`
- Ensure GPU resources are available for workers - Ensure GPU resources are available for workers
**Unknown Field subComponentType:**
If you encounter the following error when attempting to apply the deployment:
```bash
Error from server (BadRequest): error when creating "components/backends/vllm/deploy/disagg.yaml": DynamoGraphDeployment in version "v1alpha1" cannot be handled as a DynamoGraphDeployment: strict decoding error: unknown field "spec.services.DecodeWorker.subComponentType", unknown field "spec.services.PrefillWorker.subComponentType"
```
This is because the `subComponentType` field has only been added in newer versions of the DynamoGraphDeployment CRD (> 0.5.0). You can upgrade the CRD version by following the instructions [here](/docs/kubernetes/installation_guide.md).
...@@ -55,9 +55,6 @@ spec: ...@@ -55,9 +55,6 @@ spec:
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
componentType: planner componentType: planner
replicas: 1 replicas: 1
envs:
- name: PROMETHEUS_PORT
value: "8000"
livenessProbe: livenessProbe:
exec: exec:
command: command:
...@@ -98,47 +95,11 @@ spec: ...@@ -98,47 +95,11 @@ spec:
--adjustment-interval=60 --adjustment-interval=60
--prometheus-port=9085 --prometheus-port=9085
--no-correction --no-correction
Prometheus:
dynamoNamespace: vllm-disagg-planner
componentType: main
replicas: 1
envs:
- name: PYTHONPATH
value: "/workspace/components/planner/src"
- name: PROMETHEUS_PORT
value: "8000"
livenessProbe:
exec:
command:
- /bin/sh
- -c
- "exit 0"
periodSeconds: 60
timeoutSeconds: 30
failureThreshold: 10
readinessProbe:
exec:
command:
- /bin/sh
- -c
- "exit 0"
initialDelaySeconds: 30
periodSeconds: 60
timeoutSeconds: 30
failureThreshold: 10
extraPodSpec:
mainContainer:
image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
- -c
args:
- "python3 -m dynamo.planner.prometheus"
VllmDecodeWorker: VllmDecodeWorker:
dynamoNamespace: vllm-disagg-planner dynamoNamespace: vllm-disagg-planner
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
componentType: worker componentType: worker
subComponentType: decode
replicas: 1 replicas: 1
livenessProbe: livenessProbe:
httpGet: httpGet:
...@@ -195,6 +156,7 @@ spec: ...@@ -195,6 +156,7 @@ spec:
dynamoNamespace: vllm-disagg-planner dynamoNamespace: vllm-disagg-planner
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
componentType: worker componentType: worker
subComponentType: prefill
replicas: 1 replicas: 1
livenessProbe: livenessProbe:
httpGet: httpGet:
......
...@@ -11,8 +11,6 @@ spec: ...@@ -11,8 +11,6 @@ spec:
value: '{"Prometheus":{"global":{"scrape_interval":"5s"},"scrape_configs":[{"job_name":"prometheus","static_configs":[{"targets":["localhost:9090"]}]},{"job_name":"frontend","static_configs":[{"targets":["vllm-disagg-planner-frontend:8000"]}]}]}}' value: '{"Prometheus":{"global":{"scrape_interval":"5s"},"scrape_configs":[{"job_name":"prometheus","static_configs":[{"targets":["localhost:9090"]}]},{"job_name":"frontend","static_configs":[{"targets":["vllm-disagg-planner-frontend:8000"]}]}]}}'
- name: DYNAMO_NAMESPACE - name: DYNAMO_NAMESPACE
value: "vllm-disagg-planner" value: "vllm-disagg-planner"
- name: PROMETHEUS_PORT
value: "8000"
services: services:
Frontend: Frontend:
dynamoNamespace: vllm-disagg-planner dynamoNamespace: vllm-disagg-planner
...@@ -63,45 +61,11 @@ spec: ...@@ -63,45 +61,11 @@ spec:
--itl=0.01 --itl=0.01
--load-predictor=constant --load-predictor=constant
--no-correction --no-correction
Prometheus: # NOTE: this is set on Prometheus to ensure a service is created for the Prometheus component. This is a workaround and should be managed differently.
dynamoNamespace: vllm-disagg-planner
componentType: frontend
replicas: 1
envs:
- name: PYTHONPATH
value: "/workspace/components/planner/src"
livenessProbe:
exec:
command:
- /bin/sh
- -c
- "exit 0"
periodSeconds: 60
timeoutSeconds: 30
failureThreshold: 10
readinessProbe:
exec:
command:
- /bin/sh
- -c
- "exit 0"
initialDelaySeconds: 30
periodSeconds: 60
timeoutSeconds: 30
failureThreshold: 10
extraPodSpec:
mainContainer:
image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
- -c
args:
- "python3 -m dynamo.planner.prometheus"
VllmDecodeWorker: VllmDecodeWorker:
dynamoNamespace: vllm-disagg-planner dynamoNamespace: vllm-disagg-planner
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
componentType: worker componentType: worker
subComponentType: decode
replicas: 1 replicas: 1
resources: resources:
limits: limits:
...@@ -125,6 +89,7 @@ spec: ...@@ -125,6 +89,7 @@ spec:
dynamoNamespace: vllm-disagg-planner dynamoNamespace: vllm-disagg-planner
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
componentType: worker componentType: worker
subComponentType: prefill
replicas: 1 replicas: 1
resources: resources:
limits: limits:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment