Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
ee3a8e42
Unverified
Commit
ee3a8e42
authored
Jul 23, 2025
by
julienmancuso
Committed by
GitHub
Jul 23, 2025
Browse files
feat: add initial Grove support (#2012)
parent
19a77ae7
Changes
34
Show whitespace changes
Inline
Side-by-side
Showing
14 changed files
with
1114 additions
and
386 deletions
+1114
-386
deploy/cloud/operator/config/crd/bases/nvidia.com_dynamographdeployments.yaml
...r/config/crd/bases/nvidia.com_dynamographdeployments.yaml
+16
-0
deploy/cloud/operator/config/rbac/role.yaml
deploy/cloud/operator/config/rbac/role.yaml
+12
-0
deploy/cloud/operator/go.mod
deploy/cloud/operator/go.mod
+17
-18
deploy/cloud/operator/go.sum
deploy/cloud/operator/go.sum
+36
-29
deploy/cloud/operator/internal/consts/consts.go
deploy/cloud/operator/internal/consts/consts.go
+4
-0
deploy/cloud/operator/internal/controller/common.go
deploy/cloud/operator/internal/controller/common.go
+0
-14
deploy/cloud/operator/internal/controller/dynamocomponentdeployment_controller.go
...ternal/controller/dynamocomponentdeployment_controller.go
+22
-207
deploy/cloud/operator/internal/controller/dynamocomponentdeployment_controller_test.go
...l/controller/dynamocomponentdeployment_controller_test.go
+20
-25
deploy/cloud/operator/internal/controller/dynamographdeployment_controller.go
...r/internal/controller/dynamographdeployment_controller.go
+178
-74
deploy/cloud/operator/internal/controller_common/predicate.go
...oy/cloud/operator/internal/controller_common/predicate.go
+16
-3
deploy/cloud/operator/internal/controller_common/resource.go
deploy/cloud/operator/internal/controller_common/resource.go
+104
-0
deploy/cloud/operator/internal/dynamo/graph.go
deploy/cloud/operator/internal/dynamo/graph.go
+278
-5
deploy/cloud/operator/internal/dynamo/graph_test.go
deploy/cloud/operator/internal/dynamo/graph_test.go
+401
-7
deploy/helm/chart/templates/deployment.yaml
deploy/helm/chart/templates/deployment.yaml
+10
-4
No files found.
deploy/cloud/operator/config/crd/bases/nvidia.com_dynamographdeployments.yaml
View file @
ee3a8e42
...
@@ -148,6 +148,12 @@ spec:
...
@@ -148,6 +148,12 @@ spec:
stabilizationWindowSeconds
:
stabilizationWindowSeconds
:
format
:
int32
format
:
int32
type
:
integer
type
:
integer
tolerance
:
anyOf
:
-
type
:
integer
-
type
:
string
pattern
:
^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string
:
true
type
:
object
type
:
object
scaleUp
:
scaleUp
:
properties
:
properties
:
...
@@ -174,6 +180,12 @@ spec:
...
@@ -174,6 +180,12 @@ spec:
stabilizationWindowSeconds
:
stabilizationWindowSeconds
:
format
:
int32
format
:
int32
type
:
integer
type
:
integer
tolerance
:
anyOf
:
-
type
:
integer
-
type
:
string
pattern
:
^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string
:
true
type
:
object
type
:
object
type
:
object
type
:
object
enabled
:
enabled
:
...
@@ -1218,6 +1230,8 @@ spec:
...
@@ -1218,6 +1230,8 @@ spec:
-
port
-
port
type
:
object
type
:
object
type
:
object
type
:
object
stopSignal
:
type
:
string
type
:
object
type
:
object
livenessProbe
:
livenessProbe
:
properties
:
properties
:
...
@@ -1897,6 +1911,8 @@ spec:
...
@@ -1897,6 +1911,8 @@ spec:
-
port
-
port
type
:
object
type
:
object
type
:
object
type
:
object
stopSignal
:
type
:
string
type
:
object
type
:
object
livenessProbe
:
livenessProbe
:
properties
:
properties
:
...
...
deploy/cloud/operator/config/rbac/role.yaml
View file @
ee3a8e42
...
@@ -98,6 +98,18 @@ rules:
...
@@ -98,6 +98,18 @@ rules:
-
patch
-
patch
-
update
-
update
-
watch
-
watch
-
apiGroups
:
-
grove.io
resources
:
-
podgangsets
verbs
:
-
create
-
delete
-
get
-
list
-
patch
-
update
-
watch
-
apiGroups
:
-
apiGroups
:
-
leaderworkerset.x-k8s.io
-
leaderworkerset.x-k8s.io
resources
:
resources
:
...
...
deploy/cloud/operator/go.mod
View file @
ee3a8e42
...
@@ -6,27 +6,29 @@ toolchain go1.24.3
...
@@ -6,27 +6,29 @@ toolchain go1.24.3
require (
require (
emperror.dev/errors v0.8.1
emperror.dev/errors v0.8.1
github.com/NVIDIA/grove/operator/api v0.0.0-20250717114148-daac6e53774f
github.com/bsm/gomega v1.27.10
github.com/bsm/gomega v1.27.10
github.com/google/go-cmp v0.7.0
github.com/google/go-cmp v0.7.0
github.com/imdario/mergo v0.3.6
github.com/imdario/mergo v0.3.6
github.com/onsi/ginkgo/v2 v2.23.4
github.com/onsi/ginkgo/v2 v2.23.4
github.com/onsi/gomega v1.37.0
github.com/onsi/gomega v1.37.0
github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.71.2
github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.71.2
go.etcd.io/etcd/client/v3 v3.5.1
6
go.etcd.io/etcd/client/v3 v3.5.
2
1
istio.io/api v1.23.1
istio.io/api v1.23.1
istio.io/client-go v1.23.1
istio.io/client-go v1.23.1
k8s.io/api v0.3
2
.3
k8s.io/api v0.3
3
.3
k8s.io/apiextensions-apiserver v0.3
2
.3
k8s.io/apiextensions-apiserver v0.3
3
.3
k8s.io/apimachinery v0.3
2
.3
k8s.io/apimachinery v0.3
3
.3
k8s.io/client-go v0.3
2
.3
k8s.io/client-go v0.3
3
.3
k8s.io/utils v0.0.0-202
41210054802-24370beab758
k8s.io/utils v0.0.0-202
50502105355-0f33e8f1c979
sigs.k8s.io/controller-runtime v0.2
0.4
sigs.k8s.io/controller-runtime v0.2
1.0
sigs.k8s.io/lws v0.6.1
sigs.k8s.io/lws v0.6.1
volcano.sh/apis v1.11.0
volcano.sh/apis v1.11.0
)
)
require (
require (
github.com/beorn7/perks v1.0.1 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/blang/semver/v4 v4.0.0 // indirect
github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/coreos/go-semver v0.3.1 // indirect
github.com/coreos/go-semver v0.3.1 // indirect
github.com/coreos/go-systemd/v22 v22.5.0 // indirect
github.com/coreos/go-systemd/v22 v22.5.0 // indirect
...
@@ -45,28 +47,24 @@ require (
...
@@ -45,28 +47,24 @@ require (
github.com/gogo/protobuf v1.3.2 // indirect
github.com/gogo/protobuf v1.3.2 // indirect
github.com/golang/protobuf v1.5.4 // indirect
github.com/golang/protobuf v1.5.4 // indirect
github.com/google/btree v1.1.3 // indirect
github.com/google/btree v1.1.3 // indirect
github.com/google/gnostic-models v0.6.8 // indirect
github.com/google/gnostic-models v0.6.9 // indirect
github.com/google/gofuzz v1.2.0 // indirect
github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 // indirect
github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/klauspost/compress v1.18.0 // indirect
github.com/mailru/easyjson v0.7.7 // indirect
github.com/mailru/easyjson v0.7.7 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/prometheus/client_golang v1.2
0.2
// indirect
github.com/prometheus/client_golang v1.2
2.0
// indirect
github.com/prometheus/client_model v0.6.1 // indirect
github.com/prometheus/client_model v0.6.1 // indirect
github.com/prometheus/common v0.
55
.0 // indirect
github.com/prometheus/common v0.
62
.0 // indirect
github.com/prometheus/procfs v0.15.1 // indirect
github.com/prometheus/procfs v0.15.1 // indirect
github.com/rogpeppe/go-internal v1.13.1 // indirect
github.com/spf13/pflag v1.0.6 // indirect
github.com/spf13/pflag v1.0.6 // indirect
github.com/stretchr/testify v1.10.0 // indirect
github.com/x448/float16 v0.8.4 // indirect
github.com/x448/float16 v0.8.4 // indirect
go.etcd.io/etcd/api/v3 v3.5.1
6
// indirect
go.etcd.io/etcd/api/v3 v3.5.
2
1 // indirect
go.etcd.io/etcd/client/pkg/v3 v3.5.1
6
// indirect
go.etcd.io/etcd/client/pkg/v3 v3.5.
2
1 // indirect
go.opentelemetry.io/otel v1.36.0 // indirect
go.opentelemetry.io/otel v1.36.0 // indirect
go.opentelemetry.io/otel/sdk v1.36.0 // indirect
go.opentelemetry.io/otel/sdk v1.36.0 // indirect
go.opentelemetry.io/otel/sdk/metric v1.35.0 // indirect
go.opentelemetry.io/otel/sdk/metric v1.35.0 // indirect
...
@@ -79,7 +77,7 @@ require (
...
@@ -79,7 +77,7 @@ require (
golang.org/x/sys v0.33.0 // indirect
golang.org/x/sys v0.33.0 // indirect
golang.org/x/term v0.32.0 // indirect
golang.org/x/term v0.32.0 // indirect
golang.org/x/text v0.25.0 // indirect
golang.org/x/text v0.25.0 // indirect
golang.org/x/time v0.
7
.0 // indirect
golang.org/x/time v0.
9
.0 // indirect
golang.org/x/tools v0.33.0 // indirect
golang.org/x/tools v0.33.0 // indirect
gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect
gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20250519155744-55703ea1f237 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20250519155744-55703ea1f237 // indirect
...
@@ -90,8 +88,9 @@ require (
...
@@ -90,8 +88,9 @@ require (
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
k8s.io/klog/v2 v2.130.1 // indirect
k8s.io/klog/v2 v2.130.1 // indirect
k8s.io/kube-openapi v0.0.0-202
41105132330-32ad38e42d3
f // indirect
k8s.io/kube-openapi v0.0.0-202
50318190949-c8a335a9a2f
f // indirect
sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 // indirect
sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 // indirect
sigs.k8s.io/randfill v1.0.0 // indirect
sigs.k8s.io/structured-merge-diff/v4 v4.7.0 // indirect
sigs.k8s.io/structured-merge-diff/v4 v4.7.0 // indirect
sigs.k8s.io/yaml v1.4.0 // indirect
sigs.k8s.io/yaml v1.4.0 // indirect
)
)
deploy/cloud/operator/go.sum
View file @
ee3a8e42
emperror.dev/errors v0.8.1 h1:UavXZ5cSX/4u9iyvH6aDcuGkVjeexUGJ7Ij7G4VfQT0=
emperror.dev/errors v0.8.1 h1:UavXZ5cSX/4u9iyvH6aDcuGkVjeexUGJ7Ij7G4VfQT0=
emperror.dev/errors v0.8.1/go.mod h1:YcRvLPh626Ubn2xqtoprejnA5nFha+TJ+2vew48kWuE=
emperror.dev/errors v0.8.1/go.mod h1:YcRvLPh626Ubn2xqtoprejnA5nFha+TJ+2vew48kWuE=
github.com/NVIDIA/grove/operator/api v0.0.0-20250717114148-daac6e53774f h1:2ePSNDm7/Tep8F99yCQVH8/vmn86L1cUzTbVlyNopmQ=
github.com/NVIDIA/grove/operator/api v0.0.0-20250717114148-daac6e53774f/go.mod h1:nJL33lsBe+9xCcZLYkNYg1wucE4hJfa4ZfHm1zamuG0=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM=
github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ=
github.com/bsm/gomega v1.27.10 h1:yeMWxP2pV2fG3FgAODIY8EiRE3dy0aeFYt4l7wh6yKA=
github.com/bsm/gomega v1.27.10 h1:yeMWxP2pV2fG3FgAODIY8EiRE3dy0aeFYt4l7wh6yKA=
github.com/bsm/gomega v1.27.10/go.mod h1:JyEr/xRbxbtgWNi8tIEVPUYZ5Dzef52k01W3YH0H+O0=
github.com/bsm/gomega v1.27.10/go.mod h1:JyEr/xRbxbtgWNi8tIEVPUYZ5Dzef52k01W3YH0H+O0=
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
...
@@ -45,8 +49,8 @@ github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek
...
@@ -45,8 +49,8 @@ github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
github.com/google/btree v1.1.3 h1:CVpQJjYgC4VbzxeGVHfvZrv1ctoYCAI8vbl07Fcxlyg=
github.com/google/btree v1.1.3 h1:CVpQJjYgC4VbzxeGVHfvZrv1ctoYCAI8vbl07Fcxlyg=
github.com/google/btree v1.1.3/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl761Gm4=
github.com/google/btree v1.1.3/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl761Gm4=
github.com/google/gnostic-models v0.6.
8
h1:
yo/ABAfM5IMRsS1VnXjTBvUb61tFIHozhlYvRgGre9I
=
github.com/google/gnostic-models v0.6.
9
h1:
MU/8wDLif2qCXZmzncUQ/BOfxWfthHi63KqpoNbWqVw
=
github.com/google/gnostic-models v0.6.
8
/go.mod h1:
5n7qKqH0f5wFt+aWF8CW6pZLLNOfYuF5OpfBSENuI8U
=
github.com/google/gnostic-models v0.6.
9
/go.mod h1:
CiWsm0s6BSQd1hRn8/QmxqB6BesYcbSZxsz9b0KuDBw
=
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
...
@@ -95,12 +99,12 @@ github.com/prashantv/gostub v1.1.0 h1:BTyx3RfQjRHnUWaGF9oQos79AlQ5k8WNktv7VGvVH4
...
@@ -95,12 +99,12 @@ github.com/prashantv/gostub v1.1.0 h1:BTyx3RfQjRHnUWaGF9oQos79AlQ5k8WNktv7VGvVH4
github.com/prashantv/gostub v1.1.0/go.mod h1:A5zLQHz7ieHGG7is6LLXLz7I8+3LZzsrV0P1IAHhP5U=
github.com/prashantv/gostub v1.1.0/go.mod h1:A5zLQHz7ieHGG7is6LLXLz7I8+3LZzsrV0P1IAHhP5U=
github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.71.2 h1:HZdPRm0ApWPg7F4sHgbqWkL+ddWfpTZsopm5HM/2g4o=
github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.71.2 h1:HZdPRm0ApWPg7F4sHgbqWkL+ddWfpTZsopm5HM/2g4o=
github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.71.2/go.mod h1:3RiUkFmR9kmPZi9r/8a5jw0a9yg+LMmr7qa0wjqvSiI=
github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.71.2/go.mod h1:3RiUkFmR9kmPZi9r/8a5jw0a9yg+LMmr7qa0wjqvSiI=
github.com/prometheus/client_golang v1.2
0.2
h1:
5ctymQzZlyOON1666svgwn3s6IKWgfbjsejTMiXIyjg
=
github.com/prometheus/client_golang v1.2
2.0
h1:
rb93p9lokFEsctTys46VnV1kLCDpVZ0a/Y92Vm0Zc6Q
=
github.com/prometheus/client_golang v1.2
0.2
/go.mod h1:
PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE
=
github.com/prometheus/client_golang v1.2
2.0
/go.mod h1:
R7ljNsLXhuQXYZYtw6GAE9AZg8Y7vEW5scdCXrWRXC0
=
github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E=
github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E=
github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY=
github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY=
github.com/prometheus/common v0.
55
.0 h1:
KEi6DK7lXW/m7Ig5i47x0vRzuBsHuvJdi5ee6Y3G1dc
=
github.com/prometheus/common v0.
62
.0 h1:
xasJaQlnWAeyHdUBeGjXmutelfJHWMRr+Fg4QszZ2Io
=
github.com/prometheus/common v0.
55
.0/go.mod h1:
2SECS4xJG1kd8XF9IcM1gMX6510RAEL65zxzNImwdc8
=
github.com/prometheus/common v0.
62
.0/go.mod h1:
vyBcEuLSvWos9B1+CyL7JZ2up+uFzXhkqml0W5zIY1I
=
github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc=
github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc=
github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk=
github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk=
github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII=
github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII=
...
@@ -108,6 +112,8 @@ github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWN
...
@@ -108,6 +112,8 @@ github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWN
github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o=
github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o=
github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY=
github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
...
@@ -115,12 +121,12 @@ github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM=
...
@@ -115,12 +121,12 @@ github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM=
github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg=
github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg=
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
go.etcd.io/etcd/api/v3 v3.5.1
6
h1:
WvmyJVbjWqK4R1E+B12RRHz3bRGy9XVfh++MgbN+6n0
=
go.etcd.io/etcd/api/v3 v3.5.
2
1 h1:
A6O2/JDb3tvHhiIz3xf9nJ7REHvtEFJJ3veW3FbCnS8
=
go.etcd.io/etcd/api/v3 v3.5.1
6
/go.mod h1:
1P4SlIP/VwkDmGo3OlOD7faPeP8KDIFhqvciH5EfN28
=
go.etcd.io/etcd/api/v3 v3.5.
2
1/go.mod h1:
c3aH5wcvXv/9dqIw2Y810LDXJfhSYdHQ0vxmP3CCHVY
=
go.etcd.io/etcd/client/pkg/v3 v3.5.1
6
h1:
ZgY48uH6UvB+/7R9Yf4x574uCO3jIx0TRDyetSfId3Q
=
go.etcd.io/etcd/client/pkg/v3 v3.5.
2
1 h1:
lPBu71Y7osQmzlflM9OfeIV2JlmpBjqBNlLtcoBqUTc
=
go.etcd.io/etcd/client/pkg/v3 v3.5.1
6
/go.mod h1:
V8acl8pcEK0Y2g19YlOV9m9ssUe6MgiDSobSoaBAM0E
=
go.etcd.io/etcd/client/pkg/v3 v3.5.
2
1/go.mod h1:
BgqT/IXPjK9NkeSDjbzwsHySX3yIle2+ndz28nVsjUs
=
go.etcd.io/etcd/client/v3 v3.5.1
6
h1:
sSmVYOAHeC9doqi0gv7v86oY/BTld0SEFGaxsU9eRhE
=
go.etcd.io/etcd/client/v3 v3.5.
2
1 h1:
T6b1Ow6fNjOLOtM0xSoKNQt1ASPCLWrF9XMHcH9pEyY
=
go.etcd.io/etcd/client/v3 v3.5.1
6
/go.mod h1:
X+rExSGkyqxvu276cr2OwPLBaeqFu1cIl4vmRjAD/50
=
go.etcd.io/etcd/client/v3 v3.5.
2
1/go.mod h1:
mFYy67IOqmbRf/kRUvsHixzo3iG+1OF2W2+jVIQRAnU
=
go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA=
go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA=
go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A=
go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A=
go.opentelemetry.io/otel v1.36.0 h1:UumtzIklRBY6cI/lllNZlALOF5nNIzJVb16APdvgTXg=
go.opentelemetry.io/otel v1.36.0 h1:UumtzIklRBY6cI/lllNZlALOF5nNIzJVb16APdvgTXg=
...
@@ -172,8 +178,8 @@ golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
...
@@ -172,8 +178,8 @@ golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.25.0 h1:qVyWApTSYLk/drJRO5mDlNYskwQznZmkpV2c8q9zls4=
golang.org/x/text v0.25.0 h1:qVyWApTSYLk/drJRO5mDlNYskwQznZmkpV2c8q9zls4=
golang.org/x/text v0.25.0/go.mod h1:WEdwpYrmk1qmdHvhkSTNPm3app7v4rsT8F2UD6+VHIA=
golang.org/x/text v0.25.0/go.mod h1:WEdwpYrmk1qmdHvhkSTNPm3app7v4rsT8F2UD6+VHIA=
golang.org/x/time v0.
7
.0 h1:
ntUhktv3OPE6TgYxXWv9vKvUSJyIFJlyohwbkEwPrKQ
=
golang.org/x/time v0.
9
.0 h1:
EsRrnYcQiGH+5FfbgvV4AP7qEZstoyrHB0DzarOQ4ZY
=
golang.org/x/time v0.
7
.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM=
golang.org/x/time v0.
9
.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
...
@@ -209,28 +215,29 @@ istio.io/api v1.23.1 h1:bm2XF0j058FfzWVHUfpmMj4sFDkcD1X609qs5AU97Pc=
...
@@ -209,28 +215,29 @@ istio.io/api v1.23.1 h1:bm2XF0j058FfzWVHUfpmMj4sFDkcD1X609qs5AU97Pc=
istio.io/api v1.23.1/go.mod h1:QPSTGXuIQdnZFEm3myf9NZ5uBMwCdJWUvfj9ZZ+2oBM=
istio.io/api v1.23.1/go.mod h1:QPSTGXuIQdnZFEm3myf9NZ5uBMwCdJWUvfj9ZZ+2oBM=
istio.io/client-go v1.23.1 h1:IX2cgUUXnVYo+9H6bFGSp/vuKVLPUkmiN8qk1/mvsYs=
istio.io/client-go v1.23.1 h1:IX2cgUUXnVYo+9H6bFGSp/vuKVLPUkmiN8qk1/mvsYs=
istio.io/client-go v1.23.1/go.mod h1:+fxu+O2GkITM3HEREUWdobvRXqI/UhAAI7hfxqqpRh0=
istio.io/client-go v1.23.1/go.mod h1:+fxu+O2GkITM3HEREUWdobvRXqI/UhAAI7hfxqqpRh0=
k8s.io/api v0.3
2
.3 h1:
Hw7KqxRusq+6QSplE3NYG4MBxZw1BZnq4aP4cJVINls
=
k8s.io/api v0.3
3
.3 h1:
SRd5t//hhkI1buzxb288fy2xvjubstenEKL9K51KBI8
=
k8s.io/api v0.3
2
.3/go.mod h1:
2wEDTXADtm/HA7CCMD8D8bK4yuBUptzaRhYcYEEYA3k
=
k8s.io/api v0.3
3
.3/go.mod h1:
01Y/iLUjNBM3TAvypct7DIj0M0NIZc+PzAHCIo0CYGE
=
k8s.io/apiextensions-apiserver v0.3
2
.3 h1:
4D8vy+9GWerlErCwVIbcQjsWunF9SUGNu7O7hiQTyPY
=
k8s.io/apiextensions-apiserver v0.3
3
.3 h1:
qmOcAHN6DjfD0v9kxL5udB27SRP6SG/MTopmge3MwEs
=
k8s.io/apiextensions-apiserver v0.3
2
.3/go.mod h1:
8YwcvVRMVzw0r1Stc7XfGAzB/SIVLunqApySV5V7Dss
=
k8s.io/apiextensions-apiserver v0.3
3
.3/go.mod h1:
oROuctgo27mUsyp9+Obahos6CWcMISSAPzQ77CAQGz8
=
k8s.io/apimachinery v0.3
2
.3 h1:
JmDuDarhDmA/Li7j3aPrwhpNBA94Nvk5zLeOge9HH1U
=
k8s.io/apimachinery v0.3
3
.3 h1:
4ZSrmNa0c/ZpZJhAgRdcsFcZOw1PQU1bALVQ0B3I5LA
=
k8s.io/apimachinery v0.3
2
.3/go.mod h1:
GpHVgxoKlTxClKcteaeuF1Ul/lDVb74KpZcxcmLDElE
=
k8s.io/apimachinery v0.3
3
.3/go.mod h1:
BHW0YOu7n22fFv/JkYOEfkUYNRN0fj0BlvMFWA7b+SM
=
k8s.io/client-go v0.3
2
.3 h1:
RKPVltzopkSgHS7aS98QdscAgtgah/+zmpAogooIqVU
=
k8s.io/client-go v0.3
3
.3 h1:
M5AfDnKfYmVJif92ngN532gFqakcGi6RvaOF16efrpA
=
k8s.io/client-go v0.3
2
.3/go.mod h1:
3v0+3k4IcT9bXTc4V2rt+d2ZPPG700Xy6Oi0Gdl2PaY
=
k8s.io/client-go v0.3
3
.3/go.mod h1:
luqKBQggEf3shbxHY4uVENAxrDISLOarxpTKMiUuujg
=
k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk=
k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk=
k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE=
k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE=
k8s.io/kube-openapi v0.0.0-202
41105132330-32ad38e42d3f h1:GA7//TjRY9yWGy1poLzYYJJ4JRdzg3+O6e8I+e+8T5Y
=
k8s.io/kube-openapi v0.0.0-202
50318190949-c8a335a9a2ff h1:/usPimJzUKKu+m+TE36gUyGcf03XZEP0ZIKgKj35LS4
=
k8s.io/kube-openapi v0.0.0-202
41105132330-32ad38e42d3f/go.mod h1:R/HEjbvWI0qdfb8viZUeVZm0X6IZnxAydC7YU42CMw4
=
k8s.io/kube-openapi v0.0.0-202
50318190949-c8a335a9a2ff/go.mod h1:5jIi+8yX4RIb8wk3XwBo5Pq2ccx4FP10ohkbSKCZoK8
=
k8s.io/utils v0.0.0-202
41
210
0
5
4802-24370beab758 h1:sdbE21q2nlQtFh65saZY+rRM6x6aJJI8IUa1AmH/qa0
=
k8s.io/utils v0.0.0-202
5050
2105
355-0f33e8f1c979 h1:jgJW5IePPXLGB8e/1wvd0Ich9QE97RvvF3a8J3fP/Lg
=
k8s.io/utils v0.0.0-202
41210054802-24370beab758
/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
k8s.io/utils v0.0.0-202
50502105355-0f33e8f1c979
/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
sigs.k8s.io/controller-runtime v0.2
0.4
h1:
X3c+Odnxz+iPTRobG4tp092+CvBU9UK0t/bRf+n0DGU
=
sigs.k8s.io/controller-runtime v0.2
1.0
h1:
CYfjpEuicjUecRk+KAeyYh+ouUBn4llGyDYytIGcJS8
=
sigs.k8s.io/controller-runtime v0.2
0.4
/go.mod h1:
xg2XB0K5ShQzAgsoujxuKN4LNXR2LfwwHsPj7Iaw+XY
=
sigs.k8s.io/controller-runtime v0.2
1.0
/go.mod h1:
OSg14+F65eWqIu4DceX7k/+QRAbTTvxeQSNSOQpukWM
=
sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 h1:/Rv+M11QRah1itp8VhT6HoVx1Ray9eB4DBr+K+/sCJ8=
sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 h1:/Rv+M11QRah1itp8VhT6HoVx1Ray9eB4DBr+K+/sCJ8=
sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3/go.mod h1:18nIHnGi6636UCz6m8i4DhaJ65T6EruyzmoQqI2BVDo=
sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3/go.mod h1:18nIHnGi6636UCz6m8i4DhaJ65T6EruyzmoQqI2BVDo=
sigs.k8s.io/lws v0.6.1 h1:cWiRmMSflo8hQPBrmIIZtoaX3XuVkmAgFKkmjxlPULI=
sigs.k8s.io/lws v0.6.1 h1:cWiRmMSflo8hQPBrmIIZtoaX3XuVkmAgFKkmjxlPULI=
sigs.k8s.io/lws v0.6.1/go.mod h1:aoT5ROMriBtN/H8JH0POBF6e2uyFCOxKGKtXSA3DVV8=
sigs.k8s.io/lws v0.6.1/go.mod h1:aoT5ROMriBtN/H8JH0POBF6e2uyFCOxKGKtXSA3DVV8=
sigs.k8s.io/randfill v0.0.0-20250304075658-069ef1bbf016 h1:kXv6kKdoEtedwuqMmkqhbkgvYKeycVbC8+iPCP9j5kQ=
sigs.k8s.io/randfill v0.0.0-20250304075658-069ef1bbf016/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY=
sigs.k8s.io/randfill v0.0.0-20250304075658-069ef1bbf016/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY=
sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU=
sigs.k8s.io/randfill v1.0.0/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY=
sigs.k8s.io/structured-merge-diff/v4 v4.7.0 h1:qPeWmscJcXP0snki5IYF79Z8xrl8ETFxgMd7wez1XkI=
sigs.k8s.io/structured-merge-diff/v4 v4.7.0 h1:qPeWmscJcXP0snki5IYF79Z8xrl8ETFxgMd7wez1XkI=
sigs.k8s.io/structured-merge-diff/v4 v4.7.0/go.mod h1:dDy58f92j70zLsuZVuUX5Wp9vtxXpaZnkPGWeqDfCps=
sigs.k8s.io/structured-merge-diff/v4 v4.7.0/go.mod h1:dDy58f92j70zLsuZVuUX5Wp9vtxXpaZnkPGWeqDfCps=
sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E=
sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E=
...
...
deploy/cloud/operator/internal/consts/consts.go
View file @
ee3a8e42
...
@@ -17,6 +17,8 @@ const (
...
@@ -17,6 +17,8 @@ const (
KubeLabelDynamoSelector
=
"nvidia.com/selector"
KubeLabelDynamoSelector
=
"nvidia.com/selector"
KubeAnnotationEnableGrove
=
"nvidia.com/enable-grove"
KubeLabelDynamoComponent
=
"nvidia.com/dynamo-component"
KubeLabelDynamoComponent
=
"nvidia.com/dynamo-component"
KubeLabelDynamoNamespace
=
"nvidia.com/dynamo-namespace"
KubeLabelDynamoNamespace
=
"nvidia.com/dynamo-namespace"
KubeLabelDynamoDeploymentTargetType
=
"nvidia.com/dynamo-deployment-target-type"
KubeLabelDynamoDeploymentTargetType
=
"nvidia.com/dynamo-deployment-target-type"
...
@@ -33,4 +35,6 @@ const (
...
@@ -33,4 +35,6 @@ const (
ComponentTypePlanner
=
"planner"
ComponentTypePlanner
=
"planner"
ComponentTypeMain
=
"main"
ComponentTypeMain
=
"main"
PlannerServiceAccountName
=
"planner-serviceaccount"
PlannerServiceAccountName
=
"planner-serviceaccount"
DefaultIngressSuffix
=
"local"
)
)
deploy/cloud/operator/internal/controller/common.go
View file @
ee3a8e42
...
@@ -18,8 +18,6 @@
...
@@ -18,8 +18,6 @@
package
controller
package
controller
import
(
import
(
"fmt"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1"
corev1
"k8s.io/api/core/v1"
corev1
"k8s.io/api/core/v1"
metav1
"k8s.io/apimachinery/pkg/apis/meta/v1"
metav1
"k8s.io/apimachinery/pkg/apis/meta/v1"
...
@@ -51,18 +49,6 @@ func getPvcName(crd metav1.Object, defaultName *string) string {
...
@@ -51,18 +49,6 @@ func getPvcName(crd metav1.Object, defaultName *string) string {
return
crd
.
GetName
()
return
crd
.
GetName
()
}
}
func
getIngressHost
(
ingressSpec
v1alpha1
.
IngressSpec
)
string
{
host
:=
ingressSpec
.
Host
if
ingressSpec
.
HostPrefix
!=
nil
{
host
=
*
ingressSpec
.
HostPrefix
+
host
}
ingressSuffix
:=
DefaultIngressSuffix
if
ingressSpec
.
HostSuffix
!=
nil
{
ingressSuffix
=
*
ingressSpec
.
HostSuffix
}
return
fmt
.
Sprintf
(
"%s.%s"
,
host
,
ingressSuffix
)
}
type
dockerSecretRetriever
interface
{
type
dockerSecretRetriever
interface
{
// returns a list of secret names associated with the docker registry
// returns a list of secret names associated with the docker registry
GetSecrets
(
namespace
,
registry
string
)
([]
string
,
error
)
GetSecrets
(
namespace
,
registry
string
)
([]
string
,
error
)
...
...
deploy/cloud/operator/internal/controller/dynamocomponentdeployment_controller.go
View file @
ee3a8e42
...
@@ -24,7 +24,6 @@ import (
...
@@ -24,7 +24,6 @@ import (
"fmt"
"fmt"
"os"
"os"
"strconv"
"strconv"
"strings"
"time"
"time"
"github.com/imdario/mergo"
"github.com/imdario/mergo"
...
@@ -41,7 +40,7 @@ import (
...
@@ -41,7 +40,7 @@ import (
commonconsts
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts"
commonconsts
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/controller_common"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/controller_common"
commonController
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/controller_common"
commonController
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/controller_common"
istioNetworking
"istio.io/api/netw
or
k
in
g/v1beta1
"
"github.com/ai-dynamo/dynamo/deploy/cloud/operat
or
/
in
ternal/dynamo
"
networkingv1beta1
"istio.io/client-go/pkg/apis/networking/v1beta1"
networkingv1beta1
"istio.io/client-go/pkg/apis/networking/v1beta1"
k8serrors
"k8s.io/apimachinery/pkg/api/errors"
k8serrors
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/meta"
"k8s.io/apimachinery/pkg/api/meta"
...
@@ -73,7 +72,6 @@ const (
...
@@ -73,7 +72,6 @@ const (
DeploymentTargetTypeProduction
=
"production"
DeploymentTargetTypeProduction
=
"production"
DeploymentTargetTypeDebug
=
"debug"
DeploymentTargetTypeDebug
=
"debug"
HeaderNameDebug
=
"X-Nvidia-Debug"
HeaderNameDebug
=
"X-Nvidia-Debug"
DefaultIngressSuffix
=
"local"
KubernetesDeploymentStrategy
=
"kubernetes"
KubernetesDeploymentStrategy
=
"kubernetes"
KubeAnnotationDeploymentType
=
"nvidia.com/deployment-type"
KubeAnnotationDeploymentType
=
"nvidia.com/deployment-type"
...
@@ -88,10 +86,7 @@ type DynamoComponentDeploymentReconciler struct {
...
@@ -88,10 +86,7 @@ type DynamoComponentDeploymentReconciler struct {
client
.
Client
client
.
Client
Recorder
record
.
EventRecorder
Recorder
record
.
EventRecorder
Config
controller_common
.
Config
Config
controller_common
.
Config
NatsAddr
string
EtcdAddr
string
EtcdStorage
etcdStorage
EtcdStorage
etcdStorage
UseVirtualService
bool
DockerSecretRetriever
dockerSecretRetriever
DockerSecretRetriever
dockerSecretRetriever
}
}
...
@@ -952,7 +947,7 @@ func (r *DynamoComponentDeploymentReconciler) createOrUpdateOrDeleteIngress(ctx
...
@@ -952,7 +947,7 @@ func (r *DynamoComponentDeploymentReconciler) createOrUpdateOrDeleteIngress(ctx
if
err
!=
nil
{
if
err
!=
nil
{
return
false
,
err
return
false
,
err
}
}
if
r
.
UseVirtualService
{
if
r
.
Config
.
IngressConfig
.
UseVirtualService
()
{
modified_
,
_
,
err
:=
commonController
.
SyncResource
(
ctx
,
r
,
opt
.
dynamoComponentDeployment
,
func
(
ctx
context
.
Context
)
(
*
networkingv1beta1
.
VirtualService
,
bool
,
error
)
{
modified_
,
_
,
err
:=
commonController
.
SyncResource
(
ctx
,
r
,
opt
.
dynamoComponentDeployment
,
func
(
ctx
context
.
Context
)
(
*
networkingv1beta1
.
VirtualService
,
bool
,
error
)
{
return
r
.
generateVirtualService
(
ctx
,
opt
)
return
r
.
generateVirtualService
(
ctx
,
opt
)
})
})
...
@@ -975,49 +970,11 @@ func (r *DynamoComponentDeploymentReconciler) generateIngress(ctx context.Contex
...
@@ -975,49 +970,11 @@ func (r *DynamoComponentDeploymentReconciler) generateIngress(ctx context.Contex
},
},
}
}
if
!
opt
.
dynamoComponentDeployment
.
Spec
.
Ingress
.
Enabled
||
opt
.
dynamoComponentDeployment
.
Spec
.
Ingress
.
IngressControllerClassName
==
nil
{
if
opt
.
dynamoComponentDeployment
.
Spec
.
Ingress
==
nil
||
!
opt
.
dynamoComponentDeployment
.
Spec
.
Ingress
.
Enabled
||
opt
.
dynamoComponentDeployment
.
Spec
.
Ingress
.
IngressControllerClassName
==
nil
{
log
.
Info
(
"Ingress is not enabled"
)
log
.
Info
(
"Ingress is not enabled"
)
return
ingress
,
true
,
nil
return
ingress
,
true
,
nil
}
}
host
:=
getIngressHost
(
opt
.
dynamoComponentDeployment
.
Spec
.
Ingress
)
return
dynamo
.
GenerateComponentIngress
(
ctx
,
opt
.
dynamoComponentDeployment
.
Name
,
opt
.
dynamoComponentDeployment
.
Namespace
,
*
opt
.
dynamoComponentDeployment
.
Spec
.
Ingress
),
false
,
nil
ingress
.
Spec
=
networkingv1
.
IngressSpec
{
IngressClassName
:
opt
.
dynamoComponentDeployment
.
Spec
.
Ingress
.
IngressControllerClassName
,
Rules
:
[]
networkingv1
.
IngressRule
{
{
Host
:
host
,
IngressRuleValue
:
networkingv1
.
IngressRuleValue
{
HTTP
:
&
networkingv1
.
HTTPIngressRuleValue
{
Paths
:
[]
networkingv1
.
HTTPIngressPath
{
{
Path
:
"/"
,
PathType
:
&
[]
networkingv1
.
PathType
{
networkingv1
.
PathTypePrefix
}[
0
],
Backend
:
networkingv1
.
IngressBackend
{
Service
:
&
networkingv1
.
IngressServiceBackend
{
Name
:
opt
.
dynamoComponentDeployment
.
Name
,
Port
:
networkingv1
.
ServiceBackendPort
{
Number
:
commonconsts
.
DynamoServicePort
,
},
},
},
},
},
},
},
},
},
}
if
opt
.
dynamoComponentDeployment
.
Spec
.
Ingress
.
TLS
!=
nil
{
ingress
.
Spec
.
TLS
=
[]
networkingv1
.
IngressTLS
{
{
Hosts
:
[]
string
{
host
},
SecretName
:
opt
.
dynamoComponentDeployment
.
Spec
.
Ingress
.
TLS
.
SecretName
,
},
}
}
return
ingress
,
false
,
nil
}
}
func
(
r
*
DynamoComponentDeploymentReconciler
)
generateVirtualService
(
ctx
context
.
Context
,
opt
generateResourceOption
)
(
*
networkingv1beta1
.
VirtualService
,
bool
,
error
)
{
func
(
r
*
DynamoComponentDeploymentReconciler
)
generateVirtualService
(
ctx
context
.
Context
,
opt
generateResourceOption
)
(
*
networkingv1beta1
.
VirtualService
,
bool
,
error
)
{
...
@@ -1031,40 +988,12 @@ func (r *DynamoComponentDeploymentReconciler) generateVirtualService(ctx context
...
@@ -1031,40 +988,12 @@ func (r *DynamoComponentDeploymentReconciler) generateVirtualService(ctx context
},
},
}
}
vsEnabled
:=
opt
.
dynamoComponentDeployment
.
Spec
.
Ingress
.
Enabled
&&
opt
.
dynamoComponentDeployment
.
Spec
.
Ingress
.
UseVirtualService
&&
opt
.
dynamoComponentDeployment
.
Spec
.
Ingress
.
VirtualServiceGateway
!=
nil
vsEnabled
:=
opt
.
dynamoComponentDeployment
.
Spec
.
Ingress
!=
nil
&&
opt
.
dynamoComponentDeployment
.
Spec
.
Ingress
.
Enabled
&&
opt
.
dynamoComponentDeployment
.
Spec
.
Ingress
.
UseVirtualService
&&
opt
.
dynamoComponentDeployment
.
Spec
.
Ingress
.
VirtualServiceGateway
!=
nil
if
!
vsEnabled
{
if
!
vsEnabled
{
log
.
Info
(
"VirtualService is not enabled"
)
log
.
Info
(
"VirtualService is not enabled"
)
return
vs
,
true
,
nil
return
vs
,
true
,
nil
}
}
return
dynamo
.
GenerateComponentVirtualService
(
ctx
,
opt
.
dynamoComponentDeployment
.
Name
,
opt
.
dynamoComponentDeployment
.
Namespace
,
*
opt
.
dynamoComponentDeployment
.
Spec
.
Ingress
),
false
,
nil
vs
.
Spec
=
istioNetworking
.
VirtualService
{
Hosts
:
[]
string
{
getIngressHost
(
opt
.
dynamoComponentDeployment
.
Spec
.
Ingress
),
},
Gateways
:
[]
string
{
*
opt
.
dynamoComponentDeployment
.
Spec
.
Ingress
.
VirtualServiceGateway
},
Http
:
[]
*
istioNetworking
.
HTTPRoute
{
{
Match
:
[]
*
istioNetworking
.
HTTPMatchRequest
{
{
Uri
:
&
istioNetworking
.
StringMatch
{
MatchType
:
&
istioNetworking
.
StringMatch_Prefix
{
Prefix
:
"/"
},
},
},
},
Route
:
[]
*
istioNetworking
.
HTTPRouteDestination
{
{
Destination
:
&
istioNetworking
.
Destination
{
Host
:
opt
.
dynamoComponentDeployment
.
Name
,
Port
:
&
istioNetworking
.
PortSelector
{
Number
:
commonconsts
.
DynamoServicePort
,
},
},
},
},
},
},
}
return
vs
,
false
,
nil
}
}
func
(
r
*
DynamoComponentDeploymentReconciler
)
getKubeName
(
dynamoComponentDeployment
*
v1alpha1
.
DynamoComponentDeployment
,
debug
bool
)
string
{
func
(
r
*
DynamoComponentDeploymentReconciler
)
getKubeName
(
dynamoComponentDeployment
*
v1alpha1
.
DynamoComponentDeployment
,
debug
bool
)
string
{
...
@@ -1274,7 +1203,6 @@ func (r *DynamoComponentDeploymentReconciler) generateHPA(opt generateResourceOp
...
@@ -1274,7 +1203,6 @@ func (r *DynamoComponentDeploymentReconciler) generateHPA(opt generateResourceOp
//nolint:gocyclo,nakedret
//nolint:gocyclo,nakedret
func
(
r
*
DynamoComponentDeploymentReconciler
)
generatePodTemplateSpec
(
ctx
context
.
Context
,
opt
generateResourceOption
)
(
podTemplateSpec
*
corev1
.
PodTemplateSpec
,
err
error
)
{
func
(
r
*
DynamoComponentDeploymentReconciler
)
generatePodTemplateSpec
(
ctx
context
.
Context
,
opt
generateResourceOption
)
(
podTemplateSpec
*
corev1
.
PodTemplateSpec
,
err
error
)
{
logs
:=
log
.
FromContext
(
ctx
)
podLabels
:=
r
.
getKubeLabels
(
opt
.
dynamoComponentDeployment
)
podLabels
:=
r
.
getKubeLabels
(
opt
.
dynamoComponentDeployment
)
if
opt
.
isStealingTrafficDebugModeEnabled
{
if
opt
.
isStealingTrafficDebugModeEnabled
{
podLabels
[
commonconsts
.
KubeLabelDynamoDeploymentTargetType
]
=
DeploymentTargetTypeDebug
podLabels
[
commonconsts
.
KubeLabelDynamoDeploymentTargetType
]
=
DeploymentTargetTypeDebug
...
@@ -1333,17 +1261,17 @@ func (r *DynamoComponentDeploymentReconciler) generatePodTemplateSpec(ctx contex
...
@@ -1333,17 +1261,17 @@ func (r *DynamoComponentDeploymentReconciler) generatePodTemplateSpec(ctx contex
},
},
}
}
if
r
.
NatsAddr
!=
""
{
if
r
.
Config
.
NatsAddr
ess
!=
""
{
defaultEnvs
=
append
(
defaultEnvs
,
corev1
.
EnvVar
{
defaultEnvs
=
append
(
defaultEnvs
,
corev1
.
EnvVar
{
Name
:
"NATS_SERVER"
,
Name
:
"NATS_SERVER"
,
Value
:
r
.
NatsAddr
,
Value
:
r
.
Config
.
NatsAddr
ess
,
})
})
}
}
if
r
.
EtcdAddr
!=
""
{
if
r
.
Config
.
EtcdAddr
ess
!=
""
{
defaultEnvs
=
append
(
defaultEnvs
,
corev1
.
EnvVar
{
defaultEnvs
=
append
(
defaultEnvs
,
corev1
.
EnvVar
{
Name
:
"ETCD_ENDPOINTS"
,
Name
:
"ETCD_ENDPOINTS"
,
Value
:
r
.
EtcdAddr
,
Value
:
r
.
Config
.
EtcdAddr
ess
,
})
})
}
}
...
@@ -1366,34 +1294,6 @@ func (r *DynamoComponentDeploymentReconciler) generatePodTemplateSpec(ctx contex
...
@@ -1366,34 +1294,6 @@ func (r *DynamoComponentDeploymentReconciler) generatePodTemplateSpec(ctx contex
volumes
:=
make
([]
corev1
.
Volume
,
0
)
volumes
:=
make
([]
corev1
.
Volume
,
0
)
volumeMounts
:=
make
([]
corev1
.
VolumeMount
,
0
)
volumeMounts
:=
make
([]
corev1
.
VolumeMount
,
0
)
args
:=
make
([]
string
,
0
)
args
=
append
(
args
,
"cd"
,
"src"
,
"&&"
,
"uv"
,
"run"
,
"dynamo"
,
"serve"
)
// ensure liveness and readiness probes are enabled for the dynamo components
args
=
append
(
args
,
"--system-app-port"
,
fmt
.
Sprintf
(
"%d"
,
commonconsts
.
DynamoHealthPort
))
args
=
append
(
args
,
"--enable-system-app"
)
args
=
append
(
args
,
"--use-default-health-checks"
)
if
opt
.
dynamoComponentDeployment
.
Spec
.
ServiceName
!=
""
{
args
=
append
(
args
,
[]
string
{
"--service-name"
,
opt
.
dynamoComponentDeployment
.
Spec
.
ServiceName
}
...
)
args
=
append
(
args
,
opt
.
dynamoComponentDeployment
.
Spec
.
DynamoTag
)
if
opt
.
dynamoComponentDeployment
.
Spec
.
DynamoNamespace
!=
nil
&&
*
opt
.
dynamoComponentDeployment
.
Spec
.
DynamoNamespace
!=
""
{
args
=
append
(
args
,
fmt
.
Sprintf
(
"--%s.ServiceArgs.dynamo.namespace=%s"
,
opt
.
dynamoComponentDeployment
.
Spec
.
ServiceName
,
*
opt
.
dynamoComponentDeployment
.
Spec
.
DynamoNamespace
))
}
if
componentType
,
exists
:=
opt
.
dynamoComponentDeployment
.
Labels
[
commonconsts
.
KubeLabelDynamoComponent
];
exists
&&
componentType
==
ComponentTypePlanner
{
args
=
append
(
args
,
fmt
.
Sprintf
(
"--%s.environment=%s"
,
opt
.
dynamoComponentDeployment
.
Spec
.
ServiceName
,
KubernetesDeploymentStrategy
))
}
}
if
len
(
opt
.
dynamoComponentDeployment
.
Spec
.
Envs
)
>
0
{
for
_
,
env
:=
range
opt
.
dynamoComponentDeployment
.
Spec
.
Envs
{
if
env
.
Name
==
"DYNAMO_CONFIG_PATH"
{
args
=
append
(
args
,
"-f"
,
env
.
Value
)
}
}
}
dynamoResources
:=
opt
.
dynamoComponentDeployment
.
Spec
.
Resources
dynamoResources
:=
opt
.
dynamoComponentDeployment
.
Spec
.
Resources
resources
,
err
:=
getResourcesConfig
(
dynamoResources
)
resources
,
err
:=
getResourcesConfig
(
dynamoResources
)
...
@@ -1468,8 +1368,6 @@ func (r *DynamoComponentDeploymentReconciler) generatePodTemplateSpec(ctx contex
...
@@ -1468,8 +1368,6 @@ func (r *DynamoComponentDeploymentReconciler) generatePodTemplateSpec(ctx contex
container
:=
corev1
.
Container
{
container
:=
corev1
.
Container
{
Name
:
"main"
,
Name
:
"main"
,
Image
:
imageName
,
Image
:
imageName
,
Command
:
[]
string
{
"sh"
,
"-c"
},
Args
:
[]
string
{
strings
.
Join
(
args
,
" "
)},
LivenessProbe
:
livenessProbe
,
LivenessProbe
:
livenessProbe
,
ReadinessProbe
:
readinessProbe
,
ReadinessProbe
:
readinessProbe
,
Resources
:
resources
,
Resources
:
resources
,
...
@@ -1566,23 +1464,8 @@ func (r *DynamoComponentDeploymentReconciler) generatePodTemplateSpec(ctx contex
...
@@ -1566,23 +1464,8 @@ func (r *DynamoComponentDeploymentReconciler) generatePodTemplateSpec(ctx contex
if
opt
.
dynamoComponentDeployment
.
Spec
.
ExtraPodSpec
!=
nil
{
if
opt
.
dynamoComponentDeployment
.
Spec
.
ExtraPodSpec
!=
nil
{
extraPodSpecMainContainer
:=
opt
.
dynamoComponentDeployment
.
Spec
.
ExtraPodSpec
.
MainContainer
extraPodSpecMainContainer
:=
opt
.
dynamoComponentDeployment
.
Spec
.
ExtraPodSpec
.
MainContainer
if
extraPodSpecMainContainer
!=
nil
{
if
extraPodSpecMainContainer
!=
nil
{
if
len
(
extraPodSpecMainContainer
.
Command
)
>
0
{
// Merge non empty fields from extraPodSpecMainContainer into container, only overriding empty fields
logs
.
Info
(
"Overriding container '"
+
container
.
Name
+
"' Command with: "
+
strings
.
Join
(
extraPodSpecMainContainer
.
Command
,
" "
))
err
:=
mergo
.
Merge
(
&
container
,
extraPodSpecMainContainer
.
DeepCopy
())
container
.
Command
=
extraPodSpecMainContainer
.
Command
}
if
len
(
extraPodSpecMainContainer
.
Args
)
>
0
{
// Special case: if command is "sh -c", we must collapse args into a single string
if
len
(
container
.
Command
)
==
2
&&
container
.
Command
[
0
]
==
"sh"
&&
container
.
Command
[
1
]
==
"-c"
{
joinedArgs
:=
strings
.
Join
(
extraPodSpecMainContainer
.
Args
,
" "
)
logs
.
Info
(
"Special case detected for container '"
+
container
.
Name
+
"': Command is 'sh -c'; collapsing Args to: "
+
joinedArgs
)
container
.
Args
=
[]
string
{
joinedArgs
}
}
else
{
logs
.
Info
(
"Overriding container '"
+
container
.
Name
+
"' Args with: "
+
strings
.
Join
(
extraPodSpecMainContainer
.
Args
,
" "
))
container
.
Args
=
extraPodSpecMainContainer
.
Args
}
}
// finally, Merge non empty fields from extraPodSpecMainContainer into container, only overriding empty fields
err
:=
mergo
.
Merge
(
&
container
,
extraPodSpecMainContainer
)
if
err
!=
nil
{
if
err
!=
nil
{
err
=
errors
.
Wrapf
(
err
,
"failed to merge extraPodSpecMainContainer into container"
)
err
=
errors
.
Wrapf
(
err
,
"failed to merge extraPodSpecMainContainer into container"
)
return
nil
,
err
return
nil
,
err
...
@@ -1723,7 +1606,7 @@ func (r *DynamoComponentDeploymentReconciler) generatePodTemplateSpec(ctx contex
...
@@ -1723,7 +1606,7 @@ func (r *DynamoComponentDeploymentReconciler) generatePodTemplateSpec(ctx contex
}
}
func
getResourcesConfig
(
resources
*
dynamoCommon
.
Resources
)
(
corev1
.
ResourceRequirements
,
error
)
{
func
getResourcesConfig
(
resources
*
dynamoCommon
.
Resources
)
(
corev1
.
ResourceRequirements
,
error
)
{
curren
tResources
:=
corev1
.
ResourceRequirements
{
defaul
tResources
:=
corev1
.
ResourceRequirements
{
Requests
:
corev1
.
ResourceList
{
Requests
:
corev1
.
ResourceList
{
corev1
.
ResourceCPU
:
resource
.
MustParse
(
"300m"
),
corev1
.
ResourceCPU
:
resource
.
MustParse
(
"300m"
),
corev1
.
ResourceMemory
:
resource
.
MustParse
(
"500Mi"
),
corev1
.
ResourceMemory
:
resource
.
MustParse
(
"500Mi"
),
...
@@ -1733,86 +1616,18 @@ func getResourcesConfig(resources *dynamoCommon.Resources) (corev1.ResourceRequi
...
@@ -1733,86 +1616,18 @@ func getResourcesConfig(resources *dynamoCommon.Resources) (corev1.ResourceRequi
corev1
.
ResourceMemory
:
resource
.
MustParse
(
"1Gi"
),
corev1
.
ResourceMemory
:
resource
.
MustParse
(
"1Gi"
),
},
},
}
}
if
resources
==
nil
{
if
resources
==
nil
{
return
currentResources
,
nil
return
defaultResources
,
nil
}
if
resources
.
Limits
!=
nil
{
if
resources
.
Limits
.
CPU
!=
""
{
q
,
err
:=
resource
.
ParseQuantity
(
resources
.
Limits
.
CPU
)
if
err
!=
nil
{
return
currentResources
,
errors
.
Wrapf
(
err
,
"parse limits cpu quantity"
)
}
}
if
currentResources
.
Limits
==
nil
{
resourcesConfig
,
err
:=
controller_common
.
GetResourcesConfig
(
resources
)
currentResources
.
Limits
=
make
(
corev1
.
ResourceList
)
}
currentResources
.
Limits
[
corev1
.
ResourceCPU
]
=
q
}
if
resources
.
Limits
.
Memory
!=
""
{
q
,
err
:=
resource
.
ParseQuantity
(
resources
.
Limits
.
Memory
)
if
err
!=
nil
{
if
err
!=
nil
{
return
currentResources
,
errors
.
Wrapf
(
err
,
"parse limits memory quantity"
)
return
corev1
.
ResourceRequirements
{},
errors
.
Wrapf
(
err
,
"failed to get resources config"
)
}
if
currentResources
.
Limits
==
nil
{
currentResources
.
Limits
=
make
(
corev1
.
ResourceList
)
}
}
currentResources
.
Limits
[
corev1
.
ResourceMemory
]
=
q
err
=
mergo
.
Merge
(
resourcesConfig
,
defaultResources
.
DeepCopy
())
}
if
resources
.
Limits
.
GPU
!=
""
{
q
,
err
:=
resource
.
ParseQuantity
(
resources
.
Limits
.
GPU
)
if
err
!=
nil
{
if
err
!=
nil
{
return
currentResources
,
errors
.
Wrapf
(
err
,
"parse limits gpu quantity"
)
return
corev1
.
ResourceRequirements
{},
errors
.
Wrapf
(
err
,
"failed to merge resources config"
)
}
if
currentResources
.
Limits
==
nil
{
currentResources
.
Limits
=
make
(
corev1
.
ResourceList
)
}
currentResources
.
Limits
[
commonconsts
.
KubeResourceGPUNvidia
]
=
q
}
for
k
,
v
:=
range
resources
.
Limits
.
Custom
{
q
,
err
:=
resource
.
ParseQuantity
(
v
)
if
err
!=
nil
{
return
currentResources
,
errors
.
Wrapf
(
err
,
"parse limits %s quantity"
,
k
)
}
if
currentResources
.
Limits
==
nil
{
currentResources
.
Limits
=
make
(
corev1
.
ResourceList
)
}
currentResources
.
Limits
[
corev1
.
ResourceName
(
k
)]
=
q
}
}
if
resources
.
Requests
!=
nil
{
if
resources
.
Requests
.
CPU
!=
""
{
q
,
err
:=
resource
.
ParseQuantity
(
resources
.
Requests
.
CPU
)
if
err
!=
nil
{
return
currentResources
,
errors
.
Wrapf
(
err
,
"parse requests cpu quantity"
)
}
if
currentResources
.
Requests
==
nil
{
currentResources
.
Requests
=
make
(
corev1
.
ResourceList
)
}
currentResources
.
Requests
[
corev1
.
ResourceCPU
]
=
q
}
if
resources
.
Requests
.
Memory
!=
""
{
q
,
err
:=
resource
.
ParseQuantity
(
resources
.
Requests
.
Memory
)
if
err
!=
nil
{
return
currentResources
,
errors
.
Wrapf
(
err
,
"parse requests memory quantity"
)
}
if
currentResources
.
Requests
==
nil
{
currentResources
.
Requests
=
make
(
corev1
.
ResourceList
)
}
currentResources
.
Requests
[
corev1
.
ResourceMemory
]
=
q
}
for
k
,
v
:=
range
resources
.
Requests
.
Custom
{
q
,
err
:=
resource
.
ParseQuantity
(
v
)
if
err
!=
nil
{
return
currentResources
,
errors
.
Wrapf
(
err
,
"parse requests %s quantity"
,
k
)
}
if
currentResources
.
Requests
==
nil
{
currentResources
.
Requests
=
make
(
corev1
.
ResourceList
)
}
currentResources
.
Requests
[
corev1
.
ResourceName
(
k
)]
=
q
}
}
}
return
currentR
esources
,
nil
return
*
r
esources
Config
,
nil
}
}
func
(
r
*
DynamoComponentDeploymentReconciler
)
generateService
(
opt
generateResourceOption
)
(
*
corev1
.
Service
,
bool
,
error
)
{
func
(
r
*
DynamoComponentDeploymentReconciler
)
generateService
(
opt
generateResourceOption
)
(
*
corev1
.
Service
,
bool
,
error
)
{
...
@@ -1930,7 +1745,7 @@ func (r *DynamoComponentDeploymentReconciler) SetupWithManager(mgr ctrl.Manager)
...
@@ -1930,7 +1745,7 @@ func (r *DynamoComponentDeploymentReconciler) SetupWithManager(mgr ctrl.Manager)
}))
}))
}
}
if
r
.
UseVirtualService
{
if
r
.
Config
.
IngressConfig
.
UseVirtualService
()
{
m
.
Owns
(
&
networkingv1beta1
.
VirtualService
{},
builder
.
WithPredicates
(
predicate
.
GenerationChangedPredicate
{}))
m
.
Owns
(
&
networkingv1beta1
.
VirtualService
{},
builder
.
WithPredicates
(
predicate
.
GenerationChangedPredicate
{}))
}
}
m
.
Owns
(
&
autoscalingv2
.
HorizontalPodAutoscaler
{})
m
.
Owns
(
&
autoscalingv2
.
HorizontalPodAutoscaler
{})
...
...
deploy/cloud/operator/internal/controller/dynamocomponentdeployment_controller_test.go
View file @
ee3a8e42
...
@@ -278,7 +278,7 @@ func TestDynamoComponentDeploymentReconciler_generateIngress(t *testing.T) {
...
@@ -278,7 +278,7 @@ func TestDynamoComponentDeploymentReconciler_generateIngress(t *testing.T) {
DynamoComponentDeploymentSharedSpec
:
v1alpha1
.
DynamoComponentDeploymentSharedSpec
{
DynamoComponentDeploymentSharedSpec
:
v1alpha1
.
DynamoComponentDeploymentSharedSpec
{
ServiceName
:
"service1"
,
ServiceName
:
"service1"
,
DynamoNamespace
:
&
[]
string
{
"default"
}[
0
],
DynamoNamespace
:
&
[]
string
{
"default"
}[
0
],
Ingress
:
v1alpha1
.
IngressSpec
{
Ingress
:
&
v1alpha1
.
IngressSpec
{
Enabled
:
true
,
Enabled
:
true
,
Host
:
"someservice"
,
Host
:
"someservice"
,
IngressControllerClassName
:
&
[]
string
{
"nginx"
}[
0
],
IngressControllerClassName
:
&
[]
string
{
"nginx"
}[
0
],
...
@@ -337,7 +337,7 @@ func TestDynamoComponentDeploymentReconciler_generateIngress(t *testing.T) {
...
@@ -337,7 +337,7 @@ func TestDynamoComponentDeploymentReconciler_generateIngress(t *testing.T) {
DynamoComponentDeploymentSharedSpec
:
v1alpha1
.
DynamoComponentDeploymentSharedSpec
{
DynamoComponentDeploymentSharedSpec
:
v1alpha1
.
DynamoComponentDeploymentSharedSpec
{
ServiceName
:
"service1"
,
ServiceName
:
"service1"
,
DynamoNamespace
:
&
[]
string
{
"default"
}[
0
],
DynamoNamespace
:
&
[]
string
{
"default"
}[
0
],
Ingress
:
v1alpha1
.
IngressSpec
{
Ingress
:
&
v1alpha1
.
IngressSpec
{
Enabled
:
false
,
Enabled
:
false
,
},
},
},
},
...
@@ -400,7 +400,7 @@ func TestDynamoComponentDeploymentReconciler_generateVirtualService(t *testing.T
...
@@ -400,7 +400,7 @@ func TestDynamoComponentDeploymentReconciler_generateVirtualService(t *testing.T
DynamoComponentDeploymentSharedSpec
:
v1alpha1
.
DynamoComponentDeploymentSharedSpec
{
DynamoComponentDeploymentSharedSpec
:
v1alpha1
.
DynamoComponentDeploymentSharedSpec
{
ServiceName
:
"service1"
,
ServiceName
:
"service1"
,
DynamoNamespace
:
&
[]
string
{
"default"
}[
0
],
DynamoNamespace
:
&
[]
string
{
"default"
}[
0
],
Ingress
:
v1alpha1
.
IngressSpec
{
Ingress
:
&
v1alpha1
.
IngressSpec
{
Enabled
:
true
,
Enabled
:
true
,
},
},
},
},
...
@@ -432,7 +432,7 @@ func TestDynamoComponentDeploymentReconciler_generateVirtualService(t *testing.T
...
@@ -432,7 +432,7 @@ func TestDynamoComponentDeploymentReconciler_generateVirtualService(t *testing.T
DynamoComponentDeploymentSharedSpec
:
v1alpha1
.
DynamoComponentDeploymentSharedSpec
{
DynamoComponentDeploymentSharedSpec
:
v1alpha1
.
DynamoComponentDeploymentSharedSpec
{
ServiceName
:
"service1"
,
ServiceName
:
"service1"
,
DynamoNamespace
:
&
[]
string
{
"default"
}[
0
],
DynamoNamespace
:
&
[]
string
{
"default"
}[
0
],
Ingress
:
v1alpha1
.
IngressSpec
{
Ingress
:
&
v1alpha1
.
IngressSpec
{
Enabled
:
true
,
Enabled
:
true
,
Host
:
"someservice"
,
Host
:
"someservice"
,
UseVirtualService
:
true
,
UseVirtualService
:
true
,
...
@@ -498,10 +498,7 @@ func TestDynamoComponentDeploymentReconciler_generateVolcanoPodGroup(t *testing.
...
@@ -498,10 +498,7 @@ func TestDynamoComponentDeploymentReconciler_generateVolcanoPodGroup(t *testing.
Client
client
.
Client
Client
client
.
Client
Recorder
record
.
EventRecorder
Recorder
record
.
EventRecorder
Config
controller_common
.
Config
Config
controller_common
.
Config
NatsAddr
string
EtcdAddr
string
EtcdStorage
etcdStorage
EtcdStorage
etcdStorage
UseVirtualService
bool
}
}
type
args
struct
{
type
args
struct
{
ctx
context
.
Context
ctx
context
.
Context
...
@@ -758,10 +755,7 @@ func TestDynamoComponentDeploymentReconciler_generateVolcanoPodGroup(t *testing.
...
@@ -758,10 +755,7 @@ func TestDynamoComponentDeploymentReconciler_generateVolcanoPodGroup(t *testing.
Client
:
tt
.
fields
.
Client
,
Client
:
tt
.
fields
.
Client
,
Recorder
:
tt
.
fields
.
Recorder
,
Recorder
:
tt
.
fields
.
Recorder
,
Config
:
tt
.
fields
.
Config
,
Config
:
tt
.
fields
.
Config
,
NatsAddr
:
tt
.
fields
.
NatsAddr
,
EtcdAddr
:
tt
.
fields
.
EtcdAddr
,
EtcdStorage
:
tt
.
fields
.
EtcdStorage
,
EtcdStorage
:
tt
.
fields
.
EtcdStorage
,
UseVirtualService
:
tt
.
fields
.
UseVirtualService
,
}
}
got
,
got1
,
err
:=
r
.
generateVolcanoPodGroup
(
tt
.
args
.
ctx
,
tt
.
args
.
opt
)
got
,
got1
,
err
:=
r
.
generateVolcanoPodGroup
(
tt
.
args
.
ctx
,
tt
.
args
.
opt
)
if
(
err
!=
nil
)
!=
tt
.
wantErr
{
if
(
err
!=
nil
)
!=
tt
.
wantErr
{
...
@@ -789,10 +783,7 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
...
@@ -789,10 +783,7 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
Client
client
.
Client
Client
client
.
Client
Recorder
record
.
EventRecorder
Recorder
record
.
EventRecorder
Config
controller_common
.
Config
Config
controller_common
.
Config
NatsAddr
string
EtcdAddr
string
EtcdStorage
etcdStorage
EtcdStorage
etcdStorage
UseVirtualService
bool
DockerSecretRetriever
*
mockDockerSecretRetriever
DockerSecretRetriever
*
mockDockerSecretRetriever
}
}
type
args
struct
{
type
args
struct
{
...
@@ -847,6 +838,13 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
...
@@ -847,6 +838,13 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
ExtraPodSpec
:
&
dynamoCommon
.
ExtraPodSpec
{
ExtraPodSpec
:
&
dynamoCommon
.
ExtraPodSpec
{
MainContainer
:
&
corev1
.
Container
{
MainContainer
:
&
corev1
.
Container
{
Image
:
"test-image:latest"
,
Image
:
"test-image:latest"
,
Command
:
[]
string
{
"sh"
,
"-c"
,
},
Args
:
[]
string
{
"some dynamo command"
,
},
},
},
},
},
},
},
...
@@ -897,7 +895,7 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
...
@@ -897,7 +895,7 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
Name
:
"main"
,
Name
:
"main"
,
Image
:
"test-image:latest"
,
Image
:
"test-image:latest"
,
Command
:
[]
string
{
"sh"
,
"-c"
},
Command
:
[]
string
{
"sh"
,
"-c"
},
Args
:
[]
string
{
"ray start --head --port=6379 &&
cd src && uv run dynamo serve --system-app-port 5000 --enable-system-app --use-default-health-checks --service-name test-lws-deploy-service test-tag --test-lws-deploy-service.ServiceArgs.dynamo.namespace=default
"
},
Args
:
[]
string
{
"ray start --head --port=6379 &&
some dynamo command
"
},
Env
:
[]
corev1
.
EnvVar
{{
Name
:
"DYNAMO_PORT"
,
Value
:
fmt
.
Sprintf
(
"%d"
,
commonconsts
.
DynamoServicePort
)}},
Env
:
[]
corev1
.
EnvVar
{{
Name
:
"DYNAMO_PORT"
,
Value
:
fmt
.
Sprintf
(
"%d"
,
commonconsts
.
DynamoServicePort
)}},
VolumeMounts
:
[]
corev1
.
VolumeMount
{
VolumeMounts
:
[]
corev1
.
VolumeMount
{
{
{
...
@@ -1095,10 +1093,7 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
...
@@ -1095,10 +1093,7 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
Client
:
fakeKubeClient
,
// Use the fake client
Client
:
fakeKubeClient
,
// Use the fake client
Recorder
:
tt
.
fields
.
Recorder
,
Recorder
:
tt
.
fields
.
Recorder
,
Config
:
tt
.
fields
.
Config
,
Config
:
tt
.
fields
.
Config
,
NatsAddr
:
tt
.
fields
.
NatsAddr
,
EtcdAddr
:
tt
.
fields
.
EtcdAddr
,
EtcdStorage
:
tt
.
fields
.
EtcdStorage
,
EtcdStorage
:
tt
.
fields
.
EtcdStorage
,
UseVirtualService
:
tt
.
fields
.
UseVirtualService
,
DockerSecretRetriever
:
tt
.
fields
.
DockerSecretRetriever
,
DockerSecretRetriever
:
tt
.
fields
.
DockerSecretRetriever
,
// Scheme: s, // Pass scheme if reconciler uses it directly, often client uses it
// Scheme: s, // Pass scheme if reconciler uses it directly, often client uses it
}
}
...
...
deploy/cloud/operator/internal/controller/dynamographdeployment_controller.go
View file @
ee3a8e42
...
@@ -21,6 +21,10 @@ import (
...
@@ -21,6 +21,10 @@ import (
"context"
"context"
"fmt"
"fmt"
grovev1alpha1
"github.com/NVIDIA/grove/operator/api/core/v1alpha1"
networkingv1beta1
"istio.io/client-go/pkg/apis/networking/v1beta1"
corev1
"k8s.io/api/core/v1"
networkingv1
"k8s.io/api/networking/v1"
metav1
"k8s.io/apimachinery/pkg/apis/meta/v1"
metav1
"k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/tools/record"
"k8s.io/client-go/tools/record"
ctrl
"sigs.k8s.io/controller-runtime"
ctrl
"sigs.k8s.io/controller-runtime"
...
@@ -31,14 +35,19 @@ import (
...
@@ -31,14 +35,19 @@ import (
"sigs.k8s.io/controller-runtime/pkg/predicate"
"sigs.k8s.io/controller-runtime/pkg/predicate"
nvidiacomv1alpha1
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1"
nvidiacomv1alpha1
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts"
commonController
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/controller_common"
commonController
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/controller_common"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/dynamo"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/dynamo"
)
)
type
State
string
type
Reason
string
type
Message
string
const
(
const
(
FailedState
=
"failed"
FailedState
State
=
"failed"
ReadyState
=
"successful"
ReadyState
State
=
"successful"
PendingState
=
"pending"
PendingState
State
=
"pending"
)
)
type
etcdStorage
interface
{
type
etcdStorage
interface
{
...
@@ -50,15 +59,13 @@ type DynamoGraphDeploymentReconciler struct {
...
@@ -50,15 +59,13 @@ type DynamoGraphDeploymentReconciler struct {
client
.
Client
client
.
Client
Config
commonController
.
Config
Config
commonController
.
Config
Recorder
record
.
EventRecorder
Recorder
record
.
EventRecorder
VirtualServiceGateway
string
DockerSecretRetriever
dockerSecretRetriever
IngressControllerClassName
string
IngressControllerTLSSecret
string
IngressHostSuffix
string
}
}
// +kubebuilder:rbac:groups=nvidia.com,resources=dynamographdeployments,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=nvidia.com,resources=dynamographdeployments,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=nvidia.com,resources=dynamographdeployments/status,verbs=get;update;patch
// +kubebuilder:rbac:groups=nvidia.com,resources=dynamographdeployments/status,verbs=get;update;patch
// +kubebuilder:rbac:groups=nvidia.com,resources=dynamographdeployments/finalizers,verbs=update
// +kubebuilder:rbac:groups=nvidia.com,resources=dynamographdeployments/finalizers,verbs=update
// +kubebuilder:rbac:groups=grove.io,resources=podgangsets,verbs=get;list;watch;create;update;patch;delete
// Reconcile is part of the main kubernetes reconciliation loop which aims to
// Reconcile is part of the main kubernetes reconciliation loop which aims to
// move the current state of the cluster closer to the desired state.
// move the current state of the cluster closer to the desired state.
...
@@ -73,8 +80,9 @@ func (r *DynamoGraphDeploymentReconciler) Reconcile(ctx context.Context, req ctr
...
@@ -73,8 +80,9 @@ func (r *DynamoGraphDeploymentReconciler) Reconcile(ctx context.Context, req ctr
logger
:=
log
.
FromContext
(
ctx
)
logger
:=
log
.
FromContext
(
ctx
)
var
err
error
var
err
error
reason
:=
"undefined"
reason
:=
Reason
(
"undefined"
)
message
:=
""
message
:=
Message
(
""
)
state
:=
PendingState
readyStatus
:=
metav1
.
ConditionFalse
readyStatus
:=
metav1
.
ConditionFalse
// retrieve the CRD
// retrieve the CRD
dynamoDeployment
:=
&
nvidiacomv1alpha1
.
DynamoGraphDeployment
{}
dynamoDeployment
:=
&
nvidiacomv1alpha1
.
DynamoGraphDeployment
{}
...
@@ -88,16 +96,20 @@ func (r *DynamoGraphDeploymentReconciler) Reconcile(ctx context.Context, req ctr
...
@@ -88,16 +96,20 @@ func (r *DynamoGraphDeploymentReconciler) Reconcile(ctx context.Context, req ctr
defer
func
()
{
defer
func
()
{
if
err
!=
nil
{
if
err
!=
nil
{
dynamoDeployment
.
SetS
tate
(
FailedState
)
s
tate
=
FailedState
message
=
err
.
Error
()
message
=
Message
(
err
.
Error
()
)
logger
.
Error
(
err
,
"Reconciliation failed"
)
logger
.
Error
(
err
,
"Reconciliation failed"
)
}
}
dynamoDeployment
.
SetState
(
string
(
state
))
if
state
==
ReadyState
{
readyStatus
=
metav1
.
ConditionTrue
}
// update the CRD status condition
// update the CRD status condition
dynamoDeployment
.
AddStatusCondition
(
metav1
.
Condition
{
dynamoDeployment
.
AddStatusCondition
(
metav1
.
Condition
{
Type
:
"Ready"
,
Type
:
"Ready"
,
Status
:
readyStatus
,
Status
:
readyStatus
,
Reason
:
reason
,
Reason
:
string
(
reason
)
,
Message
:
message
,
Message
:
string
(
message
)
,
LastTransitionTime
:
metav1
.
Now
(),
LastTransitionTime
:
metav1
.
Now
(),
})
})
err
=
r
.
Status
()
.
Update
(
ctx
,
dynamoDeployment
)
err
=
r
.
Status
()
.
Update
(
ctx
,
dynamoDeployment
)
...
@@ -116,81 +128,164 @@ func (r *DynamoGraphDeploymentReconciler) Reconcile(ctx context.Context, req ctr
...
@@ -116,81 +128,164 @@ func (r *DynamoGraphDeploymentReconciler) Reconcile(ctx context.Context, req ctr
if
deleted
{
if
deleted
{
return
ctrl
.
Result
{},
nil
return
ctrl
.
Result
{},
nil
}
}
state
,
reason
,
message
,
err
=
r
.
reconcileResources
(
ctx
,
dynamoDeployment
)
// generate the dynamoComponentsDeployments from the config
dynamoComponentsDeployments
,
err
:=
dynamo
.
GenerateDynamoComponentsDeployments
(
ctx
,
dynamoDeployment
,
r
.
generateDefaultIngressSpec
(
dynamoDeployment
))
if
err
!=
nil
{
if
err
!=
nil
{
logger
.
Error
(
err
,
"failed to
generate the DynamoComponentsDeployments and DynamoComponent
s"
)
logger
.
Error
(
err
,
"failed to
reconcile the resource
s"
)
reason
=
"failed_to_
generate_the_DynamoComponentsDeployment
s"
reason
=
"failed_to_
reconcile_the_resource
s"
return
ctrl
.
Result
{},
err
return
ctrl
.
Result
{},
err
}
}
return
ctrl
.
Result
{},
nil
}
// merge the dynamoComponentsDeployments with the dynamoComponentsDeployments from the CRD
type
Resource
interface
{
for
_
,
deployment
:=
range
dynamoComponentsDeployments
{
IsReady
()
bool
if
deployment
.
Spec
.
Ingress
.
Enabled
{
GetName
()
string
dynamoDeployment
.
SetEndpointStatus
(
r
.
isEndpointSecured
(),
getIngressHost
(
deployment
.
Spec
.
Ingress
))
}
func
(
r
*
DynamoGraphDeploymentReconciler
)
reconcileResources
(
ctx
context
.
Context
,
dynamoDeployment
*
nvidiacomv1alpha1
.
DynamoGraphDeployment
)
(
State
,
Reason
,
Message
,
error
)
{
logger
:=
log
.
FromContext
(
ctx
)
if
r
.
Config
.
EnableGrove
{
// check if explicit opt out of grove
if
dynamoDeployment
.
Annotations
[
consts
.
KubeAnnotationEnableGrove
]
==
consts
.
KubeLabelValueFalse
{
logger
.
Info
(
"Grove is explicitly disabled for this deployment, skipping grove resources reconciliation"
)
return
r
.
reconcileDynamoComponentsDeployments
(
ctx
,
dynamoDeployment
)
}
}
return
r
.
reconcileGroveResources
(
ctx
,
dynamoDeployment
)
}
}
return
r
.
reconcileDynamoComponentsDeployments
(
ctx
,
dynamoDeployment
)
notReadyDeployments
:=
[]
string
{}
}
// reconcile the dynamoComponentsDeployments
for
serviceName
,
dynamoComponentDeployment
:=
range
dynamoComponentsDeployments
{
func
(
r
*
DynamoGraphDeploymentReconciler
)
reconcileGroveResources
(
ctx
context
.
Context
,
dynamoDeployment
*
nvidiacomv1alpha1
.
DynamoGraphDeployment
)
(
State
,
Reason
,
Message
,
error
)
{
logger
.
Info
(
"Reconciling the DynamoComponentDeployment"
,
"serviceName"
,
serviceName
,
"dynamoComponentDeployment"
,
dynamoComponentDeployment
)
logger
:=
log
.
FromContext
(
ctx
)
_
,
dynamoComponentDeployment
,
err
=
commonController
.
SyncResource
(
ctx
,
r
,
dynamoDeployment
,
func
(
ctx
context
.
Context
)
(
*
nvidiacomv1alpha1
.
DynamoComponentDeployment
,
bool
,
error
)
{
// generate the dynamoComponentsDeployments from the config
return
dynamoComponentDeployment
,
false
,
nil
groveGangSet
,
err
:=
dynamo
.
GenerateGrovePodGangSet
(
ctx
,
dynamoDeployment
,
r
.
Config
,
r
.
DockerSecretRetriever
)
if
err
!=
nil
{
logger
.
Error
(
err
,
"failed to generate the Grove GangSet"
)
return
""
,
""
,
""
,
fmt
.
Errorf
(
"failed to generate the Grove GangSet: %w"
,
err
)
}
_
,
syncedGroveGangSet
,
err
:=
commonController
.
SyncResource
(
ctx
,
r
,
dynamoDeployment
,
func
(
ctx
context
.
Context
)
(
*
grovev1alpha1
.
PodGangSet
,
bool
,
error
)
{
return
groveGangSet
,
false
,
nil
})
})
if
err
!=
nil
{
if
err
!=
nil
{
logger
.
Error
(
err
,
"failed to sync the DynamoComponentDeployment"
)
logger
.
Error
(
err
,
"failed to sync the Grove GangSet"
)
reason
=
"failed_to_sync_the_DynamoComponentDeployment"
return
""
,
""
,
""
,
fmt
.
Errorf
(
"failed to sync the Grove GangSet: %w"
,
err
)
return
ctrl
.
Result
{},
err
}
}
if
!
dynamoComponentDeployment
.
Status
.
IsReady
()
{
groveGangSetAsResource
:=
commonController
.
WrapResource
(
syncedGroveGangSet
,
func
()
bool
{
notReadyDeployments
=
append
(
notReadyDeployments
,
dynamoComponentDeployment
.
Name
)
if
syncedGroveGangSet
.
Status
.
LastOperation
!=
nil
&&
syncedGroveGangSet
.
Status
.
LastOperation
.
State
==
grovev1alpha1
.
LastOperationStateSucceeded
{
return
true
}
}
return
false
})
resources
:=
[]
Resource
{
groveGangSetAsResource
}
for
componentName
,
component
:=
range
dynamoDeployment
.
Spec
.
Services
{
if
component
.
ComponentType
==
consts
.
ComponentTypeMain
{
// generate the main component service
mainComponentService
,
err
:=
dynamo
.
GenerateComponentService
(
ctx
,
dynamo
.
GetDynamoComponentName
(
dynamoDeployment
,
componentName
),
dynamoDeployment
.
Namespace
)
if
err
!=
nil
{
logger
.
Error
(
err
,
"failed to generate the main component service"
)
return
""
,
""
,
""
,
fmt
.
Errorf
(
"failed to generate the main component service: %w"
,
err
)
}
}
if
len
(
notReadyDeployments
)
==
0
{
_
,
syncedMainComponentService
,
err
:=
commonController
.
SyncResource
(
ctx
,
r
,
dynamoDeployment
,
func
(
ctx
context
.
Context
)
(
*
corev1
.
Service
,
bool
,
error
)
{
dynamoDeployment
.
SetState
(
ReadyState
)
return
mainComponentService
,
false
,
nil
reason
=
"all_deployments_are_ready"
})
message
=
"All deployments are ready"
if
err
!=
nil
{
readyStatus
=
metav1
.
ConditionTrue
logger
.
Error
(
err
,
"failed to sync the main component service"
)
}
else
{
return
""
,
""
,
""
,
fmt
.
Errorf
(
"failed to sync the main component service: %w"
,
err
)
reason
=
"some_deployments_are_not_ready"
message
=
fmt
.
Sprintf
(
"The following deployments are not ready: %v"
,
notReadyDeployments
)
dynamoDeployment
.
SetState
(
PendingState
)
}
}
mainComponentServiceAsResource
:=
commonController
.
WrapResource
(
syncedMainComponentService
,
func
()
bool
{
return
ctrl
.
Result
{},
nil
return
true
})
}
resources
=
append
(
resources
,
mainComponentServiceAsResource
)
// generate the main component ingress
func
(
r
*
DynamoGraphDeploymentReconciler
)
generateDefaultIngressSpec
(
dynamoDeployment
*
nvidiacomv1alpha1
.
DynamoGraphDeployment
)
*
nvidiacomv1alpha1
.
IngressSpec
{
ingressSpec
:=
dynamo
.
GenerateDefaultIngressSpec
(
dynamoDeployment
,
r
.
Config
.
IngressConfig
)
res
:=
&
nvidiacomv1alpha1
.
IngressSpec
{
if
component
.
Ingress
!=
nil
{
Enabled
:
r
.
VirtualServiceGateway
!=
""
||
r
.
IngressControllerClassName
!=
""
,
ingressSpec
=
*
component
.
Ingress
Host
:
dynamoDeployment
.
Name
,
}
UseVirtualService
:
r
.
VirtualServiceGateway
!=
""
,
mainComponentIngress
:=
dynamo
.
GenerateComponentIngress
(
ctx
,
dynamo
.
GetDynamoComponentName
(
dynamoDeployment
,
componentName
),
dynamoDeployment
.
Namespace
,
ingressSpec
)
if
err
!=
nil
{
logger
.
Error
(
err
,
"failed to generate the main component ingress"
)
return
""
,
""
,
""
,
fmt
.
Errorf
(
"failed to generate the main component ingress: %w"
,
err
)
}
_
,
syncedMainComponentIngress
,
err
:=
commonController
.
SyncResource
(
ctx
,
r
,
dynamoDeployment
,
func
(
ctx
context
.
Context
)
(
*
networkingv1
.
Ingress
,
bool
,
error
)
{
if
!
ingressSpec
.
Enabled
||
ingressSpec
.
IngressControllerClassName
==
nil
{
logger
.
Info
(
"Ingress is not enabled"
)
return
mainComponentIngress
,
true
,
nil
}
return
mainComponentIngress
,
false
,
nil
})
if
err
!=
nil
{
logger
.
Error
(
err
,
"failed to sync the main component ingress"
)
return
""
,
""
,
""
,
fmt
.
Errorf
(
"failed to sync the main component ingress: %w"
,
err
)
}
}
if
r
.
IngressControllerClassName
!=
""
{
resources
=
append
(
resources
,
commonController
.
WrapResource
(
syncedMainComponentIngress
,
func
()
bool
{
res
.
IngressControllerClassName
=
&
r
.
IngressControllerClassName
return
true
}))
// generate the main component virtual service
mainComponentVirtualService
:=
dynamo
.
GenerateComponentVirtualService
(
ctx
,
dynamo
.
GetDynamoComponentName
(
dynamoDeployment
,
componentName
),
dynamoDeployment
.
Namespace
,
ingressSpec
)
if
err
!=
nil
{
logger
.
Error
(
err
,
"failed to generate the main component virtual service"
)
return
""
,
""
,
""
,
fmt
.
Errorf
(
"failed to generate the main component virtual service: %w"
,
err
)
}
}
if
r
.
IngressControllerTLSSecret
!=
""
{
_
,
syncedMainComponentVirtualService
,
err
:=
commonController
.
SyncResource
(
ctx
,
r
,
dynamoDeployment
,
func
(
ctx
context
.
Context
)
(
*
networkingv1beta1
.
VirtualService
,
bool
,
error
)
{
res
.
TLS
=
&
nvidiacomv1alpha1
.
IngressTLSSpec
{
vsEnabled
:=
ingressSpec
.
Enabled
&&
ingressSpec
.
UseVirtualService
&&
ingressSpec
.
VirtualServiceGateway
!=
nil
SecretName
:
r
.
IngressControllerTLSSecret
,
if
!
vsEnabled
{
logger
.
Info
(
"VirtualService is not enabled"
)
return
mainComponentVirtualService
,
true
,
nil
}
}
return
mainComponentVirtualService
,
false
,
nil
})
if
err
!=
nil
{
logger
.
Error
(
err
,
"failed to sync the main component virtual service"
)
return
""
,
""
,
""
,
fmt
.
Errorf
(
"failed to sync the main component virtual service: %w"
,
err
)
}
}
if
r
.
IngressHostSuffix
!=
""
{
resources
=
append
(
resources
,
commonController
.
WrapResource
(
syncedMainComponentVirtualService
,
func
()
bool
{
res
.
HostSuffix
=
&
r
.
IngressHostSuffix
return
true
}))
}
}
if
r
.
VirtualServiceGateway
!=
""
{
res
.
VirtualServiceGateway
=
&
r
.
VirtualServiceGateway
}
}
return
r
es
return
r
.
checkResourcesReadiness
(
resources
)
}
}
func
(
r
*
DynamoGraphDeploymentReconciler
)
isEndpointSecured
()
bool
{
func
(
r
*
DynamoGraphDeploymentReconciler
)
checkResourcesReadiness
(
resources
[]
Resource
)
(
State
,
Reason
,
Message
,
error
)
{
if
r
.
VirtualServiceGateway
!=
""
&&
r
.
Config
.
VirtualServiceSupportsHTTPS
{
notReadyResources
:=
[]
string
{}
return
true
for
_
,
resource
:=
range
resources
{
if
!
resource
.
IsReady
()
{
notReadyResources
=
append
(
notReadyResources
,
resource
.
GetName
())
}
}
if
len
(
notReadyResources
)
==
0
{
return
ReadyState
,
"all_resources_are_ready"
,
Message
(
"All resources are ready"
),
nil
}
}
return
r
.
IngressControllerTLSSecret
!=
""
return
PendingState
,
"some_resources_are_not_ready"
,
Message
(
fmt
.
Sprintf
(
"%d resources not ready: %v"
,
len
(
notReadyResources
),
notReadyResources
)),
nil
}
func
(
r
*
DynamoGraphDeploymentReconciler
)
reconcileDynamoComponentsDeployments
(
ctx
context
.
Context
,
dynamoDeployment
*
nvidiacomv1alpha1
.
DynamoGraphDeployment
)
(
State
,
Reason
,
Message
,
error
)
{
resources
:=
[]
Resource
{}
logger
:=
log
.
FromContext
(
ctx
)
// generate the dynamoComponentsDeployments from the config
defaultIngressSpec
:=
dynamo
.
GenerateDefaultIngressSpec
(
dynamoDeployment
,
r
.
Config
.
IngressConfig
)
dynamoComponentsDeployments
,
err
:=
dynamo
.
GenerateDynamoComponentsDeployments
(
ctx
,
dynamoDeployment
,
&
defaultIngressSpec
)
if
err
!=
nil
{
logger
.
Error
(
err
,
"failed to generate the DynamoComponentsDeployments"
)
return
""
,
""
,
""
,
fmt
.
Errorf
(
"failed to generate the DynamoComponentsDeployments: %w"
,
err
)
}
// reconcile the dynamoComponentsDeployments
for
serviceName
,
dynamoComponentDeployment
:=
range
dynamoComponentsDeployments
{
logger
.
Info
(
"Reconciling the DynamoComponentDeployment"
,
"serviceName"
,
serviceName
,
"dynamoComponentDeployment"
,
dynamoComponentDeployment
)
_
,
dynamoComponentDeployment
,
err
=
commonController
.
SyncResource
(
ctx
,
r
,
dynamoDeployment
,
func
(
ctx
context
.
Context
)
(
*
nvidiacomv1alpha1
.
DynamoComponentDeployment
,
bool
,
error
)
{
return
dynamoComponentDeployment
,
false
,
nil
})
if
err
!=
nil
{
logger
.
Error
(
err
,
"failed to sync the DynamoComponentDeployment"
)
return
""
,
""
,
""
,
fmt
.
Errorf
(
"failed to sync the DynamoComponentDeployment: %w"
,
err
)
}
resources
=
append
(
resources
,
dynamoComponentDeployment
)
}
return
r
.
checkResourcesReadiness
(
resources
)
}
}
func
(
r
*
DynamoGraphDeploymentReconciler
)
FinalizeResource
(
ctx
context
.
Context
,
dynamoDeployment
*
nvidiacomv1alpha1
.
DynamoGraphDeployment
)
error
{
func
(
r
*
DynamoGraphDeploymentReconciler
)
FinalizeResource
(
ctx
context
.
Context
,
dynamoDeployment
*
nvidiacomv1alpha1
.
DynamoGraphDeployment
)
error
{
...
@@ -200,7 +295,7 @@ func (r *DynamoGraphDeploymentReconciler) FinalizeResource(ctx context.Context,
...
@@ -200,7 +295,7 @@ func (r *DynamoGraphDeploymentReconciler) FinalizeResource(ctx context.Context,
// SetupWithManager sets up the controller with the Manager.
// SetupWithManager sets up the controller with the Manager.
func
(
r
*
DynamoGraphDeploymentReconciler
)
SetupWithManager
(
mgr
ctrl
.
Manager
)
error
{
func
(
r
*
DynamoGraphDeploymentReconciler
)
SetupWithManager
(
mgr
ctrl
.
Manager
)
error
{
return
ctrl
.
NewControllerManagedBy
(
mgr
)
.
ctrlBuilder
:=
ctrl
.
NewControllerManagedBy
(
mgr
)
.
For
(
&
nvidiacomv1alpha1
.
DynamoGraphDeployment
{},
builder
.
WithPredicates
(
For
(
&
nvidiacomv1alpha1
.
DynamoGraphDeployment
{},
builder
.
WithPredicates
(
predicate
.
GenerationChangedPredicate
{},
predicate
.
GenerationChangedPredicate
{},
))
.
))
.
...
@@ -212,8 +307,17 @@ func (r *DynamoGraphDeploymentReconciler) SetupWithManager(mgr ctrl.Manager) err
...
@@ -212,8 +307,17 @@ func (r *DynamoGraphDeploymentReconciler) SetupWithManager(mgr ctrl.Manager) err
UpdateFunc
:
func
(
de
event
.
UpdateEvent
)
bool
{
return
true
},
UpdateFunc
:
func
(
de
event
.
UpdateEvent
)
bool
{
return
true
},
GenericFunc
:
func
(
ge
event
.
GenericEvent
)
bool
{
return
true
},
GenericFunc
:
func
(
ge
event
.
GenericEvent
)
bool
{
return
true
},
}))
.
}))
.
WithEventFilter
(
commonController
.
EphemeralDeploymentEventFilter
(
r
.
Config
))
.
WithEventFilter
(
commonController
.
EphemeralDeploymentEventFilter
(
r
.
Config
))
Complete
(
r
)
if
r
.
Config
.
EnableGrove
{
ctrlBuilder
=
ctrlBuilder
.
Owns
(
&
grovev1alpha1
.
PodGangSet
{},
builder
.
WithPredicates
(
predicate
.
Funcs
{
// ignore creation cause we don't want to be called again after we create the pod gang set
CreateFunc
:
func
(
ce
event
.
CreateEvent
)
bool
{
return
false
},
DeleteFunc
:
func
(
de
event
.
DeleteEvent
)
bool
{
return
true
},
UpdateFunc
:
func
(
de
event
.
UpdateEvent
)
bool
{
return
true
},
GenericFunc
:
func
(
ge
event
.
GenericEvent
)
bool
{
return
true
},
}))
}
return
ctrlBuilder
.
Complete
(
r
)
}
}
func
(
r
*
DynamoGraphDeploymentReconciler
)
GetRecorder
()
record
.
EventRecorder
{
func
(
r
*
DynamoGraphDeploymentReconciler
)
GetRecorder
()
record
.
EventRecorder
{
...
...
deploy/cloud/operator/internal/controller_common/predicate.go
View file @
ee3a8e42
...
@@ -30,9 +30,22 @@ import (
...
@@ -30,9 +30,22 @@ import (
type
Config
struct
{
type
Config
struct
{
// Enable resources filtering, only the resources belonging to the given namespace will be handled.
// Enable resources filtering, only the resources belonging to the given namespace will be handled.
RestrictedNamespace
string
RestrictedNamespace
string
// If true, assume VirtualService endpoints are HTTPS
VirtualServiceSupportsHTTPS
bool
EnableLWS
bool
EnableLWS
bool
EnableGrove
bool
EtcdAddress
string
NatsAddress
string
IngressConfig
IngressConfig
}
type
IngressConfig
struct
{
VirtualServiceGateway
string
IngressControllerClassName
string
IngressControllerTLSSecret
string
IngressHostSuffix
string
}
func
(
i
*
IngressConfig
)
UseVirtualService
()
bool
{
return
i
.
VirtualServiceGateway
!=
""
}
}
func
EphemeralDeploymentEventFilter
(
config
Config
)
predicate
.
Predicate
{
func
EphemeralDeploymentEventFilter
(
config
Config
)
predicate
.
Predicate
{
...
...
deploy/cloud/operator/internal/controller_common/resource.go
View file @
ee3a8e42
...
@@ -25,8 +25,11 @@ import (
...
@@ -25,8 +25,11 @@ import (
"reflect"
"reflect"
"sort"
"sort"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/dynamo/common"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts"
corev1
"k8s.io/api/core/v1"
corev1
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/types"
...
@@ -352,3 +355,104 @@ func firstKey(m map[string]interface{}) string {
...
@@ -352,3 +355,104 @@ func firstKey(m map[string]interface{}) string {
sort
.
Strings
(
keys
)
sort
.
Strings
(
keys
)
return
keys
[
0
]
return
keys
[
0
]
}
}
func
GetResourcesConfig
(
resources
*
common
.
Resources
)
(
*
corev1
.
ResourceRequirements
,
error
)
{
if
resources
==
nil
{
return
nil
,
nil
}
currentResources
:=
&
corev1
.
ResourceRequirements
{}
if
resources
.
Limits
!=
nil
{
if
resources
.
Limits
.
CPU
!=
""
{
q
,
err
:=
resource
.
ParseQuantity
(
resources
.
Limits
.
CPU
)
if
err
!=
nil
{
return
nil
,
fmt
.
Errorf
(
"parse limits cpu quantity: %w"
,
err
)
}
if
currentResources
.
Limits
==
nil
{
currentResources
.
Limits
=
make
(
corev1
.
ResourceList
)
}
currentResources
.
Limits
[
corev1
.
ResourceCPU
]
=
q
}
if
resources
.
Limits
.
Memory
!=
""
{
q
,
err
:=
resource
.
ParseQuantity
(
resources
.
Limits
.
Memory
)
if
err
!=
nil
{
return
nil
,
fmt
.
Errorf
(
"parse limits memory quantity: %w"
,
err
)
}
if
currentResources
.
Limits
==
nil
{
currentResources
.
Limits
=
make
(
corev1
.
ResourceList
)
}
currentResources
.
Limits
[
corev1
.
ResourceMemory
]
=
q
}
if
resources
.
Limits
.
GPU
!=
""
{
q
,
err
:=
resource
.
ParseQuantity
(
resources
.
Limits
.
GPU
)
if
err
!=
nil
{
return
nil
,
fmt
.
Errorf
(
"parse limits gpu quantity: %w"
,
err
)
}
if
currentResources
.
Limits
==
nil
{
currentResources
.
Limits
=
make
(
corev1
.
ResourceList
)
}
currentResources
.
Limits
[
corev1
.
ResourceName
(
consts
.
KubeResourceGPUNvidia
)]
=
q
}
for
k
,
v
:=
range
resources
.
Limits
.
Custom
{
q
,
err
:=
resource
.
ParseQuantity
(
v
)
if
err
!=
nil
{
return
nil
,
fmt
.
Errorf
(
"parse limits %s quantity: %w"
,
k
,
err
)
}
if
currentResources
.
Limits
==
nil
{
currentResources
.
Limits
=
make
(
corev1
.
ResourceList
)
}
currentResources
.
Limits
[
corev1
.
ResourceName
(
k
)]
=
q
}
}
if
resources
.
Requests
!=
nil
{
if
resources
.
Requests
.
CPU
!=
""
{
q
,
err
:=
resource
.
ParseQuantity
(
resources
.
Requests
.
CPU
)
if
err
!=
nil
{
return
nil
,
fmt
.
Errorf
(
"parse requests cpu quantity: %w"
,
err
)
}
if
currentResources
.
Requests
==
nil
{
currentResources
.
Requests
=
make
(
corev1
.
ResourceList
)
}
currentResources
.
Requests
[
corev1
.
ResourceCPU
]
=
q
}
if
resources
.
Requests
.
Memory
!=
""
{
q
,
err
:=
resource
.
ParseQuantity
(
resources
.
Requests
.
Memory
)
if
err
!=
nil
{
return
nil
,
fmt
.
Errorf
(
"parse requests memory quantity: %w"
,
err
)
}
if
currentResources
.
Requests
==
nil
{
currentResources
.
Requests
=
make
(
corev1
.
ResourceList
)
}
currentResources
.
Requests
[
corev1
.
ResourceMemory
]
=
q
}
for
k
,
v
:=
range
resources
.
Requests
.
Custom
{
q
,
err
:=
resource
.
ParseQuantity
(
v
)
if
err
!=
nil
{
return
nil
,
fmt
.
Errorf
(
"parse requests %s quantity: %w"
,
k
,
err
)
}
if
currentResources
.
Requests
==
nil
{
currentResources
.
Requests
=
make
(
corev1
.
ResourceList
)
}
currentResources
.
Requests
[
corev1
.
ResourceName
(
k
)]
=
q
}
}
return
currentResources
,
nil
}
type
Resource
struct
{
client
.
Object
isReady
func
()
bool
}
func
WrapResource
[
T
client
.
Object
](
resource
T
,
isReady
func
()
bool
)
*
Resource
{
return
&
Resource
{
Object
:
resource
,
isReady
:
isReady
,
}
}
func
(
r
*
Resource
)
IsReady
()
bool
{
return
r
.
isReady
()
}
deploy/cloud/operator/internal/dynamo/graph.go
View file @
ee3a8e42
...
@@ -21,13 +21,24 @@ import (
...
@@ -21,13 +21,24 @@ import (
"context"
"context"
"encoding/json"
"encoding/json"
"fmt"
"fmt"
"sort"
"strconv"
"strconv"
"strings"
"strings"
istioNetworking
"istio.io/api/networking/v1beta1"
metav1
"k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/intstr"
grovev1alpha1
"github.com/NVIDIA/grove/operator/api/core/v1alpha1"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/dynamo/common"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/dynamo/common"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1"
commonconsts
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts"
commonconsts
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/controller_common"
"github.com/imdario/mergo"
networkingv1beta1
"istio.io/client-go/pkg/apis/networking/v1beta1"
corev1
"k8s.io/api/core/v1"
corev1
"k8s.io/api/core/v1"
networkingv1
"k8s.io/api/networking/v1"
)
)
// ServiceConfig represents the YAML configuration structure for a service
// ServiceConfig represents the YAML configuration structure for a service
...
@@ -129,13 +140,13 @@ func SetLwsAnnotations(serviceArgs *ServiceArgs, deployment *v1alpha1.DynamoComp
...
@@ -129,13 +140,13 @@ func SetLwsAnnotations(serviceArgs *ServiceArgs, deployment *v1alpha1.DynamoComp
}
}
// GenerateDynamoComponentsDeployments generates a map of DynamoComponentDeployments from a DynamoGraphConfig
// GenerateDynamoComponentsDeployments generates a map of DynamoComponentDeployments from a DynamoGraphConfig
func
GenerateDynamoComponentsDeployments
(
ctx
context
.
Context
,
parentDynamoGraphDeployment
*
v1alpha1
.
DynamoGraphDeployment
,
i
ngressSpec
*
v1alpha1
.
IngressSpec
)
(
map
[
string
]
*
v1alpha1
.
DynamoComponentDeployment
,
error
)
{
func
GenerateDynamoComponentsDeployments
(
ctx
context
.
Context
,
parentDynamoGraphDeployment
*
v1alpha1
.
DynamoGraphDeployment
,
defaultI
ngressSpec
*
v1alpha1
.
IngressSpec
)
(
map
[
string
]
*
v1alpha1
.
DynamoComponentDeployment
,
error
)
{
deployments
:=
make
(
map
[
string
]
*
v1alpha1
.
DynamoComponentDeployment
)
deployments
:=
make
(
map
[
string
]
*
v1alpha1
.
DynamoComponentDeployment
)
graphDynamoNamespace
:=
""
graphDynamoNamespace
:=
""
for
componentName
,
component
:=
range
parentDynamoGraphDeployment
.
Spec
.
Services
{
for
componentName
,
component
:=
range
parentDynamoGraphDeployment
.
Spec
.
Services
{
deployment
:=
&
v1alpha1
.
DynamoComponentDeployment
{}
deployment
:=
&
v1alpha1
.
DynamoComponentDeployment
{}
deployment
.
Spec
.
DynamoComponentDeploymentSharedSpec
=
component
.
DynamoComponentDeploymentSharedSpec
deployment
.
Spec
.
DynamoComponentDeploymentSharedSpec
=
component
.
DynamoComponentDeploymentSharedSpec
deployment
.
Name
=
g
etDynamoComponentName
(
parentDynamoGraphDeployment
,
componentName
)
deployment
.
Name
=
G
etDynamoComponentName
(
parentDynamoGraphDeployment
,
componentName
)
deployment
.
Namespace
=
parentDynamoGraphDeployment
.
Namespace
deployment
.
Namespace
=
parentDynamoGraphDeployment
.
Namespace
deployment
.
Spec
.
ServiceName
=
componentName
deployment
.
Spec
.
ServiceName
=
componentName
dynamoNamespace
:=
GetDefaultDynamoNamespace
(
ctx
,
parentDynamoGraphDeployment
)
dynamoNamespace
:=
GetDefaultDynamoNamespace
(
ctx
,
parentDynamoGraphDeployment
)
...
@@ -160,8 +171,8 @@ func GenerateDynamoComponentsDeployments(ctx context.Context, parentDynamoGraphD
...
@@ -160,8 +171,8 @@ func GenerateDynamoComponentsDeployments(ctx context.Context, parentDynamoGraphD
}
}
deployment
.
Spec
.
ExtraPodSpec
.
ServiceAccountName
=
commonconsts
.
PlannerServiceAccountName
deployment
.
Spec
.
ExtraPodSpec
.
ServiceAccountName
=
commonconsts
.
PlannerServiceAccountName
}
}
if
deployment
.
IsMainComponent
()
&&
i
ngressSpec
!=
nil
{
if
deployment
.
IsMainComponent
()
&&
defaultI
ngressSpec
!=
nil
&&
deployment
.
Spec
.
Ingress
==
nil
{
deployment
.
Spec
.
Ingress
=
*
i
ngressSpec
deployment
.
Spec
.
Ingress
=
defaultI
ngressSpec
}
}
// merge the envs from the parent deployment with the envs from the service
// merge the envs from the parent deployment with the envs from the service
if
len
(
parentDynamoGraphDeployment
.
Spec
.
Envs
)
>
0
{
if
len
(
parentDynamoGraphDeployment
.
Spec
.
Envs
)
>
0
{
...
@@ -286,9 +297,271 @@ func mergeEnvs(common, specific []corev1.EnvVar) []corev1.EnvVar {
...
@@ -286,9 +297,271 @@ func mergeEnvs(common, specific []corev1.EnvVar) []corev1.EnvVar {
for
_
,
env
:=
range
envMap
{
for
_
,
env
:=
range
envMap
{
merged
=
append
(
merged
,
env
)
merged
=
append
(
merged
,
env
)
}
}
sort
.
Slice
(
merged
,
func
(
i
,
j
int
)
bool
{
return
merged
[
i
]
.
Name
<
merged
[
j
]
.
Name
})
return
merged
return
merged
}
}
func
g
etDynamoComponentName
(
dynamoDeployment
*
v1alpha1
.
DynamoGraphDeployment
,
component
string
)
string
{
func
G
etDynamoComponentName
(
dynamoDeployment
*
v1alpha1
.
DynamoGraphDeployment
,
component
string
)
string
{
return
fmt
.
Sprintf
(
"%s-%s"
,
dynamoDeployment
.
Name
,
strings
.
ToLower
(
component
))
return
fmt
.
Sprintf
(
"%s-%s"
,
dynamoDeployment
.
Name
,
strings
.
ToLower
(
component
))
}
}
type
SecretsRetriever
interface
{
GetSecrets
(
namespace
,
registry
string
)
([]
string
,
error
)
}
func
GenerateGrovePodGangSet
(
ctx
context
.
Context
,
dynamoDeployment
*
v1alpha1
.
DynamoGraphDeployment
,
controllerConfig
controller_common
.
Config
,
secretsRetriever
SecretsRetriever
)
(
*
grovev1alpha1
.
PodGangSet
,
error
)
{
gangSet
:=
&
grovev1alpha1
.
PodGangSet
{}
gangSet
.
Name
=
dynamoDeployment
.
Name
gangSet
.
Namespace
=
dynamoDeployment
.
Namespace
gangSet
.
Spec
.
Replicas
=
1
for
componentName
,
component
:=
range
dynamoDeployment
.
Spec
.
Services
{
container
:=
corev1
.
Container
{
Name
:
"main"
,
LivenessProbe
:
component
.
LivenessProbe
,
ReadinessProbe
:
component
.
ReadinessProbe
,
Env
:
component
.
Envs
,
Ports
:
[]
corev1
.
ContainerPort
{
{
Protocol
:
corev1
.
ProtocolTCP
,
Name
:
commonconsts
.
DynamoContainerPortName
,
ContainerPort
:
int32
(
commonconsts
.
DynamoServicePort
),
},
{
Protocol
:
corev1
.
ProtocolTCP
,
Name
:
commonconsts
.
DynamoHealthPortName
,
ContainerPort
:
int32
(
commonconsts
.
DynamoHealthPort
),
},
},
}
resourcesConfig
,
err
:=
controller_common
.
GetResourcesConfig
(
component
.
Resources
)
if
err
!=
nil
{
return
nil
,
fmt
.
Errorf
(
"failed to get resources config: %w"
,
err
)
}
container
.
Resources
=
*
resourcesConfig
if
component
.
ExtraPodSpec
!=
nil
&&
component
.
ExtraPodSpec
.
MainContainer
!=
nil
{
// merge the extraPodSpec from the parent deployment with the extraPodSpec from the service
err
:=
mergo
.
Merge
(
&
container
,
*
component
.
ExtraPodSpec
.
MainContainer
.
DeepCopy
(),
mergo
.
WithOverride
)
if
err
!=
nil
{
return
nil
,
fmt
.
Errorf
(
"failed to merge extraPodSpec: %w"
,
err
)
}
}
// retrieve the image pull secrets for the container
imagePullSecrets
:=
[]
corev1
.
LocalObjectReference
{}
if
secretsRetriever
!=
nil
{
secretsName
,
err
:=
secretsRetriever
.
GetSecrets
(
dynamoDeployment
.
Namespace
,
container
.
Image
)
if
err
!=
nil
{
return
nil
,
fmt
.
Errorf
(
"failed to get secrets for component %s and image %s: %w"
,
componentName
,
container
.
Image
,
err
)
}
for
_
,
secretName
:=
range
secretsName
{
imagePullSecrets
=
append
(
imagePullSecrets
,
corev1
.
LocalObjectReference
{
Name
:
secretName
,
})
}
}
// merge the envs from the parent deployment with the envs from the service
if
len
(
dynamoDeployment
.
Spec
.
Envs
)
>
0
{
container
.
Env
=
mergeEnvs
(
dynamoDeployment
.
Spec
.
Envs
,
container
.
Env
)
}
container
.
Env
=
append
(
container
.
Env
,
corev1
.
EnvVar
{
Name
:
commonconsts
.
EnvDynamoServicePort
,
Value
:
fmt
.
Sprintf
(
"%d"
,
commonconsts
.
DynamoServicePort
),
})
if
controllerConfig
.
NatsAddress
!=
""
{
container
.
Env
=
append
(
container
.
Env
,
corev1
.
EnvVar
{
Name
:
"NATS_SERVER"
,
Value
:
controllerConfig
.
NatsAddress
,
})
}
if
controllerConfig
.
EtcdAddress
!=
""
{
container
.
Env
=
append
(
container
.
Env
,
corev1
.
EnvVar
{
Name
:
"ETCD_ENDPOINTS"
,
Value
:
controllerConfig
.
EtcdAddress
,
})
}
if
component
.
EnvFromSecret
!=
nil
{
container
.
EnvFrom
=
append
(
container
.
EnvFrom
,
corev1
.
EnvFromSource
{
SecretRef
:
&
corev1
.
SecretEnvSource
{
LocalObjectReference
:
corev1
.
LocalObjectReference
{
Name
:
*
component
.
EnvFromSecret
},
},
})
}
gangSet
.
Spec
.
Template
.
Cliques
=
append
(
gangSet
.
Spec
.
Template
.
Cliques
,
&
grovev1alpha1
.
PodCliqueTemplateSpec
{
Name
:
strings
.
ToLower
(
componentName
),
Labels
:
map
[
string
]
string
{
commonconsts
.
KubeLabelDynamoSelector
:
GetDynamoComponentName
(
dynamoDeployment
,
componentName
),
},
Spec
:
grovev1alpha1
.
PodCliqueSpec
{
RoleName
:
strings
.
ToLower
(
componentName
),
Replicas
:
func
()
int32
{
if
component
.
Replicas
!=
nil
{
return
*
component
.
Replicas
}
return
1
}(),
PodSpec
:
corev1
.
PodSpec
{
Containers
:
[]
corev1
.
Container
{
container
},
ImagePullSecrets
:
imagePullSecrets
,
},
},
})
if
component
.
PVC
!=
nil
{
cliqueIndex
:=
len
(
gangSet
.
Spec
.
Template
.
Cliques
)
-
1
gangSet
.
Spec
.
Template
.
Cliques
[
cliqueIndex
]
.
Spec
.
PodSpec
.
Volumes
=
append
(
gangSet
.
Spec
.
Template
.
Cliques
[
cliqueIndex
]
.
Spec
.
PodSpec
.
Volumes
,
corev1
.
Volume
{
Name
:
*
component
.
PVC
.
Name
,
VolumeSource
:
corev1
.
VolumeSource
{
PersistentVolumeClaim
:
&
corev1
.
PersistentVolumeClaimVolumeSource
{
ClaimName
:
*
component
.
PVC
.
Name
,
},
},
})
gangSet
.
Spec
.
Template
.
Cliques
[
cliqueIndex
]
.
Spec
.
PodSpec
.
Containers
[
0
]
.
VolumeMounts
=
append
(
gangSet
.
Spec
.
Template
.
Cliques
[
cliqueIndex
]
.
Spec
.
PodSpec
.
Containers
[
0
]
.
VolumeMounts
,
corev1
.
VolumeMount
{
Name
:
*
component
.
PVC
.
Name
,
MountPath
:
*
component
.
PVC
.
MountPoint
,
})
}
}
return
gangSet
,
nil
}
func
GenerateComponentService
(
ctx
context
.
Context
,
componentName
,
componentNamespace
string
)
(
*
corev1
.
Service
,
error
)
{
service
:=
&
corev1
.
Service
{
ObjectMeta
:
metav1
.
ObjectMeta
{
Name
:
componentName
,
Namespace
:
componentNamespace
,
},
Spec
:
corev1
.
ServiceSpec
{
Selector
:
map
[
string
]
string
{
commonconsts
.
KubeLabelDynamoSelector
:
componentName
,
},
Ports
:
[]
corev1
.
ServicePort
{
{
Name
:
commonconsts
.
DynamoServicePortName
,
Port
:
commonconsts
.
DynamoServicePort
,
TargetPort
:
intstr
.
FromString
(
commonconsts
.
DynamoContainerPortName
),
Protocol
:
corev1
.
ProtocolTCP
,
},
},
},
}
return
service
,
nil
}
func
GenerateComponentIngress
(
ctx
context
.
Context
,
componentName
,
componentNamespace
string
,
ingressSpec
v1alpha1
.
IngressSpec
)
*
networkingv1
.
Ingress
{
resourceName
:=
componentName
ingress
:=
&
networkingv1
.
Ingress
{
ObjectMeta
:
metav1
.
ObjectMeta
{
Name
:
resourceName
,
Namespace
:
componentNamespace
,
},
}
host
:=
getIngressHost
(
ingressSpec
)
ingress
.
Spec
=
networkingv1
.
IngressSpec
{
IngressClassName
:
ingressSpec
.
IngressControllerClassName
,
Rules
:
[]
networkingv1
.
IngressRule
{
{
Host
:
host
,
IngressRuleValue
:
networkingv1
.
IngressRuleValue
{
HTTP
:
&
networkingv1
.
HTTPIngressRuleValue
{
Paths
:
[]
networkingv1
.
HTTPIngressPath
{
{
Path
:
"/"
,
PathType
:
&
[]
networkingv1
.
PathType
{
networkingv1
.
PathTypePrefix
}[
0
],
Backend
:
networkingv1
.
IngressBackend
{
Service
:
&
networkingv1
.
IngressServiceBackend
{
Name
:
resourceName
,
Port
:
networkingv1
.
ServiceBackendPort
{
Number
:
commonconsts
.
DynamoServicePort
,
},
},
},
},
},
},
},
},
},
}
if
ingressSpec
.
TLS
!=
nil
{
ingress
.
Spec
.
TLS
=
[]
networkingv1
.
IngressTLS
{
{
Hosts
:
[]
string
{
host
},
SecretName
:
ingressSpec
.
TLS
.
SecretName
,
},
}
}
return
ingress
}
func
getIngressHost
(
ingressSpec
v1alpha1
.
IngressSpec
)
string
{
host
:=
ingressSpec
.
Host
if
ingressSpec
.
HostPrefix
!=
nil
{
host
=
*
ingressSpec
.
HostPrefix
+
host
}
ingressSuffix
:=
commonconsts
.
DefaultIngressSuffix
if
ingressSpec
.
HostSuffix
!=
nil
{
ingressSuffix
=
*
ingressSpec
.
HostSuffix
}
return
fmt
.
Sprintf
(
"%s.%s"
,
host
,
ingressSuffix
)
}
func
GenerateComponentVirtualService
(
ctx
context
.
Context
,
componentName
,
componentNamespace
string
,
ingressSpec
v1alpha1
.
IngressSpec
)
*
networkingv1beta1
.
VirtualService
{
vs
:=
&
networkingv1beta1
.
VirtualService
{
ObjectMeta
:
metav1
.
ObjectMeta
{
Name
:
componentName
,
Namespace
:
componentNamespace
,
},
}
vs
.
Spec
=
istioNetworking
.
VirtualService
{
Hosts
:
[]
string
{
getIngressHost
(
ingressSpec
),
},
Gateways
:
[]
string
{
*
ingressSpec
.
VirtualServiceGateway
},
Http
:
[]
*
istioNetworking
.
HTTPRoute
{
{
Match
:
[]
*
istioNetworking
.
HTTPMatchRequest
{
{
Uri
:
&
istioNetworking
.
StringMatch
{
MatchType
:
&
istioNetworking
.
StringMatch_Prefix
{
Prefix
:
"/"
},
},
},
},
Route
:
[]
*
istioNetworking
.
HTTPRouteDestination
{
{
Destination
:
&
istioNetworking
.
Destination
{
Host
:
componentName
,
Port
:
&
istioNetworking
.
PortSelector
{
Number
:
commonconsts
.
DynamoServicePort
,
},
},
},
},
},
},
}
return
vs
}
func
GenerateDefaultIngressSpec
(
dynamoDeployment
*
v1alpha1
.
DynamoGraphDeployment
,
ingressConfig
controller_common
.
IngressConfig
)
v1alpha1
.
IngressSpec
{
res
:=
v1alpha1
.
IngressSpec
{
Enabled
:
ingressConfig
.
VirtualServiceGateway
!=
""
||
ingressConfig
.
IngressControllerClassName
!=
""
,
Host
:
dynamoDeployment
.
Name
,
UseVirtualService
:
ingressConfig
.
VirtualServiceGateway
!=
""
,
}
if
ingressConfig
.
IngressControllerClassName
!=
""
{
res
.
IngressControllerClassName
=
&
ingressConfig
.
IngressControllerClassName
}
if
ingressConfig
.
IngressControllerTLSSecret
!=
""
{
res
.
TLS
=
&
v1alpha1
.
IngressTLSSpec
{
SecretName
:
ingressConfig
.
IngressControllerTLSSecret
,
}
}
if
ingressConfig
.
IngressHostSuffix
!=
""
{
res
.
HostSuffix
=
&
ingressConfig
.
IngressHostSuffix
}
if
ingressConfig
.
VirtualServiceGateway
!=
""
{
res
.
VirtualServiceGateway
=
&
ingressConfig
.
VirtualServiceGateway
}
return
res
}
deploy/cloud/operator/internal/dynamo/graph_test.go
View file @
ee3a8e42
...
@@ -24,15 +24,18 @@ import (
...
@@ -24,15 +24,18 @@ import (
"sort"
"sort"
"testing"
"testing"
grovev1alpha1
"github.com/NVIDIA/grove/operator/api/core/v1alpha1"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/dynamo/common"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/dynamo/common"
compounaiCommon
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/dynamo/common"
compounaiCommon
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/dynamo/common"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1"
nvidiacomv1alpha1
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1"
commonconsts
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts"
commonconsts
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/controller_common"
"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp"
corev1
"k8s.io/api/core/v1"
corev1
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1
"k8s.io/apimachinery/pkg/apis/meta/v1"
metav1
"k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/intstr"
nvidiacomv1alpha1
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1"
)
)
func
TestGenerateDynamoComponentsDeployments
(
t
*
testing
.
T
)
{
func
TestGenerateDynamoComponentsDeployments
(
t
*
testing
.
T
)
{
...
@@ -88,7 +91,6 @@ func TestGenerateDynamoComponentsDeployments(t *testing.T) {
...
@@ -88,7 +91,6 @@ func TestGenerateDynamoComponentsDeployments(t *testing.T) {
},
},
},
},
},
},
ingressSpec
:
&
v1alpha1
.
IngressSpec
{},
},
},
want
:
map
[
string
]
*
v1alpha1
.
DynamoComponentDeployment
{
want
:
map
[
string
]
*
v1alpha1
.
DynamoComponentDeployment
{
"service1"
:
{
"service1"
:
{
...
@@ -197,7 +199,6 @@ func TestGenerateDynamoComponentsDeployments(t *testing.T) {
...
@@ -197,7 +199,6 @@ func TestGenerateDynamoComponentsDeployments(t *testing.T) {
},
},
},
},
},
},
ingressSpec
:
&
v1alpha1
.
IngressSpec
{},
},
},
want
:
map
[
string
]
*
v1alpha1
.
DynamoComponentDeployment
{
want
:
map
[
string
]
*
v1alpha1
.
DynamoComponentDeployment
{
"service1"
:
{
"service1"
:
{
...
@@ -306,7 +307,6 @@ func TestGenerateDynamoComponentsDeployments(t *testing.T) {
...
@@ -306,7 +307,6 @@ func TestGenerateDynamoComponentsDeployments(t *testing.T) {
},
},
},
},
},
},
ingressSpec
:
&
v1alpha1
.
IngressSpec
{},
},
},
want
:
nil
,
want
:
nil
,
wantErr
:
true
,
wantErr
:
true
,
...
@@ -387,7 +387,7 @@ func TestGenerateDynamoComponentsDeployments(t *testing.T) {
...
@@ -387,7 +387,7 @@ func TestGenerateDynamoComponentsDeployments(t *testing.T) {
commonconsts
.
KubeLabelDynamoNamespace
:
"dynamo-test-dynamographdeployment"
,
commonconsts
.
KubeLabelDynamoNamespace
:
"dynamo-test-dynamographdeployment"
,
},
},
Autoscaling
:
nil
,
Autoscaling
:
nil
,
Ingress
:
v1alpha1
.
IngressSpec
{
Ingress
:
&
v1alpha1
.
IngressSpec
{
Enabled
:
true
,
Enabled
:
true
,
Host
:
"test-dynamographdeployment"
,
Host
:
"test-dynamographdeployment"
,
},
},
...
@@ -607,7 +607,6 @@ func TestGenerateDynamoComponentsDeployments(t *testing.T) {
...
@@ -607,7 +607,6 @@ func TestGenerateDynamoComponentsDeployments(t *testing.T) {
},
},
},
},
},
},
ingressSpec
:
&
v1alpha1
.
IngressSpec
{},
},
},
want
:
map
[
string
]
*
v1alpha1
.
DynamoComponentDeployment
{
want
:
map
[
string
]
*
v1alpha1
.
DynamoComponentDeployment
{
"service1"
:
{
"service1"
:
{
...
@@ -1117,3 +1116,398 @@ func Test_mergeEnvs(t *testing.T) {
...
@@ -1117,3 +1116,398 @@ func Test_mergeEnvs(t *testing.T) {
})
})
}
}
}
}
func
TestGenerateGrovePodGangSet
(
t
*
testing
.
T
)
{
type
args
struct
{
ctx
context
.
Context
dynamoDeployment
*
v1alpha1
.
DynamoGraphDeployment
controllerConfig
controller_common
.
Config
}
tests
:=
[]
struct
{
name
string
args
args
want
*
grovev1alpha1
.
PodGangSet
wantErr
bool
}{
{
name
:
"test_generate_grove_pod_gang_set"
,
args
:
args
{
ctx
:
context
.
Background
(),
controllerConfig
:
controller_common
.
Config
{
EtcdAddress
:
"etcd-address"
,
NatsAddress
:
"nats-address"
,
},
dynamoDeployment
:
&
v1alpha1
.
DynamoGraphDeployment
{
ObjectMeta
:
metav1
.
ObjectMeta
{
Name
:
"test-dynamo-graph-deployment"
,
Namespace
:
"test-namespace"
,
},
Spec
:
v1alpha1
.
DynamoGraphDeploymentSpec
{
Envs
:
[]
corev1
.
EnvVar
{
{
Name
:
"DYNAMO_POD_GANG_SET_REPLICAS"
,
Value
:
"1"
,
},
},
Services
:
map
[
string
]
*
v1alpha1
.
DynamoComponentDeploymentOverridesSpec
{
"Frontend"
:
{
DynamoComponentDeploymentSharedSpec
:
v1alpha1
.
DynamoComponentDeploymentSharedSpec
{
Replicas
:
&
[]
int32
{
1
}[
0
],
Resources
:
&
common
.
Resources
{
Requests
:
&
common
.
ResourceItem
{
CPU
:
"1"
,
Memory
:
"1Gi"
,
},
Limits
:
&
common
.
ResourceItem
{
CPU
:
"1"
,
Memory
:
"1Gi"
,
GPU
:
"1"
,
},
},
Envs
:
[]
corev1
.
EnvVar
{
{
Name
:
"FRONTEND_ENV_1"
,
Value
:
"1"
,
},
},
EnvFromSecret
:
&
[]
string
{
"frontend-secret"
}[
0
],
LivenessProbe
:
&
corev1
.
Probe
{
ProbeHandler
:
corev1
.
ProbeHandler
{
HTTPGet
:
&
corev1
.
HTTPGetAction
{
Path
:
"/health"
,
Port
:
intstr
.
FromInt
(
8080
),
},
},
},
ReadinessProbe
:
&
corev1
.
Probe
{
ProbeHandler
:
corev1
.
ProbeHandler
{
HTTPGet
:
&
corev1
.
HTTPGetAction
{
Path
:
"/ready"
,
Port
:
intstr
.
FromInt
(
8080
),
},
},
},
ExtraPodSpec
:
&
common
.
ExtraPodSpec
{
MainContainer
:
&
corev1
.
Container
{
Command
:
[]
string
{
"/bin/sh"
,
"-c"
,
"echo $FRONTEND_ENV_1"
,
},
Args
:
[]
string
{
"--frontend-env-1"
,
"1"
,
},
Image
:
"frontend-image"
,
},
},
},
},
"Planner"
:
{
DynamoComponentDeploymentSharedSpec
:
v1alpha1
.
DynamoComponentDeploymentSharedSpec
{
Replicas
:
&
[]
int32
{
2
}[
0
],
Resources
:
&
common
.
Resources
{
Requests
:
&
common
.
ResourceItem
{
CPU
:
"2"
,
Memory
:
"2Gi"
,
},
Limits
:
&
common
.
ResourceItem
{
CPU
:
"2"
,
Memory
:
"2Gi"
,
GPU
:
"2"
,
},
},
Envs
:
[]
corev1
.
EnvVar
{
{
Name
:
"PLANNER_ENV_1"
,
Value
:
"2"
,
},
},
PVC
:
&
v1alpha1
.
PVC
{
Name
:
&
[]
string
{
"planner-pvc"
}[
0
],
MountPoint
:
&
[]
string
{
"/planner"
}[
0
],
},
EnvFromSecret
:
&
[]
string
{
"planner-secret"
}[
0
],
LivenessProbe
:
&
corev1
.
Probe
{
ProbeHandler
:
corev1
.
ProbeHandler
{
HTTPGet
:
&
corev1
.
HTTPGetAction
{
Path
:
"/health"
,
Port
:
intstr
.
FromInt
(
8080
),
},
},
},
ReadinessProbe
:
&
corev1
.
Probe
{
ProbeHandler
:
corev1
.
ProbeHandler
{
HTTPGet
:
&
corev1
.
HTTPGetAction
{
Path
:
"/ready"
,
Port
:
intstr
.
FromInt
(
8080
),
},
},
},
ExtraPodSpec
:
&
common
.
ExtraPodSpec
{
MainContainer
:
&
corev1
.
Container
{
Command
:
[]
string
{
"/bin/sh"
,
"-c"
,
"echo $PLANNER_ENV_1"
,
},
Args
:
[]
string
{
"--planner-env-1"
,
"1"
,
},
Image
:
"planner-image"
,
},
},
},
},
},
},
},
},
want
:
&
grovev1alpha1
.
PodGangSet
{
ObjectMeta
:
metav1
.
ObjectMeta
{
Name
:
"test-dynamo-graph-deployment"
,
Namespace
:
"test-namespace"
,
},
Spec
:
grovev1alpha1
.
PodGangSetSpec
{
Replicas
:
1
,
Template
:
grovev1alpha1
.
PodGangSetTemplateSpec
{
Cliques
:
[]
*
grovev1alpha1
.
PodCliqueTemplateSpec
{
{
Name
:
"frontend"
,
Labels
:
map
[
string
]
string
{
commonconsts
.
KubeLabelDynamoSelector
:
"test-dynamo-graph-deployment-frontend"
,
},
Spec
:
grovev1alpha1
.
PodCliqueSpec
{
RoleName
:
"frontend"
,
Replicas
:
1
,
PodSpec
:
corev1
.
PodSpec
{
ImagePullSecrets
:
[]
corev1
.
LocalObjectReference
{},
Containers
:
[]
corev1
.
Container
{
{
Name
:
"main"
,
Image
:
"frontend-image"
,
Command
:
[]
string
{
"/bin/sh"
,
"-c"
,
"echo $FRONTEND_ENV_1"
,
},
Args
:
[]
string
{
"--frontend-env-1"
,
"1"
,
},
EnvFrom
:
[]
corev1
.
EnvFromSource
{
{
SecretRef
:
&
corev1
.
SecretEnvSource
{
LocalObjectReference
:
corev1
.
LocalObjectReference
{
Name
:
"frontend-secret"
,
},
},
},
},
LivenessProbe
:
&
corev1
.
Probe
{
ProbeHandler
:
corev1
.
ProbeHandler
{
HTTPGet
:
&
corev1
.
HTTPGetAction
{
Path
:
"/health"
,
Port
:
intstr
.
FromInt
(
8080
),
},
},
},
ReadinessProbe
:
&
corev1
.
Probe
{
ProbeHandler
:
corev1
.
ProbeHandler
{
HTTPGet
:
&
corev1
.
HTTPGetAction
{
Path
:
"/ready"
,
Port
:
intstr
.
FromInt
(
8080
),
},
},
},
Env
:
[]
corev1
.
EnvVar
{
{
Name
:
"DYNAMO_POD_GANG_SET_REPLICAS"
,
Value
:
"1"
,
},
{
Name
:
"FRONTEND_ENV_1"
,
Value
:
"1"
,
},
{
Name
:
"DYNAMO_PORT"
,
Value
:
fmt
.
Sprintf
(
"%d"
,
commonconsts
.
DynamoServicePort
),
},
{
Name
:
"NATS_SERVER"
,
Value
:
"nats-address"
,
},
{
Name
:
"ETCD_ENDPOINTS"
,
Value
:
"etcd-address"
,
},
},
Resources
:
corev1
.
ResourceRequirements
{
Requests
:
corev1
.
ResourceList
{
corev1
.
ResourceCPU
:
resource
.
MustParse
(
"1"
),
corev1
.
ResourceMemory
:
resource
.
MustParse
(
"1Gi"
),
},
Limits
:
corev1
.
ResourceList
{
corev1
.
ResourceCPU
:
resource
.
MustParse
(
"1"
),
corev1
.
ResourceMemory
:
resource
.
MustParse
(
"1Gi"
),
corev1
.
ResourceName
(
"nvidia.com/gpu"
)
:
resource
.
MustParse
(
"1"
),
},
},
Ports
:
[]
corev1
.
ContainerPort
{
{
Protocol
:
corev1
.
ProtocolTCP
,
Name
:
commonconsts
.
DynamoContainerPortName
,
ContainerPort
:
int32
(
commonconsts
.
DynamoServicePort
),
},
{
Protocol
:
corev1
.
ProtocolTCP
,
Name
:
commonconsts
.
DynamoHealthPortName
,
ContainerPort
:
int32
(
commonconsts
.
DynamoHealthPort
),
},
},
},
},
},
},
},
{
Name
:
"planner"
,
Labels
:
map
[
string
]
string
{
commonconsts
.
KubeLabelDynamoSelector
:
"test-dynamo-graph-deployment-planner"
,
},
Spec
:
grovev1alpha1
.
PodCliqueSpec
{
RoleName
:
"planner"
,
Replicas
:
2
,
PodSpec
:
corev1
.
PodSpec
{
ImagePullSecrets
:
[]
corev1
.
LocalObjectReference
{},
Volumes
:
[]
corev1
.
Volume
{
{
Name
:
"planner-pvc"
,
VolumeSource
:
corev1
.
VolumeSource
{
PersistentVolumeClaim
:
&
corev1
.
PersistentVolumeClaimVolumeSource
{
ClaimName
:
"planner-pvc"
,
},
},
},
},
Containers
:
[]
corev1
.
Container
{
{
Name
:
"main"
,
Image
:
"planner-image"
,
Command
:
[]
string
{
"/bin/sh"
,
"-c"
,
"echo $PLANNER_ENV_1"
,
},
Args
:
[]
string
{
"--planner-env-1"
,
"1"
,
},
EnvFrom
:
[]
corev1
.
EnvFromSource
{
{
SecretRef
:
&
corev1
.
SecretEnvSource
{
LocalObjectReference
:
corev1
.
LocalObjectReference
{
Name
:
"planner-secret"
,
},
},
},
},
LivenessProbe
:
&
corev1
.
Probe
{
ProbeHandler
:
corev1
.
ProbeHandler
{
HTTPGet
:
&
corev1
.
HTTPGetAction
{
Path
:
"/health"
,
Port
:
intstr
.
FromInt
(
8080
),
},
},
},
ReadinessProbe
:
&
corev1
.
Probe
{
ProbeHandler
:
corev1
.
ProbeHandler
{
HTTPGet
:
&
corev1
.
HTTPGetAction
{
Path
:
"/ready"
,
Port
:
intstr
.
FromInt
(
8080
),
},
},
},
Env
:
[]
corev1
.
EnvVar
{
{
Name
:
"DYNAMO_POD_GANG_SET_REPLICAS"
,
Value
:
"1"
,
},
{
Name
:
"PLANNER_ENV_1"
,
Value
:
"2"
,
},
{
Name
:
"DYNAMO_PORT"
,
Value
:
fmt
.
Sprintf
(
"%d"
,
commonconsts
.
DynamoServicePort
),
},
{
Name
:
"NATS_SERVER"
,
Value
:
"nats-address"
,
},
{
Name
:
"ETCD_ENDPOINTS"
,
Value
:
"etcd-address"
,
},
},
Resources
:
corev1
.
ResourceRequirements
{
Requests
:
corev1
.
ResourceList
{
corev1
.
ResourceCPU
:
resource
.
MustParse
(
"2"
),
corev1
.
ResourceMemory
:
resource
.
MustParse
(
"2Gi"
),
},
Limits
:
corev1
.
ResourceList
{
corev1
.
ResourceCPU
:
resource
.
MustParse
(
"2"
),
corev1
.
ResourceMemory
:
resource
.
MustParse
(
"2Gi"
),
corev1
.
ResourceName
(
"nvidia.com/gpu"
)
:
resource
.
MustParse
(
"2"
),
},
},
VolumeMounts
:
[]
corev1
.
VolumeMount
{
{
Name
:
"planner-pvc"
,
MountPath
:
"/planner"
,
},
},
Ports
:
[]
corev1
.
ContainerPort
{
{
Protocol
:
corev1
.
ProtocolTCP
,
Name
:
commonconsts
.
DynamoContainerPortName
,
ContainerPort
:
int32
(
commonconsts
.
DynamoServicePort
),
},
{
Protocol
:
corev1
.
ProtocolTCP
,
Name
:
commonconsts
.
DynamoHealthPortName
,
ContainerPort
:
int32
(
commonconsts
.
DynamoHealthPort
),
},
},
},
},
},
},
},
},
},
},
},
wantErr
:
false
,
},
}
for
_
,
tt
:=
range
tests
{
t
.
Run
(
tt
.
name
,
func
(
t
*
testing
.
T
)
{
got
,
err
:=
GenerateGrovePodGangSet
(
tt
.
args
.
ctx
,
tt
.
args
.
dynamoDeployment
,
tt
.
args
.
controllerConfig
,
nil
)
if
(
err
!=
nil
)
!=
tt
.
wantErr
{
t
.
Errorf
(
"GenerateGrovePodGangSet() error = %v, wantErr %v"
,
err
,
tt
.
wantErr
)
return
}
sort
.
Slice
(
got
.
Spec
.
Template
.
Cliques
,
func
(
i
,
j
int
)
bool
{
return
got
.
Spec
.
Template
.
Cliques
[
i
]
.
Name
<
got
.
Spec
.
Template
.
Cliques
[
j
]
.
Name
})
sort
.
Slice
(
tt
.
want
.
Spec
.
Template
.
Cliques
,
func
(
i
,
j
int
)
bool
{
return
tt
.
want
.
Spec
.
Template
.
Cliques
[
i
]
.
Name
<
tt
.
want
.
Spec
.
Template
.
Cliques
[
j
]
.
Name
})
if
diff
:=
cmp
.
Diff
(
got
,
tt
.
want
);
diff
!=
""
{
t
.
Errorf
(
"GenerateGrovePodGangSet() mismatch (-want +got):
\n
%s"
,
diff
)
}
})
}
}
deploy/helm/chart/templates/deployment.yaml
View file @
ee3a8e42
...
@@ -38,8 +38,14 @@ spec:
...
@@ -38,8 +38,14 @@ spec:
-
name
:
{{
$.Release.Name
}}
-{{ $serviceName | lower }}
-
name
:
{{
$.Release.Name
}}
-{{ $serviceName | lower }}
image
:
{{
$serviceSpec.extraPodSpec.mainContainer.image
}}
image
:
{{
$serviceSpec.extraPodSpec.mainContainer.image
}}
workingDir
:
{{
$serviceSpec.extraPodSpec.mainContainer.workingDir
}}
workingDir
:
{{
$serviceSpec.extraPodSpec.mainContainer.workingDir
}}
{{
- if $serviceSpec.extraPodSpec.mainContainer.command
}}
command
:
{{
- $serviceSpec.extraPodSpec.mainContainer.command | toYaml | nindent 8
}}
{{
- end
}}
{{
- if $serviceSpec.extraPodSpec.mainContainer.args
}}
args
:
args
:
{{
- $serviceSpec.extraPodSpec.mainContainer.args | toYaml | nindent 8
}}
{{
- $serviceSpec.extraPodSpec.mainContainer.args | toYaml | nindent 8
}}
{{
- end
}}
{{
if $serviceSpec.resources
}}
{{
if $serviceSpec.resources
}}
resources
:
resources
:
requests
:
requests
:
...
@@ -83,8 +89,8 @@ spec:
...
@@ -83,8 +89,8 @@ spec:
-
name
:
health
-
name
:
health
containerPort
:
{{
$.Values.healthPort | default 5000
}}
containerPort
:
{{
$.Values.healthPort | default 5000
}}
livenessProbe
:
livenessProbe
:
{{
- if $serviceSpec.
extraPodSpec.mainContainer.
livenessProbe
}}
{{
- if $serviceSpec.livenessProbe
}}
{{
$serviceSpec.
extraPodSpec.mainContainer.
livenessProbe | toYaml | nindent 10
}}
{{
$serviceSpec.livenessProbe | toYaml | nindent 10
}}
{{
- else
}}
{{
- else
}}
initialDelaySeconds
:
60
initialDelaySeconds
:
60
periodSeconds
:
60
periodSeconds
:
60
...
@@ -97,8 +103,8 @@ spec:
...
@@ -97,8 +103,8 @@ spec:
scheme
:
HTTP
scheme
:
HTTP
{{
- end
}}
{{
- end
}}
readinessProbe
:
readinessProbe
:
{{
- if $serviceSpec.
extraPodSpec.mainContainer.
readinessProbe
}}
{{
- if $serviceSpec.readinessProbe
}}
{{
$serviceSpec.
extraPodSpec.mainContainer.
readinessProbe | toYaml | nindent 10
}}
{{
$serviceSpec.readinessProbe | toYaml | nindent 10
}}
{{
- else
}}
{{
- else
}}
initialDelaySeconds
:
60
initialDelaySeconds
:
60
periodSeconds
:
60
periodSeconds
:
60
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment