Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
ee3a8e42
"lib/llm/vscode:/vscode.git/clone" did not exist on "b5db4e7555d838f0fbdc53e868b82547dedecca7"
Unverified
Commit
ee3a8e42
authored
Jul 23, 2025
by
julienmancuso
Committed by
GitHub
Jul 23, 2025
Browse files
feat: add initial Grove support (#2012)
parent
19a77ae7
Changes
34
Hide whitespace changes
Inline
Side-by-side
Showing
14 changed files
with
1114 additions
and
386 deletions
+1114
-386
deploy/cloud/operator/config/crd/bases/nvidia.com_dynamographdeployments.yaml
...r/config/crd/bases/nvidia.com_dynamographdeployments.yaml
+16
-0
deploy/cloud/operator/config/rbac/role.yaml
deploy/cloud/operator/config/rbac/role.yaml
+12
-0
deploy/cloud/operator/go.mod
deploy/cloud/operator/go.mod
+17
-18
deploy/cloud/operator/go.sum
deploy/cloud/operator/go.sum
+36
-29
deploy/cloud/operator/internal/consts/consts.go
deploy/cloud/operator/internal/consts/consts.go
+4
-0
deploy/cloud/operator/internal/controller/common.go
deploy/cloud/operator/internal/controller/common.go
+0
-14
deploy/cloud/operator/internal/controller/dynamocomponentdeployment_controller.go
...ternal/controller/dynamocomponentdeployment_controller.go
+22
-207
deploy/cloud/operator/internal/controller/dynamocomponentdeployment_controller_test.go
...l/controller/dynamocomponentdeployment_controller_test.go
+20
-25
deploy/cloud/operator/internal/controller/dynamographdeployment_controller.go
...r/internal/controller/dynamographdeployment_controller.go
+178
-74
deploy/cloud/operator/internal/controller_common/predicate.go
...oy/cloud/operator/internal/controller_common/predicate.go
+16
-3
deploy/cloud/operator/internal/controller_common/resource.go
deploy/cloud/operator/internal/controller_common/resource.go
+104
-0
deploy/cloud/operator/internal/dynamo/graph.go
deploy/cloud/operator/internal/dynamo/graph.go
+278
-5
deploy/cloud/operator/internal/dynamo/graph_test.go
deploy/cloud/operator/internal/dynamo/graph_test.go
+401
-7
deploy/helm/chart/templates/deployment.yaml
deploy/helm/chart/templates/deployment.yaml
+10
-4
No files found.
deploy/cloud/operator/config/crd/bases/nvidia.com_dynamographdeployments.yaml
View file @
ee3a8e42
...
@@ -148,6 +148,12 @@ spec:
...
@@ -148,6 +148,12 @@ spec:
stabilizationWindowSeconds
:
stabilizationWindowSeconds
:
format
:
int32
format
:
int32
type
:
integer
type
:
integer
tolerance
:
anyOf
:
-
type
:
integer
-
type
:
string
pattern
:
^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string
:
true
type
:
object
type
:
object
scaleUp
:
scaleUp
:
properties
:
properties
:
...
@@ -174,6 +180,12 @@ spec:
...
@@ -174,6 +180,12 @@ spec:
stabilizationWindowSeconds
:
stabilizationWindowSeconds
:
format
:
int32
format
:
int32
type
:
integer
type
:
integer
tolerance
:
anyOf
:
-
type
:
integer
-
type
:
string
pattern
:
^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string
:
true
type
:
object
type
:
object
type
:
object
type
:
object
enabled
:
enabled
:
...
@@ -1218,6 +1230,8 @@ spec:
...
@@ -1218,6 +1230,8 @@ spec:
-
port
-
port
type
:
object
type
:
object
type
:
object
type
:
object
stopSignal
:
type
:
string
type
:
object
type
:
object
livenessProbe
:
livenessProbe
:
properties
:
properties
:
...
@@ -1897,6 +1911,8 @@ spec:
...
@@ -1897,6 +1911,8 @@ spec:
-
port
-
port
type
:
object
type
:
object
type
:
object
type
:
object
stopSignal
:
type
:
string
type
:
object
type
:
object
livenessProbe
:
livenessProbe
:
properties
:
properties
:
...
...
deploy/cloud/operator/config/rbac/role.yaml
View file @
ee3a8e42
...
@@ -98,6 +98,18 @@ rules:
...
@@ -98,6 +98,18 @@ rules:
-
patch
-
patch
-
update
-
update
-
watch
-
watch
-
apiGroups
:
-
grove.io
resources
:
-
podgangsets
verbs
:
-
create
-
delete
-
get
-
list
-
patch
-
update
-
watch
-
apiGroups
:
-
apiGroups
:
-
leaderworkerset.x-k8s.io
-
leaderworkerset.x-k8s.io
resources
:
resources
:
...
...
deploy/cloud/operator/go.mod
View file @
ee3a8e42
...
@@ -6,27 +6,29 @@ toolchain go1.24.3
...
@@ -6,27 +6,29 @@ toolchain go1.24.3
require (
require (
emperror.dev/errors v0.8.1
emperror.dev/errors v0.8.1
github.com/NVIDIA/grove/operator/api v0.0.0-20250717114148-daac6e53774f
github.com/bsm/gomega v1.27.10
github.com/bsm/gomega v1.27.10
github.com/google/go-cmp v0.7.0
github.com/google/go-cmp v0.7.0
github.com/imdario/mergo v0.3.6
github.com/imdario/mergo v0.3.6
github.com/onsi/ginkgo/v2 v2.23.4
github.com/onsi/ginkgo/v2 v2.23.4
github.com/onsi/gomega v1.37.0
github.com/onsi/gomega v1.37.0
github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.71.2
github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.71.2
go.etcd.io/etcd/client/v3 v3.5.1
6
go.etcd.io/etcd/client/v3 v3.5.
2
1
istio.io/api v1.23.1
istio.io/api v1.23.1
istio.io/client-go v1.23.1
istio.io/client-go v1.23.1
k8s.io/api v0.3
2
.3
k8s.io/api v0.3
3
.3
k8s.io/apiextensions-apiserver v0.3
2
.3
k8s.io/apiextensions-apiserver v0.3
3
.3
k8s.io/apimachinery v0.3
2
.3
k8s.io/apimachinery v0.3
3
.3
k8s.io/client-go v0.3
2
.3
k8s.io/client-go v0.3
3
.3
k8s.io/utils v0.0.0-202
41210054802-24370beab758
k8s.io/utils v0.0.0-202
50502105355-0f33e8f1c979
sigs.k8s.io/controller-runtime v0.2
0.4
sigs.k8s.io/controller-runtime v0.2
1.0
sigs.k8s.io/lws v0.6.1
sigs.k8s.io/lws v0.6.1
volcano.sh/apis v1.11.0
volcano.sh/apis v1.11.0
)
)
require (
require (
github.com/beorn7/perks v1.0.1 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/blang/semver/v4 v4.0.0 // indirect
github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/coreos/go-semver v0.3.1 // indirect
github.com/coreos/go-semver v0.3.1 // indirect
github.com/coreos/go-systemd/v22 v22.5.0 // indirect
github.com/coreos/go-systemd/v22 v22.5.0 // indirect
...
@@ -45,28 +47,24 @@ require (
...
@@ -45,28 +47,24 @@ require (
github.com/gogo/protobuf v1.3.2 // indirect
github.com/gogo/protobuf v1.3.2 // indirect
github.com/golang/protobuf v1.5.4 // indirect
github.com/golang/protobuf v1.5.4 // indirect
github.com/google/btree v1.1.3 // indirect
github.com/google/btree v1.1.3 // indirect
github.com/google/gnostic-models v0.6.8 // indirect
github.com/google/gnostic-models v0.6.9 // indirect
github.com/google/gofuzz v1.2.0 // indirect
github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 // indirect
github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/klauspost/compress v1.18.0 // indirect
github.com/mailru/easyjson v0.7.7 // indirect
github.com/mailru/easyjson v0.7.7 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/prometheus/client_golang v1.2
0.2
// indirect
github.com/prometheus/client_golang v1.2
2.0
// indirect
github.com/prometheus/client_model v0.6.1 // indirect
github.com/prometheus/client_model v0.6.1 // indirect
github.com/prometheus/common v0.
55
.0 // indirect
github.com/prometheus/common v0.
62
.0 // indirect
github.com/prometheus/procfs v0.15.1 // indirect
github.com/prometheus/procfs v0.15.1 // indirect
github.com/rogpeppe/go-internal v1.13.1 // indirect
github.com/spf13/pflag v1.0.6 // indirect
github.com/spf13/pflag v1.0.6 // indirect
github.com/stretchr/testify v1.10.0 // indirect
github.com/x448/float16 v0.8.4 // indirect
github.com/x448/float16 v0.8.4 // indirect
go.etcd.io/etcd/api/v3 v3.5.1
6
// indirect
go.etcd.io/etcd/api/v3 v3.5.
2
1 // indirect
go.etcd.io/etcd/client/pkg/v3 v3.5.1
6
// indirect
go.etcd.io/etcd/client/pkg/v3 v3.5.
2
1 // indirect
go.opentelemetry.io/otel v1.36.0 // indirect
go.opentelemetry.io/otel v1.36.0 // indirect
go.opentelemetry.io/otel/sdk v1.36.0 // indirect
go.opentelemetry.io/otel/sdk v1.36.0 // indirect
go.opentelemetry.io/otel/sdk/metric v1.35.0 // indirect
go.opentelemetry.io/otel/sdk/metric v1.35.0 // indirect
...
@@ -79,7 +77,7 @@ require (
...
@@ -79,7 +77,7 @@ require (
golang.org/x/sys v0.33.0 // indirect
golang.org/x/sys v0.33.0 // indirect
golang.org/x/term v0.32.0 // indirect
golang.org/x/term v0.32.0 // indirect
golang.org/x/text v0.25.0 // indirect
golang.org/x/text v0.25.0 // indirect
golang.org/x/time v0.
7
.0 // indirect
golang.org/x/time v0.
9
.0 // indirect
golang.org/x/tools v0.33.0 // indirect
golang.org/x/tools v0.33.0 // indirect
gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect
gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20250519155744-55703ea1f237 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20250519155744-55703ea1f237 // indirect
...
@@ -90,8 +88,9 @@ require (
...
@@ -90,8 +88,9 @@ require (
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
k8s.io/klog/v2 v2.130.1 // indirect
k8s.io/klog/v2 v2.130.1 // indirect
k8s.io/kube-openapi v0.0.0-202
41105132330-32ad38e42d3
f // indirect
k8s.io/kube-openapi v0.0.0-202
50318190949-c8a335a9a2f
f // indirect
sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 // indirect
sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 // indirect
sigs.k8s.io/randfill v1.0.0 // indirect
sigs.k8s.io/structured-merge-diff/v4 v4.7.0 // indirect
sigs.k8s.io/structured-merge-diff/v4 v4.7.0 // indirect
sigs.k8s.io/yaml v1.4.0 // indirect
sigs.k8s.io/yaml v1.4.0 // indirect
)
)
deploy/cloud/operator/go.sum
View file @
ee3a8e42
emperror.dev/errors v0.8.1 h1:UavXZ5cSX/4u9iyvH6aDcuGkVjeexUGJ7Ij7G4VfQT0=
emperror.dev/errors v0.8.1 h1:UavXZ5cSX/4u9iyvH6aDcuGkVjeexUGJ7Ij7G4VfQT0=
emperror.dev/errors v0.8.1/go.mod h1:YcRvLPh626Ubn2xqtoprejnA5nFha+TJ+2vew48kWuE=
emperror.dev/errors v0.8.1/go.mod h1:YcRvLPh626Ubn2xqtoprejnA5nFha+TJ+2vew48kWuE=
github.com/NVIDIA/grove/operator/api v0.0.0-20250717114148-daac6e53774f h1:2ePSNDm7/Tep8F99yCQVH8/vmn86L1cUzTbVlyNopmQ=
github.com/NVIDIA/grove/operator/api v0.0.0-20250717114148-daac6e53774f/go.mod h1:nJL33lsBe+9xCcZLYkNYg1wucE4hJfa4ZfHm1zamuG0=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM=
github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ=
github.com/bsm/gomega v1.27.10 h1:yeMWxP2pV2fG3FgAODIY8EiRE3dy0aeFYt4l7wh6yKA=
github.com/bsm/gomega v1.27.10 h1:yeMWxP2pV2fG3FgAODIY8EiRE3dy0aeFYt4l7wh6yKA=
github.com/bsm/gomega v1.27.10/go.mod h1:JyEr/xRbxbtgWNi8tIEVPUYZ5Dzef52k01W3YH0H+O0=
github.com/bsm/gomega v1.27.10/go.mod h1:JyEr/xRbxbtgWNi8tIEVPUYZ5Dzef52k01W3YH0H+O0=
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
...
@@ -45,8 +49,8 @@ github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek
...
@@ -45,8 +49,8 @@ github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
github.com/google/btree v1.1.3 h1:CVpQJjYgC4VbzxeGVHfvZrv1ctoYCAI8vbl07Fcxlyg=
github.com/google/btree v1.1.3 h1:CVpQJjYgC4VbzxeGVHfvZrv1ctoYCAI8vbl07Fcxlyg=
github.com/google/btree v1.1.3/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl761Gm4=
github.com/google/btree v1.1.3/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl761Gm4=
github.com/google/gnostic-models v0.6.
8
h1:
yo/ABAfM5IMRsS1VnXjTBvUb61tFIHozhlYvRgGre9I
=
github.com/google/gnostic-models v0.6.
9
h1:
MU/8wDLif2qCXZmzncUQ/BOfxWfthHi63KqpoNbWqVw
=
github.com/google/gnostic-models v0.6.
8
/go.mod h1:
5n7qKqH0f5wFt+aWF8CW6pZLLNOfYuF5OpfBSENuI8U
=
github.com/google/gnostic-models v0.6.
9
/go.mod h1:
CiWsm0s6BSQd1hRn8/QmxqB6BesYcbSZxsz9b0KuDBw
=
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
...
@@ -95,12 +99,12 @@ github.com/prashantv/gostub v1.1.0 h1:BTyx3RfQjRHnUWaGF9oQos79AlQ5k8WNktv7VGvVH4
...
@@ -95,12 +99,12 @@ github.com/prashantv/gostub v1.1.0 h1:BTyx3RfQjRHnUWaGF9oQos79AlQ5k8WNktv7VGvVH4
github.com/prashantv/gostub v1.1.0/go.mod h1:A5zLQHz7ieHGG7is6LLXLz7I8+3LZzsrV0P1IAHhP5U=
github.com/prashantv/gostub v1.1.0/go.mod h1:A5zLQHz7ieHGG7is6LLXLz7I8+3LZzsrV0P1IAHhP5U=
github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.71.2 h1:HZdPRm0ApWPg7F4sHgbqWkL+ddWfpTZsopm5HM/2g4o=
github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.71.2 h1:HZdPRm0ApWPg7F4sHgbqWkL+ddWfpTZsopm5HM/2g4o=
github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.71.2/go.mod h1:3RiUkFmR9kmPZi9r/8a5jw0a9yg+LMmr7qa0wjqvSiI=
github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.71.2/go.mod h1:3RiUkFmR9kmPZi9r/8a5jw0a9yg+LMmr7qa0wjqvSiI=
github.com/prometheus/client_golang v1.2
0.2
h1:
5ctymQzZlyOON1666svgwn3s6IKWgfbjsejTMiXIyjg
=
github.com/prometheus/client_golang v1.2
2.0
h1:
rb93p9lokFEsctTys46VnV1kLCDpVZ0a/Y92Vm0Zc6Q
=
github.com/prometheus/client_golang v1.2
0.2
/go.mod h1:
PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE
=
github.com/prometheus/client_golang v1.2
2.0
/go.mod h1:
R7ljNsLXhuQXYZYtw6GAE9AZg8Y7vEW5scdCXrWRXC0
=
github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E=
github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E=
github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY=
github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY=
github.com/prometheus/common v0.
55
.0 h1:
KEi6DK7lXW/m7Ig5i47x0vRzuBsHuvJdi5ee6Y3G1dc
=
github.com/prometheus/common v0.
62
.0 h1:
xasJaQlnWAeyHdUBeGjXmutelfJHWMRr+Fg4QszZ2Io
=
github.com/prometheus/common v0.
55
.0/go.mod h1:
2SECS4xJG1kd8XF9IcM1gMX6510RAEL65zxzNImwdc8
=
github.com/prometheus/common v0.
62
.0/go.mod h1:
vyBcEuLSvWos9B1+CyL7JZ2up+uFzXhkqml0W5zIY1I
=
github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc=
github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc=
github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk=
github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk=
github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII=
github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII=
...
@@ -108,6 +112,8 @@ github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWN
...
@@ -108,6 +112,8 @@ github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWN
github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o=
github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o=
github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY=
github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
...
@@ -115,12 +121,12 @@ github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM=
...
@@ -115,12 +121,12 @@ github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM=
github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg=
github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg=
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
go.etcd.io/etcd/api/v3 v3.5.1
6
h1:
WvmyJVbjWqK4R1E+B12RRHz3bRGy9XVfh++MgbN+6n0
=
go.etcd.io/etcd/api/v3 v3.5.
2
1 h1:
A6O2/JDb3tvHhiIz3xf9nJ7REHvtEFJJ3veW3FbCnS8
=
go.etcd.io/etcd/api/v3 v3.5.1
6
/go.mod h1:
1P4SlIP/VwkDmGo3OlOD7faPeP8KDIFhqvciH5EfN28
=
go.etcd.io/etcd/api/v3 v3.5.
2
1/go.mod h1:
c3aH5wcvXv/9dqIw2Y810LDXJfhSYdHQ0vxmP3CCHVY
=
go.etcd.io/etcd/client/pkg/v3 v3.5.1
6
h1:
ZgY48uH6UvB+/7R9Yf4x574uCO3jIx0TRDyetSfId3Q
=
go.etcd.io/etcd/client/pkg/v3 v3.5.
2
1 h1:
lPBu71Y7osQmzlflM9OfeIV2JlmpBjqBNlLtcoBqUTc
=
go.etcd.io/etcd/client/pkg/v3 v3.5.1
6
/go.mod h1:
V8acl8pcEK0Y2g19YlOV9m9ssUe6MgiDSobSoaBAM0E
=
go.etcd.io/etcd/client/pkg/v3 v3.5.
2
1/go.mod h1:
BgqT/IXPjK9NkeSDjbzwsHySX3yIle2+ndz28nVsjUs
=
go.etcd.io/etcd/client/v3 v3.5.1
6
h1:
sSmVYOAHeC9doqi0gv7v86oY/BTld0SEFGaxsU9eRhE
=
go.etcd.io/etcd/client/v3 v3.5.
2
1 h1:
T6b1Ow6fNjOLOtM0xSoKNQt1ASPCLWrF9XMHcH9pEyY
=
go.etcd.io/etcd/client/v3 v3.5.1
6
/go.mod h1:
X+rExSGkyqxvu276cr2OwPLBaeqFu1cIl4vmRjAD/50
=
go.etcd.io/etcd/client/v3 v3.5.
2
1/go.mod h1:
mFYy67IOqmbRf/kRUvsHixzo3iG+1OF2W2+jVIQRAnU
=
go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA=
go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA=
go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A=
go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A=
go.opentelemetry.io/otel v1.36.0 h1:UumtzIklRBY6cI/lllNZlALOF5nNIzJVb16APdvgTXg=
go.opentelemetry.io/otel v1.36.0 h1:UumtzIklRBY6cI/lllNZlALOF5nNIzJVb16APdvgTXg=
...
@@ -172,8 +178,8 @@ golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
...
@@ -172,8 +178,8 @@ golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.25.0 h1:qVyWApTSYLk/drJRO5mDlNYskwQznZmkpV2c8q9zls4=
golang.org/x/text v0.25.0 h1:qVyWApTSYLk/drJRO5mDlNYskwQznZmkpV2c8q9zls4=
golang.org/x/text v0.25.0/go.mod h1:WEdwpYrmk1qmdHvhkSTNPm3app7v4rsT8F2UD6+VHIA=
golang.org/x/text v0.25.0/go.mod h1:WEdwpYrmk1qmdHvhkSTNPm3app7v4rsT8F2UD6+VHIA=
golang.org/x/time v0.
7
.0 h1:
ntUhktv3OPE6TgYxXWv9vKvUSJyIFJlyohwbkEwPrKQ
=
golang.org/x/time v0.
9
.0 h1:
EsRrnYcQiGH+5FfbgvV4AP7qEZstoyrHB0DzarOQ4ZY
=
golang.org/x/time v0.
7
.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM=
golang.org/x/time v0.
9
.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
...
@@ -209,28 +215,29 @@ istio.io/api v1.23.1 h1:bm2XF0j058FfzWVHUfpmMj4sFDkcD1X609qs5AU97Pc=
...
@@ -209,28 +215,29 @@ istio.io/api v1.23.1 h1:bm2XF0j058FfzWVHUfpmMj4sFDkcD1X609qs5AU97Pc=
istio.io/api v1.23.1/go.mod h1:QPSTGXuIQdnZFEm3myf9NZ5uBMwCdJWUvfj9ZZ+2oBM=
istio.io/api v1.23.1/go.mod h1:QPSTGXuIQdnZFEm3myf9NZ5uBMwCdJWUvfj9ZZ+2oBM=
istio.io/client-go v1.23.1 h1:IX2cgUUXnVYo+9H6bFGSp/vuKVLPUkmiN8qk1/mvsYs=
istio.io/client-go v1.23.1 h1:IX2cgUUXnVYo+9H6bFGSp/vuKVLPUkmiN8qk1/mvsYs=
istio.io/client-go v1.23.1/go.mod h1:+fxu+O2GkITM3HEREUWdobvRXqI/UhAAI7hfxqqpRh0=
istio.io/client-go v1.23.1/go.mod h1:+fxu+O2GkITM3HEREUWdobvRXqI/UhAAI7hfxqqpRh0=
k8s.io/api v0.3
2
.3 h1:
Hw7KqxRusq+6QSplE3NYG4MBxZw1BZnq4aP4cJVINls
=
k8s.io/api v0.3
3
.3 h1:
SRd5t//hhkI1buzxb288fy2xvjubstenEKL9K51KBI8
=
k8s.io/api v0.3
2
.3/go.mod h1:
2wEDTXADtm/HA7CCMD8D8bK4yuBUptzaRhYcYEEYA3k
=
k8s.io/api v0.3
3
.3/go.mod h1:
01Y/iLUjNBM3TAvypct7DIj0M0NIZc+PzAHCIo0CYGE
=
k8s.io/apiextensions-apiserver v0.3
2
.3 h1:
4D8vy+9GWerlErCwVIbcQjsWunF9SUGNu7O7hiQTyPY
=
k8s.io/apiextensions-apiserver v0.3
3
.3 h1:
qmOcAHN6DjfD0v9kxL5udB27SRP6SG/MTopmge3MwEs
=
k8s.io/apiextensions-apiserver v0.3
2
.3/go.mod h1:
8YwcvVRMVzw0r1Stc7XfGAzB/SIVLunqApySV5V7Dss
=
k8s.io/apiextensions-apiserver v0.3
3
.3/go.mod h1:
oROuctgo27mUsyp9+Obahos6CWcMISSAPzQ77CAQGz8
=
k8s.io/apimachinery v0.3
2
.3 h1:
JmDuDarhDmA/Li7j3aPrwhpNBA94Nvk5zLeOge9HH1U
=
k8s.io/apimachinery v0.3
3
.3 h1:
4ZSrmNa0c/ZpZJhAgRdcsFcZOw1PQU1bALVQ0B3I5LA
=
k8s.io/apimachinery v0.3
2
.3/go.mod h1:
GpHVgxoKlTxClKcteaeuF1Ul/lDVb74KpZcxcmLDElE
=
k8s.io/apimachinery v0.3
3
.3/go.mod h1:
BHW0YOu7n22fFv/JkYOEfkUYNRN0fj0BlvMFWA7b+SM
=
k8s.io/client-go v0.3
2
.3 h1:
RKPVltzopkSgHS7aS98QdscAgtgah/+zmpAogooIqVU
=
k8s.io/client-go v0.3
3
.3 h1:
M5AfDnKfYmVJif92ngN532gFqakcGi6RvaOF16efrpA
=
k8s.io/client-go v0.3
2
.3/go.mod h1:
3v0+3k4IcT9bXTc4V2rt+d2ZPPG700Xy6Oi0Gdl2PaY
=
k8s.io/client-go v0.3
3
.3/go.mod h1:
luqKBQggEf3shbxHY4uVENAxrDISLOarxpTKMiUuujg
=
k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk=
k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk=
k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE=
k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE=
k8s.io/kube-openapi v0.0.0-202
41105132330-32ad38e42d3f h1:GA7//TjRY9yWGy1poLzYYJJ4JRdzg3+O6e8I+e+8T5Y
=
k8s.io/kube-openapi v0.0.0-202
50318190949-c8a335a9a2ff h1:/usPimJzUKKu+m+TE36gUyGcf03XZEP0ZIKgKj35LS4
=
k8s.io/kube-openapi v0.0.0-202
41105132330-32ad38e42d3f/go.mod h1:R/HEjbvWI0qdfb8viZUeVZm0X6IZnxAydC7YU42CMw4
=
k8s.io/kube-openapi v0.0.0-202
50318190949-c8a335a9a2ff/go.mod h1:5jIi+8yX4RIb8wk3XwBo5Pq2ccx4FP10ohkbSKCZoK8
=
k8s.io/utils v0.0.0-202
41
210
0
5
4802-24370beab758 h1:sdbE21q2nlQtFh65saZY+rRM6x6aJJI8IUa1AmH/qa0
=
k8s.io/utils v0.0.0-202
5050
2105
355-0f33e8f1c979 h1:jgJW5IePPXLGB8e/1wvd0Ich9QE97RvvF3a8J3fP/Lg
=
k8s.io/utils v0.0.0-202
41210054802-24370beab758
/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
k8s.io/utils v0.0.0-202
50502105355-0f33e8f1c979
/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
sigs.k8s.io/controller-runtime v0.2
0.4
h1:
X3c+Odnxz+iPTRobG4tp092+CvBU9UK0t/bRf+n0DGU
=
sigs.k8s.io/controller-runtime v0.2
1.0
h1:
CYfjpEuicjUecRk+KAeyYh+ouUBn4llGyDYytIGcJS8
=
sigs.k8s.io/controller-runtime v0.2
0.4
/go.mod h1:
xg2XB0K5ShQzAgsoujxuKN4LNXR2LfwwHsPj7Iaw+XY
=
sigs.k8s.io/controller-runtime v0.2
1.0
/go.mod h1:
OSg14+F65eWqIu4DceX7k/+QRAbTTvxeQSNSOQpukWM
=
sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 h1:/Rv+M11QRah1itp8VhT6HoVx1Ray9eB4DBr+K+/sCJ8=
sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 h1:/Rv+M11QRah1itp8VhT6HoVx1Ray9eB4DBr+K+/sCJ8=
sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3/go.mod h1:18nIHnGi6636UCz6m8i4DhaJ65T6EruyzmoQqI2BVDo=
sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3/go.mod h1:18nIHnGi6636UCz6m8i4DhaJ65T6EruyzmoQqI2BVDo=
sigs.k8s.io/lws v0.6.1 h1:cWiRmMSflo8hQPBrmIIZtoaX3XuVkmAgFKkmjxlPULI=
sigs.k8s.io/lws v0.6.1 h1:cWiRmMSflo8hQPBrmIIZtoaX3XuVkmAgFKkmjxlPULI=
sigs.k8s.io/lws v0.6.1/go.mod h1:aoT5ROMriBtN/H8JH0POBF6e2uyFCOxKGKtXSA3DVV8=
sigs.k8s.io/lws v0.6.1/go.mod h1:aoT5ROMriBtN/H8JH0POBF6e2uyFCOxKGKtXSA3DVV8=
sigs.k8s.io/randfill v0.0.0-20250304075658-069ef1bbf016 h1:kXv6kKdoEtedwuqMmkqhbkgvYKeycVbC8+iPCP9j5kQ=
sigs.k8s.io/randfill v0.0.0-20250304075658-069ef1bbf016/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY=
sigs.k8s.io/randfill v0.0.0-20250304075658-069ef1bbf016/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY=
sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU=
sigs.k8s.io/randfill v1.0.0/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY=
sigs.k8s.io/structured-merge-diff/v4 v4.7.0 h1:qPeWmscJcXP0snki5IYF79Z8xrl8ETFxgMd7wez1XkI=
sigs.k8s.io/structured-merge-diff/v4 v4.7.0 h1:qPeWmscJcXP0snki5IYF79Z8xrl8ETFxgMd7wez1XkI=
sigs.k8s.io/structured-merge-diff/v4 v4.7.0/go.mod h1:dDy58f92j70zLsuZVuUX5Wp9vtxXpaZnkPGWeqDfCps=
sigs.k8s.io/structured-merge-diff/v4 v4.7.0/go.mod h1:dDy58f92j70zLsuZVuUX5Wp9vtxXpaZnkPGWeqDfCps=
sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E=
sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E=
...
...
deploy/cloud/operator/internal/consts/consts.go
View file @
ee3a8e42
...
@@ -17,6 +17,8 @@ const (
...
@@ -17,6 +17,8 @@ const (
KubeLabelDynamoSelector
=
"nvidia.com/selector"
KubeLabelDynamoSelector
=
"nvidia.com/selector"
KubeAnnotationEnableGrove
=
"nvidia.com/enable-grove"
KubeLabelDynamoComponent
=
"nvidia.com/dynamo-component"
KubeLabelDynamoComponent
=
"nvidia.com/dynamo-component"
KubeLabelDynamoNamespace
=
"nvidia.com/dynamo-namespace"
KubeLabelDynamoNamespace
=
"nvidia.com/dynamo-namespace"
KubeLabelDynamoDeploymentTargetType
=
"nvidia.com/dynamo-deployment-target-type"
KubeLabelDynamoDeploymentTargetType
=
"nvidia.com/dynamo-deployment-target-type"
...
@@ -33,4 +35,6 @@ const (
...
@@ -33,4 +35,6 @@ const (
ComponentTypePlanner
=
"planner"
ComponentTypePlanner
=
"planner"
ComponentTypeMain
=
"main"
ComponentTypeMain
=
"main"
PlannerServiceAccountName
=
"planner-serviceaccount"
PlannerServiceAccountName
=
"planner-serviceaccount"
DefaultIngressSuffix
=
"local"
)
)
deploy/cloud/operator/internal/controller/common.go
View file @
ee3a8e42
...
@@ -18,8 +18,6 @@
...
@@ -18,8 +18,6 @@
package
controller
package
controller
import
(
import
(
"fmt"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1"
corev1
"k8s.io/api/core/v1"
corev1
"k8s.io/api/core/v1"
metav1
"k8s.io/apimachinery/pkg/apis/meta/v1"
metav1
"k8s.io/apimachinery/pkg/apis/meta/v1"
...
@@ -51,18 +49,6 @@ func getPvcName(crd metav1.Object, defaultName *string) string {
...
@@ -51,18 +49,6 @@ func getPvcName(crd metav1.Object, defaultName *string) string {
return
crd
.
GetName
()
return
crd
.
GetName
()
}
}
func
getIngressHost
(
ingressSpec
v1alpha1
.
IngressSpec
)
string
{
host
:=
ingressSpec
.
Host
if
ingressSpec
.
HostPrefix
!=
nil
{
host
=
*
ingressSpec
.
HostPrefix
+
host
}
ingressSuffix
:=
DefaultIngressSuffix
if
ingressSpec
.
HostSuffix
!=
nil
{
ingressSuffix
=
*
ingressSpec
.
HostSuffix
}
return
fmt
.
Sprintf
(
"%s.%s"
,
host
,
ingressSuffix
)
}
type
dockerSecretRetriever
interface
{
type
dockerSecretRetriever
interface
{
// returns a list of secret names associated with the docker registry
// returns a list of secret names associated with the docker registry
GetSecrets
(
namespace
,
registry
string
)
([]
string
,
error
)
GetSecrets
(
namespace
,
registry
string
)
([]
string
,
error
)
...
...
deploy/cloud/operator/internal/controller/dynamocomponentdeployment_controller.go
View file @
ee3a8e42
...
@@ -24,7 +24,6 @@ import (
...
@@ -24,7 +24,6 @@ import (
"fmt"
"fmt"
"os"
"os"
"strconv"
"strconv"
"strings"
"time"
"time"
"github.com/imdario/mergo"
"github.com/imdario/mergo"
...
@@ -41,7 +40,7 @@ import (
...
@@ -41,7 +40,7 @@ import (
commonconsts
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts"
commonconsts
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/controller_common"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/controller_common"
commonController
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/controller_common"
commonController
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/controller_common"
istioNetworking
"istio.io/api/netw
or
k
in
g/v1beta1
"
"github.com/ai-dynamo/dynamo/deploy/cloud/operat
or
/
in
ternal/dynamo
"
networkingv1beta1
"istio.io/client-go/pkg/apis/networking/v1beta1"
networkingv1beta1
"istio.io/client-go/pkg/apis/networking/v1beta1"
k8serrors
"k8s.io/apimachinery/pkg/api/errors"
k8serrors
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/meta"
"k8s.io/apimachinery/pkg/api/meta"
...
@@ -73,7 +72,6 @@ const (
...
@@ -73,7 +72,6 @@ const (
DeploymentTargetTypeProduction
=
"production"
DeploymentTargetTypeProduction
=
"production"
DeploymentTargetTypeDebug
=
"debug"
DeploymentTargetTypeDebug
=
"debug"
HeaderNameDebug
=
"X-Nvidia-Debug"
HeaderNameDebug
=
"X-Nvidia-Debug"
DefaultIngressSuffix
=
"local"
KubernetesDeploymentStrategy
=
"kubernetes"
KubernetesDeploymentStrategy
=
"kubernetes"
KubeAnnotationDeploymentType
=
"nvidia.com/deployment-type"
KubeAnnotationDeploymentType
=
"nvidia.com/deployment-type"
...
@@ -88,10 +86,7 @@ type DynamoComponentDeploymentReconciler struct {
...
@@ -88,10 +86,7 @@ type DynamoComponentDeploymentReconciler struct {
client
.
Client
client
.
Client
Recorder
record
.
EventRecorder
Recorder
record
.
EventRecorder
Config
controller_common
.
Config
Config
controller_common
.
Config
NatsAddr
string
EtcdAddr
string
EtcdStorage
etcdStorage
EtcdStorage
etcdStorage
UseVirtualService
bool
DockerSecretRetriever
dockerSecretRetriever
DockerSecretRetriever
dockerSecretRetriever
}
}
...
@@ -952,7 +947,7 @@ func (r *DynamoComponentDeploymentReconciler) createOrUpdateOrDeleteIngress(ctx
...
@@ -952,7 +947,7 @@ func (r *DynamoComponentDeploymentReconciler) createOrUpdateOrDeleteIngress(ctx
if
err
!=
nil
{
if
err
!=
nil
{
return
false
,
err
return
false
,
err
}
}
if
r
.
UseVirtualService
{
if
r
.
Config
.
IngressConfig
.
UseVirtualService
()
{
modified_
,
_
,
err
:=
commonController
.
SyncResource
(
ctx
,
r
,
opt
.
dynamoComponentDeployment
,
func
(
ctx
context
.
Context
)
(
*
networkingv1beta1
.
VirtualService
,
bool
,
error
)
{
modified_
,
_
,
err
:=
commonController
.
SyncResource
(
ctx
,
r
,
opt
.
dynamoComponentDeployment
,
func
(
ctx
context
.
Context
)
(
*
networkingv1beta1
.
VirtualService
,
bool
,
error
)
{
return
r
.
generateVirtualService
(
ctx
,
opt
)
return
r
.
generateVirtualService
(
ctx
,
opt
)
})
})
...
@@ -975,49 +970,11 @@ func (r *DynamoComponentDeploymentReconciler) generateIngress(ctx context.Contex
...
@@ -975,49 +970,11 @@ func (r *DynamoComponentDeploymentReconciler) generateIngress(ctx context.Contex
},
},
}
}
if
!
opt
.
dynamoComponentDeployment
.
Spec
.
Ingress
.
Enabled
||
opt
.
dynamoComponentDeployment
.
Spec
.
Ingress
.
IngressControllerClassName
==
nil
{
if
opt
.
dynamoComponentDeployment
.
Spec
.
Ingress
==
nil
||
!
opt
.
dynamoComponentDeployment
.
Spec
.
Ingress
.
Enabled
||
opt
.
dynamoComponentDeployment
.
Spec
.
Ingress
.
IngressControllerClassName
==
nil
{
log
.
Info
(
"Ingress is not enabled"
)
log
.
Info
(
"Ingress is not enabled"
)
return
ingress
,
true
,
nil
return
ingress
,
true
,
nil
}
}
host
:=
getIngressHost
(
opt
.
dynamoComponentDeployment
.
Spec
.
Ingress
)
return
dynamo
.
GenerateComponentIngress
(
ctx
,
opt
.
dynamoComponentDeployment
.
Name
,
opt
.
dynamoComponentDeployment
.
Namespace
,
*
opt
.
dynamoComponentDeployment
.
Spec
.
Ingress
),
false
,
nil
ingress
.
Spec
=
networkingv1
.
IngressSpec
{
IngressClassName
:
opt
.
dynamoComponentDeployment
.
Spec
.
Ingress
.
IngressControllerClassName
,
Rules
:
[]
networkingv1
.
IngressRule
{
{
Host
:
host
,
IngressRuleValue
:
networkingv1
.
IngressRuleValue
{
HTTP
:
&
networkingv1
.
HTTPIngressRuleValue
{
Paths
:
[]
networkingv1
.
HTTPIngressPath
{
{
Path
:
"/"
,
PathType
:
&
[]
networkingv1
.
PathType
{
networkingv1
.
PathTypePrefix
}[
0
],
Backend
:
networkingv1
.
IngressBackend
{
Service
:
&
networkingv1
.
IngressServiceBackend
{
Name
:
opt
.
dynamoComponentDeployment
.
Name
,
Port
:
networkingv1
.
ServiceBackendPort
{
Number
:
commonconsts
.
DynamoServicePort
,
},
},
},
},
},
},
},
},
},
}
if
opt
.
dynamoComponentDeployment
.
Spec
.
Ingress
.
TLS
!=
nil
{
ingress
.
Spec
.
TLS
=
[]
networkingv1
.
IngressTLS
{
{
Hosts
:
[]
string
{
host
},
SecretName
:
opt
.
dynamoComponentDeployment
.
Spec
.
Ingress
.
TLS
.
SecretName
,
},
}
}
return
ingress
,
false
,
nil
}
}
func
(
r
*
DynamoComponentDeploymentReconciler
)
generateVirtualService
(
ctx
context
.
Context
,
opt
generateResourceOption
)
(
*
networkingv1beta1
.
VirtualService
,
bool
,
error
)
{
func
(
r
*
DynamoComponentDeploymentReconciler
)
generateVirtualService
(
ctx
context
.
Context
,
opt
generateResourceOption
)
(
*
networkingv1beta1
.
VirtualService
,
bool
,
error
)
{
...
@@ -1031,40 +988,12 @@ func (r *DynamoComponentDeploymentReconciler) generateVirtualService(ctx context
...
@@ -1031,40 +988,12 @@ func (r *DynamoComponentDeploymentReconciler) generateVirtualService(ctx context
},
},
}
}
vsEnabled
:=
opt
.
dynamoComponentDeployment
.
Spec
.
Ingress
.
Enabled
&&
opt
.
dynamoComponentDeployment
.
Spec
.
Ingress
.
UseVirtualService
&&
opt
.
dynamoComponentDeployment
.
Spec
.
Ingress
.
VirtualServiceGateway
!=
nil
vsEnabled
:=
opt
.
dynamoComponentDeployment
.
Spec
.
Ingress
!=
nil
&&
opt
.
dynamoComponentDeployment
.
Spec
.
Ingress
.
Enabled
&&
opt
.
dynamoComponentDeployment
.
Spec
.
Ingress
.
UseVirtualService
&&
opt
.
dynamoComponentDeployment
.
Spec
.
Ingress
.
VirtualServiceGateway
!=
nil
if
!
vsEnabled
{
if
!
vsEnabled
{
log
.
Info
(
"VirtualService is not enabled"
)
log
.
Info
(
"VirtualService is not enabled"
)
return
vs
,
true
,
nil
return
vs
,
true
,
nil
}
}
return
dynamo
.
GenerateComponentVirtualService
(
ctx
,
opt
.
dynamoComponentDeployment
.
Name
,
opt
.
dynamoComponentDeployment
.
Namespace
,
*
opt
.
dynamoComponentDeployment
.
Spec
.
Ingress
),
false
,
nil
vs
.
Spec
=
istioNetworking
.
VirtualService
{
Hosts
:
[]
string
{
getIngressHost
(
opt
.
dynamoComponentDeployment
.
Spec
.
Ingress
),
},
Gateways
:
[]
string
{
*
opt
.
dynamoComponentDeployment
.
Spec
.
Ingress
.
VirtualServiceGateway
},
Http
:
[]
*
istioNetworking
.
HTTPRoute
{
{
Match
:
[]
*
istioNetworking
.
HTTPMatchRequest
{
{
Uri
:
&
istioNetworking
.
StringMatch
{
MatchType
:
&
istioNetworking
.
StringMatch_Prefix
{
Prefix
:
"/"
},
},
},
},
Route
:
[]
*
istioNetworking
.
HTTPRouteDestination
{
{
Destination
:
&
istioNetworking
.
Destination
{
Host
:
opt
.
dynamoComponentDeployment
.
Name
,
Port
:
&
istioNetworking
.
PortSelector
{
Number
:
commonconsts
.
DynamoServicePort
,
},
},
},
},
},
},
}
return
vs
,
false
,
nil
}
}
func
(
r
*
DynamoComponentDeploymentReconciler
)
getKubeName
(
dynamoComponentDeployment
*
v1alpha1
.
DynamoComponentDeployment
,
debug
bool
)
string
{
func
(
r
*
DynamoComponentDeploymentReconciler
)
getKubeName
(
dynamoComponentDeployment
*
v1alpha1
.
DynamoComponentDeployment
,
debug
bool
)
string
{
...
@@ -1274,7 +1203,6 @@ func (r *DynamoComponentDeploymentReconciler) generateHPA(opt generateResourceOp
...
@@ -1274,7 +1203,6 @@ func (r *DynamoComponentDeploymentReconciler) generateHPA(opt generateResourceOp
//nolint:gocyclo,nakedret
//nolint:gocyclo,nakedret
func
(
r
*
DynamoComponentDeploymentReconciler
)
generatePodTemplateSpec
(
ctx
context
.
Context
,
opt
generateResourceOption
)
(
podTemplateSpec
*
corev1
.
PodTemplateSpec
,
err
error
)
{
func
(
r
*
DynamoComponentDeploymentReconciler
)
generatePodTemplateSpec
(
ctx
context
.
Context
,
opt
generateResourceOption
)
(
podTemplateSpec
*
corev1
.
PodTemplateSpec
,
err
error
)
{
logs
:=
log
.
FromContext
(
ctx
)
podLabels
:=
r
.
getKubeLabels
(
opt
.
dynamoComponentDeployment
)
podLabels
:=
r
.
getKubeLabels
(
opt
.
dynamoComponentDeployment
)
if
opt
.
isStealingTrafficDebugModeEnabled
{
if
opt
.
isStealingTrafficDebugModeEnabled
{
podLabels
[
commonconsts
.
KubeLabelDynamoDeploymentTargetType
]
=
DeploymentTargetTypeDebug
podLabels
[
commonconsts
.
KubeLabelDynamoDeploymentTargetType
]
=
DeploymentTargetTypeDebug
...
@@ -1333,17 +1261,17 @@ func (r *DynamoComponentDeploymentReconciler) generatePodTemplateSpec(ctx contex
...
@@ -1333,17 +1261,17 @@ func (r *DynamoComponentDeploymentReconciler) generatePodTemplateSpec(ctx contex
},
},
}
}
if
r
.
NatsAddr
!=
""
{
if
r
.
Config
.
NatsAddr
ess
!=
""
{
defaultEnvs
=
append
(
defaultEnvs
,
corev1
.
EnvVar
{
defaultEnvs
=
append
(
defaultEnvs
,
corev1
.
EnvVar
{
Name
:
"NATS_SERVER"
,
Name
:
"NATS_SERVER"
,
Value
:
r
.
NatsAddr
,
Value
:
r
.
Config
.
NatsAddr
ess
,
})
})
}
}
if
r
.
EtcdAddr
!=
""
{
if
r
.
Config
.
EtcdAddr
ess
!=
""
{
defaultEnvs
=
append
(
defaultEnvs
,
corev1
.
EnvVar
{
defaultEnvs
=
append
(
defaultEnvs
,
corev1
.
EnvVar
{
Name
:
"ETCD_ENDPOINTS"
,
Name
:
"ETCD_ENDPOINTS"
,
Value
:
r
.
EtcdAddr
,
Value
:
r
.
Config
.
EtcdAddr
ess
,
})
})
}
}
...
@@ -1366,34 +1294,6 @@ func (r *DynamoComponentDeploymentReconciler) generatePodTemplateSpec(ctx contex
...
@@ -1366,34 +1294,6 @@ func (r *DynamoComponentDeploymentReconciler) generatePodTemplateSpec(ctx contex
volumes
:=
make
([]
corev1
.
Volume
,
0
)
volumes
:=
make
([]
corev1
.
Volume
,
0
)
volumeMounts
:=
make
([]
corev1
.
VolumeMount
,
0
)
volumeMounts
:=
make
([]
corev1
.
VolumeMount
,
0
)
args
:=
make
([]
string
,
0
)
args
=
append
(
args
,
"cd"
,
"src"
,
"&&"
,
"uv"
,
"run"
,
"dynamo"
,
"serve"
)
// ensure liveness and readiness probes are enabled for the dynamo components
args
=
append
(
args
,
"--system-app-port"
,
fmt
.
Sprintf
(
"%d"
,
commonconsts
.
DynamoHealthPort
))
args
=
append
(
args
,
"--enable-system-app"
)
args
=
append
(
args
,
"--use-default-health-checks"
)
if
opt
.
dynamoComponentDeployment
.
Spec
.
ServiceName
!=
""
{
args
=
append
(
args
,
[]
string
{
"--service-name"
,
opt
.
dynamoComponentDeployment
.
Spec
.
ServiceName
}
...
)
args
=
append
(
args
,
opt
.
dynamoComponentDeployment
.
Spec
.
DynamoTag
)
if
opt
.
dynamoComponentDeployment
.
Spec
.
DynamoNamespace
!=
nil
&&
*
opt
.
dynamoComponentDeployment
.
Spec
.
DynamoNamespace
!=
""
{
args
=
append
(
args
,
fmt
.
Sprintf
(
"--%s.ServiceArgs.dynamo.namespace=%s"
,
opt
.
dynamoComponentDeployment
.
Spec
.
ServiceName
,
*
opt
.
dynamoComponentDeployment
.
Spec
.
DynamoNamespace
))
}
if
componentType
,
exists
:=
opt
.
dynamoComponentDeployment
.
Labels
[
commonconsts
.
KubeLabelDynamoComponent
];
exists
&&
componentType
==
ComponentTypePlanner
{
args
=
append
(
args
,
fmt
.
Sprintf
(
"--%s.environment=%s"
,
opt
.
dynamoComponentDeployment
.
Spec
.
ServiceName
,
KubernetesDeploymentStrategy
))
}
}
if
len
(
opt
.
dynamoComponentDeployment
.
Spec
.
Envs
)
>
0
{
for
_
,
env
:=
range
opt
.
dynamoComponentDeployment
.
Spec
.
Envs
{
if
env
.
Name
==
"DYNAMO_CONFIG_PATH"
{
args
=
append
(
args
,
"-f"
,
env
.
Value
)
}
}
}
dynamoResources
:=
opt
.
dynamoComponentDeployment
.
Spec
.
Resources
dynamoResources
:=
opt
.
dynamoComponentDeployment
.
Spec
.
Resources
resources
,
err
:=
getResourcesConfig
(
dynamoResources
)
resources
,
err
:=
getResourcesConfig
(
dynamoResources
)
...
@@ -1468,8 +1368,6 @@ func (r *DynamoComponentDeploymentReconciler) generatePodTemplateSpec(ctx contex
...
@@ -1468,8 +1368,6 @@ func (r *DynamoComponentDeploymentReconciler) generatePodTemplateSpec(ctx contex
container
:=
corev1
.
Container
{
container
:=
corev1
.
Container
{
Name
:
"main"
,
Name
:
"main"
,
Image
:
imageName
,
Image
:
imageName
,
Command
:
[]
string
{
"sh"
,
"-c"
},
Args
:
[]
string
{
strings
.
Join
(
args
,
" "
)},
LivenessProbe
:
livenessProbe
,
LivenessProbe
:
livenessProbe
,
ReadinessProbe
:
readinessProbe
,
ReadinessProbe
:
readinessProbe
,
Resources
:
resources
,
Resources
:
resources
,
...
@@ -1566,23 +1464,8 @@ func (r *DynamoComponentDeploymentReconciler) generatePodTemplateSpec(ctx contex
...
@@ -1566,23 +1464,8 @@ func (r *DynamoComponentDeploymentReconciler) generatePodTemplateSpec(ctx contex
if
opt
.
dynamoComponentDeployment
.
Spec
.
ExtraPodSpec
!=
nil
{
if
opt
.
dynamoComponentDeployment
.
Spec
.
ExtraPodSpec
!=
nil
{
extraPodSpecMainContainer
:=
opt
.
dynamoComponentDeployment
.
Spec
.
ExtraPodSpec
.
MainContainer
extraPodSpecMainContainer
:=
opt
.
dynamoComponentDeployment
.
Spec
.
ExtraPodSpec
.
MainContainer
if
extraPodSpecMainContainer
!=
nil
{
if
extraPodSpecMainContainer
!=
nil
{
if
len
(
extraPodSpecMainContainer
.
Command
)
>
0
{
// Merge non empty fields from extraPodSpecMainContainer into container, only overriding empty fields
logs
.
Info
(
"Overriding container '"
+
container
.
Name
+
"' Command with: "
+
strings
.
Join
(
extraPodSpecMainContainer
.
Command
,
" "
))
err
:=
mergo
.
Merge
(
&
container
,
extraPodSpecMainContainer
.
DeepCopy
())
container
.
Command
=
extraPodSpecMainContainer
.
Command
}
if
len
(
extraPodSpecMainContainer
.
Args
)
>
0
{
// Special case: if command is "sh -c", we must collapse args into a single string
if
len
(
container
.
Command
)
==
2
&&
container
.
Command
[
0
]
==
"sh"
&&
container
.
Command
[
1
]
==
"-c"
{
joinedArgs
:=
strings
.
Join
(
extraPodSpecMainContainer
.
Args
,
" "
)
logs
.
Info
(
"Special case detected for container '"
+
container
.
Name
+
"': Command is 'sh -c'; collapsing Args to: "
+
joinedArgs
)
container
.
Args
=
[]
string
{
joinedArgs
}
}
else
{
logs
.
Info
(
"Overriding container '"
+
container
.
Name
+
"' Args with: "
+
strings
.
Join
(
extraPodSpecMainContainer
.
Args
,
" "
))
container
.
Args
=
extraPodSpecMainContainer
.
Args
}
}
// finally, Merge non empty fields from extraPodSpecMainContainer into container, only overriding empty fields
err
:=
mergo
.
Merge
(
&
container
,
extraPodSpecMainContainer
)
if
err
!=
nil
{
if
err
!=
nil
{
err
=
errors
.
Wrapf
(
err
,
"failed to merge extraPodSpecMainContainer into container"
)
err
=
errors
.
Wrapf
(
err
,
"failed to merge extraPodSpecMainContainer into container"
)
return
nil
,
err
return
nil
,
err
...
@@ -1723,7 +1606,7 @@ func (r *DynamoComponentDeploymentReconciler) generatePodTemplateSpec(ctx contex
...
@@ -1723,7 +1606,7 @@ func (r *DynamoComponentDeploymentReconciler) generatePodTemplateSpec(ctx contex
}
}
func
getResourcesConfig
(
resources
*
dynamoCommon
.
Resources
)
(
corev1
.
ResourceRequirements
,
error
)
{
func
getResourcesConfig
(
resources
*
dynamoCommon
.
Resources
)
(
corev1
.
ResourceRequirements
,
error
)
{
curren
tResources
:=
corev1
.
ResourceRequirements
{
defaul
tResources
:=
corev1
.
ResourceRequirements
{
Requests
:
corev1
.
ResourceList
{
Requests
:
corev1
.
ResourceList
{
corev1
.
ResourceCPU
:
resource
.
MustParse
(
"300m"
),
corev1
.
ResourceCPU
:
resource
.
MustParse
(
"300m"
),
corev1
.
ResourceMemory
:
resource
.
MustParse
(
"500Mi"
),
corev1
.
ResourceMemory
:
resource
.
MustParse
(
"500Mi"
),
...
@@ -1733,86 +1616,18 @@ func getResourcesConfig(resources *dynamoCommon.Resources) (corev1.ResourceRequi
...
@@ -1733,86 +1616,18 @@ func getResourcesConfig(resources *dynamoCommon.Resources) (corev1.ResourceRequi
corev1
.
ResourceMemory
:
resource
.
MustParse
(
"1Gi"
),
corev1
.
ResourceMemory
:
resource
.
MustParse
(
"1Gi"
),
},
},
}
}
if
resources
==
nil
{
if
resources
==
nil
{
return
curren
tResources
,
nil
return
defaul
tResources
,
nil
}
}
resourcesConfig
,
err
:=
controller_common
.
GetResourcesConfig
(
resources
)
if
resources
.
Limits
!=
nil
{
if
err
!=
nil
{
if
resources
.
Limits
.
CPU
!=
""
{
return
corev1
.
ResourceRequirements
{},
errors
.
Wrapf
(
err
,
"failed to get resources config"
)
q
,
err
:=
resource
.
ParseQuantity
(
resources
.
Limits
.
CPU
)
if
err
!=
nil
{
return
currentResources
,
errors
.
Wrapf
(
err
,
"parse limits cpu quantity"
)
}
if
currentResources
.
Limits
==
nil
{
currentResources
.
Limits
=
make
(
corev1
.
ResourceList
)
}
currentResources
.
Limits
[
corev1
.
ResourceCPU
]
=
q
}
if
resources
.
Limits
.
Memory
!=
""
{
q
,
err
:=
resource
.
ParseQuantity
(
resources
.
Limits
.
Memory
)
if
err
!=
nil
{
return
currentResources
,
errors
.
Wrapf
(
err
,
"parse limits memory quantity"
)
}
if
currentResources
.
Limits
==
nil
{
currentResources
.
Limits
=
make
(
corev1
.
ResourceList
)
}
currentResources
.
Limits
[
corev1
.
ResourceMemory
]
=
q
}
if
resources
.
Limits
.
GPU
!=
""
{
q
,
err
:=
resource
.
ParseQuantity
(
resources
.
Limits
.
GPU
)
if
err
!=
nil
{
return
currentResources
,
errors
.
Wrapf
(
err
,
"parse limits gpu quantity"
)
}
if
currentResources
.
Limits
==
nil
{
currentResources
.
Limits
=
make
(
corev1
.
ResourceList
)
}
currentResources
.
Limits
[
commonconsts
.
KubeResourceGPUNvidia
]
=
q
}
for
k
,
v
:=
range
resources
.
Limits
.
Custom
{
q
,
err
:=
resource
.
ParseQuantity
(
v
)
if
err
!=
nil
{
return
currentResources
,
errors
.
Wrapf
(
err
,
"parse limits %s quantity"
,
k
)
}
if
currentResources
.
Limits
==
nil
{
currentResources
.
Limits
=
make
(
corev1
.
ResourceList
)
}
currentResources
.
Limits
[
corev1
.
ResourceName
(
k
)]
=
q
}
}
}
if
resources
.
Requests
!=
nil
{
err
=
mergo
.
Merge
(
resourcesConfig
,
defaultResources
.
DeepCopy
())
if
resources
.
Requests
.
CPU
!=
""
{
if
err
!=
nil
{
q
,
err
:=
resource
.
ParseQuantity
(
resources
.
Requests
.
CPU
)
return
corev1
.
ResourceRequirements
{},
errors
.
Wrapf
(
err
,
"failed to merge resources config"
)
if
err
!=
nil
{
return
currentResources
,
errors
.
Wrapf
(
err
,
"parse requests cpu quantity"
)
}
if
currentResources
.
Requests
==
nil
{
currentResources
.
Requests
=
make
(
corev1
.
ResourceList
)
}
currentResources
.
Requests
[
corev1
.
ResourceCPU
]
=
q
}
if
resources
.
Requests
.
Memory
!=
""
{
q
,
err
:=
resource
.
ParseQuantity
(
resources
.
Requests
.
Memory
)
if
err
!=
nil
{
return
currentResources
,
errors
.
Wrapf
(
err
,
"parse requests memory quantity"
)
}
if
currentResources
.
Requests
==
nil
{
currentResources
.
Requests
=
make
(
corev1
.
ResourceList
)
}
currentResources
.
Requests
[
corev1
.
ResourceMemory
]
=
q
}
for
k
,
v
:=
range
resources
.
Requests
.
Custom
{
q
,
err
:=
resource
.
ParseQuantity
(
v
)
if
err
!=
nil
{
return
currentResources
,
errors
.
Wrapf
(
err
,
"parse requests %s quantity"
,
k
)
}
if
currentResources
.
Requests
==
nil
{
currentResources
.
Requests
=
make
(
corev1
.
ResourceList
)
}
currentResources
.
Requests
[
corev1
.
ResourceName
(
k
)]
=
q
}
}
}
return
currentR
esources
,
nil
return
*
r
esources
Config
,
nil
}
}
func
(
r
*
DynamoComponentDeploymentReconciler
)
generateService
(
opt
generateResourceOption
)
(
*
corev1
.
Service
,
bool
,
error
)
{
func
(
r
*
DynamoComponentDeploymentReconciler
)
generateService
(
opt
generateResourceOption
)
(
*
corev1
.
Service
,
bool
,
error
)
{
...
@@ -1930,7 +1745,7 @@ func (r *DynamoComponentDeploymentReconciler) SetupWithManager(mgr ctrl.Manager)
...
@@ -1930,7 +1745,7 @@ func (r *DynamoComponentDeploymentReconciler) SetupWithManager(mgr ctrl.Manager)
}))
}))
}
}
if
r
.
UseVirtualService
{
if
r
.
Config
.
IngressConfig
.
UseVirtualService
()
{
m
.
Owns
(
&
networkingv1beta1
.
VirtualService
{},
builder
.
WithPredicates
(
predicate
.
GenerationChangedPredicate
{}))
m
.
Owns
(
&
networkingv1beta1
.
VirtualService
{},
builder
.
WithPredicates
(
predicate
.
GenerationChangedPredicate
{}))
}
}
m
.
Owns
(
&
autoscalingv2
.
HorizontalPodAutoscaler
{})
m
.
Owns
(
&
autoscalingv2
.
HorizontalPodAutoscaler
{})
...
...
deploy/cloud/operator/internal/controller/dynamocomponentdeployment_controller_test.go
View file @
ee3a8e42
...
@@ -278,7 +278,7 @@ func TestDynamoComponentDeploymentReconciler_generateIngress(t *testing.T) {
...
@@ -278,7 +278,7 @@ func TestDynamoComponentDeploymentReconciler_generateIngress(t *testing.T) {
DynamoComponentDeploymentSharedSpec
:
v1alpha1
.
DynamoComponentDeploymentSharedSpec
{
DynamoComponentDeploymentSharedSpec
:
v1alpha1
.
DynamoComponentDeploymentSharedSpec
{
ServiceName
:
"service1"
,
ServiceName
:
"service1"
,
DynamoNamespace
:
&
[]
string
{
"default"
}[
0
],
DynamoNamespace
:
&
[]
string
{
"default"
}[
0
],
Ingress
:
v1alpha1
.
IngressSpec
{
Ingress
:
&
v1alpha1
.
IngressSpec
{
Enabled
:
true
,
Enabled
:
true
,
Host
:
"someservice"
,
Host
:
"someservice"
,
IngressControllerClassName
:
&
[]
string
{
"nginx"
}[
0
],
IngressControllerClassName
:
&
[]
string
{
"nginx"
}[
0
],
...
@@ -337,7 +337,7 @@ func TestDynamoComponentDeploymentReconciler_generateIngress(t *testing.T) {
...
@@ -337,7 +337,7 @@ func TestDynamoComponentDeploymentReconciler_generateIngress(t *testing.T) {
DynamoComponentDeploymentSharedSpec
:
v1alpha1
.
DynamoComponentDeploymentSharedSpec
{
DynamoComponentDeploymentSharedSpec
:
v1alpha1
.
DynamoComponentDeploymentSharedSpec
{
ServiceName
:
"service1"
,
ServiceName
:
"service1"
,
DynamoNamespace
:
&
[]
string
{
"default"
}[
0
],
DynamoNamespace
:
&
[]
string
{
"default"
}[
0
],
Ingress
:
v1alpha1
.
IngressSpec
{
Ingress
:
&
v1alpha1
.
IngressSpec
{
Enabled
:
false
,
Enabled
:
false
,
},
},
},
},
...
@@ -400,7 +400,7 @@ func TestDynamoComponentDeploymentReconciler_generateVirtualService(t *testing.T
...
@@ -400,7 +400,7 @@ func TestDynamoComponentDeploymentReconciler_generateVirtualService(t *testing.T
DynamoComponentDeploymentSharedSpec
:
v1alpha1
.
DynamoComponentDeploymentSharedSpec
{
DynamoComponentDeploymentSharedSpec
:
v1alpha1
.
DynamoComponentDeploymentSharedSpec
{
ServiceName
:
"service1"
,
ServiceName
:
"service1"
,
DynamoNamespace
:
&
[]
string
{
"default"
}[
0
],
DynamoNamespace
:
&
[]
string
{
"default"
}[
0
],
Ingress
:
v1alpha1
.
IngressSpec
{
Ingress
:
&
v1alpha1
.
IngressSpec
{
Enabled
:
true
,
Enabled
:
true
,
},
},
},
},
...
@@ -432,7 +432,7 @@ func TestDynamoComponentDeploymentReconciler_generateVirtualService(t *testing.T
...
@@ -432,7 +432,7 @@ func TestDynamoComponentDeploymentReconciler_generateVirtualService(t *testing.T
DynamoComponentDeploymentSharedSpec
:
v1alpha1
.
DynamoComponentDeploymentSharedSpec
{
DynamoComponentDeploymentSharedSpec
:
v1alpha1
.
DynamoComponentDeploymentSharedSpec
{
ServiceName
:
"service1"
,
ServiceName
:
"service1"
,
DynamoNamespace
:
&
[]
string
{
"default"
}[
0
],
DynamoNamespace
:
&
[]
string
{
"default"
}[
0
],
Ingress
:
v1alpha1
.
IngressSpec
{
Ingress
:
&
v1alpha1
.
IngressSpec
{
Enabled
:
true
,
Enabled
:
true
,
Host
:
"someservice"
,
Host
:
"someservice"
,
UseVirtualService
:
true
,
UseVirtualService
:
true
,
...
@@ -495,13 +495,10 @@ func TestDynamoComponentDeploymentReconciler_generateVirtualService(t *testing.T
...
@@ -495,13 +495,10 @@ func TestDynamoComponentDeploymentReconciler_generateVirtualService(t *testing.T
func
TestDynamoComponentDeploymentReconciler_generateVolcanoPodGroup
(
t
*
testing
.
T
)
{
func
TestDynamoComponentDeploymentReconciler_generateVolcanoPodGroup
(
t
*
testing
.
T
)
{
type
fields
struct
{
type
fields
struct
{
Client
client
.
Client
Client
client
.
Client
Recorder
record
.
EventRecorder
Recorder
record
.
EventRecorder
Config
controller_common
.
Config
Config
controller_common
.
Config
NatsAddr
string
EtcdStorage
etcdStorage
EtcdAddr
string
EtcdStorage
etcdStorage
UseVirtualService
bool
}
}
type
args
struct
{
type
args
struct
{
ctx
context
.
Context
ctx
context
.
Context
...
@@ -755,13 +752,10 @@ func TestDynamoComponentDeploymentReconciler_generateVolcanoPodGroup(t *testing.
...
@@ -755,13 +752,10 @@ func TestDynamoComponentDeploymentReconciler_generateVolcanoPodGroup(t *testing.
t
.
Run
(
tt
.
name
,
func
(
t
*
testing
.
T
)
{
t
.
Run
(
tt
.
name
,
func
(
t
*
testing
.
T
)
{
g
:=
gomega
.
NewGomegaWithT
(
t
)
g
:=
gomega
.
NewGomegaWithT
(
t
)
r
:=
&
DynamoComponentDeploymentReconciler
{
r
:=
&
DynamoComponentDeploymentReconciler
{
Client
:
tt
.
fields
.
Client
,
Client
:
tt
.
fields
.
Client
,
Recorder
:
tt
.
fields
.
Recorder
,
Recorder
:
tt
.
fields
.
Recorder
,
Config
:
tt
.
fields
.
Config
,
Config
:
tt
.
fields
.
Config
,
NatsAddr
:
tt
.
fields
.
NatsAddr
,
EtcdStorage
:
tt
.
fields
.
EtcdStorage
,
EtcdAddr
:
tt
.
fields
.
EtcdAddr
,
EtcdStorage
:
tt
.
fields
.
EtcdStorage
,
UseVirtualService
:
tt
.
fields
.
UseVirtualService
,
}
}
got
,
got1
,
err
:=
r
.
generateVolcanoPodGroup
(
tt
.
args
.
ctx
,
tt
.
args
.
opt
)
got
,
got1
,
err
:=
r
.
generateVolcanoPodGroup
(
tt
.
args
.
ctx
,
tt
.
args
.
opt
)
if
(
err
!=
nil
)
!=
tt
.
wantErr
{
if
(
err
!=
nil
)
!=
tt
.
wantErr
{
...
@@ -789,10 +783,7 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
...
@@ -789,10 +783,7 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
Client
client
.
Client
Client
client
.
Client
Recorder
record
.
EventRecorder
Recorder
record
.
EventRecorder
Config
controller_common
.
Config
Config
controller_common
.
Config
NatsAddr
string
EtcdAddr
string
EtcdStorage
etcdStorage
EtcdStorage
etcdStorage
UseVirtualService
bool
DockerSecretRetriever
*
mockDockerSecretRetriever
DockerSecretRetriever
*
mockDockerSecretRetriever
}
}
type
args
struct
{
type
args
struct
{
...
@@ -847,6 +838,13 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
...
@@ -847,6 +838,13 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
ExtraPodSpec
:
&
dynamoCommon
.
ExtraPodSpec
{
ExtraPodSpec
:
&
dynamoCommon
.
ExtraPodSpec
{
MainContainer
:
&
corev1
.
Container
{
MainContainer
:
&
corev1
.
Container
{
Image
:
"test-image:latest"
,
Image
:
"test-image:latest"
,
Command
:
[]
string
{
"sh"
,
"-c"
,
},
Args
:
[]
string
{
"some dynamo command"
,
},
},
},
},
},
},
},
...
@@ -897,7 +895,7 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
...
@@ -897,7 +895,7 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
Name
:
"main"
,
Name
:
"main"
,
Image
:
"test-image:latest"
,
Image
:
"test-image:latest"
,
Command
:
[]
string
{
"sh"
,
"-c"
},
Command
:
[]
string
{
"sh"
,
"-c"
},
Args
:
[]
string
{
"ray start --head --port=6379 &&
cd src && uv run dynamo serve --system-app-port 5000 --enable-system-app --use-default-health-checks --service-name test-lws-deploy-service test-tag --test-lws-deploy-service.ServiceArgs.dynamo.namespace=default
"
},
Args
:
[]
string
{
"ray start --head --port=6379 &&
some dynamo command
"
},
Env
:
[]
corev1
.
EnvVar
{{
Name
:
"DYNAMO_PORT"
,
Value
:
fmt
.
Sprintf
(
"%d"
,
commonconsts
.
DynamoServicePort
)}},
Env
:
[]
corev1
.
EnvVar
{{
Name
:
"DYNAMO_PORT"
,
Value
:
fmt
.
Sprintf
(
"%d"
,
commonconsts
.
DynamoServicePort
)}},
VolumeMounts
:
[]
corev1
.
VolumeMount
{
VolumeMounts
:
[]
corev1
.
VolumeMount
{
{
{
...
@@ -1095,10 +1093,7 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
...
@@ -1095,10 +1093,7 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
Client
:
fakeKubeClient
,
// Use the fake client
Client
:
fakeKubeClient
,
// Use the fake client
Recorder
:
tt
.
fields
.
Recorder
,
Recorder
:
tt
.
fields
.
Recorder
,
Config
:
tt
.
fields
.
Config
,
Config
:
tt
.
fields
.
Config
,
NatsAddr
:
tt
.
fields
.
NatsAddr
,
EtcdAddr
:
tt
.
fields
.
EtcdAddr
,
EtcdStorage
:
tt
.
fields
.
EtcdStorage
,
EtcdStorage
:
tt
.
fields
.
EtcdStorage
,
UseVirtualService
:
tt
.
fields
.
UseVirtualService
,
DockerSecretRetriever
:
tt
.
fields
.
DockerSecretRetriever
,
DockerSecretRetriever
:
tt
.
fields
.
DockerSecretRetriever
,
// Scheme: s, // Pass scheme if reconciler uses it directly, often client uses it
// Scheme: s, // Pass scheme if reconciler uses it directly, often client uses it
}
}
...
...
deploy/cloud/operator/internal/controller/dynamographdeployment_controller.go
View file @
ee3a8e42
...
@@ -21,6 +21,10 @@ import (
...
@@ -21,6 +21,10 @@ import (
"context"
"context"
"fmt"
"fmt"
grovev1alpha1
"github.com/NVIDIA/grove/operator/api/core/v1alpha1"
networkingv1beta1
"istio.io/client-go/pkg/apis/networking/v1beta1"
corev1
"k8s.io/api/core/v1"
networkingv1
"k8s.io/api/networking/v1"
metav1
"k8s.io/apimachinery/pkg/apis/meta/v1"
metav1
"k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/tools/record"
"k8s.io/client-go/tools/record"
ctrl
"sigs.k8s.io/controller-runtime"
ctrl
"sigs.k8s.io/controller-runtime"
...
@@ -31,14 +35,19 @@ import (
...
@@ -31,14 +35,19 @@ import (
"sigs.k8s.io/controller-runtime/pkg/predicate"
"sigs.k8s.io/controller-runtime/pkg/predicate"
nvidiacomv1alpha1
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1"
nvidiacomv1alpha1
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts"
commonController
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/controller_common"
commonController
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/controller_common"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/dynamo"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/dynamo"
)
)
type
State
string
type
Reason
string
type
Message
string
const
(
const
(
FailedState
=
"failed"
FailedState
State
=
"failed"
ReadyState
=
"successful"
ReadyState
State
=
"successful"
PendingState
=
"pending"
PendingState
State
=
"pending"
)
)
type
etcdStorage
interface
{
type
etcdStorage
interface
{
...
@@ -48,17 +57,15 @@ type etcdStorage interface {
...
@@ -48,17 +57,15 @@ type etcdStorage interface {
// DynamoGraphDeploymentReconciler reconciles a DynamoGraphDeployment object
// DynamoGraphDeploymentReconciler reconciles a DynamoGraphDeployment object
type
DynamoGraphDeploymentReconciler
struct
{
type
DynamoGraphDeploymentReconciler
struct
{
client
.
Client
client
.
Client
Config
commonController
.
Config
Config
commonController
.
Config
Recorder
record
.
EventRecorder
Recorder
record
.
EventRecorder
VirtualServiceGateway
string
DockerSecretRetriever
dockerSecretRetriever
IngressControllerClassName
string
IngressControllerTLSSecret
string
IngressHostSuffix
string
}
}
// +kubebuilder:rbac:groups=nvidia.com,resources=dynamographdeployments,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=nvidia.com,resources=dynamographdeployments,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=nvidia.com,resources=dynamographdeployments/status,verbs=get;update;patch
// +kubebuilder:rbac:groups=nvidia.com,resources=dynamographdeployments/status,verbs=get;update;patch
// +kubebuilder:rbac:groups=nvidia.com,resources=dynamographdeployments/finalizers,verbs=update
// +kubebuilder:rbac:groups=nvidia.com,resources=dynamographdeployments/finalizers,verbs=update
// +kubebuilder:rbac:groups=grove.io,resources=podgangsets,verbs=get;list;watch;create;update;patch;delete
// Reconcile is part of the main kubernetes reconciliation loop which aims to
// Reconcile is part of the main kubernetes reconciliation loop which aims to
// move the current state of the cluster closer to the desired state.
// move the current state of the cluster closer to the desired state.
...
@@ -73,8 +80,9 @@ func (r *DynamoGraphDeploymentReconciler) Reconcile(ctx context.Context, req ctr
...
@@ -73,8 +80,9 @@ func (r *DynamoGraphDeploymentReconciler) Reconcile(ctx context.Context, req ctr
logger
:=
log
.
FromContext
(
ctx
)
logger
:=
log
.
FromContext
(
ctx
)
var
err
error
var
err
error
reason
:=
"undefined"
reason
:=
Reason
(
"undefined"
)
message
:=
""
message
:=
Message
(
""
)
state
:=
PendingState
readyStatus
:=
metav1
.
ConditionFalse
readyStatus
:=
metav1
.
ConditionFalse
// retrieve the CRD
// retrieve the CRD
dynamoDeployment
:=
&
nvidiacomv1alpha1
.
DynamoGraphDeployment
{}
dynamoDeployment
:=
&
nvidiacomv1alpha1
.
DynamoGraphDeployment
{}
...
@@ -88,16 +96,20 @@ func (r *DynamoGraphDeploymentReconciler) Reconcile(ctx context.Context, req ctr
...
@@ -88,16 +96,20 @@ func (r *DynamoGraphDeploymentReconciler) Reconcile(ctx context.Context, req ctr
defer
func
()
{
defer
func
()
{
if
err
!=
nil
{
if
err
!=
nil
{
dynamoDeployment
.
SetS
tate
(
FailedState
)
s
tate
=
FailedState
message
=
err
.
Error
()
message
=
Message
(
err
.
Error
()
)
logger
.
Error
(
err
,
"Reconciliation failed"
)
logger
.
Error
(
err
,
"Reconciliation failed"
)
}
}
dynamoDeployment
.
SetState
(
string
(
state
))
if
state
==
ReadyState
{
readyStatus
=
metav1
.
ConditionTrue
}
// update the CRD status condition
// update the CRD status condition
dynamoDeployment
.
AddStatusCondition
(
metav1
.
Condition
{
dynamoDeployment
.
AddStatusCondition
(
metav1
.
Condition
{
Type
:
"Ready"
,
Type
:
"Ready"
,
Status
:
readyStatus
,
Status
:
readyStatus
,
Reason
:
reason
,
Reason
:
string
(
reason
)
,
Message
:
message
,
Message
:
string
(
message
)
,
LastTransitionTime
:
metav1
.
Now
(),
LastTransitionTime
:
metav1
.
Now
(),
})
})
err
=
r
.
Status
()
.
Update
(
ctx
,
dynamoDeployment
)
err
=
r
.
Status
()
.
Update
(
ctx
,
dynamoDeployment
)
...
@@ -116,81 +128,164 @@ func (r *DynamoGraphDeploymentReconciler) Reconcile(ctx context.Context, req ctr
...
@@ -116,81 +128,164 @@ func (r *DynamoGraphDeploymentReconciler) Reconcile(ctx context.Context, req ctr
if
deleted
{
if
deleted
{
return
ctrl
.
Result
{},
nil
return
ctrl
.
Result
{},
nil
}
}
state
,
reason
,
message
,
err
=
r
.
reconcileResources
(
ctx
,
dynamoDeployment
)
// generate the dynamoComponentsDeployments from the config
dynamoComponentsDeployments
,
err
:=
dynamo
.
GenerateDynamoComponentsDeployments
(
ctx
,
dynamoDeployment
,
r
.
generateDefaultIngressSpec
(
dynamoDeployment
))
if
err
!=
nil
{
if
err
!=
nil
{
logger
.
Error
(
err
,
"failed to
generate the DynamoComponentsDeployments and DynamoComponent
s"
)
logger
.
Error
(
err
,
"failed to
reconcile the resource
s"
)
reason
=
"failed_to_
generate_the_DynamoComponentsDeployment
s"
reason
=
"failed_to_
reconcile_the_resource
s"
return
ctrl
.
Result
{},
err
return
ctrl
.
Result
{},
err
}
}
return
ctrl
.
Result
{},
nil
}
// merge the dynamoComponentsDeployments with the dynamoComponentsDeployments from the CRD
type
Resource
interface
{
for
_
,
deployment
:=
range
dynamoComponentsDeployments
{
IsReady
()
bool
if
deployment
.
Spec
.
Ingress
.
Enabled
{
GetName
()
string
dynamoDeployment
.
SetEndpointStatus
(
r
.
isEndpointSecured
(),
getIngressHost
(
deployment
.
Spec
.
Ingress
))
}
}
}
notReadyDeployments
:=
[]
string
{}
func
(
r
*
DynamoGraphDeploymentReconciler
)
reconcileResources
(
ctx
context
.
Context
,
dynamoDeployment
*
nvidiacomv1alpha1
.
DynamoGraphDeployment
)
(
State
,
Reason
,
Message
,
error
)
{
// reconcile the dynamoComponentsDeployments
logger
:=
log
.
FromContext
(
ctx
)
for
serviceName
,
dynamoComponentDeployment
:=
range
dynamoComponentsDeployments
{
if
r
.
Config
.
EnableGrove
{
logger
.
Info
(
"Reconciling the DynamoComponentDeployment"
,
"serviceName"
,
serviceName
,
"dynamoComponentDeployment"
,
dynamoComponentDeployment
)
// check if explicit opt out of grove
_
,
dynamoComponentDeployment
,
err
=
commonController
.
SyncResource
(
ctx
,
r
,
dynamoDeployment
,
func
(
ctx
context
.
Context
)
(
*
nvidiacomv1alpha1
.
DynamoComponentDeployment
,
bool
,
error
)
{
if
dynamoDeployment
.
Annotations
[
consts
.
KubeAnnotationEnableGrove
]
==
consts
.
KubeLabelValueFalse
{
return
dynamoComponentDeployment
,
false
,
nil
logger
.
Info
(
"Grove is explicitly disabled for this deployment, skipping grove resources reconciliation"
)
})
return
r
.
reconcileDynamoComponentsDeployments
(
ctx
,
dynamoDeployment
)
if
err
!=
nil
{
logger
.
Error
(
err
,
"failed to sync the DynamoComponentDeployment"
)
reason
=
"failed_to_sync_the_DynamoComponentDeployment"
return
ctrl
.
Result
{},
err
}
if
!
dynamoComponentDeployment
.
Status
.
IsReady
()
{
notReadyDeployments
=
append
(
notReadyDeployments
,
dynamoComponentDeployment
.
Name
)
}
}
return
r
.
reconcileGroveResources
(
ctx
,
dynamoDeployment
)
}
}
if
len
(
notReadyDeployments
)
==
0
{
return
r
.
reconcileDynamoComponentsDeployments
(
ctx
,
dynamoDeployment
)
dynamoDeployment
.
SetState
(
ReadyState
)
reason
=
"all_deployments_are_ready"
message
=
"All deployments are ready"
readyStatus
=
metav1
.
ConditionTrue
}
else
{
reason
=
"some_deployments_are_not_ready"
message
=
fmt
.
Sprintf
(
"The following deployments are not ready: %v"
,
notReadyDeployments
)
dynamoDeployment
.
SetState
(
PendingState
)
}
return
ctrl
.
Result
{},
nil
}
}
func
(
r
*
DynamoGraphDeploymentReconciler
)
generateDefaultIngressSpec
(
dynamoDeployment
*
nvidiacomv1alpha1
.
DynamoGraphDeployment
)
*
nvidiacomv1alpha1
.
IngressSpec
{
func
(
r
*
DynamoGraphDeploymentReconciler
)
reconcileGroveResources
(
ctx
context
.
Context
,
dynamoDeployment
*
nvidiacomv1alpha1
.
DynamoGraphDeployment
)
(
State
,
Reason
,
Message
,
error
)
{
res
:=
&
nvidiacomv1alpha1
.
IngressSpec
{
logger
:=
log
.
FromContext
(
ctx
)
Enabled
:
r
.
VirtualServiceGateway
!=
""
||
r
.
IngressControllerClassName
!=
""
,
// generate the dynamoComponentsDeployments from the config
Host
:
dynamoDeployment
.
Name
,
groveGangSet
,
err
:=
dynamo
.
GenerateGrovePodGangSet
(
ctx
,
dynamoDeployment
,
r
.
Config
,
r
.
DockerSecretRetriever
)
UseVirtualService
:
r
.
VirtualServiceGateway
!=
""
,
if
err
!=
nil
{
logger
.
Error
(
err
,
"failed to generate the Grove GangSet"
)
return
""
,
""
,
""
,
fmt
.
Errorf
(
"failed to generate the Grove GangSet: %w"
,
err
)
}
}
if
r
.
IngressControllerClassName
!=
""
{
_
,
syncedGroveGangSet
,
err
:=
commonController
.
SyncResource
(
ctx
,
r
,
dynamoDeployment
,
func
(
ctx
context
.
Context
)
(
*
grovev1alpha1
.
PodGangSet
,
bool
,
error
)
{
res
.
IngressControllerClassName
=
&
r
.
IngressControllerClassName
return
groveGangSet
,
false
,
nil
})
if
err
!=
nil
{
logger
.
Error
(
err
,
"failed to sync the Grove GangSet"
)
return
""
,
""
,
""
,
fmt
.
Errorf
(
"failed to sync the Grove GangSet: %w"
,
err
)
}
}
if
r
.
IngressControllerTLSSecret
!=
""
{
groveGangSetAsResource
:=
commonController
.
WrapResource
(
syncedGroveGangSet
,
func
()
bool
{
res
.
TLS
=
&
nvidiacomv1alpha1
.
IngressTLSSpec
{
if
syncedGroveGangSet
.
Status
.
LastOperation
!=
nil
&&
syncedGroveGangSet
.
Status
.
LastOperation
.
State
==
grovev1alpha1
.
LastOperationStateSucceeded
{
SecretName
:
r
.
IngressControllerTLSSecret
,
return
true
}
return
false
})
resources
:=
[]
Resource
{
groveGangSetAsResource
}
for
componentName
,
component
:=
range
dynamoDeployment
.
Spec
.
Services
{
if
component
.
ComponentType
==
consts
.
ComponentTypeMain
{
// generate the main component service
mainComponentService
,
err
:=
dynamo
.
GenerateComponentService
(
ctx
,
dynamo
.
GetDynamoComponentName
(
dynamoDeployment
,
componentName
),
dynamoDeployment
.
Namespace
)
if
err
!=
nil
{
logger
.
Error
(
err
,
"failed to generate the main component service"
)
return
""
,
""
,
""
,
fmt
.
Errorf
(
"failed to generate the main component service: %w"
,
err
)
}
_
,
syncedMainComponentService
,
err
:=
commonController
.
SyncResource
(
ctx
,
r
,
dynamoDeployment
,
func
(
ctx
context
.
Context
)
(
*
corev1
.
Service
,
bool
,
error
)
{
return
mainComponentService
,
false
,
nil
})
if
err
!=
nil
{
logger
.
Error
(
err
,
"failed to sync the main component service"
)
return
""
,
""
,
""
,
fmt
.
Errorf
(
"failed to sync the main component service: %w"
,
err
)
}
mainComponentServiceAsResource
:=
commonController
.
WrapResource
(
syncedMainComponentService
,
func
()
bool
{
return
true
})
resources
=
append
(
resources
,
mainComponentServiceAsResource
)
// generate the main component ingress
ingressSpec
:=
dynamo
.
GenerateDefaultIngressSpec
(
dynamoDeployment
,
r
.
Config
.
IngressConfig
)
if
component
.
Ingress
!=
nil
{
ingressSpec
=
*
component
.
Ingress
}
mainComponentIngress
:=
dynamo
.
GenerateComponentIngress
(
ctx
,
dynamo
.
GetDynamoComponentName
(
dynamoDeployment
,
componentName
),
dynamoDeployment
.
Namespace
,
ingressSpec
)
if
err
!=
nil
{
logger
.
Error
(
err
,
"failed to generate the main component ingress"
)
return
""
,
""
,
""
,
fmt
.
Errorf
(
"failed to generate the main component ingress: %w"
,
err
)
}
_
,
syncedMainComponentIngress
,
err
:=
commonController
.
SyncResource
(
ctx
,
r
,
dynamoDeployment
,
func
(
ctx
context
.
Context
)
(
*
networkingv1
.
Ingress
,
bool
,
error
)
{
if
!
ingressSpec
.
Enabled
||
ingressSpec
.
IngressControllerClassName
==
nil
{
logger
.
Info
(
"Ingress is not enabled"
)
return
mainComponentIngress
,
true
,
nil
}
return
mainComponentIngress
,
false
,
nil
})
if
err
!=
nil
{
logger
.
Error
(
err
,
"failed to sync the main component ingress"
)
return
""
,
""
,
""
,
fmt
.
Errorf
(
"failed to sync the main component ingress: %w"
,
err
)
}
resources
=
append
(
resources
,
commonController
.
WrapResource
(
syncedMainComponentIngress
,
func
()
bool
{
return
true
}))
// generate the main component virtual service
mainComponentVirtualService
:=
dynamo
.
GenerateComponentVirtualService
(
ctx
,
dynamo
.
GetDynamoComponentName
(
dynamoDeployment
,
componentName
),
dynamoDeployment
.
Namespace
,
ingressSpec
)
if
err
!=
nil
{
logger
.
Error
(
err
,
"failed to generate the main component virtual service"
)
return
""
,
""
,
""
,
fmt
.
Errorf
(
"failed to generate the main component virtual service: %w"
,
err
)
}
_
,
syncedMainComponentVirtualService
,
err
:=
commonController
.
SyncResource
(
ctx
,
r
,
dynamoDeployment
,
func
(
ctx
context
.
Context
)
(
*
networkingv1beta1
.
VirtualService
,
bool
,
error
)
{
vsEnabled
:=
ingressSpec
.
Enabled
&&
ingressSpec
.
UseVirtualService
&&
ingressSpec
.
VirtualServiceGateway
!=
nil
if
!
vsEnabled
{
logger
.
Info
(
"VirtualService is not enabled"
)
return
mainComponentVirtualService
,
true
,
nil
}
return
mainComponentVirtualService
,
false
,
nil
})
if
err
!=
nil
{
logger
.
Error
(
err
,
"failed to sync the main component virtual service"
)
return
""
,
""
,
""
,
fmt
.
Errorf
(
"failed to sync the main component virtual service: %w"
,
err
)
}
resources
=
append
(
resources
,
commonController
.
WrapResource
(
syncedMainComponentVirtualService
,
func
()
bool
{
return
true
}))
}
}
}
}
if
r
.
IngressHostSuffix
!=
""
{
return
r
.
checkResourcesReadiness
(
resources
)
res
.
HostSuffix
=
&
r
.
IngressHostSuffix
}
func
(
r
*
DynamoGraphDeploymentReconciler
)
checkResourcesReadiness
(
resources
[]
Resource
)
(
State
,
Reason
,
Message
,
error
)
{
notReadyResources
:=
[]
string
{}
for
_
,
resource
:=
range
resources
{
if
!
resource
.
IsReady
()
{
notReadyResources
=
append
(
notReadyResources
,
resource
.
GetName
())
}
}
}
if
r
.
VirtualServiceGateway
!=
""
{
if
len
(
notReadyResources
)
==
0
{
re
s
.
VirtualServiceGateway
=
&
r
.
VirtualServiceGateway
re
turn
ReadyState
,
"all_resources_are_ready"
,
Message
(
"All resources are ready"
),
nil
}
}
return
res
return
PendingState
,
"some_resources_are_not_ready"
,
Message
(
fmt
.
Sprintf
(
"%d resources not ready: %v"
,
len
(
notReadyResources
),
notReadyResources
)),
nil
}
}
func
(
r
*
DynamoGraphDeploymentReconciler
)
isEndpointSecured
()
bool
{
func
(
r
*
DynamoGraphDeploymentReconciler
)
reconcileDynamoComponentsDeployments
(
ctx
context
.
Context
,
dynamoDeployment
*
nvidiacomv1alpha1
.
DynamoGraphDeployment
)
(
State
,
Reason
,
Message
,
error
)
{
if
r
.
VirtualServiceGateway
!=
""
&&
r
.
Config
.
VirtualServiceSupportsHTTPS
{
resources
:=
[]
Resource
{}
return
true
logger
:=
log
.
FromContext
(
ctx
)
// generate the dynamoComponentsDeployments from the config
defaultIngressSpec
:=
dynamo
.
GenerateDefaultIngressSpec
(
dynamoDeployment
,
r
.
Config
.
IngressConfig
)
dynamoComponentsDeployments
,
err
:=
dynamo
.
GenerateDynamoComponentsDeployments
(
ctx
,
dynamoDeployment
,
&
defaultIngressSpec
)
if
err
!=
nil
{
logger
.
Error
(
err
,
"failed to generate the DynamoComponentsDeployments"
)
return
""
,
""
,
""
,
fmt
.
Errorf
(
"failed to generate the DynamoComponentsDeployments: %w"
,
err
)
}
}
return
r
.
IngressControllerTLSSecret
!=
""
// reconcile the dynamoComponentsDeployments
for
serviceName
,
dynamoComponentDeployment
:=
range
dynamoComponentsDeployments
{
logger
.
Info
(
"Reconciling the DynamoComponentDeployment"
,
"serviceName"
,
serviceName
,
"dynamoComponentDeployment"
,
dynamoComponentDeployment
)
_
,
dynamoComponentDeployment
,
err
=
commonController
.
SyncResource
(
ctx
,
r
,
dynamoDeployment
,
func
(
ctx
context
.
Context
)
(
*
nvidiacomv1alpha1
.
DynamoComponentDeployment
,
bool
,
error
)
{
return
dynamoComponentDeployment
,
false
,
nil
})
if
err
!=
nil
{
logger
.
Error
(
err
,
"failed to sync the DynamoComponentDeployment"
)
return
""
,
""
,
""
,
fmt
.
Errorf
(
"failed to sync the DynamoComponentDeployment: %w"
,
err
)
}
resources
=
append
(
resources
,
dynamoComponentDeployment
)
}
return
r
.
checkResourcesReadiness
(
resources
)
}
}
func
(
r
*
DynamoGraphDeploymentReconciler
)
FinalizeResource
(
ctx
context
.
Context
,
dynamoDeployment
*
nvidiacomv1alpha1
.
DynamoGraphDeployment
)
error
{
func
(
r
*
DynamoGraphDeploymentReconciler
)
FinalizeResource
(
ctx
context
.
Context
,
dynamoDeployment
*
nvidiacomv1alpha1
.
DynamoGraphDeployment
)
error
{
...
@@ -200,7 +295,7 @@ func (r *DynamoGraphDeploymentReconciler) FinalizeResource(ctx context.Context,
...
@@ -200,7 +295,7 @@ func (r *DynamoGraphDeploymentReconciler) FinalizeResource(ctx context.Context,
// SetupWithManager sets up the controller with the Manager.
// SetupWithManager sets up the controller with the Manager.
func
(
r
*
DynamoGraphDeploymentReconciler
)
SetupWithManager
(
mgr
ctrl
.
Manager
)
error
{
func
(
r
*
DynamoGraphDeploymentReconciler
)
SetupWithManager
(
mgr
ctrl
.
Manager
)
error
{
return
ctrl
.
NewControllerManagedBy
(
mgr
)
.
ctrlBuilder
:=
ctrl
.
NewControllerManagedBy
(
mgr
)
.
For
(
&
nvidiacomv1alpha1
.
DynamoGraphDeployment
{},
builder
.
WithPredicates
(
For
(
&
nvidiacomv1alpha1
.
DynamoGraphDeployment
{},
builder
.
WithPredicates
(
predicate
.
GenerationChangedPredicate
{},
predicate
.
GenerationChangedPredicate
{},
))
.
))
.
...
@@ -212,8 +307,17 @@ func (r *DynamoGraphDeploymentReconciler) SetupWithManager(mgr ctrl.Manager) err
...
@@ -212,8 +307,17 @@ func (r *DynamoGraphDeploymentReconciler) SetupWithManager(mgr ctrl.Manager) err
UpdateFunc
:
func
(
de
event
.
UpdateEvent
)
bool
{
return
true
},
UpdateFunc
:
func
(
de
event
.
UpdateEvent
)
bool
{
return
true
},
GenericFunc
:
func
(
ge
event
.
GenericEvent
)
bool
{
return
true
},
GenericFunc
:
func
(
ge
event
.
GenericEvent
)
bool
{
return
true
},
}))
.
}))
.
WithEventFilter
(
commonController
.
EphemeralDeploymentEventFilter
(
r
.
Config
))
.
WithEventFilter
(
commonController
.
EphemeralDeploymentEventFilter
(
r
.
Config
))
Complete
(
r
)
if
r
.
Config
.
EnableGrove
{
ctrlBuilder
=
ctrlBuilder
.
Owns
(
&
grovev1alpha1
.
PodGangSet
{},
builder
.
WithPredicates
(
predicate
.
Funcs
{
// ignore creation cause we don't want to be called again after we create the pod gang set
CreateFunc
:
func
(
ce
event
.
CreateEvent
)
bool
{
return
false
},
DeleteFunc
:
func
(
de
event
.
DeleteEvent
)
bool
{
return
true
},
UpdateFunc
:
func
(
de
event
.
UpdateEvent
)
bool
{
return
true
},
GenericFunc
:
func
(
ge
event
.
GenericEvent
)
bool
{
return
true
},
}))
}
return
ctrlBuilder
.
Complete
(
r
)
}
}
func
(
r
*
DynamoGraphDeploymentReconciler
)
GetRecorder
()
record
.
EventRecorder
{
func
(
r
*
DynamoGraphDeploymentReconciler
)
GetRecorder
()
record
.
EventRecorder
{
...
...
deploy/cloud/operator/internal/controller_common/predicate.go
View file @
ee3a8e42
...
@@ -30,9 +30,22 @@ import (
...
@@ -30,9 +30,22 @@ import (
type
Config
struct
{
type
Config
struct
{
// Enable resources filtering, only the resources belonging to the given namespace will be handled.
// Enable resources filtering, only the resources belonging to the given namespace will be handled.
RestrictedNamespace
string
RestrictedNamespace
string
// If true, assume VirtualService endpoints are HTTPS
EnableLWS
bool
VirtualServiceSupportsHTTPS
bool
EnableGrove
bool
EnableLWS
bool
EtcdAddress
string
NatsAddress
string
IngressConfig
IngressConfig
}
// IngressConfig groups the operator-level ingress settings that are carried
// on Config.
type IngressConfig struct {
	// VirtualServiceGateway, when non-empty, makes UseVirtualService report true.
	VirtualServiceGateway      string
	IngressControllerClassName string
	IngressControllerTLSSecret string
	IngressHostSuffix          string
}

// UseVirtualService reports whether a virtual service gateway has been
// configured, i.e. whether VirtualServiceGateway is a non-empty string.
func (i *IngressConfig) UseVirtualService() bool {
	gateway := i.VirtualServiceGateway
	return gateway != ""
}
}
func
EphemeralDeploymentEventFilter
(
config
Config
)
predicate
.
Predicate
{
func
EphemeralDeploymentEventFilter
(
config
Config
)
predicate
.
Predicate
{
...
...
deploy/cloud/operator/internal/controller_common/resource.go
View file @
ee3a8e42
...
@@ -25,8 +25,11 @@ import (
...
@@ -25,8 +25,11 @@ import (
"reflect"
"reflect"
"sort"
"sort"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/dynamo/common"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts"
corev1
"k8s.io/api/core/v1"
corev1
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/types"
...
@@ -352,3 +355,104 @@ func firstKey(m map[string]interface{}) string {
...
@@ -352,3 +355,104 @@ func firstKey(m map[string]interface{}) string {
sort
.
Strings
(
keys
)
sort
.
Strings
(
keys
)
return
keys
[
0
]
return
keys
[
0
]
}
}
func
GetResourcesConfig
(
resources
*
common
.
Resources
)
(
*
corev1
.
ResourceRequirements
,
error
)
{
if
resources
==
nil
{
return
nil
,
nil
}
currentResources
:=
&
corev1
.
ResourceRequirements
{}
if
resources
.
Limits
!=
nil
{
if
resources
.
Limits
.
CPU
!=
""
{
q
,
err
:=
resource
.
ParseQuantity
(
resources
.
Limits
.
CPU
)
if
err
!=
nil
{
return
nil
,
fmt
.
Errorf
(
"parse limits cpu quantity: %w"
,
err
)
}
if
currentResources
.
Limits
==
nil
{
currentResources
.
Limits
=
make
(
corev1
.
ResourceList
)
}
currentResources
.
Limits
[
corev1
.
ResourceCPU
]
=
q
}
if
resources
.
Limits
.
Memory
!=
""
{
q
,
err
:=
resource
.
ParseQuantity
(
resources
.
Limits
.
Memory
)
if
err
!=
nil
{
return
nil
,
fmt
.
Errorf
(
"parse limits memory quantity: %w"
,
err
)
}
if
currentResources
.
Limits
==
nil
{
currentResources
.
Limits
=
make
(
corev1
.
ResourceList
)
}
currentResources
.
Limits
[
corev1
.
ResourceMemory
]
=
q
}
if
resources
.
Limits
.
GPU
!=
""
{
q
,
err
:=
resource
.
ParseQuantity
(
resources
.
Limits
.
GPU
)
if
err
!=
nil
{
return
nil
,
fmt
.
Errorf
(
"parse limits gpu quantity: %w"
,
err
)
}
if
currentResources
.
Limits
==
nil
{
currentResources
.
Limits
=
make
(
corev1
.
ResourceList
)
}
currentResources
.
Limits
[
corev1
.
ResourceName
(
consts
.
KubeResourceGPUNvidia
)]
=
q
}
for
k
,
v
:=
range
resources
.
Limits
.
Custom
{
q
,
err
:=
resource
.
ParseQuantity
(
v
)
if
err
!=
nil
{
return
nil
,
fmt
.
Errorf
(
"parse limits %s quantity: %w"
,
k
,
err
)
}
if
currentResources
.
Limits
==
nil
{
currentResources
.
Limits
=
make
(
corev1
.
ResourceList
)
}
currentResources
.
Limits
[
corev1
.
ResourceName
(
k
)]
=
q
}
}
if
resources
.
Requests
!=
nil
{
if
resources
.
Requests
.
CPU
!=
""
{
q
,
err
:=
resource
.
ParseQuantity
(
resources
.
Requests
.
CPU
)
if
err
!=
nil
{
return
nil
,
fmt
.
Errorf
(
"parse requests cpu quantity: %w"
,
err
)
}
if
currentResources
.
Requests
==
nil
{
currentResources
.
Requests
=
make
(
corev1
.
ResourceList
)
}
currentResources
.
Requests
[
corev1
.
ResourceCPU
]
=
q
}
if
resources
.
Requests
.
Memory
!=
""
{
q
,
err
:=
resource
.
ParseQuantity
(
resources
.
Requests
.
Memory
)
if
err
!=
nil
{
return
nil
,
fmt
.
Errorf
(
"parse requests memory quantity: %w"
,
err
)
}
if
currentResources
.
Requests
==
nil
{
currentResources
.
Requests
=
make
(
corev1
.
ResourceList
)
}
currentResources
.
Requests
[
corev1
.
ResourceMemory
]
=
q
}
for
k
,
v
:=
range
resources
.
Requests
.
Custom
{
q
,
err
:=
resource
.
ParseQuantity
(
v
)
if
err
!=
nil
{
return
nil
,
fmt
.
Errorf
(
"parse requests %s quantity: %w"
,
k
,
err
)
}
if
currentResources
.
Requests
==
nil
{
currentResources
.
Requests
=
make
(
corev1
.
ResourceList
)
}
currentResources
.
Requests
[
corev1
.
ResourceName
(
k
)]
=
q
}
}
return
currentResources
,
nil
}
type
Resource
struct
{
client
.
Object
isReady
func
()
bool
}
func
WrapResource
[
T
client
.
Object
](
resource
T
,
isReady
func
()
bool
)
*
Resource
{
return
&
Resource
{
Object
:
resource
,
isReady
:
isReady
,
}
}
func
(
r
*
Resource
)
IsReady
()
bool
{
return
r
.
isReady
()
}
deploy/cloud/operator/internal/dynamo/graph.go
View file @
ee3a8e42
...
@@ -21,13 +21,24 @@ import (
...
@@ -21,13 +21,24 @@ import (
"context"
"context"
"encoding/json"
"encoding/json"
"fmt"
"fmt"
"sort"
"strconv"
"strconv"
"strings"
"strings"
istioNetworking
"istio.io/api/networking/v1beta1"
metav1
"k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/intstr"
grovev1alpha1
"github.com/NVIDIA/grove/operator/api/core/v1alpha1"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/dynamo/common"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/dynamo/common"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1"
commonconsts
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts"
commonconsts
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/controller_common"
"github.com/imdario/mergo"
networkingv1beta1
"istio.io/client-go/pkg/apis/networking/v1beta1"
corev1
"k8s.io/api/core/v1"
corev1
"k8s.io/api/core/v1"
networkingv1
"k8s.io/api/networking/v1"
)
)
// ServiceConfig represents the YAML configuration structure for a service
// ServiceConfig represents the YAML configuration structure for a service
...
@@ -129,13 +140,13 @@ func SetLwsAnnotations(serviceArgs *ServiceArgs, deployment *v1alpha1.DynamoComp
...
@@ -129,13 +140,13 @@ func SetLwsAnnotations(serviceArgs *ServiceArgs, deployment *v1alpha1.DynamoComp
}
}
// GenerateDynamoComponentsDeployments generates a map of DynamoComponentDeployments from a DynamoGraphConfig
// GenerateDynamoComponentsDeployments generates a map of DynamoComponentDeployments from a DynamoGraphConfig
func
GenerateDynamoComponentsDeployments
(
ctx
context
.
Context
,
parentDynamoGraphDeployment
*
v1alpha1
.
DynamoGraphDeployment
,
i
ngressSpec
*
v1alpha1
.
IngressSpec
)
(
map
[
string
]
*
v1alpha1
.
DynamoComponentDeployment
,
error
)
{
func
GenerateDynamoComponentsDeployments
(
ctx
context
.
Context
,
parentDynamoGraphDeployment
*
v1alpha1
.
DynamoGraphDeployment
,
defaultI
ngressSpec
*
v1alpha1
.
IngressSpec
)
(
map
[
string
]
*
v1alpha1
.
DynamoComponentDeployment
,
error
)
{
deployments
:=
make
(
map
[
string
]
*
v1alpha1
.
DynamoComponentDeployment
)
deployments
:=
make
(
map
[
string
]
*
v1alpha1
.
DynamoComponentDeployment
)
graphDynamoNamespace
:=
""
graphDynamoNamespace
:=
""
for
componentName
,
component
:=
range
parentDynamoGraphDeployment
.
Spec
.
Services
{
for
componentName
,
component
:=
range
parentDynamoGraphDeployment
.
Spec
.
Services
{
deployment
:=
&
v1alpha1
.
DynamoComponentDeployment
{}
deployment
:=
&
v1alpha1
.
DynamoComponentDeployment
{}
deployment
.
Spec
.
DynamoComponentDeploymentSharedSpec
=
component
.
DynamoComponentDeploymentSharedSpec
deployment
.
Spec
.
DynamoComponentDeploymentSharedSpec
=
component
.
DynamoComponentDeploymentSharedSpec
deployment
.
Name
=
g
etDynamoComponentName
(
parentDynamoGraphDeployment
,
componentName
)
deployment
.
Name
=
G
etDynamoComponentName
(
parentDynamoGraphDeployment
,
componentName
)
deployment
.
Namespace
=
parentDynamoGraphDeployment
.
Namespace
deployment
.
Namespace
=
parentDynamoGraphDeployment
.
Namespace
deployment
.
Spec
.
ServiceName
=
componentName
deployment
.
Spec
.
ServiceName
=
componentName
dynamoNamespace
:=
GetDefaultDynamoNamespace
(
ctx
,
parentDynamoGraphDeployment
)
dynamoNamespace
:=
GetDefaultDynamoNamespace
(
ctx
,
parentDynamoGraphDeployment
)
...
@@ -160,8 +171,8 @@ func GenerateDynamoComponentsDeployments(ctx context.Context, parentDynamoGraphD
...
@@ -160,8 +171,8 @@ func GenerateDynamoComponentsDeployments(ctx context.Context, parentDynamoGraphD
}
}
deployment
.
Spec
.
ExtraPodSpec
.
ServiceAccountName
=
commonconsts
.
PlannerServiceAccountName
deployment
.
Spec
.
ExtraPodSpec
.
ServiceAccountName
=
commonconsts
.
PlannerServiceAccountName
}
}
if
deployment
.
IsMainComponent
()
&&
i
ngressSpec
!=
nil
{
if
deployment
.
IsMainComponent
()
&&
defaultI
ngressSpec
!=
nil
&&
deployment
.
Spec
.
Ingress
==
nil
{
deployment
.
Spec
.
Ingress
=
*
i
ngressSpec
deployment
.
Spec
.
Ingress
=
defaultI
ngressSpec
}
}
// merge the envs from the parent deployment with the envs from the service
// merge the envs from the parent deployment with the envs from the service
if
len
(
parentDynamoGraphDeployment
.
Spec
.
Envs
)
>
0
{
if
len
(
parentDynamoGraphDeployment
.
Spec
.
Envs
)
>
0
{
...
@@ -286,9 +297,271 @@ func mergeEnvs(common, specific []corev1.EnvVar) []corev1.EnvVar {
...
@@ -286,9 +297,271 @@ func mergeEnvs(common, specific []corev1.EnvVar) []corev1.EnvVar {
for
_
,
env
:=
range
envMap
{
for
_
,
env
:=
range
envMap
{
merged
=
append
(
merged
,
env
)
merged
=
append
(
merged
,
env
)
}
}
sort
.
Slice
(
merged
,
func
(
i
,
j
int
)
bool
{
return
merged
[
i
]
.
Name
<
merged
[
j
]
.
Name
})
return
merged
return
merged
}
}
func
g
etDynamoComponentName
(
dynamoDeployment
*
v1alpha1
.
DynamoGraphDeployment
,
component
string
)
string
{
func
G
etDynamoComponentName
(
dynamoDeployment
*
v1alpha1
.
DynamoGraphDeployment
,
component
string
)
string
{
return
fmt
.
Sprintf
(
"%s-%s"
,
dynamoDeployment
.
Name
,
strings
.
ToLower
(
component
))
return
fmt
.
Sprintf
(
"%s-%s"
,
dynamoDeployment
.
Name
,
strings
.
ToLower
(
component
))
}
}
// SecretsRetriever looks up secret names for a namespace / registry pair.
// GenerateGrovePodGangSet passes the container image as the registry argument
// and uses the returned names as image pull secrets.
type SecretsRetriever interface {
	// GetSecrets returns the secret names, or an error if the lookup fails.
	GetSecrets(namespace string, registry string) ([]string, error)
}
func
GenerateGrovePodGangSet
(
ctx
context
.
Context
,
dynamoDeployment
*
v1alpha1
.
DynamoGraphDeployment
,
controllerConfig
controller_common
.
Config
,
secretsRetriever
SecretsRetriever
)
(
*
grovev1alpha1
.
PodGangSet
,
error
)
{
gangSet
:=
&
grovev1alpha1
.
PodGangSet
{}
gangSet
.
Name
=
dynamoDeployment
.
Name
gangSet
.
Namespace
=
dynamoDeployment
.
Namespace
gangSet
.
Spec
.
Replicas
=
1
for
componentName
,
component
:=
range
dynamoDeployment
.
Spec
.
Services
{
container
:=
corev1
.
Container
{
Name
:
"main"
,
LivenessProbe
:
component
.
LivenessProbe
,
ReadinessProbe
:
component
.
ReadinessProbe
,
Env
:
component
.
Envs
,
Ports
:
[]
corev1
.
ContainerPort
{
{
Protocol
:
corev1
.
ProtocolTCP
,
Name
:
commonconsts
.
DynamoContainerPortName
,
ContainerPort
:
int32
(
commonconsts
.
DynamoServicePort
),
},
{
Protocol
:
corev1
.
ProtocolTCP
,
Name
:
commonconsts
.
DynamoHealthPortName
,
ContainerPort
:
int32
(
commonconsts
.
DynamoHealthPort
),
},
},
}
resourcesConfig
,
err
:=
controller_common
.
GetResourcesConfig
(
component
.
Resources
)
if
err
!=
nil
{
return
nil
,
fmt
.
Errorf
(
"failed to get resources config: %w"
,
err
)
}
container
.
Resources
=
*
resourcesConfig
if
component
.
ExtraPodSpec
!=
nil
&&
component
.
ExtraPodSpec
.
MainContainer
!=
nil
{
// merge the extraPodSpec from the parent deployment with the extraPodSpec from the service
err
:=
mergo
.
Merge
(
&
container
,
*
component
.
ExtraPodSpec
.
MainContainer
.
DeepCopy
(),
mergo
.
WithOverride
)
if
err
!=
nil
{
return
nil
,
fmt
.
Errorf
(
"failed to merge extraPodSpec: %w"
,
err
)
}
}
// retrieve the image pull secrets for the container
imagePullSecrets
:=
[]
corev1
.
LocalObjectReference
{}
if
secretsRetriever
!=
nil
{
secretsName
,
err
:=
secretsRetriever
.
GetSecrets
(
dynamoDeployment
.
Namespace
,
container
.
Image
)
if
err
!=
nil
{
return
nil
,
fmt
.
Errorf
(
"failed to get secrets for component %s and image %s: %w"
,
componentName
,
container
.
Image
,
err
)
}
for
_
,
secretName
:=
range
secretsName
{
imagePullSecrets
=
append
(
imagePullSecrets
,
corev1
.
LocalObjectReference
{
Name
:
secretName
,
})
}
}
// merge the envs from the parent deployment with the envs from the service
if
len
(
dynamoDeployment
.
Spec
.
Envs
)
>
0
{
container
.
Env
=
mergeEnvs
(
dynamoDeployment
.
Spec
.
Envs
,
container
.
Env
)
}
container
.
Env
=
append
(
container
.
Env
,
corev1
.
EnvVar
{
Name
:
commonconsts
.
EnvDynamoServicePort
,
Value
:
fmt
.
Sprintf
(
"%d"
,
commonconsts
.
DynamoServicePort
),
})
if
controllerConfig
.
NatsAddress
!=
""
{
container
.
Env
=
append
(
container
.
Env
,
corev1
.
EnvVar
{
Name
:
"NATS_SERVER"
,
Value
:
controllerConfig
.
NatsAddress
,
})
}
if
controllerConfig
.
EtcdAddress
!=
""
{
container
.
Env
=
append
(
container
.
Env
,
corev1
.
EnvVar
{
Name
:
"ETCD_ENDPOINTS"
,
Value
:
controllerConfig
.
EtcdAddress
,
})
}
if
component
.
EnvFromSecret
!=
nil
{
container
.
EnvFrom
=
append
(
container
.
EnvFrom
,
corev1
.
EnvFromSource
{
SecretRef
:
&
corev1
.
SecretEnvSource
{
LocalObjectReference
:
corev1
.
LocalObjectReference
{
Name
:
*
component
.
EnvFromSecret
},
},
})
}
gangSet
.
Spec
.
Template
.
Cliques
=
append
(
gangSet
.
Spec
.
Template
.
Cliques
,
&
grovev1alpha1
.
PodCliqueTemplateSpec
{
Name
:
strings
.
ToLower
(
componentName
),
Labels
:
map
[
string
]
string
{
commonconsts
.
KubeLabelDynamoSelector
:
GetDynamoComponentName
(
dynamoDeployment
,
componentName
),
},
Spec
:
grovev1alpha1
.
PodCliqueSpec
{
RoleName
:
strings
.
ToLower
(
componentName
),
Replicas
:
func
()
int32
{
if
component
.
Replicas
!=
nil
{
return
*
component
.
Replicas
}
return
1
}(),
PodSpec
:
corev1
.
PodSpec
{
Containers
:
[]
corev1
.
Container
{
container
},
ImagePullSecrets
:
imagePullSecrets
,
},
},
})
if
component
.
PVC
!=
nil
{
cliqueIndex
:=
len
(
gangSet
.
Spec
.
Template
.
Cliques
)
-
1
gangSet
.
Spec
.
Template
.
Cliques
[
cliqueIndex
]
.
Spec
.
PodSpec
.
Volumes
=
append
(
gangSet
.
Spec
.
Template
.
Cliques
[
cliqueIndex
]
.
Spec
.
PodSpec
.
Volumes
,
corev1
.
Volume
{
Name
:
*
component
.
PVC
.
Name
,
VolumeSource
:
corev1
.
VolumeSource
{
PersistentVolumeClaim
:
&
corev1
.
PersistentVolumeClaimVolumeSource
{
ClaimName
:
*
component
.
PVC
.
Name
,
},
},
})
gangSet
.
Spec
.
Template
.
Cliques
[
cliqueIndex
]
.
Spec
.
PodSpec
.
Containers
[
0
]
.
VolumeMounts
=
append
(
gangSet
.
Spec
.
Template
.
Cliques
[
cliqueIndex
]
.
Spec
.
PodSpec
.
Containers
[
0
]
.
VolumeMounts
,
corev1
.
VolumeMount
{
Name
:
*
component
.
PVC
.
Name
,
MountPath
:
*
component
.
PVC
.
MountPoint
,
})
}
}
return
gangSet
,
nil
}
func
GenerateComponentService
(
ctx
context
.
Context
,
componentName
,
componentNamespace
string
)
(
*
corev1
.
Service
,
error
)
{
service
:=
&
corev1
.
Service
{
ObjectMeta
:
metav1
.
ObjectMeta
{
Name
:
componentName
,
Namespace
:
componentNamespace
,
},
Spec
:
corev1
.
ServiceSpec
{
Selector
:
map
[
string
]
string
{
commonconsts
.
KubeLabelDynamoSelector
:
componentName
,
},
Ports
:
[]
corev1
.
ServicePort
{
{
Name
:
commonconsts
.
DynamoServicePortName
,
Port
:
commonconsts
.
DynamoServicePort
,
TargetPort
:
intstr
.
FromString
(
commonconsts
.
DynamoContainerPortName
),
Protocol
:
corev1
.
ProtocolTCP
,
},
},
},
}
return
service
,
nil
}
func
GenerateComponentIngress
(
ctx
context
.
Context
,
componentName
,
componentNamespace
string
,
ingressSpec
v1alpha1
.
IngressSpec
)
*
networkingv1
.
Ingress
{
resourceName
:=
componentName
ingress
:=
&
networkingv1
.
Ingress
{
ObjectMeta
:
metav1
.
ObjectMeta
{
Name
:
resourceName
,
Namespace
:
componentNamespace
,
},
}
host
:=
getIngressHost
(
ingressSpec
)
ingress
.
Spec
=
networkingv1
.
IngressSpec
{
IngressClassName
:
ingressSpec
.
IngressControllerClassName
,
Rules
:
[]
networkingv1
.
IngressRule
{
{
Host
:
host
,
IngressRuleValue
:
networkingv1
.
IngressRuleValue
{
HTTP
:
&
networkingv1
.
HTTPIngressRuleValue
{
Paths
:
[]
networkingv1
.
HTTPIngressPath
{
{
Path
:
"/"
,
PathType
:
&
[]
networkingv1
.
PathType
{
networkingv1
.
PathTypePrefix
}[
0
],
Backend
:
networkingv1
.
IngressBackend
{
Service
:
&
networkingv1
.
IngressServiceBackend
{
Name
:
resourceName
,
Port
:
networkingv1
.
ServiceBackendPort
{
Number
:
commonconsts
.
DynamoServicePort
,
},
},
},
},
},
},
},
},
},
}
if
ingressSpec
.
TLS
!=
nil
{
ingress
.
Spec
.
TLS
=
[]
networkingv1
.
IngressTLS
{
{
Hosts
:
[]
string
{
host
},
SecretName
:
ingressSpec
.
TLS
.
SecretName
,
},
}
}
return
ingress
}
func
getIngressHost
(
ingressSpec
v1alpha1
.
IngressSpec
)
string
{
host
:=
ingressSpec
.
Host
if
ingressSpec
.
HostPrefix
!=
nil
{
host
=
*
ingressSpec
.
HostPrefix
+
host
}
ingressSuffix
:=
commonconsts
.
DefaultIngressSuffix
if
ingressSpec
.
HostSuffix
!=
nil
{
ingressSuffix
=
*
ingressSpec
.
HostSuffix
}
return
fmt
.
Sprintf
(
"%s.%s"
,
host
,
ingressSuffix
)
}
func
GenerateComponentVirtualService
(
ctx
context
.
Context
,
componentName
,
componentNamespace
string
,
ingressSpec
v1alpha1
.
IngressSpec
)
*
networkingv1beta1
.
VirtualService
{
vs
:=
&
networkingv1beta1
.
VirtualService
{
ObjectMeta
:
metav1
.
ObjectMeta
{
Name
:
componentName
,
Namespace
:
componentNamespace
,
},
}
vs
.
Spec
=
istioNetworking
.
VirtualService
{
Hosts
:
[]
string
{
getIngressHost
(
ingressSpec
),
},
Gateways
:
[]
string
{
*
ingressSpec
.
VirtualServiceGateway
},
Http
:
[]
*
istioNetworking
.
HTTPRoute
{
{
Match
:
[]
*
istioNetworking
.
HTTPMatchRequest
{
{
Uri
:
&
istioNetworking
.
StringMatch
{
MatchType
:
&
istioNetworking
.
StringMatch_Prefix
{
Prefix
:
"/"
},
},
},
},
Route
:
[]
*
istioNetworking
.
HTTPRouteDestination
{
{
Destination
:
&
istioNetworking
.
Destination
{
Host
:
componentName
,
Port
:
&
istioNetworking
.
PortSelector
{
Number
:
commonconsts
.
DynamoServicePort
,
},
},
},
},
},
},
}
return
vs
}
func
GenerateDefaultIngressSpec
(
dynamoDeployment
*
v1alpha1
.
DynamoGraphDeployment
,
ingressConfig
controller_common
.
IngressConfig
)
v1alpha1
.
IngressSpec
{
res
:=
v1alpha1
.
IngressSpec
{
Enabled
:
ingressConfig
.
VirtualServiceGateway
!=
""
||
ingressConfig
.
IngressControllerClassName
!=
""
,
Host
:
dynamoDeployment
.
Name
,
UseVirtualService
:
ingressConfig
.
VirtualServiceGateway
!=
""
,
}
if
ingressConfig
.
IngressControllerClassName
!=
""
{
res
.
IngressControllerClassName
=
&
ingressConfig
.
IngressControllerClassName
}
if
ingressConfig
.
IngressControllerTLSSecret
!=
""
{
res
.
TLS
=
&
v1alpha1
.
IngressTLSSpec
{
SecretName
:
ingressConfig
.
IngressControllerTLSSecret
,
}
}
if
ingressConfig
.
IngressHostSuffix
!=
""
{
res
.
HostSuffix
=
&
ingressConfig
.
IngressHostSuffix
}
if
ingressConfig
.
VirtualServiceGateway
!=
""
{
res
.
VirtualServiceGateway
=
&
ingressConfig
.
VirtualServiceGateway
}
return
res
}
deploy/cloud/operator/internal/dynamo/graph_test.go
View file @
ee3a8e42
...
@@ -24,15 +24,18 @@ import (
...
@@ -24,15 +24,18 @@ import (
"sort"
"sort"
"testing"
"testing"
grovev1alpha1
"github.com/NVIDIA/grove/operator/api/core/v1alpha1"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/dynamo/common"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/dynamo/common"
compounaiCommon
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/dynamo/common"
compounaiCommon
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/dynamo/common"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1"
nvidiacomv1alpha1
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1"
commonconsts
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts"
commonconsts
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/controller_common"
"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp"
corev1
"k8s.io/api/core/v1"
corev1
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1
"k8s.io/apimachinery/pkg/apis/meta/v1"
metav1
"k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/intstr"
nvidiacomv1alpha1
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1"
)
)
func
TestGenerateDynamoComponentsDeployments
(
t
*
testing
.
T
)
{
func
TestGenerateDynamoComponentsDeployments
(
t
*
testing
.
T
)
{
...
@@ -88,7 +91,6 @@ func TestGenerateDynamoComponentsDeployments(t *testing.T) {
...
@@ -88,7 +91,6 @@ func TestGenerateDynamoComponentsDeployments(t *testing.T) {
},
},
},
},
},
},
ingressSpec
:
&
v1alpha1
.
IngressSpec
{},
},
},
want
:
map
[
string
]
*
v1alpha1
.
DynamoComponentDeployment
{
want
:
map
[
string
]
*
v1alpha1
.
DynamoComponentDeployment
{
"service1"
:
{
"service1"
:
{
...
@@ -197,7 +199,6 @@ func TestGenerateDynamoComponentsDeployments(t *testing.T) {
...
@@ -197,7 +199,6 @@ func TestGenerateDynamoComponentsDeployments(t *testing.T) {
},
},
},
},
},
},
ingressSpec
:
&
v1alpha1
.
IngressSpec
{},
},
},
want
:
map
[
string
]
*
v1alpha1
.
DynamoComponentDeployment
{
want
:
map
[
string
]
*
v1alpha1
.
DynamoComponentDeployment
{
"service1"
:
{
"service1"
:
{
...
@@ -306,7 +307,6 @@ func TestGenerateDynamoComponentsDeployments(t *testing.T) {
...
@@ -306,7 +307,6 @@ func TestGenerateDynamoComponentsDeployments(t *testing.T) {
},
},
},
},
},
},
ingressSpec
:
&
v1alpha1
.
IngressSpec
{},
},
},
want
:
nil
,
want
:
nil
,
wantErr
:
true
,
wantErr
:
true
,
...
@@ -387,7 +387,7 @@ func TestGenerateDynamoComponentsDeployments(t *testing.T) {
...
@@ -387,7 +387,7 @@ func TestGenerateDynamoComponentsDeployments(t *testing.T) {
commonconsts
.
KubeLabelDynamoNamespace
:
"dynamo-test-dynamographdeployment"
,
commonconsts
.
KubeLabelDynamoNamespace
:
"dynamo-test-dynamographdeployment"
,
},
},
Autoscaling
:
nil
,
Autoscaling
:
nil
,
Ingress
:
v1alpha1
.
IngressSpec
{
Ingress
:
&
v1alpha1
.
IngressSpec
{
Enabled
:
true
,
Enabled
:
true
,
Host
:
"test-dynamographdeployment"
,
Host
:
"test-dynamographdeployment"
,
},
},
...
@@ -607,7 +607,6 @@ func TestGenerateDynamoComponentsDeployments(t *testing.T) {
...
@@ -607,7 +607,6 @@ func TestGenerateDynamoComponentsDeployments(t *testing.T) {
},
},
},
},
},
},
ingressSpec
:
&
v1alpha1
.
IngressSpec
{},
},
},
want
:
map
[
string
]
*
v1alpha1
.
DynamoComponentDeployment
{
want
:
map
[
string
]
*
v1alpha1
.
DynamoComponentDeployment
{
"service1"
:
{
"service1"
:
{
...
@@ -1117,3 +1116,398 @@ func Test_mergeEnvs(t *testing.T) {
...
@@ -1117,3 +1116,398 @@ func Test_mergeEnvs(t *testing.T) {
})
})
}
}
}
}
func
TestGenerateGrovePodGangSet
(
t
*
testing
.
T
)
{
type
args
struct
{
ctx
context
.
Context
dynamoDeployment
*
v1alpha1
.
DynamoGraphDeployment
controllerConfig
controller_common
.
Config
}
tests
:=
[]
struct
{
name
string
args
args
want
*
grovev1alpha1
.
PodGangSet
wantErr
bool
}{
{
name
:
"test_generate_grove_pod_gang_set"
,
args
:
args
{
ctx
:
context
.
Background
(),
controllerConfig
:
controller_common
.
Config
{
EtcdAddress
:
"etcd-address"
,
NatsAddress
:
"nats-address"
,
},
dynamoDeployment
:
&
v1alpha1
.
DynamoGraphDeployment
{
ObjectMeta
:
metav1
.
ObjectMeta
{
Name
:
"test-dynamo-graph-deployment"
,
Namespace
:
"test-namespace"
,
},
Spec
:
v1alpha1
.
DynamoGraphDeploymentSpec
{
Envs
:
[]
corev1
.
EnvVar
{
{
Name
:
"DYNAMO_POD_GANG_SET_REPLICAS"
,
Value
:
"1"
,
},
},
Services
:
map
[
string
]
*
v1alpha1
.
DynamoComponentDeploymentOverridesSpec
{
"Frontend"
:
{
DynamoComponentDeploymentSharedSpec
:
v1alpha1
.
DynamoComponentDeploymentSharedSpec
{
Replicas
:
&
[]
int32
{
1
}[
0
],
Resources
:
&
common
.
Resources
{
Requests
:
&
common
.
ResourceItem
{
CPU
:
"1"
,
Memory
:
"1Gi"
,
},
Limits
:
&
common
.
ResourceItem
{
CPU
:
"1"
,
Memory
:
"1Gi"
,
GPU
:
"1"
,
},
},
Envs
:
[]
corev1
.
EnvVar
{
{
Name
:
"FRONTEND_ENV_1"
,
Value
:
"1"
,
},
},
EnvFromSecret
:
&
[]
string
{
"frontend-secret"
}[
0
],
LivenessProbe
:
&
corev1
.
Probe
{
ProbeHandler
:
corev1
.
ProbeHandler
{
HTTPGet
:
&
corev1
.
HTTPGetAction
{
Path
:
"/health"
,
Port
:
intstr
.
FromInt
(
8080
),
},
},
},
ReadinessProbe
:
&
corev1
.
Probe
{
ProbeHandler
:
corev1
.
ProbeHandler
{
HTTPGet
:
&
corev1
.
HTTPGetAction
{
Path
:
"/ready"
,
Port
:
intstr
.
FromInt
(
8080
),
},
},
},
ExtraPodSpec
:
&
common
.
ExtraPodSpec
{
MainContainer
:
&
corev1
.
Container
{
Command
:
[]
string
{
"/bin/sh"
,
"-c"
,
"echo $FRONTEND_ENV_1"
,
},
Args
:
[]
string
{
"--frontend-env-1"
,
"1"
,
},
Image
:
"frontend-image"
,
},
},
},
},
"Planner"
:
{
DynamoComponentDeploymentSharedSpec
:
v1alpha1
.
DynamoComponentDeploymentSharedSpec
{
Replicas
:
&
[]
int32
{
2
}[
0
],
Resources
:
&
common
.
Resources
{
Requests
:
&
common
.
ResourceItem
{
CPU
:
"2"
,
Memory
:
"2Gi"
,
},
Limits
:
&
common
.
ResourceItem
{
CPU
:
"2"
,
Memory
:
"2Gi"
,
GPU
:
"2"
,
},
},
Envs
:
[]
corev1
.
EnvVar
{
{
Name
:
"PLANNER_ENV_1"
,
Value
:
"2"
,
},
},
PVC
:
&
v1alpha1
.
PVC
{
Name
:
&
[]
string
{
"planner-pvc"
}[
0
],
MountPoint
:
&
[]
string
{
"/planner"
}[
0
],
},
EnvFromSecret
:
&
[]
string
{
"planner-secret"
}[
0
],
LivenessProbe
:
&
corev1
.
Probe
{
ProbeHandler
:
corev1
.
ProbeHandler
{
HTTPGet
:
&
corev1
.
HTTPGetAction
{
Path
:
"/health"
,
Port
:
intstr
.
FromInt
(
8080
),
},
},
},
ReadinessProbe
:
&
corev1
.
Probe
{
ProbeHandler
:
corev1
.
ProbeHandler
{
HTTPGet
:
&
corev1
.
HTTPGetAction
{
Path
:
"/ready"
,
Port
:
intstr
.
FromInt
(
8080
),
},
},
},
ExtraPodSpec
:
&
common
.
ExtraPodSpec
{
MainContainer
:
&
corev1
.
Container
{
Command
:
[]
string
{
"/bin/sh"
,
"-c"
,
"echo $PLANNER_ENV_1"
,
},
Args
:
[]
string
{
"--planner-env-1"
,
"1"
,
},
Image
:
"planner-image"
,
},
},
},
},
},
},
},
},
want
:
&
grovev1alpha1
.
PodGangSet
{
ObjectMeta
:
metav1
.
ObjectMeta
{
Name
:
"test-dynamo-graph-deployment"
,
Namespace
:
"test-namespace"
,
},
Spec
:
grovev1alpha1
.
PodGangSetSpec
{
Replicas
:
1
,
Template
:
grovev1alpha1
.
PodGangSetTemplateSpec
{
Cliques
:
[]
*
grovev1alpha1
.
PodCliqueTemplateSpec
{
{
Name
:
"frontend"
,
Labels
:
map
[
string
]
string
{
commonconsts
.
KubeLabelDynamoSelector
:
"test-dynamo-graph-deployment-frontend"
,
},
Spec
:
grovev1alpha1
.
PodCliqueSpec
{
RoleName
:
"frontend"
,
Replicas
:
1
,
PodSpec
:
corev1
.
PodSpec
{
ImagePullSecrets
:
[]
corev1
.
LocalObjectReference
{},
Containers
:
[]
corev1
.
Container
{
{
Name
:
"main"
,
Image
:
"frontend-image"
,
Command
:
[]
string
{
"/bin/sh"
,
"-c"
,
"echo $FRONTEND_ENV_1"
,
},
Args
:
[]
string
{
"--frontend-env-1"
,
"1"
,
},
EnvFrom
:
[]
corev1
.
EnvFromSource
{
{
SecretRef
:
&
corev1
.
SecretEnvSource
{
LocalObjectReference
:
corev1
.
LocalObjectReference
{
Name
:
"frontend-secret"
,
},
},
},
},
LivenessProbe
:
&
corev1
.
Probe
{
ProbeHandler
:
corev1
.
ProbeHandler
{
HTTPGet
:
&
corev1
.
HTTPGetAction
{
Path
:
"/health"
,
Port
:
intstr
.
FromInt
(
8080
),
},
},
},
ReadinessProbe
:
&
corev1
.
Probe
{
ProbeHandler
:
corev1
.
ProbeHandler
{
HTTPGet
:
&
corev1
.
HTTPGetAction
{
Path
:
"/ready"
,
Port
:
intstr
.
FromInt
(
8080
),
},
},
},
Env
:
[]
corev1
.
EnvVar
{
{
Name
:
"DYNAMO_POD_GANG_SET_REPLICAS"
,
Value
:
"1"
,
},
{
Name
:
"FRONTEND_ENV_1"
,
Value
:
"1"
,
},
{
Name
:
"DYNAMO_PORT"
,
Value
:
fmt
.
Sprintf
(
"%d"
,
commonconsts
.
DynamoServicePort
),
},
{
Name
:
"NATS_SERVER"
,
Value
:
"nats-address"
,
},
{
Name
:
"ETCD_ENDPOINTS"
,
Value
:
"etcd-address"
,
},
},
Resources
:
corev1
.
ResourceRequirements
{
Requests
:
corev1
.
ResourceList
{
corev1
.
ResourceCPU
:
resource
.
MustParse
(
"1"
),
corev1
.
ResourceMemory
:
resource
.
MustParse
(
"1Gi"
),
},
Limits
:
corev1
.
ResourceList
{
corev1
.
ResourceCPU
:
resource
.
MustParse
(
"1"
),
corev1
.
ResourceMemory
:
resource
.
MustParse
(
"1Gi"
),
corev1
.
ResourceName
(
"nvidia.com/gpu"
)
:
resource
.
MustParse
(
"1"
),
},
},
Ports
:
[]
corev1
.
ContainerPort
{
{
Protocol
:
corev1
.
ProtocolTCP
,
Name
:
commonconsts
.
DynamoContainerPortName
,
ContainerPort
:
int32
(
commonconsts
.
DynamoServicePort
),
},
{
Protocol
:
corev1
.
ProtocolTCP
,
Name
:
commonconsts
.
DynamoHealthPortName
,
ContainerPort
:
int32
(
commonconsts
.
DynamoHealthPort
),
},
},
},
},
},
},
},
{
Name
:
"planner"
,
Labels
:
map
[
string
]
string
{
commonconsts
.
KubeLabelDynamoSelector
:
"test-dynamo-graph-deployment-planner"
,
},
Spec
:
grovev1alpha1
.
PodCliqueSpec
{
RoleName
:
"planner"
,
Replicas
:
2
,
PodSpec
:
corev1
.
PodSpec
{
ImagePullSecrets
:
[]
corev1
.
LocalObjectReference
{},
Volumes
:
[]
corev1
.
Volume
{
{
Name
:
"planner-pvc"
,
VolumeSource
:
corev1
.
VolumeSource
{
PersistentVolumeClaim
:
&
corev1
.
PersistentVolumeClaimVolumeSource
{
ClaimName
:
"planner-pvc"
,
},
},
},
},
Containers
:
[]
corev1
.
Container
{
{
Name
:
"main"
,
Image
:
"planner-image"
,
Command
:
[]
string
{
"/bin/sh"
,
"-c"
,
"echo $PLANNER_ENV_1"
,
},
Args
:
[]
string
{
"--planner-env-1"
,
"1"
,
},
EnvFrom
:
[]
corev1
.
EnvFromSource
{
{
SecretRef
:
&
corev1
.
SecretEnvSource
{
LocalObjectReference
:
corev1
.
LocalObjectReference
{
Name
:
"planner-secret"
,
},
},
},
},
LivenessProbe
:
&
corev1
.
Probe
{
ProbeHandler
:
corev1
.
ProbeHandler
{
HTTPGet
:
&
corev1
.
HTTPGetAction
{
Path
:
"/health"
,
Port
:
intstr
.
FromInt
(
8080
),
},
},
},
ReadinessProbe
:
&
corev1
.
Probe
{
ProbeHandler
:
corev1
.
ProbeHandler
{
HTTPGet
:
&
corev1
.
HTTPGetAction
{
Path
:
"/ready"
,
Port
:
intstr
.
FromInt
(
8080
),
},
},
},
Env
:
[]
corev1
.
EnvVar
{
{
Name
:
"DYNAMO_POD_GANG_SET_REPLICAS"
,
Value
:
"1"
,
},
{
Name
:
"PLANNER_ENV_1"
,
Value
:
"2"
,
},
{
Name
:
"DYNAMO_PORT"
,
Value
:
fmt
.
Sprintf
(
"%d"
,
commonconsts
.
DynamoServicePort
),
},
{
Name
:
"NATS_SERVER"
,
Value
:
"nats-address"
,
},
{
Name
:
"ETCD_ENDPOINTS"
,
Value
:
"etcd-address"
,
},
},
Resources
:
corev1
.
ResourceRequirements
{
Requests
:
corev1
.
ResourceList
{
corev1
.
ResourceCPU
:
resource
.
MustParse
(
"2"
),
corev1
.
ResourceMemory
:
resource
.
MustParse
(
"2Gi"
),
},
Limits
:
corev1
.
ResourceList
{
corev1
.
ResourceCPU
:
resource
.
MustParse
(
"2"
),
corev1
.
ResourceMemory
:
resource
.
MustParse
(
"2Gi"
),
corev1
.
ResourceName
(
"nvidia.com/gpu"
)
:
resource
.
MustParse
(
"2"
),
},
},
VolumeMounts
:
[]
corev1
.
VolumeMount
{
{
Name
:
"planner-pvc"
,
MountPath
:
"/planner"
,
},
},
Ports
:
[]
corev1
.
ContainerPort
{
{
Protocol
:
corev1
.
ProtocolTCP
,
Name
:
commonconsts
.
DynamoContainerPortName
,
ContainerPort
:
int32
(
commonconsts
.
DynamoServicePort
),
},
{
Protocol
:
corev1
.
ProtocolTCP
,
Name
:
commonconsts
.
DynamoHealthPortName
,
ContainerPort
:
int32
(
commonconsts
.
DynamoHealthPort
),
},
},
},
},
},
},
},
},
},
},
},
wantErr
:
false
,
},
}
for
_
,
tt
:=
range
tests
{
t
.
Run
(
tt
.
name
,
func
(
t
*
testing
.
T
)
{
got
,
err
:=
GenerateGrovePodGangSet
(
tt
.
args
.
ctx
,
tt
.
args
.
dynamoDeployment
,
tt
.
args
.
controllerConfig
,
nil
)
if
(
err
!=
nil
)
!=
tt
.
wantErr
{
t
.
Errorf
(
"GenerateGrovePodGangSet() error = %v, wantErr %v"
,
err
,
tt
.
wantErr
)
return
}
sort
.
Slice
(
got
.
Spec
.
Template
.
Cliques
,
func
(
i
,
j
int
)
bool
{
return
got
.
Spec
.
Template
.
Cliques
[
i
]
.
Name
<
got
.
Spec
.
Template
.
Cliques
[
j
]
.
Name
})
sort
.
Slice
(
tt
.
want
.
Spec
.
Template
.
Cliques
,
func
(
i
,
j
int
)
bool
{
return
tt
.
want
.
Spec
.
Template
.
Cliques
[
i
]
.
Name
<
tt
.
want
.
Spec
.
Template
.
Cliques
[
j
]
.
Name
})
if
diff
:=
cmp
.
Diff
(
got
,
tt
.
want
);
diff
!=
""
{
t
.
Errorf
(
"GenerateGrovePodGangSet() mismatch (-want +got):
\n
%s"
,
diff
)
}
})
}
}
deploy/helm/chart/templates/deployment.yaml
View file @
ee3a8e42
...
@@ -38,8 +38,14 @@ spec:
...
@@ -38,8 +38,14 @@ spec:
-
name
:
{{
$.Release.Name
}}
-{{ $serviceName | lower }}
-
name
:
{{
$.Release.Name
}}
-{{ $serviceName | lower }}
image
:
{{
$serviceSpec.extraPodSpec.mainContainer.image
}}
image
:
{{
$serviceSpec.extraPodSpec.mainContainer.image
}}
workingDir
:
{{
$serviceSpec.extraPodSpec.mainContainer.workingDir
}}
workingDir
:
{{
$serviceSpec.extraPodSpec.mainContainer.workingDir
}}
{{
- if $serviceSpec.extraPodSpec.mainContainer.command
}}
command
:
{{
- $serviceSpec.extraPodSpec.mainContainer.command | toYaml | nindent 8
}}
{{
- end
}}
{{
- if $serviceSpec.extraPodSpec.mainContainer.args
}}
args
:
args
:
{{
- $serviceSpec.extraPodSpec.mainContainer.args | toYaml | nindent 8
}}
{{
- $serviceSpec.extraPodSpec.mainContainer.args | toYaml | nindent 8
}}
{{
- end
}}
{{
if $serviceSpec.resources
}}
{{
if $serviceSpec.resources
}}
resources
:
resources
:
requests
:
requests
:
...
@@ -83,8 +89,8 @@ spec:
...
@@ -83,8 +89,8 @@ spec:
-
name
:
health
-
name
:
health
containerPort
:
{{
$.Values.healthPort | default 5000
}}
containerPort
:
{{
$.Values.healthPort | default 5000
}}
livenessProbe
:
livenessProbe
:
{{
- if $serviceSpec.
extraPodSpec.mainContainer.
livenessProbe
}}
{{
- if $serviceSpec.livenessProbe
}}
{{
$serviceSpec.
extraPodSpec.mainContainer.
livenessProbe | toYaml | nindent 10
}}
{{
$serviceSpec.livenessProbe | toYaml | nindent 10
}}
{{
- else
}}
{{
- else
}}
initialDelaySeconds
:
60
initialDelaySeconds
:
60
periodSeconds
:
60
periodSeconds
:
60
...
@@ -97,8 +103,8 @@ spec:
...
@@ -97,8 +103,8 @@ spec:
scheme
:
HTTP
scheme
:
HTTP
{{
- end
}}
{{
- end
}}
readinessProbe
:
readinessProbe
:
{{
- if $serviceSpec.
extraPodSpec.mainContainer.
readinessProbe
}}
{{
- if $serviceSpec.readinessProbe
}}
{{
$serviceSpec.
extraPodSpec.mainContainer.
readinessProbe | toYaml | nindent 10
}}
{{
$serviceSpec.readinessProbe | toYaml | nindent 10
}}
{{
- else
}}
{{
- else
}}
initialDelaySeconds
:
60
initialDelaySeconds
:
60
periodSeconds
:
60
periodSeconds
:
60
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment