Unverified Commit bfbcae7a authored by Julien Mancuso's avatar Julien Mancuso Committed by GitHub
Browse files

fix: setup planner serviceaccount in cluster-wide deployment (#3520)


Signed-off-by: default avatarJulien Mancuso <jmancuso@nvidia.com>
parent 07a64744
......@@ -35,11 +35,11 @@ dependencies:
repository: "https://charts.bitnami.com/bitnami"
condition: etcd.enabled
- name: kai-scheduler
version: v0.9.2
version: v0.9.4
repository: oci://ghcr.io/nvidia/kai-scheduler
condition: kai-scheduler.enabled
- name: grove-charts
alias: grove
version: v0.1.0-alpha.1
version: v0.1.0-alpha.2
repository: oci://ghcr.io/nvidia/grove
condition: grove.enabled
......@@ -89,8 +89,8 @@ The chart includes built-in validation to prevent all operator conflicts:
| file://components/operator | dynamo-operator | 0.5.0 |
| https://charts.bitnami.com/bitnami | etcd | 12.0.18 |
| https://nats-io.github.io/k8s/helm/charts/ | nats | 1.3.2 |
| oci://ghcr.io/nvidia/grove | grove(grove-charts) | v0.1.0-alpha.1 |
| oci://ghcr.io/nvidia/kai-scheduler | kai-scheduler | v0.9.2 |
| oci://ghcr.io/nvidia/grove | grove(grove-charts) | v0.1.0-alpha.2 |
| oci://ghcr.io/nvidia/kai-scheduler | kai-scheduler | v0.9.4 |
## Values
......
......@@ -124,6 +124,9 @@ spec:
- --mpi-run-ssh-secret-name={{ .Values.dynamo.mpiRun.secretName }}
- --mpi-run-ssh-secret-namespace={{ .Release.Namespace }}
{{- end }}
{{- if not .Values.namespaceRestriction.enabled }}
- --planner-cluster-role-name={{ include "dynamo-operator.fullname" . }}-planner
{{- end }}
command:
- /manager
env:
......
......@@ -12,12 +12,17 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
{{- if .Values.namespaceRestriction.enabled }}
# Namespace-restricted mode: Role + ServiceAccount + RoleBinding
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: planner-serviceaccount
namespace: {{ .Values.namespace }}
namespace: {{ .Release.Namespace }}
labels:
{{- include "dynamo-operator.labels" . | nindent 4 }}
{{- if .Values.dynamo.dockerRegistry.useKubernetesSecret }}
imagePullSecrets:
- name: {{ include "dynamo-operator.componentsDockerRegistrySecretName" . }}
......@@ -27,7 +32,9 @@ apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: planner-role
namespace: {{ .Values.namespace }}
namespace: {{ .Release.Namespace }}
labels:
{{- include "dynamo-operator.labels" . | nindent 4 }}
rules:
- apiGroups: ["nvidia.com"]
resources: ["dynamocomponentdeployments", "dynamographdeployments"]
......@@ -37,12 +44,28 @@ apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: planner-binding
namespace: {{ .Values.namespace }}
namespace: {{ .Release.Namespace }}
labels:
{{- include "dynamo-operator.labels" . | nindent 4 }}
subjects:
- kind: ServiceAccount
name: planner-serviceaccount
namespace: {{ .Values.namespace }}
namespace: {{ .Release.Namespace }}
roleRef:
kind: Role
name: planner-role
apiGroup: rbac.authorization.k8s.io
\ No newline at end of file
apiGroup: rbac.authorization.k8s.io
{{- else }}
# Cluster-wide mode: ClusterRole for planner
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: {{ include "dynamo-operator.fullname" . }}-planner
labels:
{{- include "dynamo-operator.labels" . | nindent 4 }}
rules:
- apiGroups: ["nvidia.com"]
resources: ["dynamocomponentdeployments", "dynamographdeployments"]
verbs: ["get", "list", "create", "update", "patch"]
{{- end }}
\ No newline at end of file
......@@ -60,6 +60,7 @@ import (
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/controller"
commonController "github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/controller_common"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/etcd"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/rbac"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/secret"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/secrets"
istioclientsetscheme "istio.io/client-go/pkg/clientset/versioned/scheme"
......@@ -116,6 +117,7 @@ func init() {
//+kubebuilder:scaffold:scheme
}
//nolint:gocyclo
func main() {
var metricsAddr string
var enableLeaderElection bool
......@@ -137,6 +139,7 @@ func main() {
var prometheusEndpoint string
var mpiRunSecretName string
var mpiRunSecretNamespace string
var plannerClusterRoleName string
flag.StringVar(&metricsAddr, "metrics-bind-address", ":8080", "The address the metric endpoint binds to.")
flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.")
flag.BoolVar(&enableLeaderElection, "leader-elect", false,
......@@ -175,12 +178,19 @@ func main() {
"Name of the secret containing the SSH key for MPI Run (required)")
flag.StringVar(&mpiRunSecretNamespace, "mpi-run-ssh-secret-namespace", "",
"Namespace where the MPI SSH secret is located (required)")
flag.StringVar(&plannerClusterRoleName, "planner-cluster-role-name", "",
"Name of the ClusterRole for planner (cluster-wide mode only)")
opts := zap.Options{
Development: true,
}
opts.BindFlags(flag.CommandLine)
flag.Parse()
if restrictedNamespace == "" && plannerClusterRoleName == "" {
setupLog.Error(nil, "planner-cluster-role-name is required in cluster-wide mode")
os.Exit(1)
}
// Validate modelExpressURL if provided
if modelExpressURL != "" {
if _, err := url.Parse(modelExpressURL); err != nil {
......@@ -225,6 +235,9 @@ func main() {
MpiRun: commonController.MpiRunConfig{
SecretName: mpiRunSecretName,
},
RBAC: commonController.RBACConfig{
PlannerClusterRoleName: plannerClusterRoleName,
},
}
mainCtx := ctrl.SetupSignalHandler()
......@@ -421,6 +434,9 @@ func main() {
os.Exit(1)
}
// Initialize RBAC manager for cross-namespace resource management
rbacManager := rbac.NewManager(mgr.GetClient())
if err = (&controller.DynamoGraphDeploymentReconciler{
Client: mgr.GetClient(),
Recorder: mgr.GetEventRecorderFor("dynamographdeployment"),
......@@ -428,6 +444,7 @@ func main() {
DockerSecretRetriever: dockerSecretRetriever,
ScaleClient: scaleClient,
MPISecretReplicator: mpiSecretReplicator,
RBACManager: rbacManager,
}).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "DynamoGraphDeployment")
os.Exit(1)
......
......@@ -6,7 +6,7 @@ toolchain go1.24.3
require (
emperror.dev/errors v0.8.1
github.com/NVIDIA/grove/operator/api v0.1.0-alpha.1
github.com/NVIDIA/grove/operator/api v0.1.0-alpha.2
github.com/bsm/gomega v1.27.10
github.com/go-logr/logr v1.4.2
github.com/google/go-cmp v0.7.0
......
emperror.dev/errors v0.8.1 h1:UavXZ5cSX/4u9iyvH6aDcuGkVjeexUGJ7Ij7G4VfQT0=
emperror.dev/errors v0.8.1/go.mod h1:YcRvLPh626Ubn2xqtoprejnA5nFha+TJ+2vew48kWuE=
github.com/NVIDIA/grove/operator/api v0.1.0-alpha.1 h1:4DE6ZGa/3muBa5gk1GtJskMVss6GjeCPpn+xTnR1h9w=
github.com/NVIDIA/grove/operator/api v0.1.0-alpha.1/go.mod h1:QlsR2wQLj9m/zVEqv5SsCPzyjN2ykYZ0r/NEnDf4WB4=
github.com/NVIDIA/grove/operator/api v0.1.0-alpha.2 h1:Xg2hrC5eKJ0jhStGFoGX0DnUdt/75K5fJvOxKkN54oU=
github.com/NVIDIA/grove/operator/api v0.1.0-alpha.2/go.mod h1:QlsR2wQLj9m/zVEqv5SsCPzyjN2ykYZ0r/NEnDf4WB4=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM=
......
......@@ -63,6 +63,11 @@ type etcdStorage interface {
DeleteKeys(ctx context.Context, prefix string) error
}
// rbacManager interface for managing RBAC resources
type rbacManager interface {
EnsureServiceAccountWithRBAC(ctx context.Context, targetNamespace, serviceAccountName, clusterRoleName string) error
}
// DynamoGraphDeploymentReconciler reconciles a DynamoGraphDeployment object
type DynamoGraphDeploymentReconciler struct {
client.Client
......@@ -71,6 +76,7 @@ type DynamoGraphDeploymentReconciler struct {
DockerSecretRetriever dockerSecretRetriever
ScaleClient scale.ScalesGetter
MPISecretReplicator *secret.SecretReplicator
RBACManager rbacManager
}
// +kubebuilder:rbac:groups=nvidia.com,resources=dynamographdeployments,verbs=get;list;watch;create;update;patch;delete
......@@ -158,6 +164,25 @@ type Resource interface {
func (r *DynamoGraphDeploymentReconciler) reconcileResources(ctx context.Context, dynamoDeployment *nvidiacomv1alpha1.DynamoGraphDeployment) (State, Reason, Message, error) {
logger := log.FromContext(ctx)
// Ensure planner RBAC exists in cluster-wide mode
if r.Config.RestrictedNamespace == "" {
if r.RBACManager == nil {
return "", "", "", fmt.Errorf("RBAC manager not initialized in cluster-wide mode")
}
if r.Config.RBAC.PlannerClusterRoleName == "" {
return "", "", "", fmt.Errorf("planner ClusterRole name is required in cluster-wide mode")
}
if err := r.RBACManager.EnsureServiceAccountWithRBAC(
ctx,
dynamoDeployment.Namespace,
consts.PlannerServiceAccountName,
r.Config.RBAC.PlannerClusterRoleName,
); err != nil {
logger.Error(err, "Failed to ensure planner RBAC")
return "", "", "", fmt.Errorf("failed to ensure planner RBAC: %w", err)
}
}
// Reconcile top-level PVCs first
err := r.reconcilePVCs(ctx, dynamoDeployment)
if err != nil {
......
......@@ -66,6 +66,14 @@ type Config struct {
// PrometheusEndpoint is the URL of the Prometheus endpoint to use for metrics
PrometheusEndpoint string
MpiRun MpiRunConfig
// RBAC configuration for cross-namespace resource management
RBAC RBACConfig
}
// RBACConfig holds configuration for RBAC management
type RBACConfig struct {
// PlannerClusterRoleName is the name of the ClusterRole for planner (cluster-wide mode only)
PlannerClusterRoleName string
}
type IngressConfig struct {
......
/*
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*/
package rbac
import (
"context"
"fmt"
corev1 "k8s.io/api/core/v1"
rbacv1 "k8s.io/api/rbac/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/log"
)
const (
// RBAC resource kind constants
kindClusterRole = "ClusterRole"
kindServiceAccount = "ServiceAccount"
apiGroupRBAC = "rbac.authorization.k8s.io"
)
// Manager handles dynamic RBAC creation for cluster-wide operator installations.
type Manager struct {
client client.Client
}
// NewManager creates a new RBAC manager.
func NewManager(client client.Client) *Manager {
return &Manager{client: client}
}
// needsRoleRefRecreate checks if the RoleRef has changed, which requires
// deleting and recreating the RoleBinding since RoleRef is immutable.
func needsRoleRefRecreate(existing *rbacv1.RoleBinding, clusterRoleName string) bool {
return existing.RoleRef.Name != clusterRoleName ||
existing.RoleRef.Kind != kindClusterRole ||
existing.RoleRef.APIGroup != apiGroupRBAC
}
// needsSubjectUpdate checks if the Subjects field needs updating.
// Subjects are mutable so they can be updated in-place.
func needsSubjectUpdate(existing *rbacv1.RoleBinding, serviceAccountName, targetNamespace string) bool {
return len(existing.Subjects) != 1 ||
existing.Subjects[0].Kind != kindServiceAccount ||
existing.Subjects[0].Name != serviceAccountName ||
existing.Subjects[0].Namespace != targetNamespace
}
// EnsureServiceAccountWithRBAC creates or updates a ServiceAccount and RoleBinding
// in the target namespace. This should ONLY be called in cluster-wide mode.
//
// In cluster-wide mode, the operator dynamically creates:
// - ServiceAccount in the target namespace
// - RoleBinding in the target namespace that binds the SA to a ClusterRole
//
// The ClusterRole must already exist (created by Helm).
//
// Parameters:
// - ctx: context
// - targetNamespace: namespace to create RBAC resources in
// - serviceAccountName: name of the ServiceAccount to create
// - clusterRoleName: name of the ClusterRole to bind to (must exist)
func (m *Manager) EnsureServiceAccountWithRBAC(
ctx context.Context,
targetNamespace string,
serviceAccountName string,
clusterRoleName string,
) error {
logger := log.FromContext(ctx)
if targetNamespace == "" {
return fmt.Errorf("target namespace is required")
}
if serviceAccountName == "" {
return fmt.Errorf("service account name is required")
}
if clusterRoleName == "" {
return fmt.Errorf("cluster role name is required")
}
// Verify ClusterRole exists before creating RoleBinding
clusterRole := &rbacv1.ClusterRole{}
if err := m.client.Get(ctx, client.ObjectKey{Name: clusterRoleName}, clusterRole); err != nil {
if apierrors.IsNotFound(err) {
return fmt.Errorf("cluster role %q does not exist: ensure it is created by Helm before deploying components", clusterRoleName)
}
return fmt.Errorf("failed to verify cluster role %q: %w", clusterRoleName, err)
}
logger.V(1).Info("ClusterRole verified",
"clusterRole", clusterRoleName,
"rules", len(clusterRole.Rules))
// Create/update ServiceAccount
sa := &corev1.ServiceAccount{
ObjectMeta: metav1.ObjectMeta{
Name: serviceAccountName,
Namespace: targetNamespace,
Labels: map[string]string{
"app.kubernetes.io/managed-by": "dynamo-operator",
"app.kubernetes.io/component": "rbac",
"app.kubernetes.io/name": serviceAccountName,
},
},
}
if err := m.client.Get(ctx, client.ObjectKeyFromObject(sa), sa); err != nil {
if !apierrors.IsNotFound(err) {
return fmt.Errorf("failed to get service account: %w", err)
}
// ServiceAccount doesn't exist, create it
if err := m.client.Create(ctx, sa); err != nil {
return fmt.Errorf("failed to create service account: %w", err)
}
logger.V(1).Info("ServiceAccount created",
"serviceAccount", serviceAccountName,
"namespace", targetNamespace)
} else {
logger.V(1).Info("ServiceAccount already exists",
"serviceAccount", serviceAccountName,
"namespace", targetNamespace)
}
// Create/update RoleBinding
roleBindingName := fmt.Sprintf("%s-binding", serviceAccountName)
rb := &rbacv1.RoleBinding{
ObjectMeta: metav1.ObjectMeta{
Name: roleBindingName,
Namespace: targetNamespace,
Labels: map[string]string{
"app.kubernetes.io/managed-by": "dynamo-operator",
"app.kubernetes.io/component": "rbac",
"app.kubernetes.io/name": serviceAccountName,
},
},
Subjects: []rbacv1.Subject{{
Kind: kindServiceAccount,
Name: serviceAccountName,
Namespace: targetNamespace,
}},
RoleRef: rbacv1.RoleRef{
APIGroup: apiGroupRBAC,
Kind: kindClusterRole,
Name: clusterRoleName,
},
}
existingRB := &rbacv1.RoleBinding{}
if err := m.client.Get(ctx, client.ObjectKeyFromObject(rb), existingRB); err != nil {
if !apierrors.IsNotFound(err) {
return fmt.Errorf("failed to get role binding: %w", err)
}
// RoleBinding doesn't exist, create it
if err := m.client.Create(ctx, rb); err != nil {
return fmt.Errorf("failed to create role binding: %w", err)
}
logger.V(1).Info("RoleBinding created",
"roleBinding", roleBindingName,
"clusterRole", clusterRoleName,
"namespace", targetNamespace)
} else {
// RoleBinding exists, check if it needs updating
needsRecreate := needsRoleRefRecreate(existingRB, clusterRoleName)
needsUpdate := needsSubjectUpdate(existingRB, serviceAccountName, targetNamespace)
if needsRecreate {
// RoleRef is immutable, so delete and recreate the RoleBinding
if err := m.client.Delete(ctx, existingRB); err != nil {
return fmt.Errorf("failed to delete role binding for recreation: %w", err)
}
logger.V(1).Info("RoleBinding deleted for recreation due to RoleRef change",
"roleBinding", roleBindingName,
"oldClusterRole", existingRB.RoleRef.Name,
"newClusterRole", clusterRoleName,
"namespace", targetNamespace)
// Recreate with new RoleRef
if err := m.client.Create(ctx, rb); err != nil {
return fmt.Errorf("failed to recreate role binding: %w", err)
}
logger.V(1).Info("RoleBinding recreated",
"roleBinding", roleBindingName,
"clusterRole", clusterRoleName,
"namespace", targetNamespace)
} else if needsUpdate {
// Only Subjects changed, can update in-place
existingRB.Subjects = rb.Subjects
if err := m.client.Update(ctx, existingRB); err != nil {
return fmt.Errorf("failed to update role binding: %w", err)
}
logger.V(1).Info("RoleBinding subjects updated",
"roleBinding", roleBindingName,
"namespace", targetNamespace)
} else {
logger.V(1).Info("RoleBinding already up-to-date",
"roleBinding", roleBindingName,
"clusterRole", clusterRoleName,
"namespace", targetNamespace)
}
}
return nil
}
/*
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*/
package rbac
import (
"context"
"strings"
"testing"
corev1 "k8s.io/api/core/v1"
rbacv1 "k8s.io/api/rbac/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/fake"
)
const (
// Test constants
testServiceAccountName = "test-sa"
testNamespace = "test-namespace"
testClusterRoleName = "test-cluster-role"
testRoleBindingName = "test-sa-binding"
)
func setupTest() (client.Client, *runtime.Scheme) {
scheme := runtime.NewScheme()
_ = corev1.AddToScheme(scheme)
_ = rbacv1.AddToScheme(scheme)
fakeClient := fake.NewClientBuilder().WithScheme(scheme).Build()
return fakeClient, scheme
}
func setupTestWithClusterRole(clusterRoleName string) client.Client {
scheme := runtime.NewScheme()
_ = corev1.AddToScheme(scheme)
_ = rbacv1.AddToScheme(scheme)
// Pre-create ClusterRole
clusterRole := &rbacv1.ClusterRole{
ObjectMeta: metav1.ObjectMeta{
Name: clusterRoleName,
},
Rules: []rbacv1.PolicyRule{
{
APIGroups: []string{"nvidia.com"},
Resources: []string{"dynamocomponentdeployments", "dynamographdeployments"},
Verbs: []string{"get", "list", "create", "update", "patch"},
},
},
}
fakeClient := fake.NewClientBuilder().
WithScheme(scheme).
WithObjects(clusterRole).
Build()
return fakeClient
}
func TestEnsureServiceAccountWithRBAC_CreateNew(t *testing.T) {
// Setup
fakeClient := setupTestWithClusterRole(testClusterRoleName)
manager := NewManager(fakeClient)
ctx := context.Background()
// Execute
err := manager.EnsureServiceAccountWithRBAC(
ctx,
testNamespace,
testServiceAccountName,
testClusterRoleName,
)
// Verify
if err != nil {
t.Fatalf("Expected no error, got: %v", err)
}
// Check ServiceAccount was created
sa := &corev1.ServiceAccount{}
err = fakeClient.Get(ctx, client.ObjectKey{
Namespace: testNamespace,
Name: testServiceAccountName,
}, sa)
if err != nil {
t.Fatalf("ServiceAccount not created: %v", err)
}
// Verify ServiceAccount labels
expectedLabels := map[string]string{
"app.kubernetes.io/managed-by": "dynamo-operator",
"app.kubernetes.io/component": "rbac",
"app.kubernetes.io/name": testServiceAccountName,
}
for k, v := range expectedLabels {
if sa.Labels[k] != v {
t.Errorf("Expected label %s=%s, got %s", k, v, sa.Labels[k])
}
}
// Check RoleBinding was created
rb := &rbacv1.RoleBinding{}
err = fakeClient.Get(ctx, client.ObjectKey{
Namespace: testNamespace,
Name: testRoleBindingName,
}, rb)
if err != nil {
t.Fatalf("RoleBinding not created: %v", err)
}
// Verify RoleBinding configuration
if len(rb.Subjects) != 1 {
t.Fatalf("Expected 1 subject, got %d", len(rb.Subjects))
}
if rb.Subjects[0].Kind != "ServiceAccount" {
t.Errorf("Expected subject kind ServiceAccount, got %s", rb.Subjects[0].Kind)
}
if rb.Subjects[0].Name != testServiceAccountName {
t.Errorf("Expected subject name test-sa, got %s", rb.Subjects[0].Name)
}
if rb.Subjects[0].Namespace != testNamespace {
t.Errorf("Expected subject namespace test-namespace, got %s", rb.Subjects[0].Namespace)
}
// Verify RoleRef
if rb.RoleRef.Kind != "ClusterRole" {
t.Errorf("Expected RoleRef kind ClusterRole, got %s", rb.RoleRef.Kind)
}
if rb.RoleRef.Name != testClusterRoleName {
t.Errorf("Expected RoleRef name test-cluster-role, got %s", rb.RoleRef.Name)
}
if rb.RoleRef.APIGroup != "rbac.authorization.k8s.io" {
t.Errorf("Expected RoleRef APIGroup rbac.authorization.k8s.io, got %s", rb.RoleRef.APIGroup)
}
}
func TestEnsureServiceAccountWithRBAC_AlreadyExists(t *testing.T) {
// Setup - pre-create ClusterRole, ServiceAccount and RoleBinding
_, scheme := setupTest()
clusterRole := &rbacv1.ClusterRole{
ObjectMeta: metav1.ObjectMeta{
Name: testClusterRoleName,
},
Rules: []rbacv1.PolicyRule{
{
APIGroups: []string{"nvidia.com"},
Resources: []string{"dynamocomponentdeployments", "dynamographdeployments"},
Verbs: []string{"get", "list", "create", "update", "patch"},
},
},
}
existingSA := &corev1.ServiceAccount{
ObjectMeta: metav1.ObjectMeta{
Name: testServiceAccountName,
Namespace: testNamespace,
Labels: map[string]string{
"app.kubernetes.io/managed-by": "dynamo-operator",
"app.kubernetes.io/component": "rbac",
"app.kubernetes.io/name": testServiceAccountName,
},
},
}
existingRB := &rbacv1.RoleBinding{
ObjectMeta: metav1.ObjectMeta{
Name: testRoleBindingName,
Namespace: testNamespace,
Labels: map[string]string{
"app.kubernetes.io/managed-by": "dynamo-operator",
"app.kubernetes.io/component": "rbac",
"app.kubernetes.io/name": testServiceAccountName,
},
},
Subjects: []rbacv1.Subject{{
Kind: "ServiceAccount",
Name: testServiceAccountName,
Namespace: testNamespace,
}},
RoleRef: rbacv1.RoleRef{
APIGroup: "rbac.authorization.k8s.io",
Kind: "ClusterRole",
Name: testClusterRoleName,
},
}
fakeClient := fake.NewClientBuilder().
WithScheme(scheme).
WithObjects(clusterRole, existingSA, existingRB).
Build()
manager := NewManager(fakeClient)
ctx := context.Background()
// Execute
err := manager.EnsureServiceAccountWithRBAC(
ctx,
testNamespace,
testServiceAccountName,
testClusterRoleName,
)
// Verify
if err != nil {
t.Fatalf("Expected no error, got: %v", err)
}
// Verify resources still exist and unchanged
sa := &corev1.ServiceAccount{}
err = fakeClient.Get(ctx, client.ObjectKey{
Namespace: testNamespace,
Name: testServiceAccountName,
}, sa)
if err != nil {
t.Fatalf("ServiceAccount not found: %v", err)
}
rb := &rbacv1.RoleBinding{}
err = fakeClient.Get(ctx, client.ObjectKey{
Namespace: testNamespace,
Name: testRoleBindingName,
}, rb)
if err != nil {
t.Fatalf("RoleBinding not found: %v", err)
}
}
func TestEnsureServiceAccountWithRBAC_UpdateRoleBinding(t *testing.T) {
// Setup - pre-create ClusterRole, ServiceAccount and RoleBinding with wrong subject
_, scheme := setupTest()
clusterRole := &rbacv1.ClusterRole{
ObjectMeta: metav1.ObjectMeta{
Name: testClusterRoleName,
},
Rules: []rbacv1.PolicyRule{
{
APIGroups: []string{"nvidia.com"},
Resources: []string{"dynamocomponentdeployments", "dynamographdeployments"},
Verbs: []string{"get", "list", "create", "update", "patch"},
},
},
}
existingSA := &corev1.ServiceAccount{
ObjectMeta: metav1.ObjectMeta{
Name: testServiceAccountName,
Namespace: testNamespace,
Labels: map[string]string{
"app.kubernetes.io/managed-by": "dynamo-operator",
"app.kubernetes.io/component": "rbac",
"app.kubernetes.io/name": testServiceAccountName,
},
},
}
existingRB := &rbacv1.RoleBinding{
ObjectMeta: metav1.ObjectMeta{
Name: testRoleBindingName,
Namespace: testNamespace,
Labels: map[string]string{
"app.kubernetes.io/managed-by": "dynamo-operator",
"app.kubernetes.io/component": "rbac",
"app.kubernetes.io/name": testServiceAccountName,
},
},
Subjects: []rbacv1.Subject{{
Kind: "ServiceAccount",
Name: "wrong-sa", // Wrong name
Namespace: testNamespace,
}},
RoleRef: rbacv1.RoleRef{
APIGroup: "rbac.authorization.k8s.io",
Kind: "ClusterRole",
Name: testClusterRoleName,
},
}
fakeClient := fake.NewClientBuilder().
WithScheme(scheme).
WithObjects(clusterRole, existingSA, existingRB).
Build()
manager := NewManager(fakeClient)
ctx := context.Background()
// Execute
err := manager.EnsureServiceAccountWithRBAC(
ctx,
testNamespace,
testServiceAccountName,
testClusterRoleName,
)
// Verify
if err != nil {
t.Fatalf("Expected no error, got: %v", err)
}
// Verify RoleBinding was updated with correct subject
rb := &rbacv1.RoleBinding{}
err = fakeClient.Get(ctx, client.ObjectKey{
Namespace: testNamespace,
Name: testRoleBindingName,
}, rb)
if err != nil {
t.Fatalf("RoleBinding not found: %v", err)
}
if len(rb.Subjects) != 1 {
t.Fatalf("Expected 1 subject, got %d", len(rb.Subjects))
}
if rb.Subjects[0].Name != testServiceAccountName {
t.Errorf("Expected subject name test-sa, got %s", rb.Subjects[0].Name)
}
}
func TestEnsureServiceAccountWithRBAC_MultipleNamespaces(t *testing.T) {
// Setup
fakeClient := setupTestWithClusterRole(testClusterRoleName)
manager := NewManager(fakeClient)
ctx := context.Background()
namespaces := []string{"ns1", "ns2", "ns3"}
// Execute - create RBAC in multiple namespaces
for _, ns := range namespaces {
err := manager.EnsureServiceAccountWithRBAC(
ctx,
ns,
testServiceAccountName,
testClusterRoleName,
)
if err != nil {
t.Fatalf("Failed for namespace %s: %v", ns, err)
}
}
// Verify - check resources exist in all namespaces
for _, ns := range namespaces {
sa := &corev1.ServiceAccount{}
err := fakeClient.Get(ctx, client.ObjectKey{
Namespace: ns,
Name: testServiceAccountName,
}, sa)
if err != nil {
t.Errorf("ServiceAccount not found in namespace %s: %v", ns, err)
}
rb := &rbacv1.RoleBinding{}
err = fakeClient.Get(ctx, client.ObjectKey{
Namespace: ns,
Name: testRoleBindingName,
}, rb)
if err != nil {
t.Errorf("RoleBinding not found in namespace %s: %v", ns, err)
}
}
}
func TestEnsureServiceAccountWithRBAC_ServiceAccountExistsRoleBindingDoesNot(t *testing.T) {
// Setup - pre-create only ServiceAccount and ClusterRole
_, scheme := setupTest()
clusterRole := &rbacv1.ClusterRole{
ObjectMeta: metav1.ObjectMeta{
Name: testClusterRoleName,
},
Rules: []rbacv1.PolicyRule{
{
APIGroups: []string{"nvidia.com"},
Resources: []string{"dynamocomponentdeployments", "dynamographdeployments"},
Verbs: []string{"get", "list", "create", "update", "patch"},
},
},
}
existingSA := &corev1.ServiceAccount{
ObjectMeta: metav1.ObjectMeta{
Name: testServiceAccountName,
Namespace: testNamespace,
Labels: map[string]string{
"app.kubernetes.io/managed-by": "dynamo-operator",
"app.kubernetes.io/component": "rbac",
"app.kubernetes.io/name": testServiceAccountName,
},
},
}
fakeClient := fake.NewClientBuilder().
WithScheme(scheme).
WithObjects(clusterRole, existingSA).
Build()
manager := NewManager(fakeClient)
ctx := context.Background()
// Execute
err := manager.EnsureServiceAccountWithRBAC(
ctx,
testNamespace,
testServiceAccountName,
testClusterRoleName,
)
// Verify
if err != nil {
t.Fatalf("Expected no error, got: %v", err)
}
// Verify ServiceAccount still exists
sa := &corev1.ServiceAccount{}
err = fakeClient.Get(ctx, client.ObjectKey{
Namespace: testNamespace,
Name: testServiceAccountName,
}, sa)
if err != nil {
t.Fatalf("ServiceAccount not found: %v", err)
}
// Verify RoleBinding was created
rb := &rbacv1.RoleBinding{}
err = fakeClient.Get(ctx, client.ObjectKey{
Namespace: testNamespace,
Name: testRoleBindingName,
}, rb)
if err != nil {
t.Fatalf("RoleBinding not created: %v", err)
}
}
func TestEnsureServiceAccountWithRBAC_Idempotency(t *testing.T) {
// Setup
fakeClient := setupTestWithClusterRole(testClusterRoleName)
manager := NewManager(fakeClient)
ctx := context.Background()
// Execute multiple times
for i := 0; i < 3; i++ {
err := manager.EnsureServiceAccountWithRBAC(
ctx,
testNamespace,
testServiceAccountName,
testClusterRoleName,
)
if err != nil {
t.Fatalf("Iteration %d failed: %v", i, err)
}
}
// Verify - should still have exactly one ServiceAccount and one RoleBinding
saList := &corev1.ServiceAccountList{}
err := fakeClient.List(ctx, saList, client.InNamespace(testNamespace))
if err != nil {
t.Fatalf("Failed to list ServiceAccounts: %v", err)
}
if len(saList.Items) != 1 {
t.Errorf("Expected 1 ServiceAccount, got %d", len(saList.Items))
}
rbList := &rbacv1.RoleBindingList{}
err = fakeClient.List(ctx, rbList, client.InNamespace(testNamespace))
if err != nil {
t.Fatalf("Failed to list RoleBindings: %v", err)
}
if len(rbList.Items) != 1 {
t.Errorf("Expected 1 RoleBinding, got %d", len(rbList.Items))
}
}
func TestNewManager(t *testing.T) {
// Setup
fakeClient, _ := setupTest()
// Execute
manager := NewManager(fakeClient)
// Verify
if manager == nil {
t.Fatal("Expected non-nil manager")
}
if manager.client == nil {
t.Fatal("Expected non-nil client in manager")
}
}
func TestEnsureServiceAccountWithRBAC_ClusterRoleNotFound(t *testing.T) {
// Setup - no ClusterRole created
fakeClient, _ := setupTest()
manager := NewManager(fakeClient)
ctx := context.Background()
// Execute
err := manager.EnsureServiceAccountWithRBAC(
ctx,
testNamespace,
testServiceAccountName,
"non-existent-cluster-role",
)
// Verify - should fail with clear error message
if err == nil {
t.Fatal("Expected error when ClusterRole doesn't exist, got nil")
}
expectedMsg := "cluster role \"non-existent-cluster-role\" does not exist"
if !strings.Contains(err.Error(), expectedMsg) {
t.Errorf("Expected error message to contain %q, got: %v", expectedMsg, err)
}
// Verify no ServiceAccount or RoleBinding was created
sa := &corev1.ServiceAccount{}
err = fakeClient.Get(ctx, client.ObjectKey{
Namespace: testNamespace,
Name: testServiceAccountName,
}, sa)
if !apierrors.IsNotFound(err) {
t.Error("Expected ServiceAccount not to be created when ClusterRole is missing")
}
}
func TestEnsureServiceAccountWithRBAC_DifferentClusterRoles(t *testing.T) {
// Setup - create two ClusterRoles
_, scheme := setupTest()
clusterRole1 := &rbacv1.ClusterRole{
ObjectMeta: metav1.ObjectMeta{
Name: "cluster-role-1",
},
Rules: []rbacv1.PolicyRule{
{
APIGroups: []string{"nvidia.com"},
Resources: []string{"dynamocomponentdeployments"},
Verbs: []string{"get", "list"},
},
},
}
clusterRole2 := &rbacv1.ClusterRole{
ObjectMeta: metav1.ObjectMeta{
Name: "cluster-role-2",
},
Rules: []rbacv1.PolicyRule{
{
APIGroups: []string{"nvidia.com"},
Resources: []string{"dynamographdeployments"},
Verbs: []string{"get", "list"},
},
},
}
fakeClient := fake.NewClientBuilder().
WithScheme(scheme).
WithObjects(clusterRole1, clusterRole2).
Build()
manager := NewManager(fakeClient)
ctx := context.Background()
// Execute - create with first cluster role
err := manager.EnsureServiceAccountWithRBAC(
ctx,
testNamespace,
testServiceAccountName,
"cluster-role-1",
)
if err != nil {
t.Fatalf("First call failed: %v", err)
}
// Verify first cluster role
rb := &rbacv1.RoleBinding{}
err = fakeClient.Get(ctx, client.ObjectKey{
Namespace: testNamespace,
Name: testRoleBindingName,
}, rb)
if err != nil {
t.Fatalf("RoleBinding not found: %v", err)
}
if rb.RoleRef.Name != "cluster-role-1" {
t.Errorf("Expected RoleRef name cluster-role-1, got %s", rb.RoleRef.Name)
}
// Execute - change to second cluster role (should delete and recreate)
err = manager.EnsureServiceAccountWithRBAC(
ctx,
testNamespace,
testServiceAccountName,
"cluster-role-2",
)
if err != nil {
t.Fatalf("Second call failed: %v", err)
}
// Verify cluster role was changed
rb = &rbacv1.RoleBinding{}
err = fakeClient.Get(ctx, client.ObjectKey{
Namespace: testNamespace,
Name: testRoleBindingName,
}, rb)
if err != nil {
t.Fatalf("RoleBinding not found after update: %v", err)
}
if rb.RoleRef.Name != "cluster-role-2" {
t.Errorf("Expected RoleRef name cluster-role-2 after update, got %s", rb.RoleRef.Name)
}
}
func TestEnsureServiceAccountWithRBAC_EmptyNamespace(t *testing.T) {
// Setup
fakeClient, _ := setupTest()
manager := NewManager(fakeClient)
ctx := context.Background()
// Execute with empty namespace
err := manager.EnsureServiceAccountWithRBAC(
ctx,
"",
testServiceAccountName,
testClusterRoleName,
)
// Verify - should fail because namespace is required
// The fake client might not enforce this, but in real K8s it would fail
// We just verify the function returns (it might succeed with fake client)
if err == nil {
// Check if resources were created in empty namespace
sa := &corev1.ServiceAccount{}
err = fakeClient.Get(ctx, client.ObjectKey{
Namespace: "",
Name: testServiceAccountName,
}, sa)
// In fake client this might work, but we document the behavior
if err != nil && !apierrors.IsNotFound(err) {
t.Logf("Expected behavior: empty namespace handled: %v", err)
}
}
}
func TestEnsureServiceAccountWithRBAC_RoleRefChange(t *testing.T) {
// Setup - pre-create both ClusterRoles, ServiceAccount, and RoleBinding
_, scheme := setupTest()
oldClusterRole := &rbacv1.ClusterRole{
ObjectMeta: metav1.ObjectMeta{
Name: "old-cluster-role",
},
Rules: []rbacv1.PolicyRule{
{
APIGroups: []string{"nvidia.com"},
Resources: []string{"dynamocomponentdeployments"},
Verbs: []string{"get"},
},
},
}
newClusterRole := &rbacv1.ClusterRole{
ObjectMeta: metav1.ObjectMeta{
Name: "new-cluster-role",
},
Rules: []rbacv1.PolicyRule{
{
APIGroups: []string{"nvidia.com"},
Resources: []string{"dynamographdeployments"},
Verbs: []string{"get", "list"},
},
},
}
existingSA := &corev1.ServiceAccount{
ObjectMeta: metav1.ObjectMeta{
Name: testServiceAccountName,
Namespace: testNamespace,
},
}
existingRB := &rbacv1.RoleBinding{
ObjectMeta: metav1.ObjectMeta{
Name: testRoleBindingName,
Namespace: testNamespace,
},
Subjects: []rbacv1.Subject{{
Kind: "ServiceAccount",
Name: testServiceAccountName,
Namespace: testNamespace,
}},
RoleRef: rbacv1.RoleRef{
APIGroup: "rbac.authorization.k8s.io",
Kind: "ClusterRole",
Name: "old-cluster-role", // Old ClusterRole name
},
}
fakeClient := fake.NewClientBuilder().
WithScheme(scheme).
WithObjects(oldClusterRole, newClusterRole, existingSA, existingRB).
Build()
manager := NewManager(fakeClient)
ctx := context.Background()
// Execute with new ClusterRole name
err := manager.EnsureServiceAccountWithRBAC(
ctx,
testNamespace,
testServiceAccountName,
"new-cluster-role", // New ClusterRole name
)
// Verify
if err != nil {
t.Fatalf("Expected no error, got: %v", err)
}
// Verify RoleBinding was recreated with new RoleRef
rb := &rbacv1.RoleBinding{}
err = fakeClient.Get(ctx, client.ObjectKey{
Namespace: testNamespace,
Name: testRoleBindingName,
}, rb)
if err != nil {
t.Fatalf("RoleBinding not found: %v", err)
}
if rb.RoleRef.Name != "new-cluster-role" {
t.Errorf("Expected RoleRef name new-cluster-role, got %s", rb.RoleRef.Name)
}
if rb.Subjects[0].Name != testServiceAccountName {
t.Errorf("Expected subject name test-sa, got %s", rb.Subjects[0].Name)
}
}
func TestEnsureServiceAccountWithRBAC_SubjectWrongNamespace(t *testing.T) {
// Setup - pre-create ClusterRole, ServiceAccount, and RoleBinding with wrong subject namespace
_, scheme := setupTest()
clusterRole := &rbacv1.ClusterRole{
ObjectMeta: metav1.ObjectMeta{
Name: testClusterRoleName,
},
Rules: []rbacv1.PolicyRule{
{
APIGroups: []string{"nvidia.com"},
Resources: []string{"dynamocomponentdeployments", "dynamographdeployments"},
Verbs: []string{"get", "list", "create", "update", "patch"},
},
},
}
existingSA := &corev1.ServiceAccount{
ObjectMeta: metav1.ObjectMeta{
Name: testServiceAccountName,
Namespace: testNamespace,
},
}
existingRB := &rbacv1.RoleBinding{
ObjectMeta: metav1.ObjectMeta{
Name: testRoleBindingName,
Namespace: testNamespace,
},
Subjects: []rbacv1.Subject{{
Kind: "ServiceAccount",
Name: testServiceAccountName,
Namespace: "wrong-namespace", // Wrong namespace
}},
RoleRef: rbacv1.RoleRef{
APIGroup: "rbac.authorization.k8s.io",
Kind: "ClusterRole",
Name: testClusterRoleName,
},
}
fakeClient := fake.NewClientBuilder().
WithScheme(scheme).
WithObjects(clusterRole, existingSA, existingRB).
Build()
manager := NewManager(fakeClient)
ctx := context.Background()
// Execute
err := manager.EnsureServiceAccountWithRBAC(
ctx,
testNamespace,
testServiceAccountName,
testClusterRoleName,
)
// Verify
if err != nil {
t.Fatalf("Expected no error, got: %v", err)
}
// Verify RoleBinding was updated with correct namespace
rb := &rbacv1.RoleBinding{}
err = fakeClient.Get(ctx, client.ObjectKey{
Namespace: testNamespace,
Name: testRoleBindingName,
}, rb)
if err != nil {
t.Fatalf("RoleBinding not found: %v", err)
}
if len(rb.Subjects) != 1 {
t.Fatalf("Expected 1 subject, got %d", len(rb.Subjects))
}
if rb.Subjects[0].Namespace != testNamespace {
t.Errorf("Expected subject namespace test-namespace, got %s", rb.Subjects[0].Namespace)
}
if rb.Subjects[0].Name != testServiceAccountName {
t.Errorf("Expected subject name test-sa, got %s", rb.Subjects[0].Name)
}
}
func TestEnsureServiceAccountWithRBAC_SubjectWrongKind(t *testing.T) {
// Setup - pre-create ClusterRole, ServiceAccount, and RoleBinding with wrong subject kind
_, scheme := setupTest()
clusterRole := &rbacv1.ClusterRole{
ObjectMeta: metav1.ObjectMeta{
Name: testClusterRoleName,
},
Rules: []rbacv1.PolicyRule{
{
APIGroups: []string{"nvidia.com"},
Resources: []string{"dynamocomponentdeployments", "dynamographdeployments"},
Verbs: []string{"get", "list", "create", "update", "patch"},
},
},
}
existingSA := &corev1.ServiceAccount{
ObjectMeta: metav1.ObjectMeta{
Name: testServiceAccountName,
Namespace: testNamespace,
},
}
existingRB := &rbacv1.RoleBinding{
ObjectMeta: metav1.ObjectMeta{
Name: testRoleBindingName,
Namespace: testNamespace,
},
Subjects: []rbacv1.Subject{{
Kind: "User", // Wrong kind
Name: testServiceAccountName,
Namespace: testNamespace,
}},
RoleRef: rbacv1.RoleRef{
APIGroup: "rbac.authorization.k8s.io",
Kind: "ClusterRole",
Name: testClusterRoleName,
},
}
fakeClient := fake.NewClientBuilder().
WithScheme(scheme).
WithObjects(clusterRole, existingSA, existingRB).
Build()
manager := NewManager(fakeClient)
ctx := context.Background()
// Execute
err := manager.EnsureServiceAccountWithRBAC(
ctx,
testNamespace,
testServiceAccountName,
testClusterRoleName,
)
// Verify
if err != nil {
t.Fatalf("Expected no error, got: %v", err)
}
// Verify RoleBinding was updated with correct kind
rb := &rbacv1.RoleBinding{}
err = fakeClient.Get(ctx, client.ObjectKey{
Namespace: testNamespace,
Name: testRoleBindingName,
}, rb)
if err != nil {
t.Fatalf("RoleBinding not found: %v", err)
}
if len(rb.Subjects) != 1 {
t.Fatalf("Expected 1 subject, got %d", len(rb.Subjects))
}
if rb.Subjects[0].Kind != "ServiceAccount" {
t.Errorf("Expected subject kind ServiceAccount, got %s", rb.Subjects[0].Kind)
}
if rb.Subjects[0].Name != testServiceAccountName {
t.Errorf("Expected subject name test-sa, got %s", rb.Subjects[0].Name)
}
}
func TestEnsureServiceAccountWithRBAC_RoleRefKindChange(t *testing.T) {
// Setup - pre-create ClusterRole, ServiceAccount, and RoleBinding with wrong RoleRef kind
_, scheme := setupTest()
clusterRole := &rbacv1.ClusterRole{
ObjectMeta: metav1.ObjectMeta{
Name: testClusterRoleName,
},
Rules: []rbacv1.PolicyRule{
{
APIGroups: []string{"nvidia.com"},
Resources: []string{"dynamocomponentdeployments", "dynamographdeployments"},
Verbs: []string{"get", "list", "create", "update", "patch"},
},
},
}
existingSA := &corev1.ServiceAccount{
ObjectMeta: metav1.ObjectMeta{
Name: testServiceAccountName,
Namespace: testNamespace,
},
}
existingRB := &rbacv1.RoleBinding{
ObjectMeta: metav1.ObjectMeta{
Name: testRoleBindingName,
Namespace: testNamespace,
},
Subjects: []rbacv1.Subject{{
Kind: "ServiceAccount",
Name: testServiceAccountName,
Namespace: testNamespace,
}},
RoleRef: rbacv1.RoleRef{
APIGroup: "rbac.authorization.k8s.io",
Kind: "Role", // Wrong kind (should be ClusterRole)
Name: testClusterRoleName,
},
}
fakeClient := fake.NewClientBuilder().
WithScheme(scheme).
WithObjects(clusterRole, existingSA, existingRB).
Build()
manager := NewManager(fakeClient)
ctx := context.Background()
// Execute
err := manager.EnsureServiceAccountWithRBAC(
ctx,
testNamespace,
testServiceAccountName,
testClusterRoleName,
)
// Verify
if err != nil {
t.Fatalf("Expected no error, got: %v", err)
}
// Verify RoleBinding was recreated with correct RoleRef kind
rb := &rbacv1.RoleBinding{}
err = fakeClient.Get(ctx, client.ObjectKey{
Namespace: testNamespace,
Name: testRoleBindingName,
}, rb)
if err != nil {
t.Fatalf("RoleBinding not found: %v", err)
}
if rb.RoleRef.Kind != "ClusterRole" {
t.Errorf("Expected RoleRef kind ClusterRole, got %s", rb.RoleRef.Kind)
}
if rb.RoleRef.Name != testClusterRoleName {
t.Errorf("Expected RoleRef name test-cluster-role, got %s", rb.RoleRef.Name)
}
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment