controller_test.go 13.2 KB
Newer Older
1
package controller
2
3
4

import (
	"context"
5
	"errors"
6
7
8
9
10
11
	"os"
	"path/filepath"
	"testing"
	"time"

	"github.com/go-logr/logr/testr"
12
13
	batchv1 "k8s.io/api/batch/v1"
	coordinationv1 "k8s.io/api/coordination/v1"
14
15
	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
16
	"k8s.io/apimachinery/pkg/runtime"
17
	"k8s.io/client-go/kubernetes/fake"
18
	clientgotesting "k8s.io/client-go/testing"
19

20
	"github.com/ai-dynamo/dynamo/deploy/snapshot/pkg/types"
21
22
23
)

const testNodeName = "test-node"
24
const testContainerID = "test-container"
25

26
27
// makeTestController creates a NodeController with a fake k8s client and nil executors.
// The fake clientset is empty so any goroutine launched by runCheckpoint/runRestore
28
// will fail on the first annotatePod call and exit cleanly.
29
func makeTestController(t *testing.T, objs ...runtime.Object) *NodeController {
30
	t.Helper()
31
	return &NodeController{
32
33
34
		config: &types.AgentConfig{
			NodeName: testNodeName,
		},
35
		clientset: fake.NewClientset(objs...),
36
		log:       testr.New(t),
37
		holderID:  "test-holder",
38
39
40
41
42
		inFlight:  make(map[string]struct{}),
		stopCh:    make(chan struct{}),
	}
}

43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
// makeLease returns a Lease called name in namespace whose holder identity is
// holder. AcquireTime and RenewTime are both set to renewTime, and the lease
// duration is checkpointLeaseDuration expressed in whole seconds.
func makeLease(namespace, name, holder string, renewTime time.Time) *coordinationv1.Lease {
	stamp := metav1.NewMicroTime(renewTime)
	ttlSeconds := int32(checkpointLeaseDuration.Seconds())

	lease := &coordinationv1.Lease{}
	lease.Name = name
	lease.Namespace = namespace
	lease.Spec.HolderIdentity = &holder
	lease.Spec.LeaseDurationSeconds = &ttlSeconds
	// Acquire and renew timestamps intentionally share one value.
	lease.Spec.AcquireTime = &stamp
	lease.Spec.RenewTime = &stamp
	return lease
}

60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
// makePod returns a single-container ("main") Pod with the given metadata,
// scheduled on nodeName and reporting the given phase. When ready is true the
// pod carries a PodReady=True condition; otherwise it has no conditions.
func makePod(name, namespace, nodeName string, phase corev1.PodPhase, ready bool, labels, annotations map[string]string) *corev1.Pod {
	pod := &corev1.Pod{}
	pod.ObjectMeta = metav1.ObjectMeta{
		Name:        name,
		Namespace:   namespace,
		Labels:      labels,
		Annotations: annotations,
	}
	pod.Spec = corev1.PodSpec{
		NodeName:   nodeName,
		Containers: []corev1.Container{{Name: "main"}},
	}
	pod.Status.Phase = phase

	if !ready {
		return pod
	}
	pod.Status.Conditions = []corev1.PodCondition{{
		Type:   corev1.PodReady,
		Status: corev1.ConditionTrue,
	}}
	return pod
}

88
func TestReconcileCheckpointPod(t *testing.T) {
89
90
91
92
93
94
95
	tests := []struct {
		name       string
		nodeName   string
		phase      corev1.PodPhase
		ready      bool
		hash       string
		annotation string
96
		lease      *coordinationv1.Lease
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
		preSeed    bool // pre-populate inFlight to test deduplication
		want       bool // true = pod passes filtering and triggers checkpoint
	}{
		{
			name:     "happy path",
			nodeName: testNodeName,
			phase:    corev1.PodRunning,
			ready:    true,
			hash:     "abc123",
			want:     true,
		},
		{
			name:     "wrong node",
			nodeName: "other-node",
			phase:    corev1.PodRunning,
			ready:    true,
			hash:     "abc123",
			want:     false,
		},
		{
			name:     "not running",
			nodeName: testNodeName,
			phase:    corev1.PodPending,
			ready:    false,
			hash:     "abc123",
			want:     false,
		},
		{
			name:     "running but not ready",
			nodeName: testNodeName,
			phase:    corev1.PodRunning,
			ready:    false,
			hash:     "abc123",
			want:     false,
		},
		{
			name:     "missing hash label",
			nodeName: testNodeName,
			phase:    corev1.PodRunning,
			ready:    true,
			hash:     "",
			want:     false,
		},
		{
			name:       "already completed",
			nodeName:   testNodeName,
			phase:      corev1.PodRunning,
			ready:      true,
			hash:       "abc123",
			annotation: "completed",
			want:       false,
		},
		{
150
			name:       "already failed",
151
152
153
154
			nodeName:   testNodeName,
			phase:      corev1.PodRunning,
			ready:      true,
			hash:       "abc123",
155
			annotation: "failed",
156
157
			want:       false,
		},
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
		{
			name:     "active lease held elsewhere",
			nodeName: testNodeName,
			phase:    corev1.PodRunning,
			ready:    true,
			hash:     "abc123",
			lease:    makeLease("default", "checkpoint-job", "other-holder", time.Now()),
			want:     false,
		},
		{
			name:     "expired lease can be reclaimed",
			nodeName: testNodeName,
			phase:    corev1.PodRunning,
			ready:    true,
			hash:     "abc123",
			lease:    makeLease("default", "checkpoint-job", "other-holder", time.Now().Add(-checkpointLeaseDuration-time.Second)),
			want:     true,
		},
176
177
178
179
180
181
182
183
184
185
186
187
188
189
		{
			name:     "duplicate in-flight",
			nodeName: testNodeName,
			phase:    corev1.PodRunning,
			ready:    true,
			hash:     "abc123",
			preSeed:  true,
			want:     false,
		},
	}

	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			labels := map[string]string{
190
191
				kubeLabelIsCheckpointSource:    "true",
				"batch.kubernetes.io/job-name": "checkpoint-job",
192
193
194
195
196
			}
			if tc.hash != "" {
				labels[kubeLabelCheckpointHash] = tc.hash
			}

197
198
199
200
201
202
			job := &batchv1.Job{
				ObjectMeta: metav1.ObjectMeta{
					Name:      "checkpoint-job",
					Namespace: "default",
				},
			}
203
			if tc.annotation != "" {
204
				job.Annotations = map[string]string{
205
206
207
208
					kubeAnnotationCheckpointStatus: tc.annotation,
				}
			}

209
210
211
212
213
214
215
			var annotations map[string]string
			if tc.hash != "" {
				annotations = map[string]string{
					kubeAnnotationCheckpointLocation:    "/checkpoints/" + tc.hash,
					kubeAnnotationCheckpointStorageType: "pvc",
				}
			}
216
			pod := makePod("test-pod", "default", tc.nodeName, tc.phase, tc.ready, labels, annotations)
217
218
219
220
221
222
			objs := []runtime.Object{job}
			if tc.lease != nil {
				objs = append(objs, tc.lease)
			}

			w := makeTestController(t, objs...)
223
224
225
226
227
228
			ctx := context.Background()

			if tc.preSeed {
				w.inFlight["default/test-pod"] = struct{}{}
			}

229
			w.reconcileCheckpointPod(ctx, pod)
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250

			// tryAcquire adds to inFlight synchronously before launching the goroutine.
			// For filtered pods, inFlight stays at its original size.
			triggered := len(w.inFlight) > 0 && !tc.preSeed
			if tc.preSeed {
				// Duplicate: inFlight was 1 before and should remain exactly 1
				triggered = false
			}

			if triggered != tc.want {
				t.Errorf("triggered = %v, want %v (inFlight=%d, preSeed=%v)", triggered, tc.want, len(w.inFlight), tc.preSeed)
			}

			// Let the background goroutine (if any) finish before the test ends
			if tc.want {
				time.Sleep(50 * time.Millisecond)
			}
		})
	}
}

251
func TestReconcileRestorePod(t *testing.T) {
252
	tests := []struct {
253
254
255
256
257
258
259
260
261
262
		name                  string
		nodeName              string
		phase                 corev1.PodPhase
		ready                 bool
		hash                  string
		annotationStatus      string
		annotationContainerID string
		createDir             bool // whether to create the checkpoint dir on disk
		preSeed               bool
		want                  bool
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
	}{
		{
			name:      "happy path",
			nodeName:  testNodeName,
			phase:     corev1.PodRunning,
			ready:     false,
			hash:      "abc123",
			createDir: true,
			want:      true,
		},
		{
			name:      "wrong node",
			nodeName:  "other-node",
			phase:     corev1.PodRunning,
			ready:     false,
			hash:      "abc123",
			createDir: true,
			want:      false,
		},
		{
			name:      "not running",
			nodeName:  testNodeName,
			phase:     corev1.PodPending,
			ready:     false,
			hash:      "abc123",
			createDir: true,
			want:      false,
		},
		{
			name:      "already ready",
			nodeName:  testNodeName,
			phase:     corev1.PodRunning,
			ready:     true,
			hash:      "abc123",
			createDir: true,
			want:      false,
		},
		{
			name:     "missing hash",
			nodeName: testNodeName,
			phase:    corev1.PodRunning,
			ready:    false,
			hash:     "",
			want:     false,
		},
		{
			name:      "invalid hash with path traversal",
			nodeName:  testNodeName,
			phase:     corev1.PodRunning,
			ready:     false,
			hash:      "../bad",
			createDir: true,
			want:      false,
		},
		{
318
319
320
321
322
323
324
325
326
			name:                  "already completed for same container",
			nodeName:              testNodeName,
			phase:                 corev1.PodRunning,
			ready:                 false,
			hash:                  "abc123",
			annotationStatus:      "completed",
			annotationContainerID: testContainerID,
			createDir:             true,
			want:                  false,
327
328
		},
		{
329
330
331
332
333
334
335
336
337
			name:                  "already in progress for same container",
			nodeName:              testNodeName,
			phase:                 corev1.PodRunning,
			ready:                 false,
			hash:                  "abc123",
			annotationStatus:      "in_progress",
			annotationContainerID: testContainerID,
			createDir:             true,
			want:                  false,
338
339
		},
		{
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
			name:                  "completed for previous container retries",
			nodeName:              testNodeName,
			phase:                 corev1.PodRunning,
			ready:                 false,
			hash:                  "abc123",
			annotationStatus:      "completed",
			annotationContainerID: "old-container",
			createDir:             true,
			want:                  true,
		},
		{
			name:                  "in progress for previous container retries",
			nodeName:              testNodeName,
			phase:                 corev1.PodRunning,
			ready:                 false,
			hash:                  "abc123",
			annotationStatus:      "in_progress",
			annotationContainerID: "old-container",
			createDir:             true,
			want:                  true,
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
		},
		{
			name:      "checkpoint not on disk",
			nodeName:  testNodeName,
			phase:     corev1.PodRunning,
			ready:     false,
			hash:      "abc123",
			createDir: false,
			want:      false,
		},
		{
			name:      "duplicate in-flight",
			nodeName:  testNodeName,
			phase:     corev1.PodRunning,
			ready:     false,
			hash:      "abc123",
			createDir: true,
			preSeed:   true,
			want:      false,
		},
	}

	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			labels := map[string]string{
				kubeLabelIsRestoreTarget: "true",
			}
			if tc.hash != "" {
				labels[kubeLabelCheckpointHash] = tc.hash
			}

391
392
393
			w := makeTestController(t)
			checkpointDir := t.TempDir()

394
			var annotations map[string]string
395
			if tc.annotationStatus != "" {
396
				annotations = map[string]string{
397
398
399
400
401
402
403
					kubeAnnotationRestoreStatus:      tc.annotationStatus,
					kubeAnnotationRestoreContainerID: tc.annotationContainerID,
				}
			}
			if tc.hash != "" {
				if annotations == nil {
					annotations = make(map[string]string)
404
				}
405
406
				annotations[kubeAnnotationCheckpointLocation] = filepath.Join(checkpointDir, tc.hash)
				annotations[kubeAnnotationCheckpointStorageType] = "pvc"
407
408
409
			}

			pod := makePod("test-pod", "default", tc.nodeName, tc.phase, tc.ready, labels, annotations)
410
411
412
413
414
			pod.Status.ContainerStatuses = []corev1.ContainerStatus{{
				Name:        "main",
				Ready:       tc.ready,
				ContainerID: "containerd://" + testContainerID,
			}}
415
416

			if tc.createDir && tc.hash != "" {
417
				dir := filepath.Join(checkpointDir, tc.hash)
418
419
420
421
422
423
424
425
				if err := os.MkdirAll(dir, 0o755); err != nil {
					t.Fatalf("failed to create checkpoint dir: %v", err)
				}
			}

			ctx := context.Background()

			if tc.preSeed {
426
				w.inFlight["default/test-pod/"+testContainerID] = struct{}{}
427
428
			}

429
			w.reconcileRestorePod(ctx, pod)
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446

			triggered := len(w.inFlight) > 0 && !tc.preSeed
			if tc.preSeed {
				triggered = false
			}

			if triggered != tc.want {
				t.Errorf("triggered = %v, want %v (inFlight=%d, preSeed=%v)", triggered, tc.want, len(w.inFlight), tc.preSeed)
			}

			// Let the background goroutine (if any) finish before the test ends
			if tc.want {
				time.Sleep(50 * time.Millisecond)
			}
		})
	}
}
447

448
func TestRunCheckpointKeepsLeaseAndInFlightOnTerminalStatusPatchFailure(t *testing.T) {
449
450
451
452
	pod := &corev1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "test-pod",
			Namespace: "default",
453
454
455
456
457
458
459
460
461
			Labels: map[string]string{
				"batch.kubernetes.io/job-name": "checkpoint-job",
			},
		},
	}
	job := &batchv1.Job{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "checkpoint-job",
			Namespace: "default",
462
463
		},
	}
464
	lease := makeLease("default", "checkpoint-job", "test-holder", time.Now())
465

466
	clientset := fake.NewClientset(pod.DeepCopy(), job, lease)
467
	patchCalls := 0
468
	clientset.PrependReactor("patch", "jobs", func(clientgotesting.Action) (bool, runtime.Object, error) {
469
470
471
472
		patchCalls++
		return true, nil, errors.New("terminal patch failed")
	})

473
	w := &NodeController{
474
475
476
477
478
		config: &types.AgentConfig{
			NodeName: testNodeName,
		},
		clientset: clientset,
		log:       testr.New(t),
479
		holderID:  "test-holder",
480
481
482
483
484
485
		inFlight: map[string]struct{}{
			"default/test-pod": {},
		},
		stopCh: make(chan struct{}),
	}

486
	err := w.runCheckpoint(context.Background(), pod, job, "abc123", filepath.Join(t.TempDir(), "abc123"), "pvc", "default/test-pod")
487
488
489
490
491
492
	if err == nil {
		t.Fatal("expected terminal checkpoint status update to fail")
	}
	if _, ok := w.inFlight["default/test-pod"]; !ok {
		t.Fatal("checkpoint terminal status failure should keep pod in-flight")
	}
493
494
	if patchCalls != 1 {
		t.Fatalf("patchCalls = %d, want %d", patchCalls, 1)
495
496
	}

497
498
499
	remainingLease, err := clientset.CoordinationV1().Leases("default").Get(context.Background(), "checkpoint-job", metav1.GetOptions{})
	if err != nil {
		t.Fatalf("expected checkpoint lease to remain after terminal status patch failure: %v", err)
500
	}
501
502
	if remainingLease.Spec.HolderIdentity == nil || *remainingLease.Spec.HolderIdentity != "test-holder" {
		t.Fatalf("unexpected remaining lease holder: %#v", remainingLease.Spec.HolderIdentity)
503
504
	}
}