Unverified Commit ca15a79d authored by Schwinn Saereesitthipitak's avatar Schwinn Saereesitthipitak Committed by GitHub
Browse files

fix(snapshot): replace PID-1 SIGUSR1/SIGCONT contract with file sentinels (#8403)


Signed-off-by: default avatarSchwinn Saereesitthipitak <schwinns@nvidia.com>
parent 8fba4f56
// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
package protocol
import (
corev1 "k8s.io/api/core/v1"
)
const (
// SnapshotControlVolumeName is the per-pod emptyDir used to carry
// checkpoint/restore lifecycle sentinels written by the snapshot agent
// and observed by the workload. It replaces the SIGUSR1/SIGCONT signals
// that previously required the workload to run as PID 1.
SnapshotControlVolumeName = "snapshot-control"
// SnapshotControlMountPath is where the control volume is mounted inside
// the workload container.
SnapshotControlMountPath = "/snapshot-control"
// SnapshotControlDirEnv is the environment variable exposing the control
// mount path to the workload.
SnapshotControlDirEnv = "DYN_SNAPSHOT_CONTROL_DIR"
// SnapshotCompleteFile is written by the snapshot agent inside the
// control volume when a checkpoint has completed successfully.
SnapshotCompleteFile = "snapshot-complete"
// RestoreCompleteFile is written by the snapshot agent inside the
// control volume when a restore has completed and the workload may
// resume.
RestoreCompleteFile = "restore-complete"
// ReadyForCheckpointFile is written by the workload inside the control
// volume when the model is loaded and the workload is ready for a
// checkpoint. Observed by the checkpoint job's kubelet readiness probe
// on the worker container.
ReadyForCheckpointFile = "ready-for-checkpoint"
)
// EnsureControlVolume adds the snapshot-control emptyDir to the pod spec,
// mounts it on the given container at SnapshotControlMountPath, and sets
// DYN_SNAPSHOT_CONTROL_DIR on the container's env. Idempotent — safe to call
// from multiple code paths (operator checkpoint job, restore pod shaping, etc.).
func EnsureControlVolume(podSpec *corev1.PodSpec, container *corev1.Container) {
if podSpec == nil || container == nil {
return
}
hasVolume := false
for _, v := range podSpec.Volumes {
if v.Name == SnapshotControlVolumeName {
hasVolume = true
break
}
}
if !hasVolume {
podSpec.Volumes = append(podSpec.Volumes, corev1.Volume{
Name: SnapshotControlVolumeName,
VolumeSource: corev1.VolumeSource{EmptyDir: &corev1.EmptyDirVolumeSource{}},
})
}
hasMount := false
for _, m := range container.VolumeMounts {
if m.Name == SnapshotControlVolumeName {
hasMount = true
break
}
}
if !hasMount {
container.VolumeMounts = append(container.VolumeMounts, corev1.VolumeMount{
Name: SnapshotControlVolumeName,
MountPath: SnapshotControlMountPath,
})
}
hasEnv := false
for _, e := range container.Env {
if e.Name == SnapshotControlDirEnv {
hasEnv = true
break
}
}
if !hasEnv {
container.Env = append(container.Env, corev1.EnvVar{
Name: SnapshotControlDirEnv,
Value: SnapshotControlMountPath,
})
}
}
// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
package protocol
import (
"testing"
corev1 "k8s.io/api/core/v1"
)
func TestEnsureControlVolume(t *testing.T) {
t.Run("adds volume mount and env from empty", func(t *testing.T) {
ps := &corev1.PodSpec{Containers: []corev1.Container{{Name: "main"}}}
EnsureControlVolume(ps, &ps.Containers[0])
if len(ps.Volumes) != 1 || ps.Volumes[0].Name != SnapshotControlVolumeName || ps.Volumes[0].EmptyDir == nil {
t.Fatalf("expected one %s emptyDir volume, got %#v", SnapshotControlVolumeName, ps.Volumes)
}
c := ps.Containers[0]
if len(c.VolumeMounts) != 1 || c.VolumeMounts[0].Name != SnapshotControlVolumeName || c.VolumeMounts[0].MountPath != SnapshotControlMountPath {
t.Fatalf("expected one %s mount at %s, got %#v", SnapshotControlVolumeName, SnapshotControlMountPath, c.VolumeMounts)
}
if len(c.Env) != 1 || c.Env[0].Name != SnapshotControlDirEnv || c.Env[0].Value != SnapshotControlMountPath {
t.Fatalf("expected env %s=%s, got %#v", SnapshotControlDirEnv, SnapshotControlMountPath, c.Env)
}
})
t.Run("idempotent", func(t *testing.T) {
ps := &corev1.PodSpec{Containers: []corev1.Container{{Name: "main"}}}
EnsureControlVolume(ps, &ps.Containers[0])
EnsureControlVolume(ps, &ps.Containers[0])
c := ps.Containers[0]
if len(ps.Volumes) != 1 || len(c.VolumeMounts) != 1 || len(c.Env) != 1 {
t.Fatalf("expected single volume/mount/env after two calls, got volumes=%d mounts=%d env=%d", len(ps.Volumes), len(c.VolumeMounts), len(c.Env))
}
})
t.Run("nil pod spec no-op", func(t *testing.T) {
defer func() {
if r := recover(); r != nil {
t.Fatalf("expected no panic, got %v", r)
}
}()
EnsureControlVolume(nil, &corev1.Container{})
})
t.Run("nil container no-op", func(t *testing.T) {
ps := &corev1.PodSpec{}
EnsureControlVolume(ps, nil)
if len(ps.Volumes) != 0 {
t.Fatalf("expected no volumes when container is nil, got %#v", ps.Volumes)
}
})
t.Run("preserves existing entries", func(t *testing.T) {
ps := &corev1.PodSpec{
Volumes: []corev1.Volume{{
Name: "other",
VolumeSource: corev1.VolumeSource{EmptyDir: &corev1.EmptyDirVolumeSource{}},
}},
Containers: []corev1.Container{{
Name: "main",
VolumeMounts: []corev1.VolumeMount{{Name: "other", MountPath: "/other"}},
Env: []corev1.EnvVar{{Name: "OTHER", Value: "x"}},
}},
}
EnsureControlVolume(ps, &ps.Containers[0])
c := ps.Containers[0]
if len(ps.Volumes) != 2 || len(c.VolumeMounts) != 2 || len(c.Env) != 2 {
t.Fatalf("expected existing + control entries, got volumes=%#v mounts=%#v env=%#v", ps.Volumes, c.VolumeMounts, c.Env)
}
})
}
...@@ -72,6 +72,7 @@ func PrepareRestorePodSpec( ...@@ -72,6 +72,7 @@ func PrepareRestorePodSpec(
if storage.BasePath != "" { if storage.BasePath != "" {
injectCheckpointVolumeMount(container, storage.BasePath) injectCheckpointVolumeMount(container, storage.BasePath)
} }
EnsureControlVolume(podSpec, container)
if isCheckpointReady { if isCheckpointReady {
container.Command = []string{"sleep", "infinity"} container.Command = []string{"sleep", "infinity"}
container.Args = nil container.Args = nil
...@@ -135,6 +136,36 @@ func ValidateRestorePodSpec( ...@@ -135,6 +136,36 @@ func ValidateRestorePodSpec(
return fmt.Errorf("missing %s mount at %s", CheckpointVolumeName, storage.BasePath) return fmt.Errorf("missing %s mount at %s", CheckpointVolumeName, storage.BasePath)
} }
} }
hasControlVolume := false
for _, volume := range podSpec.Volumes {
if volume.Name == SnapshotControlVolumeName && volume.EmptyDir != nil {
hasControlVolume = true
break
}
}
if !hasControlVolume {
return fmt.Errorf("missing %s emptyDir volume; add it via snapshotprotocol.EnsureControlVolume", SnapshotControlVolumeName)
}
hasControlMount := false
for _, mount := range container.VolumeMounts {
if mount.Name == SnapshotControlVolumeName && mount.MountPath == SnapshotControlMountPath {
hasControlMount = true
break
}
}
if !hasControlMount {
return fmt.Errorf("missing %s mount at %s", SnapshotControlVolumeName, SnapshotControlMountPath)
}
hasControlEnv := false
for _, env := range container.Env {
if env.Name == SnapshotControlDirEnv {
hasControlEnv = true
break
}
}
if !hasControlEnv {
return fmt.Errorf("missing %s env var on worker container", SnapshotControlDirEnv)
}
if seccompProfile == "" { if seccompProfile == "" {
return nil return nil
} }
......
package protocol package protocol
import ( import (
"fmt"
"math" "math"
"testing" "testing"
...@@ -85,11 +86,31 @@ func TestNewRestorePod(t *testing.T) { ...@@ -85,11 +86,31 @@ func TestNewRestorePod(t *testing.T) {
if restorePod.Spec.SecurityContext == nil || restorePod.Spec.SecurityContext.SeccompProfile == nil { if restorePod.Spec.SecurityContext == nil || restorePod.Spec.SecurityContext.SeccompProfile == nil {
t.Fatalf("expected seccomp profile to be injected: %#v", restorePod.Spec.SecurityContext) t.Fatalf("expected seccomp profile to be injected: %#v", restorePod.Spec.SecurityContext)
} }
if len(restorePod.Spec.Volumes) != 1 { if len(restorePod.Spec.Volumes) != 2 {
t.Fatalf("expected checkpoint volume, got %#v", restorePod.Spec.Volumes) t.Fatalf("expected checkpoint and snapshot-control volumes, got %#v", restorePod.Spec.Volumes)
} }
if len(restorePod.Spec.Containers[0].VolumeMounts) != 1 { if len(restorePod.Spec.Containers[0].VolumeMounts) != 2 {
t.Fatalf("expected checkpoint mount, got %#v", restorePod.Spec.Containers[0].VolumeMounts) t.Fatalf("expected checkpoint and snapshot-control mounts, got %#v", restorePod.Spec.Containers[0].VolumeMounts)
}
foundMount := false
for _, m := range restorePod.Spec.Containers[0].VolumeMounts {
if m.Name == SnapshotControlVolumeName {
foundMount = true
break
}
}
if !foundMount {
t.Fatalf("expected %s mount, got %#v", SnapshotControlVolumeName, restorePod.Spec.Containers[0].VolumeMounts)
}
foundEnv := false
for _, e := range restorePod.Spec.Containers[0].Env {
if e.Name == SnapshotControlDirEnv {
foundEnv = true
break
}
}
if !foundEnv {
t.Fatalf("expected %s env, got %#v", SnapshotControlDirEnv, restorePod.Spec.Containers[0].Env)
} }
} }
...@@ -117,11 +138,38 @@ func TestPrepareRestorePodSpec(t *testing.T) { ...@@ -117,11 +138,38 @@ func TestPrepareRestorePodSpec(t *testing.T) {
if podSpec.SecurityContext == nil || podSpec.SecurityContext.SeccompProfile == nil { if podSpec.SecurityContext == nil || podSpec.SecurityContext.SeccompProfile == nil {
t.Fatalf("expected seccomp profile to be injected: %#v", podSpec.SecurityContext) t.Fatalf("expected seccomp profile to be injected: %#v", podSpec.SecurityContext)
} }
if len(podSpec.Volumes) != 1 { if len(podSpec.Volumes) != 2 {
t.Fatalf("expected checkpoint volume, got %#v", podSpec.Volumes) t.Fatalf("expected checkpoint and snapshot-control volumes, got %#v", podSpec.Volumes)
}
if len(container.VolumeMounts) != 2 {
t.Fatalf("expected checkpoint and snapshot-control mounts, got %#v", container.VolumeMounts)
}
volCount := 0
for _, v := range podSpec.Volumes {
if v.Name == SnapshotControlVolumeName {
volCount++
}
}
if volCount != 1 {
t.Fatalf("expected single %s volume after repeated calls, got %#v", SnapshotControlVolumeName, podSpec.Volumes)
}
mountCount := 0
for _, m := range container.VolumeMounts {
if m.Name == SnapshotControlVolumeName {
mountCount++
} }
if len(container.VolumeMounts) != 1 { }
t.Fatalf("expected checkpoint mount, got %#v", container.VolumeMounts) if mountCount != 1 {
t.Fatalf("expected single %s mount after repeated calls, got %#v", SnapshotControlVolumeName, container.VolumeMounts)
}
envCount := 0
for _, e := range container.Env {
if e.Name == SnapshotControlDirEnv {
envCount++
}
}
if envCount != 1 {
t.Fatalf("expected single %s env after repeated calls, got %#v", SnapshotControlDirEnv, container.Env)
} }
if len(container.Command) != 2 || container.Command[0] != "sleep" || container.Command[1] != "infinity" { if len(container.Command) != 2 || container.Command[0] != "sleep" || container.Command[1] != "infinity" {
t.Fatalf("expected placeholder command, got %#v", container.Command) t.Fatalf("expected placeholder command, got %#v", container.Command)
...@@ -257,31 +305,42 @@ func TestPrepareRestorePodSpecSynthesizesStartupProbeFromReadiness(t *testing.T) ...@@ -257,31 +305,42 @@ func TestPrepareRestorePodSpecSynthesizesStartupProbeFromReadiness(t *testing.T)
} }
} }
func TestValidateRestorePodSpec(t *testing.T) { func validRestoreSpecFixture(profile string) *corev1.PodSpec {
profile := DefaultSeccompLocalhostProfile return &corev1.PodSpec{
podSpec := &corev1.PodSpec{
SecurityContext: &corev1.PodSecurityContext{ SecurityContext: &corev1.PodSecurityContext{
SeccompProfile: &corev1.SeccompProfile{ SeccompProfile: &corev1.SeccompProfile{
Type: corev1.SeccompProfileTypeLocalhost, Type: corev1.SeccompProfileTypeLocalhost,
LocalhostProfile: &profile, LocalhostProfile: &profile,
}, },
}, },
Volumes: []corev1.Volume{{ Volumes: []corev1.Volume{
{
Name: CheckpointVolumeName, Name: CheckpointVolumeName,
VolumeSource: corev1.VolumeSource{ VolumeSource: corev1.VolumeSource{
PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{ PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{
ClaimName: "snapshot-pvc", ClaimName: "snapshot-pvc",
}, },
}, },
}}, },
{
Name: SnapshotControlVolumeName,
VolumeSource: corev1.VolumeSource{EmptyDir: &corev1.EmptyDirVolumeSource{}},
},
},
Containers: []corev1.Container{{ Containers: []corev1.Container{{
Name: "main", Name: "main",
VolumeMounts: []corev1.VolumeMount{{ VolumeMounts: []corev1.VolumeMount{
Name: CheckpointVolumeName, {Name: CheckpointVolumeName, MountPath: "/checkpoints"},
MountPath: "/checkpoints", {Name: SnapshotControlVolumeName, MountPath: SnapshotControlMountPath},
}}, },
Env: []corev1.EnvVar{{Name: SnapshotControlDirEnv, Value: SnapshotControlMountPath}},
}}, }},
} }
}
func TestValidateRestorePodSpec(t *testing.T) {
profile := DefaultSeccompLocalhostProfile
podSpec := validRestoreSpecFixture(profile)
storage := Storage{ storage := Storage{
Type: StorageTypePVC, Type: StorageTypePVC,
PVCName: "snapshot-pvc", PVCName: "snapshot-pvc",
...@@ -293,17 +352,35 @@ func TestValidateRestorePodSpec(t *testing.T) { ...@@ -293,17 +352,35 @@ func TestValidateRestorePodSpec(t *testing.T) {
} }
badSpec := podSpec.DeepCopy() badSpec := podSpec.DeepCopy()
badSpec.Volumes = nil badSpec.Volumes = []corev1.Volume{badSpec.Volumes[1]}
if err := ValidateRestorePodSpec(badSpec, storage, DefaultSeccompLocalhostProfile); err == nil || err.Error() != "missing checkpoint-storage volume for PVC snapshot-pvc" { if err := ValidateRestorePodSpec(badSpec, storage, DefaultSeccompLocalhostProfile); err == nil || err.Error() != "missing checkpoint-storage volume for PVC snapshot-pvc" {
t.Fatalf("expected missing volume error, got %v", err) t.Fatalf("expected missing volume error, got %v", err)
} }
badSpec = podSpec.DeepCopy() badSpec = podSpec.DeepCopy()
badSpec.Containers[0].VolumeMounts = nil badSpec.Containers[0].VolumeMounts = []corev1.VolumeMount{badSpec.Containers[0].VolumeMounts[1]}
if err := ValidateRestorePodSpec(badSpec, storage, DefaultSeccompLocalhostProfile); err == nil || err.Error() != "missing checkpoint-storage mount at /checkpoints" { if err := ValidateRestorePodSpec(badSpec, storage, DefaultSeccompLocalhostProfile); err == nil || err.Error() != "missing checkpoint-storage mount at /checkpoints" {
t.Fatalf("expected missing mount error, got %v", err) t.Fatalf("expected missing mount error, got %v", err)
} }
badSpec = podSpec.DeepCopy()
badSpec.Volumes = []corev1.Volume{badSpec.Volumes[0]}
if err := ValidateRestorePodSpec(badSpec, storage, DefaultSeccompLocalhostProfile); err == nil || err.Error() != fmt.Sprintf("missing %s emptyDir volume; add it via snapshotprotocol.EnsureControlVolume", SnapshotControlVolumeName) {
t.Fatalf("expected missing control volume error, got %v", err)
}
badSpec = podSpec.DeepCopy()
badSpec.Containers[0].VolumeMounts = []corev1.VolumeMount{badSpec.Containers[0].VolumeMounts[0]}
if err := ValidateRestorePodSpec(badSpec, storage, DefaultSeccompLocalhostProfile); err == nil || err.Error() != fmt.Sprintf("missing %s mount at %s", SnapshotControlVolumeName, SnapshotControlMountPath) {
t.Fatalf("expected missing control mount error, got %v", err)
}
badSpec = podSpec.DeepCopy()
badSpec.Containers[0].Env = nil
if err := ValidateRestorePodSpec(badSpec, storage, DefaultSeccompLocalhostProfile); err == nil || err.Error() != fmt.Sprintf("missing %s env var on worker container", SnapshotControlDirEnv) {
t.Fatalf("expected missing control env error, got %v", err)
}
badSpec = podSpec.DeepCopy() badSpec = podSpec.DeepCopy()
badSpec.SecurityContext = nil badSpec.SecurityContext = nil
if err := ValidateRestorePodSpec(badSpec, storage, DefaultSeccompLocalhostProfile); err == nil || err.Error() != "missing localhost seccomp profile" { if err := ValidateRestorePodSpec(badSpec, storage, DefaultSeccompLocalhostProfile); err == nil || err.Error() != "missing localhost seccomp profile" {
...@@ -312,33 +389,9 @@ func TestValidateRestorePodSpec(t *testing.T) { ...@@ -312,33 +389,9 @@ func TestValidateRestorePodSpec(t *testing.T) {
} }
func TestValidateRestorePodSpecAcceptsFirstContainerAsWorker(t *testing.T) { func TestValidateRestorePodSpecAcceptsFirstContainerAsWorker(t *testing.T) {
profile := DefaultSeccompLocalhostProfile podSpec := validRestoreSpecFixture(DefaultSeccompLocalhostProfile)
podSpec := &corev1.PodSpec{ podSpec.Containers[0].Name = "worker"
SecurityContext: &corev1.PodSecurityContext{ podSpec.Containers = append(podSpec.Containers, corev1.Container{Name: "sidecar"})
SeccompProfile: &corev1.SeccompProfile{
Type: corev1.SeccompProfileTypeLocalhost,
LocalhostProfile: &profile,
},
},
Volumes: []corev1.Volume{{
Name: CheckpointVolumeName,
VolumeSource: corev1.VolumeSource{
PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{
ClaimName: "snapshot-pvc",
},
},
}},
Containers: []corev1.Container{
{
Name: "worker",
VolumeMounts: []corev1.VolumeMount{{
Name: CheckpointVolumeName,
MountPath: "/checkpoints",
}},
},
{Name: "sidecar"},
},
}
storage := Storage{ storage := Storage{
Type: StorageTypePVC, Type: StorageTypePVC,
...@@ -353,33 +406,9 @@ func TestValidateRestorePodSpecAcceptsFirstContainerAsWorker(t *testing.T) { ...@@ -353,33 +406,9 @@ func TestValidateRestorePodSpecAcceptsFirstContainerAsWorker(t *testing.T) {
} }
func TestValidateRestorePodSpecAllowsWorkerWithSidecars(t *testing.T) { func TestValidateRestorePodSpecAllowsWorkerWithSidecars(t *testing.T) {
profile := DefaultSeccompLocalhostProfile podSpec := validRestoreSpecFixture(DefaultSeccompLocalhostProfile)
podSpec := &corev1.PodSpec{ podSpec.Containers[0].Name = "worker"
SecurityContext: &corev1.PodSecurityContext{ podSpec.Containers = append(podSpec.Containers, corev1.Container{Name: "sidecar"})
SeccompProfile: &corev1.SeccompProfile{
Type: corev1.SeccompProfileTypeLocalhost,
LocalhostProfile: &profile,
},
},
Volumes: []corev1.Volume{{
Name: CheckpointVolumeName,
VolumeSource: corev1.VolumeSource{
PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{
ClaimName: "snapshot-pvc",
},
},
}},
Containers: []corev1.Container{
{
Name: "worker",
VolumeMounts: []corev1.VolumeMount{{
Name: CheckpointVolumeName,
MountPath: "/checkpoints",
}},
},
{Name: "sidecar"},
},
}
storage := Storage{ storage := Storage{
Type: StorageTypePVC, Type: StorageTypePVC,
......
...@@ -1795,7 +1795,6 @@ _Appears in:_ ...@@ -1795,7 +1795,6 @@ _Appears in:_
| Field | Description | Default | Validation | | Field | Description | Default | Validation |
| --- | --- | --- | --- | | --- | --- | --- | --- |
| `enabled` _boolean_ | Enabled indicates if checkpoint functionality is enabled | | | | `enabled` _boolean_ | Enabled indicates if checkpoint functionality is enabled | | |
| `readyForCheckpointFilePath` _string_ | ReadyForCheckpointFilePath signals model readiness for checkpoint jobs | /tmp/ready-for-checkpoint | |
| `storage` _[CheckpointStorageConfiguration](#checkpointstorageconfiguration)_ | Deprecated: Storage is retained for compatibility and ignored by the<br />current snapshot flow. Snapshot storage is discovered from the<br />snapshot-agent DaemonSet instead. | | | | `storage` _[CheckpointStorageConfiguration](#checkpointstorageconfiguration)_ | Deprecated: Storage is retained for compatibility and ignored by the<br />current snapshot flow. Snapshot storage is discovered from the<br />snapshot-agent DaemonSet instead. | | |
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment