criu.go 3.72 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
// criu provides CRIU-specific configuration and utilities for restore operations.
package restore

import (
	"os"

	criurpc "github.com/checkpoint-restore/go-criu/v7/rpc"
	"github.com/sirupsen/logrus"
	"golang.org/x/sys/unix"
	"google.golang.org/protobuf/proto"

	"github.com/ai-dynamo/dynamo/deploy/chrek/pkg/common"
)

// CRIURestoreConfig holds configuration for CRIU restore operations.
// Most options are always-on with safe defaults for K8s environments.
type CRIURestoreConfig struct {
	ImageDirFD   int32
	RootPath     string
	LogLevel     int32
	LogFile      string
	WorkDirFD    int32
	NetNsFD      int32
	ExtMountMaps []*criurpc.ExtMountMap
}

// OpenImageDir opens a checkpoint directory and clears CLOEXEC for CRIU.
// Returns the opened file and its FD. Caller must close the file when done.
func OpenImageDir(checkpointPath string) (*os.File, int32, error) {
	return common.OpenDirForCRIU(checkpointPath)
}

// OpenNetworkNamespace opens the target network namespace for restore.
// Returns the opened file and its FD. Caller must close the file when done.
func OpenNetworkNamespace(nsPath string) (*os.File, int32, error) {
	return common.OpenDirForCRIU(nsPath)
}

// OpenWorkDir opens a work directory for CRIU and clears CLOEXEC.
// Returns the opened file and its FD, or nil/-1 if workDir is empty or fails.
func OpenWorkDir(workDir string, log *logrus.Entry) (*os.File, int32) {
	if workDir == "" {
		return nil, -1
	}

	// Ensure work directory exists
	if err := os.MkdirAll(workDir, 0755); err != nil {
		log.WithError(err).Warn("Failed to create CRIU work directory, using default")
		return nil, -1
	}

	workDirFile, err := os.Open(workDir)
	if err != nil {
		log.WithError(err).Warn("Failed to open CRIU work directory, using default")
		return nil, -1
	}

	if _, err := unix.FcntlInt(workDirFile.Fd(), unix.F_SETFD, 0); err != nil {
		log.WithError(err).Warn("Failed to clear CLOEXEC on work dir")
		workDirFile.Close()
		return nil, -1
	}

	log.WithField("path", workDir).Info("Using custom CRIU work directory")
	return workDirFile, int32(workDirFile.Fd())
}

// BuildRestoreCRIUOpts creates CRIU options for restore from a config struct.
//
// Always-on options for K8s:
//   - ShellJob: containers are often session leaders
//   - TcpClose: pod IPs change on restore/migration
//   - FileLocks: applications use file locks
//   - ExtUnixSk: containers have external Unix sockets
//   - ManageCgroups (IGNORE): let K8s manage cgroups
func BuildRestoreCRIUOpts(cfg CRIURestoreConfig) *criurpc.CriuOpts {
	cgMode := criurpc.CriuCgMode_IGNORE

	criuOpts := &criurpc.CriuOpts{
		ImagesDirFd: proto.Int32(cfg.ImageDirFD),
		LogLevel:    proto.Int32(cfg.LogLevel),
		LogFile:     proto.String(cfg.LogFile),

		// Root filesystem - use current container's root
		Root: proto.String(cfg.RootPath),

		// Restore in detached mode - process runs in background
		RstSibling: proto.Bool(true),

		// Mount namespace compatibility mode for cross-container restore
		MntnsCompatMode: proto.Bool(true),

		// Always-on for K8s environments
		ShellJob:  proto.Bool(true),
		TcpClose:  proto.Bool(true),
		FileLocks: proto.Bool(true),
		ExtUnixSk: proto.Bool(true),

		// Cgroup management - ignore to avoid conflicts
		ManageCgroups:     proto.Bool(true),
		ManageCgroupsMode: &cgMode,

		// Device and inode handling
		EvasiveDevices: proto.Bool(true),
		ForceIrmap:     proto.Bool(true),

		// External mount mappings
		ExtMnt: cfg.ExtMountMaps,
	}

	// Add network namespace inheritance if provided
	if cfg.NetNsFD >= 0 {
		criuOpts.InheritFd = []*criurpc.InheritFd{
			{
				Key: proto.String("extNetNs"),
				Fd:  proto.Int32(cfg.NetNsFD),
			},
		}
	}

	// Add work directory if specified
	if cfg.WorkDirFD >= 0 {
		criuOpts.WorkDirFd = proto.Int32(cfg.WorkDirFD)
	}

	return criuOpts
}