mounts.go 3.67 KB
Newer Older
1
package runtime
2
3
4
5
6
7
8
9
10
11
12

import (
	"fmt"
	"os"
	"path/filepath"
	"strings"
	"syscall"

	"github.com/moby/sys/mountinfo"
	specs "github.com/opencontainers/runtime-spec/specs-go"

13
	"github.com/ai-dynamo/dynamo/deploy/snapshot/internal/types"
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
)

// ReadMountInfo loads the mount table of the process identified by pid from
// <HostProcPath>/<pid>/mountinfo and converts every parsed entry into a
// types.MountInfo carrying its mount point, filesystem type and VFS options.
//
// It returns an error if the mountinfo file cannot be opened or parsed.
func ReadMountInfo(pid int) ([]types.MountInfo, error) {
	file, err := os.Open(fmt.Sprintf("%s/%d/mountinfo", HostProcPath, pid))
	if err != nil {
		return nil, fmt.Errorf("failed to open mountinfo: %w", err)
	}
	defer file.Close()

	// No filter function is supplied (nil) when parsing.
	parsed, err := mountinfo.GetMountsFromReader(file, nil)
	if err != nil {
		return nil, fmt.Errorf("failed to parse mountinfo: %w", err)
	}

	// Copy only the fields this package tracks; everything else stays zero.
	result := make([]types.MountInfo, len(parsed))
	for i, entry := range parsed {
		result[i] = types.MountInfo{
			MountPoint: entry.Mountpoint,
			FSType:     entry.FSType,
			VFSOptions: entry.VFSOptions,
		}
	}
	return result, nil
}

// ClassifyMounts marks, in place, every mount whose mount point appears in the
// set of OCI-managed paths derived from ociSpec and rootFS, and returns the
// same slice.
//
// A mount is flagged when its mount point is in the set directly, or when its
// /run/… ↔ /var/run/… counterpart is (some images symlink one to the other).
// IsOCIManaged is only ever set to true here; mounts that do not match keep
// whatever value they already had.
func ClassifyMounts(mounts []types.MountInfo, ociSpec *specs.Spec, rootFS string) []types.MountInfo {
	managed := collectOCIManagedPaths(ociSpec, rootFS)

	for idx := range mounts {
		point := mounts[idx].MountPoint

		// Work out the aliased spelling of the path, if it has one.
		alias := ""
		switch {
		case strings.HasPrefix(point, "/run/"):
			alias = "/var" + point
		case strings.HasPrefix(point, "/var/run/"):
			alias = strings.TrimPrefix(point, "/var")
		}

		// The literal path wins; the alias is consulted only as a fallback.
		_, hit := managed[point]
		if !hit && alias != "" {
			_, hit = managed[alias]
		}
		if hit {
			mounts[idx].IsOCIManaged = true
		}
	}

	return mounts
}

// BuildMountPolicy decides, for a CRIU dump, how each mount and masked path is
// treated. The IsOCIManaged flag on each mount must already be populated
// (see ClassifyMounts).
//
// Rules, applied in order to every mount with a non-empty mount point:
//  1. Skip: mounts under /proc/, /sys/ or /run/ that are not OCI-managed are
//     recorded in the returned skip list and not externalized.
//  2. Native: a tmpfs at /dev/shm is left out of both results so CRIU handles
//     its content itself.
//  3. Externalize: every remaining mount maps to its own mount point.
//
// Afterwards, each masked path that exists under rootFS and is not a directory
// is mapped to /dev/null, replacing any entry a mount produced for that key.
//
// It returns the externalization map and the list of skipped mount points.
func BuildMountPolicy(mounts []types.MountInfo, rootFS string, maskedPaths []string) (map[string]string, []string) {
	externalized := make(map[string]string, len(mounts))
	var skipped []string

	virtualPrefixes := []string{"/proc/", "/sys/", "/run/"}
	underVirtualFS := func(path string) bool {
		for _, prefix := range virtualPrefixes {
			if strings.HasPrefix(path, prefix) {
				return true
			}
		}
		return false
	}

	for i := range mounts {
		point := mounts[i].MountPoint
		switch {
		case point == "":
			// Entries without a mount point are ignored.
		case !mounts[i].IsOCIManaged && underVirtualFS(point):
			// Virtual/runtime submounts will not exist in the placeholder.
			skipped = append(skipped, point)
		case point == "/dev/shm" && mounts[i].FSType == "tmpfs":
			// Left to CRIU, which dumps and restores the content natively.
		default:
			externalized[point] = point
		}
	}

	// Lstat is used, so a symlink is judged by the link itself, not its target.
	for _, masked := range maskedPaths {
		fi, err := os.Lstat(filepath.Join(rootFS, masked))
		if err == nil && !fi.IsDir() {
			externalized[masked] = "/dev/null"
		}
	}

	return externalized, skipped
}

// RemountProcSys issues a bind remount of /proc/sys. With rw set the remount
// carries only MS_BIND|MS_REMOUNT; otherwise MS_RDONLY is added so the mount
// becomes read-only. Any mount error is wrapped with the requested mode
// ("rw" or "ro").
func RemountProcSys(rw bool) error {
	mode := "rw"
	var flags uintptr = syscall.MS_BIND | syscall.MS_REMOUNT
	if !rw {
		mode = "ro"
		flags |= syscall.MS_RDONLY
	}

	err := syscall.Mount("proc", "/proc/sys", "", flags, "")
	if err == nil {
		return nil
	}
	return fmt.Errorf("failed to remount /proc/sys %s: %w", mode, err)
}