Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
d381e6ff
Unverified
Commit
d381e6ff
authored
Feb 11, 2026
by
Schwinn Saereesitthipitak
Committed by
GitHub
Feb 12, 2026
Browse files
feat(chrek): config refactor, /dev/shm support, and mount-policy rewrite (#5946)
parent
b6824ae0
Changes
51
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1644 additions
and
1060 deletions
+1644
-1060
deploy/chrek/pkg/common/criu.go
deploy/chrek/pkg/common/criu.go
+3
-38
deploy/chrek/pkg/common/metadata.go
deploy/chrek/pkg/common/metadata.go
+0
-204
deploy/chrek/pkg/http_api_server/handlers.go
deploy/chrek/pkg/http_api_server/handlers.go
+162
-0
deploy/chrek/pkg/http_api_server/middleware.go
deploy/chrek/pkg/http_api_server/middleware.go
+18
-0
deploy/chrek/pkg/http_api_server/server.go
deploy/chrek/pkg/http_api_server/server.go
+75
-0
deploy/chrek/pkg/http_api_server/types.go
deploy/chrek/pkg/http_api_server/types.go
+43
-0
deploy/chrek/pkg/restore/config.go
deploy/chrek/pkg/restore/config.go
+224
-0
deploy/chrek/pkg/restore/criu.go
deploy/chrek/pkg/restore/criu.go
+80
-41
deploy/chrek/pkg/restore/filesystem.go
deploy/chrek/pkg/restore/filesystem.go
+6
-19
deploy/chrek/pkg/restore/link_remap.go
deploy/chrek/pkg/restore/link_remap.go
+364
-0
deploy/chrek/pkg/restore/mounts.go
deploy/chrek/pkg/restore/mounts.go
+24
-61
deploy/chrek/pkg/restore/options.go
deploy/chrek/pkg/restore/options.go
+0
-288
deploy/chrek/pkg/restore/process.go
deploy/chrek/pkg/restore/process.go
+111
-83
deploy/chrek/pkg/restore/restore.go
deploy/chrek/pkg/restore/restore.go
+308
-130
deploy/chrek/pkg/restore/shm.go
deploy/chrek/pkg/restore/shm.go
+113
-0
deploy/chrek/pkg/watcher/watcher.go
deploy/chrek/pkg/watcher/watcher.go
+43
-54
deploy/chrek/scripts/smart-entrypoint.sh
deploy/chrek/scripts/smart-entrypoint.sh
+0
-110
deploy/helm/charts/chrek/README.md
deploy/helm/charts/chrek/README.md
+0
-2
deploy/helm/charts/chrek/templates/configmap.yaml
deploy/helm/charts/chrek/templates/configmap.yaml
+61
-0
deploy/helm/charts/chrek/templates/daemonset.yaml
deploy/helm/charts/chrek/templates/daemonset.yaml
+9
-30
No files found.
deploy/chrek/pkg/common/criu.go
View file @
d381e6ff
...
...
@@ -10,10 +10,10 @@ import (
"golang.org/x/sys/unix"
)
// Open
Dir
ForCRIU opens a directory and clears the CLOEXEC flag
so the FD
// can be inherited by CRIU child processes.
// Open
Path
ForCRIU opens a
path (
directory
or file)
and clears the CLOEXEC flag
//
so the FD
can be inherited by CRIU child processes.
// Returns the opened file and its FD. Caller must close the file when done.
func
Open
Dir
ForCRIU
(
path
string
)
(
*
os
.
File
,
int32
,
error
)
{
func
Open
Path
ForCRIU
(
path
string
)
(
*
os
.
File
,
int32
,
error
)
{
dir
,
err
:=
os
.
Open
(
path
)
if
err
!=
nil
{
return
nil
,
0
,
fmt
.
Errorf
(
"failed to open %s: %w"
,
path
,
err
)
...
...
@@ -30,41 +30,6 @@ func OpenDirForCRIU(path string) (*os.File, int32, error) {
return
dir
,
int32
(
dir
.
Fd
()),
nil
}
// DefaultMaskedPaths returns the standard OCI masked paths.
// These paths are typically masked (made inaccessible) in containers.
// Used as fallback when checkpoint metadata doesn't include OCI-derived paths.
func
DefaultMaskedPaths
()
[]
string
{
return
[]
string
{
"/proc/bus"
,
"/proc/fs"
,
"/proc/irq"
,
"/proc/sys"
,
"/proc/sysrq-trigger"
,
"/proc/acpi"
,
"/proc/kcore"
,
"/proc/keys"
,
"/proc/latency_stats"
,
"/proc/timer_list"
,
"/proc/scsi"
,
"/proc/interrupts"
,
"/proc/asound"
,
"/sys/firmware"
,
"/sys/devices/virtual/powercap"
,
}
}
// DefaultReadonlyPaths returns the standard OCI readonly paths.
// These paths are typically mounted read-only in containers.
func
DefaultReadonlyPaths
()
[]
string
{
return
[]
string
{
"/proc/bus"
,
"/proc/fs"
,
"/proc/irq"
,
"/proc/sys"
,
"/proc/sysrq-trigger"
,
}
}
// CRIUMountPoint represents a parsed mount point from /proc/pid/mountinfo.
type
CRIUMountPoint
struct
{
MountID
string
// Mount ID
...
...
deploy/chrek/pkg/common/metadata.go
deleted
100644 → 0
View file @
b6824ae0
// metadata.go handles checkpoint metadata for cross-node restore operations.
package
common
import
(
"encoding/json"
"fmt"
"os"
"path/filepath"
"time"
)
// MetadataFilename is the name of the metadata file stored in each
// checkpoint directory.
const MetadataFilename = "metadata.json"

// DescriptorsFilename is the name of the file that holds the saved file
// descriptor list.
const DescriptorsFilename = "descriptors.json"
// CheckpointMetadata stores information needed for cross-node restore.
//
// It is the value that SaveMetadata marshals to, and LoadMetadata unmarshals
// from, the MetadataFilename file of a checkpoint directory. Each field is
// serialized under the JSON key given in its struct tag; fields tagged
// omitempty are left out of the file when they hold their zero value.
type CheckpointMetadata struct {
	// Checkpoint identification
	CheckpointID string    `json:"checkpoint_id"`
	CreatedAt    time.Time `json:"created_at"`

	// Source information
	SourceNode  string `json:"source_node"`
	SourcePodIP string `json:"source_pod_ip,omitempty"`

	// For cross-node TCP detection
	ContainerID  string `json:"container_id"`
	PodName      string `json:"pod_name"`
	PodNamespace string `json:"pod_namespace"`
	Image        string `json:"image"`

	// Process information
	PID int `json:"pid"`

	// Filesystem information
	RootfsDiffPath  string `json:"rootfs_diff_path,omitempty"` // Path to rootfs-diff.tar
	UpperDir        string `json:"upper_dir,omitempty"`        // Original overlay upperdir
	HasRootfsDiff   bool   `json:"has_rootfs_diff"`            // Whether rootfs diff was captured
	HasDeletedFiles bool   `json:"has_deleted_files"`          // Whether deleted files were tracked

	// Mount mappings from original container
	Mounts []MountMetadata `json:"mounts"`

	// OCI spec derived paths (populated from containerd, used at restore)
	// These replace hardcoded values with runtime-discovered configuration
	MaskedPaths    []string `json:"masked_paths,omitempty"`     // From OCI spec Linux.MaskedPaths
	ReadonlyPaths  []string `json:"readonly_paths,omitempty"`   // From OCI spec Linux.ReadonlyPaths
	BindMountDests []string `json:"bind_mount_dests,omitempty"` // Destinations of bind mounts (for tar exclusions)

	// Namespace information
	Namespaces []NamespaceMetadata `json:"namespaces"`

	// CRIU options used during checkpoint (for restore compatibility)
	CRIUOptions CRIUOptionsMetadata `json:"criu_options"`
}
// CRIUOptionsMetadata stores CRIU options used during checkpoint.
// This allows restore to use compatible options.
// Note: In our implementation, most options are hardcoded as always-on for K8s,
// but we store them for compatibility and debugging purposes.
//
// Every field is a plain on/off flag and is always written to the JSON
// output (none of the tags use omitempty), so a false value is recorded
// explicitly rather than omitted.
type CRIUOptionsMetadata struct {
	TcpEstablished bool `json:"tcp_established"`
	TcpClose       bool `json:"tcp_close"`
	ShellJob       bool `json:"shell_job"`
	FileLocks      bool `json:"file_locks"`
	LeaveRunning   bool `json:"leave_running"`
	LinkRemap      bool `json:"link_remap"`
	ExtMasters     bool `json:"ext_masters"`
}
// MountMetadata stores information about a mount for remapping during restore.
//
// One entry describes one mount of the checkpointed container; entries are
// collected in CheckpointMetadata.Mounts. The OCI* fields are optional and
// are omitted from the JSON output when empty.
type MountMetadata struct {
	ContainerPath string   `json:"container_path"`        // Path inside container (e.g., /usr/share/nginx/html)
	HostPath      string   `json:"host_path"`             // Original host path from mountinfo
	OCISource     string   `json:"oci_source,omitempty"`  // Source path from OCI spec (may differ from HostPath)
	OCIType       string   `json:"oci_type,omitempty"`    // Mount type from OCI spec (bind, tmpfs, etc.)
	OCIOptions    []string `json:"oci_options,omitempty"` // Mount options from OCI spec
	VolumeType    string   `json:"volume_type"`           // emptyDir, pvc, configMap, secret, hostPath (best-effort)
	VolumeName    string   `json:"volume_name"`           // Kubernetes volume name (best-effort from path parsing)
	FSType        string   `json:"fs_type"`               // Filesystem type from mountinfo
	ReadOnly      bool     `json:"read_only"`             // Whether mount is read-only
}
// NamespaceMetadata stores namespace information.
//
// One entry describes one namespace of the checkpointed process; entries are
// collected in CheckpointMetadata.Namespaces.
type NamespaceMetadata struct {
	Type       string `json:"type"`        // net, pid, mnt, etc.
	Inode      uint64 `json:"inode"`       // Namespace inode
	IsExternal bool   `json:"is_external"` // Whether namespace is external (shared)
}
// NewCheckpointMetadata creates a new metadata instance
func
NewCheckpointMetadata
(
checkpointID
string
)
*
CheckpointMetadata
{
return
&
CheckpointMetadata
{
CheckpointID
:
checkpointID
,
CreatedAt
:
time
.
Now
()
.
UTC
(),
Mounts
:
make
([]
MountMetadata
,
0
),
Namespaces
:
make
([]
NamespaceMetadata
,
0
),
}
}
// SaveMetadata writes metadata to a JSON file in the checkpoint directory
func
SaveMetadata
(
checkpointDir
string
,
meta
*
CheckpointMetadata
)
error
{
data
,
err
:=
json
.
MarshalIndent
(
meta
,
""
,
" "
)
if
err
!=
nil
{
return
fmt
.
Errorf
(
"failed to marshal metadata: %w"
,
err
)
}
metadataPath
:=
filepath
.
Join
(
checkpointDir
,
MetadataFilename
)
if
err
:=
os
.
WriteFile
(
metadataPath
,
data
,
0644
);
err
!=
nil
{
return
fmt
.
Errorf
(
"failed to write metadata file: %w"
,
err
)
}
return
nil
}
// LoadMetadata reads metadata from a checkpoint directory
func
LoadMetadata
(
checkpointDir
string
)
(
*
CheckpointMetadata
,
error
)
{
metadataPath
:=
filepath
.
Join
(
checkpointDir
,
MetadataFilename
)
data
,
err
:=
os
.
ReadFile
(
metadataPath
)
if
err
!=
nil
{
return
nil
,
fmt
.
Errorf
(
"failed to read metadata file: %w"
,
err
)
}
var
meta
CheckpointMetadata
if
err
:=
json
.
Unmarshal
(
data
,
&
meta
);
err
!=
nil
{
return
nil
,
fmt
.
Errorf
(
"failed to unmarshal metadata: %w"
,
err
)
}
return
&
meta
,
nil
}
// SaveDescriptors writes file descriptor information to the checkpoint directory
func
SaveDescriptors
(
checkpointDir
string
,
descriptors
[]
string
)
error
{
data
,
err
:=
json
.
Marshal
(
descriptors
)
if
err
!=
nil
{
return
fmt
.
Errorf
(
"failed to marshal descriptors: %w"
,
err
)
}
descriptorsPath
:=
filepath
.
Join
(
checkpointDir
,
DescriptorsFilename
)
if
err
:=
os
.
WriteFile
(
descriptorsPath
,
data
,
0600
);
err
!=
nil
{
return
fmt
.
Errorf
(
"failed to write descriptors file: %w"
,
err
)
}
return
nil
}
// LoadDescriptors reads file descriptor information from checkpoint directory
func
LoadDescriptors
(
checkpointDir
string
)
([]
string
,
error
)
{
descriptorsPath
:=
filepath
.
Join
(
checkpointDir
,
DescriptorsFilename
)
data
,
err
:=
os
.
ReadFile
(
descriptorsPath
)
if
err
!=
nil
{
return
nil
,
fmt
.
Errorf
(
"failed to read descriptors file: %w"
,
err
)
}
var
descriptors
[]
string
if
err
:=
json
.
Unmarshal
(
data
,
&
descriptors
);
err
!=
nil
{
return
nil
,
fmt
.
Errorf
(
"failed to unmarshal descriptors: %w"
,
err
)
}
return
descriptors
,
nil
}
// GetCheckpointDir returns the directory of checkpointID under baseDir, i.e.
// the two path elements joined (and cleaned) with filepath.Join.
func GetCheckpointDir(baseDir, checkpointID string) string {
	dir := filepath.Join(baseDir, checkpointID)
	return dir
}
// ListCheckpoints returns all checkpoint IDs in the base directory
func
ListCheckpoints
(
baseDir
string
)
([]
string
,
error
)
{
entries
,
err
:=
os
.
ReadDir
(
baseDir
)
if
err
!=
nil
{
return
nil
,
fmt
.
Errorf
(
"failed to read checkpoint directory: %w"
,
err
)
}
var
checkpoints
[]
string
for
_
,
entry
:=
range
entries
{
if
!
entry
.
IsDir
()
{
continue
}
// Check if metadata file exists
metadataPath
:=
filepath
.
Join
(
baseDir
,
entry
.
Name
(),
MetadataFilename
)
if
_
,
err
:=
os
.
Stat
(
metadataPath
);
err
==
nil
{
checkpoints
=
append
(
checkpoints
,
entry
.
Name
())
}
}
return
checkpoints
,
nil
}
// GetCheckpointInfo returns metadata for a specific checkpoint
func
GetCheckpointInfo
(
baseDir
,
checkpointID
string
)
(
*
CheckpointMetadata
,
error
)
{
checkpointDir
:=
GetCheckpointDir
(
baseDir
,
checkpointID
)
return
LoadMetadata
(
checkpointDir
)
}
// DeleteCheckpoint removes the directory of checkpointID under baseDir,
// including everything inside it.
//
// checkpointID must name a single directory entry. An empty ID, "." or "..",
// or an ID containing a path separator is rejected with an error: joined with
// baseDir, such values would resolve to baseDir itself or to a location
// outside of it, and os.RemoveAll would then delete far more than one
// checkpoint.
//
// Deleting a checkpoint that does not exist is not an error (os.RemoveAll
// semantics).
func DeleteCheckpoint(baseDir, checkpointID string) error {
	switch {
	case checkpointID == "", checkpointID == ".", checkpointID == "..":
		return fmt.Errorf("invalid checkpoint ID %q", checkpointID)
	case filepath.Base(checkpointID) != checkpointID:
		// filepath.Base differs from its input exactly when the ID carries a
		// separator (leading, embedded, or trailing).
		return fmt.Errorf("invalid checkpoint ID %q: must not contain path separators", checkpointID)
	}
	return os.RemoveAll(filepath.Join(baseDir, checkpointID))
}
deploy/chrek/pkg/http_api_server/handlers.go
0 → 100644
View file @
d381e6ff
// handlers.go provides HTTP handlers for the checkpoint agent server.
package
httpApiServer
import
(
"encoding/json"
"fmt"
"log"
"net/http"
"os"
"path/filepath"
"time"
"github.com/ai-dynamo/dynamo/deploy/chrek/pkg/checkpoint"
)
// Handlers holds dependencies for HTTP handlers.
type Handlers struct {
	// cfg supplies the node name reported by /health and the checkpoint
	// spec (base path, CRIU settings) used by the checkpoint endpoints.
	cfg ServerConfig
	// checkpointer performs the checkpoint requested via POST /checkpoint.
	checkpointer *checkpoint.Checkpointer
}
// NewHandlers creates a new Handlers instance.
func
NewHandlers
(
cfg
ServerConfig
,
checkpointer
*
checkpoint
.
Checkpointer
)
*
Handlers
{
return
&
Handlers
{
cfg
:
cfg
,
checkpointer
:
checkpointer
,
}
}
// HandleHealth handles GET /health requests.
func
(
h
*
Handlers
)
HandleHealth
(
w
http
.
ResponseWriter
,
r
*
http
.
Request
)
{
if
r
.
Method
!=
http
.
MethodGet
{
http
.
Error
(
w
,
"Method not allowed"
,
http
.
StatusMethodNotAllowed
)
return
}
resp
:=
HealthResponse
{
Status
:
"healthy"
,
NodeName
:
h
.
cfg
.
NodeName
,
}
writeJSON
(
w
,
http
.
StatusOK
,
resp
)
}
// HandleCheckpoint handles POST /checkpoint requests.
func
(
h
*
Handlers
)
HandleCheckpoint
(
w
http
.
ResponseWriter
,
r
*
http
.
Request
)
{
if
r
.
Method
!=
http
.
MethodPost
{
http
.
Error
(
w
,
"Method not allowed"
,
http
.
StatusMethodNotAllowed
)
return
}
var
req
CheckpointRequest
if
err
:=
json
.
NewDecoder
(
r
.
Body
)
.
Decode
(
&
req
);
err
!=
nil
{
writeJSON
(
w
,
http
.
StatusBadRequest
,
CheckpointResponse
{
Success
:
false
,
Error
:
fmt
.
Sprintf
(
"Invalid request body: %v"
,
err
),
})
return
}
if
req
.
ContainerID
==
""
{
writeJSON
(
w
,
http
.
StatusBadRequest
,
CheckpointResponse
{
Success
:
false
,
Error
:
"container_id is required"
,
})
return
}
if
req
.
CheckpointID
==
""
{
req
.
CheckpointID
=
fmt
.
Sprintf
(
"ckpt-%d"
,
time
.
Now
()
.
UnixNano
())
}
// Build checkpoint params
params
:=
checkpoint
.
CheckpointRequest
{
ContainerID
:
req
.
ContainerID
,
ContainerName
:
req
.
ContainerName
,
CheckpointID
:
req
.
CheckpointID
,
CheckpointDir
:
h
.
cfg
.
CheckpointSpec
.
BasePath
,
NodeName
:
h
.
cfg
.
NodeName
,
PodName
:
req
.
PodName
,
PodNamespace
:
req
.
PodNamespace
,
}
// Copy checkpoint spec and disable CUDA if requested.
checkpointSpec
:=
*
h
.
cfg
.
CheckpointSpec
if
req
.
DisableCUDA
{
checkpointSpec
.
CRIU
.
LibDir
=
""
}
ctx
:=
r
.
Context
()
result
,
err
:=
h
.
checkpointer
.
Checkpoint
(
ctx
,
params
,
&
checkpointSpec
)
if
err
!=
nil
{
log
.
Printf
(
"Checkpoint failed: %v"
,
err
)
writeJSON
(
w
,
http
.
StatusInternalServerError
,
CheckpointResponse
{
Success
:
false
,
Error
:
err
.
Error
(),
})
return
}
// Write checkpoint.done marker so restore-entrypoint can detect this checkpoint
checkpointDonePath
:=
result
.
CheckpointDir
+
"/"
+
checkpoint
.
CheckpointDoneFilename
if
err
:=
os
.
WriteFile
(
checkpointDonePath
,
[]
byte
(
time
.
Now
()
.
Format
(
time
.
RFC3339
)),
0644
);
err
!=
nil
{
log
.
Printf
(
"Failed to write checkpoint.done marker: %v"
,
err
)
writeJSON
(
w
,
http
.
StatusInternalServerError
,
CheckpointResponse
{
Success
:
false
,
Error
:
fmt
.
Sprintf
(
"Checkpoint succeeded but failed to write done marker: %v"
,
err
),
})
return
}
log
.
Printf
(
"Wrote checkpoint.done marker: %s"
,
checkpointDonePath
)
log
.
Printf
(
"Checkpoint successful: %s"
,
result
.
CheckpointID
)
writeJSON
(
w
,
http
.
StatusOK
,
CheckpointResponse
{
Success
:
true
,
CheckpointID
:
result
.
CheckpointID
,
Message
:
fmt
.
Sprintf
(
"Checkpoint created successfully at %s"
,
result
.
CheckpointDir
),
})
}
// HandleListCheckpoints handles GET /checkpoints requests.
func
(
h
*
Handlers
)
HandleListCheckpoints
(
w
http
.
ResponseWriter
,
r
*
http
.
Request
)
{
if
r
.
Method
!=
http
.
MethodGet
{
http
.
Error
(
w
,
"Method not allowed"
,
http
.
StatusMethodNotAllowed
)
return
}
checkpointIDs
,
err
:=
checkpoint
.
ListCheckpoints
(
h
.
cfg
.
CheckpointSpec
.
BasePath
)
if
err
!=
nil
{
writeJSON
(
w
,
http
.
StatusInternalServerError
,
map
[
string
]
string
{
"error"
:
err
.
Error
(),
})
return
}
var
checkpoints
[]
CheckpointInfo
for
_
,
id
:=
range
checkpointIDs
{
meta
,
err
:=
checkpoint
.
ReadCheckpointManifest
(
filepath
.
Join
(
h
.
cfg
.
CheckpointSpec
.
BasePath
,
id
))
if
err
!=
nil
{
continue
}
checkpoints
=
append
(
checkpoints
,
CheckpointInfo
{
ID
:
meta
.
CheckpointID
,
CreatedAt
:
meta
.
CreatedAt
,
SourceNode
:
meta
.
K8s
.
SourceNode
,
ContainerID
:
meta
.
K8s
.
ContainerID
,
PodName
:
meta
.
K8s
.
PodName
,
PodNamespace
:
meta
.
K8s
.
PodNamespace
,
})
}
writeJSON
(
w
,
http
.
StatusOK
,
ListCheckpointsResponse
{
Checkpoints
:
checkpoints
,
})
}
// writeJSON writes data as a JSON response with the given HTTP status code
// and a Content-Type of application/json.
//
// The status line is sent before encoding starts, so an encoding or write
// failure cannot be reported to the client any more; it is logged instead of
// being silently dropped.
func writeJSON(w http.ResponseWriter, status int, data interface{}) {
	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(status)
	if err := json.NewEncoder(w).Encode(data); err != nil {
		log.Printf("Failed to write JSON response: %v", err)
	}
}
deploy/chrek/pkg/http_api_server/middleware.go
0 → 100644
View file @
d381e6ff
// middleware.go provides HTTP middleware for the server.
package
httpApiServer
import
(
"log"
"net/http"
"time"
)
// LoggingMiddleware returns a handler that logs the method and path of every
// request before passing it to next, and logs them again together with the
// elapsed time once next has returned.
func LoggingMiddleware(next http.Handler) http.Handler {
	logged := func(w http.ResponseWriter, r *http.Request) {
		began := time.Now()
		log.Printf("Started %s %s", r.Method, r.URL.Path)

		next.ServeHTTP(w, r)

		elapsed := time.Since(began)
		log.Printf("Completed %s %s in %v", r.Method, r.URL.Path, elapsed)
	}
	return http.HandlerFunc(logged)
}
deploy/chrek/pkg/http_api_server/server.go
0 → 100644
View file @
d381e6ff
// server.go provides the HTTP server for the checkpoint agent.
package
httpApiServer
import
(
"context"
"log"
"net/http"
"time"
"github.com/ai-dynamo/dynamo/deploy/chrek/pkg/checkpoint"
)
// ServerConfig holds the configuration for the HTTP API server.
type ServerConfig struct {
	// ListenAddr is the address the HTTP server listens on (used as
	// http.Server.Addr).
	ListenAddr string
	// NodeName is reported by /health and passed along with every
	// checkpoint request.
	NodeName string
	// CheckpointSpec provides the checkpoint base path and the CRIU settings.
	// NewServer and the handlers dereference it without a nil check, so it
	// must be set.
	CheckpointSpec *checkpoint.CheckpointSpec
}
// Server is the HTTP API server for checkpoint operations.
type Server struct {
	// cfg is the configuration the server was created with.
	cfg ServerConfig
	// handlers holds the request handlers registered on the server's mux.
	handlers *Handlers
	// httpServer is the underlying net/http server that Start and Shutdown
	// operate on.
	httpServer *http.Server
}
// NewServer creates a new Server instance.
func
NewServer
(
cfg
ServerConfig
,
checkpointer
*
checkpoint
.
Checkpointer
)
*
Server
{
handlers
:=
NewHandlers
(
cfg
,
checkpointer
)
// Setup routes
mux
:=
http
.
NewServeMux
()
mux
.
HandleFunc
(
"/health"
,
handlers
.
HandleHealth
)
mux
.
HandleFunc
(
"/checkpoint"
,
handlers
.
HandleCheckpoint
)
mux
.
HandleFunc
(
"/checkpoints"
,
handlers
.
HandleListCheckpoints
)
// WriteTimeout must exceed the CRIU checkpoint timeout since /checkpoint
// blocks until the dump completes. Add 60s buffer for pre/post work.
writeTimeout
:=
time
.
Duration
(
cfg
.
CheckpointSpec
.
CRIU
.
Timeout
)
*
time
.
Second
+
60
*
time
.
Second
if
writeTimeout
<
300
*
time
.
Second
{
writeTimeout
=
300
*
time
.
Second
}
httpServer
:=
&
http
.
Server
{
Addr
:
cfg
.
ListenAddr
,
Handler
:
LoggingMiddleware
(
mux
),
ReadTimeout
:
30
*
time
.
Second
,
WriteTimeout
:
writeTimeout
,
IdleTimeout
:
120
*
time
.
Second
,
}
return
&
Server
{
cfg
:
cfg
,
handlers
:
handlers
,
httpServer
:
httpServer
,
}
}
// Start starts the HTTP server.
// This method blocks until the server is shut down.
func
(
s
*
Server
)
Start
()
error
{
log
.
Printf
(
"HTTP API server listening on %s"
,
s
.
cfg
.
ListenAddr
)
return
s
.
httpServer
.
ListenAndServe
()
}
// Shutdown gracefully shuts down the server.
func
(
s
*
Server
)
Shutdown
(
ctx
context
.
Context
)
error
{
log
.
Println
(
"Shutting down HTTP server..."
)
return
s
.
httpServer
.
Shutdown
(
ctx
)
}
// Addr returns the server's listen address.
func
(
s
*
Server
)
Addr
()
string
{
return
s
.
cfg
.
ListenAddr
}
deploy/chrek/pkg/http_api_server/types.go
0 → 100644
View file @
d381e6ff
// Package httpApiServer provides HTTP server functionality for the checkpoint agent.
package
httpApiServer
import
"time"
// CheckpointRequest is the request body for checkpoint operations
// (POST /checkpoint).
type CheckpointRequest struct {
	// ContainerID is required; the handler rejects requests without it.
	ContainerID   string `json:"container_id"`
	ContainerName string `json:"container_name,omitempty"` // K8s container name (for volume type lookup)
	// CheckpointID may be left empty, in which case the handler generates
	// a "ckpt-<unix nanos>" ID.
	CheckpointID string `json:"checkpoint_id"`
	PodName      string `json:"pod_name,omitempty"`
	PodNamespace string `json:"pod_namespace,omitempty"`
	DisableCUDA  bool   `json:"disable_cuda,omitempty"` // Disable CUDA plugin for non-GPU workloads
}
// CheckpointResponse is the response for checkpoint operations.
type CheckpointResponse struct {
	// Success is true only when the checkpoint and its done marker were
	// both written.
	Success bool `json:"success"`
	// CheckpointID and Message are filled in on success.
	CheckpointID string `json:"checkpoint_id,omitempty"`
	Message      string `json:"message,omitempty"`
	// Error describes the failure when Success is false.
	Error string `json:"error,omitempty"`
}
// CheckpointInfo represents information about a checkpoint.
// The list handler fills it from the checkpoint's manifest.
type CheckpointInfo struct {
	ID           string    `json:"id"`
	CreatedAt    time.Time `json:"created_at"`
	SourceNode   string    `json:"source_node"`
	ContainerID  string    `json:"container_id"`
	PodName      string    `json:"pod_name"`
	PodNamespace string    `json:"pod_namespace"`
}
// ListCheckpointsResponse is the response for list checkpoints
// (GET /checkpoints).
type ListCheckpointsResponse struct {
	// Checkpoints holds one entry per checkpoint whose manifest could be read.
	Checkpoints []CheckpointInfo `json:"checkpoints"`
}
// HealthResponse is the response for health check (GET /health).
type HealthResponse struct {
	// Status is set to "healthy" by the health handler.
	Status string `json:"status"`
	// NodeName is the node name from the server configuration.
	NodeName string `json:"node_name"`
}
deploy/chrek/pkg/restore/config.go
0 → 100644
View file @
d381e6ff
// config.go defines the RestoreRequest struct for CRIU restore operations.
// CRIU options come from the saved CheckpointManifest, not from this request.
//
// The restore-entrypoint runs in placeholder containers which do NOT mount the
// ConfigMap. Static defaults are hardcoded here; per-pod dynamic values come
// from environment variables injected by the operator.
package
restore
import
(
"context"
"encoding/json"
"fmt"
"os"
"strings"
"time"
"github.com/sirupsen/logrus"
"github.com/ai-dynamo/dynamo/deploy/chrek/pkg/checkpoint"
)
// RestoreLogFilename is the CRIU restore log filename.
const RestoreLogFilename = "restore.log"

// CRIULogDir is the directory where CRIU restore logs are copied for debugging.
const CRIULogDir = "/checkpoints/restore-logs"

// RestoreTriggerPath is the default path to the trigger file for
// trigger-based restore.
const RestoreTriggerPath = "/tmp/restore-trigger"
// RestoreRequest holds runtime request inputs for the restore entrypoint.
// CRIU options are NOT stored here - they come from the saved CheckpointManifest.
type RestoreRequest struct {
	// === Per-pod dynamic values (from operator-injected env vars) ===

	// CheckpointPath is the base directory containing checkpoints
	// (DYN_CHECKPOINT_PATH).
	CheckpointPath string
	// CheckpointHash is the ID/hash of the checkpoint to restore
	// (DYN_CHECKPOINT_HASH).
	CheckpointHash string
	// CheckpointLocation is the full resolved path to the checkpoint directory.
	// It is taken from DYN_CHECKPOINT_LOCATION when set; otherwise it is
	// CheckpointPath + "/" + CheckpointHash when both are non-empty.
	CheckpointLocation string
	// SkipWaitForCheckpoint controls the entrypoint behavior. It is true when
	// SKIP_WAIT_FOR_CHECKPOINT is "1".
	SkipWaitForCheckpoint bool
	// ColdStartArgs is the command+args to exec if no checkpoint is available.
	ColdStartArgs []string
	// Debug enables debug logging. It is true when DEBUG is "1".
	Debug bool

	// === Static defaults (hardcoded) ===

	// RestoreMarkerFilePath is where restore-entrypoint writes a marker before CRIU restore.
	// NewRestoreRequest reads it from DYN_RESTORE_MARKER_FILE and fails when
	// that variable is empty.
	RestoreMarkerFilePath string
	// RestoreTrigger is the path to the trigger file that signals restore should start.
	RestoreTrigger string
	// WaitTimeout is the maximum time to wait for a checkpoint.
	// Zero means wait indefinitely. NewRestoreRequest leaves it at zero.
	WaitTimeout time.Duration
}
// ConfigError reports that a configuration value failed validation.
type ConfigError struct {
	// Field names the offending setting (e.g. an environment variable).
	Field string
	// Message explains what is wrong with it.
	Message string
}

// Error implements the error interface, formatting the error as
// "config error: <Field>: <Message>".
func (e *ConfigError) Error() string {
	return "config error: " + e.Field + ": " + e.Message
}
// NewRestoreRequest creates a RestoreRequest with hardcoded defaults and
// operator-injected environment variable values.
func
NewRestoreRequest
(
args
[]
string
)
(
*
RestoreRequest
,
error
)
{
cfg
:=
&
RestoreRequest
{
RestoreTrigger
:
RestoreTriggerPath
,
ColdStartArgs
:
args
,
}
if
v
:=
os
.
Getenv
(
"DYN_CHECKPOINT_PATH"
);
v
!=
""
{
cfg
.
CheckpointPath
=
v
}
if
v
:=
os
.
Getenv
(
"DYN_CHECKPOINT_HASH"
);
v
!=
""
{
cfg
.
CheckpointHash
=
v
}
if
v
:=
os
.
Getenv
(
"DYN_CHECKPOINT_LOCATION"
);
v
!=
""
{
cfg
.
CheckpointLocation
=
v
}
else
if
cfg
.
CheckpointPath
!=
""
&&
cfg
.
CheckpointHash
!=
""
{
cfg
.
CheckpointLocation
=
cfg
.
CheckpointPath
+
"/"
+
cfg
.
CheckpointHash
}
cfg
.
SkipWaitForCheckpoint
=
os
.
Getenv
(
"SKIP_WAIT_FOR_CHECKPOINT"
)
==
"1"
cfg
.
Debug
=
os
.
Getenv
(
"DEBUG"
)
==
"1"
cfg
.
RestoreMarkerFilePath
=
os
.
Getenv
(
"DYN_RESTORE_MARKER_FILE"
)
if
cfg
.
RestoreMarkerFilePath
==
""
{
return
nil
,
&
ConfigError
{
Field
:
"DYN_RESTORE_MARKER_FILE"
,
Message
:
"must be set"
,
}
}
return
cfg
,
nil
}
// checkpointDoneMarker is the JSON content expected in a checkpoint.done
// marker file, as parsed by checkpointDoneSucceeded.
type checkpointDoneMarker struct {
	// Success must be true for the checkpoint to be considered restorable.
	Success bool `json:"success"`
	// Error optionally carries the failure reason; it is only used for logging.
	Error string `json:"error,omitempty"`
}
func
checkpointDoneSucceeded
(
donePath
string
,
log
*
logrus
.
Entry
)
bool
{
data
,
err
:=
os
.
ReadFile
(
donePath
)
if
err
!=
nil
{
log
.
WithError
(
err
)
.
WithField
(
"path"
,
donePath
)
.
Warn
(
"Failed to read checkpoint.done marker"
)
return
false
}
var
marker
checkpointDoneMarker
if
err
:=
json
.
Unmarshal
(
data
,
&
marker
);
err
!=
nil
{
log
.
WithError
(
err
)
.
WithField
(
"path"
,
donePath
)
.
Warn
(
"Failed to parse checkpoint.done marker"
)
return
false
}
if
!
marker
.
Success
{
fields
:=
logrus
.
Fields
{
"path"
:
donePath
}
if
marker
.
Error
!=
""
{
fields
[
"error"
]
=
marker
.
Error
}
log
.
WithFields
(
fields
)
.
Warn
(
"checkpoint.done marker reports failed checkpoint"
)
return
false
}
return
true
}
// ShouldRestore checks if a restore should be performed.
// Returns the checkpoint path and true if restore should proceed.
func
ShouldRestore
(
cfg
*
RestoreRequest
,
log
*
logrus
.
Entry
)
(
string
,
bool
)
{
// Method 1: Checkpoint location is set and checkpoint is fully complete
if
cfg
.
CheckpointLocation
!=
""
{
donePath
:=
cfg
.
CheckpointLocation
+
"/"
+
checkpoint
.
CheckpointDoneFilename
if
_
,
err
:=
os
.
Stat
(
donePath
);
err
==
nil
{
if
checkpointDoneSucceeded
(
donePath
,
log
)
{
log
.
WithField
(
"path"
,
cfg
.
CheckpointLocation
)
.
Info
(
"Checkpoint found (checkpoint.done success=true)"
)
return
cfg
.
CheckpointLocation
,
true
}
}
// Fallback: check for manifest.yaml but warn about potential race condition.
manifestPath
:=
cfg
.
CheckpointLocation
+
"/"
+
checkpoint
.
CheckpointManifestFilename
if
_
,
err
:=
os
.
Stat
(
manifestPath
);
err
==
nil
{
log
.
WithFields
(
logrus
.
Fields
{
"path"
:
cfg
.
CheckpointLocation
,
"warning"
:
"checkpoint.done marker not found, checkpoint may be incomplete"
,
})
.
Warn
(
"Checkpoint manifest found but checkpoint.done missing - checkpoint may still be in progress"
)
}
}
// Method 2: Restore trigger file exists with checkpoint path
if
cfg
.
RestoreTrigger
!=
""
{
data
,
err
:=
os
.
ReadFile
(
cfg
.
RestoreTrigger
)
if
err
==
nil
{
checkpointPath
:=
strings
.
TrimSpace
(
string
(
data
))
if
checkpointPath
!=
""
{
donePath
:=
checkpointPath
+
"/"
+
checkpoint
.
CheckpointDoneFilename
if
_
,
err
:=
os
.
Stat
(
donePath
);
err
==
nil
{
if
checkpointDoneSucceeded
(
donePath
,
log
)
{
log
.
WithField
(
"path"
,
checkpointPath
)
.
Info
(
"Restore triggered via file (checkpoint.done success=true)"
)
return
checkpointPath
,
true
}
}
}
}
}
return
""
,
false
}
// WaitForCheckpoint waits for a checkpoint to become available.
// If cfg.WaitTimeout is zero, waits indefinitely (until ctx is cancelled).
func
WaitForCheckpoint
(
ctx
context
.
Context
,
cfg
*
RestoreRequest
,
log
*
logrus
.
Entry
)
(
string
,
error
)
{
if
cfg
.
WaitTimeout
>
0
{
log
.
WithField
(
"timeout"
,
cfg
.
WaitTimeout
)
.
Info
(
"Waiting for checkpoint"
)
}
else
{
log
.
Info
(
"Waiting for checkpoint indefinitely"
)
}
startTime
:=
time
.
Now
()
ticker
:=
time
.
NewTicker
(
time
.
Second
)
defer
ticker
.
Stop
()
lastLog
:=
time
.
Now
()
for
{
select
{
case
<-
ctx
.
Done
()
:
return
""
,
ctx
.
Err
()
case
<-
ticker
.
C
:
if
path
,
ok
:=
ShouldRestore
(
cfg
,
log
);
ok
{
return
path
,
nil
}
// Log progress every 30 seconds
if
time
.
Since
(
lastLog
)
>=
30
*
time
.
Second
{
elapsed
:=
time
.
Since
(
startTime
)
log
.
WithField
(
"elapsed"
,
elapsed
)
.
Info
(
"Still waiting for checkpoint..."
)
lastLog
=
time
.
Now
()
}
// Only enforce deadline if WaitTimeout is set (non-zero)
if
cfg
.
WaitTimeout
>
0
&&
time
.
Since
(
startTime
)
>=
cfg
.
WaitTimeout
{
return
""
,
fmt
.
Errorf
(
"timed out waiting for checkpoint after %s"
,
cfg
.
WaitTimeout
)
}
}
}
}
deploy/chrek/pkg/restore/criu.go
View file @
d381e6ff
...
...
@@ -12,28 +12,42 @@ import (
"github.com/ai-dynamo/dynamo/deploy/chrek/pkg/common"
)
// CRIURestoreConfig holds configuration for CRIU restore operations.
// Most options are always-on with safe defaults for K8s environments.
type
CRIURestoreConfig
struct
{
ImageDirFD
int32
RootPath
string
LogLevel
int32
LogFile
string
WorkDirFD
int32
NetNsFD
int32
// CRIURestorePlan holds configuration for CRIU restore operations.
// Most fields come from the saved CheckpointManifest.CRIUDump.CRIU settings.
type
CRIURestorePlan
struct
{
// File descriptors
ImageDirFD
int32
WorkDirFD
int32
NetNsFD
int32
// Paths
RootPath
string
LogFile
string
// Options from CheckpointManifest.CRIUDump.CRIU.
LogLevel
int32
Timeout
uint32
// CRIU timeout in seconds (0 = no timeout, required for CUDA)
ShellJob
bool
// Allow session leaders (containers are often session leaders)
TcpClose
bool
// Close TCP connections (pod IPs change on restore)
FileLocks
bool
// Allow file locks
ExtUnixSk
bool
// Allow external Unix sockets
LinkRemap
bool
// Handle deleted-but-open files via CRIU link remap
ManageCgroupsMode
string
// Cgroup handling mode: "ignore" lets K8s manage cgroups
// External mount mappings (from CheckpointManifest.CRIUDump.ExtMnt).
ExtMountMaps
[]
*
criurpc
.
ExtMountMap
}
// OpenImageDir opens a checkpoint directory and clears CLOEXEC for CRIU.
// Returns the opened file and its FD. Caller must close the file when done.
func
OpenImageDir
(
checkpointPath
string
)
(
*
os
.
File
,
int32
,
error
)
{
return
common
.
Open
Dir
ForCRIU
(
checkpointPath
)
return
common
.
Open
Path
ForCRIU
(
checkpointPath
)
}
// OpenNetworkNamespace opens the target network namespace for restore.
// Returns the opened file and its FD. Caller must close the file when done.
func
OpenNetworkNamespace
(
nsPath
string
)
(
*
os
.
File
,
int32
,
error
)
{
return
common
.
Open
Dir
ForCRIU
(
nsPath
)
return
common
.
Open
Path
ForCRIU
(
nsPath
)
}
// OpenWorkDir opens a work directory for CRIU and clears CLOEXEC.
...
...
@@ -65,62 +79,87 @@ func OpenWorkDir(workDir string, log *logrus.Entry) (*os.File, int32) {
return
workDirFile
,
int32
(
workDirFile
.
Fd
())
}
// BuildRestore
CRIU
Opts creates CRIU options for restore from a
config struct
.
// Build
CRIU
RestoreOpt
ion
s creates CRIU options for restore from a
runtime plan
.
//
// Always-on options for K8s:
// - ShellJob: containers are often session leaders
// - TcpClose: pod IPs change on restore/migration
// - FileLocks: applications use file locks
// - ExtUnixSk: containers have external Unix sockets
// - ManageCgroups (IGNORE): let K8s manage cgroups
func
BuildRestoreCRIUOpts
(
cfg
CRIURestoreConfig
)
*
criurpc
.
CriuOpts
{
cgMode
:=
criurpc
.
CriuCgMode_IGNORE
// Options from CheckpointManifest.CRIUDump.CRIU (saved at checkpoint time):
// - ShellJob, TcpClose, FileLocks, ExtUnixSk, LinkRemap, ManageCgroupsMode
//
// Hardcoded restore-specific options:
// - RstSibling: restore in detached mode
// - MntnsCompatMode: cross-container restore
// - EvasiveDevices, ForceIrmap: device/inode handling
func
BuildCRIURestoreOptions
(
plan
CRIURestorePlan
)
*
criurpc
.
CriuOpts
{
// Map cgroup management mode from plan.
var
cgMode
criurpc
.
CriuCgMode
switch
plan
.
ManageCgroupsMode
{
case
"soft"
:
cgMode
=
criurpc
.
CriuCgMode_SOFT
case
"full"
:
cgMode
=
criurpc
.
CriuCgMode_FULL
case
"strict"
:
cgMode
=
criurpc
.
CriuCgMode_STRICT
case
"ignore"
,
""
:
cgMode
=
criurpc
.
CriuCgMode_IGNORE
default
:
cgMode
=
criurpc
.
CriuCgMode_IGNORE
}
criuOpts
:=
&
criurpc
.
CriuOpts
{
ImagesDirFd
:
proto
.
Int32
(
cfg
.
ImageDirFD
),
LogLevel
:
proto
.
Int32
(
cfg
.
LogLevel
),
LogFile
:
proto
.
String
(
cfg
.
LogFile
),
ImagesDirFd
:
proto
.
Int32
(
plan
.
ImageDirFD
),
LogLevel
:
proto
.
Int32
(
plan
.
LogLevel
),
LogFile
:
proto
.
String
(
plan
.
LogFile
),
// Root filesystem - use current container's root
Root
:
proto
.
String
(
cfg
.
RootPath
),
Root
:
proto
.
String
(
plan
.
RootPath
),
// Restore in detached mode - process runs in background
// Restore in detached mode - process runs in background
(restore-specific)
RstSibling
:
proto
.
Bool
(
true
),
// Mount namespace compatibility mode for cross-container restore
MntnsCompatMode
:
proto
.
Bool
(
true
),
// Always-on for K8s environments
ShellJob
:
proto
.
Bool
(
true
),
TcpClose
:
proto
.
Bool
(
true
),
FileLocks
:
proto
.
Bool
(
true
),
ExtUnixSk
:
proto
.
Bool
(
true
),
// Cgroup management - ignore to avoid conflicts
// Mount namespace mode:
// - MntnsCompatMode=false (default): Uses mount-v2 with MOVE_MOUNT_SET_GROUP (kernel 5.15+)
// This is preferred as it doesn't create temp dirs in /tmp
// - MntnsCompatMode=true: Uses compat mode which creates /tmp/cr-tmpfs.XXX
// This can cause "Device or resource busy" errors on cleanup
// We explicitly set to false to use mount-v2 (requires kernel 5.15+)
MntnsCompatMode
:
proto
.
Bool
(
false
),
// Options from saved CheckpointManifest.CRIUDump.CRIU.
ShellJob
:
proto
.
Bool
(
plan
.
ShellJob
),
TcpClose
:
proto
.
Bool
(
plan
.
TcpClose
),
FileLocks
:
proto
.
Bool
(
plan
.
FileLocks
),
ExtUnixSk
:
proto
.
Bool
(
plan
.
ExtUnixSk
),
LinkRemap
:
proto
.
Bool
(
plan
.
LinkRemap
),
// Cgroup management from saved settings.
ManageCgroups
:
proto
.
Bool
(
true
),
ManageCgroupsMode
:
&
cgMode
,
// Device and inode handling
// Device and inode handling
(restore-specific)
EvasiveDevices
:
proto
.
Bool
(
true
),
ForceIrmap
:
proto
.
Bool
(
true
),
// External mount mappings
ExtMnt
:
cfg
.
ExtMountMaps
,
ExtMnt
:
plan
.
ExtMountMaps
,
}
// Add network namespace inheritance if provided
if
cfg
.
NetNsFD
>=
0
{
if
plan
.
NetNsFD
>=
0
{
criuOpts
.
InheritFd
=
[]
*
criurpc
.
InheritFd
{
{
Key
:
proto
.
String
(
"extNetNs"
),
Fd
:
proto
.
Int32
(
cfg
.
NetNsFD
),
Fd
:
proto
.
Int32
(
plan
.
NetNsFD
),
},
}
}
// Add work directory if specified
if
cfg
.
WorkDirFD
>=
0
{
criuOpts
.
WorkDirFd
=
proto
.
Int32
(
cfg
.
WorkDirFD
)
if
plan
.
WorkDirFD
>=
0
{
criuOpts
.
WorkDirFd
=
proto
.
Int32
(
plan
.
WorkDirFD
)
}
// Add timeout if specified (required for CUDA restores)
if
plan
.
Timeout
>
0
{
criuOpts
.
Timeout
=
proto
.
Uint32
(
plan
.
Timeout
)
}
return
criuOpts
...
...
deploy/chrek/pkg/restore/filesystem.go
View file @
d381e6ff
...
...
@@ -8,19 +8,14 @@ import (
"path/filepath"
"github.com/sirupsen/logrus"
)
const
(
// RootfsDiffFilename is the name of the rootfs diff tar file
RootfsDiffFilename
=
"rootfs-diff.tar"
// DeletedFilesFilename is the name of the deleted files JSON
DeletedFilesFilename
=
"deleted-files.json"
"github.com/ai-dynamo/dynamo/deploy/chrek/pkg/checkpoint"
)
// ApplyRootfsDiff extracts the rootfs-diff.tar from the checkpoint to the target root.
// This restores filesystem changes that were made in the original container.
func
ApplyRootfsDiff
(
checkpointPath
,
targetRoot
string
,
log
*
logrus
.
Entry
)
error
{
rootfsDiffPath
:=
filepath
.
Join
(
checkpointPath
,
RootfsDiffFilename
)
rootfsDiffPath
:=
filepath
.
Join
(
checkpointPath
,
checkpoint
.
RootfsDiffFilename
)
// Check if rootfs-diff.tar exists
if
_
,
err
:=
os
.
Stat
(
rootfsDiffPath
);
os
.
IsNotExist
(
err
)
{
...
...
@@ -30,15 +25,10 @@ func ApplyRootfsDiff(checkpointPath, targetRoot string, log *logrus.Entry) error
log
.
WithField
(
"path"
,
rootfsDiffPath
)
.
Info
(
"Applying rootfs diff"
)
// Build tar command with options to handle conflicts:
// --keep-old-files: Don't overwrite existing files (may already be mounted)
// Exclude paths that are typically mounted read-only by the container runtime
// Exclusions are already applied at checkpoint time (bind mounts, system dirs, etc.)
// so we just extract with --keep-old-files to avoid overwriting existing files.
cmd
:=
exec
.
Command
(
"tar"
,
"--keep-old-files"
,
"--exclude=./run/secrets"
,
"--exclude=./etc/resolv.conf"
,
"--exclude=./etc/hostname"
,
"--exclude=./etc/hosts"
,
"-C"
,
targetRoot
,
"-xf"
,
rootfsDiffPath
,
)
...
...
@@ -61,7 +51,7 @@ func ApplyRootfsDiff(checkpointPath, targetRoot string, log *logrus.Entry) error
// ApplyDeletedFiles removes files that were deleted in the original container.
// These are tracked via overlay whiteout markers (.wh.<filename>).
func
ApplyDeletedFiles
(
checkpointPath
,
targetRoot
string
,
log
*
logrus
.
Entry
)
error
{
deletedFilesPath
:=
filepath
.
Join
(
checkpointPath
,
DeletedFilesFilename
)
deletedFilesPath
:=
filepath
.
Join
(
checkpointPath
,
checkpoint
.
DeletedFilesFilename
)
// Check if deleted-files.json exists
data
,
err
:=
os
.
ReadFile
(
deletedFilesPath
)
...
...
@@ -109,8 +99,5 @@ func ApplyDeletedFiles(checkpointPath, targetRoot string, log *logrus.Entry) err
func
CheckpointFilesExist
(
checkpointPath
string
)
bool
{
// Check for CRIU image files (core-*.img is always present)
matches
,
err
:=
filepath
.
Glob
(
filepath
.
Join
(
checkpointPath
,
"core-*.img"
))
if
err
!=
nil
||
len
(
matches
)
==
0
{
return
false
}
return
true
return
err
==
nil
&&
len
(
matches
)
>
0
}
deploy/chrek/pkg/restore/link_remap.go
0 → 100644
View file @
d381e6ff
// Package restore provides CRIU restore operations.
package
restore
import
(
"encoding/binary"
"fmt"
"io"
"os"
"path/filepath"
"strings"
"github.com/checkpoint-restore/go-criu/v7/crit"
"github.com/checkpoint-restore/go-criu/v7/crit/images/fdinfo"
"github.com/checkpoint-restore/go-criu/v7/crit/images/regfile"
remap_file_path
"github.com/checkpoint-restore/go-criu/v7/crit/images/remap-file-path"
"github.com/sirupsen/logrus"
"google.golang.org/protobuf/proto"
)
// CreateLinkRemapStubs parses CRIU images to find remapped files and creates
// the link_remap stub files needed for CRIU restore.
//
// Background: When a file is unlink()'d but a process still has an open FD to it,
// CRIU handles this via "link remapping":
//
// - During dump: CRIU creates a hardlink link_remap.<id> -> original_file
// - During restore: CRIU does linkat(link_remap.<id>, original_path) to recreate it
//
// The link_remap file only exists on the original node's filesystem. For cross-node
// restore, we must create stub files so CRIU can hardlink from them.
//
// Without these stubs, CRIU fails with:
//
// "Can't link <path>/link_remap.X -> <path>/original: No such file or directory"
func
CreateLinkRemapStubs
(
checkpointPath
string
,
log
*
logrus
.
Entry
)
error
{
// 1. Parse remap-fpath.img to find files that need remapping
remapPath
:=
filepath
.
Join
(
checkpointPath
,
"remap-fpath.img"
)
remaps
,
err
:=
parseRemapFpath
(
remapPath
)
if
err
!=
nil
{
if
os
.
IsNotExist
(
err
)
{
log
.
Debug
(
"No remap-fpath.img found, no link_remap stubs needed"
)
return
nil
}
return
fmt
.
Errorf
(
"failed to parse remap-fpath.img: %w"
,
err
)
}
if
len
(
remaps
)
==
0
{
log
.
Debug
(
"No file remaps found in checkpoint"
)
return
nil
}
// 2. Parse file info to build ID -> fileInfo mapping
// Try reg-files.img first (older CRIU format), fall back to files.img (newer format)
regFilesPath
:=
filepath
.
Join
(
checkpointPath
,
"reg-files.img"
)
filesPath
:=
filepath
.
Join
(
checkpointPath
,
"files.img"
)
var
fileMap
map
[
uint32
]
fileInfo
var
parseErr
error
// Try reg-files.img first (older CRIU format)
fileMap
,
parseErr
=
parseRegFilesWithMode
(
regFilesPath
)
if
parseErr
!=
nil
{
log
.
WithError
(
parseErr
)
.
Debug
(
"Could not parse reg-files.img, trying files.img"
)
// Fall back to files.img (newer format)
fileMap
,
parseErr
=
parseFilesImgWithMode
(
filesPath
)
if
parseErr
!=
nil
{
log
.
WithError
(
parseErr
)
.
WithField
(
"remap_count"
,
len
(
remaps
))
.
Warn
(
"Found remap entries but could not parse reg-files.img or files.img — link_remap stubs will not be created"
)
return
fmt
.
Errorf
(
"found %d remap entries but could not build file map: %w"
,
len
(
remaps
),
parseErr
)
}
}
// 3. Create link_remap stub files for all remapped files
var
created
[]
string
for
_
,
remap
:=
range
remaps
{
// Look up the original file by ID
origInfo
,
ok
:=
fileMap
[
remap
.
origID
]
if
!
ok
{
log
.
WithField
(
"orig_id"
,
remap
.
origID
)
.
Debug
(
"Original file ID not found in file map, skipping"
)
continue
}
// Look up the remap file path by remap ID
// This is the link_remap.XXX file that CRIU will hardlink FROM
remapInfo
,
ok
:=
fileMap
[
remap
.
remapID
]
var
remapName
string
var
mode
os
.
FileMode
if
ok
{
remapName
=
remapInfo
.
name
mode
=
remapInfo
.
mode
}
else
{
// If we can't find the remap file in fileMap, construct it
// CRIU creates link_remap files in the same directory as the original
// with format: link_remap.<remap_id>
dir
:=
filepath
.
Dir
(
origInfo
.
name
)
if
!
strings
.
HasPrefix
(
dir
,
"/"
)
{
dir
=
"/"
+
dir
}
remapName
=
filepath
.
Join
(
dir
,
fmt
.
Sprintf
(
"link_remap.%d"
,
remap
.
remapID
))
// Use original file's mode since we don't have the remap file's mode
mode
=
origInfo
.
mode
log
.
WithFields
(
logrus
.
Fields
{
"orig_id"
:
remap
.
origID
,
"remap_id"
:
remap
.
remapID
,
"orig_path"
:
origInfo
.
name
,
"remap_path"
:
remapName
,
"mode"
:
fmt
.
Sprintf
(
"%04o"
,
mode
),
})
.
Debug
(
"Constructed link_remap path from remap ID"
)
}
// Normalize path
if
!
strings
.
HasPrefix
(
remapName
,
"/"
)
{
remapName
=
"/"
+
remapName
}
// Check if the link_remap file already exists
if
_
,
err
:=
os
.
Stat
(
remapName
);
err
==
nil
{
log
.
WithField
(
"remap_file"
,
remapName
)
.
Debug
(
"Link remap file already exists"
)
continue
}
// Create the link_remap stub file with correct permissions
// CRIU will hardlink FROM this file TO the original path
if
err
:=
createLinkRemapStub
(
remapName
,
mode
);
err
!=
nil
{
log
.
WithError
(
err
)
.
WithFields
(
logrus
.
Fields
{
"remap_file"
:
remapName
,
"target"
:
origInfo
.
name
,
"mode"
:
fmt
.
Sprintf
(
"%04o"
,
mode
),
})
.
Warn
(
"Failed to create link_remap stub"
)
continue
}
created
=
append
(
created
,
filepath
.
Base
(
remapName
))
log
.
WithFields
(
logrus
.
Fields
{
"remap_file"
:
remapName
,
"target"
:
origInfo
.
name
,
"mode"
:
fmt
.
Sprintf
(
"%04o"
,
mode
),
})
.
Debug
(
"Created link_remap stub file"
)
}
if
len
(
created
)
>
0
{
log
.
WithFields
(
logrus
.
Fields
{
"count"
:
len
(
created
),
"remap_files"
:
created
,
})
.
Info
(
"Created link_remap stub files for CRIU restore"
)
}
else
{
log
.
Debug
(
"No link_remap stubs needed"
)
}
return
nil
}
// fileInfo holds file metadata from CRIU checkpoint images.
// It is the value type of the ID -> file maps built by parseRegFilesWithMode
// and parseFilesImgWithMode.
type fileInfo struct {
	// name is the file path exactly as stored in the image's RegFileEntry.
	name string
	// mode holds permission bits only (the parsers mask the recorded mode
	// with 0777 and substitute 0600 when the result is zero).
	mode os.FileMode
}
// remapEntry represents a file remap entry from CRIU.
// Each value is copied from one RemapFilePathEntry read out of remap-fpath.img.
type remapEntry struct {
	// origID is the ID of the original (remapped) file.
	origID uint32
	// remapID is the ID of the file it was remapped to.
	remapID uint32
	// remapType is the raw remap type from the image entry.
	// NOTE(review): it is recorded here but never read by
	// CreateLinkRemapStubs — confirm whether non-link remap types should be
	// filtered out.
	remapType int32
}
// parseRemapFpath parses the remap-fpath.img file
func
parseRemapFpath
(
path
string
)
([]
remapEntry
,
error
)
{
f
,
err
:=
os
.
Open
(
path
)
if
err
!=
nil
{
return
nil
,
err
}
defer
f
.
Close
()
// Read and validate magic number using go-criu's ReadMagic
magic
,
err
:=
crit
.
ReadMagic
(
f
)
if
err
!=
nil
{
return
nil
,
fmt
.
Errorf
(
"failed to read magic: %w"
,
err
)
}
if
magic
!=
"REMAP_FPATH"
{
return
nil
,
fmt
.
Errorf
(
"unexpected magic: %s (expected REMAP_FPATH)"
,
magic
)
}
var
entries
[]
remapEntry
sizeBuf
:=
make
([]
byte
,
4
)
for
{
// Read entry size
_
,
err
:=
io
.
ReadFull
(
f
,
sizeBuf
)
if
err
==
io
.
EOF
||
err
==
io
.
ErrUnexpectedEOF
{
break
}
if
err
!=
nil
{
return
nil
,
fmt
.
Errorf
(
"failed to read entry size: %w"
,
err
)
}
entrySize
:=
binary
.
LittleEndian
.
Uint32
(
sizeBuf
)
entryBuf
:=
make
([]
byte
,
entrySize
)
if
_
,
err
:=
io
.
ReadFull
(
f
,
entryBuf
);
err
!=
nil
{
return
nil
,
fmt
.
Errorf
(
"failed to read entry data: %w"
,
err
)
}
// Parse protobuf
entry
:=
&
remap_file_path
.
RemapFilePathEntry
{}
if
err
:=
proto
.
Unmarshal
(
entryBuf
,
entry
);
err
!=
nil
{
return
nil
,
fmt
.
Errorf
(
"failed to unmarshal entry: %w"
,
err
)
}
entries
=
append
(
entries
,
remapEntry
{
origID
:
entry
.
GetOrigId
(),
remapID
:
entry
.
GetRemapId
(),
remapType
:
int32
(
entry
.
GetRemapType
()),
})
}
return
entries
,
nil
}
// parseRegFilesWithMode parses the reg-files.img file and returns a map of ID -> fileInfo
func
parseRegFilesWithMode
(
path
string
)
(
map
[
uint32
]
fileInfo
,
error
)
{
f
,
err
:=
os
.
Open
(
path
)
if
err
!=
nil
{
return
nil
,
err
}
defer
f
.
Close
()
// Read and validate magic number using go-criu's ReadMagic
magic
,
err
:=
crit
.
ReadMagic
(
f
)
if
err
!=
nil
{
return
nil
,
fmt
.
Errorf
(
"failed to read magic: %w"
,
err
)
}
if
magic
!=
"REG_FILES"
{
return
nil
,
fmt
.
Errorf
(
"unexpected magic: %s (expected REG_FILES)"
,
magic
)
}
fileMap
:=
make
(
map
[
uint32
]
fileInfo
)
sizeBuf
:=
make
([]
byte
,
4
)
for
{
// Read entry size
_
,
err
:=
io
.
ReadFull
(
f
,
sizeBuf
)
if
err
==
io
.
EOF
||
err
==
io
.
ErrUnexpectedEOF
{
break
}
if
err
!=
nil
{
return
nil
,
fmt
.
Errorf
(
"failed to read entry size: %w"
,
err
)
}
entrySize
:=
binary
.
LittleEndian
.
Uint32
(
sizeBuf
)
entryBuf
:=
make
([]
byte
,
entrySize
)
if
_
,
err
:=
io
.
ReadFull
(
f
,
entryBuf
);
err
!=
nil
{
return
nil
,
fmt
.
Errorf
(
"failed to read entry data: %w"
,
err
)
}
// Parse protobuf
entry
:=
&
regfile
.
RegFileEntry
{}
if
err
:=
proto
.
Unmarshal
(
entryBuf
,
entry
);
err
!=
nil
{
return
nil
,
fmt
.
Errorf
(
"failed to unmarshal entry: %w"
,
err
)
}
// Convert CRIU mode (includes file type bits) to os.FileMode
// CRIU stores the full st_mode, we need just the permission bits
mode
:=
os
.
FileMode
(
entry
.
GetMode
()
&
0777
)
if
mode
==
0
{
mode
=
0600
// Default to owner read/write if mode not set
}
fileMap
[
entry
.
GetId
()]
=
fileInfo
{
name
:
entry
.
GetName
(),
mode
:
mode
,
}
}
return
fileMap
,
nil
}
// parseFilesImgWithMode parses the files.img file and returns a map of ID -> fileInfo
// This is the newer CRIU format where file info is embedded in FileEntry messages
func
parseFilesImgWithMode
(
path
string
)
(
map
[
uint32
]
fileInfo
,
error
)
{
f
,
err
:=
os
.
Open
(
path
)
if
err
!=
nil
{
return
nil
,
err
}
defer
f
.
Close
()
// Read and validate magic number using go-criu's ReadMagic
magic
,
err
:=
crit
.
ReadMagic
(
f
)
if
err
!=
nil
{
return
nil
,
fmt
.
Errorf
(
"failed to read magic: %w"
,
err
)
}
if
magic
!=
"FILES"
{
return
nil
,
fmt
.
Errorf
(
"unexpected magic: %s (expected FILES)"
,
magic
)
}
fileMap
:=
make
(
map
[
uint32
]
fileInfo
)
sizeBuf
:=
make
([]
byte
,
4
)
for
{
// Read entry size
_
,
err
:=
io
.
ReadFull
(
f
,
sizeBuf
)
if
err
==
io
.
EOF
||
err
==
io
.
ErrUnexpectedEOF
{
break
}
if
err
!=
nil
{
return
nil
,
fmt
.
Errorf
(
"failed to read entry size: %w"
,
err
)
}
entrySize
:=
binary
.
LittleEndian
.
Uint32
(
sizeBuf
)
entryBuf
:=
make
([]
byte
,
entrySize
)
if
_
,
err
:=
io
.
ReadFull
(
f
,
entryBuf
);
err
!=
nil
{
return
nil
,
fmt
.
Errorf
(
"failed to read entry data: %w"
,
err
)
}
// Parse protobuf as FileEntry
entry
:=
&
fdinfo
.
FileEntry
{}
if
err
:=
proto
.
Unmarshal
(
entryBuf
,
entry
);
err
!=
nil
{
return
nil
,
fmt
.
Errorf
(
"failed to unmarshal entry: %w"
,
err
)
}
// Extract fileinfo from embedded RegFileEntry if present
if
entry
.
GetReg
()
!=
nil
{
reg
:=
entry
.
GetReg
()
// Convert CRIU mode to os.FileMode (permission bits only)
mode
:=
os
.
FileMode
(
reg
.
GetMode
()
&
0777
)
if
mode
==
0
{
mode
=
0600
// Default to owner read/write if mode not set
}
fileMap
[
entry
.
GetId
()]
=
fileInfo
{
name
:
reg
.
GetName
(),
mode
:
mode
,
}
}
}
return
fileMap
,
nil
}
// createLinkRemapStub creates a stub file for CRIU link_remap at path.
//
// Missing parent directories are created with mode 0755. The file is created
// (or truncated if it already exists), given exactly the permission bits in
// mode, and filled with 32 zero bytes.
//
// It returns an error if the directory or file cannot be created, the mode
// cannot be applied, or the stub content cannot be written and flushed.
func createLinkRemapStub(path string, mode os.FileMode) error {
	// Ensure parent directory exists
	dir := filepath.Dir(path)
	if err := os.MkdirAll(dir, 0755); err != nil {
		return fmt.Errorf("failed to create directory %s: %w", dir, err)
	}

	// Create file with the specified mode
	// CRIU validates the file mode matches what was recorded at checkpoint time
	f, err := os.OpenFile(path, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, mode)
	if err != nil {
		return fmt.Errorf("failed to create file: %w", err)
	}

	// The mode given to OpenFile is filtered by the process umask and is
	// ignored entirely when the file already exists, so set it explicitly.
	if err := f.Chmod(mode); err != nil {
		_ = f.Close() // best effort; the Chmod failure is the error to report
		return fmt.Errorf("failed to set mode %04o on %s: %w", mode, path, err)
	}

	// Write 32 bytes of zeros as stub content
	// This provides a minimal valid file for CRIU to hardlink from
	if _, err := f.Write(make([]byte, 32)); err != nil {
		_ = f.Close() // best effort; the write failure is the error to report
		return fmt.Errorf("failed to write stub data: %w", err)
	}

	// Close explicitly: a failed close can mean the data never reached disk.
	if err := f.Close(); err != nil {
		return fmt.Errorf("failed to close stub file %s: %w", path, err)
	}
	return nil
}
deploy/chrek/pkg/restore/mounts.go
View file @
d381e6ff
...
...
@@ -6,81 +6,44 @@ import (
criurpc
"github.com/checkpoint-restore/go-criu/v7/rpc"
"google.golang.org/protobuf/proto"
"github.com/ai-dynamo/dynamo/deploy/chrek/pkg/c
ommon
"
"github.com/ai-dynamo/dynamo/deploy/chrek/pkg/c
heckpoint
"
)
// GenerateExtMountMaps generates external mount mappings for CRIU restore.
// It parses /proc/1/mountinfo (the restore container's mounts) and adds
// mappings for all mount points plus masked/readonly paths from common.
//
// If meta is nil or doesn't have OCI-derived paths, falls back to defaults.
func
GenerateExtMountMaps
(
meta
*
common
.
CheckpointMetadata
)
([]
*
criurpc
.
ExtMountMap
,
error
)
{
var
maps
[]
*
criurpc
.
ExtMountMap
addedMounts
:=
make
(
map
[
string
]
bool
)
// It reuses the exact dump-time ext-mount plan persisted in checkpoint manifest.
func
GenerateExtMountMaps
(
data
*
checkpoint
.
CheckpointManifest
)
([]
*
criurpc
.
ExtMountMap
,
error
)
{
if
data
==
nil
{
return
nil
,
fmt
.
Errorf
(
"checkpoint manifest is required"
)
}
if
len
(
data
.
CRIUDump
.
ExtMnt
)
==
0
{
return
nil
,
fmt
.
Errorf
(
"checkpoint manifest is missing criuDump.extMnt"
)
}
// Add root filesystem mapping first
maps
=
append
(
maps
,
&
criurpc
.
ExtMountMap
{
maps
:=
[]
*
criurpc
.
ExtMountMap
{{
Key
:
proto
.
String
(
"/"
),
Val
:
proto
.
String
(
"."
),
}
)
addedMounts
[
"/"
]
=
true
}
}
addedMounts
:=
map
[
string
]
struct
{}{
"/"
:
{}}
// Parse /proc/1/mountinfo for all current mount points
mountPoints
,
err
:=
common
.
GetMountPointPaths
(
"/proc/1/mountinfo"
)
if
err
!=
nil
{
return
nil
,
fmt
.
Errorf
(
"failed to parse mountinfo: %w"
,
err
)
}
for
_
,
mountPoint
:=
range
mountPoints
{
if
addedMounts
[
mountPoint
]
||
mountPoint
==
"/"
{
// Replay dump-time ext-mount plan exactly, with restore-specific root remap.
for
_
,
mount
:=
range
data
.
CRIUDump
.
ExtMnt
{
key
:=
mount
.
Key
if
key
==
""
||
key
==
"/"
{
continue
}
maps
=
append
(
maps
,
&
criurpc
.
ExtMountMap
{
Key
:
proto
.
String
(
mountPoint
),
Val
:
proto
.
String
(
mountPoint
),
})
addedMounts
[
mountPoint
]
=
true
}
// Use masked paths from checkpoint metadata (OCI spec derived)
// Fall back to defaults for backwards compatibility
maskedPaths
:=
common
.
DefaultMaskedPaths
()
if
meta
!=
nil
&&
len
(
meta
.
MaskedPaths
)
>
0
{
maskedPaths
=
meta
.
MaskedPaths
}
for
_
,
path
:=
range
maskedPaths
{
if
addedMounts
[
path
]
{
if
_
,
exists
:=
addedMounts
[
key
];
exists
{
continue
}
val
:=
mount
.
Val
if
val
==
""
{
val
=
key
}
maps
=
append
(
maps
,
&
criurpc
.
ExtMountMap
{
Key
:
proto
.
String
(
path
),
Val
:
proto
.
String
(
path
),
Key
:
proto
.
String
(
key
),
Val
:
proto
.
String
(
val
),
})
addedMounts
[
path
]
=
true
}
// Also add readonly paths from metadata if available
if
meta
!=
nil
{
for
_
,
path
:=
range
meta
.
ReadonlyPaths
{
if
addedMounts
[
path
]
{
continue
}
maps
=
append
(
maps
,
&
criurpc
.
ExtMountMap
{
Key
:
proto
.
String
(
path
),
Val
:
proto
.
String
(
path
),
})
addedMounts
[
path
]
=
true
}
addedMounts
[
key
]
=
struct
{}{}
}
return
maps
,
nil
}
// AddExtMountMap is a helper to create a single ExtMountMap entry.
func
AddExtMountMap
(
key
,
val
string
)
*
criurpc
.
ExtMountMap
{
return
&
criurpc
.
ExtMountMap
{
Key
:
proto
.
String
(
key
),
Val
:
proto
.
String
(
val
),
}
}
deploy/chrek/pkg/restore/options.go
deleted
100644 → 0
View file @
b6824ae0
// Package restore provides CRIU restore operations for self-restoring placeholder containers.
package
restore
import (
	"context"
	"os"
	"strconv"
	"strings"
	"time"

	criurpc "github.com/checkpoint-restore/go-criu/v7/rpc"
	"github.com/sirupsen/logrus"

	"github.com/ai-dynamo/dynamo/deploy/chrek/pkg/common"
)
// Config holds the configuration for the restore entrypoint.
// These values are typically set via environment variables; ConfigFromEnv
// shows the variable and default used for each field.
type Config struct {
	// CheckpointPath is the base directory containing checkpoints (default: /checkpoints)
	// Env: DYN_CHECKPOINT_PATH
	CheckpointPath string

	// CheckpointHash is the ID/hash of the checkpoint to restore
	// Env: DYN_CHECKPOINT_HASH
	CheckpointHash string

	// RestoreTrigger is the path to the trigger file that signals restore should start
	// Env: RESTORE_TRIGGER (default: /tmp/restore-trigger)
	RestoreTrigger string

	// WaitForCheckpoint indicates whether to wait for a checkpoint to appear
	// Env: WAIT_FOR_CHECKPOINT (enabled only when set to "1")
	WaitForCheckpoint bool

	// WaitTimeout is the maximum time to wait for a checkpoint to become available
	// Env: RESTORE_WAIT_TIMEOUT, in whole seconds (default: 300s)
	WaitTimeout time.Duration

	// CRIULogLevel is the CRIU verbosity level (0-4, default: 4)
	// Env: CRIU_LOG_LEVEL
	CRIULogLevel int32

	// DefaultCmd is the command to run if no checkpoint is available
	// Env: DEFAULT_CMD
	DefaultCmd string

	// Debug enables debug logging
	// Env: DEBUG (enabled only when set to "1")
	Debug bool

	// EmbeddedCheckpointPath is the path to an embedded checkpoint within the image
	// When set, the checkpoint data is baked into the container image itself
	// Env: EMBEDDED_CHECKPOINT_PATH (default: DefaultEmbeddedCheckpointPath)
	EmbeddedCheckpointPath string

	// SkipInFlightConnections skips in-flight TCP connections during restore
	// Env: CRIU_SKIP_IN_FLIGHT (enabled only when set to "1")
	SkipInFlightConnections bool

	// AutoDedup enables auto-deduplication of memory pages
	// Env: CRIU_AUTO_DEDUP (enabled only when set to "1")
	AutoDedup bool

	// LazyPages enables lazy page migration (experimental)
	// Env: CRIU_LAZY_PAGES (enabled only when set to "1")
	LazyPages bool

	// CRIUWorkDir is an alternative work directory for CRIU (instead of /tmp)
	// Useful when /tmp has mount issues
	// Env: CRIU_WORK_DIR (default: empty)
	CRIUWorkDir string

	// CUDAPluginDir is the path to CRIU CUDA plugin directory (e.g., /usr/local/lib/criu)
	// When set, a CRIU config file is created with libdir for CUDA plugin discovery during restore.
	// Env: CUDA_PLUGIN_DIR
	CUDAPluginDir string

	// CRIUTimeout is the CRIU timeout in seconds (required for CUDA restores)
	// Env: CRIU_TIMEOUT (default: 0)
	CRIUTimeout uint32

	// RestoreMarkerFile is the path to a marker file created before CRIU restore.
	// The restored process can check for this file to detect it was restored.
	// Env: DYN_RESTORE_MARKER_FILE (default: /tmp/dynamo-restored)
	RestoreMarkerFile string
}
// DefaultEmbeddedCheckpointPath is the default path for embedded checkpoints.
// ConfigFromEnv uses it for Config.EmbeddedCheckpointPath when the
// EMBEDDED_CHECKPOINT_PATH environment variable is unset or empty.
const DefaultEmbeddedCheckpointPath = "/embedded-checkpoint"
// ConfigFromEnv creates a Config from environment variables.
func
ConfigFromEnv
()
*
Config
{
cfg
:=
&
Config
{
CheckpointPath
:
getEnvOrDefault
(
"DYN_CHECKPOINT_PATH"
,
"/checkpoints"
),
CheckpointHash
:
os
.
Getenv
(
"DYN_CHECKPOINT_HASH"
),
RestoreTrigger
:
getEnvOrDefault
(
"RESTORE_TRIGGER"
,
"/tmp/restore-trigger"
),
WaitForCheckpoint
:
os
.
Getenv
(
"WAIT_FOR_CHECKPOINT"
)
==
"1"
,
WaitTimeout
:
parseDurationOrDefault
(
"RESTORE_WAIT_TIMEOUT"
,
300
*
time
.
Second
),
CRIULogLevel
:
parseIntOrDefault
(
"CRIU_LOG_LEVEL"
,
4
),
DefaultCmd
:
os
.
Getenv
(
"DEFAULT_CMD"
),
Debug
:
os
.
Getenv
(
"DEBUG"
)
==
"1"
,
EmbeddedCheckpointPath
:
getEnvOrDefault
(
"EMBEDDED_CHECKPOINT_PATH"
,
DefaultEmbeddedCheckpointPath
),
SkipInFlightConnections
:
os
.
Getenv
(
"CRIU_SKIP_IN_FLIGHT"
)
==
"1"
,
AutoDedup
:
os
.
Getenv
(
"CRIU_AUTO_DEDUP"
)
==
"1"
,
LazyPages
:
os
.
Getenv
(
"CRIU_LAZY_PAGES"
)
==
"1"
,
CRIUWorkDir
:
getEnvOrDefault
(
"CRIU_WORK_DIR"
,
""
),
CUDAPluginDir
:
os
.
Getenv
(
"CUDA_PLUGIN_DIR"
),
// For CUDA plugin discovery during restore
CRIUTimeout
:
uint32
(
parseIntOrDefault
(
"CRIU_TIMEOUT"
,
0
)),
RestoreMarkerFile
:
getEnvOrDefault
(
"DYN_RESTORE_MARKER_FILE"
,
"/tmp/dynamo-restored"
),
}
return
cfg
}
// RestoreOptions holds the options for a CRIU restore operation.
// Most CRIU options are hardcoded with safe K8s defaults.
// DefaultRestoreOptions fills CheckpointPath, RootPath, PidFile, LogFile and
// LogLevel; the remaining fields start at their zero values.
type RestoreOptions struct {
	// CheckpointPath is the path to the checkpoint directory
	CheckpointPath string

	// RootPath is the root filesystem path for restore (typically "/")
	RootPath string

	// PidFile is the path where CRIU writes the restored process PID
	PidFile string

	// LogFile is the name of the CRIU restore log file
	LogFile string

	// LogLevel is the CRIU logging verbosity (0-4)
	LogLevel int32

	// ExtMountMaps contains external mount mappings for CRIU
	ExtMountMaps []*criurpc.ExtMountMap

	// WorkDir is an alternative work directory for CRIU (instead of /tmp)
	WorkDir string

	// LibDir is the path to CRIU plugin directory (e.g., /usr/local/lib/criu)
	// When set, a CRIU config file is created with libdir for CUDA plugin discovery.
	LibDir string

	// Timeout is the CRIU timeout in seconds (required for CUDA restores)
	Timeout uint32
}
// DefaultRestoreOptions returns RestoreOptions with sensible defaults.
func
DefaultRestoreOptions
(
checkpointPath
string
)
*
RestoreOptions
{
return
&
RestoreOptions
{
CheckpointPath
:
checkpointPath
,
RootPath
:
"/"
,
PidFile
:
"/tmp/restored.pid"
,
LogFile
:
"restore.log"
,
LogLevel
:
4
,
}
}
// LoadRestoreOptions creates RestoreOptions from checkpoint metadata.
// CRIU options are hardcoded with safe K8s defaults; metadata is only used for mount mappings.
func
LoadRestoreOptions
(
checkpointPath
string
,
logLevel
int32
)
(
*
RestoreOptions
,
error
)
{
opts
:=
DefaultRestoreOptions
(
checkpointPath
)
opts
.
LogLevel
=
logLevel
// Load metadata for OCI-derived paths (masked/readonly paths for external mounts)
meta
,
err
:=
common
.
LoadMetadata
(
checkpointPath
)
if
err
!=
nil
{
// Return defaults if metadata is unavailable
// GenerateExtMountMaps with nil will use fallback defaults
return
opts
,
nil
}
// Pre-generate external mount maps using OCI-derived paths from metadata
// This uses masked/readonly paths from the OCI spec instead of hardcoded defaults
extMounts
,
err
:=
GenerateExtMountMaps
(
meta
)
if
err
!=
nil
{
// Fall back to defaults if generation fails
return
opts
,
nil
}
opts
.
ExtMountMaps
=
extMounts
return
opts
,
nil
}
// ShouldRestore checks if a restore should be performed.
// Returns the checkpoint path and true if restore should proceed.
// IMPORTANT: We check for checkpoint.done marker (not just metadata.json or inventory.img) because
// checkpoint.done is written LAST in the checkpoint process, after rootfs-diff.tar completes.
// Order: metadata.json -> CRIU dump (*.img files) -> rootfs-diff.tar -> checkpoint.done
func
ShouldRestore
(
cfg
*
Config
,
log
*
logrus
.
Entry
)
(
string
,
bool
)
{
// Method 0: Embedded checkpoint in image (highest priority)
// This is for self-contained checkpoint images where data is baked in
if
cfg
.
EmbeddedCheckpointPath
!=
""
{
metadataPath
:=
cfg
.
EmbeddedCheckpointPath
+
"/"
+
common
.
MetadataFilename
if
_
,
err
:=
os
.
Stat
(
metadataPath
);
err
==
nil
{
log
.
WithField
(
"path"
,
cfg
.
EmbeddedCheckpointPath
)
.
Info
(
"Embedded checkpoint found in image"
)
return
cfg
.
EmbeddedCheckpointPath
,
true
}
}
// Method 1: DYN_CHECKPOINT_HASH is set and checkpoint is fully complete
if
cfg
.
CheckpointHash
!=
""
{
checkpointPath
:=
cfg
.
CheckpointPath
+
"/"
+
cfg
.
CheckpointHash
// Check for checkpoint.done marker (written LAST after rootfs-diff.tar completes)
donePath
:=
checkpointPath
+
"/checkpoint.done"
if
_
,
err
:=
os
.
Stat
(
donePath
);
err
==
nil
{
log
.
WithField
(
"path"
,
checkpointPath
)
.
Info
(
"Checkpoint found (checkpoint.done marker present)"
)
return
checkpointPath
,
true
}
// Fallback: check for metadata.json but warn about potential race condition
metadataPath
:=
checkpointPath
+
"/"
+
common
.
MetadataFilename
if
_
,
err
:=
os
.
Stat
(
metadataPath
);
err
==
nil
{
log
.
WithFields
(
logrus
.
Fields
{
"path"
:
checkpointPath
,
"warning"
:
"checkpoint.done marker not found, checkpoint may be incomplete"
,
})
.
Warn
(
"Checkpoint metadata found but checkpoint.done missing - checkpoint may still be in progress"
)
// Don't return true here - wait for checkpoint.done
}
}
// Method 2: Restore trigger file exists with checkpoint path
if
cfg
.
RestoreTrigger
!=
""
{
data
,
err
:=
os
.
ReadFile
(
cfg
.
RestoreTrigger
)
if
err
==
nil
{
checkpointPath
:=
string
(
data
)
if
checkpointPath
!=
""
{
donePath
:=
checkpointPath
+
"/checkpoint.done"
if
_
,
err
:=
os
.
Stat
(
donePath
);
err
==
nil
{
log
.
WithField
(
"path"
,
checkpointPath
)
.
Info
(
"Restore triggered via file (checkpoint.done marker present)"
)
return
checkpointPath
,
true
}
}
}
}
return
""
,
false
}
// WaitForCheckpoint waits for a checkpoint to become available.
func
WaitForCheckpoint
(
ctx
context
.
Context
,
cfg
*
Config
,
log
*
logrus
.
Entry
)
(
string
,
error
)
{
log
.
WithField
(
"timeout"
,
cfg
.
WaitTimeout
)
.
Info
(
"Waiting for checkpoint"
)
deadline
:=
time
.
Now
()
.
Add
(
cfg
.
WaitTimeout
)
ticker
:=
time
.
NewTicker
(
time
.
Second
)
defer
ticker
.
Stop
()
lastLog
:=
time
.
Now
()
for
{
select
{
case
<-
ctx
.
Done
()
:
return
""
,
ctx
.
Err
()
case
<-
ticker
.
C
:
if
path
,
ok
:=
ShouldRestore
(
cfg
,
log
);
ok
{
return
path
,
nil
}
// Log progress every 30 seconds
if
time
.
Since
(
lastLog
)
>=
30
*
time
.
Second
{
elapsed
:=
time
.
Since
(
deadline
.
Add
(
-
cfg
.
WaitTimeout
))
log
.
WithField
(
"elapsed"
,
elapsed
)
.
Info
(
"Still waiting for checkpoint..."
)
lastLog
=
time
.
Now
()
}
if
time
.
Now
()
.
After
(
deadline
)
{
return
""
,
context
.
DeadlineExceeded
}
}
}
}
// Helper functions

// getEnvOrDefault returns the value of environment variable key, or
// defaultValue when the variable is unset or empty.
func getEnvOrDefault(key, defaultValue string) string {
	if fromEnv, present := os.LookupEnv(key); present && fromEnv != "" {
		return fromEnv
	}
	return defaultValue
}
// parseDurationOrDefault reads environment variable key as a whole number of
// seconds and returns it as a time.Duration. It returns defaultValue when the
// variable is unset, empty, or not a valid integer.
func parseDurationOrDefault(key string, defaultValue time.Duration) time.Duration {
	raw, present := os.LookupEnv(key)
	if !present || raw == "" {
		return defaultValue
	}
	if secs, err := strconv.Atoi(raw); err == nil {
		return time.Duration(secs) * time.Second
	}
	return defaultValue
}
// parseIntOrDefault reads environment variable key as a base-10 integer and
// returns it as an int32. It returns defaultValue when the variable is unset,
// empty, not a valid integer, or outside the int32 range (instead of silently
// truncating the value).
func parseIntOrDefault(key string, defaultValue int32) int32 {
	value := os.Getenv(key)
	if value == "" {
		return defaultValue
	}
	// bitSize 32 makes ParseInt report out-of-range input as an error.
	i, err := strconv.ParseInt(value, 10, 32)
	if err != nil {
		return defaultValue
	}
	return int32(i)
}
deploy/chrek/pkg/restore/process.go
View file @
d381e6ff
package
restore
import
(
"errors"
"fmt"
"io"
"os"
...
...
@@ -8,6 +9,7 @@ import (
"os/signal"
"strconv"
"strings"
"sync"
"syscall"
"time"
...
...
@@ -54,31 +56,39 @@ func ForwardProcessOutput(pid int, log *logrus.Entry) int {
// Try to open the process's stdout and stderr via /proc
stdoutPath
:=
fmt
.
Sprintf
(
"/proc/%d/fd/1"
,
pid
)
stderrPath
:=
fmt
.
Sprintf
(
"/proc/%d/fd/2"
,
pid
)
// Channel to signal when copying goroutines should stop
done
:=
make
(
chan
struct
{})
var
wg
sync
.
WaitGroup
// Forward stdout
go
forwardFD
(
stdoutPath
,
os
.
Stdout
,
"stdout"
,
log
,
done
)
wg
.
Add
(
1
)
go
forwardFD
(
stdoutPath
,
os
.
Stdout
,
"stdout"
,
log
,
&
wg
)
// Forward stderr
go
forwardFD
(
stderrPath
,
os
.
Stderr
,
"stderr"
,
log
,
done
)
wg
.
Add
(
1
)
go
forwardFD
(
stderrPath
,
os
.
Stderr
,
"stderr"
,
log
,
&
wg
)
// Wait for process to exit
// Wait for process to exit
(and reap it if it's our child).
exitCode
:=
waitForProcess
(
pid
,
log
)
// Signal goroutines to stop
close
(
done
)
// Give goroutines a moment to flush any remaining output
time
.
Sleep
(
100
*
time
.
Millisecond
)
// Give copy goroutines a short window to flush/finish.
done
:=
make
(
chan
struct
{})
go
func
()
{
wg
.
Wait
()
close
(
done
)
}()
select
{
case
<-
done
:
case
<-
time
.
After
(
2
*
time
.
Second
)
:
log
.
WithField
(
"pid"
,
pid
)
.
Warn
(
"Timed out waiting for output forwarding goroutines to finish"
)
}
return
exitCode
}
// forwardFD copies data from a file descriptor path to a writer.
// It handles the case where the FD may not be readable.
func
forwardFD
(
fdPath
string
,
dst
io
.
Writer
,
name
string
,
log
*
logrus
.
Entry
,
done
<-
chan
struct
{})
{
func
forwardFD
(
fdPath
string
,
dst
io
.
Writer
,
name
string
,
log
*
logrus
.
Entry
,
wg
*
sync
.
WaitGroup
)
{
defer
wg
.
Done
()
// Try to open the FD path
src
,
err
:=
os
.
Open
(
fdPath
)
if
err
!=
nil
{
...
...
@@ -100,54 +110,71 @@ func forwardFD(fdPath string, dst io.Writer, name string, log *logrus.Entry, don
"path"
:
fdPath
,
})
.
Debug
(
"Forwarding process output"
)
// Copy data until done or EOF
buf
:=
make
([]
byte
,
4096
)
for
{
select
{
case
<-
done
:
return
default
:
// Set a read deadline to allow checking done channel periodically
src
.
SetReadDeadline
(
time
.
Now
()
.
Add
(
100
*
time
.
Millisecond
))
n
,
err
:=
src
.
Read
(
buf
)
if
n
>
0
{
dst
.
Write
(
buf
[
:
n
])
}
if
err
!=
nil
{
if
os
.
IsTimeout
(
err
)
{
continue
}
if
err
!=
io
.
EOF
{
log
.
WithError
(
err
)
.
WithField
(
"name"
,
name
)
.
Debug
(
"Error reading from process FD"
)
}
return
}
}
_
,
err
=
io
.
Copy
(
dst
,
src
)
if
err
!=
nil
&&
!
errors
.
Is
(
err
,
io
.
EOF
)
{
log
.
WithError
(
err
)
.
WithField
(
"name"
,
name
)
.
Debug
(
"Error reading from process FD"
)
}
}
// waitForProcess waits for a process to exit and returns its exit code.
func
waitForProcess
(
pid
int
,
log
*
logrus
.
Entry
)
int
{
// Preferred path: restored process is typically our direct child.
// Use wait4() so zombies are reaped and exit status is reliable.
var
status
syscall
.
WaitStatus
for
{
// Check if process still exists by sending signal 0
proc
,
err
:=
os
.
FindProcess
(
pid
)
wpid
,
err
:=
syscall
.
Wait4
(
pid
,
&
status
,
0
,
nil
)
if
errors
.
Is
(
err
,
syscall
.
EINTR
)
{
continue
}
if
err
!=
nil
{
log
.
WithError
(
err
)
.
Error
(
"Failed to find process"
)
if
errors
.
Is
(
err
,
syscall
.
ECHILD
)
{
log
.
WithField
(
"pid"
,
pid
)
.
Warn
(
"Restored process is not a child; falling back to signal-based monitoring"
)
return
waitForProcessBySignal
(
pid
,
log
)
}
log
.
WithError
(
err
)
.
WithField
(
"pid"
,
pid
)
.
Error
(
"Wait4 failed for restored process"
)
return
1
}
err
=
proc
.
Signal
(
syscall
.
Signal
(
0
))
if
err
!=
nil
{
// Process has exited
log
.
WithField
(
"pid"
,
pid
)
.
Info
(
"Restored process exited"
)
// Try to get exit status
exitCode
:=
getExitCode
(
pid
)
log
.
WithField
(
"exit_code"
,
exitCode
)
.
Info
(
"Restored process exit status"
)
if
wpid
!=
pid
{
continue
}
if
status
.
Exited
()
{
exitCode
:=
status
.
ExitStatus
()
log
.
WithFields
(
logrus
.
Fields
{
"pid"
:
pid
,
"exit_code"
:
exitCode
,
})
.
Info
(
"Restored process exited"
)
return
exitCode
}
if
status
.
Signaled
()
{
exitCode
:=
128
+
int
(
status
.
Signal
())
log
.
WithFields
(
logrus
.
Fields
{
"pid"
:
pid
,
"signal"
:
status
.
Signal
()
.
String
(),
"exit_code"
:
exitCode
,
})
.
Warn
(
"Restored process terminated by signal"
)
return
exitCode
}
log
.
WithField
(
"pid"
,
pid
)
.
Warn
(
"Restored process exited with unexpected wait status"
)
return
1
}
}
func
waitForProcessBySignal
(
pid
int
,
log
*
logrus
.
Entry
)
int
{
for
{
proc
,
err
:=
os
.
FindProcess
(
pid
)
if
err
!=
nil
{
log
.
WithError
(
err
)
.
WithField
(
"pid"
,
pid
)
.
Error
(
"Failed to find restored process"
)
return
1
}
if
err
:=
proc
.
Signal
(
syscall
.
Signal
(
0
));
err
!=
nil
{
log
.
WithField
(
"pid"
,
pid
)
.
Info
(
"Restored process no longer exists"
)
return
0
}
// Detect zombie state when wait4 is unavailable.
if
state
,
err
:=
readProcState
(
pid
);
err
==
nil
&&
state
==
"Z"
{
log
.
WithField
(
"pid"
,
pid
)
.
Warn
(
"Restored process is zombie while not reaped by this process"
)
return
1
}
time
.
Sleep
(
100
*
time
.
Millisecond
)
}
}
...
...
@@ -182,6 +209,23 @@ func getExitCode(pid int) int {
return
0
}
// readProcState returns the second whitespace-separated field of the
// "State:" line in /proc/<pid>/status (for example "Z").
//
// It returns the read error unchanged when the status file cannot be read,
// and a "state field not found" error when no usable "State:" line exists.
func readProcState(pid int) (string, error) {
	raw, err := os.ReadFile(fmt.Sprintf("/proc/%d/status", pid))
	if err != nil {
		return "", err
	}

	for _, entry := range strings.Split(string(raw), "\n") {
		if !strings.HasPrefix(entry, "State:") {
			continue
		}
		if tokens := strings.Fields(entry); len(tokens) >= 2 {
			return tokens[1], nil
		}
		// Only the first "State:" line is considered.
		break
	}

	return "", fmt.Errorf("state field not found in /proc/%d/status", pid)
}
// SetupSignalForwarding sets up signal forwarding to the restored process.
// Returns a cleanup function that should be called when done.
func
SetupSignalForwarding
(
pid
int
,
log
*
logrus
.
Entry
)
func
()
{
...
...
@@ -232,52 +276,36 @@ func WaitForPidFile(pidFile string, timeout time.Duration, log *logrus.Entry) (i
return
0
,
fmt
.
Errorf
(
"timeout waiting for PID file %s after %v"
,
pidFile
,
timeout
)
}
// RunDefault runs the default command when no checkpoint is available.
// It attempts to detect and run the appropriate default command for the container.
func
RunDefault
(
cfg
*
Config
,
log
*
logrus
.
Entry
)
error
{
// If DEFAULT_CMD is set, use it
if
cfg
.
DefaultCmd
!=
""
{
log
.
WithField
(
"cmd"
,
cfg
.
DefaultCmd
)
.
Info
(
"Running default command"
)
return
execCommand
(
cfg
.
DefaultCmd
)
// ExecColdStart execs the cold start command (ColdStartArgs), replacing the current process.
// If no args are provided, falls back to sleep infinity.
func
ExecColdStart
(
cfg
*
RestoreRequest
,
log
*
logrus
.
Entry
)
error
{
if
len
(
cfg
.
ColdStartArgs
)
==
0
{
log
.
Warn
(
"No cold start command provided, sleeping indefinitely"
)
return
ExecArgs
([]
string
{
"sleep"
,
"infinity"
},
log
)
}
// Try common application entrypoints
if
_
,
err
:=
os
.
Stat
(
"/docker-entrypoint.sh"
);
err
==
nil
{
log
.
Info
(
"Running docker-entrypoint.sh"
)
return
execCommand
(
"/docker-entrypoint.sh nginx -g 'daemon off;'"
)
}
// Check for nginx
if
_
,
err
:=
exec
.
LookPath
(
"nginx"
);
err
==
nil
{
log
.
Info
(
"Running nginx"
)
return
execCommand
(
"nginx -g 'daemon off;'"
)
}
// Fallback to sleep infinity
log
.
Warn
(
"No default command specified and no known entrypoint found, sleeping"
)
return
execCommand
(
"sleep infinity"
)
log
.
WithField
(
"cmd"
,
cfg
.
ColdStartArgs
)
.
Info
(
"Executing cold start command"
)
return
ExecArgs
(
cfg
.
ColdStartArgs
,
log
)
}
// execCommand executes a command by replacing the current process.
func
execCommand
(
cmdLine
string
)
error
{
// Parse command line - simple split by spaces
// For complex commands, shell wrapper is needed
parts
:=
strings
.
Fields
(
cmdLine
)
if
len
(
parts
)
==
0
{
// ExecArgs replaces the current process with the given command and arguments.
// Uses syscall.Exec for proper PID 1 behavior in containers.
func
ExecArgs
(
args
[]
string
,
log
*
logrus
.
Entry
)
error
{
if
len
(
args
)
==
0
{
return
fmt
.
Errorf
(
"empty command"
)
}
cmd
:=
parts
[
0
]
args
:=
parts
// Find the executable path
path
,
err
:=
exec
.
LookPath
(
cmd
)
path
,
err
:=
exec
.
LookPath
(
args
[
0
]
)
if
err
!=
nil
{
// Try running through shell for complex commands
path
=
"/bin/sh"
args
=
[]
string
{
"sh"
,
"-c"
,
cmdLine
}
return
fmt
.
Errorf
(
"command not found: %s: %w"
,
args
[
0
],
err
)
}
log
.
WithFields
(
logrus
.
Fields
{
"path"
:
path
,
"args"
:
args
,
})
.
Debug
(
"Replacing process via syscall.Exec"
)
// Replace current process with the command
return
syscall
.
Exec
(
path
,
args
,
os
.
Environ
())
}
deploy/chrek/pkg/restore/restore.go
View file @
d381e6ff
package
restore
import
(
"bufio"
"context"
"fmt"
"os"
"os/exec"
"path/filepath"
"sort"
"strings"
"syscall"
"time"
criu
"github.com/checkpoint-restore/go-criu/v7"
"github.com/sirupsen/logrus"
"google.golang.org/protobuf/proto"
"github.com/ai-dynamo/dynamo/deploy/chrek/pkg/checkpoint"
)
// LogGPUDiagnostics logs nvidia-smi and /dev/nvidia* for debugging GPU visibility.
func
LogGPUDiagnostics
(
label
string
,
log
*
logrus
.
Entry
)
{
log
.
Infof
(
"=== GPU DIAGNOSTICS [%s] ==="
,
label
)
diagCtx
,
cancel
:=
context
.
WithTimeout
(
context
.
Background
(),
10
*
time
.
Second
)
defer
cancel
()
if
out
,
err
:=
exec
.
CommandContext
(
diagCtx
,
"nvidia-smi"
,
"-L"
)
.
CombinedOutput
();
err
!=
nil
{
log
.
Infof
(
"nvidia-smi -L: error: %v"
,
err
)
}
else
{
log
.
Infof
(
"nvidia-smi -L:
\n
%s"
,
string
(
out
))
}
// Also log memory usage per GPU to detect OOM conditions
diagCtx2
,
cancel2
:=
context
.
WithTimeout
(
context
.
Background
(),
10
*
time
.
Second
)
defer
cancel2
()
if
out
,
err
:=
exec
.
CommandContext
(
diagCtx2
,
"nvidia-smi"
,
"--query-gpu=index,uuid,memory.used,memory.total,memory.free"
,
"--format=csv,noheader"
)
.
CombinedOutput
();
err
!=
nil
{
log
.
Infof
(
"nvidia-smi memory query: error: %v"
,
err
)
}
else
{
log
.
Infof
(
"nvidia-smi memory:
\n
%s"
,
string
(
out
))
}
matches
,
_
:=
filepath
.
Glob
(
"/dev/nvidia*"
)
log
.
Infof
(
"/dev/nvidia* devices: %s"
,
strings
.
Join
(
matches
,
", "
))
log
.
Infof
(
"NVIDIA_VISIBLE_DEVICES=%s"
,
os
.
Getenv
(
"NVIDIA_VISIBLE_DEVICES"
))
log
.
Infof
(
"=== END GPU DIAGNOSTICS [%s] ==="
,
label
)
}
// processSnapshotPIDs returns the PIDs to include in a diagnostics snapshot:
// PID 1, the current process, and restoredPID when it is positive.
// The result is sorted in ascending order and contains no duplicates.
func processSnapshotPIDs(restoredPID int) []int {
	candidates := []int{1, os.Getpid()}
	if restoredPID > 0 {
		candidates = append(candidates, restoredPID)
	}
	sort.Ints(candidates)

	// After sorting, duplicates are adjacent, so comparing against the last
	// kept value is enough to drop them.
	unique := make([]int, 0, len(candidates))
	for _, pid := range candidates {
		if len(unique) > 0 && unique[len(unique)-1] == pid {
			continue
		}
		unique = append(unique, pid)
	}
	return unique
}
func
logProcessNamespaces
(
pid
int
,
log
*
logrus
.
Entry
)
{
for
_
,
ns
:=
range
[]
string
{
"mnt"
,
"pid"
,
"ipc"
,
"net"
,
"uts"
,
"cgroup"
}
{
nsPath
:=
fmt
.
Sprintf
(
"/proc/%d/ns/%s"
,
pid
,
ns
)
link
,
err
:=
os
.
Readlink
(
nsPath
)
if
err
!=
nil
{
log
.
WithError
(
err
)
.
WithFields
(
logrus
.
Fields
{
"pid"
:
pid
,
"path"
:
nsPath
,
})
.
Warn
(
"Failed to read namespace symlink"
)
continue
}
log
.
WithFields
(
logrus
.
Fields
{
"pid"
:
pid
,
"namespace"
:
ns
,
"value"
:
link
,
})
.
Info
(
"Namespace snapshot"
)
}
}
func
logProcessCgroupPath
(
pid
int
,
log
*
logrus
.
Entry
)
{
path
:=
fmt
.
Sprintf
(
"/proc/%d/cgroup"
,
pid
)
data
,
err
:=
os
.
ReadFile
(
path
)
if
err
!=
nil
{
log
.
WithError
(
err
)
.
WithFields
(
logrus
.
Fields
{
"pid"
:
pid
,
"path"
:
path
,
})
.
Warn
(
"Failed to read cgroup path"
)
return
}
log
.
WithFields
(
logrus
.
Fields
{
"pid"
:
pid
,
"path"
:
path
,
"contents"
:
strings
.
TrimSpace
(
string
(
data
)),
})
.
Info
(
"Cgroup membership snapshot"
)
}
func
logProcessFilteredMountInfo
(
pid
int
,
log
*
logrus
.
Entry
)
{
// Mountinfo dumps are very large; only emit them in DEBUG mode.
if
!
log
.
Logger
.
IsLevelEnabled
(
logrus
.
DebugLevel
)
{
return
}
path
:=
fmt
.
Sprintf
(
"/proc/%d/mountinfo"
,
pid
)
f
,
err
:=
os
.
Open
(
path
)
if
err
!=
nil
{
log
.
WithError
(
err
)
.
WithFields
(
logrus
.
Fields
{
"pid"
:
pid
,
"path"
:
path
,
})
.
Warn
(
"Failed to open mountinfo"
)
return
}
defer
f
.
Close
()
var
selected
[]
string
scanner
:=
bufio
.
NewScanner
(
f
)
scanner
.
Buffer
(
make
([]
byte
,
0
,
64
*
1024
),
1024
*
1024
)
for
scanner
.
Scan
()
{
line
:=
scanner
.
Text
()
if
strings
.
Contains
(
line
,
" /dev "
)
||
strings
.
Contains
(
line
,
"/dev/"
)
||
strings
.
Contains
(
line
,
"nvidia"
)
||
strings
.
Contains
(
line
,
"cgroup2"
)
{
selected
=
append
(
selected
,
line
)
}
}
if
err
:=
scanner
.
Err
();
err
!=
nil
{
log
.
WithError
(
err
)
.
WithFields
(
logrus
.
Fields
{
"pid"
:
pid
,
"path"
:
path
,
})
.
Warn
(
"Failed while scanning mountinfo"
)
return
}
log
.
WithFields
(
logrus
.
Fields
{
"pid"
:
pid
,
"path"
:
path
,
"count"
:
len
(
selected
),
})
.
Debug
(
"Filtered mountinfo snapshot count"
)
if
len
(
selected
)
>
0
{
for
i
,
line
:=
range
selected
{
log
.
WithFields
(
logrus
.
Fields
{
"pid"
:
pid
,
"index"
:
i
+
1
,
"total"
:
len
(
selected
),
})
.
Debugf
(
"Filtered mountinfo: %s"
,
line
)
}
}
}
func
logNvidiaDeviceNodeMetadata
(
log
*
logrus
.
Entry
)
{
devices
,
err
:=
filepath
.
Glob
(
"/dev/nvidia*"
)
if
err
!=
nil
{
log
.
WithError
(
err
)
.
Warn
(
"Failed to glob /dev/nvidia*"
)
return
}
if
len
(
devices
)
==
0
{
log
.
Info
(
"No /dev/nvidia* entries found"
)
return
}
for
_
,
path
:=
range
devices
{
fi
,
err
:=
os
.
Lstat
(
path
)
if
err
!=
nil
{
log
.
WithError
(
err
)
.
WithField
(
"path"
,
path
)
.
Warn
(
"Failed to stat NVIDIA device entry"
)
continue
}
stat
,
ok
:=
fi
.
Sys
()
.
(
*
syscall
.
Stat_t
)
if
!
ok
{
log
.
WithFields
(
logrus
.
Fields
{
"path"
:
path
,
"mode"
:
fi
.
Mode
()
.
String
(),
})
.
Warn
(
"Unexpected stat type for NVIDIA device entry"
)
continue
}
log
.
WithFields
(
logrus
.
Fields
{
"path"
:
path
,
"mode"
:
fi
.
Mode
()
.
String
(),
"inode"
:
stat
.
Ino
,
"rdev"
:
fmt
.
Sprintf
(
"0x%x"
,
stat
.
Rdev
),
})
.
Info
(
"NVIDIA device entry metadata"
)
}
}
func
logCgroupV2HostInfo
(
log
*
logrus
.
Entry
)
{
const
controllersPath
=
"/sys/fs/cgroup/cgroup.controllers"
data
,
err
:=
os
.
ReadFile
(
controllersPath
)
if
err
!=
nil
{
log
.
WithError
(
err
)
.
WithField
(
"path"
,
controllersPath
)
.
Warn
(
"Failed to read cgroup v2 controllers"
)
return
}
log
.
WithFields
(
logrus
.
Fields
{
"path"
:
controllersPath
,
"controllers"
:
strings
.
TrimSpace
(
string
(
data
)),
})
.
Info
(
"cgroup v2 controllers"
)
}
// LogRestoreBoundaryDiagnostics captures cgroup and namespace state around CRIU restore.
func
LogRestoreBoundaryDiagnostics
(
label
string
,
restoredPID
int
,
log
*
logrus
.
Entry
)
{
log
.
Infof
(
"=== RESTORE BOUNDARY DIAGNOSTICS [%s] ==="
,
label
)
for
_
,
pid
:=
range
processSnapshotPIDs
(
restoredPID
)
{
logProcessNamespaces
(
pid
,
log
)
logProcessCgroupPath
(
pid
,
log
)
logProcessFilteredMountInfo
(
pid
,
log
)
}
logCgroupV2HostInfo
(
log
)
logNvidiaDeviceNodeMetadata
(
log
)
log
.
Infof
(
"=== END RESTORE BOUNDARY DIAGNOSTICS [%s] ==="
,
label
)
}
// Restore performs the CRIU restore operation using go-criu.
// All CRIU options are read from the saved CheckpointManifest - no hardcoding.
// Returns the PID of the restored process.
func
Restore
(
ctx
context
.
Context
,
opts
*
RestoreOptions
,
log
*
logrus
.
Entry
)
(
int
,
error
)
{
log
.
WithField
(
"checkpoint"
,
opts
.
CheckpointPath
)
.
Info
(
"Starting CRIU restore"
)
func
Restore
(
ctx
context
.
Context
,
checkpointPath
string
,
data
*
checkpoint
.
CheckpointManifest
,
log
*
logrus
.
Entry
)
(
int
,
error
)
{
if
data
==
nil
{
return
0
,
fmt
.
Errorf
(
"checkpoint manifest is required"
)
}
// Hardcoded restore constants
const
(
rootPath
=
"/"
pidFile
=
"/tmp/restored.pid"
logFile
=
RestoreLogFilename
)
log
.
WithField
(
"checkpoint"
,
checkpointPath
)
.
Info
(
"Starting CRIU restore"
)
// 1. Open checkpoint directory
imageDir
,
imageDirFD
,
err
:=
OpenImageDir
(
opts
.
C
heckpointPath
)
imageDir
,
imageDirFD
,
err
:=
OpenImageDir
(
c
heckpointPath
)
if
err
!=
nil
{
return
0
,
err
}
defer
imageDir
.
Close
()
log
.
WithField
(
"fd"
,
imageDirFD
)
.
Debug
(
"Opened checkpoint directory"
)
// 2. Generate external mount mappings if not already set
if
opts
.
ExtMountMaps
==
nil
{
extMounts
,
err
:=
GenerateExtMountMaps
(
nil
)
if
err
!=
nil
{
return
0
,
fmt
.
Errorf
(
"failed to generate mount maps: %w"
,
err
)
}
opts
.
ExtMountMaps
=
extMounts
// 2. Generate external mount mappings from saved CheckpointManifest
extMounts
,
err
:=
GenerateExtMountMaps
(
data
)
if
err
!=
nil
{
return
0
,
fmt
.
Errorf
(
"failed to generate mount maps: %w"
,
err
)
}
log
.
WithField
(
"mount_count"
,
len
(
opts
.
ExtMountMaps
))
.
Debug
(
"External mount maps ready"
)
// 3. Open target network namespace
netNsFile
,
netNsFD
,
err
:=
OpenNetworkNamespace
(
"/proc/1/ns/net"
)
...
...
@@ -42,53 +244,44 @@ func Restore(ctx context.Context, opts *RestoreOptions, log *logrus.Entry) (int,
return
0
,
err
}
defer
netNsFile
.
Close
()
log
.
WithField
(
"fd"
,
netNsFD
)
.
Debug
(
"Opened target network namespace"
)
// 4. Open work directory if specified
// 4. Open work directory if specified
in checkpoint dump settings.
var
workDirFile
*
os
.
File
var
workDirFD
int32
=
-
1
if
opts
.
WorkDir
!=
""
{
workDirFile
,
workDirFD
=
OpenWorkDir
(
opts
.
WorkDir
,
log
)
if
data
.
CRIUDump
.
CRIU
.
WorkDir
!=
""
{
workDirFile
,
workDirFD
=
OpenWorkDir
(
data
.
CRIUDump
.
CRIU
.
WorkDir
,
log
)
if
workDirFile
!=
nil
{
defer
workDirFile
.
Close
()
}
}
// 5. Build CRIU options
cfg
:=
CRIURestoreConfig
{
ImageDirFD
:
imageDirFD
,
RootPath
:
opts
.
RootPath
,
LogLevel
:
opts
.
LogLevel
,
LogFile
:
opts
.
LogFile
,
WorkDirFD
:
workDirFD
,
NetNsFD
:
netNsFD
,
ExtMountMaps
:
opts
.
ExtMountMaps
,
// 5. Build CRIU options from saved checkpoint manifest.
plan
:=
CRIURestorePlan
{
// File descriptors
ImageDirFD
:
imageDirFD
,
WorkDirFD
:
workDirFD
,
NetNsFD
:
netNsFD
,
// Paths
RootPath
:
rootPath
,
LogFile
:
logFile
,
// Options from CheckpointManifest.CRIUDump.CRIU
LogLevel
:
data
.
CRIUDump
.
CRIU
.
LogLevel
,
Timeout
:
data
.
CRIUDump
.
CRIU
.
Timeout
,
ShellJob
:
data
.
CRIUDump
.
CRIU
.
ShellJob
,
TcpClose
:
data
.
CRIUDump
.
CRIU
.
TcpClose
,
FileLocks
:
data
.
CRIUDump
.
CRIU
.
FileLocks
,
ExtUnixSk
:
data
.
CRIUDump
.
CRIU
.
ExtUnixSk
,
LinkRemap
:
data
.
CRIUDump
.
CRIU
.
LinkRemap
,
ManageCgroupsMode
:
data
.
CRIUDump
.
CRIU
.
ManageCgroupsMode
,
// External mounts
ExtMountMaps
:
extMounts
,
}
criuOpts
:=
BuildRestore
CRIUOpts
(
cfg
)
criuOpts
:=
Build
CRIU
Restore
Options
(
plan
)
// 6. Create CRIU config file for CUDA plugin if libdir is specified
if
opts
.
LibDir
!=
""
{
if
opts
.
Timeout
==
0
{
return
0
,
fmt
.
Errorf
(
"CRIU_TIMEOUT environment variable must be set for CUDA restores"
)
}
configPath
:=
filepath
.
Join
(
opts
.
CheckpointPath
,
"restore-criu.conf"
)
configContent
:=
fmt
.
Sprintf
(
`enable-external-masters
libdir %s
tcp-close
link-remap
timeout %d
allow-uprobes
skip-in-flight
`
,
opts
.
LibDir
,
opts
.
Timeout
)
if
err
:=
os
.
WriteFile
(
configPath
,
[]
byte
(
configContent
),
0644
);
err
!=
nil
{
log
.
WithError
(
err
)
.
Warn
(
"Failed to write CRIU config file for restore"
)
}
else
{
criuOpts
.
ConfigFile
=
proto
.
String
(
configPath
)
log
.
WithFields
(
logrus
.
Fields
{
"config_path"
:
configPath
,
"lib_dir"
:
opts
.
LibDir
,
})
.
Info
(
"Created CRIU config file with libdir for CUDA plugin"
)
}
// 6. Reuse criu.conf from checkpoint time if it exists.
criuConfPath
:=
filepath
.
Join
(
checkpointPath
,
checkpoint
.
CheckpointCRIUConfFilename
)
if
_
,
err
:=
os
.
Stat
(
criuConfPath
);
err
==
nil
{
criuOpts
.
ConfigFile
=
proto
.
String
(
criuConfPath
)
}
// 7. Execute CRIU restore
...
...
@@ -99,7 +292,7 @@ skip-in-flight
criuExecStart
:=
time
.
Now
()
if
err
:=
c
.
Restore
(
criuOpts
,
notify
);
err
!=
nil
{
log
.
WithField
(
"duration"
,
time
.
Since
(
criuExecStart
))
.
Error
(
"CRIU c.Restore failed"
)
logCRIUErrors
(
opts
.
C
heckpointPath
,
opts
.
L
ogFile
,
log
)
logCRIUErrors
(
c
heckpointPath
,
l
ogFile
,
log
)
return
0
,
fmt
.
Errorf
(
"CRIU restore failed: %w"
,
err
)
}
...
...
@@ -114,15 +307,11 @@ skip-in-flight
}
// Fallback: try to read from PID file
if
opts
.
PidFile
!=
""
{
pid
,
err
:=
WaitForPidFile
(
opts
.
PidFile
,
10
*
time
.
Second
,
log
)
if
err
!=
nil
{
return
0
,
fmt
.
Errorf
(
"failed to get restored PID: %w"
,
err
)
}
return
pid
,
nil
pid
,
err
:=
WaitForPidFile
(
pidFile
,
10
*
time
.
Second
,
log
)
if
err
!=
nil
{
return
0
,
fmt
.
Errorf
(
"failed to get restored PID: %w"
,
err
)
}
return
0
,
fmt
.
Errorf
(
"could not determine restored process PID"
)
return
pid
,
nil
}
// logCRIUErrors reads CRIU log file and logs errors.
...
...
@@ -142,62 +331,58 @@ func logCRIUErrors(checkpointPath, logFile string, log *logrus.Entry) {
}
log
.
Error
(
"=== CRIU RESTORE LOG END ==="
)
// Copy log to shared directory if CRIU_LOG_DIR is set
if
logDir
:=
os
.
Getenv
(
"CRIU_LOG_DIR"
);
logDir
!=
""
{
if
err
:=
os
.
MkdirAll
(
logDir
,
0755
);
err
==
nil
{
destPath
:=
filepath
.
Join
(
logDir
,
fmt
.
Sprintf
(
"restore-%d.log"
,
time
.
Now
()
.
Unix
()))
if
err
:=
os
.
WriteFile
(
destPath
,
data
,
0644
);
err
==
nil
{
log
.
WithField
(
"path"
,
destPath
)
.
Info
(
"CRIU log copied to shared directory"
)
}
// Copy log to shared directory for debugging
if
err
:=
os
.
MkdirAll
(
CRIULogDir
,
0755
);
err
==
nil
{
destPath
:=
filepath
.
Join
(
CRIULogDir
,
fmt
.
Sprintf
(
"restore-%d.log"
,
time
.
Now
()
.
Unix
()))
if
err
:=
os
.
WriteFile
(
destPath
,
data
,
0644
);
err
==
nil
{
log
.
WithField
(
"path"
,
destPath
)
.
Info
(
"CRIU log copied to shared directory"
)
}
}
}
// Run is the main entry point for the restore entrypoint.
// It orchestrates the entire restore process.
func
Run
(
ctx
context
.
Context
,
cfg
*
Config
,
log
*
logrus
.
Entry
)
error
{
log
.
Info
(
"===
Self-
Restor
ing Placeholder
Entrypoint ==="
)
func
Run
(
ctx
context
.
Context
,
cfg
*
RestoreRequest
,
log
*
logrus
.
Entry
)
error
{
log
.
Info
(
"=== Restor
e
Entrypoint ==="
)
log
.
WithFields
(
logrus
.
Fields
{
"checkpoint_path"
:
cfg
.
CheckpointPath
,
"checkpoint_hash"
:
cfg
.
CheckpointHash
,
"
embedded_
checkpoint_
path"
:
cfg
.
Embedded
Checkpoint
Path
,
"wait_for_checkpoint"
:
cfg
.
WaitForCheckpoint
,
"
restore_marker_file"
:
cfg
.
RestoreMarkerFile
,
})
.
Info
(
"Configuration"
)
"checkpoint_
location"
:
cfg
.
Checkpoint
Location
,
"
skip_
wait_for_checkpoint"
:
cfg
.
Skip
WaitForCheckpoint
,
"
cold_start_args"
:
cfg
.
ColdStartArgs
,
})
.
Debug
(
"Configuration"
)
// Check CRIU availability
c
:=
criu
.
MakeCriu
()
version
,
err
:=
c
.
GetCriuVersion
()
if
err
!=
nil
{
if
_
,
err
:=
c
.
GetCriuVersion
();
err
!=
nil
{
log
.
WithError
(
err
)
.
Error
(
"CRIU is not available"
)
log
.
Info
(
"Falling back to default command"
)
return
RunDefault
(
cfg
,
log
)
return
ExecColdStart
(
cfg
,
log
)
}
log
.
WithField
(
"version"
,
version
)
.
Info
(
"CRIU version"
)
// Determine checkpoint path
// Determine checkpoint path
based on mode
var
checkpointPath
string
var
shouldRestore
bool
// Check if we should restore immediately
checkpointPath
,
shouldRestore
=
ShouldRestore
(
cfg
,
log
)
// If not and we're configured to wait, wait for checkpoint
if
!
shouldRestore
&&
cfg
.
WaitForCheckpoint
{
log
.
Info
(
"Waiting for checkpoint..."
)
var
err
error
checkpointPath
,
err
=
WaitForCheckpoint
(
ctx
,
cfg
,
log
)
if
err
!=
nil
{
log
.
WithError
(
err
)
.
Info
(
"No checkpoint received, running default command"
)
return
RunDefault
(
cfg
,
log
)
if
cfg
.
SkipWaitForCheckpoint
{
// Operator path: check once, restore if ready, otherwise cold start
var
ready
bool
checkpointPath
,
ready
=
ShouldRestore
(
cfg
,
log
)
if
!
ready
{
log
.
Info
(
"No checkpoint ready, executing cold start command"
)
return
ExecColdStart
(
cfg
,
log
)
}
}
else
{
// Standalone/DaemonSet path: check first, then poll if needed
var
ready
bool
checkpointPath
,
ready
=
ShouldRestore
(
cfg
,
log
)
if
!
ready
{
log
.
Info
(
"Waiting for checkpoint..."
)
var
err
error
checkpointPath
,
err
=
WaitForCheckpoint
(
ctx
,
cfg
,
log
)
if
err
!=
nil
{
log
.
WithError
(
err
)
.
Info
(
"No checkpoint received"
)
return
ExecColdStart
(
cfg
,
log
)
}
}
shouldRestore
=
true
}
// If no checkpoint, run default command
if
!
shouldRestore
{
log
.
Info
(
"No checkpoint configured, running default command"
)
return
RunDefault
(
cfg
,
log
)
}
// Perform restore
...
...
@@ -205,68 +390,61 @@ func Run(ctx context.Context, cfg *Config, log *logrus.Entry) error {
restoreStart
:=
time
.
Now
()
// Apply filesystem changes
rootfsDiffStart
:=
time
.
Now
()
if
err
:=
ApplyRootfsDiff
(
checkpointPath
,
"/"
,
log
);
err
!=
nil
{
log
.
WithError
(
err
)
.
Error
(
"Failed to apply rootfs diff"
)
}
log
.
WithField
(
"duration"
,
time
.
Since
(
rootfsDiffStart
))
.
Info
(
"ApplyRootfsDiff completed"
)
deletedFilesStart
:=
time
.
Now
()
if
err
:=
ApplyDeletedFiles
(
checkpointPath
,
"/"
,
log
);
err
!=
nil
{
log
.
WithError
(
err
)
.
Error
(
"Failed to apply deleted files"
)
}
log
.
WithField
(
"duration"
,
time
.
Since
(
deletedFilesStart
))
.
Info
(
"ApplyDeletedFiles completed"
)
// Load restore options from metadata
loadOptsStart
:=
time
.
Now
()
opts
,
err
:=
LoadRestoreOptions
(
checkpointPath
,
cfg
.
CRIULogLevel
)
// Load checkpoint manifest (contains CRIU settings + mounts + namespaces).
data
,
err
:=
checkpoint
.
ReadCheckpointManifest
(
checkpointPath
)
if
err
!=
nil
{
log
.
WithError
(
err
)
.
Warn
(
"Could not load restore options from metadata, using defaults"
)
log
.
WithError
(
err
)
.
Error
(
"Failed to load checkpoint manifest"
)
return
ExecColdStart
(
cfg
,
log
)
}
log
.
WithField
(
"duration"
,
time
.
Since
(
loadOptsStart
))
.
Info
(
"LoadRestoreOptions completed"
)
// Apply additional config options
if
cfg
.
CRIUWorkDir
!=
""
{
opts
.
WorkDir
=
cfg
.
CRIUWorkDir
// Write restore marker file before CRIU restore
restoreMarkerFile
:=
cfg
.
RestoreMarkerFilePath
if
err
:=
os
.
MkdirAll
(
filepath
.
Dir
(
restoreMarkerFile
),
0755
);
err
!=
nil
{
log
.
WithError
(
err
)
.
Warn
(
"Failed to create restore marker directory"
)
}
if
err
:=
os
.
WriteFile
(
restoreMarkerFile
,
[]
byte
(
"restored"
),
0644
);
err
!=
nil
{
log
.
WithError
(
err
)
.
Warn
(
"Failed to write restore marker file"
)
}
// Set CUDA plugin directory and timeout for restore config file
if
cfg
.
CUDAPluginDir
!=
""
{
if
cfg
.
CRIUTimeout
==
0
{
return
fmt
.
Errorf
(
"CRIU_TIMEOUT environment variable must be set for CUDA restores"
)
}
opts
.
LibDir
=
cfg
.
CUDAPluginDir
opts
.
Timeout
=
cfg
.
CRIUTimeout
log
.
WithFields
(
logrus
.
Fields
{
"lib_dir"
:
cfg
.
CUDAPluginDir
,
"timeout"
:
cfg
.
CRIUTimeout
,
})
.
Info
(
"CUDA plugin directory and timeout configured for restore"
)
// Restore /dev/shm contents before CRIU restore
if
err
:=
RestoreDevShm
(
checkpointPath
,
log
);
err
!=
nil
{
log
.
WithError
(
err
)
.
Error
(
"Failed to restore /dev/shm contents - CRIU restore may fail with missing FD errors"
)
}
// Write restore marker file before CRIU restore
// This allows the restored process to detect it's been restored
if
cfg
.
RestoreMarkerFile
!=
""
{
if
err
:=
os
.
WriteFile
(
cfg
.
RestoreMarkerFile
,
[]
byte
(
"restored"
),
0644
);
err
!=
nil
{
log
.
WithError
(
err
)
.
Warn
(
"Failed to write restore marker file"
)
}
else
{
log
.
WithField
(
"path"
,
cfg
.
RestoreMarkerFile
)
.
Info
(
"Wrote restore marker file"
)
}
// Create link_remap stub files for unlinked files referenced in CRIU images
if
err
:=
CreateLinkRemapStubs
(
checkpointPath
,
log
);
err
!=
nil
{
log
.
WithError
(
err
)
.
Warn
(
"Failed to create link_remap stubs"
)
}
// Log GPU diagnostics right before CRIU restore to track device visibility changes
LogGPUDiagnostics
(
"PRE-CRIU-RESTORE"
,
log
)
LogRestoreBoundaryDiagnostics
(
"PRE-CRIU-RESTORE"
,
0
,
log
)
// Perform CRIU restore (CUDA plugin handles CUDA state automatically)
criuRestoreStart
:=
time
.
Now
()
pid
,
err
:=
Restore
(
ctx
,
opts
,
log
)
pid
,
err
:=
Restore
(
ctx
,
checkpointPath
,
data
,
log
)
if
err
!=
nil
{
log
.
WithField
(
"duration"
,
time
.
Since
(
criuRestoreStart
))
.
WithError
(
err
)
.
Error
(
"Restore failed, falling back to default command"
)
if
cfg
.
Debug
{
log
.
Info
(
"DEBUG mode: sleeping 300s to allow log collection..."
)
time
.
Sleep
(
300
*
time
.
Second
)
}
return
RunDefaul
t
(
cfg
,
log
)
return
ExecColdStar
t
(
cfg
,
log
)
}
criuRestoreDuration
:=
time
.
Since
(
criuRestoreStart
)
log
.
WithField
(
"duration"
,
criuRestoreDuration
)
.
Info
(
"CRIU Restore completed (CUDA state restored by plugin)"
)
// Log GPU diagnostics AFTER restore to compare with pre-restore
LogGPUDiagnostics
(
"POST-RESTORE"
,
log
)
LogRestoreBoundaryDiagnostics
(
"POST-RESTORE"
,
pid
,
log
)
totalDuration
:=
time
.
Since
(
restoreStart
)
log
.
WithFields
(
logrus
.
Fields
{
"total_duration"
:
totalDuration
,
...
...
deploy/chrek/pkg/restore/shm.go
0 → 100644
View file @
d381e6ff
// Package restore provides CRIU restore operations.
package
restore
import
(
"fmt"
"io"
"os"
"path/filepath"
"github.com/sirupsen/logrus"
"github.com/ai-dynamo/dynamo/deploy/chrek/pkg/checkpoint"
)
// RestoreDevShm restores files from the checkpoint's dev-shm directory to /dev/shm.
// This must be called BEFORE CRIU restore so that the shared memory files exist
// when CRIU tries to restore file descriptors pointing to them.
func
RestoreDevShm
(
checkpointPath
string
,
log
*
logrus
.
Entry
)
error
{
srcDir
:=
filepath
.
Join
(
checkpointPath
,
checkpoint
.
DevShmDirName
)
// Check if dev-shm directory exists in checkpoint
entries
,
err
:=
os
.
ReadDir
(
srcDir
)
if
err
!=
nil
{
if
os
.
IsNotExist
(
err
)
{
log
.
Debug
(
"No dev-shm directory in checkpoint, skipping restore"
)
return
nil
}
return
fmt
.
Errorf
(
"failed to read checkpoint dev-shm directory: %w"
,
err
)
}
if
len
(
entries
)
==
0
{
log
.
Debug
(
"Checkpoint dev-shm directory is empty"
)
return
nil
}
// Ensure /dev/shm exists and is writable
destDir
:=
"/dev/shm"
if
err
:=
os
.
MkdirAll
(
destDir
,
0777
);
err
!=
nil
{
return
fmt
.
Errorf
(
"failed to ensure /dev/shm exists: %w"
,
err
)
}
var
restored
[]
string
var
totalSize
int64
for
_
,
entry
:=
range
entries
{
if
entry
.
IsDir
()
{
continue
}
name
:=
entry
.
Name
()
srcPath
:=
filepath
.
Join
(
srcDir
,
name
)
destPath
:=
filepath
.
Join
(
destDir
,
name
)
info
,
err
:=
entry
.
Info
()
if
err
!=
nil
{
log
.
WithError
(
err
)
.
WithField
(
"file"
,
name
)
.
Warn
(
"Failed to get file info, skipping"
)
continue
}
size
:=
info
.
Size
()
// Copy the file to /dev/shm
if
err
:=
copyFileToShm
(
srcPath
,
destPath
,
info
.
Mode
());
err
!=
nil
{
log
.
WithError
(
err
)
.
WithField
(
"file"
,
name
)
.
Warn
(
"Failed to restore file, skipping"
)
continue
}
restored
=
append
(
restored
,
name
)
totalSize
+=
size
log
.
WithFields
(
logrus
.
Fields
{
"file"
:
name
,
"size"
:
size
,
})
.
Debug
(
"Restored /dev/shm file"
)
}
if
len
(
restored
)
>
0
{
log
.
WithFields
(
logrus
.
Fields
{
"count"
:
len
(
restored
),
"total_size"
:
totalSize
,
"files"
:
restored
,
})
.
Info
(
"Restored /dev/shm files from checkpoint"
)
}
return
nil
}
// copyFileToShm copies the file at src to dest, creating dest if needed and
// truncating it if it already exists.
//
// mode is the permission set dest ends up with; a zero mode is replaced by
// 0666. The mode is applied with an explicit Chmod after the copy, because
// the permission passed to os.OpenFile is filtered by the process umask and
// is ignored entirely when dest already exists.
//
// It returns an error if src cannot be opened, dest cannot be created,
// the copy fails, the mode cannot be applied, or dest cannot be closed.
func copyFileToShm(src, dest string, mode os.FileMode) (err error) {
	srcFile, err := os.Open(src)
	if err != nil {
		return fmt.Errorf("failed to open source: %w", err)
	}
	defer srcFile.Close()

	// Default to 0666 when mode is not set (mode == 0)
	if mode == 0 {
		mode = 0666
	}

	destFile, err := os.OpenFile(dest, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, mode)
	if err != nil {
		return fmt.Errorf("failed to create destination: %w", err)
	}
	// A failed Close on a file that was written to can mean lost data, so
	// report it unless an earlier error is already being returned.
	defer func() {
		if closeErr := destFile.Close(); closeErr != nil && err == nil {
			err = fmt.Errorf("failed to close destination: %w", closeErr)
		}
	}()

	if _, err := io.Copy(destFile, srcFile); err != nil {
		return fmt.Errorf("failed to copy contents: %w", err)
	}

	// Apply the requested mode regardless of umask or a pre-existing file.
	if err := destFile.Chmod(mode); err != nil {
		return fmt.Errorf("failed to set destination mode: %w", err)
	}

	return nil
}
deploy/chrek/pkg/watcher/watcher.go
View file @
d381e6ff
...
...
@@ -21,18 +21,6 @@ import (
"k8s.io/client-go/tools/cache"
"github.com/ai-dynamo/dynamo/deploy/chrek/pkg/checkpoint"
checkpointk8s
"github.com/ai-dynamo/dynamo/deploy/chrek/pkg/checkpoint/k8s"
)
const
(
// LabelCheckpointSource is the label that triggers automatic checkpointing
LabelCheckpointSource
=
"nvidia.com/checkpoint-source"
// LabelCheckpointHash is the label specifying the checkpoint identity hash
LabelCheckpointHash
=
"nvidia.com/checkpoint-hash"
// EnvCheckpointSignalFile is the env var in the pod specifying the signal file path
EnvCheckpointSignalFile
=
"DYN_CHECKPOINT_SIGNAL_FILE"
)
// SignalFile represents the content of a checkpoint completion signal file
...
...
@@ -44,26 +32,21 @@ type SignalFile struct {
Error
string
`json:"error,omitempty"`
}
// Config holds watcher configuration
type
Config
struct
{
//
Watcher
Config holds watcher configuration
.
type
Watcher
Config
struct
{
NodeName
string
CheckpointDir
string
HostProc
string
ListenAddr
string
// HTTP server address for health checks (e.g., ":8080")
RestrictedNamespace
string
// Optional: restrict watching to this namespace (empty = cluster-wide)
// GPU/CUDA checkpoint options (passed to checkpoint.Options)
CUDAPluginDir
string
// Path to CRIU CUDA plugin directory
GhostLimit
uint32
// Ghost file size limit in bytes (default: 512MB for GPU)
Timeout
uint32
// CRIU timeout in seconds
ExternalMounts
[]
string
// Additional external mount mappings
// Checkpoint configuration (from ConfigMap)
CheckpointSpec
*
checkpoint
.
CheckpointSpec
}
// Watcher watches for pods with checkpoint labels and triggers checkpoints
type
Watcher
struct
{
config
Config
config
Watcher
Config
clientset
kubernetes
.
Interface
discoveryClient
*
checkpoint
k8s
.
DiscoveryClient
discoveryClient
*
checkpoint
.
DiscoveryClient
checkpointer
*
checkpoint
.
Checkpointer
log
*
logrus
.
Entry
...
...
@@ -75,7 +58,7 @@ type Watcher struct {
}
// NewWatcher creates a new pod watcher
func
NewWatcher
(
cfg
Config
,
discoveryClient
*
checkpoint
k8s
.
DiscoveryClient
,
checkpointer
*
checkpoint
.
Checkpointer
)
(
*
Watcher
,
error
)
{
func
NewWatcher
(
cfg
Watcher
Config
,
discoveryClient
*
checkpoint
.
DiscoveryClient
,
checkpointer
*
checkpoint
.
Checkpointer
)
(
*
Watcher
,
error
)
{
// Create in-cluster Kubernetes client
restConfig
,
err
:=
rest
.
InClusterConfig
()
if
err
!=
nil
{
...
...
@@ -100,10 +83,13 @@ func NewWatcher(cfg Config, discoveryClient *checkpointk8s.DiscoveryClient, chec
// Start begins watching for pods and starts the health check server
func
(
w
*
Watcher
)
Start
(
ctx
context
.
Context
)
error
{
if
w
.
config
.
CheckpointSpec
==
nil
{
return
fmt
.
Errorf
(
"checkpoint spec is required"
)
}
w
.
log
.
WithFields
(
logrus
.
Fields
{
"node"
:
w
.
config
.
NodeName
,
"label"
:
LabelCheckpointSource
,
"signal_file_env"
:
EnvCheckpointSignalFile
,
"node"
:
w
.
config
.
NodeName
,
"label"
:
checkpoint
.
KubeLabelCheckpointSource
,
})
.
Info
(
"Starting pod watcher"
)
// Start health check HTTP server if address is configured
...
...
@@ -118,7 +104,7 @@ func (w *Watcher) Start(ctx context.Context) error {
// Create informer factory with label selector and optional namespace restriction
labelSelector
:=
labels
.
SelectorFromSet
(
labels
.
Set
{
LabelCheckpointSource
:
"true"
,
checkpoint
.
Kube
LabelCheckpointSource
:
"true"
,
})
.
String
()
factoryOptions
:=
[]
informers
.
SharedInformerOption
{
...
...
@@ -232,7 +218,7 @@ func (w *Watcher) handlePodEvent(ctx context.Context, pod *corev1.Pod) {
podKey
:=
fmt
.
Sprintf
(
"%s/%s"
,
pod
.
Namespace
,
pod
.
Name
)
// Get checkpoint ID from label (uses the checkpoint hash)
checkpointID
,
ok
:=
pod
.
Labels
[
LabelCheckpointHash
]
checkpointID
,
ok
:=
pod
.
Labels
[
checkpoint
.
Kube
LabelCheckpointHash
]
if
!
ok
||
checkpointID
==
""
{
w
.
log
.
WithField
(
"pod"
,
podKey
)
.
Warn
(
"Pod has checkpoint label but no checkpoint-hash label"
)
return
...
...
@@ -282,12 +268,14 @@ func (w *Watcher) doCheckpoint(ctx context.Context, pod *corev1.Pod, checkpointI
// Find the main container and get signal file path from env
var
containerID
string
var
containerName
string
var
signalFilePath
string
for
_
,
container
:=
range
pod
.
Spec
.
Containers
{
if
container
.
Name
==
"main"
||
len
(
pod
.
Spec
.
Containers
)
==
1
{
containerName
=
container
.
Name
// Get signal file path from environment
for
_
,
env
:=
range
container
.
Env
{
if
env
.
Name
==
EnvCheckpointSignalFile
{
if
env
.
Name
==
"DYN_CHECKPOINT_SIGNAL_FILE"
{
signalFilePath
=
env
.
Value
break
}
...
...
@@ -325,8 +313,8 @@ func (w *Watcher) doCheckpoint(ctx context.Context, pod *corev1.Pod, checkpointI
"signal_file_path"
:
signalFilePath
,
})
.
Info
(
"Found container, starting checkpoint"
)
// Resolve container to get PID for signal file writing
container
Info
,
err
:=
w
.
discoveryClient
.
ResolveContainer
(
ctx
,
containerID
)
// Resolve container to get PID for signal file writing
.
container
PID
,
_
,
err
:=
w
.
discoveryClient
.
ResolveContainer
(
ctx
,
containerID
)
if
err
!=
nil
{
log
.
WithError
(
err
)
.
Error
(
"Failed to resolve container"
)
w
.
checkpointedMu
.
Lock
()
...
...
@@ -335,28 +323,34 @@ func (w *Watcher) doCheckpoint(ctx context.Context, pod *corev1.Pod, checkpointI
return
}
// Validate CheckpointSpec is set
if
w
.
config
.
CheckpointSpec
==
nil
{
log
.
Error
(
"CheckpointSpec is nil - cannot perform checkpoint"
)
w
.
checkpointedMu
.
Lock
()
delete
(
w
.
checkpointed
,
podKey
)
w
.
checkpointedMu
.
Unlock
()
return
}
// Perform checkpoint
opts
:=
checkpoint
.
Options
{
ContainerID
:
containerID
,
CheckpointID
:
checkpointID
,
CheckpointDir
:
w
.
config
.
CheckpointDir
,
NodeName
:
w
.
config
.
NodeName
,
PodName
:
pod
.
Name
,
PodNamespace
:
pod
.
Namespace
,
CUDAPluginDir
:
w
.
config
.
CUDAPluginDir
,
GhostLimit
:
w
.
config
.
GhostLimit
,
Timeout
:
w
.
config
.
Timeout
,
ExternalMounts
:
w
.
config
.
ExternalMounts
,
params
:=
checkpoint
.
CheckpointRequest
{
ContainerID
:
containerID
,
ContainerName
:
containerName
,
CheckpointID
:
checkpointID
,
CheckpointDir
:
w
.
config
.
CheckpointSpec
.
BasePath
,
NodeName
:
w
.
config
.
NodeName
,
PodName
:
pod
.
Name
,
PodNamespace
:
pod
.
Namespace
,
}
result
,
err
:=
w
.
checkpointer
.
Checkpoint
(
ctx
,
opts
)
result
,
err
:=
w
.
checkpointer
.
Checkpoint
(
ctx
,
params
,
w
.
config
.
CheckpointSpec
)
if
err
!=
nil
{
log
.
WithError
(
err
)
.
Error
(
"Checkpoint failed"
)
// Write failure marker to PVC so restore pods know checkpoint failed
checkpointDir
:=
filepath
.
Join
(
w
.
config
.
Checkpoint
Dir
,
checkpointID
)
checkpointDir
:=
filepath
.
Join
(
w
.
config
.
Checkpoint
Spec
.
BasePath
,
checkpointID
)
w
.
writeCheckpointDoneMarker
(
checkpointDir
,
checkpointID
,
false
,
err
.
Error
(),
log
)
if
signalFilePath
!=
""
{
w
.
writeSignalFileToPod
(
int
(
container
Info
.
PID
)
,
signalFilePath
,
checkpointID
,
""
,
false
,
err
.
Error
())
w
.
writeSignalFileToPod
(
containerPID
,
signalFilePath
,
checkpointID
,
""
,
false
,
err
.
Error
())
}
// Clear the in_progress status so checkpoint can be retried
w
.
checkpointedMu
.
Lock
()
...
...
@@ -368,12 +362,11 @@ func (w *Watcher) doCheckpoint(ctx context.Context, pod *corev1.Pod, checkpointI
log
.
WithField
(
"checkpoint_dir"
,
result
.
CheckpointDir
)
.
Info
(
"Checkpoint completed successfully"
)
// Write checkpoint.done marker to PVC for cross-node restore detection
// This is written AFTER rootfs-diff.tar is complete, so it's safe to use as a completion marker
w
.
writeCheckpointDoneMarker
(
result
.
CheckpointDir
,
checkpointID
,
true
,
""
,
log
)
// Write signal file to pod's hostPath for checkpoint job pod to exit
if
signalFilePath
!=
""
{
w
.
writeSignalFileToPod
(
int
(
container
Info
.
PID
)
,
signalFilePath
,
checkpointID
,
result
.
CheckpointDir
,
true
,
""
)
w
.
writeSignalFileToPod
(
containerPID
,
signalFilePath
,
checkpointID
,
result
.
CheckpointDir
,
true
,
""
)
}
// Mark as completed so we don't checkpoint again
...
...
@@ -400,8 +393,7 @@ func (w *Watcher) writeSignalFileToPod(pid int, signalFilePath, checkpointID, ch
}
// Write to the pod's filesystem via /proc/<pid>/root
// signalFilePath is the path inside the pod (e.g., /var/lib/dynamo-checkpoint/signal.done)
hostSignalPath
:=
fmt
.
Sprintf
(
"%s/%d/root%s"
,
w
.
config
.
HostProc
,
pid
,
signalFilePath
)
hostSignalPath
:=
fmt
.
Sprintf
(
"%s/%d/root%s"
,
checkpoint
.
HostProcPath
,
pid
,
signalFilePath
)
// Ensure signal directory exists in pod's filesystem
signalDir
:=
filepath
.
Dir
(
hostSignalPath
)
...
...
@@ -424,11 +416,8 @@ func (w *Watcher) writeSignalFileToPod(pid int, signalFilePath, checkpointID, ch
}
// writeCheckpointDoneMarker writes a checkpoint.done marker file to the checkpoint directory on shared PVC.
// This file is written AFTER all checkpoint steps complete (including rootfs-diff.tar).
// Restore pods on ANY node check for this file to know the checkpoint is complete and safe to restore.
// This is separate from writeSignalFileToPod which signals the checkpoint job pod to exit.
func
(
w
*
Watcher
)
writeCheckpointDoneMarker
(
checkpointDir
,
checkpointID
string
,
success
bool
,
errMsg
string
,
log
*
logrus
.
Entry
)
{
markerPath
:=
filepath
.
Join
(
checkpointDir
,
"
checkpoint.
done"
)
markerPath
:=
filepath
.
Join
(
checkpointDir
,
checkpoint
.
CheckpointDoneFilename
)
marker
:=
SignalFile
{
CheckpointID
:
checkpointID
,
...
...
deploy/chrek/scripts/smart-entrypoint.sh
deleted
100755 → 0
View file @
b6824ae0
#!/bin/bash
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Smart entrypoint wrapper for CRIU checkpoint/restore
# Automatically detects checkpoints and falls back to cold start if not found
#
# Behavior:
# 1. If DYN_CHECKPOINT_HASH is set and checkpoint exists -> restore
# 2. If WAIT_FOR_CHECKPOINT=1 -> wait for checkpoint (restore-entrypoint handles this)
# 3. Otherwise -> execute provided command (cold start)
set -e

# Turn on command tracing only when the caller explicitly sets DEBUG=1.
case "${DEBUG:-0}" in
    1) set -x ;;
esac

# Settings taken from the environment; a default is used when the variable
# is unset or empty.
CHECKPOINT_PATH="${DYN_CHECKPOINT_PATH:-/checkpoints}"
CHECKPOINT_HASH="${DYN_CHECKPOINT_HASH:-}"
WAIT_FOR_CHECKPOINT="${WAIT_FOR_CHECKPOINT:-0}"
# Write one diagnostic line to stderr, prefixed with the script tag.
# All arguments are joined with single spaces into the message.
log() {
    printf '%s\n' "[smart-entrypoint] $*" >&2
}
# Decide whether this container should go through restore-entrypoint.
#
# Reads the globals WAIT_FOR_CHECKPOINT, CHECKPOINT_HASH and CHECKPOINT_PATH.
# Returns 0 when restore should be attempted, 1 when the caller should fall
# back to a cold start. Each decision is reported through log().
should_restore_checkpoint() {
    # Function-private paths; kept local so they do not leak into the
    # script's global namespace.
    local checkpoint_dir done_marker

    # If WAIT_FOR_CHECKPOINT is set, always use restore-entrypoint
    # (it will wait for the checkpoint to appear)
    if [ "$WAIT_FOR_CHECKPOINT" = "1" ]; then
        log "WAIT_FOR_CHECKPOINT=1, will wait for checkpoint via restore-entrypoint"
        return 0
    fi

    # If checkpoint hash is not set, no restore
    if [ -z "$CHECKPOINT_HASH" ]; then
        log "DYN_CHECKPOINT_HASH not set, no checkpoint to restore"
        return 1
    fi

    # Check if checkpoint directory exists
    checkpoint_dir="$CHECKPOINT_PATH/$CHECKPOINT_HASH"
    if [ ! -d "$checkpoint_dir" ]; then
        log "Checkpoint directory not found: $checkpoint_dir"
        return 1
    fi

    # Check for checkpoint.done marker which is written LAST in the checkpoint process
    # This is more reliable than inventory.img (created by CRIU) or rootfs-diff.tar (may be mid-write)
    # Order: metadata.json -> CRIU dump (*.img) -> rootfs-diff.tar -> checkpoint.done
    done_marker="$checkpoint_dir/checkpoint.done"
    if [ ! -f "$done_marker" ]; then
        log "Checkpoint incomplete - checkpoint.done not found in: $checkpoint_dir"
        log "Checkpoint may still be in progress..."
        return 1
    fi

    log "Checkpoint found: $CHECKPOINT_HASH (checkpoint.done marker present)"
    return 0
}
# Main logic: choose between cold start and restore, then replace this
# shell with the chosen program via exec.
if ! should_restore_checkpoint; then
    log "=========================================="
    log "COLD START MODE"
    log "=========================================="

    # No checkpoint found or not requested. A cold start needs a command
    # to run; with no arguments there is nothing to exec.
    if [ "$#" -eq 0 ]; then
        log "ERROR: No checkpoint to restore and no command provided"
        log "Set DYN_CHECKPOINT_HASH to restore a checkpoint, or provide a command to run"
        exit 1
    fi

    log "No checkpoint to restore"
    log "Executing command: $*"
    log "=========================================="

    # Hand control to the provided command
    exec "$@"
else
    log "=========================================="
    log "CHECKPOINT RESTORE MODE"
    log "=========================================="
    log "Checkpoint: $CHECKPOINT_HASH"
    log "Location: $CHECKPOINT_PATH/$CHECKPOINT_HASH"
    log "Invoking restore-entrypoint..."
    log "=========================================="

    # Hand control to restore-entrypoint; script arguments are forwarded
    # (though restore-entrypoint ignores them)
    exec /restore-entrypoint "$@"
fi
deploy/helm/charts/chrek/README.md
View file @
d381e6ff
...
...
@@ -63,7 +63,6 @@ See `values.yaml` for all configuration options.
|
`storage.pvc.name`
| PVC name (must match operator config) |
`chrek-pvc`
|
|
`storage.pvc.size`
| PVC size |
`100Gi`
|
|
`storage.pvc.storageClass`
| Storage class name |
`""`
(default) |
|
`storage.signalHostPath`
| Host path for signal files |
`/var/lib/chrek/signals`
|
|
`daemonset.image.repository`
| DaemonSet image repository |
`nvidia/chrek-agent`
|
|
`daemonset.nodeSelector`
| Node selector for GPU nodes |
`nvidia.com/gpu.present: "true"`
|
|
`daemonset.runtimeClassName`
| Runtime class for GPU access |
`nvidia`
|
...
...
@@ -175,4 +174,3 @@ Ensure your storage class supports `ReadWriteMany` access mode for multi-node de
## License
Apache License 2.0
deploy/helm/charts/chrek/templates/configmap.yaml
0 → 100644
View file @
d381e6ff
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
apiVersion
:
v1
kind
:
ConfigMap
metadata
:
name
:
{{
include "chrek.fullname" .
}}
-config
namespace
:
{{
.Release.Namespace
}}
labels
:
{{
- include "chrek.labels" . | nindent 4
}}
data
:
config.yaml
:
|
# Chrek Configuration
# This ConfigMap provides static configuration for the checkpoint agent.
# Dynamic values (NODE_NAME, RESTRICTED_NAMESPACE, etc.) come from environment variables.
agent:
# How checkpoints are triggered: "http" for REST API, "watcher" for auto-checkpoint
signalSource: {{ .Values.config.agent.signalSource | quote }}
# Watcher/HTTP server address
listenAddr: {{ .Values.config.agent.listenAddr | quote }}
checkpoint:
# Base path for checkpoint directories (shared PVC mount path)
basePath: {{ .Values.storage.pvc.basePath | quote }}
criu:
# RPC options
ghostLimit: {{ .Values.config.checkpoint.criu.ghostLimit }}
timeout: {{ .Values.config.checkpoint.criu.timeout }}
logLevel: {{ .Values.config.checkpoint.criu.logLevel }}
workDir: {{ .Values.config.checkpoint.criu.workDir | quote }}
# K8s-specific options
leaveRunning: {{ .Values.config.checkpoint.criu.leaveRunning }}
shellJob: {{ .Values.config.checkpoint.criu.shellJob }}
tcpClose: {{ .Values.config.checkpoint.criu.tcpClose }}
fileLocks: {{ .Values.config.checkpoint.criu.fileLocks }}
orphanPtsMaster: {{ .Values.config.checkpoint.criu.orphanPtsMaster }}
extUnixSk: {{ .Values.config.checkpoint.criu.extUnixSk }}
linkRemap: {{ .Values.config.checkpoint.criu.linkRemap }}
extMasters: {{ .Values.config.checkpoint.criu.extMasters }}
manageCgroupsMode: {{ .Values.config.checkpoint.criu.manageCgroupsMode | quote }}
# Advanced options
autoDedup: {{ .Values.config.checkpoint.criu.autoDedup }}
lazyPages: {{ .Values.config.checkpoint.criu.lazyPages }}
# Config file options (NOT available via RPC)
libDir: {{ .Values.config.checkpoint.criu.libDir | quote }}
allowUprobes: {{ .Values.config.checkpoint.criu.allowUprobes }}
skipInFlight: {{ .Values.config.checkpoint.criu.skipInFlight }}
rootfsExclusions:
# System directories excluded from rootfs diff (NVIDIA GPU Operator injected)
systemDirs: {{ toYaml .Values.config.checkpoint.rootfsExclusions.systemDirs | nindent 10 }}
# Cache directories to exclude (reduces checkpoint size)
cacheDirs: {{ toYaml .Values.config.checkpoint.rootfsExclusions.cacheDirs | nindent 10 }}
# Additional custom exclusions
additionalExclusions: {{ toYaml .Values.config.checkpoint.rootfsExclusions.additionalExclusions | nindent 10 }}
# NOTE: Restore runtime configuration is NOT in this ConfigMap.
# Placeholder containers do not mount it. Restore uses hardcoded defaults
# + operator-injected env vars. CRIU options come from saved checkpoint manifest.
deploy/helm/charts/chrek/templates/daemonset.yaml
View file @
d381e6ff
...
...
@@ -76,13 +76,11 @@ spec:
securityContext
:
privileged
:
true
env
:
# Dynamic values from Kubernetes downward API
-
name
:
NODE_NAME
valueFrom
:
fieldRef
:
fieldPath
:
spec.nodeName
# Agent mode: use "watcher" to watch for labeled pods
-
name
:
CHECKPOINT_SIGNAL_FROM
value
:
"
watcher"
{{
- if .Values.rbac.namespaceRestricted
}}
# Restrict pod watching to this namespace (namespace-scoped RBAC)
-
name
:
RESTRICTED_NAMESPACE
...
...
@@ -90,34 +88,11 @@ spec:
fieldRef
:
fieldPath
:
metadata.namespace
{{
- end
}}
# Checkpoint storage directory
-
name
:
CHECKPOINT_DIR
value
:
{{
.Values.storage.pvc.basePath | quote
}}
# Host proc mount point for CRIU operations
-
name
:
HOST_PROC
value
:
"
/host/proc"
# Containerd socket path
-
name
:
CONTAINERD_SOCKET
value
:
{{
.Values.daemonset.containerRuntimeSocket
}}
{{
- if .Values.daemonset.criu.cudaPluginDir
}}
# CUDA plugin directory for GPU checkpoint support
-
name
:
CUDA_PLUGIN_DIR
value
:
{{
.Values.daemonset.criu.cudaPluginDir | quote
}}
{{
- end
}}
{{
- if .Values.daemonset.criu.ghostLimit
}}
# CRIU ghost file size limit in bytes
-
name
:
CRIU_GHOST_LIMIT
value
:
{{
.Values.daemonset.criu.ghostLimit | quote
}}
{{
- end
}}
{{
- if .Values.daemonset.criu.timeout
}}
# CRIU timeout in seconds
-
name
:
CRIU_TIMEOUT
value
:
{{
.Values.daemonset.criu.timeout | quote
}}
{{
- end
}}
# Storage type (for future S3/OCI support)
-
name
:
DYN_CHECKPOINT_STORAGE_TYPE
value
:
{{
.Values.storage.type | quote
}}
volumeMounts
:
# Mount configuration ConfigMap
-
name
:
config
mountPath
:
/etc/chrek
readOnly
:
true
{{
- if eq .Values.storage.type "pvc"
}}
# Mount the checkpoint PVC (only for PVC storage type)
-
name
:
checkpoints
...
...
@@ -155,6 +130,10 @@ spec:
resources
:
{{
- toYaml .Values.daemonset.resources | nindent 12
}}
volumes
:
# Configuration ConfigMap
-
name
:
config
configMap
:
name
:
{{
include "chrek.fullname" .
}}
-config
{{
- if .Values.seccomp.deploy
}}
# Seccomp profile ConfigMap (used by initContainer)
-
name
:
seccomp-profiles
...
...
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment