// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package runtime import ( "fmt" "os" "path/filepath" "strconv" snapshotprotocol "github.com/ai-dynamo/dynamo/deploy/snapshot/protocol" ) // WriteControlSentinel writes a sentinel file into the workload container's // snapshot-control volume at SnapshotControlMountPath/, accessed through // the agent's /host/proc//root view of the container's mount namespace. // // hostPID must be a PID inside the container's mount namespace (the container // task PID is the canonical choice). The sentinel is observed by the workload // via inotify on the control directory; it replaces the SIGUSR1/SIGCONT // agent-to-workload signals that previously required the workload to run as // PID 1. // // The write uses create-then-rename so the workload never observes a partial // file. func WriteControlSentinel(hostPID int, name string) error { if hostPID <= 0 { return fmt.Errorf("invalid host PID %d for control sentinel %q", hostPID, name) } dir := filepath.Join(HostProcPath, strconv.Itoa(hostPID), "root", snapshotprotocol.SnapshotControlMountPath) return writeSentinelInDir(dir, name) } func writeSentinelInDir(dir, name string) error { tmpPath := filepath.Join(dir, "."+name+".tmp") finalPath := filepath.Join(dir, name) if err := os.WriteFile(tmpPath, []byte("done\n"), 0o644); err != nil { return fmt.Errorf("write temp sentinel %s: %w", tmpPath, err) } if err := os.Rename(tmpPath, finalPath); err != nil { _ = os.Remove(tmpPath) return fmt.Errorf("rename sentinel %s -> %s: %w", tmpPath, finalPath, err) } return nil }