server.go 2 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
// Package httpApiServer provides the HTTP API server for the checkpoint agent.
package httpApiServer

import (
	"context"
	"log"
	"net/http"
	"time"

	"github.com/ai-dynamo/dynamo/deploy/chrek/pkg/checkpoint"
)

// ServerConfig holds the configuration for the HTTP API server.
//
// Fields:
//   - ListenAddr: address handed to http.Server.Addr; also the value
//     reported by Server.Addr and logged by Server.Start.
//   - NodeName: not read anywhere in this file; presumably the name of the
//     node the agent runs on (TODO confirm against the handlers).
//   - CheckpointSpec: checkpoint settings. NewServer reads CRIU.Timeout from
//     it to derive the HTTP write timeout.
type ServerConfig struct {
	ListenAddr     string
	NodeName       string
	CheckpointSpec *checkpoint.CheckpointSpec
}

// Server is the HTTP API server for checkpoint operations.
//
// It bundles the configuration it was built from (cfg), the route handlers
// (handlers), and the underlying net/http server (httpServer) that Start and
// Shutdown delegate to. Instances are created by NewServer.
type Server struct {
	cfg        ServerConfig
	handlers   *Handlers
	httpServer *http.Server
}

// NewServer creates a new Server instance.
//
// It builds the handlers from cfg and checkpointer, registers the /health,
// /checkpoint and /checkpoints routes on a fresh ServeMux wrapped in
// LoggingMiddleware, and configures the underlying http.Server. Nothing is
// bound or served until Start is called.
//
// A nil cfg.CheckpointSpec is tolerated here: the write timeout then falls
// back to the minimum instead of panicking on a nil dereference. Whether
// NewHandlers accepts a nil spec is not visible from this file.
func NewServer(cfg ServerConfig, checkpointer *checkpoint.Checkpointer) *Server {
	const (
		// minWriteTimeout is the floor applied to the HTTP write timeout.
		minWriteTimeout = 300 * time.Second
		// writeTimeoutBuffer is added on top of the CRIU timeout to leave
		// room for pre/post work around the dump.
		writeTimeoutBuffer = 60 * time.Second
	)

	handlers := NewHandlers(cfg, checkpointer)

	// Setup routes
	mux := http.NewServeMux()
	mux.HandleFunc("/health", handlers.HandleHealth)
	mux.HandleFunc("/checkpoint", handlers.HandleCheckpoint)
	mux.HandleFunc("/checkpoints", handlers.HandleListCheckpoints)

	// WriteTimeout must exceed the CRIU checkpoint timeout since /checkpoint
	// blocks until the dump completes. Start from the minimum and only raise
	// it when the configured CRIU timeout plus buffer is larger.
	writeTimeout := minWriteTimeout
	if cfg.CheckpointSpec != nil {
		criuBased := time.Duration(cfg.CheckpointSpec.CRIU.Timeout)*time.Second + writeTimeoutBuffer
		if criuBased > writeTimeout {
			writeTimeout = criuBased
		}
	}

	httpServer := &http.Server{
		Addr:         cfg.ListenAddr,
		Handler:      LoggingMiddleware(mux),
		ReadTimeout:  30 * time.Second,
		WriteTimeout: writeTimeout,
		IdleTimeout:  120 * time.Second,
	}

	return &Server{
		cfg:        cfg,
		handlers:   handlers,
		httpServer: httpServer,
	}
}

// Start logs the configured listen address and begins serving HTTP on it.
// The call blocks while the server is running and hands back, unmodified,
// the error produced by the underlying http.Server's ListenAndServe (which,
// per net/http, is http.ErrServerClosed after a Shutdown).
func (s *Server) Start() error {
	addr := s.cfg.ListenAddr
	log.Printf("HTTP API server listening on %s", addr)

	err := s.httpServer.ListenAndServe()
	return err
}

// Shutdown logs that the server is stopping and then delegates to the
// underlying http.Server's Shutdown, bounded by ctx. The error from that
// call is returned as-is.
func (s *Server) Shutdown(ctx context.Context) error {
	log.Println("Shutting down HTTP server...")

	err := s.httpServer.Shutdown(ctx)
	return err
}

// Addr reports the listen address the server was configured with. This is
// the ListenAddr value from ServerConfig verbatim, not an address resolved
// from a bound listener.
func (s *Server) Addr() string {
	addr := s.cfg.ListenAddr
	return addr
}