server.go 8.26 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
//go:build windows || darwin

package server

import (
	"bufio"
	"context"
	"errors"
	"fmt"
	"io"
	"log/slog"
	"os"
	"os/exec"
	"path/filepath"
	"regexp"
	"runtime"
	"strconv"
	"strings"
	"time"

	"github.com/ollama/ollama/app/logrotate"
	"github.com/ollama/ollama/app/store"
)

// restartDelay is how long Run waits before each attempt to start the
// server process, including the very first one.
const restartDelay = time.Second

// Server is a managed ollama server process.
//
// It carries what Run and cmd need to launch `ollama serve`: the settings
// store, the resolved binary path, and the log sink the child process writes
// to.
type Server struct {
	// store is queried for user settings each time the serve command is built.
	store *store.Store
	bin   string // resolved path to `ollama`
	// log is set by Run and used as both stdout and stderr of the child process.
	log   io.WriteCloser
	dev   bool // true if running with the dev flag
}

// InferenceCompute holds the fields extracted from one "inference compute"
// line of the server log. Every field is kept as the raw string captured
// from the log; a field that is absent from the line is left empty.
type InferenceCompute struct {
	// Library is the value of the `library` log field.
	Library string
	// Variant is the value of the `variant` log field.
	Variant string
	// Compute is the value of the `compute` log field.
	Compute string
	// Driver is the value of the `driver` log field.
	Driver  string
	// Name is the value of the `name` log field.
	Name    string
	// VRAM is the value of the `total` log field.
	VRAM    string
}

// New returns a Server that reads its settings from s. The path to the
// `ollama` binary is resolved once, at construction time. devMode records
// whether the app is running with the dev flag.
func New(s *store.Store, devMode bool) *Server {
	srv := &Server{
		store: s,
		dev:   devMode,
	}
	srv.bin = resolvePath("ollama")
	return srv
}

// resolvePath returns the location of the executable called name. Candidates
// are tried in this order, and the first one that exists wins:
//
//  1. the app bundle: the directory of the running executable on Windows,
//     or its sibling ../Resources directory on other platforms;
//  2. the development output directories dist/<GOOS> and dist/<GOOS>-<GOARCH>,
//     relative to the current working directory;
//  3. the system PATH.
//
// If nothing is found, name is returned unchanged.
func resolvePath(name string) string {
	// look in the app bundle first
	if self, _ := os.Executable(); self != "" {
		bundleDir := filepath.Dir(self)
		if runtime.GOOS != "windows" {
			bundleDir = filepath.Join(bundleDir, "..", "Resources")
		}
		candidate := filepath.Join(bundleDir, name)
		if _, err := os.Stat(candidate); err == nil {
			return candidate
		}
	}

	// check the development dist path
	distDirs := [...]string{
		runtime.GOOS,
		runtime.GOOS + "-" + runtime.GOARCH,
	}
	for _, sub := range distDirs {
		candidate := filepath.Join("dist", sub, name)
		if _, err := os.Stat(candidate); err == nil {
			return candidate
		}
	}

	// fallback to system path
	found, _ := exec.LookPath(name)
	if found == "" {
		return name
	}
	return found
}

// cleanup looks for a pid file left behind by an earlier run and, if the
// process it names is still running, stops it.
//
// A missing pid file is not an error. Once the file has been read it is
// removed on every return path, whether or not the process could be stopped.
func cleanup() error {
	raw, err := os.ReadFile(pidFile)
	switch {
	case err == nil:
		// fall through to the pid handling below
	case os.IsNotExist(err):
		return nil
	default:
		return err
	}
	defer os.Remove(pidFile)

	pid, err := strconv.Atoi(strings.TrimSpace(string(raw)))
	if err != nil {
		return err
	}

	proc, err := os.FindProcess(pid)
	if err != nil {
		// nothing to clean up if the process cannot be found
		return nil
	}

	// A failed liveness check is only logged; the process is then treated
	// according to whatever terminated reported.
	exited, err := terminated(pid)
	if err != nil {
		slog.Debug("cleanup: error checking if terminated", "pid", pid, "err", err)
	}
	if !exited {
		slog.Info("detected previous ollama process, cleaning up", "pid", pid)
		return stop(proc)
	}
	return nil
}

// stop asks proc to terminate gracefully and then waits for it to exit by
// polling `terminated(proc.Pid)`. If the graceful request itself fails, or the
// process has not exited within 5 seconds, a warning is logged and the
// process is killed. A nil proc is a no-op.
func stop(proc *os.Process) error {
	if proc == nil {
		return nil
	}

	if err := terminate(proc); err != nil {
		slog.Warn("graceful terminate failed, killing", "err", err)
		return proc.Kill()
	}

	const (
		gracePeriod  = 5 * time.Second
		pollInterval = 10 * time.Millisecond
	)
	timeout := time.NewTimer(gracePeriod)
	defer timeout.Stop()

	for {
		// Non-blocking check of the deadline before every poll.
		select {
		case <-timeout.C:
			slog.Warn("timeout waiting for graceful shutdown; killing", "pid", proc.Pid)
			return proc.Kill()
		default:
		}

		exited, err := terminated(proc.Pid)
		switch {
		case err != nil:
			slog.Error("error checking if ollama process is terminated", "err", err)
			return err
		case exited:
			return nil
		}
		time.Sleep(pollInterval)
	}
}

// Run supervises the ollama server until ctx is cancelled.
//
// It opens the server log, stops any process recorded in a leftover pid file,
// and then loops: wait restartDelay, start `ollama serve`, record its pid,
// and wait for it to exit. An unexpected exit is logged and the server is
// started again. The first time the server exits with code 1 outside of dev
// mode, Run assumes a possible port conflict and tries once to stop other
// ollama servers before restarting.
//
// Run returns ctx.Err() on cancellation, or the error that prevented the log
// from being opened or the command from being built or started.
func (s *Server) Run(ctx context.Context) error {
	logFile, err := openRotatingLog()
	if err != nil {
		return err
	}
	s.log = logFile
	defer s.log.Close()

	if cleanupErr := cleanup(); cleanupErr != nil {
		slog.Warn("failed to cleanup previous ollama process", "err", cleanupErr)
	}

	// reaped ensures conflicting servers are stopped at most once per Run.
	var reaped bool
	for ctx.Err() == nil {
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-time.After(restartDelay):
		}

		child, err := s.cmd(ctx)
		if err != nil {
			return err
		}
		if err := child.Start(); err != nil {
			return err
		}

		// The pid file is best-effort: failing to write it must not stop the server.
		pid := strconv.Itoa(child.Process.Pid)
		if writeErr := os.WriteFile(pidFile, []byte(pid), 0o644); writeErr != nil {
			slog.Warn("failed to write pid file", "file", pidFile, "err", writeErr)
		}

		waitErr := child.Wait()
		if waitErr == nil || errors.Is(waitErr, context.Canceled) {
			continue
		}

		var exitErr *exec.ExitError
		if errors.As(waitErr, &exitErr) && exitErr.ExitCode() == 1 && !s.dev && !reaped {
			reaped = true
			// This could be a port conflict, try to kill any existing ollama processes
			reapErr := reapServers()
			if reapErr == nil {
				slog.Debug("conflicting server stopped, waiting for port to be released")
				continue
			}
			slog.Warn("failed to stop existing ollama server", "err", reapErr)
		}
		slog.Error("ollama exited", "err", waitErr)
	}
	return ctx.Err()
}

// cmd builds the `ollama serve` command for this server.
//
// The command writes stdout and stderr to s.log. Its environment is the
// current process environment with the user's settings merged on top:
//
//   - Expose sets OLLAMA_HOST=0.0.0.0
//   - Browser sets OLLAMA_ORIGINS=*
//   - Models sets OLLAMA_MODELS, but only if the path can be stat'ed;
//     otherwise a warning is logged and the variable is left as inherited
//   - ContextLength > 0 sets OLLAMA_CONTEXT_LENGTH
//
// The command's Cancel hook stops the process with stop rather than killing
// it outright. An error is returned only when the settings cannot be loaded.
func (s *Server) cmd(ctx context.Context) (*exec.Cmd, error) {
	settings, err := s.store.Settings()
	if err != nil {
		return nil, err
	}

	cmd := commandContext(ctx, s.bin, "serve")
	cmd.Stdout, cmd.Stderr = s.log, s.log

	// Copy and mutate the environment to merge in settings the user has
	// specified without dups.
	env := map[string]string{}
	for _, kv := range os.Environ() {
		name, value, ok := strings.Cut(kv, "=")
		if !ok {
			// Not of the form key=value; indexing the split result here
			// used to panic, so skip the entry instead.
			continue
		}
		if name == "" {
			// Windows exposes per-drive working directories as entries such
			// as `=C:=C:\dir`, where the leading '=' is part of the name.
			// Keep each under its own key so they do not overwrite each other.
			if n, v, found := strings.Cut(value, "="); found {
				name, value = "="+n, v
			}
		}
		env[name] = value
	}

	if settings.Expose {
		env["OLLAMA_HOST"] = "0.0.0.0"
	}
	if settings.Browser {
		env["OLLAMA_ORIGINS"] = "*"
	}
	if settings.Models != "" {
		if _, err := os.Stat(settings.Models); err == nil {
			env["OLLAMA_MODELS"] = settings.Models
		} else {
			slog.Warn("models path not accessible, using default", "path", settings.Models, "err", err)
		}
	}
	if settings.ContextLength > 0 {
		env["OLLAMA_CONTEXT_LENGTH"] = strconv.Itoa(settings.ContextLength)
	}

	// Always assign a non-nil slice so the child gets exactly this
	// environment rather than implicitly inheriting the parent's.
	cmd.Env = make([]string, 0, len(env))
	for k, v := range env {
		cmd.Env = append(cmd.Env, k+"="+v)
	}

	// On context cancellation, shut the server down gracefully via stop.
	cmd.Cancel = func() error {
		if cmd.Process == nil {
			return nil
		}
		return stop(cmd.Process)
	}

	return cmd, nil
}

// openRotatingLog prepares the server log for a new server invocation and
// returns it opened for appending. It creates the log directory if needed,
// passes the log path to logrotate.Rotate, and then opens the file, creating
// it if it does not exist.
func openRotatingLog() (io.WriteCloser, error) {
	// TODO consider rotation based on size or time, not just every server invocation
	if err := os.MkdirAll(filepath.Dir(serverLogPath), 0o755); err != nil {
		return nil, fmt.Errorf("create log directory: %w", err)
	}

	logrotate.Rotate(serverLogPath)

	const openFlags = os.O_APPEND | os.O_CREATE | os.O_WRONLY
	logFile, err := os.OpenFile(serverLogPath, openFlags, 0o644)
	if err != nil {
		return nil, fmt.Errorf("open log file: %w", err)
	}
	return logFile, nil
}

// GetInferenceComputer attempts to retrieve inference compute information
// from the server log. Set a timeout on ctx to control how long to wait for
// the log lines to appear.
//
// The log is rescanned from the start until it contains a run of
// "inference compute" lines followed by at least one other line, which is
// taken as the sign that the whole run has been written. One InferenceCompute
// is returned per line of that run. If ctx is done before that happens, an
// error is returned.
func GetInferenceComputer(ctx context.Context) ([]InferenceCompute, error) {
	marker := regexp.MustCompile(`inference compute.*library=`)

	// Each field may appear quoted (field="some value") or bare (field=value).
	type fieldRegex struct {
		quoted   *regexp.Regexp
		unquoted *regexp.Regexp
	}
	fields := []string{"library", "variant", "compute", "driver", "name", "total"}
	regexes := make(map[string]fieldRegex, len(fields))
	for _, f := range fields {
		regexes[f] = fieldRegex{
			quoted: regexp.MustCompile(fmt.Sprintf(`inference compute.*%s=["]([^"]*)["]`, f)),
			// A bare value ends at whitespace or at the end of the line, so
			// the last field on a line is captured too.
			unquoted: regexp.MustCompile(fmt.Sprintf(`inference compute.*%s=(\S+)(?:\s|$)`, f)),
		}
	}

	// get returns the value of field in line, or "" if it is not present.
	get := func(field, line string) string {
		re, ok := regexes[field]
		if !ok {
			slog.Warn("missing field", "field", field)
			return ""
		}
		if m := re.quoted.FindStringSubmatch(line); len(m) > 1 {
			return m[1]
		}
		if m := re.unquoted.FindStringSubmatch(line); len(m) > 1 {
			return m[1]
		}
		return ""
	}

	// scan makes one pass over the log. It reports the matched entries and
	// true once a non-matching line follows them. Running it as a closure
	// lets the deferred Close fire at the end of every pass instead of piling
	// up open files until the outer function returns, and starting from an
	// empty slice each pass keeps rescans from duplicating entries.
	scan := func() ([]InferenceCompute, bool, error) {
		file, err := os.Open(serverLogPath)
		if err != nil {
			return nil, false, err
		}
		defer file.Close()

		var found []InferenceCompute
		scanner := bufio.NewScanner(file)
		for scanner.Scan() {
			line := scanner.Text()
			if !marker.MatchString(line) {
				// Break out on first non matching line after we start matching
				if len(found) > 0 {
					return found, true, nil
				}
				continue
			}
			ic := InferenceCompute{
				Library: get("library", line),
				Variant: get("variant", line),
				Compute: get("compute", line),
				Driver:  get("driver", line),
				Name:    get("name", line),
				VRAM:    get("total", line),
			}
			slog.Info("Matched", "inference compute", ic)
			found = append(found, ic)
		}
		if err := scanner.Err(); err != nil {
			slog.Debug("error reading server log", "log", serverLogPath, "error", err)
		}
		// Either nothing matched yet or the run may still be growing.
		return nil, false, nil
	}

	// pause waits for d, returning early if ctx is done.
	pause := func(d time.Duration) {
		t := time.NewTimer(d)
		defer t.Stop()
		select {
		case <-ctx.Done():
		case <-t.C:
		}
	}

	for {
		if ctx.Err() != nil {
			return nil, errors.New("timeout scanning server log for inference compute details")
		}

		found, complete, err := scan()
		switch {
		case err != nil:
			slog.Debug("failed to open server log", "log", serverLogPath, "error", err)
			pause(time.Second)
		case complete:
			return found, nil
		default:
			pause(100 * time.Millisecond)
		}
	}
}