routes.go 34.4 KB
Newer Older
Jeffrey Morgan's avatar
Jeffrey Morgan committed
1
2
3
package server

import (
Michael Yang's avatar
Michael Yang committed
4
	"bytes"
Michael Yang's avatar
Michael Yang committed
5
	"cmp"
6
	"context"
Michael Yang's avatar
Michael Yang committed
7
	"encoding/json"
8
	"errors"
9
	"fmt"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
10
	"io"
11
	"log/slog"
12
	"math"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
13
14
	"net"
	"net/http"
15
	"net/netip"
16
	"os"
17
	"os/signal"
Michael Yang's avatar
Michael Yang committed
18
	"path/filepath"
19
	"slices"
Michael Yang's avatar
Michael Yang committed
20
	"strings"
21
	"syscall"
22
	"time"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
23

Michael Yang's avatar
Michael Yang committed
24
	"github.com/gin-contrib/cors"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
25
26
	"github.com/gin-gonic/gin"

27
	"github.com/ollama/ollama/api"
28
	"github.com/ollama/ollama/envconfig"
29
30
31
	"github.com/ollama/ollama/gpu"
	"github.com/ollama/ollama/llm"
	"github.com/ollama/ollama/openai"
32
	"github.com/ollama/ollama/parser"
Michael Yang's avatar
Michael Yang committed
33
	"github.com/ollama/ollama/template"
34
	"github.com/ollama/ollama/types/errtypes"
Michael Yang's avatar
Michael Yang committed
35
	"github.com/ollama/ollama/types/model"
36
	"github.com/ollama/ollama/version"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
37
38
)

Michael Yang's avatar
Michael Yang committed
39
40
// mode is the gin run mode that init validates and passes to gin.SetMode.
// It starts out as gin.DebugMode.
var mode string = gin.DebugMode

41
type Server struct {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
42
43
	addr  net.Addr
	sched *Scheduler
44
45
}

Michael Yang's avatar
Michael Yang committed
46
47
48
49
50
51
52
53
54
55
56
57
// init sanitizes the package-level gin mode and applies it. Any value other
// than gin's debug, release, or test mode is replaced with debug mode.
func init() {
	recognized := mode == gin.DebugMode || mode == gin.ReleaseMode || mode == gin.TestMode
	if !recognized {
		mode = gin.DebugMode
	}

	gin.SetMode(mode)
}

Michael Yang's avatar
Michael Yang committed
58
59
// errRequired is wrapped (with %w) into errors that report a missing required
// field, producing messages such as "model is required".
var errRequired = errors.New("is required")

60
61
62
63
64
65
66
67
68
69
70
func modelOptions(model *Model, requestOpts map[string]interface{}) (api.Options, error) {
	opts := api.DefaultOptions()
	if err := opts.FromMap(model.Options); err != nil {
		return api.Options{}, err
	}

	if err := opts.FromMap(requestOpts); err != nil {
		return api.Options{}, err
	}

	return opts, nil
Bruce MacDonald's avatar
Bruce MacDonald committed
71
72
}

Michael Yang's avatar
Michael Yang committed
73
74
75
// scheduleRunner schedules a runner after validating inputs such as capabilities and model options.
// It returns the allocated runner, model instance, and consolidated options if successful and error otherwise.
func (s *Server) scheduleRunner(ctx context.Context, name string, caps []Capability, requestOpts map[string]any, keepAlive *api.Duration) (llm.LlamaServer, *Model, *api.Options, error) {
Michael Yang's avatar
Michael Yang committed
76
	if name == "" {
Michael Yang's avatar
Michael Yang committed
77
		return nil, nil, nil, fmt.Errorf("model %w", errRequired)
Bruce MacDonald's avatar
Bruce MacDonald committed
78
79
	}

Michael Yang's avatar
Michael Yang committed
80
	model, err := GetModel(name)
Bruce MacDonald's avatar
Bruce MacDonald committed
81
	if err != nil {
Michael Yang's avatar
Michael Yang committed
82
		return nil, nil, nil, err
83
84
	}

Michael Yang's avatar
Michael Yang committed
85
	if err := model.CheckCapabilities(caps...); err != nil {
Michael Yang's avatar
Michael Yang committed
86
		return nil, nil, nil, fmt.Errorf("%s %w", name, err)
87
88
	}

Michael Yang's avatar
Michael Yang committed
89
	opts, err := modelOptions(model, requestOpts)
90
	if err != nil {
Michael Yang's avatar
Michael Yang committed
91
		return nil, nil, nil, err
92
93
	}

Michael Yang's avatar
Michael Yang committed
94
	runnerCh, errCh := s.sched.GetRunner(ctx, model, opts, keepAlive)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
95
96
	var runner *runnerRef
	select {
Michael Yang's avatar
Michael Yang committed
97
98
	case runner = <-runnerCh:
	case err = <-errCh:
Michael Yang's avatar
Michael Yang committed
99
		return nil, nil, nil, err
Bruce MacDonald's avatar
Bruce MacDonald committed
100
101
	}

Michael Yang's avatar
Michael Yang committed
102
	return runner.llama, model, &opts, nil
Michael Yang's avatar
Michael Yang committed
103
104
105
}

func (s *Server) GenerateHandler(c *gin.Context) {
106
	checkpointStart := time.Now()
Michael Yang's avatar
Michael Yang committed
107
108
109
110
111
112
	var req api.GenerateRequest
	if err := c.ShouldBindJSON(&req); errors.Is(err, io.EOF) {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	} else if err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Bruce MacDonald's avatar
Bruce MacDonald committed
113
114
115
		return
	}

Michael Yang's avatar
Michael Yang committed
116
117
118
119
120
	if req.Format != "" && req.Format != "json" {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "format must be empty or \"json\""})
		return
	} else if req.Raw && (req.Template != "" || req.System != "" || len(req.Context) > 0) {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "raw mode does not support template, system, or context"})
Michael Yang's avatar
Michael Yang committed
121
122
123
		return
	}

Michael Yang's avatar
Michael Yang committed
124
	caps := []Capability{CapabilityCompletion}
Michael Yang's avatar
Michael Yang committed
125
	r, m, opts, err := s.scheduleRunner(c.Request.Context(), req.Model, caps, req.Options, req.KeepAlive)
Michael Yang's avatar
Michael Yang committed
126
127
128
129
	if errors.Is(err, errCapabilityCompletion) {
		c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("%q does not support generate", req.Model)})
		return
	} else if err != nil {
Michael Yang's avatar
Michael Yang committed
130
131
132
133
		handleScheduleError(c, req.Model, err)
		return
	}

134
135
	checkpointLoaded := time.Now()

Michael Yang's avatar
Michael Yang committed
136
137
138
139
140
141
142
	if req.Prompt == "" {
		c.JSON(http.StatusOK, api.GenerateResponse{
			Model:      req.Model,
			CreatedAt:  time.Now().UTC(),
			Done:       true,
			DoneReason: "load",
		})
Michael Yang's avatar
Michael Yang committed
143
144
		return
	}
Bruce MacDonald's avatar
Bruce MacDonald committed
145

Michael Yang's avatar
Michael Yang committed
146
147
148
149
	images := make([]llm.ImageData, len(req.Images))
	for i := range req.Images {
		images[i] = llm.ImageData{ID: i, Data: req.Images[i]}
	}
Bruce MacDonald's avatar
Bruce MacDonald committed
150

Michael Yang's avatar
Michael Yang committed
151
152
153
154
155
	prompt := req.Prompt
	if !req.Raw {
		var msgs []api.Message
		if req.System != "" {
			msgs = append(msgs, api.Message{Role: "system", Content: req.System})
Michael Yang's avatar
Michael Yang committed
156
157
		} else if m.System != "" {
			msgs = append(msgs, api.Message{Role: "system", Content: m.System})
158
159
		}

Michael Yang's avatar
Michael Yang committed
160
161
		for _, i := range images {
			msgs = append(msgs, api.Message{Role: "user", Content: fmt.Sprintf("[img-%d]", i.ID)})
Michael Yang's avatar
Michael Yang committed
162
163
		}

Michael Yang's avatar
Michael Yang committed
164
		msgs = append(msgs, api.Message{Role: "user", Content: req.Prompt})
Michael Yang's avatar
Michael Yang committed
165

Michael Yang's avatar
Michael Yang committed
166
		tmpl := m.Template
Michael Yang's avatar
Michael Yang committed
167
168
169
170
171
172
173
174
175
		if req.Template != "" {
			tmpl, err = template.Parse(req.Template)
			if err != nil {
				c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
				return
			}
		}

		var b bytes.Buffer
Bruce MacDonald's avatar
Bruce MacDonald committed
176
		if req.Context != nil {
Michael Yang's avatar
Michael Yang committed
177
			s, err := r.Detokenize(c.Request.Context(), req.Context)
Bruce MacDonald's avatar
Bruce MacDonald committed
178
179
180
181
182
			if err != nil {
				c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
				return
			}

Michael Yang's avatar
Michael Yang committed
183
			b.WriteString(s)
184
185
		}

Michael Yang's avatar
Michael Yang committed
186
187
188
189
		if err := tmpl.Execute(&b, template.Values{Messages: msgs}); err != nil {
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
			return
		}
190

Michael Yang's avatar
Michael Yang committed
191
		prompt = b.String()
Bruce MacDonald's avatar
Bruce MacDonald committed
192
193
	}

Michael Yang's avatar
Michael Yang committed
194
	slog.Debug("generate request", "prompt", prompt, "images", images)
195

Bruce MacDonald's avatar
Bruce MacDonald committed
196
197
	ch := make(chan any)
	go func() {
198
199
		// TODO (jmorganca): avoid building the response twice both here and below
		var sb strings.Builder
Bruce MacDonald's avatar
Bruce MacDonald committed
200
		defer close(ch)
Michael Yang's avatar
Michael Yang committed
201
		if err := r.Completion(c.Request.Context(), llm.CompletionRequest{
Michael Yang's avatar
Michael Yang committed
202
203
204
			Prompt:  prompt,
			Images:  images,
			Format:  req.Format,
Michael Yang's avatar
Michael Yang committed
205
			Options: opts,
206
207
		}, func(cr llm.CompletionResponse) {
			res := api.GenerateResponse{
208
209
				Model:      req.Model,
				CreatedAt:  time.Now().UTC(),
210
211
212
				Response:   cr.Content,
				Done:       cr.Done,
				DoneReason: cr.DoneReason,
Bruce MacDonald's avatar
Bruce MacDonald committed
213
				Metrics: api.Metrics{
214
215
216
217
					PromptEvalCount:    cr.PromptEvalCount,
					PromptEvalDuration: cr.PromptEvalDuration,
					EvalCount:          cr.EvalCount,
					EvalDuration:       cr.EvalDuration,
Bruce MacDonald's avatar
Bruce MacDonald committed
218
				},
Bruce MacDonald's avatar
Bruce MacDonald committed
219
			}
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239

			if _, err := sb.WriteString(cr.Content); err != nil {
				ch <- gin.H{"error": err.Error()}
			}

			if cr.Done {
				res.TotalDuration = time.Since(checkpointStart)
				res.LoadDuration = checkpointLoaded.Sub(checkpointStart)

				if !req.Raw {
					tokens, err := r.Tokenize(c.Request.Context(), prompt+sb.String())
					if err != nil {
						ch <- gin.H{"error": err.Error()}
						return
					}
					res.Context = append(req.Context, tokens...)
				}
			}

			ch <- res
Michael Yang's avatar
Michael Yang committed
240
		}); err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
241
242
243
244
245
			ch <- gin.H{"error": err.Error()}
		}
	}()

	if req.Stream != nil && !*req.Stream {
Michael Yang's avatar
Michael Yang committed
246
		var r api.GenerateResponse
Bruce MacDonald's avatar
Bruce MacDonald committed
247
		var sb strings.Builder
Michael Yang's avatar
Michael Yang committed
248
249
		for rr := range ch {
			switch t := rr.(type) {
250
			case api.GenerateResponse:
Michael Yang's avatar
Michael Yang committed
251
252
				sb.WriteString(t.Response)
				r = t
253
			case gin.H:
Michael Yang's avatar
Michael Yang committed
254
255
256
				msg, ok := t["error"].(string)
				if !ok {
					msg = "unexpected error format in response"
257
				}
Michael Yang's avatar
Michael Yang committed
258
259
260

				c.JSON(http.StatusInternalServerError, gin.H{"error": msg})
				return
261
			default:
Michael Yang's avatar
Michael Yang committed
262
				c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected response"})
Bruce MacDonald's avatar
Bruce MacDonald committed
263
264
265
				return
			}
		}
266

Michael Yang's avatar
Michael Yang committed
267
268
		r.Response = sb.String()
		c.JSON(http.StatusOK, r)
Bruce MacDonald's avatar
Bruce MacDonald committed
269
270
271
272
273
274
		return
	}

	streamResponse(c, ch)
}

275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
// EmbedHandler serves an embedding request whose input is either a single
// string or an array of strings.
//
// Inputs longer than the usable context length are truncated to that length
// unless the request sets Truncate to false, in which case the request is
// rejected with 400. Every returned embedding is passed through normalize. An
// empty input yields an empty embeddings list without scheduling a runner.
func (s *Server) EmbedHandler(c *gin.Context) {
	var req api.EmbedRequest
	err := c.ShouldBindJSON(&req)
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	// Truncation is on unless the request explicitly turns it off.
	truncate := req.Truncate == nil || *req.Truncate

	var input []string

	switch i := req.Input.(type) {
	case string:
		if len(i) > 0 {
			input = append(input, i)
		}
	case []any:
		for _, v := range i {
			str, ok := v.(string)
			if !ok {
				c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "invalid input type"})
				return
			}
			input = append(input, str)
		}
	default:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "invalid input type"})
		return
	}

	if len(input) == 0 {
		c.JSON(http.StatusOK, api.EmbedResponse{Model: req.Model, Embeddings: [][]float32{}})
		return
	}

	r, m, opts, err := s.scheduleRunner(c.Request.Context(), req.Model, []Capability{}, req.Options, req.KeepAlive)
	if err != nil {
		handleScheduleError(c, req.Model, err)
		return
	}

	kvData, err := getKVData(m.ModelPath, false)
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

	// The usable context length is the smaller of the requested context size
	// and the model's own context length. It is the same for every input.
	ctxLen := min(opts.NumCtx, int(kvData.ContextLength()))

	for i, text := range input {
		tokens, err := r.Tokenize(c.Request.Context(), text)
		if err != nil {
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
			return
		}

		if len(tokens) <= ctxLen {
			continue
		}

		if !truncate {
			c.JSON(http.StatusBadRequest, gin.H{"error": "input length exceeds maximum context length"})
			return
		}

		// Cut at the token level, then convert back to text for the runner.
		truncated, err := r.Detokenize(c.Request.Context(), tokens[:ctxLen])
		if err != nil {
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
			return
		}

		input[i] = truncated
	}

	embeddings, err := r.Embed(c.Request.Context(), input)
	if err != nil {
		slog.Error("embedding generation failed", "error", err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate embedding"})
		return
	}

	for i, e := range embeddings {
		embeddings[i] = normalize(e)
	}

	resp := api.EmbedResponse{
		Model:      req.Model,
		Embeddings: embeddings,
	}
	c.JSON(http.StatusOK, resp)
}

// normalize scales vec in place to unit Euclidean length and returns it.
// A vector whose squared sum is not positive (for example all zeros) is
// multiplied by zero rather than divided by its length.
func normalize(vec []float32) []float32 {
	var sumSquares float32
	for i := range vec {
		sumSquares += vec[i] * vec[i]
	}

	// scale stays 0 when there is no positive magnitude to divide by.
	var scale float32
	if sumSquares > 0 {
		scale = float32(1.0 / math.Sqrt(float64(sumSquares)))
	}

	for i, v := range vec {
		vec[i] = v * scale
	}
	return vec
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
390
func (s *Server) EmbeddingsHandler(c *gin.Context) {
Bruce MacDonald's avatar
Bruce MacDonald committed
391
	var req api.EmbeddingRequest
Michael Yang's avatar
Michael Yang committed
392
	if err := c.ShouldBindJSON(&req); errors.Is(err, io.EOF) {
Bruce MacDonald's avatar
Bruce MacDonald committed
393
394
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
Michael Yang's avatar
Michael Yang committed
395
	} else if err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
396
397
398
399
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

Michael Yang's avatar
Michael Yang committed
400
	r, _, _, err := s.scheduleRunner(c.Request.Context(), req.Model, []Capability{}, req.Options, req.KeepAlive)
Bruce MacDonald's avatar
Bruce MacDonald committed
401
	if err != nil {
Michael Yang's avatar
Michael Yang committed
402
		handleScheduleError(c, req.Model, err)
Bruce MacDonald's avatar
Bruce MacDonald committed
403
404
405
		return
	}

406
407
408
	// an empty request loads the model
	if req.Prompt == "" {
		c.JSON(http.StatusOK, api.EmbeddingResponse{Embedding: []float64{}})
Bruce MacDonald's avatar
Bruce MacDonald committed
409
410
411
		return
	}

412
413
	embeddings, err := r.Embed(c.Request.Context(), []string{req.Prompt})

Bruce MacDonald's avatar
Bruce MacDonald committed
414
	if err != nil {
415
		slog.Info(fmt.Sprintf("embedding generation failed: %v", err))
Bruce MacDonald's avatar
Bruce MacDonald committed
416
417
418
419
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate embedding"})
		return
	}

420
421
422
423
424
425
426
427
428
429
	embedding := make([]float64, len(embeddings[0]))

	for i, v := range embeddings[0] {
		embedding[i] = float64(v)
	}

	resp := api.EmbeddingResponse{
		Embedding: embedding,
	}
	c.JSON(http.StatusOK, resp)
Bruce MacDonald's avatar
Bruce MacDonald committed
430
431
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
432
func (s *Server) PullModelHandler(c *gin.Context) {
Michael Yang's avatar
Michael Yang committed
433
	var req api.PullRequest
Michael Yang's avatar
Michael Yang committed
434
435
436
437
438
439
440
	err := c.ShouldBindJSON(&req)
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Michael Yang's avatar
Michael Yang committed
441
442
443
		return
	}

444
445
446
447
448
449
450
451
	name := model.ParseName(cmp.Or(req.Model, req.Name))
	if !name.IsValid() {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "invalid model name"})
		return
	}

	if err := checkNameExists(name); err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
452
453
454
		return
	}

455
456
457
	ch := make(chan any)
	go func() {
		defer close(ch)
458
459
		fn := func(r api.ProgressResponse) {
			ch <- r
460
		}
461

Michael Yang's avatar
Michael Yang committed
462
		regOpts := &registryOptions{
463
464
465
			Insecure: req.Insecure,
		}

466
467
468
		ctx, cancel := context.WithCancel(c.Request.Context())
		defer cancel()

469
		if err := PullModel(ctx, name.DisplayShortest(), regOpts, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
470
			ch <- gin.H{"error": err.Error()}
471
472
473
		}
	}()

474
475
476
477
478
	if req.Stream != nil && !*req.Stream {
		waitForStream(c, ch)
		return
	}

479
480
481
	streamResponse(c, ch)
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
482
func (s *Server) PushModelHandler(c *gin.Context) {
483
	var req api.PushRequest
Michael Yang's avatar
Michael Yang committed
484
485
486
487
488
489
490
	err := c.ShouldBindJSON(&req)
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Michael Yang's avatar
Michael Yang committed
491
492
		return
	}
Michael Yang's avatar
Michael Yang committed
493

Michael Yang's avatar
Michael Yang committed
494
495
496
497
498
499
500
	var model string
	if req.Model != "" {
		model = req.Model
	} else if req.Name != "" {
		model = req.Name
	} else {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
501
502
503
		return
	}

504
505
506
	ch := make(chan any)
	go func() {
		defer close(ch)
507
508
		fn := func(r api.ProgressResponse) {
			ch <- r
509
		}
510

Michael Yang's avatar
Michael Yang committed
511
		regOpts := &registryOptions{
512
513
514
			Insecure: req.Insecure,
		}

Michael Yang's avatar
Michael Yang committed
515
516
517
		ctx, cancel := context.WithCancel(c.Request.Context())
		defer cancel()

Michael Yang's avatar
Michael Yang committed
518
		if err := PushModel(ctx, model, regOpts, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
519
			ch <- gin.H{"error": err.Error()}
520
521
522
		}
	}()

523
524
525
526
527
	if req.Stream != nil && !*req.Stream {
		waitForStream(c, ch)
		return
	}

528
529
530
	streamResponse(c, ch)
}

531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
// checkNameExists reports an error when a different existing model has a
// filepath equal to name's under case-insensitive comparison. An existing
// model that is exactly equal to name is not a conflict. Errors from listing
// the manifests are returned unchanged.
func checkNameExists(name model.Name) error {
	existing, err := Manifests()
	if err != nil {
		return err
	}

	for candidate := range existing {
		if candidate == name {
			// The very same name is allowed.
			continue
		}

		if strings.EqualFold(candidate.Filepath(), name.Filepath()) {
			return errors.New("a model with that name already exists")
		}
	}

	return nil
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
546
func (s *Server) CreateModelHandler(c *gin.Context) {
547
548
	var r api.CreateRequest
	if err := c.ShouldBindJSON(&r); errors.Is(err, io.EOF) {
Michael Yang's avatar
Michael Yang committed
549
550
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
Michael Yang's avatar
Michael Yang committed
551
	} else if err != nil {
Michael Yang's avatar
Michael Yang committed
552
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Michael Yang's avatar
Michael Yang committed
553
		return
554
555
	}

556
	name := model.ParseName(cmp.Or(r.Model, r.Name))
Michael Yang's avatar
Michael Yang committed
557
	if !name.IsValid() {
558
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": errtypes.InvalidModelNameErrMsg})
559
560
561
		return
	}

562
563
564
565
566
	if err := checkNameExists(name); err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

567
	if r.Path == "" && r.Modelfile == "" {
Michael Yang's avatar
Michael Yang committed
568
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "path or modelfile are required"})
Michael Yang's avatar
Michael Yang committed
569
570
		return
	}
Michael Yang's avatar
Michael Yang committed
571

572
573
574
	var sr io.Reader = strings.NewReader(r.Modelfile)
	if r.Path != "" && r.Modelfile == "" {
		f, err := os.Open(r.Path)
Michael Yang's avatar
Michael Yang committed
575
576
577
578
		if err != nil {
			c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("error reading modelfile: %s", err)})
			return
		}
Michael Yang's avatar
Michael Yang committed
579
		defer f.Close()
Michael Yang's avatar
Michael Yang committed
580

581
		sr = f
Michael Yang's avatar
Michael Yang committed
582
	}
Michael Yang's avatar
Michael Yang committed
583

584
	f, err := parser.ParseFile(sr)
Michael Yang's avatar
Michael Yang committed
585
586
587
588
589
	if err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

Michael Yang's avatar
Michael Yang committed
590
	ch := make(chan any)
Michael Yang's avatar
Michael Yang committed
591
592
	go func() {
		defer close(ch)
593
594
		fn := func(resp api.ProgressResponse) {
			ch <- resp
595
596
		}

597
598
599
		ctx, cancel := context.WithCancel(c.Request.Context())
		defer cancel()

600
601
		quantization := cmp.Or(r.Quantize, r.Quantization)
		if err := CreateModel(ctx, name, filepath.Dir(r.Path), strings.ToUpper(quantization), f, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
602
			ch <- gin.H{"error": err.Error()}
603
		}
Michael Yang's avatar
Michael Yang committed
604
	}()
Michael Yang's avatar
Michael Yang committed
605

606
	if r.Stream != nil && !*r.Stream {
607
608
609
610
		waitForStream(c, ch)
		return
	}

Michael Yang's avatar
Michael Yang committed
611
	streamResponse(c, ch)
Bruce MacDonald's avatar
Bruce MacDonald committed
612
613
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
614
func (s *Server) DeleteModelHandler(c *gin.Context) {
615
616
	var r api.DeleteRequest
	if err := c.ShouldBindJSON(&r); errors.Is(err, io.EOF) {
Michael Yang's avatar
Michael Yang committed
617
618
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
619
	} else if err != nil {
Michael Yang's avatar
Michael Yang committed
620
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
621
622
623
		return
	}

624
625
626
	n := model.ParseName(cmp.Or(r.Model, r.Name))
	if !n.IsValid() {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("name %q is invalid", cmp.Or(r.Model, r.Name))})
627
628
		return
	}
Michael Yang's avatar
Michael Yang committed
629

630
	m, err := ParseNamedManifest(n)
Michael Yang's avatar
Michael Yang committed
631
632
633
634
635
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

636
	if err := m.Remove(); err != nil {
Michael Yang's avatar
Michael Yang committed
637
638
639
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
640
641
642
643
644

	if err := m.RemoveLayers(); err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
645
646
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
647
func (s *Server) ShowModelHandler(c *gin.Context) {
Patrick Devine's avatar
Patrick Devine committed
648
	var req api.ShowRequest
Michael Yang's avatar
Michael Yang committed
649
650
651
652
653
654
655
	err := c.ShouldBindJSON(&req)
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Patrick Devine's avatar
Patrick Devine committed
656
657
658
		return
	}

Michael Yang's avatar
Michael Yang committed
659
	if req.Model != "" {
Michael Yang's avatar
Michael Yang committed
660
		// noop
Michael Yang's avatar
Michael Yang committed
661
	} else if req.Name != "" {
Michael Yang's avatar
Michael Yang committed
662
		req.Model = req.Name
Michael Yang's avatar
Michael Yang committed
663
	} else {
664
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
665
666
667
		return
	}

668
	resp, err := GetModelInfo(req)
Patrick Devine's avatar
Patrick Devine committed
669
	if err != nil {
670
671
		switch {
		case os.IsNotExist(err):
Michael Yang's avatar
Michael Yang committed
672
			c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Model)})
673
674
675
		case err.Error() == "invalid model name":
			c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		default:
Patrick Devine's avatar
Patrick Devine committed
676
677
678
679
680
681
682
683
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		}
		return
	}

	c.JSON(http.StatusOK, resp)
}

684
func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {
685
	m, err := GetModel(req.Model)
Patrick Devine's avatar
Patrick Devine committed
686
687
688
689
	if err != nil {
		return nil, err
	}

Patrick Devine's avatar
Patrick Devine committed
690
	modelDetails := api.ModelDetails{
691
692
693
694
695
696
		ParentModel:       m.ParentModel,
		Format:            m.Config.ModelFormat,
		Family:            m.Config.ModelFamily,
		Families:          m.Config.ModelFamilies,
		ParameterSize:     m.Config.ModelType,
		QuantizationLevel: m.Config.FileType,
Patrick Devine's avatar
Patrick Devine committed
697
698
	}

699
	if req.System != "" {
700
		m.System = req.System
701
702
	}

Michael Yang's avatar
Michael Yang committed
703
704
705
	msgs := make([]api.Message, len(m.Messages))
	for i, msg := range m.Messages {
		msgs[i] = api.Message{Role: msg.Role, Content: msg.Content}
706
707
	}

708
709
710
711
712
713
714
715
716
717
	n := model.ParseName(req.Model)
	if !n.IsValid() {
		return nil, fmt.Errorf("invalid model name")
	}

	manifest, err := ParseNamedManifest(n)
	if err != nil {
		return nil, err
	}

Patrick Devine's avatar
Patrick Devine committed
718
	resp := &api.ShowResponse{
719
720
		License:    strings.Join(m.License, "\n"),
		System:     m.System,
Michael Yang's avatar
Michael Yang committed
721
		Template:   m.Template.String(),
722
723
724
		Details:    modelDetails,
		Messages:   msgs,
		ModifiedAt: manifest.fi.ModTime(),
Patrick Devine's avatar
Patrick Devine committed
725
726
727
728
	}

	var params []string
	cs := 30
729
	for k, v := range m.Options {
Patrick Devine's avatar
Patrick Devine committed
730
731
732
		switch val := v.(type) {
		case []interface{}:
			for _, nv := range val {
Patrick Devine's avatar
Patrick Devine committed
733
				params = append(params, fmt.Sprintf("%-*s %#v", cs, k, nv))
Patrick Devine's avatar
Patrick Devine committed
734
			}
Patrick Devine's avatar
Patrick Devine committed
735
736
		default:
			params = append(params, fmt.Sprintf("%-*s %#v", cs, k, v))
Patrick Devine's avatar
Patrick Devine committed
737
738
739
740
		}
	}
	resp.Parameters = strings.Join(params, "\n")

741
742
	for k, v := range req.Options {
		if _, ok := req.Options[k]; ok {
743
			m.Options[k] = v
744
745
746
		}
	}

747
	var sb strings.Builder
748
	fmt.Fprintln(&sb, "# Modelfile generated by \"ollama show\"")
749
	fmt.Fprintln(&sb, "# To build a new Modelfile based on this, replace FROM with:")
750
751
	fmt.Fprintf(&sb, "# FROM %s\n\n", m.ShortName)
	fmt.Fprint(&sb, m.String())
752
	resp.Modelfile = sb.String()
753

754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
	kvData, err := getKVData(m.ModelPath, req.Verbose)
	if err != nil {
		return nil, err
	}
	delete(kvData, "general.name")
	delete(kvData, "tokenizer.chat_template")
	resp.ModelInfo = kvData

	if len(m.ProjectorPaths) > 0 {
		projectorData, err := getKVData(m.ProjectorPaths[0], req.Verbose)
		if err != nil {
			return nil, err
		}
		resp.ProjectorInfo = projectorData
	}

Patrick Devine's avatar
Patrick Devine committed
770
771
772
	return resp, nil
}

773
// getKVData loads the model file at digest and returns its key/value metadata.
//
// In verbose mode the metadata is returned as loaded. Otherwise every array
// value with more than five elements is replaced by an empty array.
func getKVData(digest string, verbose bool) (llm.KV, error) {
	// The loader receives -1 in verbose mode and 0 otherwise; presumably this
	// caps how many array elements it reads (confirm against llm.LoadModel).
	maxArraySize := 0
	if verbose {
		maxArraySize = -1
	}

	loaded, err := llm.LoadModel(digest, maxArraySize)
	if err != nil {
		return nil, err
	}

	kv := loaded.KV()
	if verbose {
		return kv, nil
	}

	for key, val := range kv {
		if arr, isArray := val.([]any); isArray && len(arr) > 5 {
			kv[key] = []any{}
		}
	}

	return kv, nil
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
796
func (s *Server) ListModelsHandler(c *gin.Context) {
797
	ms, err := Manifests()
Patrick Devine's avatar
Patrick Devine committed
798
799
800
801
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
Michael Yang's avatar
Michael Yang committed
802

803
	models := []api.ListModelResponse{}
804
805
806
807
808
809
810
	for n, m := range ms {
		f, err := m.Config.Open()
		if err != nil {
			slog.Warn("bad manifest filepath", "name", n, "error", err)
			continue
		}
		defer f.Close()
811

812
813
814
815
		var cf ConfigV2
		if err := json.NewDecoder(f).Decode(&cf); err != nil {
			slog.Warn("bad manifest config", "name", n, "error", err)
			continue
Patrick Devine's avatar
Patrick Devine committed
816
		}
Michael Yang's avatar
Michael Yang committed
817

818
		// tag should never be masked
819
		models = append(models, api.ListModelResponse{
820
821
822
823
824
825
826
827
828
829
830
831
832
			Model:      n.DisplayShortest(),
			Name:       n.DisplayShortest(),
			Size:       m.Size(),
			Digest:     m.digest,
			ModifiedAt: m.fi.ModTime(),
			Details: api.ModelDetails{
				Format:            cf.ModelFormat,
				Family:            cf.ModelFamily,
				Families:          cf.ModelFamilies,
				ParameterSize:     cf.ModelType,
				QuantizationLevel: cf.FileType,
			},
		})
Patrick Devine's avatar
Patrick Devine committed
833
834
	}

835
	slices.SortStableFunc(models, func(i, j api.ListModelResponse) int {
836
837
838
839
		// most recently modified first
		return cmp.Compare(j.ModifiedAt.Unix(), i.ModifiedAt.Unix())
	})

Michael Yang's avatar
Michael Yang committed
840
	c.JSON(http.StatusOK, api.ListResponse{Models: models})
Patrick Devine's avatar
Patrick Devine committed
841
842
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
843
func (s *Server) CopyModelHandler(c *gin.Context) {
Michael Yang's avatar
Michael Yang committed
844
845
	var r api.CopyRequest
	if err := c.ShouldBindJSON(&r); errors.Is(err, io.EOF) {
Michael Yang's avatar
Michael Yang committed
846
847
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
Michael Yang's avatar
Michael Yang committed
848
	} else if err != nil {
Michael Yang's avatar
Michael Yang committed
849
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Patrick Devine's avatar
Patrick Devine committed
850
851
852
		return
	}

Michael Yang's avatar
Michael Yang committed
853
854
	src := model.ParseName(r.Source)
	if !src.IsValid() {
Michael Yang's avatar
Michael Yang committed
855
856
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("source %q is invalid", r.Source)})
		return
857
858
	}

Michael Yang's avatar
Michael Yang committed
859
860
	dst := model.ParseName(r.Destination)
	if !dst.IsValid() {
861
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("destination %q is invalid", r.Destination)})
Patrick Devine's avatar
Patrick Devine committed
862
863
		return
	}
Michael Yang's avatar
Michael Yang committed
864

865
866
867
868
869
	if err := checkNameExists(dst); err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

Michael Yang's avatar
Michael Yang committed
870
871
872
873
874
	if err := CopyModel(src, dst); errors.Is(err, os.ErrNotExist) {
		c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model %q not found", r.Source)})
	} else if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
	}
Patrick Devine's avatar
Patrick Devine committed
875
876
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
877
func (s *Server) HeadBlobHandler(c *gin.Context) {
Michael Yang's avatar
Michael Yang committed
878
879
880
881
882
883
884
885
886
887
888
	path, err := GetBlobsPath(c.Param("digest"))
	if err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	if _, err := os.Stat(path); err != nil {
		c.AbortWithStatusJSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("blob %q not found", c.Param("digest"))})
		return
	}

Michael Yang's avatar
Michael Yang committed
889
	c.Status(http.StatusOK)
Michael Yang's avatar
Michael Yang committed
890
891
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
892
func (s *Server) CreateBlobHandler(c *gin.Context) {
Michael Yang's avatar
Michael Yang committed
893
894
	if ib, ok := intermediateBlobs[c.Param("digest")]; ok {
		p, err := GetBlobsPath(ib)
895
896
897
898
899
900
		if err != nil {
			c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
			return
		}

		if _, err := os.Stat(p); errors.Is(err, os.ErrNotExist) {
Michael Yang's avatar
Michael Yang committed
901
902
			slog.Info("evicting intermediate blob which no longer exists", "digest", ib)
			delete(intermediateBlobs, c.Param("digest"))
903
904
905
906
907
908
909
910
911
		} else if err != nil {
			c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
			return
		} else {
			c.Status(http.StatusOK)
			return
		}
	}

912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
	path, err := GetBlobsPath(c.Param("digest"))
	if err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	_, err = os.Stat(path)
	switch {
	case errors.Is(err, os.ErrNotExist):
		// noop
	case err != nil:
		c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	default:
		c.Status(http.StatusOK)
		return
	}

930
	layer, err := NewLayer(c.Request.Body, "")
Michael Yang's avatar
Michael Yang committed
931
932
933
934
935
	if err != nil {
		c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

936
937
	if layer.Digest != c.Param("digest") {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("digest mismatch, expected %q, got %q", c.Param("digest"), layer.Digest)})
Michael Yang's avatar
Michael Yang committed
938
939
940
		return
	}

Michael Yang's avatar
Michael Yang committed
941
	c.Status(http.StatusCreated)
Michael Yang's avatar
Michael Yang committed
942
943
}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
// isLocalIP reports whether ip is assigned to one of this machine's network
// interfaces. Addresses are compared by their string form. If the interface
// list cannot be read the result is false, and interfaces whose addresses
// cannot be listed are skipped.
func isLocalIP(ip netip.Addr) bool {
	ifaces, err := net.Interfaces()
	if err != nil {
		return false
	}

	want := ip.String()
	for _, iface := range ifaces {
		addrs, err := iface.Addrs()
		if err != nil {
			continue
		}

		for _, a := range addrs {
			parsed, _, err := net.ParseCIDR(a.String())
			if err != nil {
				continue
			}

			if parsed.String() == want {
				return true
			}
		}
	}

	return false
}

965
// allowedHost reports whether host is a name considered local: the empty
// string, "localhost", this machine's hostname, or any name ending in
// ".localhost", ".local" or ".internal". The comparison is exact and
// case-sensitive.
func allowedHost(host string) bool {
	switch host {
	case "", "localhost":
		return true
	}

	if name, err := os.Hostname(); err == nil && name == host {
		return true
	}

	// check if the host is a local TLD
	for _, suffix := range [...]string{".localhost", ".local", ".internal"} {
		if strings.HasSuffix(host, suffix) {
			return true
		}
	}

	return false
}
989

Jeffrey Morgan's avatar
Jeffrey Morgan committed
990
991
992
func allowedHostsMiddleware(addr net.Addr) gin.HandlerFunc {
	return func(c *gin.Context) {
		if addr == nil {
993
994
995
996
			c.Next()
			return
		}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
997
		if addr, err := netip.ParseAddrPort(addr.String()); err == nil && !addr.Addr().IsLoopback() {
998
999
1000
1001
1002
1003
1004
1005
1006
			c.Next()
			return
		}

		host, _, err := net.SplitHostPort(c.Request.Host)
		if err != nil {
			host = c.Request.Host
		}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
1007
		if addr, err := netip.ParseAddr(host); err == nil {
Jeffrey Morgan's avatar
Jeffrey Morgan committed
1008
			if addr.IsLoopback() || addr.IsPrivate() || addr.IsUnspecified() || isLocalIP(addr) {
Jeffrey Morgan's avatar
Jeffrey Morgan committed
1009
1010
1011
1012
1013
				c.Next()
				return
			}
		}

1014
		if allowedHost(host) {
Michael Yang's avatar
lint  
Michael Yang committed
1015
			if c.Request.Method == http.MethodOptions {
1016
1017
1018
1019
				c.AbortWithStatus(http.StatusNoContent)
				return
			}

1020
1021
1022
1023
1024
1025
			c.Next()
			return
		}

		c.AbortWithStatus(http.StatusForbidden)
	}
1026
}
1027

1028
func (s *Server) GenerateRoutes() http.Handler {
Michael Yang's avatar
Michael Yang committed
1029
1030
	config := cors.DefaultConfig()
	config.AllowWildcard = true
1031
	config.AllowBrowserExtensions = true
1032
	config.AllowHeaders = []string{"Authorization", "Content-Type", "User-Agent", "Accept", "X-Requested-With"}
royjhan's avatar
royjhan committed
1033
1034
1035
1036
	openAIProperties := []string{"lang", "package-version", "os", "arch", "runtime", "runtime-version", "async"}
	for _, prop := range openAIProperties {
		config.AllowHeaders = append(config.AllowHeaders, "x-stainless-"+prop)
	}
1037
	config.AllowOrigins = envconfig.AllowOrigins
Michael Yang's avatar
Michael Yang committed
1038

Bruce MacDonald's avatar
Bruce MacDonald committed
1039
	r := gin.Default()
1040
1041
	r.Use(
		cors.New(config),
1042
		allowedHostsMiddleware(s.addr),
1043
	)
Bruce MacDonald's avatar
Bruce MacDonald committed
1044

Daniel Hiltgen's avatar
Daniel Hiltgen committed
1045
1046
1047
	r.POST("/api/pull", s.PullModelHandler)
	r.POST("/api/generate", s.GenerateHandler)
	r.POST("/api/chat", s.ChatHandler)
1048
	r.POST("/api/embed", s.EmbedHandler)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
1049
1050
1051
1052
1053
1054
1055
1056
	r.POST("/api/embeddings", s.EmbeddingsHandler)
	r.POST("/api/create", s.CreateModelHandler)
	r.POST("/api/push", s.PushModelHandler)
	r.POST("/api/copy", s.CopyModelHandler)
	r.DELETE("/api/delete", s.DeleteModelHandler)
	r.POST("/api/show", s.ShowModelHandler)
	r.POST("/api/blobs/:digest", s.CreateBlobHandler)
	r.HEAD("/api/blobs/:digest", s.HeadBlobHandler)
1057
	r.GET("/api/ps", s.ProcessHandler)
Jeffrey Morgan's avatar
Jeffrey Morgan committed
1058

1059
	// Compatibility endpoints
1060
	r.POST("/v1/chat/completions", openai.ChatMiddleware(), s.ChatHandler)
1061
	r.POST("/v1/completions", openai.CompletionsMiddleware(), s.GenerateHandler)
1062
1063
	r.GET("/v1/models", openai.ListMiddleware(), s.ListModelsHandler)
	r.GET("/v1/models/:model", openai.RetrieveMiddleware(), s.ShowModelHandler)
1064

Michael Yang's avatar
Michael Yang committed
1065
1066
1067
1068
1069
	for _, method := range []string{http.MethodGet, http.MethodHead} {
		r.Handle(method, "/", func(c *gin.Context) {
			c.String(http.StatusOK, "Ollama is running")
		})

Daniel Hiltgen's avatar
Daniel Hiltgen committed
1070
		r.Handle(method, "/api/tags", s.ListModelsHandler)
Michael Yang's avatar
Michael Yang committed
1071
1072
1073
		r.Handle(method, "/api/version", func(c *gin.Context) {
			c.JSON(http.StatusOK, gin.H{"version": version.Version})
		})
Michael Yang's avatar
Michael Yang committed
1074
1075
	}

1076
1077
1078
1079
	return r
}

func Serve(ln net.Listener) error {
Michael Yang's avatar
Michael Yang committed
1080
	level := slog.LevelInfo
1081
	if envconfig.Debug {
Michael Yang's avatar
Michael Yang committed
1082
		level = slog.LevelDebug
1083
	}
Michael Yang's avatar
Michael Yang committed
1084

1085
	slog.Info("server config", "env", envconfig.Values())
Michael Yang's avatar
Michael Yang committed
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
	handler := slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{
		Level:     level,
		AddSource: true,
		ReplaceAttr: func(_ []string, attr slog.Attr) slog.Attr {
			if attr.Key == slog.SourceKey {
				source := attr.Value.Any().(*slog.Source)
				source.File = filepath.Base(source.File)
			}

			return attr
		},
	})

	slog.SetDefault(slog.New(handler))

1101
1102
1103
1104
1105
1106
1107
1108
	blobsDir, err := GetBlobsPath("")
	if err != nil {
		return err
	}
	if err := fixBlobs(blobsDir); err != nil {
		return err
	}

1109
	if !envconfig.NoPrune {
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
		// clean up unused layers and manifests
		if err := PruneLayers(); err != nil {
			return err
		}

		manifestsPath, err := GetManifestPath()
		if err != nil {
			return err
		}

		if err := PruneDirectory(manifestsPath); err != nil {
			return err
		}
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
1125
	ctx, done := context.WithCancel(context.Background())
Daniel Hiltgen's avatar
Daniel Hiltgen committed
1126
1127
	schedCtx, schedDone := context.WithCancel(ctx)
	sched := InitScheduler(schedCtx)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
1128
	s := &Server{addr: ln.Addr(), sched: sched}
1129
1130

	http.Handle("/", s.GenerateRoutes())
1131

1132
	slog.Info(fmt.Sprintf("Listening on %s (version %s)", ln.Addr(), version.Version))
1133
	srvr := &http.Server{
1134
1135
1136
1137
1138
1139
1140
1141
1142
		// Use http.DefaultServeMux so we get net/http/pprof for
		// free.
		//
		// TODO(bmizerany): Decide if we want to make this
		// configurable so it is not exposed by default, or allow
		// users to bind it to a different port. This was a quick
		// and easy way to get pprof, but it may not be the best
		// way.
		Handler: nil,
Jeffrey Morgan's avatar
Jeffrey Morgan committed
1143
1144
	}

1145
1146
	// listen for a ctrl+c and stop any loaded llm
	signals := make(chan os.Signal, 1)
1147
	signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM)
1148
1149
	go func() {
		<-signals
1150
		srvr.Close()
Daniel Hiltgen's avatar
Daniel Hiltgen committed
1151
		schedDone()
Daniel Hiltgen's avatar
Daniel Hiltgen committed
1152
		sched.unloadAllRunners()
1153
		gpu.Cleanup()
Daniel Hiltgen's avatar
Daniel Hiltgen committed
1154
		done()
1155
1156
	}()

Daniel Hiltgen's avatar
Daniel Hiltgen committed
1157
	if err := llm.Init(); err != nil {
1158
1159
		return fmt.Errorf("unable to initialize llm library %w", err)
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
1160

Daniel Hiltgen's avatar
Daniel Hiltgen committed
1161
	s.sched.Run(schedCtx)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
1162
1163
1164

	// At startup we retrieve GPU information so we can get log messages before loading a model
	// This will log warnings to the log in case we have problems with detected GPUs
Daniel Hiltgen's avatar
Daniel Hiltgen committed
1165
1166
	gpus := gpu.GetGPUInfo()
	gpus.LogDetails()
1167

Daniel Hiltgen's avatar
Daniel Hiltgen committed
1168
1169
1170
1171
1172
1173
1174
	err = srvr.Serve(ln)
	// If server is closed from the signal handler, wait for the ctx to be done
	// otherwise error out quickly
	if !errors.Is(err, http.ErrServerClosed) {
		return err
	}
	<-ctx.Done()
1175
	return nil
Jeffrey Morgan's avatar
Jeffrey Morgan committed
1176
}
Michael Yang's avatar
Michael Yang committed
1177

1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
// waitForStream drains ch and writes one JSON response describing the final
// outcome: 200 with the first api.ProgressResponse whose Status is
// "success", or 500 for an error message, an unexpected value type, or a
// channel that closes before success is seen. Progress responses with any
// other status are skipped.
func waitForStream(c *gin.Context, ch chan interface{}) {
	c.Header("Content-Type", "application/json")

	for resp := range ch {
		switch r := resp.(type) {
		case api.ProgressResponse:
			if r.Status != "success" {
				// Intermediate progress update; keep waiting.
				continue
			}

			c.JSON(http.StatusOK, r)
			return
		case gin.H:
			msg, ok := r["error"].(string)
			if !ok {
				msg = "unexpected error format in progress response"
			}

			c.JSON(http.StatusInternalServerError, gin.H{"error": msg})
			return
		default:
			c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected progress response"})
			return
		}
	}

	c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected end of progress response"})
}

Michael Yang's avatar
Michael Yang committed
1203
func streamResponse(c *gin.Context, ch chan any) {
1204
	c.Header("Content-Type", "application/x-ndjson")
Michael Yang's avatar
Michael Yang committed
1205
1206
1207
1208
1209
1210
1211
1212
	c.Stream(func(w io.Writer) bool {
		val, ok := <-ch
		if !ok {
			return false
		}

		bts, err := json.Marshal(val)
		if err != nil {
1213
			slog.Info(fmt.Sprintf("streamResponse: json.Marshal failed with %s", err))
Michael Yang's avatar
Michael Yang committed
1214
1215
1216
			return false
		}

1217
		// Delineate chunks with new-line delimiter
Michael Yang's avatar
Michael Yang committed
1218
1219
		bts = append(bts, '\n')
		if _, err := w.Write(bts); err != nil {
1220
			slog.Info(fmt.Sprintf("streamResponse: w.Write failed with %s", err))
Michael Yang's avatar
Michael Yang committed
1221
1222
1223
1224
1225
1226
			return false
		}

		return true
	})
}
Bruce MacDonald's avatar
Bruce MacDonald committed
1227

1228
func (s *Server) ProcessHandler(c *gin.Context) {
1229
	models := []api.ProcessModelResponse{}
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240

	for _, v := range s.sched.loaded {
		model := v.model
		modelDetails := api.ModelDetails{
			Format:            model.Config.ModelFormat,
			Family:            model.Config.ModelFamily,
			Families:          model.Config.ModelFamilies,
			ParameterSize:     model.Config.ModelType,
			QuantizationLevel: model.Config.FileType,
		}

1241
		mr := api.ProcessModelResponse{
1242
1243
1244
1245
1246
1247
1248
1249
			Model:     model.ShortName,
			Name:      model.ShortName,
			Size:      int64(v.estimatedTotal),
			SizeVRAM:  int64(v.estimatedVRAM),
			Digest:    model.Digest,
			Details:   modelDetails,
			ExpiresAt: v.expiresAt,
		}
1250
1251
1252
1253
1254
1255
1256
1257
		// The scheduler waits to set expiresAt, so if a model is loading it's
		// possible that it will be set to the unix epoch. For those cases, just
		// calculate the time w/ the sessionDuration instead.
		var epoch time.Time
		if v.expiresAt == epoch {
			mr.ExpiresAt = time.Now().Add(v.sessionDuration)
		}

1258
1259
1260
		models = append(models, mr)
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
1261
1262
1263
1264
1265
	slices.SortStableFunc(models, func(i, j api.ProcessModelResponse) int {
		// longest duration remaining listed first
		return cmp.Compare(j.ExpiresAt.Unix(), i.ExpiresAt.Unix())
	})

1266
	c.JSON(http.StatusOK, api.ProcessResponse{Models: models})
1267
1268
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
1269
func (s *Server) ChatHandler(c *gin.Context) {
1270
1271
	checkpointStart := time.Now()

Bruce MacDonald's avatar
Bruce MacDonald committed
1272
	var req api.ChatRequest
Michael Yang's avatar
Michael Yang committed
1273
	if err := c.ShouldBindJSON(&req); errors.Is(err, io.EOF) {
Bruce MacDonald's avatar
Bruce MacDonald committed
1274
1275
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
Michael Yang's avatar
Michael Yang committed
1276
	} else if err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
1277
1278
1279
1280
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

Michael Yang's avatar
Michael Yang committed
1281
	caps := []Capability{CapabilityCompletion}
Michael Yang's avatar
Michael Yang committed
1282
	r, m, opts, err := s.scheduleRunner(c.Request.Context(), req.Model, caps, req.Options, req.KeepAlive)
Michael Yang's avatar
Michael Yang committed
1283
1284
	if errors.Is(err, errCapabilityCompletion) {
		c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("%q does not support chat", req.Model)})
Bruce MacDonald's avatar
Bruce MacDonald committed
1285
		return
Michael Yang's avatar
Michael Yang committed
1286
	} else if err != nil {
Michael Yang's avatar
Michael Yang committed
1287
		handleScheduleError(c, req.Model, err)
Bruce MacDonald's avatar
Bruce MacDonald committed
1288
1289
		return
	}
Michael Yang's avatar
Michael Yang committed
1290

1291
1292
	checkpointLoaded := time.Now()

Michael Yang's avatar
Michael Yang committed
1293
1294
	if len(req.Messages) == 0 {
		c.JSON(http.StatusOK, api.ChatResponse{
1295
			Model:      req.Model,
Michael Yang's avatar
Michael Yang committed
1296
1297
			CreatedAt:  time.Now().UTC(),
			Message:    api.Message{Role: "assistant"},
1298
1299
			Done:       true,
			DoneReason: "load",
Michael Yang's avatar
Michael Yang committed
1300
		})
1301
1302
1303
		return
	}

1304
1305
1306
1307
	if req.Messages[0].Role != "system" {
		req.Messages = append([]api.Message{{Role: "system", Content: m.System}}, req.Messages...)
	}

Michael Yang's avatar
Michael Yang committed
1308
	prompt, images, err := chatPrompt(c.Request.Context(), m, r.Tokenize, opts, req.Messages)
Michael Yang's avatar
Michael Yang committed
1309
1310
1311
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
1312
1313
	}

Michael Yang's avatar
Michael Yang committed
1314
	slog.Debug("chat request", "images", len(images), "prompt", prompt)
1315

Bruce MacDonald's avatar
Bruce MacDonald committed
1316
1317
1318
	ch := make(chan any)
	go func() {
		defer close(ch)
Michael Yang's avatar
Michael Yang committed
1319
		if err := r.Completion(c.Request.Context(), llm.CompletionRequest{
Michael Yang's avatar
Michael Yang committed
1320
1321
1322
			Prompt:  prompt,
			Images:  images,
			Format:  req.Format,
Michael Yang's avatar
Michael Yang committed
1323
			Options: opts,
Michael Yang's avatar
Michael Yang committed
1324
		}, func(r llm.CompletionResponse) {
1325
			res := api.ChatResponse{
1326
1327
1328
1329
1330
				Model:      req.Model,
				CreatedAt:  time.Now().UTC(),
				Message:    api.Message{Role: "assistant", Content: r.Content},
				Done:       r.Done,
				DoneReason: r.DoneReason,
Bruce MacDonald's avatar
Bruce MacDonald committed
1331
1332
1333
1334
1335
1336
1337
				Metrics: api.Metrics{
					PromptEvalCount:    r.PromptEvalCount,
					PromptEvalDuration: r.PromptEvalDuration,
					EvalCount:          r.EvalCount,
					EvalDuration:       r.EvalDuration,
				},
			}
1338
1339
1340
1341
1342
1343
1344

			if r.Done {
				res.TotalDuration = time.Since(checkpointStart)
				res.LoadDuration = checkpointLoaded.Sub(checkpointStart)
			}

			ch <- res
Michael Yang's avatar
Michael Yang committed
1345
		}); err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
1346
1347
1348
1349
1350
			ch <- gin.H{"error": err.Error()}
		}
	}()

	if req.Stream != nil && !*req.Stream {
Michael Yang's avatar
Michael Yang committed
1351
		var r api.ChatResponse
Bruce MacDonald's avatar
Bruce MacDonald committed
1352
		var sb strings.Builder
Michael Yang's avatar
Michael Yang committed
1353
1354
		for rr := range ch {
			switch t := rr.(type) {
1355
			case api.ChatResponse:
Michael Yang's avatar
Michael Yang committed
1356
1357
				sb.WriteString(t.Message.Content)
				r = t
1358
			case gin.H:
Michael Yang's avatar
Michael Yang committed
1359
1360
1361
				msg, ok := t["error"].(string)
				if !ok {
					msg = "unexpected error format in response"
1362
				}
Michael Yang's avatar
Michael Yang committed
1363
1364
1365

				c.JSON(http.StatusInternalServerError, gin.H{"error": msg})
				return
1366
			default:
Michael Yang's avatar
Michael Yang committed
1367
				c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected response"})
1368
				return
Bruce MacDonald's avatar
Bruce MacDonald committed
1369
1370
			}
		}
1371

Michael Yang's avatar
Michael Yang committed
1372
1373
		r.Message.Content = sb.String()
		c.JSON(http.StatusOK, r)
Bruce MacDonald's avatar
Bruce MacDonald committed
1374
1375
1376
1377
1378
		return
	}

	streamResponse(c, ch)
}
1379

Michael Yang's avatar
Michael Yang committed
1380
func handleScheduleError(c *gin.Context, name string, err error) {
Michael Yang's avatar
Michael Yang committed
1381
	switch {
Michael Yang's avatar
Michael Yang committed
1382
1383
	case errors.Is(err, errRequired):
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Michael Yang's avatar
Michael Yang committed
1384
	case errors.Is(err, context.Canceled):
1385
		c.JSON(499, gin.H{"error": "request canceled"})
Michael Yang's avatar
Michael Yang committed
1386
	case errors.Is(err, ErrMaxQueue):
1387
		c.JSON(http.StatusServiceUnavailable, gin.H{"error": err.Error()})
Michael Yang's avatar
Michael Yang committed
1388
1389
	case errors.Is(err, os.ErrNotExist):
		c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model %q not found, try pulling it first", name)})
Michael Yang's avatar
Michael Yang committed
1390
1391
	default:
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
1392
1393
	}
}