routes.go 40.9 KB
Newer Older
Jeffrey Morgan's avatar
Jeffrey Morgan committed
1
2
3
package server

import (
Michael Yang's avatar
Michael Yang committed
4
	"bytes"
Michael Yang's avatar
Michael Yang committed
5
	"cmp"
6
	"context"
Michael Yang's avatar
Michael Yang committed
7
	"encoding/json"
8
	"errors"
9
	"fmt"
10
	"image"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
11
	"io"
12
	"io/fs"
13
	"log/slog"
14
	"math"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
15
16
	"net"
	"net/http"
17
	"net/netip"
18
	"os"
19
	"os/signal"
20
	"regexp"
21
	"slices"
Michael Yang's avatar
Michael Yang committed
22
	"strings"
23
	"syscall"
24
	"time"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
25

Michael Yang's avatar
Michael Yang committed
26
	"github.com/gin-contrib/cors"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
27
	"github.com/gin-gonic/gin"
28
	"golang.org/x/image/webp"
29
	"golang.org/x/sync/errgroup"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
30

31
	"github.com/ollama/ollama/api"
32
	"github.com/ollama/ollama/discover"
33
	"github.com/ollama/ollama/envconfig"
Michael Yang's avatar
Michael Yang committed
34
	"github.com/ollama/ollama/fs/ggml"
35
	"github.com/ollama/ollama/llm"
36
	"github.com/ollama/ollama/logutil"
37
	"github.com/ollama/ollama/openai"
38
39
	"github.com/ollama/ollama/server/internal/client/ollama"
	"github.com/ollama/ollama/server/internal/registry"
Michael Yang's avatar
Michael Yang committed
40
	"github.com/ollama/ollama/template"
41
	"github.com/ollama/ollama/types/errtypes"
Michael Yang's avatar
Michael Yang committed
42
	"github.com/ollama/ollama/types/model"
43
	"github.com/ollama/ollama/version"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
44
45
)

46
47
48
49
50
51
// experimentEnabled reports whether name is one of the comma-separated
// entries of the OLLAMA_EXPERIMENT environment variable. Entries are compared
// exactly as written: no whitespace trimming or case folding is applied.
func experimentEnabled(name string) bool {
	raw := os.Getenv("OLLAMA_EXPERIMENT")
	experiments := strings.Split(raw, ",")
	return slices.Index(experiments, name) >= 0
}

// useClient2 records, once at package initialization, whether the "client2"
// experiment was listed in OLLAMA_EXPERIMENT.
var useClient2 = experimentEnabled("client2")

Michael Yang's avatar
Michael Yang committed
52
53
// mode is the gin mode applied by init. It defaults to debug mode.
// NOTE(review): presumably overridden at build time (e.g. via -ldflags -X);
// confirm against the build scripts.
var mode string = gin.DebugMode

54
type Server struct {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
55
56
	addr  net.Addr
	sched *Scheduler
57
58
}

Michael Yang's avatar
Michael Yang committed
59
60
61
62
63
64
65
66
67
68
69
70
// init validates the package-level mode and applies it to gin. Any value
// other than gin's debug, release or test mode falls back to debug mode.
func init() {
	switch mode {
	case gin.DebugMode, gin.ReleaseMode, gin.TestMode:
		// already a mode gin understands; keep it
	default:
		mode = gin.DebugMode
	}

	gin.SetMode(mode)
}

Michael Yang's avatar
lint  
Michael Yang committed
71
72
73
74
// Sentinel errors shared by the handlers in this file.
//
// errRequired is meant to be wrapped with the name of the missing field, as
// in fmt.Errorf("model %w", errRequired), which yields "model is required".
//
// errBadTemplate marks template failures.
// NOTE(review): its users are outside this part of the file; confirm usage.
var (
	errRequired    = errors.New("is required")
	errBadTemplate = errors.New("template error")
)
Michael Yang's avatar
Michael Yang committed
75

76
func modelOptions(model *Model, requestOpts map[string]any) (api.Options, error) {
77
78
79
80
81
82
83
84
85
86
	opts := api.DefaultOptions()
	if err := opts.FromMap(model.Options); err != nil {
		return api.Options{}, err
	}

	if err := opts.FromMap(requestOpts); err != nil {
		return api.Options{}, err
	}

	return opts, nil
Bruce MacDonald's avatar
Bruce MacDonald committed
87
88
}

Michael Yang's avatar
Michael Yang committed
89
90
// scheduleRunner schedules a runner after validating inputs such as capabilities and model options.
// It returns the allocated runner, model instance, and consolidated options if successful and error otherwise.
91
func (s *Server) scheduleRunner(ctx context.Context, name string, caps []model.Capability, requestOpts map[string]any, keepAlive *api.Duration) (llm.LlamaServer, *Model, *api.Options, error) {
Michael Yang's avatar
Michael Yang committed
92
	if name == "" {
Michael Yang's avatar
Michael Yang committed
93
		return nil, nil, nil, fmt.Errorf("model %w", errRequired)
Bruce MacDonald's avatar
Bruce MacDonald committed
94
95
	}

Michael Yang's avatar
Michael Yang committed
96
	model, err := GetModel(name)
Bruce MacDonald's avatar
Bruce MacDonald committed
97
	if err != nil {
Michael Yang's avatar
Michael Yang committed
98
		return nil, nil, nil, err
99
100
	}

101
102
103
104
	if slices.Contains(model.Config.ModelFamilies, "mllama") && len(model.ProjectorPaths) > 0 {
		return nil, nil, nil, fmt.Errorf("'llama3.2-vision' is no longer compatible with your version of Ollama and has been replaced by a newer version. To re-download, run 'ollama pull llama3.2-vision'")
	}

Michael Yang's avatar
Michael Yang committed
105
	if err := model.CheckCapabilities(caps...); err != nil {
Michael Yang's avatar
Michael Yang committed
106
		return nil, nil, nil, fmt.Errorf("%s %w", name, err)
107
108
	}

Michael Yang's avatar
Michael Yang committed
109
	opts, err := modelOptions(model, requestOpts)
110
	if err != nil {
Michael Yang's avatar
Michael Yang committed
111
		return nil, nil, nil, err
112
113
	}

Michael Yang's avatar
Michael Yang committed
114
	runnerCh, errCh := s.sched.GetRunner(ctx, model, opts, keepAlive)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
115
116
	var runner *runnerRef
	select {
Michael Yang's avatar
Michael Yang committed
117
118
	case runner = <-runnerCh:
	case err = <-errCh:
Michael Yang's avatar
Michael Yang committed
119
		return nil, nil, nil, err
Bruce MacDonald's avatar
Bruce MacDonald committed
120
121
	}

Michael Yang's avatar
Michael Yang committed
122
	return runner.llama, model, &opts, nil
Michael Yang's avatar
Michael Yang committed
123
124
125
}

func (s *Server) GenerateHandler(c *gin.Context) {
126
	checkpointStart := time.Now()
Michael Yang's avatar
Michael Yang committed
127
128
129
130
131
132
	var req api.GenerateRequest
	if err := c.ShouldBindJSON(&req); errors.Is(err, io.EOF) {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	} else if err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Bruce MacDonald's avatar
Bruce MacDonald committed
133
134
135
		return
	}

136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
	name := model.ParseName(req.Model)
	if !name.IsValid() {
		// Ideally this is "invalid model name" but we're keeping with
		// what the API currently returns until we can change it.
		c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Model)})
		return
	}

	// We cannot currently consolidate this into GetModel because all we'll
	// induce infinite recursion given the current code structure.
	name, err := getExistingName(name)
	if err != nil {
		c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Model)})
		return
	}

152
	m, err := GetModel(name.String())
153
154
	if err != nil {
		switch {
155
		case errors.Is(err, fs.ErrNotExist):
156
			c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Model)})
157
		case err.Error() == errtypes.InvalidModelNameErrMsg:
158
159
160
161
162
163
164
			c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		default:
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		}
		return
	}

Patrick Devine's avatar
Patrick Devine committed
165
166
	// expire the runner
	if req.Prompt == "" && req.KeepAlive != nil && int(req.KeepAlive.Seconds()) == 0 {
167
		s.sched.expireRunner(m)
Patrick Devine's avatar
Patrick Devine committed
168
169
170
171
172
173
174
175
176
177
178

		c.JSON(http.StatusOK, api.GenerateResponse{
			Model:      req.Model,
			CreatedAt:  time.Now().UTC(),
			Response:   "",
			Done:       true,
			DoneReason: "unload",
		})
		return
	}

179
	if req.Raw && (req.Template != "" || req.System != "" || len(req.Context) > 0) {
Michael Yang's avatar
Michael Yang committed
180
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "raw mode does not support template, system, or context"})
Michael Yang's avatar
Michael Yang committed
181
182
183
		return
	}

184
	caps := []model.Capability{model.CapabilityCompletion}
185
	if req.Suffix != "" {
186
		caps = append(caps, model.CapabilityInsert)
187
188
	}

189
	r, m, opts, err := s.scheduleRunner(c.Request.Context(), name.String(), caps, req.Options, req.KeepAlive)
Michael Yang's avatar
Michael Yang committed
190
191
192
193
	if errors.Is(err, errCapabilityCompletion) {
		c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("%q does not support generate", req.Model)})
		return
	} else if err != nil {
Michael Yang's avatar
Michael Yang committed
194
195
196
197
		handleScheduleError(c, req.Model, err)
		return
	}

198
199
	checkpointLoaded := time.Now()

200
	// load the model
Michael Yang's avatar
Michael Yang committed
201
202
203
204
205
206
207
	if req.Prompt == "" {
		c.JSON(http.StatusOK, api.GenerateResponse{
			Model:      req.Model,
			CreatedAt:  time.Now().UTC(),
			Done:       true,
			DoneReason: "load",
		})
Michael Yang's avatar
Michael Yang committed
208
209
		return
	}
Bruce MacDonald's avatar
Bruce MacDonald committed
210

211
212
	if slices.Contains(m.Config.ModelFamilies, "mllama") && len(req.Images) > 1 {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "this model only supports one image while more than one image requested"})
213
214
215
		return
	}

Michael Yang's avatar
Michael Yang committed
216
217
	images := make([]llm.ImageData, len(req.Images))
	for i := range req.Images {
218
		images[i] = llm.ImageData{ID: i, Data: req.Images[i]}
Michael Yang's avatar
Michael Yang committed
219
	}
Bruce MacDonald's avatar
Bruce MacDonald committed
220

Michael Yang's avatar
Michael Yang committed
221
222
	prompt := req.Prompt
	if !req.Raw {
Michael Yang's avatar
Michael Yang committed
223
		tmpl := m.Template
Michael Yang's avatar
Michael Yang committed
224
225
226
227
228
229
230
231
		if req.Template != "" {
			tmpl, err = template.Parse(req.Template)
			if err != nil {
				c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
				return
			}
		}

232
233
234
235
236
237
238
239
240
241
242
243
		var values template.Values
		if req.Suffix != "" {
			values.Prompt = prompt
			values.Suffix = req.Suffix
		} else {
			var msgs []api.Message
			if req.System != "" {
				msgs = append(msgs, api.Message{Role: "system", Content: req.System})
			} else if m.System != "" {
				msgs = append(msgs, api.Message{Role: "system", Content: m.System})
			}

Michael Yang's avatar
Michael Yang committed
244
245
246
247
			if req.Context == nil {
				msgs = append(msgs, m.Messages...)
			}

248
			for _, i := range images {
249
250
				imgPrompt := ""
				msgs = append(msgs, api.Message{Role: "user", Content: fmt.Sprintf("[img-%d]"+imgPrompt, i.ID)})
251
252
253
254
255
			}

			values.Messages = append(msgs, api.Message{Role: "user", Content: req.Prompt})
		}

Michael Yang's avatar
Michael Yang committed
256
257
		var b bytes.Buffer
		if req.Context != nil {
258
			slog.Warn("the context field is deprecated and will be removed in a future version of Ollama")
259
			s, err := r.Detokenize(c.Request.Context(), req.Context)
Michael Yang's avatar
Michael Yang committed
260
261
262
263
			if err != nil {
				c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
				return
			}
264
			b.WriteString(s)
Michael Yang's avatar
Michael Yang committed
265
		}
266
267
268
269
270
271
272

		if err := tmpl.Execute(&b, values); err != nil {
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
			return
		}

		prompt = b.String()
Bruce MacDonald's avatar
Bruce MacDonald committed
273
274
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
275
276
	ch := make(chan any)
	go func() {
277
278
		// TODO (jmorganca): avoid building the response twice both here and below
		var sb strings.Builder
Bruce MacDonald's avatar
Bruce MacDonald committed
279
		defer close(ch)
Michael Yang's avatar
Michael Yang committed
280
		if err := r.Completion(c.Request.Context(), llm.CompletionRequest{
Michael Yang's avatar
Michael Yang committed
281
282
			Prompt:  prompt,
			Images:  images,
283
			Format:  req.Format,
Michael Yang's avatar
Michael Yang committed
284
			Options: opts,
285
286
		}, func(cr llm.CompletionResponse) {
			res := api.GenerateResponse{
287
288
289
290
				Model:     req.Model,
				CreatedAt: time.Now().UTC(),
				Response:  cr.Content,
				Done:      cr.Done,
Bruce MacDonald's avatar
Bruce MacDonald committed
291
				Metrics: api.Metrics{
292
293
294
295
					PromptEvalCount:    cr.PromptEvalCount,
					PromptEvalDuration: cr.PromptEvalDuration,
					EvalCount:          cr.EvalCount,
					EvalDuration:       cr.EvalDuration,
Bruce MacDonald's avatar
Bruce MacDonald committed
296
				},
Bruce MacDonald's avatar
Bruce MacDonald committed
297
			}
298
299
300
301
302
303

			if _, err := sb.WriteString(cr.Content); err != nil {
				ch <- gin.H{"error": err.Error()}
			}

			if cr.Done {
304
				res.DoneReason = cr.DoneReason.String()
305
306
307
308
				res.TotalDuration = time.Since(checkpointStart)
				res.LoadDuration = checkpointLoaded.Sub(checkpointStart)

				if !req.Raw {
309
					tokens, err := r.Tokenize(c.Request.Context(), prompt+sb.String())
310
311
312
313
					if err != nil {
						ch <- gin.H{"error": err.Error()}
						return
					}
314
					res.Context = tokens
315
316
317
318
				}
			}

			ch <- res
Michael Yang's avatar
Michael Yang committed
319
		}); err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
320
321
322
323
324
			ch <- gin.H{"error": err.Error()}
		}
	}()

	if req.Stream != nil && !*req.Stream {
Michael Yang's avatar
Michael Yang committed
325
		var r api.GenerateResponse
Bruce MacDonald's avatar
Bruce MacDonald committed
326
		var sb strings.Builder
Michael Yang's avatar
Michael Yang committed
327
328
		for rr := range ch {
			switch t := rr.(type) {
329
			case api.GenerateResponse:
Michael Yang's avatar
Michael Yang committed
330
331
				sb.WriteString(t.Response)
				r = t
332
			case gin.H:
Michael Yang's avatar
Michael Yang committed
333
334
335
				msg, ok := t["error"].(string)
				if !ok {
					msg = "unexpected error format in response"
336
				}
Michael Yang's avatar
Michael Yang committed
337
338
339

				c.JSON(http.StatusInternalServerError, gin.H{"error": msg})
				return
340
			default:
Michael Yang's avatar
Michael Yang committed
341
				c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected response"})
Bruce MacDonald's avatar
Bruce MacDonald committed
342
343
344
				return
			}
		}
345

Michael Yang's avatar
Michael Yang committed
346
347
		r.Response = sb.String()
		c.JSON(http.StatusOK, r)
Bruce MacDonald's avatar
Bruce MacDonald committed
348
349
350
351
352
353
		return
	}

	streamResponse(c, ch)
}

354
func (s *Server) EmbedHandler(c *gin.Context) {
355
	checkpointStart := time.Now()
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
	var req api.EmbedRequest
	err := c.ShouldBindJSON(&req)
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	truncate := true

	if req.Truncate != nil && !*req.Truncate {
		truncate = false
	}

	var input []string

	switch i := req.Input.(type) {
	case string:
		if len(i) > 0 {
			input = append(input, i)
		}
	case []any:
		for _, v := range i {
			if _, ok := v.(string); !ok {
				c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "invalid input type"})
				return
			}
			input = append(input, v.(string))
		}
	default:
389
390
391
392
		if req.Input != nil {
			c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "invalid input type"})
			return
		}
393
394
	}

395
396
397
398
399
400
	name, err := getExistingName(model.ParseName(req.Model))
	if err != nil {
		c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Model)})
		return
	}

401
	r, m, opts, err := s.scheduleRunner(c.Request.Context(), name.String(), []model.Capability{}, req.Options, req.KeepAlive)
402
403
404
405
406
	if err != nil {
		handleScheduleError(c, req.Model, err)
		return
	}

407
408
	checkpointLoaded := time.Now()

409
410
411
412
413
	if len(input) == 0 {
		c.JSON(http.StatusOK, api.EmbedResponse{Model: req.Model, Embeddings: [][]float32{}})
		return
	}

414
	kvData, _, err := getModelData(m.ModelPath, false)
415
416
417
418
419
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

420
	var count int
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
	for i, s := range input {
		tokens, err := r.Tokenize(c.Request.Context(), s)
		if err != nil {
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
			return
		}

		ctxLen := min(opts.NumCtx, int(kvData.ContextLength()))
		if len(tokens) > ctxLen {
			if !truncate {
				c.JSON(http.StatusBadRequest, gin.H{"error": "input length exceeds maximum context length"})
				return
			}

			tokens = tokens[:ctxLen]
			s, err = r.Detokenize(c.Request.Context(), tokens)
			if err != nil {
				c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
				return
			}
		}

443
444
		count += len(tokens)

445
446
		input[i] = s
	}
447
448
449
450
451
452
453
454
455
456
457
458

	var g errgroup.Group
	embeddings := make([][]float32, len(input))
	for i, text := range input {
		g.Go(func() error {
			embedding, err := r.Embedding(c.Request.Context(), text)
			if err != nil {
				return err
			}
			embeddings[i] = normalize(embedding)
			return nil
		})
459
460
	}

461
	if err := g.Wait(); err != nil {
462
		c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": strings.TrimSpace(err.Error())})
463
		return
464
465
466
	}

	resp := api.EmbedResponse{
467
		Model:           req.Model,
468
		Embeddings:      embeddings,
469
470
		TotalDuration:   time.Since(checkpointStart),
		LoadDuration:    checkpointLoaded.Sub(checkpointStart),
471
		PromptEvalCount: count,
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
	}
	c.JSON(http.StatusOK, resp)
}

// normalize scales vec in place to unit Euclidean length and returns the
// same slice. When the sum of squares is not positive (for example an
// all-zero vector) every element is multiplied by zero instead.
func normalize(vec []float32) []float32 {
	var sumSquares float32
	for i := range vec {
		sumSquares += vec[i] * vec[i]
	}

	var scale float32
	if sumSquares > 0 {
		scale = float32(1.0 / math.Sqrt(float64(sumSquares)))
	}

	for i, v := range vec {
		vec[i] = v * scale
	}
	return vec
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
493
func (s *Server) EmbeddingsHandler(c *gin.Context) {
Bruce MacDonald's avatar
Bruce MacDonald committed
494
	var req api.EmbeddingRequest
Michael Yang's avatar
Michael Yang committed
495
	if err := c.ShouldBindJSON(&req); errors.Is(err, io.EOF) {
Bruce MacDonald's avatar
Bruce MacDonald committed
496
497
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
Michael Yang's avatar
Michael Yang committed
498
	} else if err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
499
500
501
502
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

503
504
505
506
507
508
	name := model.ParseName(req.Model)
	if !name.IsValid() {
		c.JSON(http.StatusBadRequest, gin.H{"error": "model is required"})
		return
	}

509
	r, _, _, err := s.scheduleRunner(c.Request.Context(), name.String(), []model.Capability{}, req.Options, req.KeepAlive)
Bruce MacDonald's avatar
Bruce MacDonald committed
510
	if err != nil {
Michael Yang's avatar
Michael Yang committed
511
		handleScheduleError(c, req.Model, err)
Bruce MacDonald's avatar
Bruce MacDonald committed
512
513
514
		return
	}

515
516
517
	// an empty request loads the model
	if req.Prompt == "" {
		c.JSON(http.StatusOK, api.EmbeddingResponse{Embedding: []float64{}})
Bruce MacDonald's avatar
Bruce MacDonald committed
518
519
520
		return
	}

521
	embedding, err := r.Embedding(c.Request.Context(), req.Prompt)
Bruce MacDonald's avatar
Bruce MacDonald committed
522
	if err != nil {
523
		c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": strings.TrimSpace(err.Error())})
Bruce MacDonald's avatar
Bruce MacDonald committed
524
525
526
		return
	}

527
528
529
	var e []float64
	for _, v := range embedding {
		e = append(e, float64(v))
530
531
532
	}

	resp := api.EmbeddingResponse{
533
		Embedding: e,
534
535
	}
	c.JSON(http.StatusOK, resp)
Bruce MacDonald's avatar
Bruce MacDonald committed
536
537
}

538
func (s *Server) PullHandler(c *gin.Context) {
Michael Yang's avatar
Michael Yang committed
539
	var req api.PullRequest
Michael Yang's avatar
Michael Yang committed
540
541
542
543
544
545
546
	err := c.ShouldBindJSON(&req)
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Michael Yang's avatar
Michael Yang committed
547
548
549
		return
	}

550
551
	name := model.ParseName(cmp.Or(req.Model, req.Name))
	if !name.IsValid() {
552
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": errtypes.InvalidModelNameErrMsg})
553
554
555
		return
	}

556
557
	name, err = getExistingName(name)
	if err != nil {
558
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
559
560
561
		return
	}

562
563
564
	ch := make(chan any)
	go func() {
		defer close(ch)
565
566
		fn := func(r api.ProgressResponse) {
			ch <- r
567
		}
568

Michael Yang's avatar
Michael Yang committed
569
		regOpts := &registryOptions{
570
571
572
			Insecure: req.Insecure,
		}

573
574
575
		ctx, cancel := context.WithCancel(c.Request.Context())
		defer cancel()

576
		if err := PullModel(ctx, name.DisplayShortest(), regOpts, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
577
			ch <- gin.H{"error": err.Error()}
578
579
580
		}
	}()

581
582
583
584
585
	if req.Stream != nil && !*req.Stream {
		waitForStream(c, ch)
		return
	}

586
587
588
	streamResponse(c, ch)
}

589
func (s *Server) PushHandler(c *gin.Context) {
590
	var req api.PushRequest
Michael Yang's avatar
Michael Yang committed
591
592
593
594
595
596
597
	err := c.ShouldBindJSON(&req)
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Michael Yang's avatar
Michael Yang committed
598
599
		return
	}
Michael Yang's avatar
Michael Yang committed
600

601
	var mname string
Michael Yang's avatar
Michael Yang committed
602
	if req.Model != "" {
603
		mname = req.Model
Michael Yang's avatar
Michael Yang committed
604
	} else if req.Name != "" {
605
		mname = req.Name
Michael Yang's avatar
Michael Yang committed
606
607
	} else {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
608
609
610
		return
	}

611
612
613
	ch := make(chan any)
	go func() {
		defer close(ch)
614
615
		fn := func(r api.ProgressResponse) {
			ch <- r
616
		}
617

Michael Yang's avatar
Michael Yang committed
618
		regOpts := &registryOptions{
619
620
621
			Insecure: req.Insecure,
		}

Michael Yang's avatar
Michael Yang committed
622
623
624
		ctx, cancel := context.WithCancel(c.Request.Context())
		defer cancel()

625
626
627
628
629
630
631
		name, err := getExistingName(model.ParseName(mname))
		if err != nil {
			ch <- gin.H{"error": err.Error()}
			return
		}

		if err := PushModel(ctx, name.DisplayShortest(), regOpts, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
632
			ch <- gin.H{"error": err.Error()}
633
634
635
		}
	}()

636
637
638
639
640
	if req.Stream != nil && !*req.Stream {
		waitForStream(c, ch)
		return
	}

641
642
643
	streamResponse(c, ch)
}

644
645
646
647
// getExistingName searches the models directory for the longest prefix match of
// the input name and returns the input name with all existing parts replaced
// with each part found. If no parts are found, the input name is returned as
// is.
648
649
650
func getExistingName(n model.Name) (model.Name, error) {
	var zero model.Name
	existing, err := Manifests(true)
651
	if err != nil {
652
		return zero, err
653
	}
654
	var set model.Name // tracks parts already canonicalized
655
	for e := range existing {
656
657
658
659
660
661
662
663
664
665
666
		if set.Host == "" && strings.EqualFold(e.Host, n.Host) {
			n.Host = e.Host
		}
		if set.Namespace == "" && strings.EqualFold(e.Namespace, n.Namespace) {
			n.Namespace = e.Namespace
		}
		if set.Model == "" && strings.EqualFold(e.Model, n.Model) {
			n.Model = e.Model
		}
		if set.Tag == "" && strings.EqualFold(e.Tag, n.Tag) {
			n.Tag = e.Tag
667
668
		}
	}
669
	return n, nil
670
671
}

672
func (s *Server) DeleteHandler(c *gin.Context) {
673
674
	var r api.DeleteRequest
	if err := c.ShouldBindJSON(&r); errors.Is(err, io.EOF) {
Michael Yang's avatar
Michael Yang committed
675
676
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
677
	} else if err != nil {
Michael Yang's avatar
Michael Yang committed
678
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
679
680
681
		return
	}

682
683
684
	n := model.ParseName(cmp.Or(r.Model, r.Name))
	if !n.IsValid() {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("name %q is invalid", cmp.Or(r.Model, r.Name))})
685
686
		return
	}
Michael Yang's avatar
Michael Yang committed
687

688
689
690
691
692
693
	n, err := getExistingName(n)
	if err != nil {
		c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", cmp.Or(r.Model, r.Name))})
		return
	}

694
	m, err := ParseNamedManifest(n)
Michael Yang's avatar
Michael Yang committed
695
	if err != nil {
696
697
698
699
700
701
		switch {
		case os.IsNotExist(err):
			c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", cmp.Or(r.Model, r.Name))})
		default:
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		}
Michael Yang's avatar
Michael Yang committed
702
703
704
		return
	}

705
	if err := m.Remove(); err != nil {
Michael Yang's avatar
Michael Yang committed
706
707
708
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
709
710
711
712
713

	if err := m.RemoveLayers(); err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
714
715
}

716
func (s *Server) ShowHandler(c *gin.Context) {
Patrick Devine's avatar
Patrick Devine committed
717
	var req api.ShowRequest
Michael Yang's avatar
Michael Yang committed
718
719
720
721
722
723
724
	err := c.ShouldBindJSON(&req)
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Patrick Devine's avatar
Patrick Devine committed
725
726
727
		return
	}

Michael Yang's avatar
Michael Yang committed
728
	if req.Model != "" {
Michael Yang's avatar
Michael Yang committed
729
		// noop
Michael Yang's avatar
Michael Yang committed
730
	} else if req.Name != "" {
Michael Yang's avatar
Michael Yang committed
731
		req.Model = req.Name
Michael Yang's avatar
Michael Yang committed
732
	} else {
733
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
734
735
736
		return
	}

737
	resp, err := GetModelInfo(req)
Patrick Devine's avatar
Patrick Devine committed
738
	if err != nil {
739
740
		switch {
		case os.IsNotExist(err):
Michael Yang's avatar
Michael Yang committed
741
			c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Model)})
742
		case err.Error() == errtypes.InvalidModelNameErrMsg:
743
744
			c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		default:
Patrick Devine's avatar
Patrick Devine committed
745
746
747
748
749
750
751
752
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		}
		return
	}

	c.JSON(http.StatusOK, resp)
}

753
func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {
754
755
	name := model.ParseName(req.Model)
	if !name.IsValid() {
CYJiang's avatar
CYJiang committed
756
		return nil, ErrModelPathInvalid
757
758
759
760
761
762
763
	}
	name, err := getExistingName(name)
	if err != nil {
		return nil, err
	}

	m, err := GetModel(name.String())
Patrick Devine's avatar
Patrick Devine committed
764
765
766
767
	if err != nil {
		return nil, err
	}

Patrick Devine's avatar
Patrick Devine committed
768
	modelDetails := api.ModelDetails{
769
770
771
772
773
774
		ParentModel:       m.ParentModel,
		Format:            m.Config.ModelFormat,
		Family:            m.Config.ModelFamily,
		Families:          m.Config.ModelFamilies,
		ParameterSize:     m.Config.ModelType,
		QuantizationLevel: m.Config.FileType,
Patrick Devine's avatar
Patrick Devine committed
775
776
	}

777
	if req.System != "" {
778
		m.System = req.System
779
780
	}

Michael Yang's avatar
Michael Yang committed
781
782
783
	msgs := make([]api.Message, len(m.Messages))
	for i, msg := range m.Messages {
		msgs[i] = api.Message{Role: msg.Role, Content: msg.Content}
784
785
	}

786
	manifest, err := ParseNamedManifest(name)
787
788
789
790
	if err != nil {
		return nil, err
	}

Patrick Devine's avatar
Patrick Devine committed
791
	resp := &api.ShowResponse{
792
793
794
795
796
797
798
		License:      strings.Join(m.License, "\n"),
		System:       m.System,
		Template:     m.Template.String(),
		Details:      modelDetails,
		Messages:     msgs,
		Capabilities: m.Capabilities(),
		ModifiedAt:   manifest.fi.ModTime(),
Patrick Devine's avatar
Patrick Devine committed
799
800
801
802
	}

	var params []string
	cs := 30
803
	for k, v := range m.Options {
Patrick Devine's avatar
Patrick Devine committed
804
		switch val := v.(type) {
805
		case []any:
Patrick Devine's avatar
Patrick Devine committed
806
			for _, nv := range val {
Patrick Devine's avatar
Patrick Devine committed
807
				params = append(params, fmt.Sprintf("%-*s %#v", cs, k, nv))
Patrick Devine's avatar
Patrick Devine committed
808
			}
Patrick Devine's avatar
Patrick Devine committed
809
810
		default:
			params = append(params, fmt.Sprintf("%-*s %#v", cs, k, v))
Patrick Devine's avatar
Patrick Devine committed
811
812
813
814
		}
	}
	resp.Parameters = strings.Join(params, "\n")

815
816
	for k, v := range req.Options {
		if _, ok := req.Options[k]; ok {
817
			m.Options[k] = v
818
819
820
		}
	}

821
	var sb strings.Builder
822
	fmt.Fprintln(&sb, "# Modelfile generated by \"ollama show\"")
823
	fmt.Fprintln(&sb, "# To build a new Modelfile based on this, replace FROM with:")
824
825
	fmt.Fprintf(&sb, "# FROM %s\n\n", m.ShortName)
	fmt.Fprint(&sb, m.String())
826
	resp.Modelfile = sb.String()
827

828
	kvData, tensors, err := getModelData(m.ModelPath, req.Verbose)
829
830
831
	if err != nil {
		return nil, err
	}
832

833
834
835
836
	delete(kvData, "general.name")
	delete(kvData, "tokenizer.chat_template")
	resp.ModelInfo = kvData

837
838
839
840
841
842
	tensorData := make([]api.Tensor, len(tensors.Items()))
	for cnt, t := range tensors.Items() {
		tensorData[cnt] = api.Tensor{Name: t.Name, Type: t.Type(), Shape: t.Shape}
	}
	resp.Tensors = tensorData

843
	if len(m.ProjectorPaths) > 0 {
844
		projectorData, _, err := getModelData(m.ProjectorPaths[0], req.Verbose)
845
846
847
848
849
850
		if err != nil {
			return nil, err
		}
		resp.ProjectorInfo = projectorData
	}

Patrick Devine's avatar
Patrick Devine committed
851
852
853
	return resp, nil
}

854
// getModelData loads the model file identified by digest and returns its
// key/value metadata and tensor listing.
//
// In verbose mode the loader is called with a maximum array size of -1 and
// the metadata is returned untouched. Otherwise the loader is called with 0
// and any metadata value that is a []any with more than five elements is
// replaced by an empty []any.
// NOTE(review): presumably -1 means "no limit" and 0 "loader default";
// confirm against llm.LoadModel.
func getModelData(digest string, verbose bool) (ggml.KV, ggml.Tensors, error) {
	var maxArraySize int
	if verbose {
		maxArraySize = -1
	}

	data, err := llm.LoadModel(digest, maxArraySize)
	if err != nil {
		return nil, ggml.Tensors{}, err
	}

	kv := data.KV()
	if verbose {
		return kv, data.Tensors(), nil
	}

	// Blank out long arrays so the non-verbose output stays compact.
	for key, value := range kv {
		if arr, ok := value.([]any); ok && len(arr) > 5 {
			kv[key] = []any{}
		}
	}

	return kv, data.Tensors(), nil
}

877
func (s *Server) ListHandler(c *gin.Context) {
878
	ms, err := Manifests(true)
Patrick Devine's avatar
Patrick Devine committed
879
880
881
882
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
Michael Yang's avatar
Michael Yang committed
883

884
	models := []api.ListModelResponse{}
885
886
	for n, m := range ms {
		var cf ConfigV2
887
888
889
890
891
892
893
894
895
896
897
898
899

		if m.Config.Digest != "" {
			f, err := m.Config.Open()
			if err != nil {
				slog.Warn("bad manifest filepath", "name", n, "error", err)
				continue
			}
			defer f.Close()

			if err := json.NewDecoder(f).Decode(&cf); err != nil {
				slog.Warn("bad manifest config", "name", n, "error", err)
				continue
			}
Patrick Devine's avatar
Patrick Devine committed
900
		}
Michael Yang's avatar
Michael Yang committed
901

902
		// tag should never be masked
903
		models = append(models, api.ListModelResponse{
904
905
906
907
908
909
910
911
912
913
914
915
916
			Model:      n.DisplayShortest(),
			Name:       n.DisplayShortest(),
			Size:       m.Size(),
			Digest:     m.digest,
			ModifiedAt: m.fi.ModTime(),
			Details: api.ModelDetails{
				Format:            cf.ModelFormat,
				Family:            cf.ModelFamily,
				Families:          cf.ModelFamilies,
				ParameterSize:     cf.ModelType,
				QuantizationLevel: cf.FileType,
			},
		})
Patrick Devine's avatar
Patrick Devine committed
917
918
	}

919
	slices.SortStableFunc(models, func(i, j api.ListModelResponse) int {
920
921
922
923
		// most recently modified first
		return cmp.Compare(j.ModifiedAt.Unix(), i.ModifiedAt.Unix())
	})

Michael Yang's avatar
Michael Yang committed
924
	c.JSON(http.StatusOK, api.ListResponse{Models: models})
Patrick Devine's avatar
Patrick Devine committed
925
926
}

927
func (s *Server) CopyHandler(c *gin.Context) {
Michael Yang's avatar
Michael Yang committed
928
929
	var r api.CopyRequest
	if err := c.ShouldBindJSON(&r); errors.Is(err, io.EOF) {
Michael Yang's avatar
Michael Yang committed
930
931
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
Michael Yang's avatar
Michael Yang committed
932
	} else if err != nil {
Michael Yang's avatar
Michael Yang committed
933
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Patrick Devine's avatar
Patrick Devine committed
934
935
936
		return
	}

Michael Yang's avatar
Michael Yang committed
937
938
	src := model.ParseName(r.Source)
	if !src.IsValid() {
Michael Yang's avatar
Michael Yang committed
939
940
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("source %q is invalid", r.Source)})
		return
941
	}
942
943
944
945
946
	src, err := getExistingName(src)
	if err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}
947

Michael Yang's avatar
Michael Yang committed
948
949
	dst := model.ParseName(r.Destination)
	if !dst.IsValid() {
950
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("destination %q is invalid", r.Destination)})
Patrick Devine's avatar
Patrick Devine committed
951
952
		return
	}
953
954
	dst, err = getExistingName(dst)
	if err != nil {
955
956
957
958
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

Michael Yang's avatar
Michael Yang committed
959
960
961
962
963
	if err := CopyModel(src, dst); errors.Is(err, os.ErrNotExist) {
		c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model %q not found", r.Source)})
	} else if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
	}
Patrick Devine's avatar
Patrick Devine committed
964
965
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
966
func (s *Server) HeadBlobHandler(c *gin.Context) {
Michael Yang's avatar
Michael Yang committed
967
968
969
970
971
972
973
974
975
976
977
	path, err := GetBlobsPath(c.Param("digest"))
	if err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	if _, err := os.Stat(path); err != nil {
		c.AbortWithStatusJSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("blob %q not found", c.Param("digest"))})
		return
	}

Michael Yang's avatar
Michael Yang committed
978
	c.Status(http.StatusOK)
Michael Yang's avatar
Michael Yang committed
979
980
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
981
func (s *Server) CreateBlobHandler(c *gin.Context) {
Michael Yang's avatar
Michael Yang committed
982
983
	if ib, ok := intermediateBlobs[c.Param("digest")]; ok {
		p, err := GetBlobsPath(ib)
984
985
986
987
988
989
		if err != nil {
			c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
			return
		}

		if _, err := os.Stat(p); errors.Is(err, os.ErrNotExist) {
Michael Yang's avatar
Michael Yang committed
990
991
			slog.Info("evicting intermediate blob which no longer exists", "digest", ib)
			delete(intermediateBlobs, c.Param("digest"))
992
993
994
995
996
997
998
999
1000
		} else if err != nil {
			c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
			return
		} else {
			c.Status(http.StatusOK)
			return
		}
	}

1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
	path, err := GetBlobsPath(c.Param("digest"))
	if err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	_, err = os.Stat(path)
	switch {
	case errors.Is(err, os.ErrNotExist):
		// noop
	case err != nil:
		c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	default:
		c.Status(http.StatusOK)
		return
	}

1019
	layer, err := NewLayer(c.Request.Body, "")
Michael Yang's avatar
Michael Yang committed
1020
1021
1022
1023
1024
	if err != nil {
		c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

1025
1026
	if layer.Digest != c.Param("digest") {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("digest mismatch, expected %q, got %q", c.Param("digest"), layer.Digest)})
Michael Yang's avatar
Michael Yang committed
1027
1028
1029
		return
	}

Michael Yang's avatar
Michael Yang committed
1030
	c.Status(http.StatusCreated)
Michael Yang's avatar
Michael Yang committed
1031
1032
}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
// isLocalIP reports whether ip is assigned to one of this machine's network
// interfaces. Addresses are compared by their string form. If the interface
// list cannot be read the result is false; interfaces whose addresses cannot
// be read and addresses that do not parse as CIDR are skipped.
func isLocalIP(ip netip.Addr) bool {
	ifaces, err := net.Interfaces()
	if err != nil {
		return false
	}

	for _, iface := range ifaces {
		addrs, err := iface.Addrs()
		if err != nil {
			continue
		}

		for _, a := range addrs {
			candidate, _, err := net.ParseCIDR(a.String())
			if err != nil {
				continue
			}
			if candidate.String() == ip.String() {
				return true
			}
		}
	}

	return false
}

1054
// allowedHost reports whether host (compared case-insensitively) names this
// machine: the empty string, "localhost", the value of os.Hostname, or any
// name ending in ".localhost", ".local" or ".internal".
func allowedHost(host string) bool {
	host = strings.ToLower(host)

	switch host {
	case "", "localhost":
		return true
	}

	if name, err := os.Hostname(); err == nil && strings.ToLower(name) == host {
		return true
	}

	// check if the host is a local TLD
	for _, suffix := range [...]string{".localhost", ".local", ".internal"} {
		if strings.HasSuffix(host, suffix) {
			return true
		}
	}

	return false
}
1080

Jeffrey Morgan's avatar
Jeffrey Morgan committed
1081
1082
1083
func allowedHostsMiddleware(addr net.Addr) gin.HandlerFunc {
	return func(c *gin.Context) {
		if addr == nil {
1084
1085
1086
1087
			c.Next()
			return
		}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
1088
		if addr, err := netip.ParseAddrPort(addr.String()); err == nil && !addr.Addr().IsLoopback() {
1089
1090
1091
1092
1093
1094
1095
1096
1097
			c.Next()
			return
		}

		host, _, err := net.SplitHostPort(c.Request.Host)
		if err != nil {
			host = c.Request.Host
		}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
1098
		if addr, err := netip.ParseAddr(host); err == nil {
Jeffrey Morgan's avatar
Jeffrey Morgan committed
1099
			if addr.IsLoopback() || addr.IsPrivate() || addr.IsUnspecified() || isLocalIP(addr) {
Jeffrey Morgan's avatar
Jeffrey Morgan committed
1100
1101
1102
1103
1104
				c.Next()
				return
			}
		}

1105
		if allowedHost(host) {
Michael Yang's avatar
lint  
Michael Yang committed
1106
			if c.Request.Method == http.MethodOptions {
1107
1108
1109
1110
				c.AbortWithStatus(http.StatusNoContent)
				return
			}

1111
1112
1113
1114
1115
1116
			c.Next()
			return
		}

		c.AbortWithStatus(http.StatusForbidden)
	}
1117
}
1118

1119
func (s *Server) GenerateRoutes(rc *ollama.Registry) (http.Handler, error) {
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
	corsConfig := cors.DefaultConfig()
	corsConfig.AllowWildcard = true
	corsConfig.AllowBrowserExtensions = true
	corsConfig.AllowHeaders = []string{
		"Authorization",
		"Content-Type",
		"User-Agent",
		"Accept",
		"X-Requested-With",

		// OpenAI compatibility headers
1131
1132
1133
1134
1135
		"OpenAI-Beta",
		"x-stainless-arch",
		"x-stainless-async",
		"x-stainless-custom-poll-interval",
		"x-stainless-helper-method",
1136
1137
		"x-stainless-lang",
		"x-stainless-os",
1138
1139
		"x-stainless-package-version",
		"x-stainless-poll-helper",
1140
1141
1142
1143
1144
1145
		"x-stainless-retry-count",
		"x-stainless-runtime",
		"x-stainless-runtime-version",
		"x-stainless-timeout",
	}
	corsConfig.AllowOrigins = envconfig.AllowedOrigins()
Michael Yang's avatar
Michael Yang committed
1146

Bruce MacDonald's avatar
Bruce MacDonald committed
1147
	r := gin.Default()
1148
	r.HandleMethodNotAllowed = true
1149
	r.Use(
1150
		cors.New(corsConfig),
1151
		allowedHostsMiddleware(s.addr),
1152
	)
Bruce MacDonald's avatar
Bruce MacDonald committed
1153

1154
1155
1156
1157
1158
1159
	// General
	r.HEAD("/", func(c *gin.Context) { c.String(http.StatusOK, "Ollama is running") })
	r.GET("/", func(c *gin.Context) { c.String(http.StatusOK, "Ollama is running") })
	r.HEAD("/api/version", func(c *gin.Context) { c.JSON(http.StatusOK, gin.H{"version": version.Version}) })
	r.GET("/api/version", func(c *gin.Context) { c.JSON(http.StatusOK, gin.H{"version": version.Version}) })

1160
	// Local model cache management (new implementation is at end of function)
1161
1162
	r.POST("/api/pull", s.PullHandler)
	r.POST("/api/push", s.PushHandler)
1163
1164
	r.HEAD("/api/tags", s.ListHandler)
	r.GET("/api/tags", s.ListHandler)
1165
	r.POST("/api/show", s.ShowHandler)
1166
	r.DELETE("/api/delete", s.DeleteHandler)
1167
1168
1169

	// Create
	r.POST("/api/create", s.CreateHandler)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
1170
1171
	r.POST("/api/blobs/:digest", s.CreateBlobHandler)
	r.HEAD("/api/blobs/:digest", s.HeadBlobHandler)
1172
1173
1174
	r.POST("/api/copy", s.CopyHandler)

	// Inference
1175
	r.GET("/api/ps", s.PsHandler)
1176
1177
1178
1179
	r.POST("/api/generate", s.GenerateHandler)
	r.POST("/api/chat", s.ChatHandler)
	r.POST("/api/embed", s.EmbedHandler)
	r.POST("/api/embeddings", s.EmbeddingsHandler)
Jeffrey Morgan's avatar
Jeffrey Morgan committed
1180

1181
	// Inference (OpenAI compatibility)
1182
	r.POST("/v1/chat/completions", openai.ChatMiddleware(), s.ChatHandler)
1183
	r.POST("/v1/completions", openai.CompletionsMiddleware(), s.GenerateHandler)
1184
	r.POST("/v1/embeddings", openai.EmbeddingsMiddleware(), s.EmbedHandler)
1185
1186
	r.GET("/v1/models", openai.ListMiddleware(), s.ListHandler)
	r.GET("/v1/models/:model", openai.RetrieveMiddleware(), s.ShowHandler)
1187

1188
1189
1190
1191
1192
1193
	if rc != nil {
		// wrap old with new
		rs := &registry.Local{
			Client:   rc,
			Logger:   slog.Default(), // TODO(bmizerany): Take a logger, do not use slog.Default()
			Fallback: r,
1194

1195
1196
1197
			Prune: PruneLayers,
		}
		return rs, nil
1198
1199
	}

1200
	return r, nil
1201
1202
1203
}

func Serve(ln net.Listener) error {
1204
	slog.SetDefault(logutil.NewLogger(os.Stderr, envconfig.LogLevel()))
1205
	slog.Info("server config", "env", envconfig.Values())
Michael Yang's avatar
Michael Yang committed
1206

1207
1208
1209
1210
1211
1212
1213
1214
	blobsDir, err := GetBlobsPath("")
	if err != nil {
		return err
	}
	if err := fixBlobs(blobsDir); err != nil {
		return err
	}

Michael Yang's avatar
bool  
Michael Yang committed
1215
	if !envconfig.NoPrune() {
1216
1217
1218
1219
1220
1221
1222
		if _, err := Manifests(false); err != nil {
			slog.Warn("corrupt manifests detected, skipping prune operation.  Re-pull or delete to clear", "error", err)
		} else {
			// clean up unused layers and manifests
			if err := PruneLayers(); err != nil {
				return err
			}
1223

1224
1225
1226
1227
			manifestsPath, err := GetManifestPath()
			if err != nil {
				return err
			}
1228

1229
1230
1231
			if err := PruneDirectory(manifestsPath); err != nil {
				return err
			}
1232
1233
1234
		}
	}

1235
1236
	s := &Server{addr: ln.Addr()}

1237
1238
1239
1240
1241
1242
1243
	var rc *ollama.Registry
	if useClient2 {
		var err error
		rc, err = ollama.DefaultRegistry()
		if err != nil {
			return err
		}
1244
1245
	}

1246
	h, err := s.GenerateRoutes(rc)
1247
1248
1249
	if err != nil {
		return err
	}
1250

1251
1252
	http.Handle("/", h)

Daniel Hiltgen's avatar
Daniel Hiltgen committed
1253
	ctx, done := context.WithCancel(context.Background())
Daniel Hiltgen's avatar
Daniel Hiltgen committed
1254
1255
	schedCtx, schedDone := context.WithCancel(ctx)
	sched := InitScheduler(schedCtx)
1256
	s.sched = sched
1257

1258
	slog.Info(fmt.Sprintf("Listening on %s (version %s)", ln.Addr(), version.Version))
1259
	srvr := &http.Server{
1260
1261
1262
1263
1264
1265
1266
1267
1268
		// Use http.DefaultServeMux so we get net/http/pprof for
		// free.
		//
		// TODO(bmizerany): Decide if we want to make this
		// configurable so it is not exposed by default, or allow
		// users to bind it to a different port. This was a quick
		// and easy way to get pprof, but it may not be the best
		// way.
		Handler: nil,
Jeffrey Morgan's avatar
Jeffrey Morgan committed
1269
1270
	}

1271
1272
	// listen for a ctrl+c and stop any loaded llm
	signals := make(chan os.Signal, 1)
1273
	signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM)
1274
1275
	go func() {
		<-signals
1276
		srvr.Close()
Daniel Hiltgen's avatar
Daniel Hiltgen committed
1277
		schedDone()
Daniel Hiltgen's avatar
Daniel Hiltgen committed
1278
		sched.unloadAllRunners()
Daniel Hiltgen's avatar
Daniel Hiltgen committed
1279
		done()
1280
1281
	}()

Daniel Hiltgen's avatar
Daniel Hiltgen committed
1282
	s.sched.Run(schedCtx)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
1283

1284
1285
1286
1287
	// register the experimental webp decoder
	// so webp images can be used in multimodal inputs
	image.RegisterFormat("webp", "RIFF????WEBP", webp.Decode, webp.DecodeConfig)

Daniel Hiltgen's avatar
Daniel Hiltgen committed
1288
1289
	// At startup we retrieve GPU information so we can get log messages before loading a model
	// This will log warnings to the log in case we have problems with detected GPUs
1290
	gpus := discover.GetGPUInfo()
Daniel Hiltgen's avatar
Daniel Hiltgen committed
1291
	gpus.LogDetails()
1292

Daniel Hiltgen's avatar
Daniel Hiltgen committed
1293
1294
1295
1296
1297
1298
1299
	err = srvr.Serve(ln)
	// If server is closed from the signal handler, wait for the ctx to be done
	// otherwise error out quickly
	if !errors.Is(err, http.ErrServerClosed) {
		return err
	}
	<-ctx.Done()
1300
	return nil
Jeffrey Morgan's avatar
Jeffrey Morgan committed
1301
}
Michael Yang's avatar
Michael Yang committed
1302

1303
func waitForStream(c *gin.Context, ch chan any) {
1304
	c.Header("Content-Type", "application/json")
1305
	var latest api.ProgressResponse
1306
1307
1308
	for resp := range ch {
		switch r := resp.(type) {
		case api.ProgressResponse:
1309
			latest = r
1310
		case gin.H:
Josh's avatar
Josh committed
1311
1312
1313
1314
			status, ok := r["status"].(int)
			if !ok {
				status = http.StatusInternalServerError
			}
1315
1316
1317
			errorMsg, ok := r["error"].(string)
			if !ok {
				errorMsg = "unknown error"
1318
			}
1319
1320
			c.JSON(status, gin.H{"error": errorMsg})
			return
1321
		default:
1322
			c.JSON(http.StatusInternalServerError, gin.H{"error": "unknown message type"})
1323
1324
1325
			return
		}
	}
1326
1327

	c.JSON(http.StatusOK, latest)
1328
1329
}

Michael Yang's avatar
Michael Yang committed
1330
func streamResponse(c *gin.Context, ch chan any) {
1331
	c.Header("Content-Type", "application/x-ndjson")
Michael Yang's avatar
Michael Yang committed
1332
1333
1334
1335
1336
1337
1338
1339
	c.Stream(func(w io.Writer) bool {
		val, ok := <-ch
		if !ok {
			return false
		}

		bts, err := json.Marshal(val)
		if err != nil {
1340
			slog.Info(fmt.Sprintf("streamResponse: json.Marshal failed with %s", err))
Michael Yang's avatar
Michael Yang committed
1341
1342
1343
			return false
		}

1344
		// Delineate chunks with new-line delimiter
Michael Yang's avatar
Michael Yang committed
1345
1346
		bts = append(bts, '\n')
		if _, err := w.Write(bts); err != nil {
1347
			slog.Info(fmt.Sprintf("streamResponse: w.Write failed with %s", err))
Michael Yang's avatar
Michael Yang committed
1348
1349
1350
1351
1352
1353
			return false
		}

		return true
	})
}
Bruce MacDonald's avatar
Bruce MacDonald committed
1354

1355
func (s *Server) PsHandler(c *gin.Context) {
1356
	models := []api.ProcessModelResponse{}
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367

	for _, v := range s.sched.loaded {
		model := v.model
		modelDetails := api.ModelDetails{
			Format:            model.Config.ModelFormat,
			Family:            model.Config.ModelFamily,
			Families:          model.Config.ModelFamilies,
			ParameterSize:     model.Config.ModelType,
			QuantizationLevel: model.Config.FileType,
		}

1368
		mr := api.ProcessModelResponse{
1369
1370
1371
1372
1373
1374
1375
1376
			Model:     model.ShortName,
			Name:      model.ShortName,
			Size:      int64(v.estimatedTotal),
			SizeVRAM:  int64(v.estimatedVRAM),
			Digest:    model.Digest,
			Details:   modelDetails,
			ExpiresAt: v.expiresAt,
		}
1377
1378
1379
1380
1381
1382
1383
1384
		// The scheduler waits to set expiresAt, so if a model is loading it's
		// possible that it will be set to the unix epoch. For those cases, just
		// calculate the time w/ the sessionDuration instead.
		var epoch time.Time
		if v.expiresAt == epoch {
			mr.ExpiresAt = time.Now().Add(v.sessionDuration)
		}

1385
1386
1387
		models = append(models, mr)
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
1388
1389
1390
1391
1392
	slices.SortStableFunc(models, func(i, j api.ProcessModelResponse) int {
		// longest duration remaining listed first
		return cmp.Compare(j.ExpiresAt.Unix(), i.ExpiresAt.Unix())
	})

1393
	c.JSON(http.StatusOK, api.ProcessResponse{Models: models})
1394
1395
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
1396
func (s *Server) ChatHandler(c *gin.Context) {
1397
1398
	checkpointStart := time.Now()

Bruce MacDonald's avatar
Bruce MacDonald committed
1399
	var req api.ChatRequest
Michael Yang's avatar
Michael Yang committed
1400
	if err := c.ShouldBindJSON(&req); errors.Is(err, io.EOF) {
Bruce MacDonald's avatar
Bruce MacDonald committed
1401
1402
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
Michael Yang's avatar
Michael Yang committed
1403
	} else if err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
1404
1405
1406
1407
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

Patrick Devine's avatar
Patrick Devine committed
1408
1409
1410
1411
1412
1413
1414
	// expire the runner
	if len(req.Messages) == 0 && req.KeepAlive != nil && int(req.KeepAlive.Seconds()) == 0 {
		model, err := GetModel(req.Model)
		if err != nil {
			switch {
			case os.IsNotExist(err):
				c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Model)})
1415
			case err.Error() == errtypes.InvalidModelNameErrMsg:
Patrick Devine's avatar
Patrick Devine committed
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
				c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
			default:
				c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
			}
			return
		}
		s.sched.expireRunner(model)

		c.JSON(http.StatusOK, api.ChatResponse{
			Model:      req.Model,
			CreatedAt:  time.Now().UTC(),
			Message:    api.Message{Role: "assistant"},
			Done:       true,
			DoneReason: "unload",
		})
		return
	}

1434
	caps := []model.Capability{model.CapabilityCompletion}
1435
	if len(req.Tools) > 0 {
1436
		caps = append(caps, model.CapabilityTools)
Michael Yang's avatar
tools  
Michael Yang committed
1437
1438
	}

1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
	name := model.ParseName(req.Model)
	if !name.IsValid() {
		c.JSON(http.StatusBadRequest, gin.H{"error": "model is required"})
		return
	}
	name, err := getExistingName(name)
	if err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "model is required"})
		return
	}

	r, m, opts, err := s.scheduleRunner(c.Request.Context(), name.String(), caps, req.Options, req.KeepAlive)
Michael Yang's avatar
Michael Yang committed
1451
1452
	if errors.Is(err, errCapabilityCompletion) {
		c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("%q does not support chat", req.Model)})
Bruce MacDonald's avatar
Bruce MacDonald committed
1453
		return
Michael Yang's avatar
Michael Yang committed
1454
	} else if err != nil {
Michael Yang's avatar
Michael Yang committed
1455
		handleScheduleError(c, req.Model, err)
Bruce MacDonald's avatar
Bruce MacDonald committed
1456
1457
		return
	}
Michael Yang's avatar
Michael Yang committed
1458

1459
1460
	checkpointLoaded := time.Now()

Michael Yang's avatar
Michael Yang committed
1461
1462
	if len(req.Messages) == 0 {
		c.JSON(http.StatusOK, api.ChatResponse{
1463
			Model:      req.Model,
Michael Yang's avatar
Michael Yang committed
1464
1465
			CreatedAt:  time.Now().UTC(),
			Message:    api.Message{Role: "assistant"},
1466
1467
			Done:       true,
			DoneReason: "load",
Michael Yang's avatar
Michael Yang committed
1468
		})
1469
1470
1471
		return
	}

Michael Yang's avatar
Michael Yang committed
1472
	msgs := append(m.Messages, req.Messages...)
1473
	if req.Messages[0].Role != "system" && m.System != "" {
Michael Yang's avatar
Michael Yang committed
1474
		msgs = append([]api.Message{{Role: "system", Content: m.System}}, msgs...)
1475
	}
1476
	msgs = filterThinkTags(msgs, m)
1477

Michael Yang's avatar
Michael Yang committed
1478
	prompt, images, err := chatPrompt(c.Request.Context(), m, r.Tokenize, opts, msgs, req.Tools)
Michael Yang's avatar
Michael Yang committed
1479
	if err != nil {
1480
		slog.Error("chat prompt error", "error", err)
Michael Yang's avatar
Michael Yang committed
1481
1482
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
1483
1484
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
1485
1486
1487
	ch := make(chan any)
	go func() {
		defer close(ch)
1488
		var sb strings.Builder
1489
		var toolCallIndex int = 0
Michael Yang's avatar
Michael Yang committed
1490
		if err := r.Completion(c.Request.Context(), llm.CompletionRequest{
Michael Yang's avatar
Michael Yang committed
1491
1492
1493
			Prompt:  prompt,
			Images:  images,
			Format:  req.Format,
Michael Yang's avatar
Michael Yang committed
1494
			Options: opts,
Michael Yang's avatar
Michael Yang committed
1495
		}, func(r llm.CompletionResponse) {
1496
			res := api.ChatResponse{
1497
1498
1499
1500
				Model:     req.Model,
				CreatedAt: time.Now().UTC(),
				Message:   api.Message{Role: "assistant", Content: r.Content},
				Done:      r.Done,
Bruce MacDonald's avatar
Bruce MacDonald committed
1501
1502
1503
1504
1505
1506
1507
				Metrics: api.Metrics{
					PromptEvalCount:    r.PromptEvalCount,
					PromptEvalDuration: r.PromptEvalDuration,
					EvalCount:          r.EvalCount,
					EvalDuration:       r.EvalDuration,
				},
			}
1508
1509

			if r.Done {
1510
				res.DoneReason = r.DoneReason.String()
1511
1512
1513
1514
				res.TotalDuration = time.Since(checkpointStart)
				res.LoadDuration = checkpointLoaded.Sub(checkpointStart)
			}

1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
			// TODO: tool call checking and filtering should be moved outside of this callback once streaming
			// however this was a simple change for now without reworking streaming logic of this (and other)
			// handlers
			if req.Stream != nil && !*req.Stream || len(req.Tools) == 0 {
				ch <- res
				return
			}

			// Streaming tool calls:
			// If tools are recognized, use a flag to track the sending of a tool downstream
			// This ensures that content is cleared from the message on the last chunk sent
			sb.WriteString(r.Content)
			if toolCalls, ok := m.parseToolCalls(sb.String()); ok {
				res.Message.ToolCalls = toolCalls
1529
1530
1531
1532
				for i := range toolCalls {
					toolCalls[i].Function.Index = toolCallIndex
					toolCallIndex++
				}
1533
1534
1535
1536
1537
1538
1539
1540
				res.Message.Content = ""
				sb.Reset()
				ch <- res
				return
			}

			if r.Done {
				// Send any remaining content if no tool calls were detected
1541
				if toolCallIndex == 0 {
1542
1543
1544
1545
					res.Message.Content = sb.String()
				}
				ch <- res
			}
Michael Yang's avatar
Michael Yang committed
1546
		}); err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
1547
1548
1549
1550
1551
			ch <- gin.H{"error": err.Error()}
		}
	}()

	if req.Stream != nil && !*req.Stream {
Michael Yang's avatar
tools  
Michael Yang committed
1552
		var resp api.ChatResponse
Bruce MacDonald's avatar
Bruce MacDonald committed
1553
		var sb strings.Builder
Michael Yang's avatar
Michael Yang committed
1554
1555
		for rr := range ch {
			switch t := rr.(type) {
1556
			case api.ChatResponse:
Michael Yang's avatar
Michael Yang committed
1557
				sb.WriteString(t.Message.Content)
Michael Yang's avatar
tools  
Michael Yang committed
1558
				resp = t
1559
			case gin.H:
Michael Yang's avatar
Michael Yang committed
1560
1561
1562
				msg, ok := t["error"].(string)
				if !ok {
					msg = "unexpected error format in response"
1563
				}
Michael Yang's avatar
Michael Yang committed
1564
1565
1566

				c.JSON(http.StatusInternalServerError, gin.H{"error": msg})
				return
1567
			default:
Michael Yang's avatar
Michael Yang committed
1568
				c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected response"})
1569
				return
Bruce MacDonald's avatar
Bruce MacDonald committed
1570
1571
			}
		}
1572

Michael Yang's avatar
tools  
Michael Yang committed
1573
		resp.Message.Content = sb.String()
1574
1575
1576
1577
1578
1579

		if len(req.Tools) > 0 {
			if toolCalls, ok := m.parseToolCalls(sb.String()); ok {
				resp.Message.ToolCalls = toolCalls
				resp.Message.Content = ""
			}
Michael Yang's avatar
tools  
Michael Yang committed
1580
1581
1582
		}

		c.JSON(http.StatusOK, resp)
Bruce MacDonald's avatar
Bruce MacDonald committed
1583
1584
1585
1586
1587
		return
	}

	streamResponse(c, ch)
}
1588

Michael Yang's avatar
Michael Yang committed
1589
func handleScheduleError(c *gin.Context, name string, err error) {
Michael Yang's avatar
Michael Yang committed
1590
	switch {
1591
	case errors.Is(err, errCapabilities), errors.Is(err, errRequired):
Michael Yang's avatar
Michael Yang committed
1592
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Michael Yang's avatar
Michael Yang committed
1593
	case errors.Is(err, context.Canceled):
1594
		c.JSON(499, gin.H{"error": "request canceled"})
Michael Yang's avatar
Michael Yang committed
1595
	case errors.Is(err, ErrMaxQueue):
1596
		c.JSON(http.StatusServiceUnavailable, gin.H{"error": err.Error()})
Michael Yang's avatar
Michael Yang committed
1597
1598
	case errors.Is(err, os.ErrNotExist):
		c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model %q not found, try pulling it first", name)})
Michael Yang's avatar
Michael Yang committed
1599
1600
	default:
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
1601
1602
	}
}
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622

// thinkTagRegexp matches a <think>...</think> span, whose body may cross line
// breaks, together with any newlines that immediately follow it.
var thinkTagRegexp = regexp.MustCompile(`<think>(?s).*?</think>(\n)*`)

// filterThinkTags strips <think>...</think> spans from assistant messages
// that come before the last user message. It only acts when the model family
// is "qwen3" or the parsed model name is "deepseek-r1"; for any other model
// msgs is returned untouched. The slice is edited in place and returned.
func filterThinkTags(msgs []api.Message, m *Model) []api.Message {
	if m.Config.ModelFamily != "qwen3" && model.ParseName(m.Name).Model != "deepseek-r1" {
		return msgs
	}

	// Locate the last user message; it stays -1 when there is none, in which
	// case the loop below does nothing.
	lastUser := -1
	for i := len(msgs) - 1; i >= 0; i-- {
		if msgs[i].Role == "user" {
			lastUser = i
			break
		}
	}

	for i := 0; i < lastUser; i++ {
		if msgs[i].Role == "assistant" {
			msgs[i].Content = thinkTagRegexp.ReplaceAllString(msgs[i].Content, "")
		}
	}

	return msgs
}