routes.go 37.6 KB
Newer Older
Jeffrey Morgan's avatar
Jeffrey Morgan committed
1
2
3
package server

import (
Michael Yang's avatar
Michael Yang committed
4
	"bytes"
Michael Yang's avatar
Michael Yang committed
5
	"cmp"
6
	"context"
Michael Yang's avatar
Michael Yang committed
7
	"encoding/json"
8
	"errors"
9
	"fmt"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
10
	"io"
11
	"log/slog"
12
	"math"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
13
14
	"net"
	"net/http"
15
	"net/netip"
16
	"os"
17
	"os/signal"
Michael Yang's avatar
Michael Yang committed
18
	"path/filepath"
19
	"slices"
Michael Yang's avatar
Michael Yang committed
20
	"strings"
21
	"syscall"
22
	"time"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
23

Michael Yang's avatar
Michael Yang committed
24
	"github.com/gin-contrib/cors"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
25
	"github.com/gin-gonic/gin"
26
	"golang.org/x/sync/errgroup"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
27

28
	"github.com/ollama/ollama/api"
29
	"github.com/ollama/ollama/build"
30
	"github.com/ollama/ollama/discover"
31
	"github.com/ollama/ollama/envconfig"
32
33
	"github.com/ollama/ollama/llm"
	"github.com/ollama/ollama/openai"
34
	"github.com/ollama/ollama/parser"
35
	"github.com/ollama/ollama/runners"
Michael Yang's avatar
Michael Yang committed
36
	"github.com/ollama/ollama/template"
37
	"github.com/ollama/ollama/types/errtypes"
Michael Yang's avatar
Michael Yang committed
38
	"github.com/ollama/ollama/types/model"
39
	"github.com/ollama/ollama/version"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
40
41
)

Michael Yang's avatar
Michael Yang committed
42
43
// mode is the gin run mode that init applies through gin.SetMode. Any value
// other than gin.DebugMode, gin.ReleaseMode, or gin.TestMode is reset to
// gin.DebugMode by init.
var mode string = gin.DebugMode

44
type Server struct {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
45
46
	addr  net.Addr
	sched *Scheduler
47
48
}

Michael Yang's avatar
Michael Yang committed
49
50
51
52
53
54
55
56
57
58
59
60
// init validates the package-level mode and hands it to gin. Anything that is
// not one of gin's three known modes is replaced with gin.DebugMode first.
func init() {
	known := mode == gin.DebugMode || mode == gin.ReleaseMode || mode == gin.TestMode
	if !known {
		mode = gin.DebugMode
	}

	gin.SetMode(mode)
}

Michael Yang's avatar
lint  
Michael Yang committed
61
62
63
64
// errRequired is wrapped with the name of a missing field, e.g. "model is required".
var errRequired = errors.New("is required")

// errBadTemplate marks model-creation failures that are reported to the
// client as a bad request rather than a server error.
var errBadTemplate = errors.New("template error")
Michael Yang's avatar
Michael Yang committed
65

66
67
68
69
70
71
72
73
74
75
76
func modelOptions(model *Model, requestOpts map[string]interface{}) (api.Options, error) {
	opts := api.DefaultOptions()
	if err := opts.FromMap(model.Options); err != nil {
		return api.Options{}, err
	}

	if err := opts.FromMap(requestOpts); err != nil {
		return api.Options{}, err
	}

	return opts, nil
Bruce MacDonald's avatar
Bruce MacDonald committed
77
78
}

Michael Yang's avatar
Michael Yang committed
79
80
81
// scheduleRunner schedules a runner after validating inputs such as capabilities and model options.
// It returns the allocated runner, model instance, and consolidated options if successful and error otherwise.
func (s *Server) scheduleRunner(ctx context.Context, name string, caps []Capability, requestOpts map[string]any, keepAlive *api.Duration) (llm.LlamaServer, *Model, *api.Options, error) {
Michael Yang's avatar
Michael Yang committed
82
	if name == "" {
Michael Yang's avatar
Michael Yang committed
83
		return nil, nil, nil, fmt.Errorf("model %w", errRequired)
Bruce MacDonald's avatar
Bruce MacDonald committed
84
85
	}

Michael Yang's avatar
Michael Yang committed
86
	model, err := GetModel(name)
Bruce MacDonald's avatar
Bruce MacDonald committed
87
	if err != nil {
Michael Yang's avatar
Michael Yang committed
88
		return nil, nil, nil, err
89
90
	}

Michael Yang's avatar
Michael Yang committed
91
	if err := model.CheckCapabilities(caps...); err != nil {
Michael Yang's avatar
Michael Yang committed
92
		return nil, nil, nil, fmt.Errorf("%s %w", name, err)
93
94
	}

Michael Yang's avatar
Michael Yang committed
95
	opts, err := modelOptions(model, requestOpts)
96
	if err != nil {
Michael Yang's avatar
Michael Yang committed
97
		return nil, nil, nil, err
98
99
	}

Michael Yang's avatar
Michael Yang committed
100
	runnerCh, errCh := s.sched.GetRunner(ctx, model, opts, keepAlive)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
101
102
	var runner *runnerRef
	select {
Michael Yang's avatar
Michael Yang committed
103
104
	case runner = <-runnerCh:
	case err = <-errCh:
Michael Yang's avatar
Michael Yang committed
105
		return nil, nil, nil, err
Bruce MacDonald's avatar
Bruce MacDonald committed
106
107
	}

Michael Yang's avatar
Michael Yang committed
108
	return runner.llama, model, &opts, nil
Michael Yang's avatar
Michael Yang committed
109
110
111
}

func (s *Server) GenerateHandler(c *gin.Context) {
112
	checkpointStart := time.Now()
Michael Yang's avatar
Michael Yang committed
113
114
115
116
117
118
	var req api.GenerateRequest
	if err := c.ShouldBindJSON(&req); errors.Is(err, io.EOF) {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	} else if err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Bruce MacDonald's avatar
Bruce MacDonald committed
119
120
121
		return
	}

122
123
124
125
126
127
128
129
130
131
132
133
134
	model, err := GetModel(req.Model)
	if err != nil {
		switch {
		case os.IsNotExist(err):
			c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Model)})
		case err.Error() == "invalid model name":
			c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		default:
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		}
		return
	}

Patrick Devine's avatar
Patrick Devine committed
135
136
137
138
139
140
141
142
143
144
145
146
147
148
	// expire the runner
	if req.Prompt == "" && req.KeepAlive != nil && int(req.KeepAlive.Seconds()) == 0 {
		s.sched.expireRunner(model)

		c.JSON(http.StatusOK, api.GenerateResponse{
			Model:      req.Model,
			CreatedAt:  time.Now().UTC(),
			Response:   "",
			Done:       true,
			DoneReason: "unload",
		})
		return
	}

Michael Yang's avatar
Michael Yang committed
149
150
151
152
153
	if req.Format != "" && req.Format != "json" {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "format must be empty or \"json\""})
		return
	} else if req.Raw && (req.Template != "" || req.System != "" || len(req.Context) > 0) {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "raw mode does not support template, system, or context"})
Michael Yang's avatar
Michael Yang committed
154
155
156
		return
	}

Michael Yang's avatar
Michael Yang committed
157
	caps := []Capability{CapabilityCompletion}
158
159
160
161
	if req.Suffix != "" {
		caps = append(caps, CapabilityInsert)
	}

Michael Yang's avatar
Michael Yang committed
162
	r, m, opts, err := s.scheduleRunner(c.Request.Context(), req.Model, caps, req.Options, req.KeepAlive)
Michael Yang's avatar
Michael Yang committed
163
164
165
166
	if errors.Is(err, errCapabilityCompletion) {
		c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("%q does not support generate", req.Model)})
		return
	} else if err != nil {
Michael Yang's avatar
Michael Yang committed
167
168
169
170
		handleScheduleError(c, req.Model, err)
		return
	}

171
172
	checkpointLoaded := time.Now()

173
	// load the model
Michael Yang's avatar
Michael Yang committed
174
175
176
177
178
179
180
	if req.Prompt == "" {
		c.JSON(http.StatusOK, api.GenerateResponse{
			Model:      req.Model,
			CreatedAt:  time.Now().UTC(),
			Done:       true,
			DoneReason: "load",
		})
Michael Yang's avatar
Michael Yang committed
181
182
		return
	}
Bruce MacDonald's avatar
Bruce MacDonald committed
183

184
185
186
187
188
189
	isMllama := checkMllamaModelFamily(model)
	if isMllama && len(req.Images) > 1 {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "this model only supports one image: more than one image sent"})
		return
	}

Michael Yang's avatar
Michael Yang committed
190
191
192
193
	images := make([]llm.ImageData, len(req.Images))
	for i := range req.Images {
		images[i] = llm.ImageData{ID: i, Data: req.Images[i]}
	}
Bruce MacDonald's avatar
Bruce MacDonald committed
194

Michael Yang's avatar
Michael Yang committed
195
196
	prompt := req.Prompt
	if !req.Raw {
Michael Yang's avatar
Michael Yang committed
197
		tmpl := m.Template
Michael Yang's avatar
Michael Yang committed
198
199
200
201
202
203
204
205
		if req.Template != "" {
			tmpl, err = template.Parse(req.Template)
			if err != nil {
				c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
				return
			}
		}

206
207
208
209
210
211
212
213
214
215
216
217
		var values template.Values
		if req.Suffix != "" {
			values.Prompt = prompt
			values.Suffix = req.Suffix
		} else {
			var msgs []api.Message
			if req.System != "" {
				msgs = append(msgs, api.Message{Role: "system", Content: req.System})
			} else if m.System != "" {
				msgs = append(msgs, api.Message{Role: "system", Content: m.System})
			}

Michael Yang's avatar
Michael Yang committed
218
219
220
221
			if req.Context == nil {
				msgs = append(msgs, m.Messages...)
			}

222
			for _, i := range images {
223
224
225
226
227
				if isMllama {
					msgs = append(msgs, api.Message{Role: "user", Content: "<|image|>"})
				} else {
					msgs = append(msgs, api.Message{Role: "user", Content: fmt.Sprintf("[img-%d]", i.ID)})
				}
228
229
230
231
232
			}

			values.Messages = append(msgs, api.Message{Role: "user", Content: req.Prompt})
		}

Michael Yang's avatar
Michael Yang committed
233
234
		var b bytes.Buffer
		if req.Context != nil {
235
			s, err := r.Detokenize(c.Request.Context(), req.Context)
Michael Yang's avatar
Michael Yang committed
236
237
238
239
			if err != nil {
				c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
				return
			}
240
			b.WriteString(s)
Michael Yang's avatar
Michael Yang committed
241
		}
242
243
244
245
246
247
248

		if err := tmpl.Execute(&b, values); err != nil {
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
			return
		}

		prompt = b.String()
Bruce MacDonald's avatar
Bruce MacDonald committed
249
250
	}

Michael Yang's avatar
Michael Yang committed
251
	slog.Debug("generate request", "prompt", prompt, "images", images)
252

Bruce MacDonald's avatar
Bruce MacDonald committed
253
254
	ch := make(chan any)
	go func() {
255
256
		// TODO (jmorganca): avoid building the response twice both here and below
		var sb strings.Builder
Bruce MacDonald's avatar
Bruce MacDonald committed
257
		defer close(ch)
Michael Yang's avatar
Michael Yang committed
258
		if err := r.Completion(c.Request.Context(), llm.CompletionRequest{
Michael Yang's avatar
Michael Yang committed
259
260
261
			Prompt:  prompt,
			Images:  images,
			Format:  req.Format,
Michael Yang's avatar
Michael Yang committed
262
			Options: opts,
263
264
		}, func(cr llm.CompletionResponse) {
			res := api.GenerateResponse{
265
266
				Model:      req.Model,
				CreatedAt:  time.Now().UTC(),
267
268
269
				Response:   cr.Content,
				Done:       cr.Done,
				DoneReason: cr.DoneReason,
Bruce MacDonald's avatar
Bruce MacDonald committed
270
				Metrics: api.Metrics{
271
272
273
274
					PromptEvalCount:    cr.PromptEvalCount,
					PromptEvalDuration: cr.PromptEvalDuration,
					EvalCount:          cr.EvalCount,
					EvalDuration:       cr.EvalDuration,
Bruce MacDonald's avatar
Bruce MacDonald committed
275
				},
Bruce MacDonald's avatar
Bruce MacDonald committed
276
			}
277
278
279
280
281
282
283
284
285
286

			if _, err := sb.WriteString(cr.Content); err != nil {
				ch <- gin.H{"error": err.Error()}
			}

			if cr.Done {
				res.TotalDuration = time.Since(checkpointStart)
				res.LoadDuration = checkpointLoaded.Sub(checkpointStart)

				if !req.Raw {
287
					tokens, err := r.Tokenize(c.Request.Context(), prompt+sb.String())
288
289
290
291
					if err != nil {
						ch <- gin.H{"error": err.Error()}
						return
					}
292
					res.Context = tokens
293
294
295
296
				}
			}

			ch <- res
Michael Yang's avatar
Michael Yang committed
297
		}); err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
298
299
300
301
302
			ch <- gin.H{"error": err.Error()}
		}
	}()

	if req.Stream != nil && !*req.Stream {
Michael Yang's avatar
Michael Yang committed
303
		var r api.GenerateResponse
Bruce MacDonald's avatar
Bruce MacDonald committed
304
		var sb strings.Builder
Michael Yang's avatar
Michael Yang committed
305
306
		for rr := range ch {
			switch t := rr.(type) {
307
			case api.GenerateResponse:
Michael Yang's avatar
Michael Yang committed
308
309
				sb.WriteString(t.Response)
				r = t
310
			case gin.H:
Michael Yang's avatar
Michael Yang committed
311
312
313
				msg, ok := t["error"].(string)
				if !ok {
					msg = "unexpected error format in response"
314
				}
Michael Yang's avatar
Michael Yang committed
315
316
317

				c.JSON(http.StatusInternalServerError, gin.H{"error": msg})
				return
318
			default:
Michael Yang's avatar
Michael Yang committed
319
				c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected response"})
Bruce MacDonald's avatar
Bruce MacDonald committed
320
321
322
				return
			}
		}
323

Michael Yang's avatar
Michael Yang committed
324
325
		r.Response = sb.String()
		c.JSON(http.StatusOK, r)
Bruce MacDonald's avatar
Bruce MacDonald committed
326
327
328
329
330
331
		return
	}

	streamResponse(c, ch)
}

332
func (s *Server) EmbedHandler(c *gin.Context) {
333
	checkpointStart := time.Now()
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
	var req api.EmbedRequest
	err := c.ShouldBindJSON(&req)
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	truncate := true

	if req.Truncate != nil && !*req.Truncate {
		truncate = false
	}

	var input []string

	switch i := req.Input.(type) {
	case string:
		if len(i) > 0 {
			input = append(input, i)
		}
	case []any:
		for _, v := range i {
			if _, ok := v.(string); !ok {
				c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "invalid input type"})
				return
			}
			input = append(input, v.(string))
		}
	default:
367
368
369
370
		if req.Input != nil {
			c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "invalid input type"})
			return
		}
371
372
373
374
375
376
377
378
	}

	r, m, opts, err := s.scheduleRunner(c.Request.Context(), req.Model, []Capability{}, req.Options, req.KeepAlive)
	if err != nil {
		handleScheduleError(c, req.Model, err)
		return
	}

379
380
	checkpointLoaded := time.Now()

381
382
383
384
385
	if len(input) == 0 {
		c.JSON(http.StatusOK, api.EmbedResponse{Model: req.Model, Embeddings: [][]float32{}})
		return
	}

386
387
388
389
390
391
	kvData, err := getKVData(m.ModelPath, false)
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

392
	var count int
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
	for i, s := range input {
		tokens, err := r.Tokenize(c.Request.Context(), s)
		if err != nil {
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
			return
		}

		ctxLen := min(opts.NumCtx, int(kvData.ContextLength()))
		if len(tokens) > ctxLen {
			if !truncate {
				c.JSON(http.StatusBadRequest, gin.H{"error": "input length exceeds maximum context length"})
				return
			}

			tokens = tokens[:ctxLen]
			s, err = r.Detokenize(c.Request.Context(), tokens)
			if err != nil {
				c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
				return
			}
		}

415
416
		count += len(tokens)

417
418
		input[i] = s
	}
419
420
421
422
423
424
425
426
427
428
429
430

	var g errgroup.Group
	embeddings := make([][]float32, len(input))
	for i, text := range input {
		g.Go(func() error {
			embedding, err := r.Embedding(c.Request.Context(), text)
			if err != nil {
				return err
			}
			embeddings[i] = normalize(embedding)
			return nil
		})
431
432
	}

433
434
435
436
	if err := g.Wait(); err != nil {
		slog.Error("embedding generation failed", "error", err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": fmt.Errorf("failed to generate embeddings: %v", err)})
		return
437
438
439
	}

	resp := api.EmbedResponse{
440
		Model:           req.Model,
441
		Embeddings:      embeddings,
442
443
		TotalDuration:   time.Since(checkpointStart),
		LoadDuration:    checkpointLoaded.Sub(checkpointStart),
444
		PromptEvalCount: count,
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
	}
	c.JSON(http.StatusOK, resp)
}

// normalize rescales vec in place so that its Euclidean length is 1 and
// returns the same slice. A vector whose sum of squares is not positive
// (for example all zeros) is multiplied by 0.
func normalize(vec []float32) []float32 {
	var sumSquares float32
	for i := range vec {
		sumSquares += vec[i] * vec[i]
	}

	// scale stays 0 unless there is a positive magnitude to divide by.
	var scale float32
	if sumSquares > 0 {
		scale = float32(1.0 / math.Sqrt(float64(sumSquares)))
	}

	for i, v := range vec {
		vec[i] = v * scale
	}
	return vec
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
466
func (s *Server) EmbeddingsHandler(c *gin.Context) {
Bruce MacDonald's avatar
Bruce MacDonald committed
467
	var req api.EmbeddingRequest
Michael Yang's avatar
Michael Yang committed
468
	if err := c.ShouldBindJSON(&req); errors.Is(err, io.EOF) {
Bruce MacDonald's avatar
Bruce MacDonald committed
469
470
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
Michael Yang's avatar
Michael Yang committed
471
	} else if err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
472
473
474
475
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

Michael Yang's avatar
Michael Yang committed
476
	r, _, _, err := s.scheduleRunner(c.Request.Context(), req.Model, []Capability{}, req.Options, req.KeepAlive)
Bruce MacDonald's avatar
Bruce MacDonald committed
477
	if err != nil {
Michael Yang's avatar
Michael Yang committed
478
		handleScheduleError(c, req.Model, err)
Bruce MacDonald's avatar
Bruce MacDonald committed
479
480
481
		return
	}

482
483
484
	// an empty request loads the model
	if req.Prompt == "" {
		c.JSON(http.StatusOK, api.EmbeddingResponse{Embedding: []float64{}})
Bruce MacDonald's avatar
Bruce MacDonald committed
485
486
487
		return
	}

488
	embedding, err := r.Embedding(c.Request.Context(), req.Prompt)
Bruce MacDonald's avatar
Bruce MacDonald committed
489
	if err != nil {
490
		slog.Info(fmt.Sprintf("embedding generation failed: %v", err))
Bruce MacDonald's avatar
Bruce MacDonald committed
491
492
493
494
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate embedding"})
		return
	}

495
496
497
	var e []float64
	for _, v := range embedding {
		e = append(e, float64(v))
498
499
500
	}

	resp := api.EmbeddingResponse{
501
		Embedding: e,
502
503
	}
	c.JSON(http.StatusOK, resp)
Bruce MacDonald's avatar
Bruce MacDonald committed
504
505
}

506
func (s *Server) PullHandler(c *gin.Context) {
Michael Yang's avatar
Michael Yang committed
507
	var req api.PullRequest
Michael Yang's avatar
Michael Yang committed
508
509
510
511
512
513
514
	err := c.ShouldBindJSON(&req)
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Michael Yang's avatar
Michael Yang committed
515
516
517
		return
	}

518
519
520
521
522
523
524
525
	name := model.ParseName(cmp.Or(req.Model, req.Name))
	if !name.IsValid() {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "invalid model name"})
		return
	}

	if err := checkNameExists(name); err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
526
527
528
		return
	}

529
530
531
	ch := make(chan any)
	go func() {
		defer close(ch)
532
533
		fn := func(r api.ProgressResponse) {
			ch <- r
534
		}
535

Michael Yang's avatar
Michael Yang committed
536
		regOpts := &registryOptions{
537
538
539
			Insecure: req.Insecure,
		}

540
541
542
		ctx, cancel := context.WithCancel(c.Request.Context())
		defer cancel()

543
		if err := PullModel(ctx, name.DisplayShortest(), regOpts, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
544
			ch <- gin.H{"error": err.Error()}
545
546
547
		}
	}()

548
549
550
551
552
	if req.Stream != nil && !*req.Stream {
		waitForStream(c, ch)
		return
	}

553
554
555
	streamResponse(c, ch)
}

556
func (s *Server) PushHandler(c *gin.Context) {
557
	var req api.PushRequest
Michael Yang's avatar
Michael Yang committed
558
559
560
561
562
563
564
	err := c.ShouldBindJSON(&req)
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Michael Yang's avatar
Michael Yang committed
565
566
		return
	}
Michael Yang's avatar
Michael Yang committed
567

Michael Yang's avatar
Michael Yang committed
568
569
570
571
572
573
574
	var model string
	if req.Model != "" {
		model = req.Model
	} else if req.Name != "" {
		model = req.Name
	} else {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
575
576
577
		return
	}

578
579
580
	ch := make(chan any)
	go func() {
		defer close(ch)
581
582
		fn := func(r api.ProgressResponse) {
			ch <- r
583
		}
584

Michael Yang's avatar
Michael Yang committed
585
		regOpts := &registryOptions{
586
587
588
			Insecure: req.Insecure,
		}

Michael Yang's avatar
Michael Yang committed
589
590
591
		ctx, cancel := context.WithCancel(c.Request.Context())
		defer cancel()

Michael Yang's avatar
Michael Yang committed
592
		if err := PushModel(ctx, model, regOpts, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
593
			ch <- gin.H{"error": err.Error()}
594
595
596
		}
	}()

597
598
599
600
601
	if req.Stream != nil && !*req.Stream {
		waitForStream(c, ch)
		return
	}

602
603
604
	streamResponse(c, ch)
}

605
606
607
608
609
610
611
612
func checkNameExists(name model.Name) error {
	names, err := Manifests()
	if err != nil {
		return err
	}

	for n := range names {
		if strings.EqualFold(n.Filepath(), name.Filepath()) && n != name {
Michael Yang's avatar
lint  
Michael Yang committed
613
			return errors.New("a model with that name already exists")
614
615
616
617
618
619
		}
	}

	return nil
}

620
func (s *Server) CreateHandler(c *gin.Context) {
621
622
	var r api.CreateRequest
	if err := c.ShouldBindJSON(&r); errors.Is(err, io.EOF) {
Michael Yang's avatar
Michael Yang committed
623
624
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
Michael Yang's avatar
Michael Yang committed
625
	} else if err != nil {
Michael Yang's avatar
Michael Yang committed
626
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Michael Yang's avatar
Michael Yang committed
627
		return
628
629
	}

630
	name := model.ParseName(cmp.Or(r.Model, r.Name))
Michael Yang's avatar
Michael Yang committed
631
	if !name.IsValid() {
632
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": errtypes.InvalidModelNameErrMsg})
633
634
635
		return
	}

636
637
638
639
640
	if err := checkNameExists(name); err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

641
	if r.Path == "" && r.Modelfile == "" {
Michael Yang's avatar
Michael Yang committed
642
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "path or modelfile are required"})
Michael Yang's avatar
Michael Yang committed
643
644
		return
	}
Michael Yang's avatar
Michael Yang committed
645

646
647
648
	var sr io.Reader = strings.NewReader(r.Modelfile)
	if r.Path != "" && r.Modelfile == "" {
		f, err := os.Open(r.Path)
Michael Yang's avatar
Michael Yang committed
649
650
651
652
		if err != nil {
			c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("error reading modelfile: %s", err)})
			return
		}
Michael Yang's avatar
Michael Yang committed
653
		defer f.Close()
Michael Yang's avatar
Michael Yang committed
654

655
		sr = f
Michael Yang's avatar
Michael Yang committed
656
	}
Michael Yang's avatar
Michael Yang committed
657

658
	f, err := parser.ParseFile(sr)
Michael Yang's avatar
Michael Yang committed
659
660
661
662
663
	if err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

Michael Yang's avatar
Michael Yang committed
664
	ch := make(chan any)
Michael Yang's avatar
Michael Yang committed
665
666
	go func() {
		defer close(ch)
667
668
		fn := func(resp api.ProgressResponse) {
			ch <- resp
669
670
		}

671
672
673
		ctx, cancel := context.WithCancel(c.Request.Context())
		defer cancel()

674
		quantization := cmp.Or(r.Quantize, r.Quantization)
Josh's avatar
Josh committed
675
676
677
		if err := CreateModel(ctx, name, filepath.Dir(r.Path), strings.ToUpper(quantization), f, fn); errors.Is(err, errBadTemplate) {
			ch <- gin.H{"error": err.Error(), "status": http.StatusBadRequest}
		} else if err != nil {
Michael Yang's avatar
Michael Yang committed
678
			ch <- gin.H{"error": err.Error()}
679
		}
Michael Yang's avatar
Michael Yang committed
680
	}()
Michael Yang's avatar
Michael Yang committed
681

682
	if r.Stream != nil && !*r.Stream {
683
684
685
686
		waitForStream(c, ch)
		return
	}

Michael Yang's avatar
Michael Yang committed
687
	streamResponse(c, ch)
Bruce MacDonald's avatar
Bruce MacDonald committed
688
689
}

690
func (s *Server) DeleteHandler(c *gin.Context) {
691
692
	var r api.DeleteRequest
	if err := c.ShouldBindJSON(&r); errors.Is(err, io.EOF) {
Michael Yang's avatar
Michael Yang committed
693
694
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
695
	} else if err != nil {
Michael Yang's avatar
Michael Yang committed
696
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
697
698
699
		return
	}

700
701
702
	n := model.ParseName(cmp.Or(r.Model, r.Name))
	if !n.IsValid() {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("name %q is invalid", cmp.Or(r.Model, r.Name))})
703
704
		return
	}
Michael Yang's avatar
Michael Yang committed
705

706
	m, err := ParseNamedManifest(n)
Michael Yang's avatar
Michael Yang committed
707
	if err != nil {
708
709
710
711
712
713
		switch {
		case os.IsNotExist(err):
			c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", cmp.Or(r.Model, r.Name))})
		default:
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		}
Michael Yang's avatar
Michael Yang committed
714
715
716
		return
	}

717
	if err := m.Remove(); err != nil {
Michael Yang's avatar
Michael Yang committed
718
719
720
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
721
722
723
724
725

	if err := m.RemoveLayers(); err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
726
727
}

728
func (s *Server) ShowHandler(c *gin.Context) {
Patrick Devine's avatar
Patrick Devine committed
729
	var req api.ShowRequest
Michael Yang's avatar
Michael Yang committed
730
731
732
733
734
735
736
	err := c.ShouldBindJSON(&req)
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Patrick Devine's avatar
Patrick Devine committed
737
738
739
		return
	}

Michael Yang's avatar
Michael Yang committed
740
	if req.Model != "" {
Michael Yang's avatar
Michael Yang committed
741
		// noop
Michael Yang's avatar
Michael Yang committed
742
	} else if req.Name != "" {
Michael Yang's avatar
Michael Yang committed
743
		req.Model = req.Name
Michael Yang's avatar
Michael Yang committed
744
	} else {
745
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
746
747
748
		return
	}

749
	resp, err := GetModelInfo(req)
Patrick Devine's avatar
Patrick Devine committed
750
	if err != nil {
751
752
		switch {
		case os.IsNotExist(err):
Michael Yang's avatar
Michael Yang committed
753
			c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Model)})
754
755
756
		case err.Error() == "invalid model name":
			c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		default:
Patrick Devine's avatar
Patrick Devine committed
757
758
759
760
761
762
763
764
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		}
		return
	}

	c.JSON(http.StatusOK, resp)
}

765
func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {
766
	m, err := GetModel(req.Model)
Patrick Devine's avatar
Patrick Devine committed
767
768
769
770
	if err != nil {
		return nil, err
	}

Patrick Devine's avatar
Patrick Devine committed
771
	modelDetails := api.ModelDetails{
772
773
774
775
776
777
		ParentModel:       m.ParentModel,
		Format:            m.Config.ModelFormat,
		Family:            m.Config.ModelFamily,
		Families:          m.Config.ModelFamilies,
		ParameterSize:     m.Config.ModelType,
		QuantizationLevel: m.Config.FileType,
Patrick Devine's avatar
Patrick Devine committed
778
779
	}

780
	if req.System != "" {
781
		m.System = req.System
782
783
	}

Michael Yang's avatar
Michael Yang committed
784
785
786
	msgs := make([]api.Message, len(m.Messages))
	for i, msg := range m.Messages {
		msgs[i] = api.Message{Role: msg.Role, Content: msg.Content}
787
788
	}

789
790
	n := model.ParseName(req.Model)
	if !n.IsValid() {
Michael Yang's avatar
lint  
Michael Yang committed
791
		return nil, errors.New("invalid model name")
792
793
794
795
796
797
798
	}

	manifest, err := ParseNamedManifest(n)
	if err != nil {
		return nil, err
	}

Patrick Devine's avatar
Patrick Devine committed
799
	resp := &api.ShowResponse{
800
801
		License:    strings.Join(m.License, "\n"),
		System:     m.System,
Michael Yang's avatar
Michael Yang committed
802
		Template:   m.Template.String(),
803
804
805
		Details:    modelDetails,
		Messages:   msgs,
		ModifiedAt: manifest.fi.ModTime(),
Patrick Devine's avatar
Patrick Devine committed
806
807
808
809
	}

	var params []string
	cs := 30
810
	for k, v := range m.Options {
Patrick Devine's avatar
Patrick Devine committed
811
812
813
		switch val := v.(type) {
		case []interface{}:
			for _, nv := range val {
Patrick Devine's avatar
Patrick Devine committed
814
				params = append(params, fmt.Sprintf("%-*s %#v", cs, k, nv))
Patrick Devine's avatar
Patrick Devine committed
815
			}
Patrick Devine's avatar
Patrick Devine committed
816
817
		default:
			params = append(params, fmt.Sprintf("%-*s %#v", cs, k, v))
Patrick Devine's avatar
Patrick Devine committed
818
819
820
821
		}
	}
	resp.Parameters = strings.Join(params, "\n")

822
823
	for k, v := range req.Options {
		if _, ok := req.Options[k]; ok {
824
			m.Options[k] = v
825
826
827
		}
	}

828
	var sb strings.Builder
829
	fmt.Fprintln(&sb, "# Modelfile generated by \"ollama show\"")
830
	fmt.Fprintln(&sb, "# To build a new Modelfile based on this, replace FROM with:")
831
832
	fmt.Fprintf(&sb, "# FROM %s\n\n", m.ShortName)
	fmt.Fprint(&sb, m.String())
833
	resp.Modelfile = sb.String()
834

835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
	kvData, err := getKVData(m.ModelPath, req.Verbose)
	if err != nil {
		return nil, err
	}
	delete(kvData, "general.name")
	delete(kvData, "tokenizer.chat_template")
	resp.ModelInfo = kvData

	if len(m.ProjectorPaths) > 0 {
		projectorData, err := getKVData(m.ProjectorPaths[0], req.Verbose)
		if err != nil {
			return nil, err
		}
		resp.ProjectorInfo = projectorData
	}

Patrick Devine's avatar
Patrick Devine committed
851
852
853
	return resp, nil
}

854
// getKVData loads the model file identified by digest through llm.LoadModel
// and returns its key/value metadata.
//
// When verbose is true, -1 is passed as the maximum array size and the
// metadata is returned as loaded. Otherwise 0 is passed, and any []any value
// that still holds more than five elements is replaced with an empty slice.
func getKVData(digest string, verbose bool) (llm.KV, error) {
	arrayLimit := -1
	if !verbose {
		arrayLimit = 0
	}

	loaded, err := llm.LoadModel(digest, arrayLimit)
	if err != nil {
		return nil, err
	}

	kv := loaded.KV()
	if verbose {
		return kv, nil
	}

	// Only existing keys are overwritten, which is safe while ranging.
	for key := range kv {
		if arr, isArray := kv[key].([]any); isArray && len(arr) > 5 {
			kv[key] = []any{}
		}
	}

	return kv, nil
}

877
func (s *Server) ListHandler(c *gin.Context) {
878
	ms, err := Manifests()
Patrick Devine's avatar
Patrick Devine committed
879
880
881
882
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
Michael Yang's avatar
Michael Yang committed
883

884
	models := []api.ListModelResponse{}
885
886
	for n, m := range ms {
		var cf ConfigV2
887
888
889
890
891
892
893
894
895
896
897
898
899

		if m.Config.Digest != "" {
			f, err := m.Config.Open()
			if err != nil {
				slog.Warn("bad manifest filepath", "name", n, "error", err)
				continue
			}
			defer f.Close()

			if err := json.NewDecoder(f).Decode(&cf); err != nil {
				slog.Warn("bad manifest config", "name", n, "error", err)
				continue
			}
Patrick Devine's avatar
Patrick Devine committed
900
		}
Michael Yang's avatar
Michael Yang committed
901

902
		// tag should never be masked
903
		models = append(models, api.ListModelResponse{
904
905
906
907
908
909
910
911
912
913
914
915
916
			Model:      n.DisplayShortest(),
			Name:       n.DisplayShortest(),
			Size:       m.Size(),
			Digest:     m.digest,
			ModifiedAt: m.fi.ModTime(),
			Details: api.ModelDetails{
				Format:            cf.ModelFormat,
				Family:            cf.ModelFamily,
				Families:          cf.ModelFamilies,
				ParameterSize:     cf.ModelType,
				QuantizationLevel: cf.FileType,
			},
		})
Patrick Devine's avatar
Patrick Devine committed
917
918
	}

919
	slices.SortStableFunc(models, func(i, j api.ListModelResponse) int {
920
921
922
923
		// most recently modified first
		return cmp.Compare(j.ModifiedAt.Unix(), i.ModifiedAt.Unix())
	})

Michael Yang's avatar
Michael Yang committed
924
	c.JSON(http.StatusOK, api.ListResponse{Models: models})
Patrick Devine's avatar
Patrick Devine committed
925
926
}

927
func (s *Server) CopyHandler(c *gin.Context) {
Michael Yang's avatar
Michael Yang committed
928
929
	var r api.CopyRequest
	if err := c.ShouldBindJSON(&r); errors.Is(err, io.EOF) {
Michael Yang's avatar
Michael Yang committed
930
931
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
Michael Yang's avatar
Michael Yang committed
932
	} else if err != nil {
Michael Yang's avatar
Michael Yang committed
933
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Patrick Devine's avatar
Patrick Devine committed
934
935
936
		return
	}

Michael Yang's avatar
Michael Yang committed
937
938
	src := model.ParseName(r.Source)
	if !src.IsValid() {
Michael Yang's avatar
Michael Yang committed
939
940
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("source %q is invalid", r.Source)})
		return
941
942
	}

Michael Yang's avatar
Michael Yang committed
943
944
	dst := model.ParseName(r.Destination)
	if !dst.IsValid() {
945
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("destination %q is invalid", r.Destination)})
Patrick Devine's avatar
Patrick Devine committed
946
947
		return
	}
Michael Yang's avatar
Michael Yang committed
948

949
950
951
952
953
	if err := checkNameExists(dst); err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

Michael Yang's avatar
Michael Yang committed
954
955
956
957
958
	if err := CopyModel(src, dst); errors.Is(err, os.ErrNotExist) {
		c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model %q not found", r.Source)})
	} else if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
	}
Patrick Devine's avatar
Patrick Devine committed
959
960
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
961
func (s *Server) HeadBlobHandler(c *gin.Context) {
Michael Yang's avatar
Michael Yang committed
962
963
964
965
966
967
968
969
970
971
972
	path, err := GetBlobsPath(c.Param("digest"))
	if err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	if _, err := os.Stat(path); err != nil {
		c.AbortWithStatusJSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("blob %q not found", c.Param("digest"))})
		return
	}

Michael Yang's avatar
Michael Yang committed
973
	c.Status(http.StatusOK)
Michael Yang's avatar
Michael Yang committed
974
975
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
976
func (s *Server) CreateBlobHandler(c *gin.Context) {
Michael Yang's avatar
Michael Yang committed
977
978
	if ib, ok := intermediateBlobs[c.Param("digest")]; ok {
		p, err := GetBlobsPath(ib)
979
980
981
982
983
984
		if err != nil {
			c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
			return
		}

		if _, err := os.Stat(p); errors.Is(err, os.ErrNotExist) {
Michael Yang's avatar
Michael Yang committed
985
986
			slog.Info("evicting intermediate blob which no longer exists", "digest", ib)
			delete(intermediateBlobs, c.Param("digest"))
987
988
989
990
991
992
993
994
995
		} else if err != nil {
			c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
			return
		} else {
			c.Status(http.StatusOK)
			return
		}
	}

996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
	path, err := GetBlobsPath(c.Param("digest"))
	if err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	_, err = os.Stat(path)
	switch {
	case errors.Is(err, os.ErrNotExist):
		// noop
	case err != nil:
		c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	default:
		c.Status(http.StatusOK)
		return
	}

1014
	layer, err := NewLayer(c.Request.Body, "")
Michael Yang's avatar
Michael Yang committed
1015
1016
1017
1018
1019
	if err != nil {
		c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

1020
1021
	if layer.Digest != c.Param("digest") {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("digest mismatch, expected %q, got %q", c.Param("digest"), layer.Digest)})
Michael Yang's avatar
Michael Yang committed
1022
1023
1024
		return
	}

Michael Yang's avatar
Michael Yang committed
1025
	c.Status(http.StatusCreated)
Michael Yang's avatar
Michael Yang committed
1026
1027
}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
// isLocalIP reports whether ip is assigned to one of this machine's network
// interfaces.
//
// Addresses are compared as normalised netip.Addr values, not as strings.
// An IPv4-mapped IPv6 form ("::ffff:a.b.c.d") or a zoned form
// ("fe80::1%eth0") of a local address therefore still matches; a plain
// string comparison misses both. Interfaces whose addresses cannot be listed
// and entries that are not in CIDR notation are skipped. If the interfaces
// cannot be enumerated at all, the result is false.
func isLocalIP(ip netip.Addr) bool {
	want := ip.Unmap().WithZone("")

	interfaces, err := net.Interfaces()
	if err != nil {
		return false
	}

	for _, iface := range interfaces {
		addrs, err := iface.Addrs()
		if err != nil {
			continue
		}

		for _, a := range addrs {
			parsed, _, err := net.ParseCIDR(a.String())
			if err != nil {
				continue
			}

			// ParseCIDR may return IPv4 addresses in 16-byte form; Unmap
			// brings both sides to the same representation.
			if got, ok := netip.AddrFromSlice(parsed); ok && got.Unmap() == want {
				return true
			}
		}
	}

	return false
}

1049
// allowedHost reports whether host is a name that refers to the local
// machine or a local network: the empty string, "localhost", this machine's
// hostname, or any name ending in ".localhost", ".local" or ".internal".
//
// The comparison is case-insensitive because DNS names are, so
// "LOCALHOST" and "Box.Local" are accepted just like their lower-case forms.
func allowedHost(host string) bool {
	host = strings.ToLower(host)

	if host == "" || host == "localhost" {
		return true
	}

	// A failure to read the hostname simply means this check cannot match.
	if hostname, err := os.Hostname(); err == nil && host == strings.ToLower(hostname) {
		return true
	}

	tlds := []string{
		"localhost",
		"local",
		"internal",
	}

	// check if the host is a local TLD
	for _, tld := range tlds {
		if strings.HasSuffix(host, "."+tld) {
			return true
		}
	}

	return false
}
1073

Jeffrey Morgan's avatar
Jeffrey Morgan committed
1074
1075
1076
func allowedHostsMiddleware(addr net.Addr) gin.HandlerFunc {
	return func(c *gin.Context) {
		if addr == nil {
1077
1078
1079
1080
			c.Next()
			return
		}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
1081
		if addr, err := netip.ParseAddrPort(addr.String()); err == nil && !addr.Addr().IsLoopback() {
1082
1083
1084
1085
1086
1087
1088
1089
1090
			c.Next()
			return
		}

		host, _, err := net.SplitHostPort(c.Request.Host)
		if err != nil {
			host = c.Request.Host
		}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
1091
		if addr, err := netip.ParseAddr(host); err == nil {
Jeffrey Morgan's avatar
Jeffrey Morgan committed
1092
			if addr.IsLoopback() || addr.IsPrivate() || addr.IsUnspecified() || isLocalIP(addr) {
Jeffrey Morgan's avatar
Jeffrey Morgan committed
1093
1094
1095
1096
1097
				c.Next()
				return
			}
		}

1098
		if allowedHost(host) {
Michael Yang's avatar
lint  
Michael Yang committed
1099
			if c.Request.Method == http.MethodOptions {
1100
1101
1102
1103
				c.AbortWithStatus(http.StatusNoContent)
				return
			}

1104
1105
1106
1107
1108
1109
			c.Next()
			return
		}

		c.AbortWithStatus(http.StatusForbidden)
	}
1110
}
1111

1112
func (s *Server) GenerateRoutes() http.Handler {
Michael Yang's avatar
Michael Yang committed
1113
1114
	config := cors.DefaultConfig()
	config.AllowWildcard = true
1115
	config.AllowBrowserExtensions = true
1116
	config.AllowHeaders = []string{"Authorization", "Content-Type", "User-Agent", "Accept", "X-Requested-With"}
royjhan's avatar
royjhan committed
1117
1118
1119
1120
	openAIProperties := []string{"lang", "package-version", "os", "arch", "runtime", "runtime-version", "async"}
	for _, prop := range openAIProperties {
		config.AllowHeaders = append(config.AllowHeaders, "x-stainless-"+prop)
	}
Michael Yang's avatar
origins  
Michael Yang committed
1121
	config.AllowOrigins = envconfig.Origins()
Michael Yang's avatar
Michael Yang committed
1122

Bruce MacDonald's avatar
Bruce MacDonald committed
1123
	r := gin.Default()
1124
1125
	r.Use(
		cors.New(config),
1126
		allowedHostsMiddleware(s.addr),
1127
	)
Bruce MacDonald's avatar
Bruce MacDonald committed
1128

1129
	r.POST("/api/pull", s.PullHandler)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
1130
1131
	r.POST("/api/generate", s.GenerateHandler)
	r.POST("/api/chat", s.ChatHandler)
1132
	r.POST("/api/embed", s.EmbedHandler)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
1133
	r.POST("/api/embeddings", s.EmbeddingsHandler)
1134
1135
1136
1137
1138
	r.POST("/api/create", s.CreateHandler)
	r.POST("/api/push", s.PushHandler)
	r.POST("/api/copy", s.CopyHandler)
	r.DELETE("/api/delete", s.DeleteHandler)
	r.POST("/api/show", s.ShowHandler)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
1139
1140
	r.POST("/api/blobs/:digest", s.CreateBlobHandler)
	r.HEAD("/api/blobs/:digest", s.HeadBlobHandler)
1141
	r.GET("/api/ps", s.PsHandler)
Jeffrey Morgan's avatar
Jeffrey Morgan committed
1142

1143
	// Compatibility endpoints
1144
	r.POST("/v1/chat/completions", openai.ChatMiddleware(), s.ChatHandler)
1145
	r.POST("/v1/completions", openai.CompletionsMiddleware(), s.GenerateHandler)
1146
	r.POST("/v1/embeddings", openai.EmbeddingsMiddleware(), s.EmbedHandler)
1147
1148
	r.GET("/v1/models", openai.ListMiddleware(), s.ListHandler)
	r.GET("/v1/models/:model", openai.RetrieveMiddleware(), s.ShowHandler)
1149

Michael Yang's avatar
Michael Yang committed
1150
1151
1152
1153
1154
	for _, method := range []string{http.MethodGet, http.MethodHead} {
		r.Handle(method, "/", func(c *gin.Context) {
			c.String(http.StatusOK, "Ollama is running")
		})

1155
		r.Handle(method, "/api/tags", s.ListHandler)
Michael Yang's avatar
Michael Yang committed
1156
1157
1158
		r.Handle(method, "/api/version", func(c *gin.Context) {
			c.JSON(http.StatusOK, gin.H{"version": version.Version})
		})
Michael Yang's avatar
Michael Yang committed
1159
1160
	}

1161
1162
1163
1164
	return r
}

func Serve(ln net.Listener) error {
Michael Yang's avatar
Michael Yang committed
1165
	level := slog.LevelInfo
Michael Yang's avatar
Michael Yang committed
1166
	if envconfig.Debug() {
Michael Yang's avatar
Michael Yang committed
1167
		level = slog.LevelDebug
1168
	}
Michael Yang's avatar
Michael Yang committed
1169

1170
	slog.Info("server config", "env", envconfig.Values())
Michael Yang's avatar
Michael Yang committed
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
	handler := slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{
		Level:     level,
		AddSource: true,
		ReplaceAttr: func(_ []string, attr slog.Attr) slog.Attr {
			if attr.Key == slog.SourceKey {
				source := attr.Value.Any().(*slog.Source)
				source.File = filepath.Base(source.File)
			}

			return attr
		},
	})

	slog.SetDefault(slog.New(handler))

1186
1187
1188
1189
1190
1191
1192
1193
	blobsDir, err := GetBlobsPath("")
	if err != nil {
		return err
	}
	if err := fixBlobs(blobsDir); err != nil {
		return err
	}

Michael Yang's avatar
bool  
Michael Yang committed
1194
	if !envconfig.NoPrune() {
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
		// clean up unused layers and manifests
		if err := PruneLayers(); err != nil {
			return err
		}

		manifestsPath, err := GetManifestPath()
		if err != nil {
			return err
		}

		if err := PruneDirectory(manifestsPath); err != nil {
			return err
		}
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
1210
	ctx, done := context.WithCancel(context.Background())
Daniel Hiltgen's avatar
Daniel Hiltgen committed
1211
1212
	schedCtx, schedDone := context.WithCancel(ctx)
	sched := InitScheduler(schedCtx)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
1213
	s := &Server{addr: ln.Addr(), sched: sched}
1214
1215

	http.Handle("/", s.GenerateRoutes())
1216

1217
	slog.Info(fmt.Sprintf("Listening on %s (version %s)", ln.Addr(), version.Version))
1218
	srvr := &http.Server{
1219
1220
1221
1222
1223
1224
1225
1226
1227
		// Use http.DefaultServeMux so we get net/http/pprof for
		// free.
		//
		// TODO(bmizerany): Decide if we want to make this
		// configurable so it is not exposed by default, or allow
		// users to bind it to a different port. This was a quick
		// and easy way to get pprof, but it may not be the best
		// way.
		Handler: nil,
Jeffrey Morgan's avatar
Jeffrey Morgan committed
1228
1229
	}

1230
1231
	// listen for a ctrl+c and stop any loaded llm
	signals := make(chan os.Signal, 1)
1232
	signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM)
1233
1234
	go func() {
		<-signals
1235
		srvr.Close()
Daniel Hiltgen's avatar
Daniel Hiltgen committed
1236
		schedDone()
Daniel Hiltgen's avatar
Daniel Hiltgen committed
1237
		sched.unloadAllRunners()
1238
		runners.Cleanup(build.EmbedFS)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
1239
		done()
1240
1241
	}()

1242
1243
	if _, err := runners.Refresh(build.EmbedFS); err != nil {
		return fmt.Errorf("unable to initialize llm runners %w", err)
1244
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
1245

Daniel Hiltgen's avatar
Daniel Hiltgen committed
1246
	s.sched.Run(schedCtx)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
1247
1248
1249

	// At startup we retrieve GPU information so we can get log messages before loading a model
	// This will log warnings to the log in case we have problems with detected GPUs
1250
	gpus := discover.GetGPUInfo()
Daniel Hiltgen's avatar
Daniel Hiltgen committed
1251
	gpus.LogDetails()
1252

Daniel Hiltgen's avatar
Daniel Hiltgen committed
1253
1254
1255
1256
1257
1258
1259
	err = srvr.Serve(ln)
	// If server is closed from the signal handler, wait for the ctx to be done
	// otherwise error out quickly
	if !errors.Is(err, http.ErrServerClosed) {
		return err
	}
	<-ctx.Done()
1260
	return nil
Jeffrey Morgan's avatar
Jeffrey Morgan committed
1261
}
Michael Yang's avatar
Michael Yang committed
1262

1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
func waitForStream(c *gin.Context, ch chan interface{}) {
	c.Header("Content-Type", "application/json")
	for resp := range ch {
		switch r := resp.(type) {
		case api.ProgressResponse:
			if r.Status == "success" {
				c.JSON(http.StatusOK, r)
				return
			}
		case gin.H:
Josh's avatar
Josh committed
1273
1274
1275
1276
			status, ok := r["status"].(int)
			if !ok {
				status = http.StatusInternalServerError
			}
1277
			if errorMsg, ok := r["error"].(string); ok {
Josh's avatar
Josh committed
1278
				c.JSON(status, gin.H{"error": errorMsg})
1279
1280
				return
			} else {
Josh's avatar
Josh committed
1281
				c.JSON(status, gin.H{"error": "unexpected error format in progress response"})
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
				return
			}
		default:
			c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected progress response"})
			return
		}
	}
	c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected end of progress response"})
}

Michael Yang's avatar
Michael Yang committed
1292
func streamResponse(c *gin.Context, ch chan any) {
1293
	c.Header("Content-Type", "application/x-ndjson")
Michael Yang's avatar
Michael Yang committed
1294
1295
1296
1297
1298
1299
1300
1301
	c.Stream(func(w io.Writer) bool {
		val, ok := <-ch
		if !ok {
			return false
		}

		bts, err := json.Marshal(val)
		if err != nil {
1302
			slog.Info(fmt.Sprintf("streamResponse: json.Marshal failed with %s", err))
Michael Yang's avatar
Michael Yang committed
1303
1304
1305
			return false
		}

1306
		// Delineate chunks with new-line delimiter
Michael Yang's avatar
Michael Yang committed
1307
1308
		bts = append(bts, '\n')
		if _, err := w.Write(bts); err != nil {
1309
			slog.Info(fmt.Sprintf("streamResponse: w.Write failed with %s", err))
Michael Yang's avatar
Michael Yang committed
1310
1311
1312
1313
1314
1315
			return false
		}

		return true
	})
}
Bruce MacDonald's avatar
Bruce MacDonald committed
1316

1317
func (s *Server) PsHandler(c *gin.Context) {
1318
	models := []api.ProcessModelResponse{}
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329

	for _, v := range s.sched.loaded {
		model := v.model
		modelDetails := api.ModelDetails{
			Format:            model.Config.ModelFormat,
			Family:            model.Config.ModelFamily,
			Families:          model.Config.ModelFamilies,
			ParameterSize:     model.Config.ModelType,
			QuantizationLevel: model.Config.FileType,
		}

1330
		mr := api.ProcessModelResponse{
1331
1332
1333
1334
1335
1336
1337
1338
			Model:     model.ShortName,
			Name:      model.ShortName,
			Size:      int64(v.estimatedTotal),
			SizeVRAM:  int64(v.estimatedVRAM),
			Digest:    model.Digest,
			Details:   modelDetails,
			ExpiresAt: v.expiresAt,
		}
1339
1340
1341
1342
1343
1344
1345
1346
		// The scheduler waits to set expiresAt, so if a model is loading it's
		// possible that it will be set to the unix epoch. For those cases, just
		// calculate the time w/ the sessionDuration instead.
		var epoch time.Time
		if v.expiresAt == epoch {
			mr.ExpiresAt = time.Now().Add(v.sessionDuration)
		}

1347
1348
1349
		models = append(models, mr)
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
1350
1351
1352
1353
1354
	slices.SortStableFunc(models, func(i, j api.ProcessModelResponse) int {
		// longest duration remaining listed first
		return cmp.Compare(j.ExpiresAt.Unix(), i.ExpiresAt.Unix())
	})

1355
	c.JSON(http.StatusOK, api.ProcessResponse{Models: models})
1356
1357
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
1358
func (s *Server) ChatHandler(c *gin.Context) {
1359
1360
	checkpointStart := time.Now()

Bruce MacDonald's avatar
Bruce MacDonald committed
1361
	var req api.ChatRequest
Michael Yang's avatar
Michael Yang committed
1362
	if err := c.ShouldBindJSON(&req); errors.Is(err, io.EOF) {
Bruce MacDonald's avatar
Bruce MacDonald committed
1363
1364
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
Michael Yang's avatar
Michael Yang committed
1365
	} else if err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
1366
1367
1368
1369
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

Patrick Devine's avatar
Patrick Devine committed
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
	// expire the runner
	if len(req.Messages) == 0 && req.KeepAlive != nil && int(req.KeepAlive.Seconds()) == 0 {
		model, err := GetModel(req.Model)
		if err != nil {
			switch {
			case os.IsNotExist(err):
				c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Model)})
			case err.Error() == "invalid model name":
				c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
			default:
				c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
			}
			return
		}
		s.sched.expireRunner(model)

		c.JSON(http.StatusOK, api.ChatResponse{
			Model:      req.Model,
			CreatedAt:  time.Now().UTC(),
			Message:    api.Message{Role: "assistant"},
			Done:       true,
			DoneReason: "unload",
		})
		return
	}

Michael Yang's avatar
Michael Yang committed
1396
	caps := []Capability{CapabilityCompletion}
1397
	if len(req.Tools) > 0 {
Michael Yang's avatar
tools  
Michael Yang committed
1398
1399
1400
		caps = append(caps, CapabilityTools)
	}

Michael Yang's avatar
Michael Yang committed
1401
	r, m, opts, err := s.scheduleRunner(c.Request.Context(), req.Model, caps, req.Options, req.KeepAlive)
Michael Yang's avatar
Michael Yang committed
1402
1403
	if errors.Is(err, errCapabilityCompletion) {
		c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("%q does not support chat", req.Model)})
Bruce MacDonald's avatar
Bruce MacDonald committed
1404
		return
Michael Yang's avatar
Michael Yang committed
1405
	} else if err != nil {
Michael Yang's avatar
Michael Yang committed
1406
		handleScheduleError(c, req.Model, err)
Bruce MacDonald's avatar
Bruce MacDonald committed
1407
1408
		return
	}
Michael Yang's avatar
Michael Yang committed
1409

1410
1411
	checkpointLoaded := time.Now()

Michael Yang's avatar
Michael Yang committed
1412
1413
	if len(req.Messages) == 0 {
		c.JSON(http.StatusOK, api.ChatResponse{
1414
			Model:      req.Model,
Michael Yang's avatar
Michael Yang committed
1415
1416
			CreatedAt:  time.Now().UTC(),
			Message:    api.Message{Role: "assistant"},
1417
1418
			Done:       true,
			DoneReason: "load",
Michael Yang's avatar
Michael Yang committed
1419
		})
1420
1421
1422
		return
	}

Michael Yang's avatar
Michael Yang committed
1423
	msgs := append(m.Messages, req.Messages...)
1424
	if req.Messages[0].Role != "system" && m.System != "" {
Michael Yang's avatar
Michael Yang committed
1425
		msgs = append([]api.Message{{Role: "system", Content: m.System}}, msgs...)
1426
1427
	}

Michael Yang's avatar
Michael Yang committed
1428
	prompt, images, err := chatPrompt(c.Request.Context(), m, r.Tokenize, opts, msgs, req.Tools)
Michael Yang's avatar
Michael Yang committed
1429
1430
1431
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
1432
1433
	}

Michael Yang's avatar
Michael Yang committed
1434
	slog.Debug("chat request", "images", len(images), "prompt", prompt)
1435

Bruce MacDonald's avatar
Bruce MacDonald committed
1436
1437
1438
	ch := make(chan any)
	go func() {
		defer close(ch)
Michael Yang's avatar
Michael Yang committed
1439
		if err := r.Completion(c.Request.Context(), llm.CompletionRequest{
Michael Yang's avatar
Michael Yang committed
1440
1441
1442
			Prompt:  prompt,
			Images:  images,
			Format:  req.Format,
Michael Yang's avatar
Michael Yang committed
1443
			Options: opts,
Michael Yang's avatar
Michael Yang committed
1444
		}, func(r llm.CompletionResponse) {
1445
			res := api.ChatResponse{
1446
1447
1448
1449
1450
				Model:      req.Model,
				CreatedAt:  time.Now().UTC(),
				Message:    api.Message{Role: "assistant", Content: r.Content},
				Done:       r.Done,
				DoneReason: r.DoneReason,
Bruce MacDonald's avatar
Bruce MacDonald committed
1451
1452
1453
1454
1455
1456
1457
				Metrics: api.Metrics{
					PromptEvalCount:    r.PromptEvalCount,
					PromptEvalDuration: r.PromptEvalDuration,
					EvalCount:          r.EvalCount,
					EvalDuration:       r.EvalDuration,
				},
			}
1458
1459
1460
1461
1462
1463
1464

			if r.Done {
				res.TotalDuration = time.Since(checkpointStart)
				res.LoadDuration = checkpointLoaded.Sub(checkpointStart)
			}

			ch <- res
Michael Yang's avatar
Michael Yang committed
1465
		}); err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
1466
1467
1468
1469
1470
			ch <- gin.H{"error": err.Error()}
		}
	}()

	if req.Stream != nil && !*req.Stream {
Michael Yang's avatar
tools  
Michael Yang committed
1471
		var resp api.ChatResponse
Bruce MacDonald's avatar
Bruce MacDonald committed
1472
		var sb strings.Builder
Michael Yang's avatar
Michael Yang committed
1473
1474
		for rr := range ch {
			switch t := rr.(type) {
1475
			case api.ChatResponse:
Michael Yang's avatar
Michael Yang committed
1476
				sb.WriteString(t.Message.Content)
Michael Yang's avatar
tools  
Michael Yang committed
1477
				resp = t
1478
			case gin.H:
Michael Yang's avatar
Michael Yang committed
1479
1480
1481
				msg, ok := t["error"].(string)
				if !ok {
					msg = "unexpected error format in response"
1482
				}
Michael Yang's avatar
Michael Yang committed
1483
1484
1485

				c.JSON(http.StatusInternalServerError, gin.H{"error": msg})
				return
1486
			default:
Michael Yang's avatar
Michael Yang committed
1487
				c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected response"})
1488
				return
Bruce MacDonald's avatar
Bruce MacDonald committed
1489
1490
			}
		}
1491

Michael Yang's avatar
tools  
Michael Yang committed
1492
		resp.Message.Content = sb.String()
1493
1494
1495
1496
1497
1498

		if len(req.Tools) > 0 {
			if toolCalls, ok := m.parseToolCalls(sb.String()); ok {
				resp.Message.ToolCalls = toolCalls
				resp.Message.Content = ""
			}
Michael Yang's avatar
tools  
Michael Yang committed
1499
1500
1501
		}

		c.JSON(http.StatusOK, resp)
Bruce MacDonald's avatar
Bruce MacDonald committed
1502
1503
1504
1505
1506
		return
	}

	streamResponse(c, ch)
}
1507

Michael Yang's avatar
Michael Yang committed
1508
func handleScheduleError(c *gin.Context, name string, err error) {
Michael Yang's avatar
Michael Yang committed
1509
	switch {
1510
	case errors.Is(err, errCapabilities), errors.Is(err, errRequired):
Michael Yang's avatar
Michael Yang committed
1511
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Michael Yang's avatar
Michael Yang committed
1512
	case errors.Is(err, context.Canceled):
1513
		c.JSON(499, gin.H{"error": "request canceled"})
Michael Yang's avatar
Michael Yang committed
1514
	case errors.Is(err, ErrMaxQueue):
1515
		c.JSON(http.StatusServiceUnavailable, gin.H{"error": err.Error()})
Michael Yang's avatar
Michael Yang committed
1516
1517
	case errors.Is(err, os.ErrNotExist):
		c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model %q not found, try pulling it first", name)})
Michael Yang's avatar
Michael Yang committed
1518
1519
	default:
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
1520
1521
	}
}