routes.go 34.9 KB
Newer Older
Jeffrey Morgan's avatar
Jeffrey Morgan committed
1
2
3
package server

import (
Michael Yang's avatar
Michael Yang committed
4
	"bytes"
Michael Yang's avatar
Michael Yang committed
5
	"cmp"
6
	"context"
Michael Yang's avatar
Michael Yang committed
7
	"encoding/json"
8
	"errors"
9
	"fmt"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
10
	"io"
11
	"log/slog"
12
	"math"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
13
14
	"net"
	"net/http"
15
	"net/netip"
16
	"os"
17
	"os/signal"
Michael Yang's avatar
Michael Yang committed
18
	"path/filepath"
19
	"slices"
Michael Yang's avatar
Michael Yang committed
20
	"strings"
21
	"syscall"
22
	"time"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
23

Michael Yang's avatar
Michael Yang committed
24
	"github.com/gin-contrib/cors"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
25
26
	"github.com/gin-gonic/gin"

27
	"github.com/ollama/ollama/api"
28
	"github.com/ollama/ollama/envconfig"
29
30
31
	"github.com/ollama/ollama/gpu"
	"github.com/ollama/ollama/llm"
	"github.com/ollama/ollama/openai"
32
	"github.com/ollama/ollama/parser"
Michael Yang's avatar
Michael Yang committed
33
	"github.com/ollama/ollama/template"
34
	"github.com/ollama/ollama/types/errtypes"
Michael Yang's avatar
Michael Yang committed
35
	"github.com/ollama/ollama/types/model"
36
	"github.com/ollama/ollama/version"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
37
38
)

Michael Yang's avatar
Michael Yang committed
39
40
// mode is the gin run mode applied by init. Values other than gin's debug,
// release, or test mode are replaced with gin.DebugMode before being applied.
var mode string = gin.DebugMode

41
type Server struct {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
42
43
	addr  net.Addr
	sched *Scheduler
44
45
}

Michael Yang's avatar
Michael Yang committed
46
47
48
49
50
51
52
53
54
55
56
57
// init validates the package-level gin mode and applies it. Any value other
// than gin's debug, release, or test mode is replaced with debug mode.
func init() {
	recognized := mode == gin.DebugMode || mode == gin.ReleaseMode || mode == gin.TestMode
	if !recognized {
		mode = gin.DebugMode
	}

	gin.SetMode(mode)
}

Michael Yang's avatar
Michael Yang committed
58
59
// errRequired is wrapped with the name of a missing request field, producing
// messages such as "model is required".
var errRequired = errors.New("is required")

60
61
62
63
64
65
66
67
68
69
70
func modelOptions(model *Model, requestOpts map[string]interface{}) (api.Options, error) {
	opts := api.DefaultOptions()
	if err := opts.FromMap(model.Options); err != nil {
		return api.Options{}, err
	}

	if err := opts.FromMap(requestOpts); err != nil {
		return api.Options{}, err
	}

	return opts, nil
Bruce MacDonald's avatar
Bruce MacDonald committed
71
72
}

Michael Yang's avatar
Michael Yang committed
73
74
75
// scheduleRunner schedules a runner after validating inputs such as capabilities and model options.
// It returns the allocated runner, model instance, and consolidated options if successful and error otherwise.
func (s *Server) scheduleRunner(ctx context.Context, name string, caps []Capability, requestOpts map[string]any, keepAlive *api.Duration) (llm.LlamaServer, *Model, *api.Options, error) {
Michael Yang's avatar
Michael Yang committed
76
	if name == "" {
Michael Yang's avatar
Michael Yang committed
77
		return nil, nil, nil, fmt.Errorf("model %w", errRequired)
Bruce MacDonald's avatar
Bruce MacDonald committed
78
79
	}

Michael Yang's avatar
Michael Yang committed
80
	model, err := GetModel(name)
Bruce MacDonald's avatar
Bruce MacDonald committed
81
	if err != nil {
Michael Yang's avatar
Michael Yang committed
82
		return nil, nil, nil, err
83
84
	}

Michael Yang's avatar
Michael Yang committed
85
	if err := model.CheckCapabilities(caps...); err != nil {
Michael Yang's avatar
Michael Yang committed
86
		return nil, nil, nil, fmt.Errorf("%s %w", name, err)
87
88
	}

Michael Yang's avatar
Michael Yang committed
89
	opts, err := modelOptions(model, requestOpts)
90
	if err != nil {
Michael Yang's avatar
Michael Yang committed
91
		return nil, nil, nil, err
92
93
	}

Michael Yang's avatar
Michael Yang committed
94
	runnerCh, errCh := s.sched.GetRunner(ctx, model, opts, keepAlive)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
95
96
	var runner *runnerRef
	select {
Michael Yang's avatar
Michael Yang committed
97
98
	case runner = <-runnerCh:
	case err = <-errCh:
Michael Yang's avatar
Michael Yang committed
99
		return nil, nil, nil, err
Bruce MacDonald's avatar
Bruce MacDonald committed
100
101
	}

Michael Yang's avatar
Michael Yang committed
102
	return runner.llama, model, &opts, nil
Michael Yang's avatar
Michael Yang committed
103
104
105
}

func (s *Server) GenerateHandler(c *gin.Context) {
106
	checkpointStart := time.Now()
Michael Yang's avatar
Michael Yang committed
107
108
109
110
111
112
	var req api.GenerateRequest
	if err := c.ShouldBindJSON(&req); errors.Is(err, io.EOF) {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	} else if err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Bruce MacDonald's avatar
Bruce MacDonald committed
113
114
115
		return
	}

Michael Yang's avatar
Michael Yang committed
116
117
118
119
120
	if req.Format != "" && req.Format != "json" {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "format must be empty or \"json\""})
		return
	} else if req.Raw && (req.Template != "" || req.System != "" || len(req.Context) > 0) {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "raw mode does not support template, system, or context"})
Michael Yang's avatar
Michael Yang committed
121
122
123
		return
	}

Michael Yang's avatar
Michael Yang committed
124
	caps := []Capability{CapabilityCompletion}
125
126
127
128
	if req.Suffix != "" {
		caps = append(caps, CapabilityInsert)
	}

Michael Yang's avatar
Michael Yang committed
129
	r, m, opts, err := s.scheduleRunner(c.Request.Context(), req.Model, caps, req.Options, req.KeepAlive)
Michael Yang's avatar
Michael Yang committed
130
131
132
133
	if errors.Is(err, errCapabilityCompletion) {
		c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("%q does not support generate", req.Model)})
		return
	} else if err != nil {
Michael Yang's avatar
Michael Yang committed
134
135
136
137
		handleScheduleError(c, req.Model, err)
		return
	}

138
139
	checkpointLoaded := time.Now()

Michael Yang's avatar
Michael Yang committed
140
141
142
143
144
145
146
	if req.Prompt == "" {
		c.JSON(http.StatusOK, api.GenerateResponse{
			Model:      req.Model,
			CreatedAt:  time.Now().UTC(),
			Done:       true,
			DoneReason: "load",
		})
Michael Yang's avatar
Michael Yang committed
147
148
		return
	}
Bruce MacDonald's avatar
Bruce MacDonald committed
149

Michael Yang's avatar
Michael Yang committed
150
151
152
153
	images := make([]llm.ImageData, len(req.Images))
	for i := range req.Images {
		images[i] = llm.ImageData{ID: i, Data: req.Images[i]}
	}
Bruce MacDonald's avatar
Bruce MacDonald committed
154

Michael Yang's avatar
Michael Yang committed
155
156
	prompt := req.Prompt
	if !req.Raw {
Michael Yang's avatar
Michael Yang committed
157
		tmpl := m.Template
Michael Yang's avatar
Michael Yang committed
158
159
160
161
162
163
164
165
166
		if req.Template != "" {
			tmpl, err = template.Parse(req.Template)
			if err != nil {
				c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
				return
			}
		}

		var b bytes.Buffer
Bruce MacDonald's avatar
Bruce MacDonald committed
167
		if req.Context != nil {
Michael Yang's avatar
Michael Yang committed
168
			s, err := r.Detokenize(c.Request.Context(), req.Context)
Bruce MacDonald's avatar
Bruce MacDonald committed
169
170
171
172
173
			if err != nil {
				c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
				return
			}

Michael Yang's avatar
Michael Yang committed
174
			b.WriteString(s)
175
176
		}

177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
		var values template.Values
		if req.Suffix != "" {
			values.Prompt = prompt
			values.Suffix = req.Suffix
		} else {
			var msgs []api.Message
			if req.System != "" {
				msgs = append(msgs, api.Message{Role: "system", Content: req.System})
			} else if m.System != "" {
				msgs = append(msgs, api.Message{Role: "system", Content: m.System})
			}

			for _, i := range images {
				msgs = append(msgs, api.Message{Role: "user", Content: fmt.Sprintf("[img-%d]", i.ID)})
			}

			values.Messages = append(msgs, api.Message{Role: "user", Content: req.Prompt})
		}

		if err := tmpl.Execute(&b, values); err != nil {
Michael Yang's avatar
Michael Yang committed
197
198
199
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
			return
		}
200

Michael Yang's avatar
Michael Yang committed
201
		prompt = b.String()
Bruce MacDonald's avatar
Bruce MacDonald committed
202
203
	}

Michael Yang's avatar
Michael Yang committed
204
	slog.Debug("generate request", "prompt", prompt, "images", images)
205

Bruce MacDonald's avatar
Bruce MacDonald committed
206
207
	ch := make(chan any)
	go func() {
208
209
		// TODO (jmorganca): avoid building the response twice both here and below
		var sb strings.Builder
Bruce MacDonald's avatar
Bruce MacDonald committed
210
		defer close(ch)
Michael Yang's avatar
Michael Yang committed
211
		if err := r.Completion(c.Request.Context(), llm.CompletionRequest{
Michael Yang's avatar
Michael Yang committed
212
213
214
			Prompt:  prompt,
			Images:  images,
			Format:  req.Format,
Michael Yang's avatar
Michael Yang committed
215
			Options: opts,
216
217
		}, func(cr llm.CompletionResponse) {
			res := api.GenerateResponse{
218
219
				Model:      req.Model,
				CreatedAt:  time.Now().UTC(),
220
221
222
				Response:   cr.Content,
				Done:       cr.Done,
				DoneReason: cr.DoneReason,
Bruce MacDonald's avatar
Bruce MacDonald committed
223
				Metrics: api.Metrics{
224
225
226
227
					PromptEvalCount:    cr.PromptEvalCount,
					PromptEvalDuration: cr.PromptEvalDuration,
					EvalCount:          cr.EvalCount,
					EvalDuration:       cr.EvalDuration,
Bruce MacDonald's avatar
Bruce MacDonald committed
228
				},
Bruce MacDonald's avatar
Bruce MacDonald committed
229
			}
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249

			if _, err := sb.WriteString(cr.Content); err != nil {
				ch <- gin.H{"error": err.Error()}
			}

			if cr.Done {
				res.TotalDuration = time.Since(checkpointStart)
				res.LoadDuration = checkpointLoaded.Sub(checkpointStart)

				if !req.Raw {
					tokens, err := r.Tokenize(c.Request.Context(), prompt+sb.String())
					if err != nil {
						ch <- gin.H{"error": err.Error()}
						return
					}
					res.Context = append(req.Context, tokens...)
				}
			}

			ch <- res
Michael Yang's avatar
Michael Yang committed
250
		}); err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
251
252
253
254
255
			ch <- gin.H{"error": err.Error()}
		}
	}()

	if req.Stream != nil && !*req.Stream {
Michael Yang's avatar
Michael Yang committed
256
		var r api.GenerateResponse
Bruce MacDonald's avatar
Bruce MacDonald committed
257
		var sb strings.Builder
Michael Yang's avatar
Michael Yang committed
258
259
		for rr := range ch {
			switch t := rr.(type) {
260
			case api.GenerateResponse:
Michael Yang's avatar
Michael Yang committed
261
262
				sb.WriteString(t.Response)
				r = t
263
			case gin.H:
Michael Yang's avatar
Michael Yang committed
264
265
266
				msg, ok := t["error"].(string)
				if !ok {
					msg = "unexpected error format in response"
267
				}
Michael Yang's avatar
Michael Yang committed
268
269
270

				c.JSON(http.StatusInternalServerError, gin.H{"error": msg})
				return
271
			default:
Michael Yang's avatar
Michael Yang committed
272
				c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected response"})
Bruce MacDonald's avatar
Bruce MacDonald committed
273
274
275
				return
			}
		}
276

Michael Yang's avatar
Michael Yang committed
277
278
		r.Response = sb.String()
		c.JSON(http.StatusOK, r)
Bruce MacDonald's avatar
Bruce MacDonald committed
279
280
281
282
283
284
		return
	}

	streamResponse(c, ch)
}

285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
// EmbedHandler computes normalized embeddings for one or more input strings.
//
// req.Input may be a single string or a JSON array of strings; any other shape
// is rejected with 400. If no input remains after parsing, it replies with an
// empty embeddings list without scheduling a runner. Inputs longer than the
// effective context length are truncated to it unless req.Truncate is
// explicitly false, in which case the request fails with 400.
func (s *Server) EmbedHandler(c *gin.Context) {
	var req api.EmbedRequest
	err := c.ShouldBindJSON(&req)
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	// Truncation is on unless the request explicitly disables it.
	truncate := req.Truncate == nil || *req.Truncate

	var input []string

	switch i := req.Input.(type) {
	case string:
		if len(i) > 0 {
			input = append(input, i)
		}
	case []any:
		for _, v := range i {
			str, ok := v.(string)
			if !ok {
				c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "invalid input type"})
				return
			}
			input = append(input, str)
		}
	default:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "invalid input type"})
		return
	}

	if len(input) == 0 {
		c.JSON(http.StatusOK, api.EmbedResponse{Model: req.Model, Embeddings: [][]float32{}})
		return
	}

	r, m, opts, err := s.scheduleRunner(c.Request.Context(), req.Model, []Capability{}, req.Options, req.KeepAlive)
	if err != nil {
		handleScheduleError(c, req.Model, err)
		return
	}

	kvData, err := getKVData(m.ModelPath, false)
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

	// The limit is the same for every input, so compute it once: the smaller
	// of the requested context size and the model's own context length.
	ctxLen := min(opts.NumCtx, int(kvData.ContextLength()))

	for i, text := range input {
		tokens, err := r.Tokenize(c.Request.Context(), text)
		if err != nil {
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
			return
		}

		if len(tokens) > ctxLen {
			if !truncate {
				c.JSON(http.StatusBadRequest, gin.H{"error": "input length exceeds maximum context length"})
				return
			}

			// Cut at the token boundary and convert back to text for Embed.
			tokens = tokens[:ctxLen]
			text, err = r.Detokenize(c.Request.Context(), tokens)
			if err != nil {
				c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
				return
			}
		}

		input[i] = text
	}

	embeddings, err := r.Embed(c.Request.Context(), input)
	if err != nil {
		slog.Error("embedding generation failed", "error", err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate embedding"})
		return
	}

	for i, e := range embeddings {
		embeddings[i] = normalize(e)
	}

	resp := api.EmbedResponse{
		Model:      req.Model,
		Embeddings: embeddings,
	}
	c.JSON(http.StatusOK, resp)
}

// normalize scales vec in place to unit Euclidean length and returns the same
// slice. A vector whose squared sum is not positive (for example all zeros or
// empty) is multiplied by zero.
func normalize(vec []float32) []float32 {
	var sumSquares float32
	for i := range vec {
		sumSquares += vec[i] * vec[i]
	}

	var scale float32
	if sumSquares > 0 {
		scale = float32(1.0 / math.Sqrt(float64(sumSquares)))
	}

	for i := range vec {
		vec[i] *= scale
	}

	return vec
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
400
func (s *Server) EmbeddingsHandler(c *gin.Context) {
Bruce MacDonald's avatar
Bruce MacDonald committed
401
	var req api.EmbeddingRequest
Michael Yang's avatar
Michael Yang committed
402
	if err := c.ShouldBindJSON(&req); errors.Is(err, io.EOF) {
Bruce MacDonald's avatar
Bruce MacDonald committed
403
404
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
Michael Yang's avatar
Michael Yang committed
405
	} else if err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
406
407
408
409
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

Michael Yang's avatar
Michael Yang committed
410
	r, _, _, err := s.scheduleRunner(c.Request.Context(), req.Model, []Capability{}, req.Options, req.KeepAlive)
Bruce MacDonald's avatar
Bruce MacDonald committed
411
	if err != nil {
Michael Yang's avatar
Michael Yang committed
412
		handleScheduleError(c, req.Model, err)
Bruce MacDonald's avatar
Bruce MacDonald committed
413
414
415
		return
	}

416
417
418
	// an empty request loads the model
	if req.Prompt == "" {
		c.JSON(http.StatusOK, api.EmbeddingResponse{Embedding: []float64{}})
Bruce MacDonald's avatar
Bruce MacDonald committed
419
420
421
		return
	}

422
423
	embeddings, err := r.Embed(c.Request.Context(), []string{req.Prompt})

Bruce MacDonald's avatar
Bruce MacDonald committed
424
	if err != nil {
425
		slog.Info(fmt.Sprintf("embedding generation failed: %v", err))
Bruce MacDonald's avatar
Bruce MacDonald committed
426
427
428
429
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate embedding"})
		return
	}

430
431
432
433
434
435
436
437
438
439
	embedding := make([]float64, len(embeddings[0]))

	for i, v := range embeddings[0] {
		embedding[i] = float64(v)
	}

	resp := api.EmbeddingResponse{
		Embedding: embedding,
	}
	c.JSON(http.StatusOK, resp)
Bruce MacDonald's avatar
Bruce MacDonald committed
440
441
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
442
func (s *Server) PullModelHandler(c *gin.Context) {
Michael Yang's avatar
Michael Yang committed
443
	var req api.PullRequest
Michael Yang's avatar
Michael Yang committed
444
445
446
447
448
449
450
	err := c.ShouldBindJSON(&req)
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Michael Yang's avatar
Michael Yang committed
451
452
453
		return
	}

454
455
456
457
458
459
460
461
	name := model.ParseName(cmp.Or(req.Model, req.Name))
	if !name.IsValid() {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "invalid model name"})
		return
	}

	if err := checkNameExists(name); err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
462
463
464
		return
	}

465
466
467
	ch := make(chan any)
	go func() {
		defer close(ch)
468
469
		fn := func(r api.ProgressResponse) {
			ch <- r
470
		}
471

Michael Yang's avatar
Michael Yang committed
472
		regOpts := &registryOptions{
473
474
475
			Insecure: req.Insecure,
		}

476
477
478
		ctx, cancel := context.WithCancel(c.Request.Context())
		defer cancel()

479
		if err := PullModel(ctx, name.DisplayShortest(), regOpts, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
480
			ch <- gin.H{"error": err.Error()}
481
482
483
		}
	}()

484
485
486
487
488
	if req.Stream != nil && !*req.Stream {
		waitForStream(c, ch)
		return
	}

489
490
491
	streamResponse(c, ch)
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
492
func (s *Server) PushModelHandler(c *gin.Context) {
493
	var req api.PushRequest
Michael Yang's avatar
Michael Yang committed
494
495
496
497
498
499
500
	err := c.ShouldBindJSON(&req)
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Michael Yang's avatar
Michael Yang committed
501
502
		return
	}
Michael Yang's avatar
Michael Yang committed
503

Michael Yang's avatar
Michael Yang committed
504
505
506
507
508
509
510
	var model string
	if req.Model != "" {
		model = req.Model
	} else if req.Name != "" {
		model = req.Name
	} else {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
511
512
513
		return
	}

514
515
516
	ch := make(chan any)
	go func() {
		defer close(ch)
517
518
		fn := func(r api.ProgressResponse) {
			ch <- r
519
		}
520

Michael Yang's avatar
Michael Yang committed
521
		regOpts := &registryOptions{
522
523
524
			Insecure: req.Insecure,
		}

Michael Yang's avatar
Michael Yang committed
525
526
527
		ctx, cancel := context.WithCancel(c.Request.Context())
		defer cancel()

Michael Yang's avatar
Michael Yang committed
528
		if err := PushModel(ctx, model, regOpts, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
529
			ch <- gin.H{"error": err.Error()}
530
531
532
		}
	}()

533
534
535
536
537
	if req.Stream != nil && !*req.Stream {
		waitForStream(c, ch)
		return
	}

538
539
540
	streamResponse(c, ch)
}

541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
// checkNameExists reports an error when a different existing model has a file
// path equal to name's under case-insensitive comparison. An existing model
// whose name equals name exactly is not a conflict. Errors from listing the
// manifests are returned unchanged.
func checkNameExists(name model.Name) error {
	manifests, err := Manifests()
	if err != nil {
		return err
	}

	wanted := name.Filepath()
	for existing := range manifests {
		if existing == name {
			continue
		}

		if strings.EqualFold(existing.Filepath(), wanted) {
			return fmt.Errorf("a model with that name already exists")
		}
	}

	return nil
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
556
func (s *Server) CreateModelHandler(c *gin.Context) {
557
558
	var r api.CreateRequest
	if err := c.ShouldBindJSON(&r); errors.Is(err, io.EOF) {
Michael Yang's avatar
Michael Yang committed
559
560
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
Michael Yang's avatar
Michael Yang committed
561
	} else if err != nil {
Michael Yang's avatar
Michael Yang committed
562
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Michael Yang's avatar
Michael Yang committed
563
		return
564
565
	}

566
	name := model.ParseName(cmp.Or(r.Model, r.Name))
Michael Yang's avatar
Michael Yang committed
567
	if !name.IsValid() {
568
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": errtypes.InvalidModelNameErrMsg})
569
570
571
		return
	}

572
573
574
575
576
	if err := checkNameExists(name); err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

577
	if r.Path == "" && r.Modelfile == "" {
Michael Yang's avatar
Michael Yang committed
578
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "path or modelfile are required"})
Michael Yang's avatar
Michael Yang committed
579
580
		return
	}
Michael Yang's avatar
Michael Yang committed
581

582
583
584
	var sr io.Reader = strings.NewReader(r.Modelfile)
	if r.Path != "" && r.Modelfile == "" {
		f, err := os.Open(r.Path)
Michael Yang's avatar
Michael Yang committed
585
586
587
588
		if err != nil {
			c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("error reading modelfile: %s", err)})
			return
		}
Michael Yang's avatar
Michael Yang committed
589
		defer f.Close()
Michael Yang's avatar
Michael Yang committed
590

591
		sr = f
Michael Yang's avatar
Michael Yang committed
592
	}
Michael Yang's avatar
Michael Yang committed
593

594
	f, err := parser.ParseFile(sr)
Michael Yang's avatar
Michael Yang committed
595
596
597
598
599
	if err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

Michael Yang's avatar
Michael Yang committed
600
	ch := make(chan any)
Michael Yang's avatar
Michael Yang committed
601
602
	go func() {
		defer close(ch)
603
604
		fn := func(resp api.ProgressResponse) {
			ch <- resp
605
606
		}

607
608
609
		ctx, cancel := context.WithCancel(c.Request.Context())
		defer cancel()

610
611
		quantization := cmp.Or(r.Quantize, r.Quantization)
		if err := CreateModel(ctx, name, filepath.Dir(r.Path), strings.ToUpper(quantization), f, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
612
			ch <- gin.H{"error": err.Error()}
613
		}
Michael Yang's avatar
Michael Yang committed
614
	}()
Michael Yang's avatar
Michael Yang committed
615

616
	if r.Stream != nil && !*r.Stream {
617
618
619
620
		waitForStream(c, ch)
		return
	}

Michael Yang's avatar
Michael Yang committed
621
	streamResponse(c, ch)
Bruce MacDonald's avatar
Bruce MacDonald committed
622
623
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
624
func (s *Server) DeleteModelHandler(c *gin.Context) {
625
626
	var r api.DeleteRequest
	if err := c.ShouldBindJSON(&r); errors.Is(err, io.EOF) {
Michael Yang's avatar
Michael Yang committed
627
628
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
629
	} else if err != nil {
Michael Yang's avatar
Michael Yang committed
630
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
631
632
633
		return
	}

634
635
636
	n := model.ParseName(cmp.Or(r.Model, r.Name))
	if !n.IsValid() {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("name %q is invalid", cmp.Or(r.Model, r.Name))})
637
638
		return
	}
Michael Yang's avatar
Michael Yang committed
639

640
	m, err := ParseNamedManifest(n)
Michael Yang's avatar
Michael Yang committed
641
642
643
644
645
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

646
	if err := m.Remove(); err != nil {
Michael Yang's avatar
Michael Yang committed
647
648
649
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
650
651
652
653
654

	if err := m.RemoveLayers(); err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
655
656
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
657
func (s *Server) ShowModelHandler(c *gin.Context) {
Patrick Devine's avatar
Patrick Devine committed
658
	var req api.ShowRequest
Michael Yang's avatar
Michael Yang committed
659
660
661
662
663
664
665
	err := c.ShouldBindJSON(&req)
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Patrick Devine's avatar
Patrick Devine committed
666
667
668
		return
	}

Michael Yang's avatar
Michael Yang committed
669
	if req.Model != "" {
Michael Yang's avatar
Michael Yang committed
670
		// noop
Michael Yang's avatar
Michael Yang committed
671
	} else if req.Name != "" {
Michael Yang's avatar
Michael Yang committed
672
		req.Model = req.Name
Michael Yang's avatar
Michael Yang committed
673
	} else {
674
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
675
676
677
		return
	}

678
	resp, err := GetModelInfo(req)
Patrick Devine's avatar
Patrick Devine committed
679
	if err != nil {
680
681
		switch {
		case os.IsNotExist(err):
Michael Yang's avatar
Michael Yang committed
682
			c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Model)})
683
684
685
		case err.Error() == "invalid model name":
			c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		default:
Patrick Devine's avatar
Patrick Devine committed
686
687
688
689
690
691
692
693
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		}
		return
	}

	c.JSON(http.StatusOK, resp)
}

694
func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {
695
	m, err := GetModel(req.Model)
Patrick Devine's avatar
Patrick Devine committed
696
697
698
699
	if err != nil {
		return nil, err
	}

Patrick Devine's avatar
Patrick Devine committed
700
	modelDetails := api.ModelDetails{
701
702
703
704
705
706
		ParentModel:       m.ParentModel,
		Format:            m.Config.ModelFormat,
		Family:            m.Config.ModelFamily,
		Families:          m.Config.ModelFamilies,
		ParameterSize:     m.Config.ModelType,
		QuantizationLevel: m.Config.FileType,
Patrick Devine's avatar
Patrick Devine committed
707
708
	}

709
	if req.System != "" {
710
		m.System = req.System
711
712
	}

Michael Yang's avatar
Michael Yang committed
713
714
715
	msgs := make([]api.Message, len(m.Messages))
	for i, msg := range m.Messages {
		msgs[i] = api.Message{Role: msg.Role, Content: msg.Content}
716
717
	}

718
719
720
721
722
723
724
725
726
727
	n := model.ParseName(req.Model)
	if !n.IsValid() {
		return nil, fmt.Errorf("invalid model name")
	}

	manifest, err := ParseNamedManifest(n)
	if err != nil {
		return nil, err
	}

Patrick Devine's avatar
Patrick Devine committed
728
	resp := &api.ShowResponse{
729
730
		License:    strings.Join(m.License, "\n"),
		System:     m.System,
Michael Yang's avatar
Michael Yang committed
731
		Template:   m.Template.String(),
732
733
734
		Details:    modelDetails,
		Messages:   msgs,
		ModifiedAt: manifest.fi.ModTime(),
Patrick Devine's avatar
Patrick Devine committed
735
736
737
738
	}

	var params []string
	cs := 30
739
	for k, v := range m.Options {
Patrick Devine's avatar
Patrick Devine committed
740
741
742
		switch val := v.(type) {
		case []interface{}:
			for _, nv := range val {
Patrick Devine's avatar
Patrick Devine committed
743
				params = append(params, fmt.Sprintf("%-*s %#v", cs, k, nv))
Patrick Devine's avatar
Patrick Devine committed
744
			}
Patrick Devine's avatar
Patrick Devine committed
745
746
		default:
			params = append(params, fmt.Sprintf("%-*s %#v", cs, k, v))
Patrick Devine's avatar
Patrick Devine committed
747
748
749
750
		}
	}
	resp.Parameters = strings.Join(params, "\n")

751
752
	for k, v := range req.Options {
		if _, ok := req.Options[k]; ok {
753
			m.Options[k] = v
754
755
756
		}
	}

757
	var sb strings.Builder
758
	fmt.Fprintln(&sb, "# Modelfile generated by \"ollama show\"")
759
	fmt.Fprintln(&sb, "# To build a new Modelfile based on this, replace FROM with:")
760
761
	fmt.Fprintf(&sb, "# FROM %s\n\n", m.ShortName)
	fmt.Fprint(&sb, m.String())
762
	resp.Modelfile = sb.String()
763

764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
	kvData, err := getKVData(m.ModelPath, req.Verbose)
	if err != nil {
		return nil, err
	}
	delete(kvData, "general.name")
	delete(kvData, "tokenizer.chat_template")
	resp.ModelInfo = kvData

	if len(m.ProjectorPaths) > 0 {
		projectorData, err := getKVData(m.ProjectorPaths[0], req.Verbose)
		if err != nil {
			return nil, err
		}
		resp.ProjectorInfo = projectorData
	}

Patrick Devine's avatar
Patrick Devine committed
780
781
782
	return resp, nil
}

783
// getKVData loads the key/value metadata of the model file identified by
// digest. Callers in this file pass a model or projector file path.
//
// The loader is called with a maximum array size of -1 when verbose is set and
// 0 otherwise. In non-verbose mode, any []any value with more than five
// elements is additionally replaced by an empty list in the returned map.
func getKVData(digest string, verbose bool) (llm.KV, error) {
	maxArraySize := -1
	if !verbose {
		maxArraySize = 0
	}

	loaded, err := llm.LoadModel(digest, maxArraySize)
	if err != nil {
		return nil, err
	}

	kv := loaded.KV()
	if verbose {
		return kv, nil
	}

	for key, value := range kv {
		if list, isList := value.([]any); isList && len(list) > 5 {
			kv[key] = []any{}
		}
	}

	return kv, nil
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
806
func (s *Server) ListModelsHandler(c *gin.Context) {
807
	ms, err := Manifests()
Patrick Devine's avatar
Patrick Devine committed
808
809
810
811
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
Michael Yang's avatar
Michael Yang committed
812

813
	models := []api.ListModelResponse{}
814
815
816
817
818
819
820
	for n, m := range ms {
		f, err := m.Config.Open()
		if err != nil {
			slog.Warn("bad manifest filepath", "name", n, "error", err)
			continue
		}
		defer f.Close()
821

822
823
824
825
		var cf ConfigV2
		if err := json.NewDecoder(f).Decode(&cf); err != nil {
			slog.Warn("bad manifest config", "name", n, "error", err)
			continue
Patrick Devine's avatar
Patrick Devine committed
826
		}
Michael Yang's avatar
Michael Yang committed
827

828
		// tag should never be masked
829
		models = append(models, api.ListModelResponse{
830
831
832
833
834
835
836
837
838
839
840
841
842
			Model:      n.DisplayShortest(),
			Name:       n.DisplayShortest(),
			Size:       m.Size(),
			Digest:     m.digest,
			ModifiedAt: m.fi.ModTime(),
			Details: api.ModelDetails{
				Format:            cf.ModelFormat,
				Family:            cf.ModelFamily,
				Families:          cf.ModelFamilies,
				ParameterSize:     cf.ModelType,
				QuantizationLevel: cf.FileType,
			},
		})
Patrick Devine's avatar
Patrick Devine committed
843
844
	}

845
	slices.SortStableFunc(models, func(i, j api.ListModelResponse) int {
846
847
848
849
		// most recently modified first
		return cmp.Compare(j.ModifiedAt.Unix(), i.ModifiedAt.Unix())
	})

Michael Yang's avatar
Michael Yang committed
850
	c.JSON(http.StatusOK, api.ListResponse{Models: models})
Patrick Devine's avatar
Patrick Devine committed
851
852
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
853
func (s *Server) CopyModelHandler(c *gin.Context) {
Michael Yang's avatar
Michael Yang committed
854
855
	var r api.CopyRequest
	if err := c.ShouldBindJSON(&r); errors.Is(err, io.EOF) {
Michael Yang's avatar
Michael Yang committed
856
857
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
Michael Yang's avatar
Michael Yang committed
858
	} else if err != nil {
Michael Yang's avatar
Michael Yang committed
859
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Patrick Devine's avatar
Patrick Devine committed
860
861
862
		return
	}

Michael Yang's avatar
Michael Yang committed
863
864
	src := model.ParseName(r.Source)
	if !src.IsValid() {
Michael Yang's avatar
Michael Yang committed
865
866
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("source %q is invalid", r.Source)})
		return
867
868
	}

Michael Yang's avatar
Michael Yang committed
869
870
	dst := model.ParseName(r.Destination)
	if !dst.IsValid() {
871
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("destination %q is invalid", r.Destination)})
Patrick Devine's avatar
Patrick Devine committed
872
873
		return
	}
Michael Yang's avatar
Michael Yang committed
874

875
876
877
878
879
	if err := checkNameExists(dst); err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

Michael Yang's avatar
Michael Yang committed
880
881
882
883
884
	if err := CopyModel(src, dst); errors.Is(err, os.ErrNotExist) {
		c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model %q not found", r.Source)})
	} else if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
	}
Patrick Devine's avatar
Patrick Devine committed
885
886
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
887
func (s *Server) HeadBlobHandler(c *gin.Context) {
Michael Yang's avatar
Michael Yang committed
888
889
890
891
892
893
894
895
896
897
898
	path, err := GetBlobsPath(c.Param("digest"))
	if err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	if _, err := os.Stat(path); err != nil {
		c.AbortWithStatusJSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("blob %q not found", c.Param("digest"))})
		return
	}

Michael Yang's avatar
Michael Yang committed
899
	c.Status(http.StatusOK)
Michael Yang's avatar
Michael Yang committed
900
901
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
902
func (s *Server) CreateBlobHandler(c *gin.Context) {
Michael Yang's avatar
Michael Yang committed
903
904
	if ib, ok := intermediateBlobs[c.Param("digest")]; ok {
		p, err := GetBlobsPath(ib)
905
906
907
908
909
910
		if err != nil {
			c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
			return
		}

		if _, err := os.Stat(p); errors.Is(err, os.ErrNotExist) {
Michael Yang's avatar
Michael Yang committed
911
912
			slog.Info("evicting intermediate blob which no longer exists", "digest", ib)
			delete(intermediateBlobs, c.Param("digest"))
913
914
915
916
917
918
919
920
921
		} else if err != nil {
			c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
			return
		} else {
			c.Status(http.StatusOK)
			return
		}
	}

922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
	path, err := GetBlobsPath(c.Param("digest"))
	if err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	_, err = os.Stat(path)
	switch {
	case errors.Is(err, os.ErrNotExist):
		// noop
	case err != nil:
		c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	default:
		c.Status(http.StatusOK)
		return
	}

940
	layer, err := NewLayer(c.Request.Body, "")
Michael Yang's avatar
Michael Yang committed
941
942
943
944
945
	if err != nil {
		c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

946
947
	if layer.Digest != c.Param("digest") {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("digest mismatch, expected %q, got %q", c.Param("digest"), layer.Digest)})
Michael Yang's avatar
Michael Yang committed
948
949
950
		return
	}

Michael Yang's avatar
Michael Yang committed
951
	c.Status(http.StatusCreated)
Michael Yang's avatar
Michael Yang committed
952
953
}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
// isLocalIP reports whether ip matches an address assigned to one of this
// machine's network interfaces.
//
// Addresses are compared by their string form. If the interfaces cannot be
// enumerated the result is false; interfaces whose addresses cannot be read
// and addresses that do not parse as CIDR are skipped.
func isLocalIP(ip netip.Addr) bool {
	ifaces, err := net.Interfaces()
	if err != nil {
		return false
	}

	want := ip.String()
	for _, iface := range ifaces {
		addrs, err := iface.Addrs()
		if err != nil {
			continue
		}

		for _, a := range addrs {
			parsed, _, err := net.ParseCIDR(a.String())
			if err != nil {
				continue
			}

			if parsed.String() == want {
				return true
			}
		}
	}

	return false
}

975
// allowedHost reports whether host is a name that refers to this machine or
// to a local-only domain.
//
// It returns true for the empty string, for "localhost", for the value of
// os.Hostname, and for any name ending in ".localhost", ".local" or
// ".internal". Comparison is case-insensitive because DNS names are, so
// "LOCALHOST" and "Foo.Local" are accepted as well.
func allowedHost(host string) bool {
	// Normalise once so every comparison below is case-insensitive.
	host = strings.ToLower(host)

	if host == "" || host == "localhost" {
		return true
	}

	// A failure to read the hostname simply means this check cannot match.
	if hostname, err := os.Hostname(); err == nil && host == strings.ToLower(hostname) {
		return true
	}

	// check if the host is a local TLD
	for _, tld := range []string{"localhost", "local", "internal"} {
		if strings.HasSuffix(host, "."+tld) {
			return true
		}
	}

	return false
}
999

Jeffrey Morgan's avatar
Jeffrey Morgan committed
1000
1001
1002
func allowedHostsMiddleware(addr net.Addr) gin.HandlerFunc {
	return func(c *gin.Context) {
		if addr == nil {
1003
1004
1005
1006
			c.Next()
			return
		}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
1007
		if addr, err := netip.ParseAddrPort(addr.String()); err == nil && !addr.Addr().IsLoopback() {
1008
1009
1010
1011
1012
1013
1014
1015
1016
			c.Next()
			return
		}

		host, _, err := net.SplitHostPort(c.Request.Host)
		if err != nil {
			host = c.Request.Host
		}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
1017
		if addr, err := netip.ParseAddr(host); err == nil {
Jeffrey Morgan's avatar
Jeffrey Morgan committed
1018
			if addr.IsLoopback() || addr.IsPrivate() || addr.IsUnspecified() || isLocalIP(addr) {
Jeffrey Morgan's avatar
Jeffrey Morgan committed
1019
1020
1021
1022
1023
				c.Next()
				return
			}
		}

1024
		if allowedHost(host) {
Michael Yang's avatar
lint  
Michael Yang committed
1025
			if c.Request.Method == http.MethodOptions {
1026
1027
1028
1029
				c.AbortWithStatus(http.StatusNoContent)
				return
			}

1030
1031
1032
1033
1034
1035
			c.Next()
			return
		}

		c.AbortWithStatus(http.StatusForbidden)
	}
1036
}
1037

1038
func (s *Server) GenerateRoutes() http.Handler {
Michael Yang's avatar
Michael Yang committed
1039
1040
	config := cors.DefaultConfig()
	config.AllowWildcard = true
1041
	config.AllowBrowserExtensions = true
1042
	config.AllowHeaders = []string{"Authorization", "Content-Type", "User-Agent", "Accept", "X-Requested-With"}
royjhan's avatar
royjhan committed
1043
1044
1045
1046
	openAIProperties := []string{"lang", "package-version", "os", "arch", "runtime", "runtime-version", "async"}
	for _, prop := range openAIProperties {
		config.AllowHeaders = append(config.AllowHeaders, "x-stainless-"+prop)
	}
1047
	config.AllowOrigins = envconfig.AllowOrigins
Michael Yang's avatar
Michael Yang committed
1048

Bruce MacDonald's avatar
Bruce MacDonald committed
1049
	r := gin.Default()
1050
1051
	r.Use(
		cors.New(config),
1052
		allowedHostsMiddleware(s.addr),
1053
	)
Bruce MacDonald's avatar
Bruce MacDonald committed
1054

Daniel Hiltgen's avatar
Daniel Hiltgen committed
1055
1056
1057
	r.POST("/api/pull", s.PullModelHandler)
	r.POST("/api/generate", s.GenerateHandler)
	r.POST("/api/chat", s.ChatHandler)
1058
	r.POST("/api/embed", s.EmbedHandler)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
1059
1060
1061
1062
1063
1064
1065
1066
	r.POST("/api/embeddings", s.EmbeddingsHandler)
	r.POST("/api/create", s.CreateModelHandler)
	r.POST("/api/push", s.PushModelHandler)
	r.POST("/api/copy", s.CopyModelHandler)
	r.DELETE("/api/delete", s.DeleteModelHandler)
	r.POST("/api/show", s.ShowModelHandler)
	r.POST("/api/blobs/:digest", s.CreateBlobHandler)
	r.HEAD("/api/blobs/:digest", s.HeadBlobHandler)
1067
	r.GET("/api/ps", s.ProcessHandler)
Jeffrey Morgan's avatar
Jeffrey Morgan committed
1068

1069
	// Compatibility endpoints
1070
	r.POST("/v1/chat/completions", openai.ChatMiddleware(), s.ChatHandler)
1071
	r.POST("/v1/completions", openai.CompletionsMiddleware(), s.GenerateHandler)
1072
	r.POST("/v1/embeddings", openai.EmbeddingsMiddleware(), s.EmbedHandler)
1073
1074
	r.GET("/v1/models", openai.ListMiddleware(), s.ListModelsHandler)
	r.GET("/v1/models/:model", openai.RetrieveMiddleware(), s.ShowModelHandler)
1075

Michael Yang's avatar
Michael Yang committed
1076
1077
1078
1079
1080
	for _, method := range []string{http.MethodGet, http.MethodHead} {
		r.Handle(method, "/", func(c *gin.Context) {
			c.String(http.StatusOK, "Ollama is running")
		})

Daniel Hiltgen's avatar
Daniel Hiltgen committed
1081
		r.Handle(method, "/api/tags", s.ListModelsHandler)
Michael Yang's avatar
Michael Yang committed
1082
1083
1084
		r.Handle(method, "/api/version", func(c *gin.Context) {
			c.JSON(http.StatusOK, gin.H{"version": version.Version})
		})
Michael Yang's avatar
Michael Yang committed
1085
1086
	}

1087
1088
1089
1090
	return r
}

func Serve(ln net.Listener) error {
Michael Yang's avatar
Michael Yang committed
1091
	level := slog.LevelInfo
1092
	if envconfig.Debug {
Michael Yang's avatar
Michael Yang committed
1093
		level = slog.LevelDebug
1094
	}
Michael Yang's avatar
Michael Yang committed
1095

1096
	slog.Info("server config", "env", envconfig.Values())
Michael Yang's avatar
Michael Yang committed
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
	handler := slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{
		Level:     level,
		AddSource: true,
		ReplaceAttr: func(_ []string, attr slog.Attr) slog.Attr {
			if attr.Key == slog.SourceKey {
				source := attr.Value.Any().(*slog.Source)
				source.File = filepath.Base(source.File)
			}

			return attr
		},
	})

	slog.SetDefault(slog.New(handler))

1112
1113
1114
1115
1116
1117
1118
1119
	blobsDir, err := GetBlobsPath("")
	if err != nil {
		return err
	}
	if err := fixBlobs(blobsDir); err != nil {
		return err
	}

1120
	if !envconfig.NoPrune {
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
		// clean up unused layers and manifests
		if err := PruneLayers(); err != nil {
			return err
		}

		manifestsPath, err := GetManifestPath()
		if err != nil {
			return err
		}

		if err := PruneDirectory(manifestsPath); err != nil {
			return err
		}
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
1136
	ctx, done := context.WithCancel(context.Background())
Daniel Hiltgen's avatar
Daniel Hiltgen committed
1137
1138
	schedCtx, schedDone := context.WithCancel(ctx)
	sched := InitScheduler(schedCtx)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
1139
	s := &Server{addr: ln.Addr(), sched: sched}
1140
1141

	http.Handle("/", s.GenerateRoutes())
1142

1143
	slog.Info(fmt.Sprintf("Listening on %s (version %s)", ln.Addr(), version.Version))
1144
	srvr := &http.Server{
1145
1146
1147
1148
1149
1150
1151
1152
1153
		// Use http.DefaultServeMux so we get net/http/pprof for
		// free.
		//
		// TODO(bmizerany): Decide if we want to make this
		// configurable so it is not exposed by default, or allow
		// users to bind it to a different port. This was a quick
		// and easy way to get pprof, but it may not be the best
		// way.
		Handler: nil,
Jeffrey Morgan's avatar
Jeffrey Morgan committed
1154
1155
	}

1156
1157
	// listen for a ctrl+c and stop any loaded llm
	signals := make(chan os.Signal, 1)
1158
	signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM)
1159
1160
	go func() {
		<-signals
1161
		srvr.Close()
Daniel Hiltgen's avatar
Daniel Hiltgen committed
1162
		schedDone()
Daniel Hiltgen's avatar
Daniel Hiltgen committed
1163
		sched.unloadAllRunners()
1164
		gpu.Cleanup()
Daniel Hiltgen's avatar
Daniel Hiltgen committed
1165
		done()
1166
1167
	}()

Daniel Hiltgen's avatar
Daniel Hiltgen committed
1168
	if err := llm.Init(); err != nil {
1169
1170
		return fmt.Errorf("unable to initialize llm library %w", err)
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
1171

Daniel Hiltgen's avatar
Daniel Hiltgen committed
1172
	s.sched.Run(schedCtx)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
1173
1174
1175

	// At startup we retrieve GPU information so we can get log messages before loading a model
	// This will log warnings to the log in case we have problems with detected GPUs
Daniel Hiltgen's avatar
Daniel Hiltgen committed
1176
1177
	gpus := gpu.GetGPUInfo()
	gpus.LogDetails()
1178

Daniel Hiltgen's avatar
Daniel Hiltgen committed
1179
1180
1181
1182
1183
1184
1185
	err = srvr.Serve(ln)
	// If server is closed from the signal handler, wait for the ctx to be done
	// otherwise error out quickly
	if !errors.Is(err, http.ErrServerClosed) {
		return err
	}
	<-ctx.Done()
1186
	return nil
Jeffrey Morgan's avatar
Jeffrey Morgan committed
1187
}
Michael Yang's avatar
Michael Yang committed
1188

1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
// waitForStream drains ch until it can write a single JSON response.
//
// Progress responses are ignored until one has Status "success", which is
// written with 200. A gin.H value is treated as an error and written with
// 500, as is any value of another type. If ch is closed before any of those
// happen, a 500 describing the unexpected end is written.
func waitForStream(c *gin.Context, ch chan interface{}) {
	c.Header("Content-Type", "application/json")

	for resp := range ch {
		switch r := resp.(type) {
		case api.ProgressResponse:
			if r.Status != "success" {
				// Intermediate progress update; keep waiting.
				continue
			}

			c.JSON(http.StatusOK, r)
		case gin.H:
			msg, ok := r["error"].(string)
			if !ok {
				msg = "unexpected error format in progress response"
			}

			c.JSON(http.StatusInternalServerError, gin.H{"error": msg})
		default:
			c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected progress response"})
		}

		// Every case that reaches this point has written the response.
		return
	}

	c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected end of progress response"})
}

Michael Yang's avatar
Michael Yang committed
1214
func streamResponse(c *gin.Context, ch chan any) {
1215
	c.Header("Content-Type", "application/x-ndjson")
Michael Yang's avatar
Michael Yang committed
1216
1217
1218
1219
1220
1221
1222
1223
	c.Stream(func(w io.Writer) bool {
		val, ok := <-ch
		if !ok {
			return false
		}

		bts, err := json.Marshal(val)
		if err != nil {
1224
			slog.Info(fmt.Sprintf("streamResponse: json.Marshal failed with %s", err))
Michael Yang's avatar
Michael Yang committed
1225
1226
1227
			return false
		}

1228
		// Delineate chunks with new-line delimiter
Michael Yang's avatar
Michael Yang committed
1229
1230
		bts = append(bts, '\n')
		if _, err := w.Write(bts); err != nil {
1231
			slog.Info(fmt.Sprintf("streamResponse: w.Write failed with %s", err))
Michael Yang's avatar
Michael Yang committed
1232
1233
1234
1235
1236
1237
			return false
		}

		return true
	})
}
Bruce MacDonald's avatar
Bruce MacDonald committed
1238

1239
func (s *Server) ProcessHandler(c *gin.Context) {
1240
	models := []api.ProcessModelResponse{}
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251

	for _, v := range s.sched.loaded {
		model := v.model
		modelDetails := api.ModelDetails{
			Format:            model.Config.ModelFormat,
			Family:            model.Config.ModelFamily,
			Families:          model.Config.ModelFamilies,
			ParameterSize:     model.Config.ModelType,
			QuantizationLevel: model.Config.FileType,
		}

1252
		mr := api.ProcessModelResponse{
1253
1254
1255
1256
1257
1258
1259
1260
			Model:     model.ShortName,
			Name:      model.ShortName,
			Size:      int64(v.estimatedTotal),
			SizeVRAM:  int64(v.estimatedVRAM),
			Digest:    model.Digest,
			Details:   modelDetails,
			ExpiresAt: v.expiresAt,
		}
1261
1262
1263
1264
1265
1266
1267
1268
		// The scheduler waits to set expiresAt, so if a model is loading it's
		// possible that it will be set to the unix epoch. For those cases, just
		// calculate the time w/ the sessionDuration instead.
		var epoch time.Time
		if v.expiresAt == epoch {
			mr.ExpiresAt = time.Now().Add(v.sessionDuration)
		}

1269
1270
1271
		models = append(models, mr)
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
1272
1273
1274
1275
1276
	slices.SortStableFunc(models, func(i, j api.ProcessModelResponse) int {
		// longest duration remaining listed first
		return cmp.Compare(j.ExpiresAt.Unix(), i.ExpiresAt.Unix())
	})

1277
	c.JSON(http.StatusOK, api.ProcessResponse{Models: models})
1278
1279
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
1280
func (s *Server) ChatHandler(c *gin.Context) {
1281
1282
	checkpointStart := time.Now()

Bruce MacDonald's avatar
Bruce MacDonald committed
1283
	var req api.ChatRequest
Michael Yang's avatar
Michael Yang committed
1284
	if err := c.ShouldBindJSON(&req); errors.Is(err, io.EOF) {
Bruce MacDonald's avatar
Bruce MacDonald committed
1285
1286
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
Michael Yang's avatar
Michael Yang committed
1287
	} else if err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
1288
1289
1290
1291
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

Michael Yang's avatar
Michael Yang committed
1292
	caps := []Capability{CapabilityCompletion}
Michael Yang's avatar
tools  
Michael Yang committed
1293
1294
1295
1296
	if req.Tools != nil {
		caps = append(caps, CapabilityTools)
	}

Michael Yang's avatar
Michael Yang committed
1297
	r, m, opts, err := s.scheduleRunner(c.Request.Context(), req.Model, caps, req.Options, req.KeepAlive)
Michael Yang's avatar
Michael Yang committed
1298
1299
	if errors.Is(err, errCapabilityCompletion) {
		c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("%q does not support chat", req.Model)})
Bruce MacDonald's avatar
Bruce MacDonald committed
1300
		return
Michael Yang's avatar
Michael Yang committed
1301
	} else if err != nil {
Michael Yang's avatar
Michael Yang committed
1302
		handleScheduleError(c, req.Model, err)
Bruce MacDonald's avatar
Bruce MacDonald committed
1303
1304
		return
	}
Michael Yang's avatar
Michael Yang committed
1305

1306
1307
	checkpointLoaded := time.Now()

Michael Yang's avatar
Michael Yang committed
1308
1309
	if len(req.Messages) == 0 {
		c.JSON(http.StatusOK, api.ChatResponse{
1310
			Model:      req.Model,
Michael Yang's avatar
Michael Yang committed
1311
1312
			CreatedAt:  time.Now().UTC(),
			Message:    api.Message{Role: "assistant"},
1313
1314
			Done:       true,
			DoneReason: "load",
Michael Yang's avatar
Michael Yang committed
1315
		})
1316
1317
1318
		return
	}

1319
	if req.Messages[0].Role != "system" && m.System != "" {
1320
1321
1322
		req.Messages = append([]api.Message{{Role: "system", Content: m.System}}, req.Messages...)
	}

Michael Yang's avatar
tools  
Michael Yang committed
1323
	prompt, images, err := chatPrompt(c.Request.Context(), m, r.Tokenize, opts, req.Messages, req.Tools)
Michael Yang's avatar
Michael Yang committed
1324
1325
1326
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
1327
1328
	}

Michael Yang's avatar
Michael Yang committed
1329
	slog.Debug("chat request", "images", len(images), "prompt", prompt)
1330

Bruce MacDonald's avatar
Bruce MacDonald committed
1331
1332
1333
	ch := make(chan any)
	go func() {
		defer close(ch)
Michael Yang's avatar
Michael Yang committed
1334
		if err := r.Completion(c.Request.Context(), llm.CompletionRequest{
Michael Yang's avatar
Michael Yang committed
1335
1336
1337
			Prompt:  prompt,
			Images:  images,
			Format:  req.Format,
Michael Yang's avatar
Michael Yang committed
1338
			Options: opts,
Michael Yang's avatar
Michael Yang committed
1339
		}, func(r llm.CompletionResponse) {
1340
			res := api.ChatResponse{
1341
1342
1343
1344
1345
				Model:      req.Model,
				CreatedAt:  time.Now().UTC(),
				Message:    api.Message{Role: "assistant", Content: r.Content},
				Done:       r.Done,
				DoneReason: r.DoneReason,
Bruce MacDonald's avatar
Bruce MacDonald committed
1346
1347
1348
1349
1350
1351
1352
				Metrics: api.Metrics{
					PromptEvalCount:    r.PromptEvalCount,
					PromptEvalDuration: r.PromptEvalDuration,
					EvalCount:          r.EvalCount,
					EvalDuration:       r.EvalDuration,
				},
			}
1353
1354
1355
1356
1357
1358
1359

			if r.Done {
				res.TotalDuration = time.Since(checkpointStart)
				res.LoadDuration = checkpointLoaded.Sub(checkpointStart)
			}

			ch <- res
Michael Yang's avatar
Michael Yang committed
1360
		}); err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
1361
1362
1363
1364
1365
			ch <- gin.H{"error": err.Error()}
		}
	}()

	if req.Stream != nil && !*req.Stream {
Michael Yang's avatar
tools  
Michael Yang committed
1366
		var resp api.ChatResponse
Bruce MacDonald's avatar
Bruce MacDonald committed
1367
		var sb strings.Builder
Michael Yang's avatar
Michael Yang committed
1368
1369
		for rr := range ch {
			switch t := rr.(type) {
1370
			case api.ChatResponse:
Michael Yang's avatar
Michael Yang committed
1371
				sb.WriteString(t.Message.Content)
Michael Yang's avatar
tools  
Michael Yang committed
1372
				resp = t
1373
			case gin.H:
Michael Yang's avatar
Michael Yang committed
1374
1375
1376
				msg, ok := t["error"].(string)
				if !ok {
					msg = "unexpected error format in response"
1377
				}
Michael Yang's avatar
Michael Yang committed
1378
1379
1380

				c.JSON(http.StatusInternalServerError, gin.H{"error": msg})
				return
1381
			default:
Michael Yang's avatar
Michael Yang committed
1382
				c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected response"})
1383
				return
Bruce MacDonald's avatar
Bruce MacDonald committed
1384
1385
			}
		}
1386

Michael Yang's avatar
tools  
Michael Yang committed
1387
		resp.Message.Content = sb.String()
1388
1389
1390
1391
1392
1393

		if len(req.Tools) > 0 {
			if toolCalls, ok := m.parseToolCalls(sb.String()); ok {
				resp.Message.ToolCalls = toolCalls
				resp.Message.Content = ""
			}
Michael Yang's avatar
tools  
Michael Yang committed
1394
1395
1396
		}

		c.JSON(http.StatusOK, resp)
Bruce MacDonald's avatar
Bruce MacDonald committed
1397
1398
1399
1400
1401
		return
	}

	streamResponse(c, ch)
}
1402

Michael Yang's avatar
Michael Yang committed
1403
func handleScheduleError(c *gin.Context, name string, err error) {
Michael Yang's avatar
Michael Yang committed
1404
	switch {
1405
	case errors.Is(err, errCapabilities), errors.Is(err, errRequired):
Michael Yang's avatar
Michael Yang committed
1406
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Michael Yang's avatar
Michael Yang committed
1407
	case errors.Is(err, context.Canceled):
1408
		c.JSON(499, gin.H{"error": "request canceled"})
Michael Yang's avatar
Michael Yang committed
1409
	case errors.Is(err, ErrMaxQueue):
1410
		c.JSON(http.StatusServiceUnavailable, gin.H{"error": err.Error()})
Michael Yang's avatar
Michael Yang committed
1411
1412
	case errors.Is(err, os.ErrNotExist):
		c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model %q not found, try pulling it first", name)})
Michael Yang's avatar
Michael Yang committed
1413
1414
	default:
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
1415
1416
	}
}