routes.go 35 KB
Newer Older
Jeffrey Morgan's avatar
Jeffrey Morgan committed
1
2
3
package server

import (
Michael Yang's avatar
Michael Yang committed
4
	"bytes"
Michael Yang's avatar
Michael Yang committed
5
	"cmp"
6
	"context"
Michael Yang's avatar
Michael Yang committed
7
	"encoding/json"
8
	"errors"
9
	"fmt"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
10
	"io"
11
	"log/slog"
12
	"math"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
13
14
	"net"
	"net/http"
15
	"net/netip"
16
	"os"
17
	"os/signal"
Michael Yang's avatar
Michael Yang committed
18
	"path/filepath"
19
	"slices"
Michael Yang's avatar
Michael Yang committed
20
	"strings"
21
	"syscall"
22
	"time"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
23

Michael Yang's avatar
Michael Yang committed
24
	"github.com/gin-contrib/cors"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
25
26
	"github.com/gin-gonic/gin"

27
	"github.com/ollama/ollama/api"
28
	"github.com/ollama/ollama/envconfig"
29
30
31
	"github.com/ollama/ollama/gpu"
	"github.com/ollama/ollama/llm"
	"github.com/ollama/ollama/openai"
32
	"github.com/ollama/ollama/parser"
Michael Yang's avatar
Michael Yang committed
33
	"github.com/ollama/ollama/template"
34
	"github.com/ollama/ollama/types/errtypes"
Michael Yang's avatar
Michael Yang committed
35
	"github.com/ollama/ollama/types/model"
36
	"github.com/ollama/ollama/version"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
37
38
)

Michael Yang's avatar
Michael Yang committed
39
40
// mode is the gin run mode that init validates and passes to gin.SetMode.
// It defaults to gin.DebugMode.
// NOTE(review): presumably overridden at build time (e.g. -ldflags "-X ...") — confirm.
var mode string = gin.DebugMode

41
type Server struct {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
42
43
	addr  net.Addr
	sched *Scheduler
44
45
}

Michael Yang's avatar
Michael Yang committed
46
47
48
49
50
51
52
53
54
55
56
57
// init falls back to gin.DebugMode when mode is not one of the three modes
// gin defines, then applies the resulting mode globally via gin.SetMode.
func init() {
	known := mode == gin.DebugMode || mode == gin.ReleaseMode || mode == gin.TestMode
	if !known {
		mode = gin.DebugMode
	}

	gin.SetMode(mode)
}

Michael Yang's avatar
Michael Yang committed
58
59
// errRequired is a sentinel wrapped with the name of a missing field, e.g.
// scheduleRunner returns fmt.Errorf("model %w", errRequired) for an empty model name.
var errRequired = errors.New("is required")

60
61
62
63
64
65
66
67
68
69
70
func modelOptions(model *Model, requestOpts map[string]interface{}) (api.Options, error) {
	opts := api.DefaultOptions()
	if err := opts.FromMap(model.Options); err != nil {
		return api.Options{}, err
	}

	if err := opts.FromMap(requestOpts); err != nil {
		return api.Options{}, err
	}

	return opts, nil
Bruce MacDonald's avatar
Bruce MacDonald committed
71
72
}

Michael Yang's avatar
Michael Yang committed
73
74
75
// scheduleRunner schedules a runner after validating inputs such as capabilities and model options.
// It returns the allocated runner, model instance, and consolidated options if successful and error otherwise.
func (s *Server) scheduleRunner(ctx context.Context, name string, caps []Capability, requestOpts map[string]any, keepAlive *api.Duration) (llm.LlamaServer, *Model, *api.Options, error) {
Michael Yang's avatar
Michael Yang committed
76
	if name == "" {
Michael Yang's avatar
Michael Yang committed
77
		return nil, nil, nil, fmt.Errorf("model %w", errRequired)
Bruce MacDonald's avatar
Bruce MacDonald committed
78
79
	}

Michael Yang's avatar
Michael Yang committed
80
	model, err := GetModel(name)
Bruce MacDonald's avatar
Bruce MacDonald committed
81
	if err != nil {
Michael Yang's avatar
Michael Yang committed
82
		return nil, nil, nil, err
83
84
	}

Michael Yang's avatar
Michael Yang committed
85
	if err := model.CheckCapabilities(caps...); err != nil {
Michael Yang's avatar
Michael Yang committed
86
		return nil, nil, nil, fmt.Errorf("%s %w", name, err)
87
88
	}

Michael Yang's avatar
Michael Yang committed
89
	opts, err := modelOptions(model, requestOpts)
90
	if err != nil {
Michael Yang's avatar
Michael Yang committed
91
		return nil, nil, nil, err
92
93
	}

Michael Yang's avatar
Michael Yang committed
94
	runnerCh, errCh := s.sched.GetRunner(ctx, model, opts, keepAlive)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
95
96
	var runner *runnerRef
	select {
Michael Yang's avatar
Michael Yang committed
97
98
	case runner = <-runnerCh:
	case err = <-errCh:
Michael Yang's avatar
Michael Yang committed
99
		return nil, nil, nil, err
Bruce MacDonald's avatar
Bruce MacDonald committed
100
101
	}

Michael Yang's avatar
Michael Yang committed
102
	return runner.llama, model, &opts, nil
Michael Yang's avatar
Michael Yang committed
103
104
105
}

func (s *Server) GenerateHandler(c *gin.Context) {
106
	checkpointStart := time.Now()
Michael Yang's avatar
Michael Yang committed
107
108
109
110
111
112
	var req api.GenerateRequest
	if err := c.ShouldBindJSON(&req); errors.Is(err, io.EOF) {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	} else if err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Bruce MacDonald's avatar
Bruce MacDonald committed
113
114
115
		return
	}

Michael Yang's avatar
Michael Yang committed
116
117
118
119
120
	if req.Format != "" && req.Format != "json" {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "format must be empty or \"json\""})
		return
	} else if req.Raw && (req.Template != "" || req.System != "" || len(req.Context) > 0) {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "raw mode does not support template, system, or context"})
Michael Yang's avatar
Michael Yang committed
121
122
123
		return
	}

Michael Yang's avatar
Michael Yang committed
124
	caps := []Capability{CapabilityCompletion}
125
126
127
128
	if req.Suffix != "" {
		caps = append(caps, CapabilityInsert)
	}

Michael Yang's avatar
Michael Yang committed
129
	r, m, opts, err := s.scheduleRunner(c.Request.Context(), req.Model, caps, req.Options, req.KeepAlive)
Michael Yang's avatar
Michael Yang committed
130
131
132
133
	if errors.Is(err, errCapabilityCompletion) {
		c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("%q does not support generate", req.Model)})
		return
	} else if err != nil {
Michael Yang's avatar
Michael Yang committed
134
135
136
137
		handleScheduleError(c, req.Model, err)
		return
	}

138
139
	checkpointLoaded := time.Now()

Michael Yang's avatar
Michael Yang committed
140
141
142
143
144
145
146
	if req.Prompt == "" {
		c.JSON(http.StatusOK, api.GenerateResponse{
			Model:      req.Model,
			CreatedAt:  time.Now().UTC(),
			Done:       true,
			DoneReason: "load",
		})
Michael Yang's avatar
Michael Yang committed
147
148
		return
	}
Bruce MacDonald's avatar
Bruce MacDonald committed
149

Michael Yang's avatar
Michael Yang committed
150
151
152
153
	images := make([]llm.ImageData, len(req.Images))
	for i := range req.Images {
		images[i] = llm.ImageData{ID: i, Data: req.Images[i]}
	}
Bruce MacDonald's avatar
Bruce MacDonald committed
154

Michael Yang's avatar
Michael Yang committed
155
156
	prompt := req.Prompt
	if !req.Raw {
Michael Yang's avatar
Michael Yang committed
157
		tmpl := m.Template
Michael Yang's avatar
Michael Yang committed
158
159
160
161
162
163
164
165
166
		if req.Template != "" {
			tmpl, err = template.Parse(req.Template)
			if err != nil {
				c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
				return
			}
		}

		var b bytes.Buffer
Bruce MacDonald's avatar
Bruce MacDonald committed
167
		if req.Context != nil {
Michael Yang's avatar
Michael Yang committed
168
			s, err := r.Detokenize(c.Request.Context(), req.Context)
Bruce MacDonald's avatar
Bruce MacDonald committed
169
170
171
172
173
			if err != nil {
				c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
				return
			}

Michael Yang's avatar
Michael Yang committed
174
			b.WriteString(s)
175
176
		}

177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
		var values template.Values
		if req.Suffix != "" {
			values.Prompt = prompt
			values.Suffix = req.Suffix
		} else {
			var msgs []api.Message
			if req.System != "" {
				msgs = append(msgs, api.Message{Role: "system", Content: req.System})
			} else if m.System != "" {
				msgs = append(msgs, api.Message{Role: "system", Content: m.System})
			}

			for _, i := range images {
				msgs = append(msgs, api.Message{Role: "user", Content: fmt.Sprintf("[img-%d]", i.ID)})
			}

			values.Messages = append(msgs, api.Message{Role: "user", Content: req.Prompt})
		}

		if err := tmpl.Execute(&b, values); err != nil {
Michael Yang's avatar
Michael Yang committed
197
198
199
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
			return
		}
200

Michael Yang's avatar
Michael Yang committed
201
		prompt = b.String()
Bruce MacDonald's avatar
Bruce MacDonald committed
202
203
	}

Michael Yang's avatar
Michael Yang committed
204
	slog.Debug("generate request", "prompt", prompt, "images", images)
205

Bruce MacDonald's avatar
Bruce MacDonald committed
206
207
	ch := make(chan any)
	go func() {
208
209
		// TODO (jmorganca): avoid building the response twice both here and below
		var sb strings.Builder
Bruce MacDonald's avatar
Bruce MacDonald committed
210
		defer close(ch)
Michael Yang's avatar
Michael Yang committed
211
		if err := r.Completion(c.Request.Context(), llm.CompletionRequest{
Michael Yang's avatar
Michael Yang committed
212
213
214
			Prompt:  prompt,
			Images:  images,
			Format:  req.Format,
Michael Yang's avatar
Michael Yang committed
215
			Options: opts,
216
217
		}, func(cr llm.CompletionResponse) {
			res := api.GenerateResponse{
218
219
				Model:      req.Model,
				CreatedAt:  time.Now().UTC(),
220
221
222
				Response:   cr.Content,
				Done:       cr.Done,
				DoneReason: cr.DoneReason,
Bruce MacDonald's avatar
Bruce MacDonald committed
223
				Metrics: api.Metrics{
224
225
226
227
					PromptEvalCount:    cr.PromptEvalCount,
					PromptEvalDuration: cr.PromptEvalDuration,
					EvalCount:          cr.EvalCount,
					EvalDuration:       cr.EvalDuration,
Bruce MacDonald's avatar
Bruce MacDonald committed
228
				},
Bruce MacDonald's avatar
Bruce MacDonald committed
229
			}
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249

			if _, err := sb.WriteString(cr.Content); err != nil {
				ch <- gin.H{"error": err.Error()}
			}

			if cr.Done {
				res.TotalDuration = time.Since(checkpointStart)
				res.LoadDuration = checkpointLoaded.Sub(checkpointStart)

				if !req.Raw {
					tokens, err := r.Tokenize(c.Request.Context(), prompt+sb.String())
					if err != nil {
						ch <- gin.H{"error": err.Error()}
						return
					}
					res.Context = append(req.Context, tokens...)
				}
			}

			ch <- res
Michael Yang's avatar
Michael Yang committed
250
		}); err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
251
252
253
254
255
			ch <- gin.H{"error": err.Error()}
		}
	}()

	if req.Stream != nil && !*req.Stream {
Michael Yang's avatar
Michael Yang committed
256
		var r api.GenerateResponse
Bruce MacDonald's avatar
Bruce MacDonald committed
257
		var sb strings.Builder
Michael Yang's avatar
Michael Yang committed
258
259
		for rr := range ch {
			switch t := rr.(type) {
260
			case api.GenerateResponse:
Michael Yang's avatar
Michael Yang committed
261
262
				sb.WriteString(t.Response)
				r = t
263
			case gin.H:
Michael Yang's avatar
Michael Yang committed
264
265
266
				msg, ok := t["error"].(string)
				if !ok {
					msg = "unexpected error format in response"
267
				}
Michael Yang's avatar
Michael Yang committed
268
269
270

				c.JSON(http.StatusInternalServerError, gin.H{"error": msg})
				return
271
			default:
Michael Yang's avatar
Michael Yang committed
272
				c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected response"})
Bruce MacDonald's avatar
Bruce MacDonald committed
273
274
275
				return
			}
		}
276

Michael Yang's avatar
Michael Yang committed
277
		r.Response = sb.String()
Michael Yang's avatar
tools  
Michael Yang committed
278
279
280
281
282
		if toolCalls, ok := m.parseToolCalls(sb.String()); ok {
			r.ToolCalls = toolCalls
			r.Response = ""
		}

Michael Yang's avatar
Michael Yang committed
283
		c.JSON(http.StatusOK, r)
Bruce MacDonald's avatar
Bruce MacDonald committed
284
285
286
287
288
289
		return
	}

	streamResponse(c, ch)
}

290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
// EmbedHandler computes normalized embeddings for one or more input strings.
//
// req.Input may be a single string or a JSON array of strings; anything else
// is rejected with 400. An empty single string yields an empty embeddings
// list without loading a model. Inputs longer than the effective context
// length are cut to that length unless the request sets truncate to false,
// in which case the request fails with 400.
func (s *Server) EmbedHandler(c *gin.Context) {
	var req api.EmbedRequest
	err := c.ShouldBindJSON(&req)
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	// Truncation is on unless the caller explicitly disables it.
	truncate := req.Truncate == nil || *req.Truncate

	var input []string

	switch i := req.Input.(type) {
	case string:
		if len(i) > 0 {
			input = append(input, i)
		}
	case []any:
		for _, v := range i {
			str, ok := v.(string)
			if !ok {
				c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "invalid input type"})
				return
			}
			input = append(input, str)
		}
	default:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "invalid input type"})
		return
	}

	if len(input) == 0 {
		c.JSON(http.StatusOK, api.EmbedResponse{Model: req.Model, Embeddings: [][]float32{}})
		return
	}

	r, m, opts, err := s.scheduleRunner(c.Request.Context(), req.Model, []Capability{}, req.Options, req.KeepAlive)
	if err != nil {
		handleScheduleError(c, req.Model, err)
		return
	}

	kvData, err := getKVData(m.ModelPath, false)
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

	// The limit is the smaller of the requested context size and the
	// context length from the model metadata. It does not depend on the
	// individual input, so compute it once.
	ctxLen := min(opts.NumCtx, int(kvData.ContextLength()))

	for i, text := range input {
		tokens, err := r.Tokenize(c.Request.Context(), text)
		if err != nil {
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
			return
		}

		if len(tokens) > ctxLen {
			if !truncate {
				c.JSON(http.StatusBadRequest, gin.H{"error": "input length exceeds maximum context length"})
				return
			}

			// Cut at token granularity, then turn the kept tokens back
			// into text for the embed call.
			tokens = tokens[:ctxLen]
			text, err = r.Detokenize(c.Request.Context(), tokens)
			if err != nil {
				c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
				return
			}
		}

		input[i] = text
	}

	embeddings, err := r.Embed(c.Request.Context(), input)
	if err != nil {
		slog.Error("embedding generation failed", "error", err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate embedding"})
		return
	}

	for i, e := range embeddings {
		embeddings[i] = normalize(e)
	}

	resp := api.EmbedResponse{
		Model:      req.Model,
		Embeddings: embeddings,
	}
	c.JSON(http.StatusOK, resp)
}

// normalize scales vec in place to unit Euclidean length and returns the same
// slice. A vector whose sum of squares is not positive (all zeros, or empty)
// is multiplied by zero instead, so no division by zero occurs.
func normalize(vec []float32) []float32 {
	var sumSquares float32
	for i := range vec {
		sumSquares += vec[i] * vec[i]
	}

	var scale float32
	if sumSquares > 0 {
		scale = float32(1.0 / math.Sqrt(float64(sumSquares)))
	}

	for i, v := range vec {
		vec[i] = v * scale
	}
	return vec
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
405
func (s *Server) EmbeddingsHandler(c *gin.Context) {
Bruce MacDonald's avatar
Bruce MacDonald committed
406
	var req api.EmbeddingRequest
Michael Yang's avatar
Michael Yang committed
407
	if err := c.ShouldBindJSON(&req); errors.Is(err, io.EOF) {
Bruce MacDonald's avatar
Bruce MacDonald committed
408
409
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
Michael Yang's avatar
Michael Yang committed
410
	} else if err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
411
412
413
414
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

Michael Yang's avatar
Michael Yang committed
415
	r, _, _, err := s.scheduleRunner(c.Request.Context(), req.Model, []Capability{}, req.Options, req.KeepAlive)
Bruce MacDonald's avatar
Bruce MacDonald committed
416
	if err != nil {
Michael Yang's avatar
Michael Yang committed
417
		handleScheduleError(c, req.Model, err)
Bruce MacDonald's avatar
Bruce MacDonald committed
418
419
420
		return
	}

421
422
423
	// an empty request loads the model
	if req.Prompt == "" {
		c.JSON(http.StatusOK, api.EmbeddingResponse{Embedding: []float64{}})
Bruce MacDonald's avatar
Bruce MacDonald committed
424
425
426
		return
	}

427
428
	embeddings, err := r.Embed(c.Request.Context(), []string{req.Prompt})

Bruce MacDonald's avatar
Bruce MacDonald committed
429
	if err != nil {
430
		slog.Info(fmt.Sprintf("embedding generation failed: %v", err))
Bruce MacDonald's avatar
Bruce MacDonald committed
431
432
433
434
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate embedding"})
		return
	}

435
436
437
438
439
440
441
442
443
444
	embedding := make([]float64, len(embeddings[0]))

	for i, v := range embeddings[0] {
		embedding[i] = float64(v)
	}

	resp := api.EmbeddingResponse{
		Embedding: embedding,
	}
	c.JSON(http.StatusOK, resp)
Bruce MacDonald's avatar
Bruce MacDonald committed
445
446
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
447
func (s *Server) PullModelHandler(c *gin.Context) {
Michael Yang's avatar
Michael Yang committed
448
	var req api.PullRequest
Michael Yang's avatar
Michael Yang committed
449
450
451
452
453
454
455
	err := c.ShouldBindJSON(&req)
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Michael Yang's avatar
Michael Yang committed
456
457
458
		return
	}

459
460
461
462
463
464
465
466
	name := model.ParseName(cmp.Or(req.Model, req.Name))
	if !name.IsValid() {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "invalid model name"})
		return
	}

	if err := checkNameExists(name); err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
467
468
469
		return
	}

470
471
472
	ch := make(chan any)
	go func() {
		defer close(ch)
473
474
		fn := func(r api.ProgressResponse) {
			ch <- r
475
		}
476

Michael Yang's avatar
Michael Yang committed
477
		regOpts := &registryOptions{
478
479
480
			Insecure: req.Insecure,
		}

481
482
483
		ctx, cancel := context.WithCancel(c.Request.Context())
		defer cancel()

484
		if err := PullModel(ctx, name.DisplayShortest(), regOpts, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
485
			ch <- gin.H{"error": err.Error()}
486
487
488
		}
	}()

489
490
491
492
493
	if req.Stream != nil && !*req.Stream {
		waitForStream(c, ch)
		return
	}

494
495
496
	streamResponse(c, ch)
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
497
func (s *Server) PushModelHandler(c *gin.Context) {
498
	var req api.PushRequest
Michael Yang's avatar
Michael Yang committed
499
500
501
502
503
504
505
	err := c.ShouldBindJSON(&req)
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Michael Yang's avatar
Michael Yang committed
506
507
		return
	}
Michael Yang's avatar
Michael Yang committed
508

Michael Yang's avatar
Michael Yang committed
509
510
511
512
513
514
515
	var model string
	if req.Model != "" {
		model = req.Model
	} else if req.Name != "" {
		model = req.Name
	} else {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
516
517
518
		return
	}

519
520
521
	ch := make(chan any)
	go func() {
		defer close(ch)
522
523
		fn := func(r api.ProgressResponse) {
			ch <- r
524
		}
525

Michael Yang's avatar
Michael Yang committed
526
		regOpts := &registryOptions{
527
528
529
			Insecure: req.Insecure,
		}

Michael Yang's avatar
Michael Yang committed
530
531
532
		ctx, cancel := context.WithCancel(c.Request.Context())
		defer cancel()

Michael Yang's avatar
Michael Yang committed
533
		if err := PushModel(ctx, model, regOpts, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
534
			ch <- gin.H{"error": err.Error()}
535
536
537
		}
	}()

538
539
540
541
542
	if req.Stream != nil && !*req.Stream {
		waitForStream(c, ch)
		return
	}

543
544
545
	streamResponse(c, ch)
}

546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
// checkNameExists reports an error when an existing manifest has a file path
// that equals name's file path under case-insensitive comparison while the
// name itself is not identical to name. An exact match is not a conflict.
// Errors from Manifests are returned unchanged.
func checkNameExists(name model.Name) error {
	manifests, err := Manifests()
	if err != nil {
		return err
	}

	for existing := range manifests {
		samePath := strings.EqualFold(existing.Filepath(), name.Filepath())
		if samePath && existing != name {
			return errors.New("a model with that name already exists")
		}
	}

	return nil
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
561
func (s *Server) CreateModelHandler(c *gin.Context) {
562
563
	var r api.CreateRequest
	if err := c.ShouldBindJSON(&r); errors.Is(err, io.EOF) {
Michael Yang's avatar
Michael Yang committed
564
565
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
Michael Yang's avatar
Michael Yang committed
566
	} else if err != nil {
Michael Yang's avatar
Michael Yang committed
567
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Michael Yang's avatar
Michael Yang committed
568
		return
569
570
	}

571
	name := model.ParseName(cmp.Or(r.Model, r.Name))
Michael Yang's avatar
Michael Yang committed
572
	if !name.IsValid() {
573
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": errtypes.InvalidModelNameErrMsg})
574
575
576
		return
	}

577
578
579
580
581
	if err := checkNameExists(name); err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

582
	if r.Path == "" && r.Modelfile == "" {
Michael Yang's avatar
Michael Yang committed
583
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "path or modelfile are required"})
Michael Yang's avatar
Michael Yang committed
584
585
		return
	}
Michael Yang's avatar
Michael Yang committed
586

587
588
589
	var sr io.Reader = strings.NewReader(r.Modelfile)
	if r.Path != "" && r.Modelfile == "" {
		f, err := os.Open(r.Path)
Michael Yang's avatar
Michael Yang committed
590
591
592
593
		if err != nil {
			c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("error reading modelfile: %s", err)})
			return
		}
Michael Yang's avatar
Michael Yang committed
594
		defer f.Close()
Michael Yang's avatar
Michael Yang committed
595

596
		sr = f
Michael Yang's avatar
Michael Yang committed
597
	}
Michael Yang's avatar
Michael Yang committed
598

599
	f, err := parser.ParseFile(sr)
Michael Yang's avatar
Michael Yang committed
600
601
602
603
604
	if err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

Michael Yang's avatar
Michael Yang committed
605
	ch := make(chan any)
Michael Yang's avatar
Michael Yang committed
606
607
	go func() {
		defer close(ch)
608
609
		fn := func(resp api.ProgressResponse) {
			ch <- resp
610
611
		}

612
613
614
		ctx, cancel := context.WithCancel(c.Request.Context())
		defer cancel()

615
616
		quantization := cmp.Or(r.Quantize, r.Quantization)
		if err := CreateModel(ctx, name, filepath.Dir(r.Path), strings.ToUpper(quantization), f, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
617
			ch <- gin.H{"error": err.Error()}
618
		}
Michael Yang's avatar
Michael Yang committed
619
	}()
Michael Yang's avatar
Michael Yang committed
620

621
	if r.Stream != nil && !*r.Stream {
622
623
624
625
		waitForStream(c, ch)
		return
	}

Michael Yang's avatar
Michael Yang committed
626
	streamResponse(c, ch)
Bruce MacDonald's avatar
Bruce MacDonald committed
627
628
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
629
func (s *Server) DeleteModelHandler(c *gin.Context) {
630
631
	var r api.DeleteRequest
	if err := c.ShouldBindJSON(&r); errors.Is(err, io.EOF) {
Michael Yang's avatar
Michael Yang committed
632
633
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
634
	} else if err != nil {
Michael Yang's avatar
Michael Yang committed
635
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
636
637
638
		return
	}

639
640
641
	n := model.ParseName(cmp.Or(r.Model, r.Name))
	if !n.IsValid() {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("name %q is invalid", cmp.Or(r.Model, r.Name))})
642
643
		return
	}
Michael Yang's avatar
Michael Yang committed
644

645
	m, err := ParseNamedManifest(n)
Michael Yang's avatar
Michael Yang committed
646
647
648
649
650
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

651
	if err := m.Remove(); err != nil {
Michael Yang's avatar
Michael Yang committed
652
653
654
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
655
656
657
658
659

	if err := m.RemoveLayers(); err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
660
661
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
662
func (s *Server) ShowModelHandler(c *gin.Context) {
Patrick Devine's avatar
Patrick Devine committed
663
	var req api.ShowRequest
Michael Yang's avatar
Michael Yang committed
664
665
666
667
668
669
670
	err := c.ShouldBindJSON(&req)
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Patrick Devine's avatar
Patrick Devine committed
671
672
673
		return
	}

Michael Yang's avatar
Michael Yang committed
674
	if req.Model != "" {
Michael Yang's avatar
Michael Yang committed
675
		// noop
Michael Yang's avatar
Michael Yang committed
676
	} else if req.Name != "" {
Michael Yang's avatar
Michael Yang committed
677
		req.Model = req.Name
Michael Yang's avatar
Michael Yang committed
678
	} else {
679
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
680
681
682
		return
	}

683
	resp, err := GetModelInfo(req)
Patrick Devine's avatar
Patrick Devine committed
684
	if err != nil {
685
686
		switch {
		case os.IsNotExist(err):
Michael Yang's avatar
Michael Yang committed
687
			c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Model)})
688
689
690
		case err.Error() == "invalid model name":
			c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		default:
Patrick Devine's avatar
Patrick Devine committed
691
692
693
694
695
696
697
698
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		}
		return
	}

	c.JSON(http.StatusOK, resp)
}

699
func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {
700
	m, err := GetModel(req.Model)
Patrick Devine's avatar
Patrick Devine committed
701
702
703
704
	if err != nil {
		return nil, err
	}

Patrick Devine's avatar
Patrick Devine committed
705
	modelDetails := api.ModelDetails{
706
707
708
709
710
711
		ParentModel:       m.ParentModel,
		Format:            m.Config.ModelFormat,
		Family:            m.Config.ModelFamily,
		Families:          m.Config.ModelFamilies,
		ParameterSize:     m.Config.ModelType,
		QuantizationLevel: m.Config.FileType,
Patrick Devine's avatar
Patrick Devine committed
712
713
	}

714
	if req.System != "" {
715
		m.System = req.System
716
717
	}

Michael Yang's avatar
Michael Yang committed
718
719
720
	msgs := make([]api.Message, len(m.Messages))
	for i, msg := range m.Messages {
		msgs[i] = api.Message{Role: msg.Role, Content: msg.Content}
721
722
	}

723
724
725
726
727
728
729
730
731
732
	n := model.ParseName(req.Model)
	if !n.IsValid() {
		return nil, fmt.Errorf("invalid model name")
	}

	manifest, err := ParseNamedManifest(n)
	if err != nil {
		return nil, err
	}

Patrick Devine's avatar
Patrick Devine committed
733
	resp := &api.ShowResponse{
734
735
		License:    strings.Join(m.License, "\n"),
		System:     m.System,
Michael Yang's avatar
Michael Yang committed
736
		Template:   m.Template.String(),
737
738
739
		Details:    modelDetails,
		Messages:   msgs,
		ModifiedAt: manifest.fi.ModTime(),
Patrick Devine's avatar
Patrick Devine committed
740
741
742
743
	}

	var params []string
	cs := 30
744
	for k, v := range m.Options {
Patrick Devine's avatar
Patrick Devine committed
745
746
747
		switch val := v.(type) {
		case []interface{}:
			for _, nv := range val {
Patrick Devine's avatar
Patrick Devine committed
748
				params = append(params, fmt.Sprintf("%-*s %#v", cs, k, nv))
Patrick Devine's avatar
Patrick Devine committed
749
			}
Patrick Devine's avatar
Patrick Devine committed
750
751
		default:
			params = append(params, fmt.Sprintf("%-*s %#v", cs, k, v))
Patrick Devine's avatar
Patrick Devine committed
752
753
754
755
		}
	}
	resp.Parameters = strings.Join(params, "\n")

756
757
	for k, v := range req.Options {
		if _, ok := req.Options[k]; ok {
758
			m.Options[k] = v
759
760
761
		}
	}

762
	var sb strings.Builder
763
	fmt.Fprintln(&sb, "# Modelfile generated by \"ollama show\"")
764
	fmt.Fprintln(&sb, "# To build a new Modelfile based on this, replace FROM with:")
765
766
	fmt.Fprintf(&sb, "# FROM %s\n\n", m.ShortName)
	fmt.Fprint(&sb, m.String())
767
	resp.Modelfile = sb.String()
768

769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
	kvData, err := getKVData(m.ModelPath, req.Verbose)
	if err != nil {
		return nil, err
	}
	delete(kvData, "general.name")
	delete(kvData, "tokenizer.chat_template")
	resp.ModelInfo = kvData

	if len(m.ProjectorPaths) > 0 {
		projectorData, err := getKVData(m.ProjectorPaths[0], req.Verbose)
		if err != nil {
			return nil, err
		}
		resp.ProjectorInfo = projectorData
	}

Patrick Devine's avatar
Patrick Devine committed
785
786
787
	return resp, nil
}

788
// getKVData loads the key/value metadata of the model file identified by
// digest via llm.LoadModel.
//
// When verbose is false, LoadModel is called with a maximum array size of 0
// and any array value with more than five elements is replaced by an empty
// array in the result. When verbose is true, the maximum array size passed
// is -1 and values are returned as loaded.
//
// NOTE(review): callers in this file pass a file path (m.ModelPath), not a
// digest — the parameter name looks historical; confirm.
func getKVData(digest string, verbose bool) (llm.KV, error) {
	maxArraySize := 0
	if verbose {
		maxArraySize = -1
	}

	kvData, err := llm.LoadModel(digest, maxArraySize)
	if err != nil {
		return nil, err
	}

	kv := kvData.KV()

	if !verbose {
		for k := range kv {
			// Assigning to an existing key while ranging is safe in Go.
			if arr, ok := kv[k].([]any); ok && len(arr) > 5 {
				kv[k] = []any{}
			}
		}
	}

	return kv, nil
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
811
func (s *Server) ListModelsHandler(c *gin.Context) {
812
	ms, err := Manifests()
Patrick Devine's avatar
Patrick Devine committed
813
814
815
816
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
Michael Yang's avatar
Michael Yang committed
817

818
	models := []api.ListModelResponse{}
819
820
821
822
823
824
825
	for n, m := range ms {
		f, err := m.Config.Open()
		if err != nil {
			slog.Warn("bad manifest filepath", "name", n, "error", err)
			continue
		}
		defer f.Close()
826

827
828
829
830
		var cf ConfigV2
		if err := json.NewDecoder(f).Decode(&cf); err != nil {
			slog.Warn("bad manifest config", "name", n, "error", err)
			continue
Patrick Devine's avatar
Patrick Devine committed
831
		}
Michael Yang's avatar
Michael Yang committed
832

833
		// tag should never be masked
834
		models = append(models, api.ListModelResponse{
835
836
837
838
839
840
841
842
843
844
845
846
847
			Model:      n.DisplayShortest(),
			Name:       n.DisplayShortest(),
			Size:       m.Size(),
			Digest:     m.digest,
			ModifiedAt: m.fi.ModTime(),
			Details: api.ModelDetails{
				Format:            cf.ModelFormat,
				Family:            cf.ModelFamily,
				Families:          cf.ModelFamilies,
				ParameterSize:     cf.ModelType,
				QuantizationLevel: cf.FileType,
			},
		})
Patrick Devine's avatar
Patrick Devine committed
848
849
	}

850
	slices.SortStableFunc(models, func(i, j api.ListModelResponse) int {
851
852
853
854
		// most recently modified first
		return cmp.Compare(j.ModifiedAt.Unix(), i.ModifiedAt.Unix())
	})

Michael Yang's avatar
Michael Yang committed
855
	c.JSON(http.StatusOK, api.ListResponse{Models: models})
Patrick Devine's avatar
Patrick Devine committed
856
857
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
858
func (s *Server) CopyModelHandler(c *gin.Context) {
Michael Yang's avatar
Michael Yang committed
859
860
	var r api.CopyRequest
	if err := c.ShouldBindJSON(&r); errors.Is(err, io.EOF) {
Michael Yang's avatar
Michael Yang committed
861
862
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
Michael Yang's avatar
Michael Yang committed
863
	} else if err != nil {
Michael Yang's avatar
Michael Yang committed
864
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Patrick Devine's avatar
Patrick Devine committed
865
866
867
		return
	}

Michael Yang's avatar
Michael Yang committed
868
869
	src := model.ParseName(r.Source)
	if !src.IsValid() {
Michael Yang's avatar
Michael Yang committed
870
871
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("source %q is invalid", r.Source)})
		return
872
873
	}

Michael Yang's avatar
Michael Yang committed
874
875
	dst := model.ParseName(r.Destination)
	if !dst.IsValid() {
876
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("destination %q is invalid", r.Destination)})
Patrick Devine's avatar
Patrick Devine committed
877
878
		return
	}
Michael Yang's avatar
Michael Yang committed
879

880
881
882
883
884
	if err := checkNameExists(dst); err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

Michael Yang's avatar
Michael Yang committed
885
886
887
888
889
	if err := CopyModel(src, dst); errors.Is(err, os.ErrNotExist) {
		c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model %q not found", r.Source)})
	} else if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
	}
Patrick Devine's avatar
Patrick Devine committed
890
891
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
892
func (s *Server) HeadBlobHandler(c *gin.Context) {
Michael Yang's avatar
Michael Yang committed
893
894
895
896
897
898
899
900
901
902
903
	path, err := GetBlobsPath(c.Param("digest"))
	if err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	if _, err := os.Stat(path); err != nil {
		c.AbortWithStatusJSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("blob %q not found", c.Param("digest"))})
		return
	}

Michael Yang's avatar
Michael Yang committed
904
	c.Status(http.StatusOK)
Michael Yang's avatar
Michael Yang committed
905
906
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
907
func (s *Server) CreateBlobHandler(c *gin.Context) {
Michael Yang's avatar
Michael Yang committed
908
909
	if ib, ok := intermediateBlobs[c.Param("digest")]; ok {
		p, err := GetBlobsPath(ib)
910
911
912
913
914
915
		if err != nil {
			c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
			return
		}

		if _, err := os.Stat(p); errors.Is(err, os.ErrNotExist) {
Michael Yang's avatar
Michael Yang committed
916
917
			slog.Info("evicting intermediate blob which no longer exists", "digest", ib)
			delete(intermediateBlobs, c.Param("digest"))
918
919
920
921
922
923
924
925
926
		} else if err != nil {
			c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
			return
		} else {
			c.Status(http.StatusOK)
			return
		}
	}

927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
	path, err := GetBlobsPath(c.Param("digest"))
	if err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	_, err = os.Stat(path)
	switch {
	case errors.Is(err, os.ErrNotExist):
		// noop
	case err != nil:
		c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	default:
		c.Status(http.StatusOK)
		return
	}

945
	layer, err := NewLayer(c.Request.Body, "")
Michael Yang's avatar
Michael Yang committed
946
947
948
949
950
	if err != nil {
		c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

951
952
	if layer.Digest != c.Param("digest") {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("digest mismatch, expected %q, got %q", c.Param("digest"), layer.Digest)})
Michael Yang's avatar
Michael Yang committed
953
954
955
		return
	}

Michael Yang's avatar
Michael Yang committed
956
	c.Status(http.StatusCreated)
Michael Yang's avatar
Michael Yang committed
957
958
}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
// isLocalIP reports whether ip is assigned to one of this machine's network
// interfaces. Interfaces whose addresses cannot be listed and addresses that
// do not parse as CIDR are ignored; if the interfaces cannot be enumerated
// at all the result is false.
func isLocalIP(ip netip.Addr) bool {
	ifaces, err := net.Interfaces()
	if err != nil {
		return false
	}

	// Addresses are compared by their textual form.
	want := ip.String()
	for _, iface := range ifaces {
		addrs, err := iface.Addrs()
		if err != nil {
			continue
		}

		for _, a := range addrs {
			candidate, _, err := net.ParseCIDR(a.String())
			if err != nil {
				continue
			}

			if candidate.String() == want {
				return true
			}
		}
	}

	return false
}

980
// allowedHost reports whether host is a name that refers to the local
// machine or a local-only domain: the empty string, "localhost", the
// machine's own hostname, or any name under the .localhost, .local or
// .internal TLDs.
//
// Matching is case-insensitive because DNS names are case-insensitive; a
// Host header such as "LOCALHOST" or "Box.Local" must be treated the same as
// its lowercase form.
func allowedHost(host string) bool {
	host = strings.ToLower(host)

	if host == "" || host == "localhost" {
		return true
	}

	if hostname, err := os.Hostname(); err == nil && host == strings.ToLower(hostname) {
		return true
	}

	tlds := []string{
		"localhost",
		"local",
		"internal",
	}

	// check if the host is a local TLD
	for _, tld := range tlds {
		if strings.HasSuffix(host, "."+tld) {
			return true
		}
	}

	return false
}
1004

Jeffrey Morgan's avatar
Jeffrey Morgan committed
1005
1006
1007
func allowedHostsMiddleware(addr net.Addr) gin.HandlerFunc {
	return func(c *gin.Context) {
		if addr == nil {
1008
1009
1010
1011
			c.Next()
			return
		}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
1012
		if addr, err := netip.ParseAddrPort(addr.String()); err == nil && !addr.Addr().IsLoopback() {
1013
1014
1015
1016
1017
1018
1019
1020
1021
			c.Next()
			return
		}

		host, _, err := net.SplitHostPort(c.Request.Host)
		if err != nil {
			host = c.Request.Host
		}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
1022
		if addr, err := netip.ParseAddr(host); err == nil {
Jeffrey Morgan's avatar
Jeffrey Morgan committed
1023
			if addr.IsLoopback() || addr.IsPrivate() || addr.IsUnspecified() || isLocalIP(addr) {
Jeffrey Morgan's avatar
Jeffrey Morgan committed
1024
1025
1026
1027
1028
				c.Next()
				return
			}
		}

1029
		if allowedHost(host) {
Michael Yang's avatar
lint  
Michael Yang committed
1030
			if c.Request.Method == http.MethodOptions {
1031
1032
1033
1034
				c.AbortWithStatus(http.StatusNoContent)
				return
			}

1035
1036
1037
1038
1039
1040
			c.Next()
			return
		}

		c.AbortWithStatus(http.StatusForbidden)
	}
1041
}
1042

1043
func (s *Server) GenerateRoutes() http.Handler {
Michael Yang's avatar
Michael Yang committed
1044
1045
	config := cors.DefaultConfig()
	config.AllowWildcard = true
1046
	config.AllowBrowserExtensions = true
1047
	config.AllowHeaders = []string{"Authorization", "Content-Type", "User-Agent", "Accept", "X-Requested-With"}
royjhan's avatar
royjhan committed
1048
1049
1050
1051
	openAIProperties := []string{"lang", "package-version", "os", "arch", "runtime", "runtime-version", "async"}
	for _, prop := range openAIProperties {
		config.AllowHeaders = append(config.AllowHeaders, "x-stainless-"+prop)
	}
1052
	config.AllowOrigins = envconfig.AllowOrigins
Michael Yang's avatar
Michael Yang committed
1053

Bruce MacDonald's avatar
Bruce MacDonald committed
1054
	r := gin.Default()
1055
1056
	r.Use(
		cors.New(config),
1057
		allowedHostsMiddleware(s.addr),
1058
	)
Bruce MacDonald's avatar
Bruce MacDonald committed
1059

Daniel Hiltgen's avatar
Daniel Hiltgen committed
1060
1061
1062
	r.POST("/api/pull", s.PullModelHandler)
	r.POST("/api/generate", s.GenerateHandler)
	r.POST("/api/chat", s.ChatHandler)
1063
	r.POST("/api/embed", s.EmbedHandler)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
1064
1065
1066
1067
1068
1069
1070
1071
	r.POST("/api/embeddings", s.EmbeddingsHandler)
	r.POST("/api/create", s.CreateModelHandler)
	r.POST("/api/push", s.PushModelHandler)
	r.POST("/api/copy", s.CopyModelHandler)
	r.DELETE("/api/delete", s.DeleteModelHandler)
	r.POST("/api/show", s.ShowModelHandler)
	r.POST("/api/blobs/:digest", s.CreateBlobHandler)
	r.HEAD("/api/blobs/:digest", s.HeadBlobHandler)
1072
	r.GET("/api/ps", s.ProcessHandler)
Jeffrey Morgan's avatar
Jeffrey Morgan committed
1073

1074
	// Compatibility endpoints
1075
	r.POST("/v1/chat/completions", openai.ChatMiddleware(), s.ChatHandler)
1076
	r.POST("/v1/completions", openai.CompletionsMiddleware(), s.GenerateHandler)
1077
	r.POST("/v1/embeddings", openai.EmbeddingsMiddleware(), s.EmbedHandler)
1078
1079
	r.GET("/v1/models", openai.ListMiddleware(), s.ListModelsHandler)
	r.GET("/v1/models/:model", openai.RetrieveMiddleware(), s.ShowModelHandler)
1080

Michael Yang's avatar
Michael Yang committed
1081
1082
1083
1084
1085
	for _, method := range []string{http.MethodGet, http.MethodHead} {
		r.Handle(method, "/", func(c *gin.Context) {
			c.String(http.StatusOK, "Ollama is running")
		})

Daniel Hiltgen's avatar
Daniel Hiltgen committed
1086
		r.Handle(method, "/api/tags", s.ListModelsHandler)
Michael Yang's avatar
Michael Yang committed
1087
1088
1089
		r.Handle(method, "/api/version", func(c *gin.Context) {
			c.JSON(http.StatusOK, gin.H{"version": version.Version})
		})
Michael Yang's avatar
Michael Yang committed
1090
1091
	}

1092
1093
1094
1095
	return r
}

func Serve(ln net.Listener) error {
Michael Yang's avatar
Michael Yang committed
1096
	level := slog.LevelInfo
1097
	if envconfig.Debug {
Michael Yang's avatar
Michael Yang committed
1098
		level = slog.LevelDebug
1099
	}
Michael Yang's avatar
Michael Yang committed
1100

1101
	slog.Info("server config", "env", envconfig.Values())
Michael Yang's avatar
Michael Yang committed
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
	handler := slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{
		Level:     level,
		AddSource: true,
		ReplaceAttr: func(_ []string, attr slog.Attr) slog.Attr {
			if attr.Key == slog.SourceKey {
				source := attr.Value.Any().(*slog.Source)
				source.File = filepath.Base(source.File)
			}

			return attr
		},
	})

	slog.SetDefault(slog.New(handler))

1117
1118
1119
1120
1121
1122
1123
1124
	blobsDir, err := GetBlobsPath("")
	if err != nil {
		return err
	}
	if err := fixBlobs(blobsDir); err != nil {
		return err
	}

1125
	if !envconfig.NoPrune {
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
		// clean up unused layers and manifests
		if err := PruneLayers(); err != nil {
			return err
		}

		manifestsPath, err := GetManifestPath()
		if err != nil {
			return err
		}

		if err := PruneDirectory(manifestsPath); err != nil {
			return err
		}
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
1141
	ctx, done := context.WithCancel(context.Background())
Daniel Hiltgen's avatar
Daniel Hiltgen committed
1142
1143
	schedCtx, schedDone := context.WithCancel(ctx)
	sched := InitScheduler(schedCtx)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
1144
	s := &Server{addr: ln.Addr(), sched: sched}
1145
1146

	http.Handle("/", s.GenerateRoutes())
1147

1148
	slog.Info(fmt.Sprintf("Listening on %s (version %s)", ln.Addr(), version.Version))
1149
	srvr := &http.Server{
1150
1151
1152
1153
1154
1155
1156
1157
1158
		// Use http.DefaultServeMux so we get net/http/pprof for
		// free.
		//
		// TODO(bmizerany): Decide if we want to make this
		// configurable so it is not exposed by default, or allow
		// users to bind it to a different port. This was a quick
		// and easy way to get pprof, but it may not be the best
		// way.
		Handler: nil,
Jeffrey Morgan's avatar
Jeffrey Morgan committed
1159
1160
	}

1161
1162
	// listen for a ctrl+c and stop any loaded llm
	signals := make(chan os.Signal, 1)
1163
	signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM)
1164
1165
	go func() {
		<-signals
1166
		srvr.Close()
Daniel Hiltgen's avatar
Daniel Hiltgen committed
1167
		schedDone()
Daniel Hiltgen's avatar
Daniel Hiltgen committed
1168
		sched.unloadAllRunners()
1169
		gpu.Cleanup()
Daniel Hiltgen's avatar
Daniel Hiltgen committed
1170
		done()
1171
1172
	}()

Daniel Hiltgen's avatar
Daniel Hiltgen committed
1173
	if err := llm.Init(); err != nil {
1174
1175
		return fmt.Errorf("unable to initialize llm library %w", err)
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
1176

Daniel Hiltgen's avatar
Daniel Hiltgen committed
1177
	s.sched.Run(schedCtx)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
1178
1179
1180

	// At startup we retrieve GPU information so we can get log messages before loading a model
	// This will log warnings to the log in case we have problems with detected GPUs
Daniel Hiltgen's avatar
Daniel Hiltgen committed
1181
1182
	gpus := gpu.GetGPUInfo()
	gpus.LogDetails()
1183

Daniel Hiltgen's avatar
Daniel Hiltgen committed
1184
1185
1186
1187
1188
1189
1190
	err = srvr.Serve(ln)
	// If server is closed from the signal handler, wait for the ctx to be done
	// otherwise error out quickly
	if !errors.Is(err, http.ErrServerClosed) {
		return err
	}
	<-ctx.Done()
1191
	return nil
Jeffrey Morgan's avatar
Jeffrey Morgan committed
1192
}
Michael Yang's avatar
Michael Yang committed
1193

1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
// waitForStream drains ch and writes a single JSON response: the progress
// message whose status is "success" (200), or a 500 error for an error
// message, an unrecognized message type, or a channel that closes before a
// success is seen. Progress messages with any other status are skipped.
func waitForStream(c *gin.Context, ch chan interface{}) {
	c.Header("Content-Type", "application/json")

	for msg := range ch {
		if progress, isProgress := msg.(api.ProgressResponse); isProgress {
			if progress.Status == "success" {
				c.JSON(http.StatusOK, progress)
				return
			}
			// not finished yet, keep waiting
			continue
		}

		fields, isError := msg.(gin.H)
		if !isError {
			c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected progress response"})
			return
		}

		errorMsg, ok := fields["error"].(string)
		if !ok {
			errorMsg = "unexpected error format in progress response"
		}
		c.JSON(http.StatusInternalServerError, gin.H{"error": errorMsg})
		return
	}

	c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected end of progress response"})
}

Michael Yang's avatar
Michael Yang committed
1219
func streamResponse(c *gin.Context, ch chan any) {
1220
	c.Header("Content-Type", "application/x-ndjson")
Michael Yang's avatar
Michael Yang committed
1221
1222
1223
1224
1225
1226
1227
1228
	c.Stream(func(w io.Writer) bool {
		val, ok := <-ch
		if !ok {
			return false
		}

		bts, err := json.Marshal(val)
		if err != nil {
1229
			slog.Info(fmt.Sprintf("streamResponse: json.Marshal failed with %s", err))
Michael Yang's avatar
Michael Yang committed
1230
1231
1232
			return false
		}

1233
		// Delineate chunks with new-line delimiter
Michael Yang's avatar
Michael Yang committed
1234
1235
		bts = append(bts, '\n')
		if _, err := w.Write(bts); err != nil {
1236
			slog.Info(fmt.Sprintf("streamResponse: w.Write failed with %s", err))
Michael Yang's avatar
Michael Yang committed
1237
1238
1239
1240
1241
1242
			return false
		}

		return true
	})
}
Bruce MacDonald's avatar
Bruce MacDonald committed
1243

1244
func (s *Server) ProcessHandler(c *gin.Context) {
1245
	models := []api.ProcessModelResponse{}
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256

	for _, v := range s.sched.loaded {
		model := v.model
		modelDetails := api.ModelDetails{
			Format:            model.Config.ModelFormat,
			Family:            model.Config.ModelFamily,
			Families:          model.Config.ModelFamilies,
			ParameterSize:     model.Config.ModelType,
			QuantizationLevel: model.Config.FileType,
		}

1257
		mr := api.ProcessModelResponse{
1258
1259
1260
1261
1262
1263
1264
1265
			Model:     model.ShortName,
			Name:      model.ShortName,
			Size:      int64(v.estimatedTotal),
			SizeVRAM:  int64(v.estimatedVRAM),
			Digest:    model.Digest,
			Details:   modelDetails,
			ExpiresAt: v.expiresAt,
		}
1266
1267
1268
1269
1270
1271
1272
1273
		// The scheduler waits to set expiresAt, so if a model is loading it's
		// possible that it will be set to the unix epoch. For those cases, just
		// calculate the time w/ the sessionDuration instead.
		var epoch time.Time
		if v.expiresAt == epoch {
			mr.ExpiresAt = time.Now().Add(v.sessionDuration)
		}

1274
1275
1276
		models = append(models, mr)
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
1277
1278
1279
1280
1281
	slices.SortStableFunc(models, func(i, j api.ProcessModelResponse) int {
		// longest duration remaining listed first
		return cmp.Compare(j.ExpiresAt.Unix(), i.ExpiresAt.Unix())
	})

1282
	c.JSON(http.StatusOK, api.ProcessResponse{Models: models})
1283
1284
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
1285
func (s *Server) ChatHandler(c *gin.Context) {
1286
1287
	checkpointStart := time.Now()

Bruce MacDonald's avatar
Bruce MacDonald committed
1288
	var req api.ChatRequest
Michael Yang's avatar
Michael Yang committed
1289
	if err := c.ShouldBindJSON(&req); errors.Is(err, io.EOF) {
Bruce MacDonald's avatar
Bruce MacDonald committed
1290
1291
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
Michael Yang's avatar
Michael Yang committed
1292
	} else if err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
1293
1294
1295
1296
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

Michael Yang's avatar
Michael Yang committed
1297
	caps := []Capability{CapabilityCompletion}
Michael Yang's avatar
tools  
Michael Yang committed
1298
1299
1300
1301
	if req.Tools != nil {
		caps = append(caps, CapabilityTools)
	}

Michael Yang's avatar
Michael Yang committed
1302
	r, m, opts, err := s.scheduleRunner(c.Request.Context(), req.Model, caps, req.Options, req.KeepAlive)
Michael Yang's avatar
Michael Yang committed
1303
1304
	if errors.Is(err, errCapabilityCompletion) {
		c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("%q does not support chat", req.Model)})
Bruce MacDonald's avatar
Bruce MacDonald committed
1305
		return
Michael Yang's avatar
Michael Yang committed
1306
	} else if err != nil {
Michael Yang's avatar
Michael Yang committed
1307
		handleScheduleError(c, req.Model, err)
Bruce MacDonald's avatar
Bruce MacDonald committed
1308
1309
		return
	}
Michael Yang's avatar
Michael Yang committed
1310

1311
1312
	checkpointLoaded := time.Now()

Michael Yang's avatar
Michael Yang committed
1313
1314
	if len(req.Messages) == 0 {
		c.JSON(http.StatusOK, api.ChatResponse{
1315
			Model:      req.Model,
Michael Yang's avatar
Michael Yang committed
1316
1317
			CreatedAt:  time.Now().UTC(),
			Message:    api.Message{Role: "assistant"},
1318
1319
			Done:       true,
			DoneReason: "load",
Michael Yang's avatar
Michael Yang committed
1320
		})
1321
1322
1323
		return
	}

1324
	if req.Messages[0].Role != "system" && m.System != "" {
1325
1326
1327
		req.Messages = append([]api.Message{{Role: "system", Content: m.System}}, req.Messages...)
	}

Michael Yang's avatar
tools  
Michael Yang committed
1328
	prompt, images, err := chatPrompt(c.Request.Context(), m, r.Tokenize, opts, req.Messages, req.Tools)
Michael Yang's avatar
Michael Yang committed
1329
1330
1331
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
1332
1333
	}

Michael Yang's avatar
Michael Yang committed
1334
	slog.Debug("chat request", "images", len(images), "prompt", prompt)
1335

Bruce MacDonald's avatar
Bruce MacDonald committed
1336
1337
1338
	ch := make(chan any)
	go func() {
		defer close(ch)
Michael Yang's avatar
Michael Yang committed
1339
		if err := r.Completion(c.Request.Context(), llm.CompletionRequest{
Michael Yang's avatar
Michael Yang committed
1340
1341
1342
			Prompt:  prompt,
			Images:  images,
			Format:  req.Format,
Michael Yang's avatar
Michael Yang committed
1343
			Options: opts,
Michael Yang's avatar
Michael Yang committed
1344
		}, func(r llm.CompletionResponse) {
1345
			res := api.ChatResponse{
1346
1347
1348
1349
1350
				Model:      req.Model,
				CreatedAt:  time.Now().UTC(),
				Message:    api.Message{Role: "assistant", Content: r.Content},
				Done:       r.Done,
				DoneReason: r.DoneReason,
Bruce MacDonald's avatar
Bruce MacDonald committed
1351
1352
1353
1354
1355
1356
1357
				Metrics: api.Metrics{
					PromptEvalCount:    r.PromptEvalCount,
					PromptEvalDuration: r.PromptEvalDuration,
					EvalCount:          r.EvalCount,
					EvalDuration:       r.EvalDuration,
				},
			}
1358
1359
1360
1361
1362
1363
1364

			if r.Done {
				res.TotalDuration = time.Since(checkpointStart)
				res.LoadDuration = checkpointLoaded.Sub(checkpointStart)
			}

			ch <- res
Michael Yang's avatar
Michael Yang committed
1365
		}); err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
1366
1367
1368
1369
1370
			ch <- gin.H{"error": err.Error()}
		}
	}()

	if req.Stream != nil && !*req.Stream {
Michael Yang's avatar
tools  
Michael Yang committed
1371
		var resp api.ChatResponse
Bruce MacDonald's avatar
Bruce MacDonald committed
1372
		var sb strings.Builder
Michael Yang's avatar
Michael Yang committed
1373
1374
		for rr := range ch {
			switch t := rr.(type) {
1375
			case api.ChatResponse:
Michael Yang's avatar
Michael Yang committed
1376
				sb.WriteString(t.Message.Content)
Michael Yang's avatar
tools  
Michael Yang committed
1377
				resp = t
1378
			case gin.H:
Michael Yang's avatar
Michael Yang committed
1379
1380
1381
				msg, ok := t["error"].(string)
				if !ok {
					msg = "unexpected error format in response"
1382
				}
Michael Yang's avatar
Michael Yang committed
1383
1384
1385

				c.JSON(http.StatusInternalServerError, gin.H{"error": msg})
				return
1386
			default:
Michael Yang's avatar
Michael Yang committed
1387
				c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected response"})
1388
				return
Bruce MacDonald's avatar
Bruce MacDonald committed
1389
1390
			}
		}
1391

Michael Yang's avatar
tools  
Michael Yang committed
1392
1393
1394
1395
1396
1397
1398
		resp.Message.Content = sb.String()
		if toolCalls, ok := m.parseToolCalls(sb.String()); ok {
			resp.Message.ToolCalls = toolCalls
			resp.Message.Content = ""
		}

		c.JSON(http.StatusOK, resp)
Bruce MacDonald's avatar
Bruce MacDonald committed
1399
1400
1401
1402
1403
		return
	}

	streamResponse(c, ch)
}
1404

Michael Yang's avatar
Michael Yang committed
1405
func handleScheduleError(c *gin.Context, name string, err error) {
Michael Yang's avatar
Michael Yang committed
1406
	switch {
1407
	case errors.Is(err, errCapabilities), errors.Is(err, errRequired):
Michael Yang's avatar
Michael Yang committed
1408
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Michael Yang's avatar
Michael Yang committed
1409
	case errors.Is(err, context.Canceled):
1410
		c.JSON(499, gin.H{"error": "request canceled"})
Michael Yang's avatar
Michael Yang committed
1411
	case errors.Is(err, ErrMaxQueue):
1412
		c.JSON(http.StatusServiceUnavailable, gin.H{"error": err.Error()})
Michael Yang's avatar
Michael Yang committed
1413
1414
	case errors.Is(err, os.ErrNotExist):
		c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model %q not found, try pulling it first", name)})
Michael Yang's avatar
Michael Yang committed
1415
1416
	default:
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
1417
1418
	}
}