routes.go 34.7 KB
Newer Older
Jeffrey Morgan's avatar
Jeffrey Morgan committed
1
2
3
package server

import (
Michael Yang's avatar
Michael Yang committed
4
	"cmp"
5
	"context"
Michael Yang's avatar
Michael Yang committed
6
	"encoding/json"
7
	"errors"
8
	"fmt"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
9
	"io"
10
	"io/fs"
11
	"log/slog"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
12
13
	"net"
	"net/http"
14
	"net/netip"
15
	"os"
16
	"os/signal"
Michael Yang's avatar
Michael Yang committed
17
	"path/filepath"
18
	"slices"
Michael Yang's avatar
Michael Yang committed
19
	"strings"
20
	"syscall"
21
	"time"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
22

Michael Yang's avatar
Michael Yang committed
23
	"github.com/gin-contrib/cors"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
24
25
	"github.com/gin-gonic/gin"

26
	"github.com/ollama/ollama/api"
27
	"github.com/ollama/ollama/envconfig"
28
29
30
	"github.com/ollama/ollama/gpu"
	"github.com/ollama/ollama/llm"
	"github.com/ollama/ollama/openai"
31
	"github.com/ollama/ollama/parser"
Michael Yang's avatar
Michael Yang committed
32
	"github.com/ollama/ollama/template"
33
	"github.com/ollama/ollama/types/errtypes"
Michael Yang's avatar
Michael Yang committed
34
	"github.com/ollama/ollama/types/model"
35
	"github.com/ollama/ollama/version"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
36
37
)

Michael Yang's avatar
Michael Yang committed
38
39
// mode is the gin run mode applied by init. init replaces any value other
// than gin.DebugMode, gin.ReleaseMode or gin.TestMode with gin.DebugMode.
var mode string = gin.DebugMode

40
type Server struct {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
41
42
	addr  net.Addr
	sched *Scheduler
43
44
}

Michael Yang's avatar
Michael Yang committed
45
46
47
48
49
50
51
52
53
54
55
56
func init() {
	switch mode {
	case gin.DebugMode:
	case gin.ReleaseMode:
	case gin.TestMode:
	default:
		mode = gin.DebugMode
	}

	gin.SetMode(mode)
}

57
58
59
60
61
62
63
64
65
66
67
func modelOptions(model *Model, requestOpts map[string]interface{}) (api.Options, error) {
	opts := api.DefaultOptions()
	if err := opts.FromMap(model.Options); err != nil {
		return api.Options{}, err
	}

	if err := opts.FromMap(requestOpts); err != nil {
		return api.Options{}, err
	}

	return opts, nil
Bruce MacDonald's avatar
Bruce MacDonald committed
68
69
}

70
71
72
73
74
75
// isSupportedImageType reports whether the sniffed content type of image
// (as determined by http.DetectContentType) is one of the accepted image
// formats.
func isSupportedImageType(image []byte) bool {
	accepted := []string{"image/jpeg", "image/jpg", "image/png"}
	sniffed := http.DetectContentType(image)
	return slices.Index(accepted, sniffed) >= 0
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
76
func (s *Server) GenerateHandler(c *gin.Context) {
Bruce MacDonald's avatar
Bruce MacDonald committed
77
78
	checkpointStart := time.Now()
	var req api.GenerateRequest
Michael Yang's avatar
Michael Yang committed
79
	err := c.ShouldBindJSON(&req)
Patrick Devine's avatar
Patrick Devine committed
80

Michael Yang's avatar
Michael Yang committed
81
82
83
84
85
86
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Bruce MacDonald's avatar
Bruce MacDonald committed
87
88
89
		return
	}

90
91
92
	// validate the request
	switch {
	case req.Model == "":
93
94
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
		return
95
96
97
	case len(req.Format) > 0 && req.Format != "json":
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "format must be json"})
		return
98
99
100
	case req.Raw && (req.Template != "" || req.System != "" || len(req.Context) > 0):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "raw mode does not support template, system, or context"})
		return
101
102
	}

103
104
105
106
107
108
109
	for _, img := range req.Images {
		if !isSupportedImageType(img) {
			c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "unsupported image format"})
			return
		}
	}

110
	model, err := GetModel(req.Model)
Bruce MacDonald's avatar
Bruce MacDonald committed
111
	if err != nil {
112
		var pErr *fs.PathError
113
		if errors.As(err, &pErr) {
114
			c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found, try pulling it first", req.Model)})
115
116
117
118
119
120
			return
		}
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

Michael Yang's avatar
Michael Yang committed
121
122
	if !model.Has(CapabilityCompletion) {
		c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("%s does not support generate", req.Model)})
123
124
125
		return
	}

126
127
128
129
130
131
	opts, err := modelOptions(model, req.Options)
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

132
	rCh, eCh := s.sched.GetRunner(c.Request.Context(), model, opts, req.KeepAlive)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
133
134
135
136
	var runner *runnerRef
	select {
	case runner = <-rCh:
	case err = <-eCh:
137
		handleErrorResponse(c, err)
Bruce MacDonald's avatar
Bruce MacDonald committed
138
139
140
		return
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
141
	// an empty request loads the model
142
143
	// note: for a short while template was used in lieu
	// of `raw` mode so we need to check for it too
Bruce MacDonald's avatar
Bruce MacDonald committed
144
	if req.Prompt == "" && req.Template == "" && req.System == "" {
145
		c.JSON(http.StatusOK, api.GenerateResponse{
146
147
148
149
			CreatedAt:  time.Now().UTC(),
			Model:      req.Model,
			Done:       true,
			DoneReason: "load",
Michael Yang's avatar
Michael Yang committed
150
		})
Bruce MacDonald's avatar
Bruce MacDonald committed
151
152
153
		return
	}

Michael Yang's avatar
Michael Yang committed
154
155
156
157
158
159
	tmpl, err := template.Parse(req.Template)
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
160
161
	checkpointLoaded := time.Now()

Bruce MacDonald's avatar
Bruce MacDonald committed
162
163
164
165
166
	var prompt string
	switch {
	case req.Raw:
		prompt = req.Prompt
	case req.Prompt != "":
167
		if req.Template == "" {
Michael Yang's avatar
Michael Yang committed
168
			tmpl = model.Template
Bruce MacDonald's avatar
Bruce MacDonald committed
169
170
		}

171
172
173
174
175
176
177
178
179
		if req.System == "" {
			req.System = model.System
		}

		slog.Debug("generate handler", "prompt", req.Prompt)
		slog.Debug("generate handler", "template", req.Template)
		slog.Debug("generate handler", "system", req.System)

		var sb strings.Builder
Michael Yang's avatar
Michael Yang committed
180
181
182
183
184
185
		for i := range req.Images {
			fmt.Fprintf(&sb, "[img-%d] ", i)
		}

		sb.WriteString(req.Prompt)

Michael Yang's avatar
Michael Yang committed
186
		p, err := Prompt(tmpl, req.System, sb.String(), "", true)
Michael Yang's avatar
Michael Yang committed
187
188
189
190
191
192
		if err != nil {
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
			return
		}

		sb.Reset()
Bruce MacDonald's avatar
Bruce MacDonald committed
193
		if req.Context != nil {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
194
			prev, err := runner.llama.Detokenize(c.Request.Context(), req.Context)
Bruce MacDonald's avatar
Bruce MacDonald committed
195
196
197
198
199
			if err != nil {
				c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
				return
			}

200
			sb.WriteString(prev)
201
202
		}

203
204
205
		sb.WriteString(p)

		prompt = sb.String()
Bruce MacDonald's avatar
Bruce MacDonald committed
206
207
	}

Michael Yang's avatar
Michael Yang committed
208
	slog.Debug("generate handler", "prompt", prompt)
209

Bruce MacDonald's avatar
Bruce MacDonald committed
210
	ch := make(chan any)
Bruce MacDonald's avatar
Bruce MacDonald committed
211
	var generated strings.Builder
Bruce MacDonald's avatar
Bruce MacDonald committed
212
213
214
	go func() {
		defer close(ch)

215
		fn := func(r llm.CompletionResponse) {
Bruce MacDonald's avatar
Bruce MacDonald committed
216
217
218
219
			// Build up the full response
			if _, err := generated.WriteString(r.Content); err != nil {
				ch <- gin.H{"error": err.Error()}
				return
Bruce MacDonald's avatar
Bruce MacDonald committed
220
221
			}

Bruce MacDonald's avatar
Bruce MacDonald committed
222
			resp := api.GenerateResponse{
223
224
225
226
227
				Model:      req.Model,
				CreatedAt:  time.Now().UTC(),
				Done:       r.Done,
				Response:   r.Content,
				DoneReason: r.DoneReason,
Bruce MacDonald's avatar
Bruce MacDonald committed
228
229
230
231
232
233
				Metrics: api.Metrics{
					PromptEvalCount:    r.PromptEvalCount,
					PromptEvalDuration: r.PromptEvalDuration,
					EvalCount:          r.EvalCount,
					EvalDuration:       r.EvalDuration,
				},
Bruce MacDonald's avatar
Bruce MacDonald committed
234
235
			}

236
237
238
239
240
			if r.Done {
				resp.TotalDuration = time.Since(checkpointStart)
				resp.LoadDuration = checkpointLoaded.Sub(checkpointStart)

				if !req.Raw {
Michael Yang's avatar
Michael Yang committed
241
					p, err := Prompt(tmpl, req.System, req.Prompt, generated.String(), false)
242
					if err != nil {
243
						c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
244
245
						return
					}
246
247

					// TODO (jmorganca): encode() should not strip special tokens
Daniel Hiltgen's avatar
Daniel Hiltgen committed
248
					tokens, err := runner.llama.Tokenize(c.Request.Context(), p)
249
250
251
252
					if err != nil {
						ch <- gin.H{"error": err.Error()}
						return
					}
253
254

					resp.Context = append(req.Context, tokens...)
Bruce MacDonald's avatar
Bruce MacDonald committed
255
256
257
258
				}
			}

			ch <- resp
Bruce MacDonald's avatar
Bruce MacDonald committed
259
260
		}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
261
		var images []llm.ImageData
Michael Yang's avatar
Michael Yang committed
262
		for i := range req.Images {
Jeffrey Morgan's avatar
Jeffrey Morgan committed
263
264
265
266
			images = append(images, llm.ImageData{
				ID:   i,
				Data: req.Images[i],
			})
Michael Yang's avatar
Michael Yang committed
267
268
		}

Bruce MacDonald's avatar
Bruce MacDonald committed
269
		// Start prediction
270
		req := llm.CompletionRequest{
271
272
			Prompt:  prompt,
			Format:  req.Format,
Michael Yang's avatar
Michael Yang committed
273
			Images:  images,
274
			Options: opts,
Bruce MacDonald's avatar
Bruce MacDonald committed
275
		}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
276
		if err := runner.llama.Completion(c.Request.Context(), req, fn); err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
277
278
279
280
281
			ch <- gin.H{"error": err.Error()}
		}
	}()

	if req.Stream != nil && !*req.Stream {
282
283
		// Accumulate responses into the final response
		var final api.GenerateResponse
Bruce MacDonald's avatar
Bruce MacDonald committed
284
		var sb strings.Builder
Bruce MacDonald's avatar
Bruce MacDonald committed
285
		for resp := range ch {
286
287
288
289
290
291
292
293
294
295
296
297
298
299
			switch r := resp.(type) {
			case api.GenerateResponse:
				sb.WriteString(r.Response)
				final = r
			case gin.H:
				if errorMsg, ok := r["error"].(string); ok {
					c.JSON(http.StatusInternalServerError, gin.H{"error": errorMsg})
					return
				} else {
					c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected error format in response"})
					return
				}
			default:
				c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected error"})
Bruce MacDonald's avatar
Bruce MacDonald committed
300
301
302
				return
			}
		}
303
304
305

		final.Response = sb.String()
		c.JSON(http.StatusOK, final)
Bruce MacDonald's avatar
Bruce MacDonald committed
306
307
308
309
310
311
		return
	}

	streamResponse(c, ch)
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
312
func (s *Server) EmbeddingsHandler(c *gin.Context) {
Bruce MacDonald's avatar
Bruce MacDonald committed
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
	var req api.EmbeddingRequest
	err := c.ShouldBindJSON(&req)
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	if req.Model == "" {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
		return
	}

329
	model, err := GetModel(req.Model)
Bruce MacDonald's avatar
Bruce MacDonald committed
330
	if err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
331
		var pErr *fs.PathError
332
		if errors.As(err, &pErr) {
Bruce MacDonald's avatar
Bruce MacDonald committed
333
			c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found, try pulling it first", req.Model)})
334
335
336
337
338
339
340
341
342
343
344
			return
		}
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

	opts, err := modelOptions(model, req.Options)
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
345

346
	rCh, eCh := s.sched.GetRunner(c.Request.Context(), model, opts, req.KeepAlive)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
347
348
349
350
	var runner *runnerRef
	select {
	case runner = <-rCh:
	case err = <-eCh:
351
		handleErrorResponse(c, err)
Bruce MacDonald's avatar
Bruce MacDonald committed
352
353
354
		return
	}

355
356
357
	// an empty request loads the model
	if req.Prompt == "" {
		c.JSON(http.StatusOK, api.EmbeddingResponse{Embedding: []float64{}})
Bruce MacDonald's avatar
Bruce MacDonald committed
358
359
360
		return
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
361
	embedding, err := runner.llama.Embedding(c.Request.Context(), req.Prompt)
Bruce MacDonald's avatar
Bruce MacDonald committed
362
	if err != nil {
363
		slog.Info(fmt.Sprintf("embedding generation failed: %v", err))
Bruce MacDonald's avatar
Bruce MacDonald committed
364
365
366
367
368
369
370
371
372
373
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate embedding"})
		return
	}

	resp := api.EmbeddingResponse{
		Embedding: embedding,
	}
	c.JSON(http.StatusOK, resp)
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
374
func (s *Server) PullModelHandler(c *gin.Context) {
Michael Yang's avatar
Michael Yang committed
375
	var req api.PullRequest
Michael Yang's avatar
Michael Yang committed
376
377
378
379
380
381
382
	err := c.ShouldBindJSON(&req)
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Michael Yang's avatar
Michael Yang committed
383
384
385
		return
	}

386
387
388
389
390
391
392
393
	name := model.ParseName(cmp.Or(req.Model, req.Name))
	if !name.IsValid() {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "invalid model name"})
		return
	}

	if err := checkNameExists(name); err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
394
395
396
		return
	}

397
398
399
	ch := make(chan any)
	go func() {
		defer close(ch)
400
401
		fn := func(r api.ProgressResponse) {
			ch <- r
402
		}
403

Michael Yang's avatar
Michael Yang committed
404
		regOpts := &registryOptions{
405
406
407
			Insecure: req.Insecure,
		}

408
409
410
		ctx, cancel := context.WithCancel(c.Request.Context())
		defer cancel()

411
		if err := PullModel(ctx, name.DisplayShortest(), regOpts, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
412
			ch <- gin.H{"error": err.Error()}
413
414
415
		}
	}()

416
417
418
419
420
	if req.Stream != nil && !*req.Stream {
		waitForStream(c, ch)
		return
	}

421
422
423
	streamResponse(c, ch)
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
424
func (s *Server) PushModelHandler(c *gin.Context) {
425
	var req api.PushRequest
Michael Yang's avatar
Michael Yang committed
426
427
428
429
430
431
432
	err := c.ShouldBindJSON(&req)
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Michael Yang's avatar
Michael Yang committed
433
434
		return
	}
Michael Yang's avatar
Michael Yang committed
435

Michael Yang's avatar
Michael Yang committed
436
437
438
439
440
441
442
	var model string
	if req.Model != "" {
		model = req.Model
	} else if req.Name != "" {
		model = req.Name
	} else {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
443
444
445
		return
	}

446
447
448
	ch := make(chan any)
	go func() {
		defer close(ch)
449
450
		fn := func(r api.ProgressResponse) {
			ch <- r
451
		}
452

Michael Yang's avatar
Michael Yang committed
453
		regOpts := &registryOptions{
454
455
456
			Insecure: req.Insecure,
		}

Michael Yang's avatar
Michael Yang committed
457
458
459
		ctx, cancel := context.WithCancel(c.Request.Context())
		defer cancel()

Michael Yang's avatar
Michael Yang committed
460
		if err := PushModel(ctx, model, regOpts, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
461
			ch <- gin.H{"error": err.Error()}
462
463
464
		}
	}()

465
466
467
468
469
	if req.Stream != nil && !*req.Stream {
		waitForStream(c, ch)
		return
	}

470
471
472
	streamResponse(c, ch)
}

473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
// checkNameExists returns an error when a manifest other than name already
// exists whose file path equals name's file path ignoring case. Errors from
// listing the manifests are returned unchanged.
func checkNameExists(name model.Name) error {
	manifests, err := Manifests()
	if err != nil {
		return err
	}

	for existing := range manifests {
		sameIgnoringCase := strings.EqualFold(existing.Filepath(), name.Filepath())
		if sameIgnoringCase && existing != name {
			return errors.New("a model with that name already exists")
		}
	}

	return nil
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
488
func (s *Server) CreateModelHandler(c *gin.Context) {
489
490
	var r api.CreateRequest
	if err := c.ShouldBindJSON(&r); errors.Is(err, io.EOF) {
Michael Yang's avatar
Michael Yang committed
491
492
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
Michael Yang's avatar
Michael Yang committed
493
	} else if err != nil {
Michael Yang's avatar
Michael Yang committed
494
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Michael Yang's avatar
Michael Yang committed
495
		return
496
497
	}

498
	name := model.ParseName(cmp.Or(r.Model, r.Name))
Michael Yang's avatar
Michael Yang committed
499
	if !name.IsValid() {
500
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": errtypes.InvalidModelNameErrMsg})
501
502
503
		return
	}

504
505
506
507
508
	if err := checkNameExists(name); err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

509
	if r.Path == "" && r.Modelfile == "" {
Michael Yang's avatar
Michael Yang committed
510
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "path or modelfile are required"})
Michael Yang's avatar
Michael Yang committed
511
512
		return
	}
Michael Yang's avatar
Michael Yang committed
513

514
515
516
	var sr io.Reader = strings.NewReader(r.Modelfile)
	if r.Path != "" && r.Modelfile == "" {
		f, err := os.Open(r.Path)
Michael Yang's avatar
Michael Yang committed
517
518
519
520
		if err != nil {
			c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("error reading modelfile: %s", err)})
			return
		}
Michael Yang's avatar
Michael Yang committed
521
		defer f.Close()
Michael Yang's avatar
Michael Yang committed
522

523
		sr = f
Michael Yang's avatar
Michael Yang committed
524
	}
Michael Yang's avatar
Michael Yang committed
525

526
	f, err := parser.ParseFile(sr)
Michael Yang's avatar
Michael Yang committed
527
528
529
530
531
	if err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

Michael Yang's avatar
Michael Yang committed
532
	ch := make(chan any)
Michael Yang's avatar
Michael Yang committed
533
534
	go func() {
		defer close(ch)
535
536
		fn := func(resp api.ProgressResponse) {
			ch <- resp
537
538
		}

539
540
541
		ctx, cancel := context.WithCancel(c.Request.Context())
		defer cancel()

542
543
		quantization := cmp.Or(r.Quantize, r.Quantization)
		if err := CreateModel(ctx, name, filepath.Dir(r.Path), strings.ToUpper(quantization), f, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
544
			ch <- gin.H{"error": err.Error()}
545
		}
Michael Yang's avatar
Michael Yang committed
546
	}()
Michael Yang's avatar
Michael Yang committed
547

548
	if r.Stream != nil && !*r.Stream {
549
550
551
552
		waitForStream(c, ch)
		return
	}

Michael Yang's avatar
Michael Yang committed
553
	streamResponse(c, ch)
Bruce MacDonald's avatar
Bruce MacDonald committed
554
555
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
556
func (s *Server) DeleteModelHandler(c *gin.Context) {
557
558
	var r api.DeleteRequest
	if err := c.ShouldBindJSON(&r); errors.Is(err, io.EOF) {
Michael Yang's avatar
Michael Yang committed
559
560
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
561
	} else if err != nil {
Michael Yang's avatar
Michael Yang committed
562
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
563
564
565
		return
	}

566
567
568
	n := model.ParseName(cmp.Or(r.Model, r.Name))
	if !n.IsValid() {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("name %q is invalid", cmp.Or(r.Model, r.Name))})
569
570
		return
	}
Michael Yang's avatar
Michael Yang committed
571

572
	m, err := ParseNamedManifest(n)
Michael Yang's avatar
Michael Yang committed
573
574
575
576
577
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

578
	if err := m.Remove(); err != nil {
Michael Yang's avatar
Michael Yang committed
579
580
581
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
582
583
584
585
586

	if err := m.RemoveLayers(); err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
587
588
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
589
func (s *Server) ShowModelHandler(c *gin.Context) {
Patrick Devine's avatar
Patrick Devine committed
590
	var req api.ShowRequest
Michael Yang's avatar
Michael Yang committed
591
592
593
594
595
596
597
	err := c.ShouldBindJSON(&req)
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Patrick Devine's avatar
Patrick Devine committed
598
599
600
		return
	}

Michael Yang's avatar
Michael Yang committed
601
	if req.Model != "" {
Michael Yang's avatar
Michael Yang committed
602
		// noop
Michael Yang's avatar
Michael Yang committed
603
	} else if req.Name != "" {
Michael Yang's avatar
Michael Yang committed
604
		req.Model = req.Name
Michael Yang's avatar
Michael Yang committed
605
	} else {
606
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
607
608
609
		return
	}

610
	resp, err := GetModelInfo(req)
Patrick Devine's avatar
Patrick Devine committed
611
	if err != nil {
612
613
		switch {
		case os.IsNotExist(err):
Michael Yang's avatar
Michael Yang committed
614
			c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Model)})
615
616
617
		case err.Error() == "invalid model name":
			c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		default:
Patrick Devine's avatar
Patrick Devine committed
618
619
620
621
622
623
624
625
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		}
		return
	}

	c.JSON(http.StatusOK, resp)
}

626
func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {
627
	m, err := GetModel(req.Model)
Patrick Devine's avatar
Patrick Devine committed
628
629
630
631
	if err != nil {
		return nil, err
	}

Patrick Devine's avatar
Patrick Devine committed
632
	modelDetails := api.ModelDetails{
633
634
635
636
637
638
		ParentModel:       m.ParentModel,
		Format:            m.Config.ModelFormat,
		Family:            m.Config.ModelFamily,
		Families:          m.Config.ModelFamilies,
		ParameterSize:     m.Config.ModelType,
		QuantizationLevel: m.Config.FileType,
Patrick Devine's avatar
Patrick Devine committed
639
640
	}

641
	if req.System != "" {
642
		m.System = req.System
643
644
645
	}

	if req.Template != "" {
Michael Yang's avatar
Michael Yang committed
646
647
648
649
		m.Template, err = template.Parse(req.Template)
		if err != nil {
			return nil, err
		}
650
651
	}

652
	msgs := make([]api.Message, 0)
653
	for _, msg := range m.Messages {
654
655
656
		msgs = append(msgs, api.Message{Role: msg.Role, Content: msg.Content})
	}

657
658
659
660
661
662
663
664
665
666
	n := model.ParseName(req.Model)
	if !n.IsValid() {
		return nil, fmt.Errorf("invalid model name")
	}

	manifest, err := ParseNamedManifest(n)
	if err != nil {
		return nil, err
	}

Patrick Devine's avatar
Patrick Devine committed
667
	resp := &api.ShowResponse{
668
669
		License:    strings.Join(m.License, "\n"),
		System:     m.System,
Michael Yang's avatar
Michael Yang committed
670
		Template:   m.Template.String(),
671
672
673
		Details:    modelDetails,
		Messages:   msgs,
		ModifiedAt: manifest.fi.ModTime(),
Patrick Devine's avatar
Patrick Devine committed
674
675
676
677
	}

	var params []string
	cs := 30
678
	for k, v := range m.Options {
Patrick Devine's avatar
Patrick Devine committed
679
680
681
		switch val := v.(type) {
		case []interface{}:
			for _, nv := range val {
Patrick Devine's avatar
Patrick Devine committed
682
				params = append(params, fmt.Sprintf("%-*s %#v", cs, k, nv))
Patrick Devine's avatar
Patrick Devine committed
683
			}
Patrick Devine's avatar
Patrick Devine committed
684
685
		default:
			params = append(params, fmt.Sprintf("%-*s %#v", cs, k, v))
Patrick Devine's avatar
Patrick Devine committed
686
687
688
689
		}
	}
	resp.Parameters = strings.Join(params, "\n")

690
691
	for k, v := range req.Options {
		if _, ok := req.Options[k]; ok {
692
			m.Options[k] = v
693
694
695
		}
	}

696
	var sb strings.Builder
697
	fmt.Fprintln(&sb, "# Modelfile generated by \"ollama show\"")
698
	fmt.Fprintln(&sb, "# To build a new Modelfile based on this, replace FROM with:")
699
700
	fmt.Fprintf(&sb, "# FROM %s\n\n", m.ShortName)
	fmt.Fprint(&sb, m.String())
701
	resp.Modelfile = sb.String()
702

703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
	kvData, err := getKVData(m.ModelPath, req.Verbose)
	if err != nil {
		return nil, err
	}
	delete(kvData, "general.name")
	delete(kvData, "tokenizer.chat_template")
	resp.ModelInfo = kvData

	if len(m.ProjectorPaths) > 0 {
		projectorData, err := getKVData(m.ProjectorPaths[0], req.Verbose)
		if err != nil {
			return nil, err
		}
		resp.ProjectorInfo = projectorData
	}

Patrick Devine's avatar
Patrick Devine committed
719
720
721
	return resp, nil
}

722
// getKVData loads the key/value metadata of the model file identified by
// digest. In verbose mode the model is loaded with a maximum array size of
// -1 and the metadata is returned as is; otherwise the maximum array size is
// 0 and every []any value with more than five elements is replaced by an
// empty slice.
func getKVData(digest string, verbose bool) (llm.KV, error) {
	arrayLimit := 0
	if verbose {
		arrayLimit = -1
	}

	loaded, err := llm.LoadModel(digest, arrayLimit)
	if err != nil {
		return nil, err
	}

	kv := loaded.KV()
	if verbose {
		return kv, nil
	}

	for key, value := range kv {
		if items, isList := value.([]any); isList && len(items) > 5 {
			kv[key] = []any{}
		}
	}

	return kv, nil
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
745
func (s *Server) ListModelsHandler(c *gin.Context) {
746
	ms, err := Manifests()
Patrick Devine's avatar
Patrick Devine committed
747
748
749
750
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
Michael Yang's avatar
Michael Yang committed
751

752
	models := []api.ListModelResponse{}
753
754
755
756
757
758
759
	for n, m := range ms {
		f, err := m.Config.Open()
		if err != nil {
			slog.Warn("bad manifest filepath", "name", n, "error", err)
			continue
		}
		defer f.Close()
760

761
762
763
764
		var cf ConfigV2
		if err := json.NewDecoder(f).Decode(&cf); err != nil {
			slog.Warn("bad manifest config", "name", n, "error", err)
			continue
Patrick Devine's avatar
Patrick Devine committed
765
		}
Michael Yang's avatar
Michael Yang committed
766

767
		// tag should never be masked
768
		models = append(models, api.ListModelResponse{
769
770
771
772
773
774
775
776
777
778
779
780
781
			Model:      n.DisplayShortest(),
			Name:       n.DisplayShortest(),
			Size:       m.Size(),
			Digest:     m.digest,
			ModifiedAt: m.fi.ModTime(),
			Details: api.ModelDetails{
				Format:            cf.ModelFormat,
				Family:            cf.ModelFamily,
				Families:          cf.ModelFamilies,
				ParameterSize:     cf.ModelType,
				QuantizationLevel: cf.FileType,
			},
		})
Patrick Devine's avatar
Patrick Devine committed
782
783
	}

784
	slices.SortStableFunc(models, func(i, j api.ListModelResponse) int {
785
786
787
788
		// most recently modified first
		return cmp.Compare(j.ModifiedAt.Unix(), i.ModifiedAt.Unix())
	})

Michael Yang's avatar
Michael Yang committed
789
	c.JSON(http.StatusOK, api.ListResponse{Models: models})
Patrick Devine's avatar
Patrick Devine committed
790
791
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
792
func (s *Server) CopyModelHandler(c *gin.Context) {
Michael Yang's avatar
Michael Yang committed
793
794
	var r api.CopyRequest
	if err := c.ShouldBindJSON(&r); errors.Is(err, io.EOF) {
Michael Yang's avatar
Michael Yang committed
795
796
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
Michael Yang's avatar
Michael Yang committed
797
	} else if err != nil {
Michael Yang's avatar
Michael Yang committed
798
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Patrick Devine's avatar
Patrick Devine committed
799
800
801
		return
	}

Michael Yang's avatar
Michael Yang committed
802
803
	src := model.ParseName(r.Source)
	if !src.IsValid() {
Michael Yang's avatar
Michael Yang committed
804
805
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("source %q is invalid", r.Source)})
		return
806
807
	}

Michael Yang's avatar
Michael Yang committed
808
809
	dst := model.ParseName(r.Destination)
	if !dst.IsValid() {
810
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("destination %q is invalid", r.Destination)})
Patrick Devine's avatar
Patrick Devine committed
811
812
		return
	}
Michael Yang's avatar
Michael Yang committed
813

814
815
816
817
818
	if err := checkNameExists(dst); err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

Michael Yang's avatar
Michael Yang committed
819
820
821
822
823
	if err := CopyModel(src, dst); errors.Is(err, os.ErrNotExist) {
		c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model %q not found", r.Source)})
	} else if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
	}
Patrick Devine's avatar
Patrick Devine committed
824
825
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
826
func (s *Server) HeadBlobHandler(c *gin.Context) {
Michael Yang's avatar
Michael Yang committed
827
828
829
830
831
832
833
834
835
836
837
	path, err := GetBlobsPath(c.Param("digest"))
	if err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	if _, err := os.Stat(path); err != nil {
		c.AbortWithStatusJSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("blob %q not found", c.Param("digest"))})
		return
	}

Michael Yang's avatar
Michael Yang committed
838
	c.Status(http.StatusOK)
Michael Yang's avatar
Michael Yang committed
839
840
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
841
func (s *Server) CreateBlobHandler(c *gin.Context) {
Michael Yang's avatar
Michael Yang committed
842
843
	if ib, ok := intermediateBlobs[c.Param("digest")]; ok {
		p, err := GetBlobsPath(ib)
844
845
846
847
848
849
		if err != nil {
			c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
			return
		}

		if _, err := os.Stat(p); errors.Is(err, os.ErrNotExist) {
Michael Yang's avatar
Michael Yang committed
850
851
			slog.Info("evicting intermediate blob which no longer exists", "digest", ib)
			delete(intermediateBlobs, c.Param("digest"))
852
853
854
855
856
857
858
859
860
		} else if err != nil {
			c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
			return
		} else {
			c.Status(http.StatusOK)
			return
		}
	}

861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
	path, err := GetBlobsPath(c.Param("digest"))
	if err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	_, err = os.Stat(path)
	switch {
	case errors.Is(err, os.ErrNotExist):
		// noop
	case err != nil:
		c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	default:
		c.Status(http.StatusOK)
		return
	}

879
	layer, err := NewLayer(c.Request.Body, "")
Michael Yang's avatar
Michael Yang committed
880
881
882
883
884
	if err != nil {
		c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

885
886
	if layer.Digest != c.Param("digest") {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("digest mismatch, expected %q, got %q", c.Param("digest"), layer.Digest)})
Michael Yang's avatar
Michael Yang committed
887
888
889
		return
	}

Michael Yang's avatar
Michael Yang committed
890
	c.Status(http.StatusCreated)
Michael Yang's avatar
Michael Yang committed
891
892
}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
// isLocalIP reports whether ip is assigned to one of this machine's network
// interfaces.
//
// Addresses are compared as values, after unwrapping IPv4-mapped IPv6
// addresses and dropping any IPv6 zone, rather than by their printed form.
// An interface address such as 192.168.1.5 prints in dotted form, while a
// netip.Addr parsed from "::ffff:192.168.1.5" or "fe80::1%eth0" prints with
// the mapping prefix or zone, so comparing strings never matches those
// spellings of a local address.
//
// Interfaces whose addresses cannot be listed are skipped. The function
// returns false when ip is the zero Addr or the interface list itself is
// unavailable.
func isLocalIP(ip netip.Addr) bool {
	if !ip.IsValid() {
		return false
	}
	want := ip.Unmap().WithZone("")

	interfaces, err := net.Interfaces()
	if err != nil {
		return false
	}

	for _, iface := range interfaces {
		addrs, err := iface.Addrs()
		if err != nil {
			continue
		}

		for _, a := range addrs {
			var raw net.IP
			switch v := a.(type) {
			case *net.IPNet:
				raw = v.IP
			case *net.IPAddr:
				raw = v.IP
			default:
				// Unknown net.Addr implementation: fall back to parsing
				// its CIDR string form.
				parsed, _, err := net.ParseCIDR(a.String())
				if err != nil {
					continue
				}
				raw = parsed
			}

			if got, ok := netip.AddrFromSlice(raw); ok && got.Unmap() == want {
				return true
			}
		}
	}

	return false
}

914
// allowedHost reports whether host, a host name without a port, refers to
// the local machine or to a name under a local-only top-level domain.
//
// A host is allowed when it is empty, equals "localhost", equals the name
// returned by os.Hostname, or ends in ".localhost", ".local" or ".internal".
// Host names are case-insensitive, so every comparison ignores case:
// "LOCALHOST" and "Box.LOCAL" are treated like their lower-case forms.
func allowedHost(host string) bool {
	host = strings.ToLower(host)

	if host == "" || host == "localhost" {
		return true
	}

	// A failure to read the machine's host name simply skips this check.
	if hostname, err := os.Hostname(); err == nil && host == strings.ToLower(hostname) {
		return true
	}

	tlds := []string{
		"localhost",
		"local",
		"internal",
	}

	// check if the host is a local TLD
	for _, tld := range tlds {
		if strings.HasSuffix(host, "."+tld) {
			return true
		}
	}

	return false
}
938

Jeffrey Morgan's avatar
Jeffrey Morgan committed
939
940
941
func allowedHostsMiddleware(addr net.Addr) gin.HandlerFunc {
	return func(c *gin.Context) {
		if addr == nil {
942
943
944
945
			c.Next()
			return
		}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
946
		if addr, err := netip.ParseAddrPort(addr.String()); err == nil && !addr.Addr().IsLoopback() {
947
948
949
950
951
952
953
954
955
			c.Next()
			return
		}

		host, _, err := net.SplitHostPort(c.Request.Host)
		if err != nil {
			host = c.Request.Host
		}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
956
		if addr, err := netip.ParseAddr(host); err == nil {
Jeffrey Morgan's avatar
Jeffrey Morgan committed
957
			if addr.IsLoopback() || addr.IsPrivate() || addr.IsUnspecified() || isLocalIP(addr) {
Jeffrey Morgan's avatar
Jeffrey Morgan committed
958
959
960
961
962
				c.Next()
				return
			}
		}

963
		if allowedHost(host) {
Michael Yang's avatar
lint  
Michael Yang committed
964
			if c.Request.Method == http.MethodOptions {
965
966
967
968
				c.AbortWithStatus(http.StatusNoContent)
				return
			}

969
970
971
972
973
974
			c.Next()
			return
		}

		c.AbortWithStatus(http.StatusForbidden)
	}
975
}
976

977
func (s *Server) GenerateRoutes() http.Handler {
Michael Yang's avatar
Michael Yang committed
978
979
	config := cors.DefaultConfig()
	config.AllowWildcard = true
980
	config.AllowBrowserExtensions = true
981
	config.AllowHeaders = []string{"Authorization", "Content-Type", "User-Agent", "Accept", "X-Requested-With"}
royjhan's avatar
royjhan committed
982
983
984
985
	openAIProperties := []string{"lang", "package-version", "os", "arch", "runtime", "runtime-version", "async"}
	for _, prop := range openAIProperties {
		config.AllowHeaders = append(config.AllowHeaders, "x-stainless-"+prop)
	}
986
	config.AllowOrigins = envconfig.AllowOrigins
Michael Yang's avatar
Michael Yang committed
987

Bruce MacDonald's avatar
Bruce MacDonald committed
988
	r := gin.Default()
989
990
	r.Use(
		cors.New(config),
991
		allowedHostsMiddleware(s.addr),
992
	)
Bruce MacDonald's avatar
Bruce MacDonald committed
993

Daniel Hiltgen's avatar
Daniel Hiltgen committed
994
995
996
997
998
999
1000
1001
1002
1003
1004
	r.POST("/api/pull", s.PullModelHandler)
	r.POST("/api/generate", s.GenerateHandler)
	r.POST("/api/chat", s.ChatHandler)
	r.POST("/api/embeddings", s.EmbeddingsHandler)
	r.POST("/api/create", s.CreateModelHandler)
	r.POST("/api/push", s.PushModelHandler)
	r.POST("/api/copy", s.CopyModelHandler)
	r.DELETE("/api/delete", s.DeleteModelHandler)
	r.POST("/api/show", s.ShowModelHandler)
	r.POST("/api/blobs/:digest", s.CreateBlobHandler)
	r.HEAD("/api/blobs/:digest", s.HeadBlobHandler)
1005
	r.GET("/api/ps", s.ProcessHandler)
Jeffrey Morgan's avatar
Jeffrey Morgan committed
1006

1007
	// Compatibility endpoints
1008
	r.POST("/v1/chat/completions", openai.ChatMiddleware(), s.ChatHandler)
1009
	r.POST("/v1/completions", openai.CompletionsMiddleware(), s.GenerateHandler)
1010
1011
	r.GET("/v1/models", openai.ListMiddleware(), s.ListModelsHandler)
	r.GET("/v1/models/:model", openai.RetrieveMiddleware(), s.ShowModelHandler)
1012

Michael Yang's avatar
Michael Yang committed
1013
1014
1015
1016
1017
	for _, method := range []string{http.MethodGet, http.MethodHead} {
		r.Handle(method, "/", func(c *gin.Context) {
			c.String(http.StatusOK, "Ollama is running")
		})

Daniel Hiltgen's avatar
Daniel Hiltgen committed
1018
		r.Handle(method, "/api/tags", s.ListModelsHandler)
Michael Yang's avatar
Michael Yang committed
1019
1020
1021
		r.Handle(method, "/api/version", func(c *gin.Context) {
			c.JSON(http.StatusOK, gin.H{"version": version.Version})
		})
Michael Yang's avatar
Michael Yang committed
1022
1023
	}

1024
1025
1026
1027
	return r
}

func Serve(ln net.Listener) error {
Michael Yang's avatar
Michael Yang committed
1028
	level := slog.LevelInfo
1029
	if envconfig.Debug {
Michael Yang's avatar
Michael Yang committed
1030
		level = slog.LevelDebug
1031
	}
Michael Yang's avatar
Michael Yang committed
1032

1033
	slog.Info("server config", "env", envconfig.Values())
Michael Yang's avatar
Michael Yang committed
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
	handler := slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{
		Level:     level,
		AddSource: true,
		ReplaceAttr: func(_ []string, attr slog.Attr) slog.Attr {
			if attr.Key == slog.SourceKey {
				source := attr.Value.Any().(*slog.Source)
				source.File = filepath.Base(source.File)
			}

			return attr
		},
	})

	slog.SetDefault(slog.New(handler))

1049
1050
1051
1052
1053
1054
1055
1056
	blobsDir, err := GetBlobsPath("")
	if err != nil {
		return err
	}
	if err := fixBlobs(blobsDir); err != nil {
		return err
	}

1057
	if !envconfig.NoPrune {
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
		// clean up unused layers and manifests
		if err := PruneLayers(); err != nil {
			return err
		}

		manifestsPath, err := GetManifestPath()
		if err != nil {
			return err
		}

		if err := PruneDirectory(manifestsPath); err != nil {
			return err
		}
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
1073
	ctx, done := context.WithCancel(context.Background())
Daniel Hiltgen's avatar
Daniel Hiltgen committed
1074
1075
	schedCtx, schedDone := context.WithCancel(ctx)
	sched := InitScheduler(schedCtx)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
1076
	s := &Server{addr: ln.Addr(), sched: sched}
1077
1078

	http.Handle("/", s.GenerateRoutes())
1079

1080
	slog.Info(fmt.Sprintf("Listening on %s (version %s)", ln.Addr(), version.Version))
1081
	srvr := &http.Server{
1082
1083
1084
1085
1086
1087
1088
1089
1090
		// Use http.DefaultServeMux so we get net/http/pprof for
		// free.
		//
		// TODO(bmizerany): Decide if we want to make this
		// configurable so it is not exposed by default, or allow
		// users to bind it to a different port. This was a quick
		// and easy way to get pprof, but it may not be the best
		// way.
		Handler: nil,
Jeffrey Morgan's avatar
Jeffrey Morgan committed
1091
1092
	}

1093
1094
	// listen for a ctrl+c and stop any loaded llm
	signals := make(chan os.Signal, 1)
1095
	signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM)
1096
1097
	go func() {
		<-signals
1098
		srvr.Close()
Daniel Hiltgen's avatar
Daniel Hiltgen committed
1099
		schedDone()
Daniel Hiltgen's avatar
Daniel Hiltgen committed
1100
		sched.unloadAllRunners()
1101
		gpu.Cleanup()
Daniel Hiltgen's avatar
Daniel Hiltgen committed
1102
		done()
1103
1104
	}()

Daniel Hiltgen's avatar
Daniel Hiltgen committed
1105
	if err := llm.Init(); err != nil {
1106
1107
		return fmt.Errorf("unable to initialize llm library %w", err)
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
1108

Daniel Hiltgen's avatar
Daniel Hiltgen committed
1109
	s.sched.Run(schedCtx)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
1110
1111
1112

	// At startup we retrieve GPU information so we can get log messages before loading a model
	// This will log warnings to the log in case we have problems with detected GPUs
Daniel Hiltgen's avatar
Daniel Hiltgen committed
1113
1114
	gpus := gpu.GetGPUInfo()
	gpus.LogDetails()
1115

Daniel Hiltgen's avatar
Daniel Hiltgen committed
1116
1117
1118
1119
1120
1121
1122
	err = srvr.Serve(ln)
	// If server is closed from the signal handler, wait for the ctx to be done
	// otherwise error out quickly
	if !errors.Is(err, http.ErrServerClosed) {
		return err
	}
	<-ctx.Done()
1123
	return nil
Jeffrey Morgan's avatar
Jeffrey Morgan committed
1124
}
Michael Yang's avatar
Michael Yang committed
1125

1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
// waitForStream drains ch and writes a single JSON response for the whole
// operation.
//
// Progress updates are consumed silently until one reports the status
// "success", which is returned with 200. A gin.H value, any value of an
// unexpected type, or the channel closing before success each produce a 500
// with an explanatory error message.
func waitForStream(c *gin.Context, ch chan interface{}) {
	c.Header("Content-Type", "application/json")

	for msg := range ch {
		if progress, isProgress := msg.(api.ProgressResponse); isProgress {
			if progress.Status == "success" {
				c.JSON(http.StatusOK, progress)
				return
			}
			// Not finished yet: keep waiting.
			continue
		}

		failure, isFailure := msg.(gin.H)
		if !isFailure {
			c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected progress response"})
			return
		}

		errorMsg, isString := failure["error"].(string)
		if !isString {
			errorMsg = "unexpected error format in progress response"
		}
		c.JSON(http.StatusInternalServerError, gin.H{"error": errorMsg})
		return
	}

	c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected end of progress response"})
}

Michael Yang's avatar
Michael Yang committed
1151
func streamResponse(c *gin.Context, ch chan any) {
1152
	c.Header("Content-Type", "application/x-ndjson")
Michael Yang's avatar
Michael Yang committed
1153
1154
1155
1156
1157
1158
1159
1160
	c.Stream(func(w io.Writer) bool {
		val, ok := <-ch
		if !ok {
			return false
		}

		bts, err := json.Marshal(val)
		if err != nil {
1161
			slog.Info(fmt.Sprintf("streamResponse: json.Marshal failed with %s", err))
Michael Yang's avatar
Michael Yang committed
1162
1163
1164
			return false
		}

1165
		// Delineate chunks with new-line delimiter
Michael Yang's avatar
Michael Yang committed
1166
1167
		bts = append(bts, '\n')
		if _, err := w.Write(bts); err != nil {
1168
			slog.Info(fmt.Sprintf("streamResponse: w.Write failed with %s", err))
Michael Yang's avatar
Michael Yang committed
1169
1170
1171
1172
1173
1174
			return false
		}

		return true
	})
}
Bruce MacDonald's avatar
Bruce MacDonald committed
1175

1176
func (s *Server) ProcessHandler(c *gin.Context) {
1177
	models := []api.ProcessModelResponse{}
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188

	for _, v := range s.sched.loaded {
		model := v.model
		modelDetails := api.ModelDetails{
			Format:            model.Config.ModelFormat,
			Family:            model.Config.ModelFamily,
			Families:          model.Config.ModelFamilies,
			ParameterSize:     model.Config.ModelType,
			QuantizationLevel: model.Config.FileType,
		}

1189
		mr := api.ProcessModelResponse{
1190
1191
1192
1193
1194
1195
1196
1197
			Model:     model.ShortName,
			Name:      model.ShortName,
			Size:      int64(v.estimatedTotal),
			SizeVRAM:  int64(v.estimatedVRAM),
			Digest:    model.Digest,
			Details:   modelDetails,
			ExpiresAt: v.expiresAt,
		}
1198
1199
1200
1201
1202
1203
1204
1205
		// The scheduler waits to set expiresAt, so if a model is loading it's
		// possible that it will be set to the unix epoch. For those cases, just
		// calculate the time w/ the sessionDuration instead.
		var epoch time.Time
		if v.expiresAt == epoch {
			mr.ExpiresAt = time.Now().Add(v.sessionDuration)
		}

1206
1207
1208
		models = append(models, mr)
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
1209
1210
1211
1212
1213
	slices.SortStableFunc(models, func(i, j api.ProcessModelResponse) int {
		// longest duration remaining listed first
		return cmp.Compare(j.ExpiresAt.Unix(), i.ExpiresAt.Unix())
	})

1214
	c.JSON(http.StatusOK, api.ProcessResponse{Models: models})
1215
1216
}

1217
// ChatPrompt builds up a prompt from a series of messages for the currently `loaded` model
Michael Yang's avatar
Michael Yang committed
1218
func chatPrompt(ctx context.Context, runner *runnerRef, template *template.Template, messages []api.Message, numCtx int) (string, error) {
1219
	encode := func(s string) ([]int, error) {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
1220
		return runner.llama.Tokenize(ctx, s)
1221
1222
	}

1223
	prompt, err := ChatPrompt(template, messages, numCtx, encode)
1224
1225
1226
1227
1228
1229
1230
	if err != nil {
		return "", err
	}

	return prompt, nil
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
1231
func (s *Server) ChatHandler(c *gin.Context) {
Bruce MacDonald's avatar
Bruce MacDonald committed
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
	checkpointStart := time.Now()

	var req api.ChatRequest
	err := c.ShouldBindJSON(&req)
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	// validate the request
	switch {
	case req.Model == "":
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
		return
	case len(req.Format) > 0 && req.Format != "json":
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "format must be json"})
		return
	}

1255
	model, err := GetModel(req.Model)
Bruce MacDonald's avatar
Bruce MacDonald committed
1256
1257
	if err != nil {
		var pErr *fs.PathError
1258
		if errors.As(err, &pErr) {
Bruce MacDonald's avatar
Bruce MacDonald committed
1259
			c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found, try pulling it first", req.Model)})
1260
1261
1262
1263
1264
1265
			return
		}
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

Michael Yang's avatar
Michael Yang committed
1266
1267
	if !model.Has(CapabilityCompletion) {
		c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("%s does not support chat", req.Model)})
1268
1269
1270
		return
	}

1271
1272
1273
1274
1275
	opts, err := modelOptions(model, req.Options)
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
1276

1277
	rCh, eCh := s.sched.GetRunner(c.Request.Context(), model, opts, req.KeepAlive)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
1278
1279
1280
1281
	var runner *runnerRef
	select {
	case runner = <-rCh:
	case err = <-eCh:
1282
		handleErrorResponse(c, err)
Bruce MacDonald's avatar
Bruce MacDonald committed
1283
1284
1285
1286
1287
		return
	}

	checkpointLoaded := time.Now()

1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
	// if the first message is not a system message, then add the model's default system message
	if len(req.Messages) > 0 && req.Messages[0].Role != "system" {
		req.Messages = append([]api.Message{
			{
				Role:    "system",
				Content: model.System,
			},
		}, req.Messages...)
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
1298
	prompt, err := chatPrompt(c.Request.Context(), runner, model.Template, req.Messages, opts.NumCtx)
Bruce MacDonald's avatar
Bruce MacDonald committed
1299
1300
1301
1302
	if err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}
Michael Yang's avatar
Michael Yang committed
1303

1304
	// an empty request loads the model
1305
	if len(req.Messages) == 0 || prompt == "" {
1306
		resp := api.ChatResponse{
1307
1308
1309
1310
1311
			CreatedAt:  time.Now().UTC(),
			Model:      req.Model,
			Done:       true,
			DoneReason: "load",
			Message:    api.Message{Role: "assistant"},
1312
1313
1314
1315
1316
		}
		c.JSON(http.StatusOK, resp)
		return
	}

1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
	// only send images that are in the prompt
	var i int
	var images []llm.ImageData
	for _, m := range req.Messages {
		for _, img := range m.Images {
			if !isSupportedImageType(img) {
				c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "unsupported image format"})
				return
			}

			if strings.Contains(prompt, fmt.Sprintf("[img-%d]", i)) {
				images = append(images, llm.ImageData{Data: img, ID: i})
			}
			i += 1
		}
	}

	slog.Debug("chat handler", "prompt", prompt, "images", len(images))
1335

Bruce MacDonald's avatar
Bruce MacDonald committed
1336
1337
1338
1339
1340
	ch := make(chan any)

	go func() {
		defer close(ch)

1341
		fn := func(r llm.CompletionResponse) {
Bruce MacDonald's avatar
Bruce MacDonald committed
1342
			resp := api.ChatResponse{
1343
1344
1345
1346
1347
				Model:      req.Model,
				CreatedAt:  time.Now().UTC(),
				Message:    api.Message{Role: "assistant", Content: r.Content},
				Done:       r.Done,
				DoneReason: r.DoneReason,
Bruce MacDonald's avatar
Bruce MacDonald committed
1348
1349
1350
1351
1352
1353
1354
1355
				Metrics: api.Metrics{
					PromptEvalCount:    r.PromptEvalCount,
					PromptEvalDuration: r.PromptEvalDuration,
					EvalCount:          r.EvalCount,
					EvalDuration:       r.EvalDuration,
				},
			}

1356
1357
1358
			if r.Done {
				resp.TotalDuration = time.Since(checkpointStart)
				resp.LoadDuration = checkpointLoaded.Sub(checkpointStart)
Bruce MacDonald's avatar
Bruce MacDonald committed
1359
1360
1361
1362
1363
			}

			ch <- resp
		}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
1364
		if err := runner.llama.Completion(c.Request.Context(), llm.CompletionRequest{
1365
1366
			Prompt:  prompt,
			Format:  req.Format,
Michael Yang's avatar
Michael Yang committed
1367
			Images:  images,
1368
			Options: opts,
1369
		}, fn); err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
1370
1371
1372
1373
1374
			ch <- gin.H{"error": err.Error()}
		}
	}()

	if req.Stream != nil && !*req.Stream {
1375
1376
		// Accumulate responses into the final response
		var final api.ChatResponse
Bruce MacDonald's avatar
Bruce MacDonald committed
1377
1378
		var sb strings.Builder
		for resp := range ch {
1379
1380
			switch r := resp.(type) {
			case api.ChatResponse:
1381
				sb.WriteString(r.Message.Content)
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
				final = r
			case gin.H:
				if errorMsg, ok := r["error"].(string); ok {
					c.JSON(http.StatusInternalServerError, gin.H{"error": errorMsg})
					return
				} else {
					c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected error format in response"})
					return
				}
			default:
				c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected error"})
				return
Bruce MacDonald's avatar
Bruce MacDonald committed
1394
1395
			}
		}
1396

1397
		final.Message = api.Message{Role: "assistant", Content: sb.String()}
1398
		c.JSON(http.StatusOK, final)
Bruce MacDonald's avatar
Bruce MacDonald committed
1399
1400
1401
1402
1403
		return
	}

	streamResponse(c, ch)
}
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415

// handleErrorResponse writes err to the client as a JSON error, choosing the
// status code from the kind of failure: 499 for a cancelled request, 503 when
// the scheduler queue is full (ErrMaxQueue), and 500 for anything else.
func handleErrorResponse(c *gin.Context, err error) {
	switch {
	case errors.Is(err, context.Canceled):
		c.JSON(499, gin.H{"error": "request canceled"})
	case errors.Is(err, ErrMaxQueue):
		c.JSON(http.StatusServiceUnavailable, gin.H{"error": err.Error()})
	default:
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
	}
}