routes.go 34 KB
Newer Older
Jeffrey Morgan's avatar
Jeffrey Morgan committed
1
2
3
package server

import (
4
	"context"
Michael Yang's avatar
Michael Yang committed
5
	"encoding/json"
6
	"errors"
7
	"fmt"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
8
	"io"
9
	"io/fs"
10
	"log/slog"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
11
12
	"net"
	"net/http"
13
	"os"
14
	"os/signal"
Michael Yang's avatar
Michael Yang committed
15
	"path/filepath"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
16
	"reflect"
17
	"runtime"
Michael Yang's avatar
Michael Yang committed
18
	"strings"
Michael Yang's avatar
Michael Yang committed
19
	"sync"
20
	"syscall"
21
	"time"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
22

Michael Yang's avatar
Michael Yang committed
23
	"github.com/gin-contrib/cors"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
24
	"github.com/gin-gonic/gin"
25
	"golang.org/x/exp/slices"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
26

Jeffrey Morgan's avatar
Jeffrey Morgan committed
27
	"github.com/jmorganca/ollama/api"
28
	"github.com/jmorganca/ollama/gpu"
29
	"github.com/jmorganca/ollama/llm"
30
	"github.com/jmorganca/ollama/openai"
Michael Yang's avatar
Michael Yang committed
31
	"github.com/jmorganca/ollama/parser"
Michael Yang's avatar
Michael Yang committed
32
	"github.com/jmorganca/ollama/version"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
33
34
)

Michael Yang's avatar
Michael Yang committed
35
36
// mode is the gin run mode applied by init; it starts out as gin.DebugMode.
var mode string = gin.DebugMode

37
38
39
40
// Server holds server-wide settings.
type Server struct {
	// WorkDir is a working directory path.
	// NOTE(review): presumably the value handlers read back via
	// c.GetString("workDir") — confirm against the router setup.
	WorkDir string
}

Michael Yang's avatar
Michael Yang committed
41
42
43
44
45
46
47
48
49
50
51
52
// init normalizes mode to one of gin's known run modes, falling back to
// gin.DebugMode for any other value, and then applies it to gin.
func init() {
	known := mode == gin.DebugMode || mode == gin.ReleaseMode || mode == gin.TestMode
	if !known {
		mode = gin.DebugMode
	}

	gin.SetMode(mode)
}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
53
var loaded struct {
Michael Yang's avatar
Michael Yang committed
54
55
	mu sync.Mutex

56
	runner llm.LLM
Michael Yang's avatar
Michael Yang committed
57
58
59

	expireAt    time.Time
	expireTimer *time.Timer
Jeffrey Morgan's avatar
Jeffrey Morgan committed
60

61
62
	*Model
	*api.Options
Michael Yang's avatar
Michael Yang committed
63
64
}

65
66
// defaultSessionDuration is the session duration used when a request does not set KeepAlive.
var defaultSessionDuration = 5 * time.Minute

Bruce MacDonald's avatar
Bruce MacDonald committed
67
// load a model into memory if it is not already loaded, it is up to the caller to lock loaded.mu before calling this function
68
func load(c *gin.Context, model *Model, opts api.Options, sessionDuration time.Duration) error {
Bruce MacDonald's avatar
Bruce MacDonald committed
69
70
	workDir := c.GetString("workDir")

71
72
73
74
75
76
77
	needLoad := loaded.runner == nil || // is there a model loaded?
		loaded.ModelPath != model.ModelPath || // has the base model changed?
		!reflect.DeepEqual(loaded.AdapterPaths, model.AdapterPaths) || // have the adapters changed?
		!reflect.DeepEqual(loaded.Options.Runner, opts.Runner) // have the runner options changed?

	if needLoad {
		if loaded.runner != nil {
78
			slog.Info("changing loaded model")
79
80
81
82
			loaded.runner.Close()
			loaded.runner = nil
			loaded.Model = nil
			loaded.Options = nil
Michael Yang's avatar
Michael Yang committed
83
		}
Michael Yang's avatar
Michael Yang committed
84

Michael Yang's avatar
Michael Yang committed
85
		llmRunner, err := llm.New(workDir, model.ModelPath, model.AdapterPaths, model.ProjectorPaths, opts)
Michael Yang's avatar
Michael Yang committed
86
		if err != nil {
87
88
89
			// some older models are not compatible with newer versions of llama.cpp
			// show a generalized compatibility error until there is a better way to
			// check for model compatibility
Bruce MacDonald's avatar
Bruce MacDonald committed
90
			if errors.Is(llm.ErrUnsupportedFormat, err) || strings.Contains(err.Error(), "failed to load model") {
91
92
93
				err = fmt.Errorf("%v: this model may be incompatible with your version of Ollama. If you previously pulled this model, try updating it by running `ollama pull %s`", err, model.ShortName)
			}

94
			return err
Michael Yang's avatar
Michael Yang committed
95
96
		}

97
98
99
		loaded.Model = model
		loaded.runner = llmRunner
		loaded.Options = &opts
Michael Yang's avatar
Michael Yang committed
100
	}
101

Jeffrey Morgan's avatar
Jeffrey Morgan committed
102
	loaded.expireAt = time.Now().Add(sessionDuration)
Bruce MacDonald's avatar
Bruce MacDonald committed
103

Jeffrey Morgan's avatar
Jeffrey Morgan committed
104
105
106
107
	if loaded.expireTimer == nil {
		loaded.expireTimer = time.AfterFunc(sessionDuration, func() {
			loaded.mu.Lock()
			defer loaded.mu.Unlock()
Michael Yang's avatar
Michael Yang committed
108

Jeffrey Morgan's avatar
Jeffrey Morgan committed
109
			if time.Now().Before(loaded.expireAt) {
Michael Yang's avatar
Michael Yang committed
110
111
112
				return
			}

113
114
			if loaded.runner != nil {
				loaded.runner.Close()
Michael Yang's avatar
Michael Yang committed
115
116
			}

117
118
119
			loaded.runner = nil
			loaded.Model = nil
			loaded.Options = nil
Michael Yang's avatar
Michael Yang committed
120
		})
Michael Yang's avatar
Michael Yang committed
121
	}
122

Jeffrey Morgan's avatar
Jeffrey Morgan committed
123
	loaded.expireTimer.Reset(sessionDuration)
124
125
126
127
128
129
130
131
132
133
134
135
136
137
	return nil
}

func modelOptions(model *Model, requestOpts map[string]interface{}) (api.Options, error) {
	opts := api.DefaultOptions()
	if err := opts.FromMap(model.Options); err != nil {
		return api.Options{}, err
	}

	if err := opts.FromMap(requestOpts); err != nil {
		return api.Options{}, err
	}

	return opts, nil
Bruce MacDonald's avatar
Bruce MacDonald committed
138
139
}

140
141
142
143
144
145
// isSupportedImageType reports whether the sniffed content type of image
// (per http.DetectContentType) is one of the accepted image types.
func isSupportedImageType(image []byte) bool {
	supported := []string{"image/jpeg", "image/jpg", "image/png"}
	return slices.Contains(supported, http.DetectContentType(image))
}

Bruce MacDonald's avatar
Bruce MacDonald committed
146
147
148
149
150
151
func GenerateHandler(c *gin.Context) {
	loaded.mu.Lock()
	defer loaded.mu.Unlock()

	checkpointStart := time.Now()
	var req api.GenerateRequest
Michael Yang's avatar
Michael Yang committed
152
	err := c.ShouldBindJSON(&req)
Patrick Devine's avatar
Patrick Devine committed
153

Michael Yang's avatar
Michael Yang committed
154
155
156
157
158
159
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Bruce MacDonald's avatar
Bruce MacDonald committed
160
161
162
		return
	}

163
164
165
	// validate the request
	switch {
	case req.Model == "":
166
167
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
		return
168
169
170
	case len(req.Format) > 0 && req.Format != "json":
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "format must be json"})
		return
171
172
173
	case req.Raw && (req.Template != "" || req.System != "" || len(req.Context) > 0):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "raw mode does not support template, system, or context"})
		return
174
175
	}

176
177
178
179
180
181
182
	for _, img := range req.Images {
		if !isSupportedImageType(img) {
			c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "unsupported image format"})
			return
		}
	}

183
	model, err := GetModel(req.Model)
Bruce MacDonald's avatar
Bruce MacDonald committed
184
	if err != nil {
185
		var pErr *fs.PathError
186
		if errors.As(err, &pErr) {
187
			c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found, try pulling it first", req.Model)})
188
189
190
191
192
193
194
195
196
			return
		}
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

	opts, err := modelOptions(model, req.Options)
	if err != nil {
		if errors.Is(err, api.ErrInvalidOpts) {
Bruce MacDonald's avatar
Bruce MacDonald committed
197
			c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
198
			return
199
		}
200
201
202
203
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

204
205
206
207
208
209
210
	var sessionDuration time.Duration
	if req.KeepAlive == nil {
		sessionDuration = defaultSessionDuration
	} else {
		sessionDuration = req.KeepAlive.Duration
	}

211
212
	if err := load(c, model, opts, sessionDuration); err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
Bruce MacDonald's avatar
Bruce MacDonald committed
213
214
215
		return
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
216
217
	// an empty request loads the model
	if req.Prompt == "" && req.Template == "" && req.System == "" {
218
		c.JSON(http.StatusOK, api.GenerateResponse{
219
220
			CreatedAt: time.Now().UTC(),
			Model:     req.Model,
Michael Yang's avatar
Michael Yang committed
221
222
			Done:      true,
		})
Bruce MacDonald's avatar
Bruce MacDonald committed
223
224
225
226
227
		return
	}

	checkpointLoaded := time.Now()

Bruce MacDonald's avatar
Bruce MacDonald committed
228
	var prompt string
229
	var promptVars PromptVars
Bruce MacDonald's avatar
Bruce MacDonald committed
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
	switch {
	case req.Raw:
		prompt = req.Prompt
	case req.Prompt != "":
		if req.Template != "" {
			// override the default model template
			model.Template = req.Template
		}

		var rebuild strings.Builder
		if req.Context != nil {
			// TODO: context is deprecated, at some point the context logic within this conditional should be removed
			prevCtx, err := loaded.runner.Decode(c.Request.Context(), req.Context)
			if err != nil {
				c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
				return
			}

			// Remove leading spaces from prevCtx if present
			prevCtx = strings.TrimPrefix(prevCtx, " ")
			rebuild.WriteString(prevCtx)
		}
252
		promptVars = PromptVars{
Bruce MacDonald's avatar
Bruce MacDonald committed
253
254
255
			System: req.System,
			Prompt: req.Prompt,
			First:  len(req.Context) == 0,
256
		}
257
258
259
260
261

		if promptVars.System == "" {
			promptVars.System = model.System
		}

262
263
264
265
		for i := range req.Images {
			promptVars.Prompt += fmt.Sprintf(" [img-%d]", i)
		}

266
		p, err := model.PreResponsePrompt(promptVars)
267
268
269
270
		if err != nil {
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
			return
		}
Bruce MacDonald's avatar
Bruce MacDonald committed
271
272
		rebuild.WriteString(p)
		prompt = rebuild.String()
Bruce MacDonald's avatar
Bruce MacDonald committed
273
274
	}

Michael Yang's avatar
Michael Yang committed
275
	slog.Debug("generate handler", "prompt", prompt)
276

Bruce MacDonald's avatar
Bruce MacDonald committed
277
	ch := make(chan any)
Bruce MacDonald's avatar
Bruce MacDonald committed
278
	var generated strings.Builder
Bruce MacDonald's avatar
Bruce MacDonald committed
279
280
281
	go func() {
		defer close(ch)

Bruce MacDonald's avatar
Bruce MacDonald committed
282
283
		fn := func(r llm.PredictResult) {
			// Update model expiration
Bruce MacDonald's avatar
Bruce MacDonald committed
284
285
286
			loaded.expireAt = time.Now().Add(sessionDuration)
			loaded.expireTimer.Reset(sessionDuration)

Bruce MacDonald's avatar
Bruce MacDonald committed
287
288
289
290
			// Build up the full response
			if _, err := generated.WriteString(r.Content); err != nil {
				ch <- gin.H{"error": err.Error()}
				return
Bruce MacDonald's avatar
Bruce MacDonald committed
291
292
			}

Bruce MacDonald's avatar
Bruce MacDonald committed
293
			resp := api.GenerateResponse{
294
				Model:     req.Model,
295
				CreatedAt: time.Now().UTC(),
296
297
				Done:      r.Done,
				Response:  r.Content,
Bruce MacDonald's avatar
Bruce MacDonald committed
298
299
300
301
302
303
				Metrics: api.Metrics{
					PromptEvalCount:    r.PromptEvalCount,
					PromptEvalDuration: r.PromptEvalDuration,
					EvalCount:          r.EvalCount,
					EvalDuration:       r.EvalDuration,
				},
Bruce MacDonald's avatar
Bruce MacDonald committed
304
305
			}

306
307
308
309
310
			if r.Done {
				resp.TotalDuration = time.Since(checkpointStart)
				resp.LoadDuration = checkpointLoaded.Sub(checkpointStart)

				if !req.Raw {
311
312
313
314
315
316
317
318
					// append the generated text to the history and template it if needed
					promptVars.Response = generated.String()
					result, err := model.PostResponseTemplate(promptVars)
					if err != nil {
						ch <- gin.H{"error": err.Error()}
						return
					}
					embd, err := loaded.runner.Encode(c.Request.Context(), prompt+result)
319
320
321
322
323
					if err != nil {
						ch <- gin.H{"error": err.Error()}
						return
					}
					resp.Context = embd
Bruce MacDonald's avatar
Bruce MacDonald committed
324
325
326
327
				}
			}

			ch <- resp
Bruce MacDonald's avatar
Bruce MacDonald committed
328
329
		}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
330
		var images []llm.ImageData
Michael Yang's avatar
Michael Yang committed
331
		for i := range req.Images {
Jeffrey Morgan's avatar
Jeffrey Morgan committed
332
333
334
335
			images = append(images, llm.ImageData{
				ID:   i,
				Data: req.Images[i],
			})
Michael Yang's avatar
Michael Yang committed
336
337
		}

Bruce MacDonald's avatar
Bruce MacDonald committed
338
339
		// Start prediction
		predictReq := llm.PredictOpts{
340
341
			Prompt:  prompt,
			Format:  req.Format,
Michael Yang's avatar
Michael Yang committed
342
			Images:  images,
343
			Options: opts,
Bruce MacDonald's avatar
Bruce MacDonald committed
344
345
		}
		if err := loaded.runner.Predict(c.Request.Context(), predictReq, fn); err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
346
347
348
349
350
			ch <- gin.H{"error": err.Error()}
		}
	}()

	if req.Stream != nil && !*req.Stream {
351
352
		// Accumulate responses into the final response
		var final api.GenerateResponse
Bruce MacDonald's avatar
Bruce MacDonald committed
353
		var sb strings.Builder
Bruce MacDonald's avatar
Bruce MacDonald committed
354
		for resp := range ch {
355
356
357
358
359
360
361
362
363
364
365
366
367
368
			switch r := resp.(type) {
			case api.GenerateResponse:
				sb.WriteString(r.Response)
				final = r
			case gin.H:
				if errorMsg, ok := r["error"].(string); ok {
					c.JSON(http.StatusInternalServerError, gin.H{"error": errorMsg})
					return
				} else {
					c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected error format in response"})
					return
				}
			default:
				c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected error"})
Bruce MacDonald's avatar
Bruce MacDonald committed
369
370
371
				return
			}
		}
372
373
374

		final.Response = sb.String()
		c.JSON(http.StatusOK, final)
Bruce MacDonald's avatar
Bruce MacDonald committed
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
		return
	}

	streamResponse(c, ch)
}

func EmbeddingHandler(c *gin.Context) {
	loaded.mu.Lock()
	defer loaded.mu.Unlock()

	var req api.EmbeddingRequest
	err := c.ShouldBindJSON(&req)
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	if req.Model == "" {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
		return
	}

401
	model, err := GetModel(req.Model)
Bruce MacDonald's avatar
Bruce MacDonald committed
402
	if err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
403
		var pErr *fs.PathError
404
		if errors.As(err, &pErr) {
Bruce MacDonald's avatar
Bruce MacDonald committed
405
			c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found, try pulling it first", req.Model)})
406
407
408
409
410
411
412
413
414
			return
		}
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

	opts, err := modelOptions(model, req.Options)
	if err != nil {
		if errors.Is(err, api.ErrInvalidOpts) {
Bruce MacDonald's avatar
Bruce MacDonald committed
415
			c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
416
			return
Bruce MacDonald's avatar
Bruce MacDonald committed
417
		}
418
419
420
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
421
422
423
424
425
426
427
428

	var sessionDuration time.Duration
	if req.KeepAlive == nil {
		sessionDuration = defaultSessionDuration
	} else {
		sessionDuration = req.KeepAlive.Duration
	}

429
430
	if err := load(c, model, opts, sessionDuration); err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
Bruce MacDonald's avatar
Bruce MacDonald committed
431
432
433
		return
	}

434
	if !loaded.Options.EmbeddingOnly {
Bruce MacDonald's avatar
Bruce MacDonald committed
435
436
437
438
		c.JSON(http.StatusBadRequest, gin.H{"error": "embedding option must be set to true"})
		return
	}

439
	embedding, err := loaded.runner.Embedding(c.Request.Context(), req.Prompt)
Bruce MacDonald's avatar
Bruce MacDonald committed
440
	if err != nil {
441
		slog.Info(fmt.Sprintf("embedding generation failed: %v", err))
Bruce MacDonald's avatar
Bruce MacDonald committed
442
443
444
445
446
447
448
449
450
451
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate embedding"})
		return
	}

	resp := api.EmbeddingResponse{
		Embedding: embedding,
	}
	c.JSON(http.StatusOK, resp)
}

452
func PullModelHandler(c *gin.Context) {
Michael Yang's avatar
Michael Yang committed
453
	var req api.PullRequest
Michael Yang's avatar
Michael Yang committed
454
455
456
457
458
459
460
	err := c.ShouldBindJSON(&req)
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Michael Yang's avatar
Michael Yang committed
461
462
463
		return
	}

Michael Yang's avatar
Michael Yang committed
464
465
466
467
468
469
470
	var model string
	if req.Model != "" {
		model = req.Model
	} else if req.Name != "" {
		model = req.Name
	} else {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
471
472
473
		return
	}

474
475
476
	ch := make(chan any)
	go func() {
		defer close(ch)
477
478
		fn := func(r api.ProgressResponse) {
			ch <- r
479
		}
480

481
482
483
484
		regOpts := &RegistryOptions{
			Insecure: req.Insecure,
		}

485
486
487
		ctx, cancel := context.WithCancel(c.Request.Context())
		defer cancel()

Michael Yang's avatar
Michael Yang committed
488
		if err := PullModel(ctx, model, regOpts, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
489
			ch <- gin.H{"error": err.Error()}
490
491
492
		}
	}()

493
494
495
496
497
	if req.Stream != nil && !*req.Stream {
		waitForStream(c, ch)
		return
	}

498
499
500
	streamResponse(c, ch)
}

501
func PushModelHandler(c *gin.Context) {
502
	var req api.PushRequest
Michael Yang's avatar
Michael Yang committed
503
504
505
506
507
508
509
	err := c.ShouldBindJSON(&req)
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Michael Yang's avatar
Michael Yang committed
510
511
		return
	}
Michael Yang's avatar
Michael Yang committed
512

Michael Yang's avatar
Michael Yang committed
513
514
515
516
517
518
519
	var model string
	if req.Model != "" {
		model = req.Model
	} else if req.Name != "" {
		model = req.Name
	} else {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
520
521
522
		return
	}

523
524
525
	ch := make(chan any)
	go func() {
		defer close(ch)
526
527
		fn := func(r api.ProgressResponse) {
			ch <- r
528
		}
529

530
531
532
533
		regOpts := &RegistryOptions{
			Insecure: req.Insecure,
		}

Michael Yang's avatar
Michael Yang committed
534
535
536
		ctx, cancel := context.WithCancel(c.Request.Context())
		defer cancel()

Michael Yang's avatar
Michael Yang committed
537
		if err := PushModel(ctx, model, regOpts, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
538
			ch <- gin.H{"error": err.Error()}
539
540
541
		}
	}()

542
543
544
545
546
	if req.Stream != nil && !*req.Stream {
		waitForStream(c, ch)
		return
	}

547
548
549
	streamResponse(c, ch)
}

550
func CreateModelHandler(c *gin.Context) {
551
	var req api.CreateRequest
Michael Yang's avatar
Michael Yang committed
552
553
554
555
556
557
558
	err := c.ShouldBindJSON(&req)
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Michael Yang's avatar
Michael Yang committed
559
		return
560
561
	}

Michael Yang's avatar
Michael Yang committed
562
563
564
565
566
567
568
	var model string
	if req.Model != "" {
		model = req.Model
	} else if req.Name != "" {
		model = req.Name
	} else {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
569
570
571
		return
	}

Michael Yang's avatar
Michael Yang committed
572
	if err := ParseModelPath(model).Validate(); err != nil {
573
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
574
575
576
		return
	}

Michael Yang's avatar
Michael Yang committed
577
578
	if req.Path == "" && req.Modelfile == "" {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "path or modelfile are required"})
Michael Yang's avatar
Michael Yang committed
579
580
		return
	}
Michael Yang's avatar
Michael Yang committed
581
582
583

	var modelfile io.Reader = strings.NewReader(req.Modelfile)
	if req.Path != "" && req.Modelfile == "" {
584
		mf, err := os.Open(req.Path)
Michael Yang's avatar
Michael Yang committed
585
586
587
588
		if err != nil {
			c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("error reading modelfile: %s", err)})
			return
		}
589
		defer mf.Close()
Michael Yang's avatar
Michael Yang committed
590

591
		modelfile = mf
Michael Yang's avatar
Michael Yang committed
592
	}
Michael Yang's avatar
Michael Yang committed
593
594
595
596
597
598
599

	commands, err := parser.Parse(modelfile)
	if err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

Michael Yang's avatar
Michael Yang committed
600
	ch := make(chan any)
Michael Yang's avatar
Michael Yang committed
601
602
	go func() {
		defer close(ch)
603
604
		fn := func(resp api.ProgressResponse) {
			ch <- resp
605
606
		}

607
608
609
		ctx, cancel := context.WithCancel(c.Request.Context())
		defer cancel()

Michael Yang's avatar
Michael Yang committed
610
		if err := CreateModel(ctx, model, filepath.Dir(req.Path), commands, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
611
			ch <- gin.H{"error": err.Error()}
612
		}
Michael Yang's avatar
Michael Yang committed
613
	}()
Michael Yang's avatar
Michael Yang committed
614

615
616
617
618
619
	if req.Stream != nil && !*req.Stream {
		waitForStream(c, ch)
		return
	}

Michael Yang's avatar
Michael Yang committed
620
	streamResponse(c, ch)
Bruce MacDonald's avatar
Bruce MacDonald committed
621
622
}

623
624
func DeleteModelHandler(c *gin.Context) {
	var req api.DeleteRequest
Michael Yang's avatar
Michael Yang committed
625
626
627
628
629
630
631
	err := c.ShouldBindJSON(&req)
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
632
633
634
		return
	}

Michael Yang's avatar
Michael Yang committed
635
636
637
638
639
640
641
	var model string
	if req.Model != "" {
		model = req.Model
	} else if req.Name != "" {
		model = req.Name
	} else {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
642
643
644
		return
	}

Michael Yang's avatar
Michael Yang committed
645
	if err := DeleteModel(model); err != nil {
646
		if os.IsNotExist(err) {
Michael Yang's avatar
Michael Yang committed
647
			c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", model)})
648
		} else {
649
650
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		}
651
652
		return
	}
Michael Yang's avatar
Michael Yang committed
653
654
655
656
657
658
659
660
661
662
663
664

	manifestsPath, err := GetManifestPath()
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

	if err := PruneDirectory(manifestsPath); err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

665
	c.JSON(http.StatusOK, nil)
666
667
}

Patrick Devine's avatar
Patrick Devine committed
668
669
func ShowModelHandler(c *gin.Context) {
	var req api.ShowRequest
Michael Yang's avatar
Michael Yang committed
670
671
672
673
674
675
676
	err := c.ShouldBindJSON(&req)
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Patrick Devine's avatar
Patrick Devine committed
677
678
679
		return
	}

Michael Yang's avatar
Michael Yang committed
680
	if req.Model != "" {
Michael Yang's avatar
Michael Yang committed
681
		// noop
Michael Yang's avatar
Michael Yang committed
682
	} else if req.Name != "" {
Michael Yang's avatar
Michael Yang committed
683
		req.Model = req.Name
Michael Yang's avatar
Michael Yang committed
684
	} else {
685
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
686
687
688
		return
	}

689
	resp, err := GetModelInfo(req)
Patrick Devine's avatar
Patrick Devine committed
690
691
	if err != nil {
		if os.IsNotExist(err) {
Michael Yang's avatar
Michael Yang committed
692
			c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Model)})
Patrick Devine's avatar
Patrick Devine committed
693
694
695
696
697
698
699
700
701
		} else {
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		}
		return
	}

	c.JSON(http.StatusOK, resp)
}

702
703
func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {
	model, err := GetModel(req.Model)
Patrick Devine's avatar
Patrick Devine committed
704
705
706
707
	if err != nil {
		return nil, err
	}

Patrick Devine's avatar
Patrick Devine committed
708
	modelDetails := api.ModelDetails{
709
		ParentModel:       model.ParentModel,
Patrick Devine's avatar
Patrick Devine committed
710
711
712
713
714
715
716
		Format:            model.Config.ModelFormat,
		Family:            model.Config.ModelFamily,
		Families:          model.Config.ModelFamilies,
		ParameterSize:     model.Config.ModelType,
		QuantizationLevel: model.Config.FileType,
	}

717
718
719
720
721
722
723
724
	if req.System != "" {
		model.System = req.System
	}

	if req.Template != "" {
		model.Template = req.Template
	}

725
726
727
728
729
	msgs := make([]api.Message, 0)
	for _, msg := range model.Messages {
		msgs = append(msgs, api.Message{Role: msg.Role, Content: msg.Content})
	}

Patrick Devine's avatar
Patrick Devine committed
730
731
732
733
	resp := &api.ShowResponse{
		License:  strings.Join(model.License, "\n"),
		System:   model.System,
		Template: model.Template,
Patrick Devine's avatar
Patrick Devine committed
734
		Details:  modelDetails,
735
		Messages: msgs,
Patrick Devine's avatar
Patrick Devine committed
736
737
738
739
740
741
742
743
	}

	var params []string
	cs := 30
	for k, v := range model.Options {
		switch val := v.(type) {
		case []interface{}:
			for _, nv := range val {
Patrick Devine's avatar
Patrick Devine committed
744
				params = append(params, fmt.Sprintf("%-*s %#v", cs, k, nv))
Patrick Devine's avatar
Patrick Devine committed
745
			}
Patrick Devine's avatar
Patrick Devine committed
746
747
		default:
			params = append(params, fmt.Sprintf("%-*s %#v", cs, k, v))
Patrick Devine's avatar
Patrick Devine committed
748
749
750
751
		}
	}
	resp.Parameters = strings.Join(params, "\n")

752
753
754
755
756
757
758
759
760
761
762
763
764
	for k, v := range req.Options {
		if _, ok := req.Options[k]; ok {
			model.Options[k] = v
		}
	}

	mf, err := ShowModelfile(model)
	if err != nil {
		return nil, err
	}

	resp.Modelfile = mf

Patrick Devine's avatar
Patrick Devine committed
765
766
767
	return resp, nil
}

768
func ListModelsHandler(c *gin.Context) {
769
	models := make([]api.ModelResponse, 0)
770
	manifestsPath, err := GetManifestPath()
Patrick Devine's avatar
Patrick Devine committed
771
772
773
774
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
Michael Yang's avatar
Michael Yang committed
775

Patrick Devine's avatar
Patrick Devine committed
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
	modelResponse := func(modelName string) (api.ModelResponse, error) {
		model, err := GetModel(modelName)
		if err != nil {
			return api.ModelResponse{}, err
		}

		modelDetails := api.ModelDetails{
			Format:            model.Config.ModelFormat,
			Family:            model.Config.ModelFamily,
			Families:          model.Config.ModelFamilies,
			ParameterSize:     model.Config.ModelType,
			QuantizationLevel: model.Config.FileType,
		}

		return api.ModelResponse{
Michael Yang's avatar
Michael Yang committed
791
			Model:   model.ShortName,
Patrick Devine's avatar
Patrick Devine committed
792
793
794
795
796
797
798
			Name:    model.ShortName,
			Size:    model.Size,
			Digest:  model.Digest,
			Details: modelDetails,
		}, nil
	}

Michael Yang's avatar
Michael Yang committed
799
	walkFunc := func(path string, info os.FileInfo, _ error) error {
Patrick Devine's avatar
Patrick Devine committed
800
		if !info.IsDir() {
801
802
803
804
			path, tag := filepath.Split(path)
			model := strings.Trim(strings.TrimPrefix(path, manifestsPath), string(os.PathSeparator))
			modelPath := strings.Join([]string{model, tag}, ":")
			canonicalModelPath := strings.ReplaceAll(modelPath, string(os.PathSeparator), "/")
805

806
			resp, err := modelResponse(canonicalModelPath)
Patrick Devine's avatar
Patrick Devine committed
807
			if err != nil {
808
				slog.Info(fmt.Sprintf("skipping file: %s", canonicalModelPath))
Michael Yang's avatar
Michael Yang committed
809
				// nolint: nilerr
810
				return nil
Patrick Devine's avatar
Patrick Devine committed
811
			}
Michael Yang's avatar
Michael Yang committed
812

Patrick Devine's avatar
Patrick Devine committed
813
814
			resp.ModifiedAt = info.ModTime()
			models = append(models, resp)
Patrick Devine's avatar
Patrick Devine committed
815
		}
Michael Yang's avatar
Michael Yang committed
816

Patrick Devine's avatar
Patrick Devine committed
817
		return nil
Michael Yang's avatar
Michael Yang committed
818
819
	}

820
	if err := filepath.Walk(manifestsPath, walkFunc); err != nil {
Patrick Devine's avatar
Patrick Devine committed
821
822
823
824
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

Michael Yang's avatar
Michael Yang committed
825
	c.JSON(http.StatusOK, api.ListResponse{Models: models})
Patrick Devine's avatar
Patrick Devine committed
826
827
}

Patrick Devine's avatar
Patrick Devine committed
828
829
func CopyModelHandler(c *gin.Context) {
	var req api.CopyRequest
Michael Yang's avatar
Michael Yang committed
830
831
832
833
834
835
836
	err := c.ShouldBindJSON(&req)
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Patrick Devine's avatar
Patrick Devine committed
837
838
839
		return
	}

840
841
842
843
844
	if req.Source == "" || req.Destination == "" {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "source add destination are required"})
		return
	}

845
846
847
848
849
	if err := ParseModelPath(req.Destination).Validate(); err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

Patrick Devine's avatar
Patrick Devine committed
850
851
852
853
854
855
856
857
858
859
	if err := CopyModel(req.Source, req.Destination); err != nil {
		if os.IsNotExist(err) {
			c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Source)})
		} else {
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		}
		return
	}
}

Michael Yang's avatar
Michael Yang committed
860
func HeadBlobHandler(c *gin.Context) {
Michael Yang's avatar
Michael Yang committed
861
862
863
864
865
866
867
868
869
870
871
	path, err := GetBlobsPath(c.Param("digest"))
	if err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	if _, err := os.Stat(path); err != nil {
		c.AbortWithStatusJSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("blob %q not found", c.Param("digest"))})
		return
	}

Michael Yang's avatar
Michael Yang committed
872
	c.Status(http.StatusOK)
Michael Yang's avatar
Michael Yang committed
873
874
875
}

func CreateBlobHandler(c *gin.Context) {
876
	layer, err := NewLayer(c.Request.Body, "")
Michael Yang's avatar
Michael Yang committed
877
878
879
880
881
	if err != nil {
		c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

882
883
	if layer.Digest != c.Param("digest") {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("digest mismatch, expected %q, got %q", c.Param("digest"), layer.Digest)})
Michael Yang's avatar
Michael Yang committed
884
885
886
		return
	}

887
	if _, err := layer.Commit(); err != nil {
Michael Yang's avatar
Michael Yang committed
888
889
890
891
		c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

Michael Yang's avatar
Michael Yang committed
892
	c.Status(http.StatusCreated)
Michael Yang's avatar
Michael Yang committed
893
894
}

Michael Yang's avatar
Michael Yang committed
895
896
897
898
899
900
// defaultAllowOrigins lists the hosts that are always added to the CORS
// allow list, in addition to any origins supplied via OLLAMA_ORIGINS.
var defaultAllowOrigins = []string{"localhost", "127.0.0.1", "0.0.0.0"}

901
902
903
904
905
// NewServer creates a fresh temporary directory (prefixed "ollama") and
// returns a Server that uses it as its WorkDir. The error from os.MkdirTemp is
// returned unchanged.
func NewServer() (*Server, error) {
	tmpDir, err := os.MkdirTemp("", "ollama")
	if err != nil {
		return nil, err
	}

	srv := &Server{WorkDir: tmpDir}
	return srv, nil
}
911

912
913
914
915
func (s *Server) GenerateRoutes() http.Handler {
	var origins []string
	if o := os.Getenv("OLLAMA_ORIGINS"); o != "" {
		origins = strings.Split(o, ",")
916
917
	}

Michael Yang's avatar
Michael Yang committed
918
919
	config := cors.DefaultConfig()
	config.AllowWildcard = true
920
	config.AllowBrowserExtensions = true
Michael Yang's avatar
Michael Yang committed
921

922
	config.AllowOrigins = origins
Michael Yang's avatar
Michael Yang committed
923
924
925
926
927
928
929
930
	for _, allowOrigin := range defaultAllowOrigins {
		config.AllowOrigins = append(config.AllowOrigins,
			fmt.Sprintf("http://%s", allowOrigin),
			fmt.Sprintf("https://%s", allowOrigin),
			fmt.Sprintf("http://%s:*", allowOrigin),
			fmt.Sprintf("https://%s:*", allowOrigin),
		)
	}
Michael Yang's avatar
Michael Yang committed
931

Bruce MacDonald's avatar
Bruce MacDonald committed
932
	r := gin.Default()
933
934
935
	r.Use(
		cors.New(config),
		func(c *gin.Context) {
936
			c.Set("workDir", s.WorkDir)
937
938
939
			c.Next()
		},
	)
Bruce MacDonald's avatar
Bruce MacDonald committed
940

941
942
	r.POST("/api/pull", PullModelHandler)
	r.POST("/api/generate", GenerateHandler)
Bruce MacDonald's avatar
Bruce MacDonald committed
943
	r.POST("/api/chat", ChatHandler)
Bruce MacDonald's avatar
Bruce MacDonald committed
944
	r.POST("/api/embeddings", EmbeddingHandler)
945
946
	r.POST("/api/create", CreateModelHandler)
	r.POST("/api/push", PushModelHandler)
Patrick Devine's avatar
Patrick Devine committed
947
	r.POST("/api/copy", CopyModelHandler)
948
	r.DELETE("/api/delete", DeleteModelHandler)
Patrick Devine's avatar
Patrick Devine committed
949
	r.POST("/api/show", ShowModelHandler)
Michael Yang's avatar
Michael Yang committed
950
	r.POST("/api/blobs/:digest", CreateBlobHandler)
Michael Yang's avatar
Michael Yang committed
951
	r.HEAD("/api/blobs/:digest", HeadBlobHandler)
Jeffrey Morgan's avatar
Jeffrey Morgan committed
952

953
954
955
	// Compatibility endpoints
	r.POST("/v1/chat/completions", openai.Middleware(), ChatHandler)

Michael Yang's avatar
Michael Yang committed
956
957
958
959
960
961
	for _, method := range []string{http.MethodGet, http.MethodHead} {
		r.Handle(method, "/", func(c *gin.Context) {
			c.String(http.StatusOK, "Ollama is running")
		})

		r.Handle(method, "/api/tags", ListModelsHandler)
Michael Yang's avatar
Michael Yang committed
962
963
964
		r.Handle(method, "/api/version", func(c *gin.Context) {
			c.JSON(http.StatusOK, gin.H{"version": version.Version})
		})
Michael Yang's avatar
Michael Yang committed
965
966
	}

967
968
969
970
	return r
}

func Serve(ln net.Listener) error {
Michael Yang's avatar
Michael Yang committed
971
	level := slog.LevelInfo
972
	if debug := os.Getenv("OLLAMA_DEBUG"); debug != "" {
Michael Yang's avatar
Michael Yang committed
973
		level = slog.LevelDebug
974
	}
Michael Yang's avatar
Michael Yang committed
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990

	handler := slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{
		Level:     level,
		AddSource: true,
		ReplaceAttr: func(_ []string, attr slog.Attr) slog.Attr {
			if attr.Key == slog.SourceKey {
				source := attr.Value.Any().(*slog.Source)
				source.File = filepath.Base(source.File)
			}

			return attr
		},
	})

	slog.SetDefault(slog.New(handler))

991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
	if noprune := os.Getenv("OLLAMA_NOPRUNE"); noprune == "" {
		// clean up unused layers and manifests
		if err := PruneLayers(); err != nil {
			return err
		}

		manifestsPath, err := GetManifestPath()
		if err != nil {
			return err
		}

		if err := PruneDirectory(manifestsPath); err != nil {
			return err
		}
	}

	s, err := NewServer()
	if err != nil {
		return err
	}
	r := s.GenerateRoutes()

1013
	slog.Info(fmt.Sprintf("Listening on %s (version %s)", ln.Addr(), version.Version))
1014
	srvr := &http.Server{
Jeffrey Morgan's avatar
Jeffrey Morgan committed
1015
1016
1017
		Handler: r,
	}

1018
1019
	// listen for a ctrl+c and stop any loaded llm
	signals := make(chan os.Signal, 1)
1020
	signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM)
1021
1022
	go func() {
		<-signals
1023
1024
		if loaded.runner != nil {
			loaded.runner.Close()
1025
		}
1026
		os.RemoveAll(s.WorkDir)
1027
1028
1029
		os.Exit(0)
	}()

1030
1031
1032
1033
	if err := llm.Init(s.WorkDir); err != nil {
		return fmt.Errorf("unable to initialize llm library %w", err)
	}
	if runtime.GOOS == "linux" { // TODO - windows too
1034
		// check compatibility to log warnings
1035
		if _, err := gpu.CheckVRAM(); err != nil {
1036
			slog.Info(err.Error())
1037
1038
1039
		}
	}

1040
	return srvr.Serve(ln)
Jeffrey Morgan's avatar
Jeffrey Morgan committed
1041
}
Michael Yang's avatar
Michael Yang committed
1042

1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
// waitForStream drains ch and writes exactly one JSON response to c.
//
// Progress responses are skipped until one has Status "success", which is
// written with 200. A gin.H value, any value of another type, or the channel
// closing before a success is seen all produce a 500 with an error message.
func waitForStream(c *gin.Context, ch chan interface{}) {
	c.Header("Content-Type", "application/json")

	fail := func(msg string) {
		c.JSON(http.StatusInternalServerError, gin.H{"error": msg})
	}

	for item := range ch {
		switch v := item.(type) {
		case api.ProgressResponse:
			// keep waiting until the final progress update arrives
			if v.Status != "success" {
				continue
			}
			c.JSON(http.StatusOK, v)
			return
		case gin.H:
			msg, isString := v["error"].(string)
			if !isString {
				msg = "unexpected error format in progress response"
			}
			fail(msg)
			return
		default:
			fail("unexpected progress response")
			return
		}
	}

	fail("unexpected end of progress response")
}

Michael Yang's avatar
Michael Yang committed
1068
func streamResponse(c *gin.Context, ch chan any) {
1069
	c.Header("Content-Type", "application/x-ndjson")
Michael Yang's avatar
Michael Yang committed
1070
1071
1072
1073
1074
1075
1076
1077
	c.Stream(func(w io.Writer) bool {
		val, ok := <-ch
		if !ok {
			return false
		}

		bts, err := json.Marshal(val)
		if err != nil {
1078
			slog.Info(fmt.Sprintf("streamResponse: json.Marshal failed with %s", err))
Michael Yang's avatar
Michael Yang committed
1079
1080
1081
			return false
		}

1082
		// Delineate chunks with new-line delimiter
Michael Yang's avatar
Michael Yang committed
1083
1084
		bts = append(bts, '\n')
		if _, err := w.Write(bts); err != nil {
1085
			slog.Info(fmt.Sprintf("streamResponse: w.Write failed with %s", err))
Michael Yang's avatar
Michael Yang committed
1086
1087
1088
1089
1090
1091
			return false
		}

		return true
	})
}
Bruce MacDonald's avatar
Bruce MacDonald committed
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119

func ChatHandler(c *gin.Context) {
	loaded.mu.Lock()
	defer loaded.mu.Unlock()

	checkpointStart := time.Now()

	var req api.ChatRequest
	err := c.ShouldBindJSON(&req)
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	// validate the request
	switch {
	case req.Model == "":
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
		return
	case len(req.Format) > 0 && req.Format != "json":
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "format must be json"})
		return
	}

1120
1121
1122
1123
1124
1125
1126
1127
1128
	for _, msg := range req.Messages {
		for _, img := range msg.Images {
			if !isSupportedImageType(img) {
				c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "unsupported image format"})
				return
			}
		}
	}

1129
	model, err := GetModel(req.Model)
Bruce MacDonald's avatar
Bruce MacDonald committed
1130
1131
	if err != nil {
		var pErr *fs.PathError
1132
		if errors.As(err, &pErr) {
Bruce MacDonald's avatar
Bruce MacDonald committed
1133
			c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found, try pulling it first", req.Model)})
1134
1135
1136
1137
1138
1139
1140
1141
1142
			return
		}
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

	opts, err := modelOptions(model, req.Options)
	if err != nil {
		if errors.Is(err, api.ErrInvalidOpts) {
Bruce MacDonald's avatar
Bruce MacDonald committed
1143
			c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
1144
			return
Bruce MacDonald's avatar
Bruce MacDonald committed
1145
		}
1146
1147
1148
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
1149
1150
1151
1152
1153
1154
1155
1156

	var sessionDuration time.Duration
	if req.KeepAlive == nil {
		sessionDuration = defaultSessionDuration
	} else {
		sessionDuration = req.KeepAlive.Duration
	}

1157
1158
	if err := load(c, model, opts, sessionDuration); err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
Bruce MacDonald's avatar
Bruce MacDonald committed
1159
1160
1161
1162
1163
		return
	}

	checkpointLoaded := time.Now()

1164
	chat, err := model.ChatPrompts(req.Messages)
Bruce MacDonald's avatar
Bruce MacDonald committed
1165
1166
1167
1168
	if err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}
Michael Yang's avatar
Michael Yang committed
1169
1170

	prompt, images, err := trimmedPrompt(c.Request.Context(), chat, model)
1171
1172
1173
1174
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
Bruce MacDonald's avatar
Bruce MacDonald committed
1175

1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
	// an empty request loads the model
	if len(prompt) == 0 {
		resp := api.ChatResponse{
			CreatedAt: time.Now().UTC(),
			Model:     req.Model,
			Done:      true,
			Message:   api.Message{Role: "assistant"},
		}
		c.JSON(http.StatusOK, resp)
		return
	}

Michael Yang's avatar
Michael Yang committed
1188
	slog.Debug("chat handler", "prompt", prompt)
1189

Bruce MacDonald's avatar
Bruce MacDonald committed
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
	ch := make(chan any)

	go func() {
		defer close(ch)

		fn := func(r llm.PredictResult) {
			// Update model expiration
			loaded.expireAt = time.Now().Add(sessionDuration)
			loaded.expireTimer.Reset(sessionDuration)

			resp := api.ChatResponse{
1201
				Model:     req.Model,
1202
				CreatedAt: time.Now().UTC(),
1203
				Message:   api.Message{Role: "assistant", Content: r.Content},
Bruce MacDonald's avatar
Bruce MacDonald committed
1204
1205
1206
1207
1208
1209
1210
1211
1212
				Done:      r.Done,
				Metrics: api.Metrics{
					PromptEvalCount:    r.PromptEvalCount,
					PromptEvalDuration: r.PromptEvalDuration,
					EvalCount:          r.EvalCount,
					EvalDuration:       r.EvalDuration,
				},
			}

1213
1214
1215
			if r.Done {
				resp.TotalDuration = time.Since(checkpointStart)
				resp.LoadDuration = checkpointLoaded.Sub(checkpointStart)
Bruce MacDonald's avatar
Bruce MacDonald committed
1216
1217
1218
1219
1220
1221
1222
			}

			ch <- resp
		}

		// Start prediction
		predictReq := llm.PredictOpts{
1223
1224
			Prompt:  prompt,
			Format:  req.Format,
Michael Yang's avatar
Michael Yang committed
1225
			Images:  images,
1226
			Options: opts,
Bruce MacDonald's avatar
Bruce MacDonald committed
1227
1228
1229
1230
1231
1232
1233
		}
		if err := loaded.runner.Predict(c.Request.Context(), predictReq, fn); err != nil {
			ch <- gin.H{"error": err.Error()}
		}
	}()

	if req.Stream != nil && !*req.Stream {
1234
1235
		// Accumulate responses into the final response
		var final api.ChatResponse
Bruce MacDonald's avatar
Bruce MacDonald committed
1236
1237
		var sb strings.Builder
		for resp := range ch {
1238
1239
			switch r := resp.(type) {
			case api.ChatResponse:
1240
				sb.WriteString(r.Message.Content)
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
				final = r
			case gin.H:
				if errorMsg, ok := r["error"].(string); ok {
					c.JSON(http.StatusInternalServerError, gin.H{"error": errorMsg})
					return
				} else {
					c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected error format in response"})
					return
				}
			default:
				c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected error"})
				return
Bruce MacDonald's avatar
Bruce MacDonald committed
1253
1254
			}
		}
1255

1256
		final.Message = api.Message{Role: "assistant", Content: sb.String()}
1257
		c.JSON(http.StatusOK, final)
Bruce MacDonald's avatar
Bruce MacDonald committed
1258
1259
1260
1261
1262
		return
	}

	streamResponse(c, ch)
}
1263
1264
1265
1266
1267
1268
1269
1270
1271

// promptInfo stores the variables used to template a prompt, and the token length of the resulting template for some model
type promptInfo struct {
	// vars holds the template variables for one prompt
	vars PromptVars
	// tokenLen is the number of tokens the templated prompt encoded to
	tokenLen int
}

// trimmedPrompt builds a prompt to send to a running model. It ensures the prompt fits within the max context length,
// while preserving the most recent system message.
Michael Yang's avatar
Michael Yang committed
1272
func trimmedPrompt(ctx context.Context, chat *ChatHistory, model *Model) (string, []llm.ImageData, error) {
1273
	if len(chat.Prompts) == 0 {
Michael Yang's avatar
Michael Yang committed
1274
		return "", nil, nil
1275
1276
1277
1278
1279
1280
	}

	var promptsToAdd []promptInfo
	var totalTokenLength int
	var systemPromptIncluded bool

Michael Yang's avatar
Michael Yang committed
1281
	var images []llm.ImageData
1282
1283
	// reverse iterate through the prompts to build the prompt string in a way that fits the max context length
	for i := len(chat.Prompts) - 1; i >= 0; i-- {
Michael Yang's avatar
Michael Yang committed
1284
1285
		prompt := chat.Prompts[i]
		promptText, err := promptString(model, prompt, i == len(chat.Prompts)-1)
1286
		if err != nil {
Michael Yang's avatar
Michael Yang committed
1287
			return "", nil, err
1288
1289
1290
1291
		}

		encodedTokens, err := loaded.runner.Encode(ctx, promptText)
		if err != nil {
Michael Yang's avatar
Michael Yang committed
1292
			return "", nil, err
1293
1294
1295
1296
1297
1298
		}

		if totalTokenLength+len(encodedTokens) > loaded.NumCtx && i != len(chat.Prompts)-1 {
			break // reached max context length, stop adding more prompts
		}

Michael Yang's avatar
Michael Yang committed
1299
1300
1301
1302
1303
1304
		for j := range prompt.Images {
			if totalTokenLength+768 > loaded.NumCtx {
				// this decreases the token length but overestimating is fine
				prompt.Prompt = strings.ReplaceAll(prompt.Prompt, fmt.Sprintf(" [img-%d]", prompt.Images[j].ID), "")
				continue
			}
Michael Yang's avatar
Michael Yang committed
1305

Michael Yang's avatar
Michael Yang committed
1306
1307
1308
			totalTokenLength += 768
			images = append(images, prompt.Images[j])
		}
1309

1310
		totalTokenLength += len(encodedTokens)
Michael Yang's avatar
Michael Yang committed
1311
1312
		systemPromptIncluded = systemPromptIncluded || prompt.System != ""
		promptsToAdd = append(promptsToAdd, promptInfo{vars: prompt, tokenLen: len(encodedTokens)})
1313
1314
1315
1316
1317
1318
1319
	}

	// ensure the system prompt is included, if not already
	if chat.LastSystem != "" && !systemPromptIncluded {
		var err error
		promptsToAdd, err = includeSystemPrompt(ctx, chat.LastSystem, totalTokenLength, promptsToAdd)
		if err != nil {
Michael Yang's avatar
Michael Yang committed
1320
			return "", nil, err
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
		}
	}

	promptsToAdd[len(promptsToAdd)-1].vars.First = true

	// construct the final prompt string from the prompts which fit within the context window
	var result string
	for i, prompt := range promptsToAdd {
		promptText, err := promptString(model, prompt.vars, i == 0)
		if err != nil {
Michael Yang's avatar
Michael Yang committed
1331
			return "", nil, err
1332
1333
1334
		}
		result = promptText + result
	}
Michael Yang's avatar
Michael Yang committed
1335

Michael Yang's avatar
Michael Yang committed
1336
	return result, images, nil
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
}

// promptString renders vars through the model's template. The most recent
// prompt is rendered with model.PreResponsePrompt (errors are wrapped with
// "pre-response template"); every other prompt is rendered with Prompt using
// model.Template. An empty string is returned alongside any error.
func promptString(model *Model, vars PromptVars, isMostRecent bool) (string, error) {
	if !isMostRecent {
		text, err := Prompt(model.Template, vars)
		if err != nil {
			return "", err
		}
		return text, nil
	}

	text, err := model.PreResponsePrompt(vars)
	if err != nil {
		return "", fmt.Errorf("pre-response template: %w", err)
	}
	return text, nil
}

// includeSystemPrompt adjusts the prompts to include the system prompt.
//
// promptsToAdd must be non-empty and ordered newest first (index 0 is the most
// recent prompt), and totalTokenLength is the token count already budgeted for
// it. Prompts are dropped from the oldest end until the encoded system prompt
// fits within loaded.NumCtx; the system prompt is then set on the oldest
// remaining prompt (modifying promptsToAdd in place) and the shortened slice is
// returned. If the system prompt never fits, only the most recent prompt is
// returned, as a copy with the system prompt set.
//
// An error is returned only when encoding the system prompt fails.
func includeSystemPrompt(ctx context.Context, systemPrompt string, totalTokenLength int, promptsToAdd []promptInfo) ([]promptInfo, error) {
	systemTokens, err := loaded.runner.Encode(ctx, systemPrompt)
	if err != nil {
		return nil, err
	}

	for i := len(promptsToAdd) - 1; i >= 0; i-- {
		if totalTokenLength+len(systemTokens) <= loaded.NumCtx {
			promptsToAdd[i].vars.System = systemPrompt
			return promptsToAdd[:i+1], nil
		}
		// drop the oldest remaining prompt and try again
		totalTokenLength -= promptsToAdd[i].tokenLen
	}

	// if got here, system did not fit anywhere, so return the most recent prompt with the system message set.
	// The slice is newest-first, so the most recent prompt lives at index 0, not at the end.
	recent := promptsToAdd[0]
	recent.vars.System = systemPrompt
	return []promptInfo{recent}, nil
}