routes.go 25.9 KB
Newer Older
Jeffrey Morgan's avatar
Jeffrey Morgan committed
1
2
3
package server

import (
4
	"context"
Michael Yang's avatar
Michael Yang committed
5
	"crypto/sha256"
Michael Yang's avatar
Michael Yang committed
6
	"encoding/json"
7
	"errors"
8
	"fmt"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
9
	"io"
10
	"io/fs"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
11
12
13
	"log"
	"net"
	"net/http"
14
	"os"
15
	"os/signal"
Michael Yang's avatar
Michael Yang committed
16
	"path/filepath"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
17
	"reflect"
18
	"runtime"
Patrick Devine's avatar
Patrick Devine committed
19
	"strconv"
Michael Yang's avatar
Michael Yang committed
20
	"strings"
Michael Yang's avatar
Michael Yang committed
21
	"sync"
22
	"syscall"
23
	"time"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
24

Michael Yang's avatar
Michael Yang committed
25
	"github.com/gin-contrib/cors"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
26
27
	"github.com/gin-gonic/gin"

Jeffrey Morgan's avatar
Jeffrey Morgan committed
28
	"github.com/jmorganca/ollama/api"
29
	"github.com/jmorganca/ollama/llm"
Michael Yang's avatar
Michael Yang committed
30
	"github.com/jmorganca/ollama/parser"
Michael Yang's avatar
Michael Yang committed
31
	"github.com/jmorganca/ollama/version"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
32
33
)

Michael Yang's avatar
Michael Yang committed
34
35
36
37
38
39
40
41
42
43
44
45
46
47
// mode is the gin run mode applied in init; it defaults to gin.DebugMode.
var mode string = gin.DebugMode

// init normalises mode to one of the modes gin understands and applies it.
// Any unrecognised value falls back to gin.DebugMode.
func init() {
	switch mode {
	case gin.DebugMode, gin.ReleaseMode, gin.TestMode:
		// already a valid gin mode, keep it as is
	default:
		mode = gin.DebugMode
	}

	gin.SetMode(mode)
}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
48
var loaded struct {
Michael Yang's avatar
Michael Yang committed
49
50
	mu sync.Mutex

51
	runner llm.LLM
Michael Yang's avatar
Michael Yang committed
52
53
54

	expireAt    time.Time
	expireTimer *time.Timer
Jeffrey Morgan's avatar
Jeffrey Morgan committed
55

56
57
	*Model
	*api.Options
Michael Yang's avatar
Michael Yang committed
58
59
}

60
61
// defaultSessionDuration is the session duration the generate and embedding
// handlers pass to load, which uses it to set the loaded model's expiry.
var defaultSessionDuration = 5 * time.Minute

Bruce MacDonald's avatar
Bruce MacDonald committed
62
// load a model into memory if it is not already loaded, it is up to the caller to lock loaded.mu before calling this function
Bruce MacDonald's avatar
Bruce MacDonald committed
63
64
65
66
67
68
69
70
func load(c *gin.Context, modelName string, reqOpts map[string]interface{}, sessionDuration time.Duration) (*Model, error) {
	model, err := GetModel(modelName)
	if err != nil {
		return nil, err
	}

	workDir := c.GetString("workDir")

71
72
73
	opts := api.DefaultOptions()
	if err := opts.FromMap(model.Options); err != nil {
		log.Printf("could not load model options: %v", err)
Bruce MacDonald's avatar
Bruce MacDonald committed
74
		return nil, err
75
76
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
77
	if err := opts.FromMap(reqOpts); err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
78
		return nil, err
79
80
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
81
82
	ctx := c.Request.Context()

83
	// check if the loaded model is still running in a subprocess, in case something unexpected happened
84
85
	if loaded.runner != nil {
		if err := loaded.runner.Ping(ctx); err != nil {
86
87
			log.Print("loaded llm process not responding, closing now")
			// the subprocess is no longer running, so close it
88
89
90
91
			loaded.runner.Close()
			loaded.runner = nil
			loaded.Model = nil
			loaded.Options = nil
92
93
94
		}
	}

95
96
97
98
99
100
101
	needLoad := loaded.runner == nil || // is there a model loaded?
		loaded.ModelPath != model.ModelPath || // has the base model changed?
		!reflect.DeepEqual(loaded.AdapterPaths, model.AdapterPaths) || // have the adapters changed?
		!reflect.DeepEqual(loaded.Options.Runner, opts.Runner) // have the runner options changed?

	if needLoad {
		if loaded.runner != nil {
102
			log.Println("changing loaded model")
103
104
105
106
			loaded.runner.Close()
			loaded.runner = nil
			loaded.Model = nil
			loaded.Options = nil
Michael Yang's avatar
Michael Yang committed
107
		}
Michael Yang's avatar
Michael Yang committed
108

109
		llmRunner, err := llm.New(workDir, model.ModelPath, model.AdapterPaths, opts)
Michael Yang's avatar
Michael Yang committed
110
		if err != nil {
111
112
113
114
115
116
117
			// some older models are not compatible with newer versions of llama.cpp
			// show a generalized compatibility error until there is a better way to
			// check for model compatibility
			if strings.Contains(err.Error(), "failed to load model") {
				err = fmt.Errorf("%v: this model may be incompatible with your version of Ollama. If you previously pulled this model, try updating it by running `ollama pull %s`", err, model.ShortName)
			}

Bruce MacDonald's avatar
Bruce MacDonald committed
118
			return nil, err
Michael Yang's avatar
Michael Yang committed
119
120
		}

121
122
123
		loaded.Model = model
		loaded.runner = llmRunner
		loaded.Options = &opts
Michael Yang's avatar
Michael Yang committed
124
	}
125

Michael Yang's avatar
Michael Yang committed
126
127
128
129
	// update options for the loaded llm
	// TODO(mxyng): this isn't thread safe, but it should be fine for now
	loaded.runner.SetOptions(opts)

Jeffrey Morgan's avatar
Jeffrey Morgan committed
130
	loaded.expireAt = time.Now().Add(sessionDuration)
Bruce MacDonald's avatar
Bruce MacDonald committed
131

Jeffrey Morgan's avatar
Jeffrey Morgan committed
132
133
134
135
	if loaded.expireTimer == nil {
		loaded.expireTimer = time.AfterFunc(sessionDuration, func() {
			loaded.mu.Lock()
			defer loaded.mu.Unlock()
Michael Yang's avatar
Michael Yang committed
136

Jeffrey Morgan's avatar
Jeffrey Morgan committed
137
			if time.Now().Before(loaded.expireAt) {
Michael Yang's avatar
Michael Yang committed
138
139
140
				return
			}

141
142
			if loaded.runner != nil {
				loaded.runner.Close()
Michael Yang's avatar
Michael Yang committed
143
144
			}

145
146
147
			loaded.runner = nil
			loaded.Model = nil
			loaded.Options = nil
Michael Yang's avatar
Michael Yang committed
148
		})
Michael Yang's avatar
Michael Yang committed
149
	}
150

Jeffrey Morgan's avatar
Jeffrey Morgan committed
151
	loaded.expireTimer.Reset(sessionDuration)
Bruce MacDonald's avatar
Bruce MacDonald committed
152
	return model, nil
Bruce MacDonald's avatar
Bruce MacDonald committed
153
154
155
156
157
158
159
160
161
}

func GenerateHandler(c *gin.Context) {
	loaded.mu.Lock()
	defer loaded.mu.Unlock()

	checkpointStart := time.Now()

	var req api.GenerateRequest
Michael Yang's avatar
Michael Yang committed
162
163
164
165
166
167
168
	err := c.ShouldBindJSON(&req)
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Bruce MacDonald's avatar
Bruce MacDonald committed
169
170
171
		return
	}

172
173
174
	// validate the request
	switch {
	case req.Model == "":
175
176
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
		return
177
178
179
	case len(req.Format) > 0 && req.Format != "json":
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "format must be json"})
		return
180
181
182
	case req.Raw && (req.Template != "" || req.System != "" || len(req.Context) > 0):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "raw mode does not support template, system, or context"})
		return
183
184
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
185
186
	sessionDuration := defaultSessionDuration
	model, err := load(c, req.Model, req.Options, sessionDuration)
Bruce MacDonald's avatar
Bruce MacDonald committed
187
	if err != nil {
188
		var pErr *fs.PathError
Bruce MacDonald's avatar
Bruce MacDonald committed
189
190
		switch {
		case errors.As(err, &pErr):
191
			c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found, try pulling it first", req.Model)})
Bruce MacDonald's avatar
Bruce MacDonald committed
192
193
194
195
		case errors.Is(err, api.ErrInvalidOpts):
			c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		default:
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
196
		}
Bruce MacDonald's avatar
Bruce MacDonald committed
197
198
199
		return
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
200
201
202
	// an empty request loads the model
	if req.Prompt == "" && req.Template == "" && req.System == "" {
		c.JSON(http.StatusOK, api.GenerateResponse{CreatedAt: time.Now().UTC(), Model: req.Model, Done: true})
Bruce MacDonald's avatar
Bruce MacDonald committed
203
204
205
206
207
		return
	}

	checkpointLoaded := time.Now()

Bruce MacDonald's avatar
Bruce MacDonald committed
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
	var prompt string
	switch {
	case req.Raw:
		prompt = req.Prompt
	case req.Prompt != "":
		if req.Template != "" {
			// override the default model template
			model.Template = req.Template
		}

		var rebuild strings.Builder
		if req.Context != nil {
			// TODO: context is deprecated, at some point the context logic within this conditional should be removed
			prevCtx, err := loaded.runner.Decode(c.Request.Context(), req.Context)
			if err != nil {
				c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
				return
			}

			// Remove leading spaces from prevCtx if present
			prevCtx = strings.TrimPrefix(prevCtx, " ")
			rebuild.WriteString(prevCtx)
		}
		p, err := model.Prompt(PromptVars{
			System: req.System,
			Prompt: req.Prompt,
			First:  len(req.Context) == 0,
		})
236
237
238
239
		if err != nil {
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
			return
		}
Bruce MacDonald's avatar
Bruce MacDonald committed
240
241
		rebuild.WriteString(p)
		prompt = rebuild.String()
Bruce MacDonald's avatar
Bruce MacDonald committed
242
243
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
244
	ch := make(chan any)
Bruce MacDonald's avatar
Bruce MacDonald committed
245
	var generated strings.Builder
Bruce MacDonald's avatar
Bruce MacDonald committed
246
247
248
	go func() {
		defer close(ch)

Bruce MacDonald's avatar
Bruce MacDonald committed
249
250
		fn := func(r llm.PredictResult) {
			// Update model expiration
Bruce MacDonald's avatar
Bruce MacDonald committed
251
252
253
			loaded.expireAt = time.Now().Add(sessionDuration)
			loaded.expireTimer.Reset(sessionDuration)

Bruce MacDonald's avatar
Bruce MacDonald committed
254
255
256
257
			// Build up the full response
			if _, err := generated.WriteString(r.Content); err != nil {
				ch <- gin.H{"error": err.Error()}
				return
Bruce MacDonald's avatar
Bruce MacDonald committed
258
259
			}

Bruce MacDonald's avatar
Bruce MacDonald committed
260
261
262
263
264
265
266
267
268
269
270
271
272
			resp := api.GenerateResponse{
				Model:     r.Model,
				CreatedAt: r.CreatedAt,
				Done:      r.Done,
				Response:  r.Content,
				Metrics: api.Metrics{
					TotalDuration:      r.TotalDuration,
					LoadDuration:       r.LoadDuration,
					PromptEvalCount:    r.PromptEvalCount,
					PromptEvalDuration: r.PromptEvalDuration,
					EvalCount:          r.EvalCount,
					EvalDuration:       r.EvalDuration,
				},
Bruce MacDonald's avatar
Bruce MacDonald committed
273
274
			}

Bruce MacDonald's avatar
Bruce MacDonald committed
275
276
277
278
279
280
281
282
283
284
			if r.Done && !req.Raw {
				embd, err := loaded.runner.Encode(c.Request.Context(), req.Prompt+generated.String())
				if err != nil {
					ch <- gin.H{"error": err.Error()}
					return
				}
				resp.Context = embd
			}

			ch <- resp
Bruce MacDonald's avatar
Bruce MacDonald committed
285
286
		}

Bruce MacDonald's avatar
Bruce MacDonald committed
287
288
289
290
291
292
293
294
295
		// Start prediction
		predictReq := llm.PredictOpts{
			Model:            model.Name,
			Prompt:           prompt,
			Format:           req.Format,
			CheckpointStart:  checkpointStart,
			CheckpointLoaded: checkpointLoaded,
		}
		if err := loaded.runner.Predict(c.Request.Context(), predictReq, fn); err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
296
297
298
299
300
			ch <- gin.H{"error": err.Error()}
		}
	}()

	if req.Stream != nil && !*req.Stream {
Bruce MacDonald's avatar
Bruce MacDonald committed
301
302
303
		// Wait for the channel to close
		var r api.GenerateResponse
		var sb strings.Builder
Bruce MacDonald's avatar
Bruce MacDonald committed
304
		for resp := range ch {
Bruce MacDonald's avatar
Bruce MacDonald committed
305
306
			var ok bool
			if r, ok = resp.(api.GenerateResponse); !ok {
Bruce MacDonald's avatar
Bruce MacDonald committed
307
308
309
				c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
				return
			}
Bruce MacDonald's avatar
Bruce MacDonald committed
310
			sb.WriteString(r.Response)
Bruce MacDonald's avatar
Bruce MacDonald committed
311
		}
Bruce MacDonald's avatar
Bruce MacDonald committed
312
313
		r.Response = sb.String()
		c.JSON(http.StatusOK, r)
Bruce MacDonald's avatar
Bruce MacDonald committed
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
		return
	}

	streamResponse(c, ch)
}

func EmbeddingHandler(c *gin.Context) {
	loaded.mu.Lock()
	defer loaded.mu.Unlock()

	var req api.EmbeddingRequest
	err := c.ShouldBindJSON(&req)
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	if req.Model == "" {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
		return
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
340
341
	sessionDuration := defaultSessionDuration
	_, err = load(c, req.Model, req.Options, sessionDuration)
Bruce MacDonald's avatar
Bruce MacDonald committed
342
	if err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
343
344
345
346
347
348
349
350
351
		var pErr *fs.PathError
		switch {
		case errors.As(err, &pErr):
			c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found, try pulling it first", req.Model)})
		case errors.Is(err, api.ErrInvalidOpts):
			c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		default:
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		}
Bruce MacDonald's avatar
Bruce MacDonald committed
352
353
354
		return
	}

355
	if !loaded.Options.EmbeddingOnly {
Bruce MacDonald's avatar
Bruce MacDonald committed
356
357
358
359
		c.JSON(http.StatusBadRequest, gin.H{"error": "embedding option must be set to true"})
		return
	}

360
	embedding, err := loaded.runner.Embedding(c.Request.Context(), req.Prompt)
Bruce MacDonald's avatar
Bruce MacDonald committed
361
362
363
364
365
366
367
368
369
370
371
372
	if err != nil {
		log.Printf("embedding generation failed: %v", err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate embedding"})
		return
	}

	resp := api.EmbeddingResponse{
		Embedding: embedding,
	}
	c.JSON(http.StatusOK, resp)
}

373
func PullModelHandler(c *gin.Context) {
Michael Yang's avatar
Michael Yang committed
374
	var req api.PullRequest
Michael Yang's avatar
Michael Yang committed
375
376
377
378
379
380
381
	err := c.ShouldBindJSON(&req)
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Michael Yang's avatar
Michael Yang committed
382
383
384
		return
	}

385
386
387
388
389
	if req.Name == "" {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "name is required"})
		return
	}

390
391
392
	ch := make(chan any)
	go func() {
		defer close(ch)
393
394
		fn := func(r api.ProgressResponse) {
			ch <- r
395
		}
396

397
398
399
400
		regOpts := &RegistryOptions{
			Insecure: req.Insecure,
		}

401
402
403
404
		ctx, cancel := context.WithCancel(c.Request.Context())
		defer cancel()

		if err := PullModel(ctx, req.Name, regOpts, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
405
			ch <- gin.H{"error": err.Error()}
406
407
408
		}
	}()

409
410
411
412
413
	if req.Stream != nil && !*req.Stream {
		waitForStream(c, ch)
		return
	}

414
415
416
	streamResponse(c, ch)
}

417
func PushModelHandler(c *gin.Context) {
418
	var req api.PushRequest
Michael Yang's avatar
Michael Yang committed
419
420
421
422
423
424
425
	err := c.ShouldBindJSON(&req)
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Michael Yang's avatar
Michael Yang committed
426
427
		return
	}
Michael Yang's avatar
Michael Yang committed
428

429
430
431
432
433
	if req.Name == "" {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "name is required"})
		return
	}

434
435
436
	ch := make(chan any)
	go func() {
		defer close(ch)
437
438
		fn := func(r api.ProgressResponse) {
			ch <- r
439
		}
440

441
442
443
444
		regOpts := &RegistryOptions{
			Insecure: req.Insecure,
		}

Michael Yang's avatar
Michael Yang committed
445
446
447
		ctx, cancel := context.WithCancel(c.Request.Context())
		defer cancel()

448
		if err := PushModel(ctx, req.Name, regOpts, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
449
			ch <- gin.H{"error": err.Error()}
450
451
452
		}
	}()

453
454
455
456
457
	if req.Stream != nil && !*req.Stream {
		waitForStream(c, ch)
		return
	}

458
459
460
	streamResponse(c, ch)
}

461
func CreateModelHandler(c *gin.Context) {
462
	var req api.CreateRequest
Michael Yang's avatar
Michael Yang committed
463
464
465
466
467
468
469
	err := c.ShouldBindJSON(&req)
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Michael Yang's avatar
Michael Yang committed
470
		return
471
472
	}

Michael Yang's avatar
Michael Yang committed
473
474
	if req.Name == "" {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "name is required"})
475
476
477
		return
	}

478
479
	if err := ParseModelPath(req.Name).Validate(); err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
480
481
482
		return
	}

Michael Yang's avatar
Michael Yang committed
483
484
	if req.Path == "" && req.Modelfile == "" {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "path or modelfile are required"})
Michael Yang's avatar
Michael Yang committed
485
486
		return
	}
Michael Yang's avatar
Michael Yang committed
487
488
489

	var modelfile io.Reader = strings.NewReader(req.Modelfile)
	if req.Path != "" && req.Modelfile == "" {
490
		mf, err := os.Open(req.Path)
Michael Yang's avatar
Michael Yang committed
491
492
493
494
		if err != nil {
			c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("error reading modelfile: %s", err)})
			return
		}
495
		defer mf.Close()
Michael Yang's avatar
Michael Yang committed
496

497
		modelfile = mf
Michael Yang's avatar
Michael Yang committed
498
	}
Michael Yang's avatar
Michael Yang committed
499
500
501
502
503
504
505

	commands, err := parser.Parse(modelfile)
	if err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

Michael Yang's avatar
Michael Yang committed
506
	ch := make(chan any)
Michael Yang's avatar
Michael Yang committed
507
508
	go func() {
		defer close(ch)
509
510
		fn := func(resp api.ProgressResponse) {
			ch <- resp
511
512
		}

513
514
515
		ctx, cancel := context.WithCancel(c.Request.Context())
		defer cancel()

516
		if err := CreateModel(ctx, req.Name, filepath.Dir(req.Path), commands, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
517
			ch <- gin.H{"error": err.Error()}
518
		}
Michael Yang's avatar
Michael Yang committed
519
	}()
Michael Yang's avatar
Michael Yang committed
520

521
522
523
524
525
	if req.Stream != nil && !*req.Stream {
		waitForStream(c, ch)
		return
	}

Michael Yang's avatar
Michael Yang committed
526
	streamResponse(c, ch)
Bruce MacDonald's avatar
Bruce MacDonald committed
527
528
}

529
530
func DeleteModelHandler(c *gin.Context) {
	var req api.DeleteRequest
Michael Yang's avatar
Michael Yang committed
531
532
533
534
535
536
537
	err := c.ShouldBindJSON(&req)
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
538
539
540
		return
	}

541
542
543
544
545
	if req.Name == "" {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "name is required"})
		return
	}

546
547
548
549
	if err := DeleteModel(req.Name); err != nil {
		if os.IsNotExist(err) {
			c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Name)})
		} else {
550
551
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		}
552
553
		return
	}
Michael Yang's avatar
Michael Yang committed
554
555
556
557
558
559
560
561
562
563
564
565

	manifestsPath, err := GetManifestPath()
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

	if err := PruneDirectory(manifestsPath); err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

566
	c.JSON(http.StatusOK, nil)
567
568
}

Patrick Devine's avatar
Patrick Devine committed
569
570
func ShowModelHandler(c *gin.Context) {
	var req api.ShowRequest
Michael Yang's avatar
Michael Yang committed
571
572
573
574
575
576
577
	err := c.ShouldBindJSON(&req)
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Patrick Devine's avatar
Patrick Devine committed
578
579
580
		return
	}

581
582
583
584
585
	if req.Name == "" {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "name is required"})
		return
	}

Patrick Devine's avatar
Patrick Devine committed
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
	resp, err := GetModelInfo(req.Name)
	if err != nil {
		if os.IsNotExist(err) {
			c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Name)})
		} else {
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		}
		return
	}

	c.JSON(http.StatusOK, resp)
}

// GetModelInfo returns the license, system prompt, template, modelfile and
// parameters of the named model.
//
// Parameters are rendered one per line as the option name left-aligned in a
// 30 character column followed by its value. List-valued options produce one
// line per element. Values that are not a string, int, float64 or bool are
// omitted. The lines follow map iteration order, which Go does not specify.
//
// Any error from GetModel or ShowModelfile is returned unchanged.
func GetModelInfo(name string) (*api.ShowResponse, error) {
	model, err := GetModel(name)
	if err != nil {
		return nil, err
	}

	resp := &api.ShowResponse{
		License:  strings.Join(model.License, "\n"),
		System:   model.System,
		Template: model.Template,
	}

	mf, err := ShowModelfile(model)
	if err != nil {
		return nil, err
	}

	resp.Modelfile = mf

	// format renders a scalar option value; ok is false for unsupported types
	format := func(v any) (s string, ok bool) {
		switch val := v.(type) {
		case string:
			return val, true
		case int:
			return strconv.Itoa(val), true
		case float64:
			// precision -1 keeps the fractional part (0.7 stays "0.7"
			// instead of being rounded to "1") while whole numbers still
			// print without a decimal point
			return strconv.FormatFloat(val, 'f', -1, 64), true
		case bool:
			return strconv.FormatBool(val), true
		}
		return "", false
	}

	var params []string
	cs := 30
	add := func(k string, v any) {
		if s, ok := format(v); ok {
			params = append(params, fmt.Sprintf("%-*s %s", cs, k, s))
		}
	}

	for k, v := range model.Options {
		if list, ok := v.([]interface{}); ok {
			// repeated options are listed once per value
			for _, item := range list {
				add(k, item)
			}
			continue
		}

		add(k, v)
	}
	resp.Parameters = strings.Join(params, "\n")

	return resp, nil
}

650
func ListModelsHandler(c *gin.Context) {
651
	models := make([]api.ModelResponse, 0)
Patrick Devine's avatar
Patrick Devine committed
652
653
654
655
656
	fp, err := GetManifestPath()
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
Michael Yang's avatar
Michael Yang committed
657
658

	walkFunc := func(path string, info os.FileInfo, _ error) error {
Patrick Devine's avatar
Patrick Devine committed
659
		if !info.IsDir() {
Michael Yang's avatar
Michael Yang committed
660
661
662
			dir, file := filepath.Split(path)
			dir = strings.Trim(strings.TrimPrefix(dir, fp), string(os.PathSeparator))
			tag := strings.Join([]string{dir, file}, ":")
663

664
			mp := ParseModelPath(tag)
Patrick Devine's avatar
Patrick Devine committed
665
			manifest, digest, err := GetManifest(mp)
Patrick Devine's avatar
Patrick Devine committed
666
			if err != nil {
667
668
				log.Printf("skipping file: %s", fp)
				return nil
Patrick Devine's avatar
Patrick Devine committed
669
			}
Michael Yang's avatar
Michael Yang committed
670
671

			models = append(models, api.ModelResponse{
Patrick Devine's avatar
Patrick Devine committed
672
673
				Name:       mp.GetShortTagname(),
				Size:       manifest.GetTotalSize(),
Patrick Devine's avatar
Patrick Devine committed
674
				Digest:     digest,
Michael Yang's avatar
Michael Yang committed
675
676
				ModifiedAt: info.ModTime(),
			})
Patrick Devine's avatar
Patrick Devine committed
677
		}
Michael Yang's avatar
Michael Yang committed
678

Patrick Devine's avatar
Patrick Devine committed
679
		return nil
Michael Yang's avatar
Michael Yang committed
680
681
682
	}

	if err := filepath.Walk(fp, walkFunc); err != nil {
Patrick Devine's avatar
Patrick Devine committed
683
684
685
686
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

Michael Yang's avatar
Michael Yang committed
687
	c.JSON(http.StatusOK, api.ListResponse{Models: models})
Patrick Devine's avatar
Patrick Devine committed
688
689
}

Patrick Devine's avatar
Patrick Devine committed
690
691
func CopyModelHandler(c *gin.Context) {
	var req api.CopyRequest
Michael Yang's avatar
Michael Yang committed
692
693
694
695
696
697
698
	err := c.ShouldBindJSON(&req)
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Patrick Devine's avatar
Patrick Devine committed
699
700
701
		return
	}

702
703
704
705
706
	if req.Source == "" || req.Destination == "" {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "source add destination are required"})
		return
	}

707
708
709
710
711
	if err := ParseModelPath(req.Destination).Validate(); err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

Patrick Devine's avatar
Patrick Devine committed
712
713
714
715
716
717
718
719
720
721
	if err := CopyModel(req.Source, req.Destination); err != nil {
		if os.IsNotExist(err) {
			c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Source)})
		} else {
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		}
		return
	}
}

Michael Yang's avatar
Michael Yang committed
722
func HeadBlobHandler(c *gin.Context) {
Michael Yang's avatar
Michael Yang committed
723
724
725
726
727
728
729
730
731
732
733
	path, err := GetBlobsPath(c.Param("digest"))
	if err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	if _, err := os.Stat(path); err != nil {
		c.AbortWithStatusJSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("blob %q not found", c.Param("digest"))})
		return
	}

Michael Yang's avatar
Michael Yang committed
734
	c.Status(http.StatusOK)
Michael Yang's avatar
Michael Yang committed
735
736
737
}

func CreateBlobHandler(c *gin.Context) {
Michael Yang's avatar
Michael Yang committed
738
739
740
741
742
743
	targetPath, err := GetBlobsPath(c.Param("digest"))
	if err != nil {
		c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

Michael Yang's avatar
Michael Yang committed
744
	hash := sha256.New()
745
	temp, err := os.CreateTemp(filepath.Dir(targetPath), c.Param("digest")+"-")
Michael Yang's avatar
Michael Yang committed
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
	if err != nil {
		c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
	defer temp.Close()
	defer os.Remove(temp.Name())

	if _, err := io.Copy(temp, io.TeeReader(c.Request.Body, hash)); err != nil {
		c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

	if fmt.Sprintf("sha256:%x", hash.Sum(nil)) != c.Param("digest") {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "digest does not match body"})
		return
	}

	if err := temp.Close(); err != nil {
		c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

	if err := os.Rename(temp.Name(), targetPath); err != nil {
		c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

Michael Yang's avatar
Michael Yang committed
773
	c.Status(http.StatusCreated)
Michael Yang's avatar
Michael Yang committed
774
775
}

Michael Yang's avatar
Michael Yang committed
776
777
778
779
780
781
782
// defaultAllowOrigins lists the hosts that Serve always adds to the CORS
// allowed origins, for both http and https and with or without a port.
var defaultAllowOrigins = []string{
	"localhost",
	"127.0.0.1",
	"0.0.0.0",
}

func Serve(ln net.Listener, allowOrigins []string) error {
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
	if noprune := os.Getenv("OLLAMA_NOPRUNE"); noprune == "" {
		// clean up unused layers and manifests
		if err := PruneLayers(); err != nil {
			return err
		}

		manifestsPath, err := GetManifestPath()
		if err != nil {
			return err
		}

		if err := PruneDirectory(manifestsPath); err != nil {
			return err
		}
	}

Michael Yang's avatar
Michael Yang committed
799
800
	config := cors.DefaultConfig()
	config.AllowWildcard = true
Michael Yang's avatar
Michael Yang committed
801
802
803
804
805
806
807
808
809
810

	config.AllowOrigins = allowOrigins
	for _, allowOrigin := range defaultAllowOrigins {
		config.AllowOrigins = append(config.AllowOrigins,
			fmt.Sprintf("http://%s", allowOrigin),
			fmt.Sprintf("https://%s", allowOrigin),
			fmt.Sprintf("http://%s:*", allowOrigin),
			fmt.Sprintf("https://%s:*", allowOrigin),
		)
	}
Michael Yang's avatar
Michael Yang committed
811

812
813
814
815
816
817
	workDir, err := os.MkdirTemp("", "ollama")
	if err != nil {
		return err
	}
	defer os.RemoveAll(workDir)

Bruce MacDonald's avatar
Bruce MacDonald committed
818
	r := gin.Default()
819
820
821
822
823
824
825
	r.Use(
		cors.New(config),
		func(c *gin.Context) {
			c.Set("workDir", workDir)
			c.Next()
		},
	)
Bruce MacDonald's avatar
Bruce MacDonald committed
826

827
828
	r.POST("/api/pull", PullModelHandler)
	r.POST("/api/generate", GenerateHandler)
Bruce MacDonald's avatar
Bruce MacDonald committed
829
	r.POST("/api/chat", ChatHandler)
Bruce MacDonald's avatar
Bruce MacDonald committed
830
	r.POST("/api/embeddings", EmbeddingHandler)
831
832
	r.POST("/api/create", CreateModelHandler)
	r.POST("/api/push", PushModelHandler)
Patrick Devine's avatar
Patrick Devine committed
833
	r.POST("/api/copy", CopyModelHandler)
834
	r.DELETE("/api/delete", DeleteModelHandler)
Patrick Devine's avatar
Patrick Devine committed
835
	r.POST("/api/show", ShowModelHandler)
Michael Yang's avatar
Michael Yang committed
836
	r.POST("/api/blobs/:digest", CreateBlobHandler)
Michael Yang's avatar
Michael Yang committed
837
	r.HEAD("/api/blobs/:digest", HeadBlobHandler)
Jeffrey Morgan's avatar
Jeffrey Morgan committed
838

Michael Yang's avatar
Michael Yang committed
839
840
841
842
843
844
845
846
	for _, method := range []string{http.MethodGet, http.MethodHead} {
		r.Handle(method, "/", func(c *gin.Context) {
			c.String(http.StatusOK, "Ollama is running")
		})

		r.Handle(method, "/api/tags", ListModelsHandler)
	}

Michael Yang's avatar
Michael Yang committed
847
	log.Printf("Listening on %s (version %s)", ln.Addr(), version.Version)
Jeffrey Morgan's avatar
Jeffrey Morgan committed
848
849
850
851
	s := &http.Server{
		Handler: r,
	}

852
853
	// listen for a ctrl+c and stop any loaded llm
	signals := make(chan os.Signal, 1)
854
	signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM)
855
856
	go func() {
		<-signals
857
858
		if loaded.runner != nil {
			loaded.runner.Close()
859
		}
860
		os.RemoveAll(workDir)
861
862
863
		os.Exit(0)
	}()

864
865
866
	if runtime.GOOS == "linux" {
		// check compatibility to log warnings
		if _, err := llm.CheckVRAM(); err != nil {
867
			log.Printf(err.Error())
868
869
870
		}
	}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
871
872
	return s.Serve(ln)
}
Michael Yang's avatar
Michael Yang committed
873

874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
// waitForStream consumes ch and answers the request with a single JSON
// document instead of a stream.
//
// Progress responses are skipped until one with status "success" arrives,
// which is returned with 200 OK. A gin.H item ends the wait with a 500 that
// carries its "error" string (or a generic message when that field is not a
// string). Any other item type, or ch closing before a success, also results
// in a 500.
func waitForStream(c *gin.Context, ch chan interface{}) {
	c.Header("Content-Type", "application/json")

	fail := func(msg string) {
		c.JSON(http.StatusInternalServerError, gin.H{"error": msg})
	}

	for item := range ch {
		if progress, isProgress := item.(api.ProgressResponse); isProgress {
			if progress.Status == "success" {
				c.JSON(http.StatusOK, progress)
				return
			}
			// Intermediate progress update: keep waiting.
			continue
		}

		payload, isMap := item.(gin.H)
		if !isMap {
			fail("unexpected progress response")
			return
		}

		text, isString := payload["error"].(string)
		if !isString {
			text = "unexpected error format in progress response"
		}
		fail(text)
		return
	}

	fail("unexpected end of progress response")
}

Michael Yang's avatar
Michael Yang committed
899
func streamResponse(c *gin.Context, ch chan any) {
900
	c.Header("Content-Type", "application/x-ndjson")
Michael Yang's avatar
Michael Yang committed
901
902
903
904
905
906
907
908
	c.Stream(func(w io.Writer) bool {
		val, ok := <-ch
		if !ok {
			return false
		}

		bts, err := json.Marshal(val)
		if err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
909
			log.Printf("streamResponse: json.Marshal failed with %s", err)
Michael Yang's avatar
Michael Yang committed
910
911
912
			return false
		}

913
		// Delineate chunks with new-line delimiter
Michael Yang's avatar
Michael Yang committed
914
915
		bts = append(bts, '\n')
		if _, err := w.Write(bts); err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
916
			log.Printf("streamResponse: w.Write failed with %s", err)
Michael Yang's avatar
Michael Yang committed
917
918
919
920
921
922
			return false
		}

		return true
	})
}
Bruce MacDonald's avatar
Bruce MacDonald committed
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044

// ChatHandler serves POST /api/chat.
//
// It binds the JSON body to an api.ChatRequest, validates it, loads the
// requested model and runs a prediction over the prompt built from the
// request's messages. The loaded-model mutex is held for the whole request.
//
// Responses:
//   - 400 when the body is missing or malformed, the model name is empty, the
//     format is neither empty nor "json", the options are invalid, or the
//     prompt cannot be built from the messages;
//   - 404 when loading the model fails with a path error;
//   - 500 for any other load or prediction failure;
//   - 200 with a single "done" response when no messages are supplied (the
//     model is loaded but nothing is generated);
//   - otherwise the generated answer, either streamed or, when the request
//     sets stream to false, as one aggregated JSON response.
func ChatHandler(c *gin.Context) {
	loaded.mu.Lock()
	defer loaded.mu.Unlock()

	checkpointStart := time.Now()

	var req api.ChatRequest
	err := c.ShouldBindJSON(&req)
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	// validate the request
	switch {
	case req.Model == "":
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
		return
	case len(req.Format) > 0 && req.Format != "json":
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "format must be json"})
		return
	}

	sessionDuration := defaultSessionDuration
	model, err := load(c, req.Model, req.Options, sessionDuration)
	if err != nil {
		var pErr *fs.PathError
		switch {
		case errors.As(err, &pErr):
			c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found, try pulling it first", req.Model)})
		case errors.Is(err, api.ErrInvalidOpts):
			c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		default:
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		}
		return
	}

	// an empty request loads the model
	if len(req.Messages) == 0 {
		c.JSON(http.StatusOK, api.ChatResponse{CreatedAt: time.Now().UTC(), Model: req.Model, Done: true})
		return
	}

	checkpointLoaded := time.Now()

	prompt, err := model.ChatPrompt(req.Messages)
	if err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	ch := make(chan any)

	go func() {
		defer close(ch)

		fn := func(r llm.PredictResult) {
			// Update model expiration
			loaded.expireAt = time.Now().Add(sessionDuration)
			loaded.expireTimer.Reset(sessionDuration)

			resp := api.ChatResponse{
				Model:     r.Model,
				CreatedAt: r.CreatedAt,
				Done:      r.Done,
				Metrics: api.Metrics{
					TotalDuration:      r.TotalDuration,
					LoadDuration:       r.LoadDuration,
					PromptEvalCount:    r.PromptEvalCount,
					PromptEvalDuration: r.PromptEvalDuration,
					EvalCount:          r.EvalCount,
					EvalDuration:       r.EvalDuration,
				},
			}

			// The final (done) result carries only metrics, no message.
			if !r.Done {
				resp.Message = &api.Message{Role: "assistant", Content: r.Content}
			}

			ch <- resp
		}

		// Start prediction
		predictReq := llm.PredictOpts{
			Model:            model.Name,
			Prompt:           prompt,
			Format:           req.Format,
			CheckpointStart:  checkpointStart,
			CheckpointLoaded: checkpointLoaded,
		}
		if err := loaded.runner.Predict(c.Request.Context(), predictReq, fn); err != nil {
			ch <- gin.H{"error": err.Error()}
		}
	}()

	if req.Stream != nil && !*req.Stream {
		// Wait for the channel to close, concatenating the message fragments
		// into one response. The last ChatResponse received supplies the
		// metadata and metrics of the aggregated reply.
		var final api.ChatResponse
		var sb strings.Builder
		for resp := range ch {
			switch r := resp.(type) {
			case api.ChatResponse:
				final = r
				if r.Message != nil {
					sb.WriteString(r.Message.Content)
				}
			case gin.H:
				// Prediction failures arrive as gin.H{"error": ...}. The outer
				// err is nil at this point, so the message must be taken from
				// the item itself rather than from err.
				msg, ok := r["error"].(string)
				if !ok {
					msg = "unexpected error format in response"
				}
				c.JSON(http.StatusInternalServerError, gin.H{"error": msg})
				return
			default:
				c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected response type"})
				return
			}
		}
		final.Message = &api.Message{Role: "assistant", Content: sb.String()}
		c.JSON(http.StatusOK, final)
		return
	}

	streamResponse(c, ch)
}