routes.go 26 KB
Newer Older
Jeffrey Morgan's avatar
Jeffrey Morgan committed
1
2
3
package server

import (
4
	"context"
Michael Yang's avatar
Michael Yang committed
5
	"crypto/sha256"
Michael Yang's avatar
Michael Yang committed
6
	"encoding/json"
7
	"errors"
8
	"fmt"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
9
	"io"
10
	"io/fs"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
11
12
13
	"log"
	"net"
	"net/http"
14
	"os"
15
	"os/signal"
Michael Yang's avatar
Michael Yang committed
16
	"path/filepath"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
17
	"reflect"
18
	"runtime"
Patrick Devine's avatar
Patrick Devine committed
19
	"strconv"
Michael Yang's avatar
Michael Yang committed
20
	"strings"
Michael Yang's avatar
Michael Yang committed
21
	"sync"
22
	"syscall"
23
	"time"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
24

Michael Yang's avatar
Michael Yang committed
25
	"github.com/gin-contrib/cors"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
26
27
	"github.com/gin-gonic/gin"

Jeffrey Morgan's avatar
Jeffrey Morgan committed
28
	"github.com/jmorganca/ollama/api"
29
	"github.com/jmorganca/ollama/llm"
Michael Yang's avatar
Michael Yang committed
30
	"github.com/jmorganca/ollama/parser"
Michael Yang's avatar
Michael Yang committed
31
	"github.com/jmorganca/ollama/version"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
32
33
)

Michael Yang's avatar
Michael Yang committed
34
35
36
37
38
39
40
41
42
43
44
45
46
47
// mode is the gin mode the server runs in; init falls back to debug mode when
// it holds an unrecognized value.
var mode string = gin.DebugMode

// init validates mode against the modes gin defines and applies it.
func init() {
	switch mode {
	case gin.DebugMode, gin.ReleaseMode, gin.TestMode:
		// recognized value, keep it
	default:
		mode = gin.DebugMode
	}

	gin.SetMode(mode)
}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
48
var loaded struct {
Michael Yang's avatar
Michael Yang committed
49
50
	mu sync.Mutex

51
	runner llm.LLM
Michael Yang's avatar
Michael Yang committed
52
53
54

	expireAt    time.Time
	expireTimer *time.Timer
Jeffrey Morgan's avatar
Jeffrey Morgan committed
55

56
57
	*Model
	*api.Options
Michael Yang's avatar
Michael Yang committed
58
59
}

60
61
var defaultSessionDuration = 5 * time.Minute

Bruce MacDonald's avatar
Bruce MacDonald committed
62
// load a model into memory if it is not already loaded, it is up to the caller to lock loaded.mu before calling this function
Bruce MacDonald's avatar
Bruce MacDonald committed
63
64
65
66
67
68
69
70
func load(c *gin.Context, modelName string, reqOpts map[string]interface{}, sessionDuration time.Duration) (*Model, error) {
	model, err := GetModel(modelName)
	if err != nil {
		return nil, err
	}

	workDir := c.GetString("workDir")

71
72
73
	opts := api.DefaultOptions()
	if err := opts.FromMap(model.Options); err != nil {
		log.Printf("could not load model options: %v", err)
Bruce MacDonald's avatar
Bruce MacDonald committed
74
		return nil, err
75
76
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
77
	if err := opts.FromMap(reqOpts); err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
78
		return nil, err
79
80
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
81
82
	ctx := c.Request.Context()

83
	// check if the loaded model is still running in a subprocess, in case something unexpected happened
84
85
	if loaded.runner != nil {
		if err := loaded.runner.Ping(ctx); err != nil {
86
87
			log.Print("loaded llm process not responding, closing now")
			// the subprocess is no longer running, so close it
88
89
90
91
			loaded.runner.Close()
			loaded.runner = nil
			loaded.Model = nil
			loaded.Options = nil
92
93
94
		}
	}

95
96
97
98
99
100
101
	needLoad := loaded.runner == nil || // is there a model loaded?
		loaded.ModelPath != model.ModelPath || // has the base model changed?
		!reflect.DeepEqual(loaded.AdapterPaths, model.AdapterPaths) || // have the adapters changed?
		!reflect.DeepEqual(loaded.Options.Runner, opts.Runner) // have the runner options changed?

	if needLoad {
		if loaded.runner != nil {
102
			log.Println("changing loaded model")
103
104
105
106
			loaded.runner.Close()
			loaded.runner = nil
			loaded.Model = nil
			loaded.Options = nil
Michael Yang's avatar
Michael Yang committed
107
		}
Michael Yang's avatar
Michael Yang committed
108

109
		llmRunner, err := llm.New(workDir, model.ModelPath, model.AdapterPaths, opts)
Michael Yang's avatar
Michael Yang committed
110
		if err != nil {
111
112
113
114
115
116
117
			// some older models are not compatible with newer versions of llama.cpp
			// show a generalized compatibility error until there is a better way to
			// check for model compatibility
			if strings.Contains(err.Error(), "failed to load model") {
				err = fmt.Errorf("%v: this model may be incompatible with your version of Ollama. If you previously pulled this model, try updating it by running `ollama pull %s`", err, model.ShortName)
			}

Bruce MacDonald's avatar
Bruce MacDonald committed
118
			return nil, err
Michael Yang's avatar
Michael Yang committed
119
120
		}

121
122
123
		loaded.Model = model
		loaded.runner = llmRunner
		loaded.Options = &opts
Michael Yang's avatar
Michael Yang committed
124
	}
125

Michael Yang's avatar
Michael Yang committed
126
127
128
129
	// update options for the loaded llm
	// TODO(mxyng): this isn't thread safe, but it should be fine for now
	loaded.runner.SetOptions(opts)

Jeffrey Morgan's avatar
Jeffrey Morgan committed
130
	loaded.expireAt = time.Now().Add(sessionDuration)
Bruce MacDonald's avatar
Bruce MacDonald committed
131

Jeffrey Morgan's avatar
Jeffrey Morgan committed
132
133
134
135
	if loaded.expireTimer == nil {
		loaded.expireTimer = time.AfterFunc(sessionDuration, func() {
			loaded.mu.Lock()
			defer loaded.mu.Unlock()
Michael Yang's avatar
Michael Yang committed
136

Jeffrey Morgan's avatar
Jeffrey Morgan committed
137
			if time.Now().Before(loaded.expireAt) {
Michael Yang's avatar
Michael Yang committed
138
139
140
				return
			}

141
142
			if loaded.runner != nil {
				loaded.runner.Close()
Michael Yang's avatar
Michael Yang committed
143
144
			}

145
146
147
			loaded.runner = nil
			loaded.Model = nil
			loaded.Options = nil
Michael Yang's avatar
Michael Yang committed
148
		})
Michael Yang's avatar
Michael Yang committed
149
	}
150

Jeffrey Morgan's avatar
Jeffrey Morgan committed
151
	loaded.expireTimer.Reset(sessionDuration)
Bruce MacDonald's avatar
Bruce MacDonald committed
152
	return model, nil
Bruce MacDonald's avatar
Bruce MacDonald committed
153
154
155
156
157
158
159
160
161
}

func GenerateHandler(c *gin.Context) {
	loaded.mu.Lock()
	defer loaded.mu.Unlock()

	checkpointStart := time.Now()

	var req api.GenerateRequest
Michael Yang's avatar
Michael Yang committed
162
163
164
165
166
167
168
	err := c.ShouldBindJSON(&req)
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Bruce MacDonald's avatar
Bruce MacDonald committed
169
170
171
		return
	}

172
173
174
	// validate the request
	switch {
	case req.Model == "":
175
176
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
		return
177
178
179
	case len(req.Format) > 0 && req.Format != "json":
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "format must be json"})
		return
180
181
182
	case req.Raw && (req.Template != "" || req.System != "" || len(req.Context) > 0):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "raw mode does not support template, system, or context"})
		return
183
184
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
185
186
	sessionDuration := defaultSessionDuration
	model, err := load(c, req.Model, req.Options, sessionDuration)
Bruce MacDonald's avatar
Bruce MacDonald committed
187
	if err != nil {
188
		var pErr *fs.PathError
Bruce MacDonald's avatar
Bruce MacDonald committed
189
190
		switch {
		case errors.As(err, &pErr):
191
			c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found, try pulling it first", req.Model)})
Bruce MacDonald's avatar
Bruce MacDonald committed
192
193
194
195
		case errors.Is(err, api.ErrInvalidOpts):
			c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		default:
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
196
		}
Bruce MacDonald's avatar
Bruce MacDonald committed
197
198
199
		return
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
200
201
202
	// an empty request loads the model
	if req.Prompt == "" && req.Template == "" && req.System == "" {
		c.JSON(http.StatusOK, api.GenerateResponse{CreatedAt: time.Now().UTC(), Model: req.Model, Done: true})
Bruce MacDonald's avatar
Bruce MacDonald committed
203
204
		return
	}
Michael Yang's avatar
Michael Yang committed
205

Michael Yang's avatar
Michael Yang committed
206
207
	checkpointLoaded := time.Now()

Bruce MacDonald's avatar
Bruce MacDonald committed
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
	var prompt string
	sendContext := false
	switch {
	case req.Raw:
		prompt = req.Prompt
	case req.Prompt != "":
		if req.Template != "" {
			// override the default model template
			model.Template = req.Template
		}

		var rebuild strings.Builder
		if req.Context != nil {
			// TODO: context is deprecated, at some point the context logic within this conditional should be removed
			prevCtx, err := loaded.runner.Decode(c.Request.Context(), req.Context)
			if err != nil {
				c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
				return
			}

			// Remove leading spaces from prevCtx if present
			prevCtx = strings.TrimPrefix(prevCtx, " ")
			rebuild.WriteString(prevCtx)
		}
		p, err := model.Prompt(PromptVars{
			System: req.System,
			Prompt: req.Prompt,
		})
236
237
238
239
		if err != nil {
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
			return
		}
Bruce MacDonald's avatar
Bruce MacDonald committed
240
241
242
		rebuild.WriteString(p)
		prompt = rebuild.String()
		sendContext = true
Michael Yang's avatar
Michael Yang committed
243
	}
Jeffrey Morgan's avatar
Jeffrey Morgan committed
244

Michael Yang's avatar
Michael Yang committed
245
	ch := make(chan any)
Bruce MacDonald's avatar
Bruce MacDonald committed
246
	var generated strings.Builder
Michael Yang's avatar
Michael Yang committed
247
248
	go func() {
		defer close(ch)
Michael Yang's avatar
Michael Yang committed
249

Bruce MacDonald's avatar
Bruce MacDonald committed
250
251
		fn := func(r llm.PredictResponse) {
			// Update model expiration
Jeffrey Morgan's avatar
Jeffrey Morgan committed
252
253
			loaded.expireAt = time.Now().Add(sessionDuration)
			loaded.expireTimer.Reset(sessionDuration)
Michael Yang's avatar
Michael Yang committed
254

Bruce MacDonald's avatar
Bruce MacDonald committed
255
256
257
258
			// Build up the full response
			if _, err := generated.WriteString(r.Content); err != nil {
				ch <- gin.H{"error": err.Error()}
				return
Michael Yang's avatar
Michael Yang committed
259
260
			}

Bruce MacDonald's avatar
Bruce MacDonald committed
261
262
263
264
265
266
267
268
269
270
271
272
273
			resp := api.GenerateResponse{
				Model:     r.Model,
				CreatedAt: r.CreatedAt,
				Done:      r.Done,
				Response:  r.Content,
				EvalMetrics: api.EvalMetrics{
					TotalDuration:      r.TotalDuration,
					LoadDuration:       r.LoadDuration,
					PromptEvalCount:    r.PromptEvalCount,
					PromptEvalDuration: r.PromptEvalDuration,
					EvalCount:          r.EvalCount,
					EvalDuration:       r.EvalDuration,
				},
274
275
			}

Bruce MacDonald's avatar
Bruce MacDonald committed
276
277
278
279
280
281
282
283
284
285
			if r.Done && sendContext {
				embd, err := loaded.runner.Encode(c.Request.Context(), req.Prompt+generated.String())
				if err != nil {
					ch <- gin.H{"error": err.Error()}
					return
				}
				r.Context = embd
			}

			ch <- resp
Michael Yang's avatar
Michael Yang committed
286
287
		}

Bruce MacDonald's avatar
Bruce MacDonald committed
288
289
290
291
292
293
294
295
296
		// Start prediction
		predictReq := llm.PredictRequest{
			Model:            model.Name,
			Prompt:           prompt,
			Format:           req.Format,
			CheckpointStart:  checkpointStart,
			CheckpointLoaded: checkpointLoaded,
		}
		if err := loaded.runner.Predict(c.Request.Context(), predictReq, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
297
			ch <- gin.H{"error": err.Error()}
Michael Yang's avatar
Michael Yang committed
298
		}
Michael Yang's avatar
Michael Yang committed
299
	}()
Michael Yang's avatar
Michael Yang committed
300

301
	if req.Stream != nil && !*req.Stream {
Bruce MacDonald's avatar
Bruce MacDonald committed
302
303
304
		// Wait for the channel to close
		var r api.GenerateResponse
		var sb strings.Builder
305
		for resp := range ch {
Bruce MacDonald's avatar
Bruce MacDonald committed
306
307
			var ok bool
			if r, ok = resp.(api.GenerateResponse); !ok {
308
309
310
				c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
				return
			}
Bruce MacDonald's avatar
Bruce MacDonald committed
311
			sb.WriteString(r.Response)
312
		}
Bruce MacDonald's avatar
Bruce MacDonald committed
313
314
		r.Response = sb.String()
		c.JSON(http.StatusOK, r)
315
316
317
		return
	}

Michael Yang's avatar
Michael Yang committed
318
	streamResponse(c, ch)
Michael Yang's avatar
Michael Yang committed
319
}
Michael Yang's avatar
Michael Yang committed
320

Bruce MacDonald's avatar
Bruce MacDonald committed
321
func ChatHandler(c *gin.Context) {
Bruce MacDonald's avatar
Bruce MacDonald committed
322
323
324
	loaded.mu.Lock()
	defer loaded.mu.Unlock()

Bruce MacDonald's avatar
Bruce MacDonald committed
325
326
327
	checkpointStart := time.Now()

	var req api.ChatRequest
Michael Yang's avatar
Michael Yang committed
328
329
330
331
332
333
334
	err := c.ShouldBindJSON(&req)
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Bruce MacDonald's avatar
Bruce MacDonald committed
335
336
337
		return
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
338
339
340
	// validate the request
	switch {
	case req.Model == "":
341
342
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
		return
Bruce MacDonald's avatar
Bruce MacDonald committed
343
344
345
	case len(req.Format) > 0 && req.Format != "json":
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "format must be json"})
		return
346
347
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
348
349
	sessionDuration := defaultSessionDuration
	model, err := load(c, req.Model, req.Options, sessionDuration)
Bruce MacDonald's avatar
Bruce MacDonald committed
350
	if err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
351
352
353
354
355
356
357
358
359
		var pErr *fs.PathError
		switch {
		case errors.As(err, &pErr):
			c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found, try pulling it first", req.Model)})
		case errors.Is(err, api.ErrInvalidOpts):
			c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		default:
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		}
Bruce MacDonald's avatar
Bruce MacDonald committed
360
361
		return
	}
362

Bruce MacDonald's avatar
Bruce MacDonald committed
363
364
365
366
367
368
369
370
371
372
373
374
375
376
	// an empty request loads the model
	if len(req.Messages) == 0 {
		c.JSON(http.StatusOK, api.ChatResponse{CreatedAt: time.Now().UTC(), Model: req.Model, Done: true})
		return
	}

	checkpointLoaded := time.Now()

	if req.Template != "" {
		// override the default model template
		model.Template = req.Template
	}
	prompt, err := model.ChatPrompt(req.Messages)
	if err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
377
378
379
380
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
	ch := make(chan any)

	go func() {
		defer close(ch)

		fn := func(r llm.PredictResponse) {
			// Update model expiration
			loaded.expireAt = time.Now().Add(sessionDuration)
			loaded.expireTimer.Reset(sessionDuration)

			resp := api.ChatResponse{
				Model:     r.Model,
				CreatedAt: r.CreatedAt,
				Done:      r.Done,
				EvalMetrics: api.EvalMetrics{
					TotalDuration:      r.TotalDuration,
					LoadDuration:       r.LoadDuration,
					PromptEvalCount:    r.PromptEvalCount,
					PromptEvalDuration: r.PromptEvalDuration,
					EvalCount:          r.EvalCount,
					EvalDuration:       r.EvalDuration,
				},
			}

			if !r.Done {
				resp.Message = &api.Message{Role: "assistant", Content: r.Content}
			}

			ch <- resp
		}

		// Start prediction
		predictReq := llm.PredictRequest{
			Model:            model.Name,
			Prompt:           prompt,
			Format:           req.Format,
			CheckpointStart:  checkpointStart,
			CheckpointLoaded: checkpointLoaded,
		}
		if err := loaded.runner.Predict(c.Request.Context(), predictReq, fn); err != nil {
			ch <- gin.H{"error": err.Error()}
		}
	}()

	if req.Stream != nil && !*req.Stream {
		// Wait for the channel to close
		var r api.ChatResponse
		var sb strings.Builder
		for resp := range ch {
			var ok bool
			if r, ok = resp.(api.ChatResponse); !ok {
				c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
				return
			}
			if r.Message != nil {
				sb.WriteString(r.Message.Content)
			}
		}
		r.Message = &api.Message{Role: "assistant", Content: sb.String()}
		c.JSON(http.StatusOK, r)
		return
	}

	streamResponse(c, ch)
}

func EmbeddingHandler(c *gin.Context) {
	loaded.mu.Lock()
	defer loaded.mu.Unlock()

	var req api.EmbeddingRequest
	err := c.ShouldBindJSON(&req)
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	if req.Model == "" {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
		return
	}

	sessionDuration := defaultSessionDuration
	_, err = load(c, req.Model, req.Options, sessionDuration)
	if err != nil {
		var pErr *fs.PathError
		switch {
		case errors.As(err, &pErr):
			c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found, try pulling it first", req.Model)})
		case errors.Is(err, api.ErrInvalidOpts):
			c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		default:
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		}
		return
	}

482
	if !loaded.Options.EmbeddingOnly {
Bruce MacDonald's avatar
Bruce MacDonald committed
483
484
485
486
		c.JSON(http.StatusBadRequest, gin.H{"error": "embedding option must be set to true"})
		return
	}

487
	embedding, err := loaded.runner.Embedding(c.Request.Context(), req.Prompt)
Bruce MacDonald's avatar
Bruce MacDonald committed
488
489
490
491
492
493
494
495
496
497
498
499
	if err != nil {
		log.Printf("embedding generation failed: %v", err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate embedding"})
		return
	}

	resp := api.EmbeddingResponse{
		Embedding: embedding,
	}
	c.JSON(http.StatusOK, resp)
}

500
func PullModelHandler(c *gin.Context) {
Michael Yang's avatar
Michael Yang committed
501
	var req api.PullRequest
Michael Yang's avatar
Michael Yang committed
502
503
504
505
506
507
508
	err := c.ShouldBindJSON(&req)
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Michael Yang's avatar
Michael Yang committed
509
510
511
		return
	}

512
513
514
515
516
	if req.Name == "" {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "name is required"})
		return
	}

517
518
519
	ch := make(chan any)
	go func() {
		defer close(ch)
520
521
		fn := func(r api.ProgressResponse) {
			ch <- r
522
		}
523

524
525
526
527
		regOpts := &RegistryOptions{
			Insecure: req.Insecure,
		}

528
529
530
531
		ctx, cancel := context.WithCancel(c.Request.Context())
		defer cancel()

		if err := PullModel(ctx, req.Name, regOpts, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
532
			ch <- gin.H{"error": err.Error()}
533
534
535
		}
	}()

536
537
538
539
540
	if req.Stream != nil && !*req.Stream {
		waitForStream(c, ch)
		return
	}

541
542
543
	streamResponse(c, ch)
}

544
func PushModelHandler(c *gin.Context) {
545
	var req api.PushRequest
Michael Yang's avatar
Michael Yang committed
546
547
548
549
550
551
552
	err := c.ShouldBindJSON(&req)
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Michael Yang's avatar
Michael Yang committed
553
554
		return
	}
Michael Yang's avatar
Michael Yang committed
555

556
557
558
559
560
	if req.Name == "" {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "name is required"})
		return
	}

561
562
563
	ch := make(chan any)
	go func() {
		defer close(ch)
564
565
		fn := func(r api.ProgressResponse) {
			ch <- r
566
		}
567

568
569
570
571
		regOpts := &RegistryOptions{
			Insecure: req.Insecure,
		}

Michael Yang's avatar
Michael Yang committed
572
573
574
		ctx, cancel := context.WithCancel(c.Request.Context())
		defer cancel()

575
		if err := PushModel(ctx, req.Name, regOpts, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
576
			ch <- gin.H{"error": err.Error()}
577
578
579
		}
	}()

580
581
582
583
584
	if req.Stream != nil && !*req.Stream {
		waitForStream(c, ch)
		return
	}

585
586
587
	streamResponse(c, ch)
}

588
func CreateModelHandler(c *gin.Context) {
589
	var req api.CreateRequest
Michael Yang's avatar
Michael Yang committed
590
591
592
593
594
595
596
	err := c.ShouldBindJSON(&req)
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Michael Yang's avatar
Michael Yang committed
597
		return
598
599
	}

Michael Yang's avatar
Michael Yang committed
600
601
	if req.Name == "" {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "name is required"})
602
603
604
		return
	}

605
606
	if err := ParseModelPath(req.Name).Validate(); err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
607
608
609
		return
	}

Michael Yang's avatar
Michael Yang committed
610
611
	if req.Path == "" && req.Modelfile == "" {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "path or modelfile are required"})
Michael Yang's avatar
Michael Yang committed
612
613
		return
	}
Michael Yang's avatar
Michael Yang committed
614
615
616

	var modelfile io.Reader = strings.NewReader(req.Modelfile)
	if req.Path != "" && req.Modelfile == "" {
617
		mf, err := os.Open(req.Path)
Michael Yang's avatar
Michael Yang committed
618
619
620
621
		if err != nil {
			c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("error reading modelfile: %s", err)})
			return
		}
622
		defer mf.Close()
Michael Yang's avatar
Michael Yang committed
623

624
		modelfile = mf
Michael Yang's avatar
Michael Yang committed
625
	}
Michael Yang's avatar
Michael Yang committed
626
627
628
629
630
631
632

	commands, err := parser.Parse(modelfile)
	if err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

Michael Yang's avatar
Michael Yang committed
633
	ch := make(chan any)
Michael Yang's avatar
Michael Yang committed
634
635
	go func() {
		defer close(ch)
636
637
		fn := func(resp api.ProgressResponse) {
			ch <- resp
638
639
		}

640
641
642
		ctx, cancel := context.WithCancel(c.Request.Context())
		defer cancel()

643
		if err := CreateModel(ctx, req.Name, filepath.Dir(req.Path), commands, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
644
			ch <- gin.H{"error": err.Error()}
645
		}
Michael Yang's avatar
Michael Yang committed
646
	}()
Michael Yang's avatar
Michael Yang committed
647

648
649
650
651
652
	if req.Stream != nil && !*req.Stream {
		waitForStream(c, ch)
		return
	}

Michael Yang's avatar
Michael Yang committed
653
	streamResponse(c, ch)
Bruce MacDonald's avatar
Bruce MacDonald committed
654
655
}

656
657
func DeleteModelHandler(c *gin.Context) {
	var req api.DeleteRequest
Michael Yang's avatar
Michael Yang committed
658
659
660
661
662
663
664
	err := c.ShouldBindJSON(&req)
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
665
666
667
		return
	}

668
669
670
671
672
	if req.Name == "" {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "name is required"})
		return
	}

673
674
675
676
	if err := DeleteModel(req.Name); err != nil {
		if os.IsNotExist(err) {
			c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Name)})
		} else {
677
678
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		}
679
680
		return
	}
Michael Yang's avatar
Michael Yang committed
681
682
683
684
685
686
687
688
689
690
691
692

	manifestsPath, err := GetManifestPath()
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

	if err := PruneDirectory(manifestsPath); err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

693
	c.JSON(http.StatusOK, nil)
694
695
}

Patrick Devine's avatar
Patrick Devine committed
696
697
func ShowModelHandler(c *gin.Context) {
	var req api.ShowRequest
Michael Yang's avatar
Michael Yang committed
698
699
700
701
702
703
704
	err := c.ShouldBindJSON(&req)
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Patrick Devine's avatar
Patrick Devine committed
705
706
707
		return
	}

708
709
710
711
712
	if req.Name == "" {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "name is required"})
		return
	}

Patrick Devine's avatar
Patrick Devine committed
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
	resp, err := GetModelInfo(req.Name)
	if err != nil {
		if os.IsNotExist(err) {
			c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Name)})
		} else {
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		}
		return
	}

	c.JSON(http.StatusOK, resp)
}

// GetModelInfo looks up the named model and returns its license, system
// prompt, template, modelfile, and parameters.
//
// Parameters are rendered one per line as the option name padded to 30
// columns followed by the value. A list-valued option produces one line per
// element. Values that are not a string, int, float64, or bool are left out.
// Lines follow map iteration order, so their order is not stable between
// calls. Any error from GetModel or ShowModelfile is returned unchanged.
func GetModelInfo(name string) (*api.ShowResponse, error) {
	model, err := GetModel(name)
	if err != nil {
		return nil, err
	}

	mf, err := ShowModelfile(model)
	if err != nil {
		return nil, err
	}

	resp := &api.ShowResponse{
		License:   strings.Join(model.License, "\n"),
		System:    model.System,
		Template:  model.Template,
		Modelfile: mf,
	}

	// format renders a scalar option value; ok is false for unsupported types
	format := func(v any) (s string, ok bool) {
		switch val := v.(type) {
		case string:
			return val, true
		case int:
			return strconv.Itoa(val), true
		case float64:
			// precision -1 keeps the shortest exact representation, so 0.8
			// stays "0.8" and 4096 stays "4096"; a fixed precision of 0
			// would round fractional values to whole numbers
			return strconv.FormatFloat(val, 'f', -1, 64), true
		case bool:
			return strconv.FormatBool(val), true
		}
		return "", false
	}

	const cs = 30 // width of the name column

	var params []string
	for k, v := range model.Options {
		// treat a scalar as a one-element list so both shapes share one path
		values, isList := v.([]interface{})
		if !isList {
			values = []interface{}{v}
		}

		for _, nv := range values {
			if s, ok := format(nv); ok {
				params = append(params, fmt.Sprintf("%-*s %s", cs, k, s))
			}
		}
	}
	resp.Parameters = strings.Join(params, "\n")

	return resp, nil
}

777
func ListModelsHandler(c *gin.Context) {
778
	models := make([]api.ModelResponse, 0)
Patrick Devine's avatar
Patrick Devine committed
779
780
781
782
783
	fp, err := GetManifestPath()
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
Michael Yang's avatar
Michael Yang committed
784
785

	walkFunc := func(path string, info os.FileInfo, _ error) error {
Patrick Devine's avatar
Patrick Devine committed
786
		if !info.IsDir() {
Michael Yang's avatar
Michael Yang committed
787
788
789
			dir, file := filepath.Split(path)
			dir = strings.Trim(strings.TrimPrefix(dir, fp), string(os.PathSeparator))
			tag := strings.Join([]string{dir, file}, ":")
790

791
			mp := ParseModelPath(tag)
Patrick Devine's avatar
Patrick Devine committed
792
			manifest, digest, err := GetManifest(mp)
Patrick Devine's avatar
Patrick Devine committed
793
			if err != nil {
794
795
				log.Printf("skipping file: %s", fp)
				return nil
Patrick Devine's avatar
Patrick Devine committed
796
			}
Michael Yang's avatar
Michael Yang committed
797
798

			models = append(models, api.ModelResponse{
Patrick Devine's avatar
Patrick Devine committed
799
800
				Name:       mp.GetShortTagname(),
				Size:       manifest.GetTotalSize(),
Patrick Devine's avatar
Patrick Devine committed
801
				Digest:     digest,
Michael Yang's avatar
Michael Yang committed
802
803
				ModifiedAt: info.ModTime(),
			})
Patrick Devine's avatar
Patrick Devine committed
804
		}
Michael Yang's avatar
Michael Yang committed
805

Patrick Devine's avatar
Patrick Devine committed
806
		return nil
Michael Yang's avatar
Michael Yang committed
807
808
809
	}

	if err := filepath.Walk(fp, walkFunc); err != nil {
Patrick Devine's avatar
Patrick Devine committed
810
811
812
813
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

Michael Yang's avatar
Michael Yang committed
814
	c.JSON(http.StatusOK, api.ListResponse{Models: models})
Patrick Devine's avatar
Patrick Devine committed
815
816
}

Patrick Devine's avatar
Patrick Devine committed
817
818
func CopyModelHandler(c *gin.Context) {
	var req api.CopyRequest
Michael Yang's avatar
Michael Yang committed
819
820
821
822
823
824
825
	err := c.ShouldBindJSON(&req)
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Patrick Devine's avatar
Patrick Devine committed
826
827
828
		return
	}

829
830
831
832
833
	if req.Source == "" || req.Destination == "" {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "source add destination are required"})
		return
	}

834
835
836
837
838
	if err := ParseModelPath(req.Destination).Validate(); err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

Patrick Devine's avatar
Patrick Devine committed
839
840
841
842
843
844
845
846
847
848
	if err := CopyModel(req.Source, req.Destination); err != nil {
		if os.IsNotExist(err) {
			c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Source)})
		} else {
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		}
		return
	}
}

Michael Yang's avatar
Michael Yang committed
849
func HeadBlobHandler(c *gin.Context) {
Michael Yang's avatar
Michael Yang committed
850
851
852
853
854
855
856
857
858
859
860
	path, err := GetBlobsPath(c.Param("digest"))
	if err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	if _, err := os.Stat(path); err != nil {
		c.AbortWithStatusJSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("blob %q not found", c.Param("digest"))})
		return
	}

Michael Yang's avatar
Michael Yang committed
861
	c.Status(http.StatusOK)
Michael Yang's avatar
Michael Yang committed
862
863
864
}

func CreateBlobHandler(c *gin.Context) {
Michael Yang's avatar
Michael Yang committed
865
866
867
868
869
870
	targetPath, err := GetBlobsPath(c.Param("digest"))
	if err != nil {
		c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

Michael Yang's avatar
Michael Yang committed
871
	hash := sha256.New()
872
	temp, err := os.CreateTemp(filepath.Dir(targetPath), c.Param("digest")+"-")
Michael Yang's avatar
Michael Yang committed
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
	if err != nil {
		c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
	defer temp.Close()
	defer os.Remove(temp.Name())

	if _, err := io.Copy(temp, io.TeeReader(c.Request.Body, hash)); err != nil {
		c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

	if fmt.Sprintf("sha256:%x", hash.Sum(nil)) != c.Param("digest") {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "digest does not match body"})
		return
	}

	if err := temp.Close(); err != nil {
		c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

	if err := os.Rename(temp.Name(), targetPath); err != nil {
		c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

Michael Yang's avatar
Michael Yang committed
900
	c.Status(http.StatusCreated)
Michael Yang's avatar
Michael Yang committed
901
902
}

Michael Yang's avatar
Michael Yang committed
903
904
905
906
907
908
909
// defaultAllowOrigins lists the hosts that Serve always adds to the CORS
// allow list. Serve expands each entry into http and https origins, both
// without a port and with a ":*" port wildcard.
var defaultAllowOrigins = []string{
	"localhost",
	"127.0.0.1",
	"0.0.0.0",
}

func Serve(ln net.Listener, allowOrigins []string) error {
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
	if noprune := os.Getenv("OLLAMA_NOPRUNE"); noprune == "" {
		// clean up unused layers and manifests
		if err := PruneLayers(); err != nil {
			return err
		}

		manifestsPath, err := GetManifestPath()
		if err != nil {
			return err
		}

		if err := PruneDirectory(manifestsPath); err != nil {
			return err
		}
	}

Michael Yang's avatar
Michael Yang committed
926
927
	config := cors.DefaultConfig()
	config.AllowWildcard = true
Michael Yang's avatar
Michael Yang committed
928
929
930
931
932
933
934
935
936
937

	config.AllowOrigins = allowOrigins
	for _, allowOrigin := range defaultAllowOrigins {
		config.AllowOrigins = append(config.AllowOrigins,
			fmt.Sprintf("http://%s", allowOrigin),
			fmt.Sprintf("https://%s", allowOrigin),
			fmt.Sprintf("http://%s:*", allowOrigin),
			fmt.Sprintf("https://%s:*", allowOrigin),
		)
	}
Michael Yang's avatar
Michael Yang committed
938

939
940
941
942
943
944
	workDir, err := os.MkdirTemp("", "ollama")
	if err != nil {
		return err
	}
	defer os.RemoveAll(workDir)

Bruce MacDonald's avatar
Bruce MacDonald committed
945
	r := gin.Default()
946
947
948
949
950
951
952
	r.Use(
		cors.New(config),
		func(c *gin.Context) {
			c.Set("workDir", workDir)
			c.Next()
		},
	)
Bruce MacDonald's avatar
Bruce MacDonald committed
953

954
955
	r.POST("/api/pull", PullModelHandler)
	r.POST("/api/generate", GenerateHandler)
Bruce MacDonald's avatar
Bruce MacDonald committed
956
	r.POST("/api/chat", ChatHandler)
Bruce MacDonald's avatar
Bruce MacDonald committed
957
	r.POST("/api/embeddings", EmbeddingHandler)
958
959
	r.POST("/api/create", CreateModelHandler)
	r.POST("/api/push", PushModelHandler)
Patrick Devine's avatar
Patrick Devine committed
960
	r.POST("/api/copy", CopyModelHandler)
961
	r.DELETE("/api/delete", DeleteModelHandler)
Patrick Devine's avatar
Patrick Devine committed
962
	r.POST("/api/show", ShowModelHandler)
Michael Yang's avatar
Michael Yang committed
963
	r.POST("/api/blobs/:digest", CreateBlobHandler)
Michael Yang's avatar
Michael Yang committed
964
	r.HEAD("/api/blobs/:digest", HeadBlobHandler)
Jeffrey Morgan's avatar
Jeffrey Morgan committed
965

Michael Yang's avatar
Michael Yang committed
966
967
968
969
970
971
972
973
	for _, method := range []string{http.MethodGet, http.MethodHead} {
		r.Handle(method, "/", func(c *gin.Context) {
			c.String(http.StatusOK, "Ollama is running")
		})

		r.Handle(method, "/api/tags", ListModelsHandler)
	}

Michael Yang's avatar
Michael Yang committed
974
	log.Printf("Listening on %s (version %s)", ln.Addr(), version.Version)
Jeffrey Morgan's avatar
Jeffrey Morgan committed
975
976
977
978
	s := &http.Server{
		Handler: r,
	}

979
980
	// listen for a ctrl+c and stop any loaded llm
	signals := make(chan os.Signal, 1)
981
	signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM)
982
983
	go func() {
		<-signals
984
985
		if loaded.runner != nil {
			loaded.runner.Close()
986
		}
987
		os.RemoveAll(workDir)
988
989
990
		os.Exit(0)
	}()

991
992
993
	if runtime.GOOS == "linux" {
		// check compatibility to log warnings
		if _, err := llm.CheckVRAM(); err != nil {
994
			log.Printf(err.Error())
995
996
997
		}
	}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
998
999
	return s.Serve(ln)
}
Michael Yang's avatar
Michael Yang committed
1000

1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
// waitForStream drains ch and writes exactly one JSON response to c.
//
// api.ProgressResponse values are skipped until one with Status "success"
// arrives, which is written with 200. A gin.H value ends the wait with 500,
// carrying its "error" string when present. Any other value type, or the
// channel closing before a success is seen, also yields 500.
func waitForStream(c *gin.Context, ch chan interface{}) {
	c.Header("Content-Type", "application/json")

	for item := range ch {
		switch v := item.(type) {
		case api.ProgressResponse:
			if v.Status != "success" {
				// Intermediate progress update; keep waiting.
				continue
			}
			c.JSON(http.StatusOK, v)
			return
		case gin.H:
			msg, isString := v["error"].(string)
			if !isString {
				msg = "unexpected error format in progress response"
			}
			c.JSON(http.StatusInternalServerError, gin.H{"error": msg})
			return
		default:
			c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected progress response"})
			return
		}
	}

	c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected end of progress response"})
}

Michael Yang's avatar
Michael Yang committed
1026
func streamResponse(c *gin.Context, ch chan any) {
1027
	c.Header("Content-Type", "application/x-ndjson")
Michael Yang's avatar
Michael Yang committed
1028
1029
1030
1031
1032
1033
1034
1035
	c.Stream(func(w io.Writer) bool {
		val, ok := <-ch
		if !ok {
			return false
		}

		bts, err := json.Marshal(val)
		if err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
1036
			log.Printf("streamResponse: json.Marshal failed with %s", err)
Michael Yang's avatar
Michael Yang committed
1037
1038
1039
			return false
		}

1040
		// Delineate chunks with new-line delimiter
Michael Yang's avatar
Michael Yang committed
1041
1042
		bts = append(bts, '\n')
		if _, err := w.Write(bts); err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
1043
			log.Printf("streamResponse: w.Write failed with %s", err)
Michael Yang's avatar
Michael Yang committed
1044
1045
1046
1047
1048
1049
			return false
		}

		return true
	})
}