routes.go 19.3 KB
Newer Older
Jeffrey Morgan's avatar
Jeffrey Morgan committed
1
2
3
package server

import (
4
	"context"
Michael Yang's avatar
Michael Yang committed
5
	"encoding/json"
6
	"errors"
7
	"fmt"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
8
	"io"
9
	"io/fs"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
10
11
12
	"log"
	"net"
	"net/http"
13
	"os"
14
	"os/signal"
Michael Yang's avatar
Michael Yang committed
15
	"path/filepath"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
16
	"reflect"
17
	"runtime"
Patrick Devine's avatar
Patrick Devine committed
18
	"strconv"
Michael Yang's avatar
Michael Yang committed
19
	"strings"
Michael Yang's avatar
Michael Yang committed
20
	"sync"
21
	"syscall"
22
	"time"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
23

Michael Yang's avatar
Michael Yang committed
24
	"github.com/gin-contrib/cors"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
25
26
	"github.com/gin-gonic/gin"

Jeffrey Morgan's avatar
Jeffrey Morgan committed
27
	"github.com/jmorganca/ollama/api"
28
	"github.com/jmorganca/ollama/llm"
Michael Yang's avatar
Michael Yang committed
29
	"github.com/jmorganca/ollama/parser"
Michael Yang's avatar
Michael Yang committed
30
	"github.com/jmorganca/ollama/version"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
31
32
)

Michael Yang's avatar
Michael Yang committed
33
34
35
36
37
38
39
40
41
42
43
44
45
46
// mode is the gin run mode applied at package initialization; init replaces
// any unrecognized value with gin.DebugMode before handing it to gin.
var mode string = gin.DebugMode

// init normalizes mode to one of the modes gin understands and applies it.
func init() {
	recognized := mode == gin.DebugMode || mode == gin.ReleaseMode || mode == gin.TestMode
	if !recognized {
		// anything else falls back to debug mode
		mode = gin.DebugMode
	}

	gin.SetMode(mode)
}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
47
var loaded struct {
Michael Yang's avatar
Michael Yang committed
48
49
	mu sync.Mutex

50
	runner llm.LLM
Michael Yang's avatar
Michael Yang committed
51
52
53

	expireAt    time.Time
	expireTimer *time.Timer
Jeffrey Morgan's avatar
Jeffrey Morgan committed
54

55
56
	*Model
	*api.Options
Michael Yang's avatar
Michael Yang committed
57
58
}

59
60
// defaultSessionDuration is the session duration GenerateHandler passes to
// load, which uses it to schedule when the loaded runner expires.
var defaultSessionDuration = 5 * time.Minute

Bruce MacDonald's avatar
Bruce MacDonald committed
61
// load a model into memory if it is not already loaded, it is up to the caller to lock loaded.mu before calling this function
62
func load(ctx context.Context, workDir string, model *Model, reqOpts map[string]interface{}, sessionDuration time.Duration) error {
63
64
65
	opts := api.DefaultOptions()
	if err := opts.FromMap(model.Options); err != nil {
		log.Printf("could not load model options: %v", err)
Bruce MacDonald's avatar
Bruce MacDonald committed
66
		return err
67
68
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
69
70
	if err := opts.FromMap(reqOpts); err != nil {
		return err
71
72
	}

73
	// check if the loaded model is still running in a subprocess, in case something unexpected happened
74
75
	if loaded.runner != nil {
		if err := loaded.runner.Ping(ctx); err != nil {
76
77
			log.Print("loaded llm process not responding, closing now")
			// the subprocess is no longer running, so close it
78
79
80
81
			loaded.runner.Close()
			loaded.runner = nil
			loaded.Model = nil
			loaded.Options = nil
82
83
84
		}
	}

85
86
87
88
89
90
91
	needLoad := loaded.runner == nil || // is there a model loaded?
		loaded.ModelPath != model.ModelPath || // has the base model changed?
		!reflect.DeepEqual(loaded.AdapterPaths, model.AdapterPaths) || // have the adapters changed?
		!reflect.DeepEqual(loaded.Options.Runner, opts.Runner) // have the runner options changed?

	if needLoad {
		if loaded.runner != nil {
92
			log.Println("changing loaded model")
93
94
95
96
			loaded.runner.Close()
			loaded.runner = nil
			loaded.Model = nil
			loaded.Options = nil
Michael Yang's avatar
Michael Yang committed
97
		}
Michael Yang's avatar
Michael Yang committed
98

99
		llmRunner, err := llm.New(workDir, model.ModelPath, model.AdapterPaths, opts)
Michael Yang's avatar
Michael Yang committed
100
		if err != nil {
101
102
103
104
105
106
107
			// some older models are not compatible with newer versions of llama.cpp
			// show a generalized compatibility error until there is a better way to
			// check for model compatibility
			if strings.Contains(err.Error(), "failed to load model") {
				err = fmt.Errorf("%v: this model may be incompatible with your version of Ollama. If you previously pulled this model, try updating it by running `ollama pull %s`", err, model.ShortName)
			}

Bruce MacDonald's avatar
Bruce MacDonald committed
108
			return err
Michael Yang's avatar
Michael Yang committed
109
110
		}

111
112
113
		loaded.Model = model
		loaded.runner = llmRunner
		loaded.Options = &opts
Michael Yang's avatar
Michael Yang committed
114
	}
115

Michael Yang's avatar
Michael Yang committed
116
117
118
119
	// update options for the loaded llm
	// TODO(mxyng): this isn't thread safe, but it should be fine for now
	loaded.runner.SetOptions(opts)

Jeffrey Morgan's avatar
Jeffrey Morgan committed
120
	loaded.expireAt = time.Now().Add(sessionDuration)
Bruce MacDonald's avatar
Bruce MacDonald committed
121

Jeffrey Morgan's avatar
Jeffrey Morgan committed
122
123
124
125
	if loaded.expireTimer == nil {
		loaded.expireTimer = time.AfterFunc(sessionDuration, func() {
			loaded.mu.Lock()
			defer loaded.mu.Unlock()
Michael Yang's avatar
Michael Yang committed
126

Jeffrey Morgan's avatar
Jeffrey Morgan committed
127
			if time.Now().Before(loaded.expireAt) {
Michael Yang's avatar
Michael Yang committed
128
129
130
				return
			}

131
132
			if loaded.runner != nil {
				loaded.runner.Close()
Michael Yang's avatar
Michael Yang committed
133
134
			}

135
136
137
			loaded.runner = nil
			loaded.Model = nil
			loaded.Options = nil
Michael Yang's avatar
Michael Yang committed
138
		})
Michael Yang's avatar
Michael Yang committed
139
	}
140

Jeffrey Morgan's avatar
Jeffrey Morgan committed
141
	loaded.expireTimer.Reset(sessionDuration)
Bruce MacDonald's avatar
Bruce MacDonald committed
142
143
144
145
146
147
148
149
150
151
	return nil
}

func GenerateHandler(c *gin.Context) {
	loaded.mu.Lock()
	defer loaded.mu.Unlock()

	checkpointStart := time.Now()

	var req api.GenerateRequest
Michael Yang's avatar
Michael Yang committed
152
153
154
155
156
157
158
	err := c.ShouldBindJSON(&req)
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Bruce MacDonald's avatar
Bruce MacDonald committed
159
160
161
		return
	}

162
163
164
	// validate the request
	switch {
	case req.Model == "":
165
166
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
		return
167
168
169
	case len(req.Format) > 0 && req.Format != "json":
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "format must be json"})
		return
170
171
172
	case req.Raw && (req.Template != "" || req.System != "" || len(req.Context) > 0):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "raw mode does not support template, system, or context"})
		return
173
174
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
175
176
	model, err := GetModel(req.Model)
	if err != nil {
177
178
179
180
181
		var pErr *fs.PathError
		if errors.As(err, &pErr) {
			c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found, try pulling it first", req.Model)})
			return
		}
Bruce MacDonald's avatar
Bruce MacDonald committed
182
183
184
185
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

186
187
188
189
190
	workDir := c.GetString("workDir")

	// TODO: set this duration from the request if specified
	sessionDuration := defaultSessionDuration
	if err := load(c.Request.Context(), workDir, model, req.Options, sessionDuration); err != nil {
191
192
193
194
		if errors.Is(err, api.ErrInvalidOpts) {
			c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
			return
		}
Bruce MacDonald's avatar
Bruce MacDonald committed
195
196
197
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
Michael Yang's avatar
Michael Yang committed
198

Michael Yang's avatar
Michael Yang committed
199
200
	checkpointLoaded := time.Now()

201
202
203
204
205
206
207
	prompt := req.Prompt
	if !req.Raw {
		prompt, err = model.Prompt(req)
		if err != nil {
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
			return
		}
Michael Yang's avatar
Michael Yang committed
208
	}
Jeffrey Morgan's avatar
Jeffrey Morgan committed
209

Michael Yang's avatar
Michael Yang committed
210
211
212
	ch := make(chan any)
	go func() {
		defer close(ch)
Michael Yang's avatar
Michael Yang committed
213
214
215
216
217
218
		// an empty request loads the model
		if req.Prompt == "" && req.Template == "" && req.System == "" {
			ch <- api.GenerateResponse{CreatedAt: time.Now().UTC(), Model: req.Model, Done: true}
			return
		}

Michael Yang's avatar
Michael Yang committed
219
		fn := func(r api.GenerateResponse) {
Jeffrey Morgan's avatar
Jeffrey Morgan committed
220
221
			loaded.expireAt = time.Now().Add(sessionDuration)
			loaded.expireTimer.Reset(sessionDuration)
Michael Yang's avatar
Michael Yang committed
222

Michael Yang's avatar
Michael Yang committed
223
224
225
			r.Model = req.Model
			r.CreatedAt = time.Now().UTC()
			if r.Done {
Michael Yang's avatar
Michael Yang committed
226
227
				r.TotalDuration = time.Since(checkpointStart)
				r.LoadDuration = checkpointLoaded.Sub(checkpointStart)
Michael Yang's avatar
Michael Yang committed
228
229
			}

230
231
232
233
234
			if req.Raw {
				// in raw mode the client must manage history on their own
				r.Context = nil
			}

Michael Yang's avatar
Michael Yang committed
235
			ch <- r
Michael Yang's avatar
Michael Yang committed
236
237
		}

238
		if err := loaded.runner.Predict(c.Request.Context(), req.Context, prompt, req.Format, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
239
			ch <- gin.H{"error": err.Error()}
Michael Yang's avatar
Michael Yang committed
240
		}
Michael Yang's avatar
Michael Yang committed
241
	}()
Michael Yang's avatar
Michael Yang committed
242

243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
	if req.Stream != nil && !*req.Stream {
		var response api.GenerateResponse
		generated := ""
		for resp := range ch {
			if r, ok := resp.(api.GenerateResponse); ok {
				generated += r.Response
				response = r
			} else {
				c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
				return
			}
		}
		response.Response = generated
		c.JSON(http.StatusOK, response)
		return
	}

Michael Yang's avatar
Michael Yang committed
260
	streamResponse(c, ch)
Michael Yang's avatar
Michael Yang committed
261
}
Michael Yang's avatar
Michael Yang committed
262

Bruce MacDonald's avatar
Bruce MacDonald committed
263
264
265
266
267
func EmbeddingHandler(c *gin.Context) {
	loaded.mu.Lock()
	defer loaded.mu.Unlock()

	var req api.EmbeddingRequest
Michael Yang's avatar
Michael Yang committed
268
269
270
271
272
273
274
	err := c.ShouldBindJSON(&req)
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Bruce MacDonald's avatar
Bruce MacDonald committed
275
276
277
		return
	}

278
279
280
281
282
	if req.Model == "" {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
		return
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
283
284
285
286
287
	model, err := GetModel(req.Model)
	if err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}
288
289
290

	workDir := c.GetString("workDir")
	if err := load(c.Request.Context(), workDir, model, req.Options, 5*time.Minute); err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
291
292
293
294
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

295
	if !loaded.Options.EmbeddingOnly {
Bruce MacDonald's avatar
Bruce MacDonald committed
296
297
298
299
		c.JSON(http.StatusBadRequest, gin.H{"error": "embedding option must be set to true"})
		return
	}

300
	embedding, err := loaded.runner.Embedding(c.Request.Context(), req.Prompt)
Bruce MacDonald's avatar
Bruce MacDonald committed
301
302
303
304
305
306
307
308
309
310
311
312
	if err != nil {
		log.Printf("embedding generation failed: %v", err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate embedding"})
		return
	}

	resp := api.EmbeddingResponse{
		Embedding: embedding,
	}
	c.JSON(http.StatusOK, resp)
}

313
func PullModelHandler(c *gin.Context) {
Michael Yang's avatar
Michael Yang committed
314
	var req api.PullRequest
Michael Yang's avatar
Michael Yang committed
315
316
317
318
319
320
321
	err := c.ShouldBindJSON(&req)
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Michael Yang's avatar
Michael Yang committed
322
323
324
		return
	}

325
326
327
328
329
	if req.Name == "" {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "name is required"})
		return
	}

330
331
332
	ch := make(chan any)
	go func() {
		defer close(ch)
333
334
		fn := func(r api.ProgressResponse) {
			ch <- r
335
		}
336

337
338
339
340
		regOpts := &RegistryOptions{
			Insecure: req.Insecure,
		}

341
342
343
344
		ctx, cancel := context.WithCancel(c.Request.Context())
		defer cancel()

		if err := PullModel(ctx, req.Name, regOpts, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
345
			ch <- gin.H{"error": err.Error()}
346
347
348
		}
	}()

349
350
351
352
353
	if req.Stream != nil && !*req.Stream {
		waitForStream(c, ch)
		return
	}

354
355
356
	streamResponse(c, ch)
}

357
func PushModelHandler(c *gin.Context) {
358
	var req api.PushRequest
Michael Yang's avatar
Michael Yang committed
359
360
361
362
363
364
365
	err := c.ShouldBindJSON(&req)
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Michael Yang's avatar
Michael Yang committed
366
367
		return
	}
Michael Yang's avatar
Michael Yang committed
368

369
370
371
372
373
	if req.Name == "" {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "name is required"})
		return
	}

374
375
376
	ch := make(chan any)
	go func() {
		defer close(ch)
377
378
		fn := func(r api.ProgressResponse) {
			ch <- r
379
		}
380

381
382
383
384
		regOpts := &RegistryOptions{
			Insecure: req.Insecure,
		}

Michael Yang's avatar
Michael Yang committed
385
386
387
		ctx, cancel := context.WithCancel(c.Request.Context())
		defer cancel()

388
		if err := PushModel(ctx, req.Name, regOpts, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
389
			ch <- gin.H{"error": err.Error()}
390
391
392
		}
	}()

393
394
395
396
397
	if req.Stream != nil && !*req.Stream {
		waitForStream(c, ch)
		return
	}

398
399
400
	streamResponse(c, ch)
}

401
func CreateModelHandler(c *gin.Context) {
402
	var req api.CreateRequest
Michael Yang's avatar
Michael Yang committed
403
404
405
406
407
408
409
	err := c.ShouldBindJSON(&req)
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Michael Yang's avatar
Michael Yang committed
410
		return
411
412
	}

Michael Yang's avatar
Michael Yang committed
413
414
	if req.Name == "" {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "name is required"})
415
416
417
		return
	}

Michael Yang's avatar
Michael Yang committed
418
419
	if req.Path == "" && req.Modelfile == "" {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "path or modelfile are required"})
Michael Yang's avatar
Michael Yang committed
420
421
		return
	}
Michael Yang's avatar
Michael Yang committed
422
423
424
425
426
427
428
429
430
431
432
433

	var modelfile io.Reader = strings.NewReader(req.Modelfile)
	if req.Path != "" && req.Modelfile == "" {
		bin, err := os.Open(req.Path)
		if err != nil {
			c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("error reading modelfile: %s", err)})
			return
		}
		defer bin.Close()

		modelfile = bin
	}
Michael Yang's avatar
Michael Yang committed
434
435
436
437
438
439
440

	commands, err := parser.Parse(modelfile)
	if err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

Michael Yang's avatar
Michael Yang committed
441
	ch := make(chan any)
Michael Yang's avatar
Michael Yang committed
442
443
	go func() {
		defer close(ch)
444
445
		fn := func(resp api.ProgressResponse) {
			ch <- resp
446
447
		}

448
449
450
		ctx, cancel := context.WithCancel(c.Request.Context())
		defer cancel()

Michael Yang's avatar
Michael Yang committed
451
		if err := CreateModel(ctx, req.Name, commands, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
452
			ch <- gin.H{"error": err.Error()}
453
		}
Michael Yang's avatar
Michael Yang committed
454
	}()
Michael Yang's avatar
Michael Yang committed
455

456
457
458
459
460
	if req.Stream != nil && !*req.Stream {
		waitForStream(c, ch)
		return
	}

Michael Yang's avatar
Michael Yang committed
461
	streamResponse(c, ch)
Bruce MacDonald's avatar
Bruce MacDonald committed
462
463
}

464
465
func DeleteModelHandler(c *gin.Context) {
	var req api.DeleteRequest
Michael Yang's avatar
Michael Yang committed
466
467
468
469
470
471
472
	err := c.ShouldBindJSON(&req)
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
473
474
475
		return
	}

476
477
478
479
480
	if req.Name == "" {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "name is required"})
		return
	}

481
482
483
484
	if err := DeleteModel(req.Name); err != nil {
		if os.IsNotExist(err) {
			c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Name)})
		} else {
485
486
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		}
487
488
		return
	}
Michael Yang's avatar
Michael Yang committed
489
490
491
492
493
494
495
496
497
498
499
500

	manifestsPath, err := GetManifestPath()
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

	if err := PruneDirectory(manifestsPath); err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

501
	c.JSON(http.StatusOK, nil)
502
503
}

Patrick Devine's avatar
Patrick Devine committed
504
505
func ShowModelHandler(c *gin.Context) {
	var req api.ShowRequest
Michael Yang's avatar
Michael Yang committed
506
507
508
509
510
511
512
	err := c.ShouldBindJSON(&req)
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Patrick Devine's avatar
Patrick Devine committed
513
514
515
		return
	}

516
517
518
519
520
	if req.Name == "" {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "name is required"})
		return
	}

Patrick Devine's avatar
Patrick Devine committed
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
	resp, err := GetModelInfo(req.Name)
	if err != nil {
		if os.IsNotExist(err) {
			c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Name)})
		} else {
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		}
		return
	}

	c.JSON(http.StatusOK, resp)
}

// GetModelInfo collects the license, system prompt, template, modelfile and
// parameters of the named model.
//
// Parameters are rendered one per line as the option name left-aligned in a
// 30 character column followed by its value. List-valued options produce one
// line per element. Values of unsupported types are omitted. Options are read
// by ranging over a map, so the order of the lines is not fixed.
//
// The error from GetModel or ShowModelfile is returned unchanged.
func GetModelInfo(name string) (*api.ShowResponse, error) {
	model, err := GetModel(name)
	if err != nil {
		return nil, err
	}

	resp := &api.ShowResponse{
		License:  strings.Join(model.License, "\n"),
		System:   model.System,
		Template: model.Template,
	}

	mf, err := ShowModelfile(model)
	if err != nil {
		return nil, err
	}

	resp.Modelfile = mf

	// width of the option name column
	const cs = 30

	// formatScalar renders a single option value; ok is false for types that
	// are not displayed
	formatScalar := func(v any) (s string, ok bool) {
		switch val := v.(type) {
		case string:
			return val, true
		case int:
			return strconv.Itoa(val), true
		case float64:
			// -1 prints the shortest exact representation: whole numbers stay
			// free of a decimal point and fractional values such as 0.8 are
			// no longer rounded to the nearest integer
			return strconv.FormatFloat(val, 'f', -1, 64), true
		case bool:
			return strconv.FormatBool(val), true
		}
		return "", false
	}

	var params []string
	addParam := func(k string, v any) {
		if s, ok := formatScalar(v); ok {
			params = append(params, fmt.Sprintf("%-*s %s", cs, k, s))
		}
	}

	for k, v := range model.Options {
		if list, ok := v.([]any); ok {
			for _, nv := range list {
				addParam(k, nv)
			}
			continue
		}
		addParam(k, v)
	}
	resp.Parameters = strings.Join(params, "\n")

	return resp, nil
}

585
func ListModelsHandler(c *gin.Context) {
586
	models := make([]api.ModelResponse, 0)
Patrick Devine's avatar
Patrick Devine committed
587
588
589
590
591
	fp, err := GetManifestPath()
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
Michael Yang's avatar
Michael Yang committed
592
593

	walkFunc := func(path string, info os.FileInfo, _ error) error {
Patrick Devine's avatar
Patrick Devine committed
594
		if !info.IsDir() {
Michael Yang's avatar
Michael Yang committed
595
596
597
			dir, file := filepath.Split(path)
			dir = strings.Trim(strings.TrimPrefix(dir, fp), string(os.PathSeparator))
			tag := strings.Join([]string{dir, file}, ":")
598

599
			mp := ParseModelPath(tag)
Patrick Devine's avatar
Patrick Devine committed
600
			manifest, digest, err := GetManifest(mp)
Patrick Devine's avatar
Patrick Devine committed
601
			if err != nil {
602
603
				log.Printf("skipping file: %s", fp)
				return nil
Patrick Devine's avatar
Patrick Devine committed
604
			}
Michael Yang's avatar
Michael Yang committed
605
606

			models = append(models, api.ModelResponse{
Patrick Devine's avatar
Patrick Devine committed
607
608
				Name:       mp.GetShortTagname(),
				Size:       manifest.GetTotalSize(),
Patrick Devine's avatar
Patrick Devine committed
609
				Digest:     digest,
Michael Yang's avatar
Michael Yang committed
610
611
				ModifiedAt: info.ModTime(),
			})
Patrick Devine's avatar
Patrick Devine committed
612
		}
Michael Yang's avatar
Michael Yang committed
613

Patrick Devine's avatar
Patrick Devine committed
614
		return nil
Michael Yang's avatar
Michael Yang committed
615
616
617
	}

	if err := filepath.Walk(fp, walkFunc); err != nil {
Patrick Devine's avatar
Patrick Devine committed
618
619
620
621
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

Michael Yang's avatar
Michael Yang committed
622
	c.JSON(http.StatusOK, api.ListResponse{Models: models})
Patrick Devine's avatar
Patrick Devine committed
623
624
}

Patrick Devine's avatar
Patrick Devine committed
625
626
func CopyModelHandler(c *gin.Context) {
	var req api.CopyRequest
Michael Yang's avatar
Michael Yang committed
627
628
629
630
631
632
633
	err := c.ShouldBindJSON(&req)
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Patrick Devine's avatar
Patrick Devine committed
634
635
636
		return
	}

637
638
639
640
641
	if req.Source == "" || req.Destination == "" {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "source add destination are required"})
		return
	}

Patrick Devine's avatar
Patrick Devine committed
642
643
644
645
646
647
648
649
650
651
	if err := CopyModel(req.Source, req.Destination); err != nil {
		if os.IsNotExist(err) {
			c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Source)})
		} else {
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		}
		return
	}
}

Michael Yang's avatar
Michael Yang committed
652
653
654
655
656
657
658
// defaultAllowOrigins lists the hosts Serve always adds to the CORS allowed
// origins, each over http and https, with and without a wildcard port.
var defaultAllowOrigins = []string{
	"localhost",
	"127.0.0.1",
	"0.0.0.0",
}

func Serve(ln net.Listener, allowOrigins []string) error {
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
	if noprune := os.Getenv("OLLAMA_NOPRUNE"); noprune == "" {
		// clean up unused layers and manifests
		if err := PruneLayers(); err != nil {
			return err
		}

		manifestsPath, err := GetManifestPath()
		if err != nil {
			return err
		}

		if err := PruneDirectory(manifestsPath); err != nil {
			return err
		}
	}

Michael Yang's avatar
Michael Yang committed
675
676
	config := cors.DefaultConfig()
	config.AllowWildcard = true
Michael Yang's avatar
Michael Yang committed
677
678
679
680
681
682
683
684
685
686

	config.AllowOrigins = allowOrigins
	for _, allowOrigin := range defaultAllowOrigins {
		config.AllowOrigins = append(config.AllowOrigins,
			fmt.Sprintf("http://%s", allowOrigin),
			fmt.Sprintf("https://%s", allowOrigin),
			fmt.Sprintf("http://%s:*", allowOrigin),
			fmt.Sprintf("https://%s:*", allowOrigin),
		)
	}
Michael Yang's avatar
Michael Yang committed
687

688
689
690
691
692
693
	workDir, err := os.MkdirTemp("", "ollama")
	if err != nil {
		return err
	}
	defer os.RemoveAll(workDir)

Bruce MacDonald's avatar
Bruce MacDonald committed
694
	r := gin.Default()
695
696
697
698
699
700
701
	r.Use(
		cors.New(config),
		func(c *gin.Context) {
			c.Set("workDir", workDir)
			c.Next()
		},
	)
Bruce MacDonald's avatar
Bruce MacDonald committed
702

703
704
	r.POST("/api/pull", PullModelHandler)
	r.POST("/api/generate", GenerateHandler)
Bruce MacDonald's avatar
Bruce MacDonald committed
705
	r.POST("/api/embeddings", EmbeddingHandler)
706
707
	r.POST("/api/create", CreateModelHandler)
	r.POST("/api/push", PushModelHandler)
Patrick Devine's avatar
Patrick Devine committed
708
	r.POST("/api/copy", CopyModelHandler)
709
	r.DELETE("/api/delete", DeleteModelHandler)
Patrick Devine's avatar
Patrick Devine committed
710
	r.POST("/api/show", ShowModelHandler)
Jeffrey Morgan's avatar
Jeffrey Morgan committed
711

Michael Yang's avatar
Michael Yang committed
712
713
714
715
716
717
718
719
	for _, method := range []string{http.MethodGet, http.MethodHead} {
		r.Handle(method, "/", func(c *gin.Context) {
			c.String(http.StatusOK, "Ollama is running")
		})

		r.Handle(method, "/api/tags", ListModelsHandler)
	}

Michael Yang's avatar
Michael Yang committed
720
	log.Printf("Listening on %s (version %s)", ln.Addr(), version.Version)
Jeffrey Morgan's avatar
Jeffrey Morgan committed
721
722
723
724
	s := &http.Server{
		Handler: r,
	}

725
726
	// listen for a ctrl+c and stop any loaded llm
	signals := make(chan os.Signal, 1)
727
	signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM)
728
729
	go func() {
		<-signals
730
731
		if loaded.runner != nil {
			loaded.runner.Close()
732
		}
733
		os.RemoveAll(workDir)
734
735
736
		os.Exit(0)
	}()

737
738
739
	if runtime.GOOS == "linux" {
		// check compatibility to log warnings
		if _, err := llm.CheckVRAM(); err != nil {
740
			log.Printf("Warning: GPU support may not be enabled, check you have installed GPU drivers: %v", err)
741
742
743
		}
	}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
744
745
	return s.Serve(ln)
}
Michael Yang's avatar
Michael Yang committed
746

747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
// waitForStream consumes progress values from ch and writes exactly one JSON
// response: the first progress value whose status is "success" with 200, or a
// 500 describing the first error, unexpected value, or premature close of ch.
// Progress values with any other status are discarded.
func waitForStream(c *gin.Context, ch chan any) {
	c.Header("Content-Type", "application/json")

	fail := func(msg string) {
		c.JSON(http.StatusInternalServerError, gin.H{"error": msg})
	}

	for item := range ch {
		switch v := item.(type) {
		case api.ProgressResponse:
			if v.Status != "success" {
				// intermediate progress, keep waiting
				continue
			}
			c.JSON(http.StatusOK, v)
		case gin.H:
			msg, isString := v["error"].(string)
			if !isString {
				msg = "unexpected error format in progress response"
			}
			fail(msg)
		default:
			fail("unexpected progress response")
		}
		return
	}

	fail("unexpected end of progress response")
}

Michael Yang's avatar
Michael Yang committed
772
func streamResponse(c *gin.Context, ch chan any) {
773
	c.Header("Content-Type", "application/x-ndjson")
Michael Yang's avatar
Michael Yang committed
774
775
776
777
778
779
780
781
	c.Stream(func(w io.Writer) bool {
		val, ok := <-ch
		if !ok {
			return false
		}

		bts, err := json.Marshal(val)
		if err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
782
			log.Printf("streamResponse: json.Marshal failed with %s", err)
Michael Yang's avatar
Michael Yang committed
783
784
785
			return false
		}

786
		// Delineate chunks with new-line delimiter
Michael Yang's avatar
Michael Yang committed
787
788
		bts = append(bts, '\n')
		if _, err := w.Write(bts); err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
789
			log.Printf("streamResponse: w.Write failed with %s", err)
Michael Yang's avatar
Michael Yang committed
790
791
792
793
794
795
			return false
		}

		return true
	})
}