routes.go 16.1 KB
Newer Older
Jeffrey Morgan's avatar
Jeffrey Morgan committed
1
2
3
package server

import (
4
	"context"
Michael Yang's avatar
Michael Yang committed
5
	"encoding/json"
6
	"errors"
7
	"fmt"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
8
	"io"
9
	"io/fs"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
10
11
12
	"log"
	"net"
	"net/http"
13
	"os"
14
	"os/signal"
Michael Yang's avatar
Michael Yang committed
15
	"path/filepath"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
16
	"reflect"
17
	"runtime"
Patrick Devine's avatar
Patrick Devine committed
18
	"strconv"
Michael Yang's avatar
Michael Yang committed
19
	"strings"
Michael Yang's avatar
Michael Yang committed
20
	"sync"
21
	"syscall"
22
	"time"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
23

Michael Yang's avatar
Michael Yang committed
24
	"github.com/gin-contrib/cors"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
25
	"github.com/gin-gonic/gin"
Bruce MacDonald's avatar
Bruce MacDonald committed
26
	"gonum.org/v1/gonum/mat"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
27

Jeffrey Morgan's avatar
Jeffrey Morgan committed
28
	"github.com/jmorganca/ollama/api"
29
	"github.com/jmorganca/ollama/llm"
30
	"github.com/jmorganca/ollama/vector"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
31
32
)

Michael Yang's avatar
Michael Yang committed
33
34
35
36
37
38
39
40
41
42
43
44
45
46
// mode is the gin run mode handed to gin.SetMode during package
// initialization.
var mode string = gin.DebugMode

// init normalizes mode and applies it to gin. Any value other than gin's
// debug, release or test mode is replaced by gin.DebugMode.
func init() {
	switch mode {
	case gin.DebugMode, gin.ReleaseMode, gin.TestMode:
		// recognized mode: keep it as is
	default:
		mode = gin.DebugMode
	}

	gin.SetMode(mode)
}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
47
var loaded struct {
Michael Yang's avatar
Michael Yang committed
48
49
	mu sync.Mutex

50
	llm        llm.LLM
51
	Embeddings []vector.Embedding
Michael Yang's avatar
Michael Yang committed
52
53
54

	expireAt    time.Time
	expireTimer *time.Timer
Jeffrey Morgan's avatar
Jeffrey Morgan committed
55

56
57
	digest  string
	options api.Options
Michael Yang's avatar
Michael Yang committed
58
59
}

60
61
// defaultSessionDuration is the session duration GenerateHandler passes to
// load, i.e. how long a loaded model is kept before the expiry timer closes it.
var defaultSessionDuration = 5 * time.Minute

Bruce MacDonald's avatar
Bruce MacDonald committed
62
// load a model into memory if it is not already loaded, it is up to the caller to lock loaded.mu before calling this function
63
func load(ctx context.Context, workDir string, model *Model, reqOpts map[string]interface{}, sessionDuration time.Duration) error {
64
65
66
	opts := api.DefaultOptions()
	if err := opts.FromMap(model.Options); err != nil {
		log.Printf("could not load model options: %v", err)
Bruce MacDonald's avatar
Bruce MacDonald committed
67
		return err
68
69
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
70
71
	if err := opts.FromMap(reqOpts); err != nil {
		return err
72
73
	}

74
75
76
77
78
79
80
81
82
83
84
	// check if the loaded model is still running in a subprocess, in case something unexpected happened
	if loaded.llm != nil {
		if err := loaded.llm.Ping(ctx); err != nil {
			log.Print("loaded llm process not responding, closing now")
			// the subprocess is no longer running, so close it
			loaded.llm.Close()
			loaded.llm = nil
			loaded.digest = ""
		}
	}

85
	if model.Digest != loaded.digest || !reflect.DeepEqual(loaded.options, opts) {
Jeffrey Morgan's avatar
Jeffrey Morgan committed
86
		if loaded.llm != nil {
87
			log.Println("changing loaded model")
Jeffrey Morgan's avatar
Jeffrey Morgan committed
88
89
			loaded.llm.Close()
			loaded.llm = nil
90
			loaded.digest = ""
Michael Yang's avatar
Michael Yang committed
91
		}
Michael Yang's avatar
Michael Yang committed
92

93
94
95
96
97
		if model.Embeddings != nil && len(model.Embeddings) > 0 {
			opts.EmbeddingOnly = true // this is requried to generate embeddings, completions will still work
			loaded.Embeddings = model.Embeddings
		}

98
		llmModel, err := llm.New(workDir, model.ModelPath, model.AdapterPaths, opts)
Michael Yang's avatar
Michael Yang committed
99
		if err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
100
			return err
Michael Yang's avatar
Michael Yang committed
101
102
		}

103
104
105
106
107
		// set cache values before modifying opts
		loaded.llm = llmModel
		loaded.digest = model.Digest
		loaded.options = opts

108
		if opts.NumKeep < 0 {
Bruce MacDonald's avatar
Bruce MacDonald committed
109
			promptWithSystem, err := model.Prompt(api.GenerateRequest{}, "")
110
			if err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
111
				return err
112
113
			}

Bruce MacDonald's avatar
Bruce MacDonald committed
114
			promptNoSystem, err := model.Prompt(api.GenerateRequest{Context: []int{0}}, "")
115
			if err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
116
				return err
117
118
			}

119
120
121
122
			tokensWithSystem, err := llmModel.Encode(ctx, promptWithSystem)
			if err != nil {
				return err
			}
Michael Yang's avatar
Michael Yang committed
123

124
125
126
127
			tokensNoSystem, err := llmModel.Encode(ctx, promptNoSystem)
			if err != nil {
				return err
			}
128

Michael Yang's avatar
Michael Yang committed
129
			opts.NumKeep = len(tokensWithSystem) - len(tokensNoSystem)
130

131
132
			llmModel.SetOptions(opts)
		}
Michael Yang's avatar
Michael Yang committed
133
	}
134

Jeffrey Morgan's avatar
Jeffrey Morgan committed
135
	loaded.expireAt = time.Now().Add(sessionDuration)
Bruce MacDonald's avatar
Bruce MacDonald committed
136

Jeffrey Morgan's avatar
Jeffrey Morgan committed
137
138
139
140
	if loaded.expireTimer == nil {
		loaded.expireTimer = time.AfterFunc(sessionDuration, func() {
			loaded.mu.Lock()
			defer loaded.mu.Unlock()
Michael Yang's avatar
Michael Yang committed
141

Jeffrey Morgan's avatar
Jeffrey Morgan committed
142
			if time.Now().Before(loaded.expireAt) {
Michael Yang's avatar
Michael Yang committed
143
144
145
				return
			}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
146
			if loaded.llm == nil {
Michael Yang's avatar
Michael Yang committed
147
148
149
				return
			}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
150
151
			loaded.llm.Close()
			loaded.llm = nil
152
			loaded.digest = ""
Michael Yang's avatar
Michael Yang committed
153
		})
Michael Yang's avatar
Michael Yang committed
154
	}
155

Jeffrey Morgan's avatar
Jeffrey Morgan committed
156
	loaded.expireTimer.Reset(sessionDuration)
Bruce MacDonald's avatar
Bruce MacDonald committed
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
	return nil
}

func GenerateHandler(c *gin.Context) {
	loaded.mu.Lock()
	defer loaded.mu.Unlock()

	checkpointStart := time.Now()

	var req api.GenerateRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	model, err := GetModel(req.Model)
	if err != nil {
174
175
176
177
178
		var pErr *fs.PathError
		if errors.As(err, &pErr) {
			c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found, try pulling it first", req.Model)})
			return
		}
Bruce MacDonald's avatar
Bruce MacDonald committed
179
180
181
182
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

183
184
185
186
187
	workDir := c.GetString("workDir")

	// TODO: set this duration from the request if specified
	sessionDuration := defaultSessionDuration
	if err := load(c.Request.Context(), workDir, model, req.Options, sessionDuration); err != nil {
188
189
190
191
		if errors.Is(err, api.ErrInvalidOpts) {
			c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
			return
		}
Bruce MacDonald's avatar
Bruce MacDonald committed
192
193
194
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
Michael Yang's avatar
Michael Yang committed
195

Michael Yang's avatar
Michael Yang committed
196
197
	checkpointLoaded := time.Now()

Bruce MacDonald's avatar
Bruce MacDonald committed
198
199
	embedding := ""
	if model.Embeddings != nil && len(model.Embeddings) > 0 {
200
		promptEmbed, err := loaded.llm.Embedding(c.Request.Context(), req.Prompt)
Bruce MacDonald's avatar
Bruce MacDonald committed
201
202
203
204
205
206
207
208
209
210
211
212
213
		if err != nil {
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
			return
		}
		// TODO: set embed_top from specified parameters in modelfile
		embed_top := 3
		topK := vector.TopK(embed_top, mat.NewVecDense(len(promptEmbed), promptEmbed), loaded.Embeddings)
		for _, e := range topK {
			embedding = fmt.Sprintf("%s %s", embedding, e.Embedding.Data)
		}
	}

	prompt, err := model.Prompt(req, embedding)
Michael Yang's avatar
Michael Yang committed
214
215
216
217
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
Jeffrey Morgan's avatar
Jeffrey Morgan committed
218

Michael Yang's avatar
Michael Yang committed
219
220
221
	ch := make(chan any)
	go func() {
		defer close(ch)
Michael Yang's avatar
Michael Yang committed
222
		fn := func(r api.GenerateResponse) {
Jeffrey Morgan's avatar
Jeffrey Morgan committed
223
224
			loaded.expireAt = time.Now().Add(sessionDuration)
			loaded.expireTimer.Reset(sessionDuration)
Michael Yang's avatar
Michael Yang committed
225

Michael Yang's avatar
Michael Yang committed
226
227
228
			r.Model = req.Model
			r.CreatedAt = time.Now().UTC()
			if r.Done {
Michael Yang's avatar
Michael Yang committed
229
230
				r.TotalDuration = time.Since(checkpointStart)
				r.LoadDuration = checkpointLoaded.Sub(checkpointStart)
Michael Yang's avatar
Michael Yang committed
231
232
233
			}

			ch <- r
Michael Yang's avatar
Michael Yang committed
234
235
		}

236
237
		// an empty request loads the model
		if req.Prompt == "" && req.Template == "" && req.System == "" {
Patrick Devine's avatar
Patrick Devine committed
238
239
240
241
242
			ch <- api.GenerateResponse{Model: req.Model, Done: true}
		} else {
			if err := loaded.llm.Predict(c.Request.Context(), req.Context, prompt, fn); err != nil {
				ch <- gin.H{"error": err.Error()}
			}
Michael Yang's avatar
Michael Yang committed
243
		}
Michael Yang's avatar
Michael Yang committed
244
	}()
Michael Yang's avatar
Michael Yang committed
245

246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
	if req.Stream != nil && !*req.Stream {
		var response api.GenerateResponse
		generated := ""
		for resp := range ch {
			if r, ok := resp.(api.GenerateResponse); ok {
				generated += r.Response
				response = r
			} else {
				c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
				return
			}
		}
		response.Response = generated
		c.JSON(http.StatusOK, response)
		return
	}

Michael Yang's avatar
Michael Yang committed
263
	streamResponse(c, ch)
Michael Yang's avatar
Michael Yang committed
264
}
Michael Yang's avatar
Michael Yang committed
265

Bruce MacDonald's avatar
Bruce MacDonald committed
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
func EmbeddingHandler(c *gin.Context) {
	loaded.mu.Lock()
	defer loaded.mu.Unlock()

	var req api.EmbeddingRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	model, err := GetModel(req.Model)
	if err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}
281
282
283

	workDir := c.GetString("workDir")
	if err := load(c.Request.Context(), workDir, model, req.Options, 5*time.Minute); err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
284
285
286
287
288
289
290
291
292
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	if !loaded.options.EmbeddingOnly {
		c.JSON(http.StatusBadRequest, gin.H{"error": "embedding option must be set to true"})
		return
	}

293
	embedding, err := loaded.llm.Embedding(c.Request.Context(), req.Prompt)
Bruce MacDonald's avatar
Bruce MacDonald committed
294
295
296
297
298
299
300
301
302
303
304
305
	if err != nil {
		log.Printf("embedding generation failed: %v", err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate embedding"})
		return
	}

	resp := api.EmbeddingResponse{
		Embedding: embedding,
	}
	c.JSON(http.StatusOK, resp)
}

306
func PullModelHandler(c *gin.Context) {
Michael Yang's avatar
Michael Yang committed
307
308
309
310
311
312
	var req api.PullRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

313
314
315
	ch := make(chan any)
	go func() {
		defer close(ch)
316
317
		fn := func(r api.ProgressResponse) {
			ch <- r
318
		}
319

320
321
322
323
		regOpts := &RegistryOptions{
			Insecure: req.Insecure,
		}

324
325
326
327
		ctx, cancel := context.WithCancel(c.Request.Context())
		defer cancel()

		if err := PullModel(ctx, req.Name, regOpts, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
328
			ch <- gin.H{"error": err.Error()}
329
330
331
		}
	}()

332
333
334
335
336
	if req.Stream != nil && !*req.Stream {
		waitForStream(c, ch)
		return
	}

337
338
339
	streamResponse(c, ch)
}

340
func PushModelHandler(c *gin.Context) {
341
342
343
	var req api.PushRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Michael Yang's avatar
Michael Yang committed
344
345
		return
	}
Michael Yang's avatar
Michael Yang committed
346

347
348
349
	ch := make(chan any)
	go func() {
		defer close(ch)
350
351
		fn := func(r api.ProgressResponse) {
			ch <- r
352
		}
353

354
355
356
357
		regOpts := &RegistryOptions{
			Insecure: req.Insecure,
		}

358
359
		ctx := context.Background()
		if err := PushModel(ctx, req.Name, regOpts, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
360
			ch <- gin.H{"error": err.Error()}
361
362
363
		}
	}()

364
365
366
367
368
	if req.Stream != nil && !*req.Stream {
		waitForStream(c, ch)
		return
	}

369
370
371
	streamResponse(c, ch)
}

372
func CreateModelHandler(c *gin.Context) {
373
374
375
	var req api.CreateRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"message": err.Error()})
Michael Yang's avatar
Michael Yang committed
376
		return
377
378
	}

379
380
	workDir := c.GetString("workDir")

Michael Yang's avatar
Michael Yang committed
381
	ch := make(chan any)
Michael Yang's avatar
Michael Yang committed
382
383
	go func() {
		defer close(ch)
384
385
		fn := func(resp api.ProgressResponse) {
			ch <- resp
386
387
		}

388
389
390
		ctx, cancel := context.WithCancel(c.Request.Context())
		defer cancel()

391
		if err := CreateModel(ctx, workDir, req.Name, req.Path, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
392
			ch <- gin.H{"error": err.Error()}
393
		}
Michael Yang's avatar
Michael Yang committed
394
	}()
Michael Yang's avatar
Michael Yang committed
395

396
397
398
399
400
	if req.Stream != nil && !*req.Stream {
		waitForStream(c, ch)
		return
	}

Michael Yang's avatar
Michael Yang committed
401
	streamResponse(c, ch)
Bruce MacDonald's avatar
Bruce MacDonald committed
402
403
}

404
405
406
407
408
409
410
func DeleteModelHandler(c *gin.Context) {
	var req api.DeleteRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

411
412
413
414
	if err := DeleteModel(req.Name); err != nil {
		if os.IsNotExist(err) {
			c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Name)})
		} else {
415
416
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		}
417
418
		return
	}
Michael Yang's avatar
Michael Yang committed
419
420
421
422
423
424
425
426
427
428
429
430

	manifestsPath, err := GetManifestPath()
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

	if err := PruneDirectory(manifestsPath); err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

431
	c.JSON(http.StatusOK, nil)
432
433
}

Patrick Devine's avatar
Patrick Devine committed
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
// ShowModelHandler serves POST /api/show. It returns the api.ShowResponse
// produced by GetModelInfo for the requested model, answering 404 when the
// model does not exist and 500 on any other failure.
func ShowModelHandler(c *gin.Context) {
	var req api.ShowRequest
	if bindErr := c.ShouldBindJSON(&req); bindErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": bindErr.Error()})
		return
	}

	info, infoErr := GetModelInfo(req.Name)
	if infoErr == nil {
		c.JSON(http.StatusOK, info)
		return
	}

	status, msg := http.StatusInternalServerError, infoErr.Error()
	if os.IsNotExist(infoErr) {
		status, msg = http.StatusNotFound, fmt.Sprintf("model '%s' not found", req.Name)
	}
	c.JSON(status, gin.H{"error": msg})
}

// GetModelInfo looks up the model called name and returns its license, system
// prompt, template, modelfile and parameters as an api.ShowResponse.
//
// Parameters are rendered one per line as the option name left-aligned in a
// 30-character column followed by its value; list-valued options produce one
// line per element. Options of any other type are omitted. Lines follow map
// iteration order, which Go leaves unspecified.
//
// Errors from GetModel and ShowModelfile are returned unchanged.
func GetModelInfo(name string) (*api.ShowResponse, error) {
	model, err := GetModel(name)
	if err != nil {
		return nil, err
	}

	resp := &api.ShowResponse{
		License:  strings.Join(model.License, "\n"),
		System:   model.System,
		Template: model.Template,
	}

	mf, err := ShowModelfile(model)
	if err != nil {
		return nil, err
	}

	resp.Modelfile = mf

	cs := 30

	// line renders one scalar option; ok is false for unsupported types.
	line := func(k string, v interface{}) (s string, ok bool) {
		switch val := v.(type) {
		case string:
			s = val
		case int:
			s = strconv.Itoa(val)
		case float64:
			// precision -1 keeps fractional values such as 0.8 intact while
			// still printing whole numbers without a decimal point
			s = strconv.FormatFloat(val, 'f', -1, 64)
		case bool:
			s = strconv.FormatBool(val)
		default:
			return "", false
		}
		return fmt.Sprintf("%-*s %s", cs, k, s), true
	}

	var params []string
	for k, v := range model.Options {
		if list, isList := v.([]interface{}); isList {
			for _, nv := range list {
				if s, ok := line(k, nv); ok {
					params = append(params, s)
				}
			}
			continue
		}

		if s, ok := line(k, v); ok {
			params = append(params, s)
		}
	}
	resp.Parameters = strings.Join(params, "\n")

	return resp, nil
}

505
func ListModelsHandler(c *gin.Context) {
506
	var models []api.ModelResponse
Patrick Devine's avatar
Patrick Devine committed
507
508
509
510
511
	fp, err := GetManifestPath()
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
Michael Yang's avatar
Michael Yang committed
512
513

	walkFunc := func(path string, info os.FileInfo, _ error) error {
Patrick Devine's avatar
Patrick Devine committed
514
		if !info.IsDir() {
Michael Yang's avatar
Michael Yang committed
515
516
517
			dir, file := filepath.Split(path)
			dir = strings.Trim(strings.TrimPrefix(dir, fp), string(os.PathSeparator))
			tag := strings.Join([]string{dir, file}, ":")
518

519
			mp := ParseModelPath(tag)
Patrick Devine's avatar
Patrick Devine committed
520
			manifest, digest, err := GetManifest(mp)
Patrick Devine's avatar
Patrick Devine committed
521
			if err != nil {
522
523
				log.Printf("skipping file: %s", fp)
				return nil
Patrick Devine's avatar
Patrick Devine committed
524
			}
Michael Yang's avatar
Michael Yang committed
525
526

			models = append(models, api.ModelResponse{
Patrick Devine's avatar
Patrick Devine committed
527
528
				Name:       mp.GetShortTagname(),
				Size:       manifest.GetTotalSize(),
Patrick Devine's avatar
Patrick Devine committed
529
				Digest:     digest,
Michael Yang's avatar
Michael Yang committed
530
531
				ModifiedAt: info.ModTime(),
			})
Patrick Devine's avatar
Patrick Devine committed
532
		}
Michael Yang's avatar
Michael Yang committed
533

Patrick Devine's avatar
Patrick Devine committed
534
		return nil
Michael Yang's avatar
Michael Yang committed
535
536
537
	}

	if err := filepath.Walk(fp, walkFunc); err != nil {
Patrick Devine's avatar
Patrick Devine committed
538
539
540
541
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

Michael Yang's avatar
Michael Yang committed
542
	c.JSON(http.StatusOK, api.ListResponse{Models: models})
Patrick Devine's avatar
Patrick Devine committed
543
544
}

Patrick Devine's avatar
Patrick Devine committed
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
// CopyModelHandler serves POST /api/copy. It copies the model named by the
// request's source to its destination, answering 404 when the source does not
// exist and 500 on any other failure. Nothing is written on success.
func CopyModelHandler(c *gin.Context) {
	var req api.CopyRequest
	if bindErr := c.ShouldBindJSON(&req); bindErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": bindErr.Error()})
		return
	}

	copyErr := CopyModel(req.Source, req.Destination)
	if copyErr == nil {
		return
	}

	status, msg := http.StatusInternalServerError, copyErr.Error()
	if os.IsNotExist(copyErr) {
		status, msg = http.StatusNotFound, fmt.Sprintf("model '%s' not found", req.Source)
	}
	c.JSON(status, gin.H{"error": msg})
}

Michael Yang's avatar
Michael Yang committed
562
563
564
565
566
567
568
// defaultAllowOrigins lists the hosts whose http and https origins, with or
// without a port, Serve always adds to the CORS allow list.
var defaultAllowOrigins = []string{
	"localhost",
	"127.0.0.1",
	"0.0.0.0",
}

func Serve(ln net.Listener, allowOrigins []string) error {
Michael Yang's avatar
Michael Yang committed
569
570
	config := cors.DefaultConfig()
	config.AllowWildcard = true
Michael Yang's avatar
Michael Yang committed
571
572
573
574
575
576
577
578
579
580

	config.AllowOrigins = allowOrigins
	for _, allowOrigin := range defaultAllowOrigins {
		config.AllowOrigins = append(config.AllowOrigins,
			fmt.Sprintf("http://%s", allowOrigin),
			fmt.Sprintf("https://%s", allowOrigin),
			fmt.Sprintf("http://%s:*", allowOrigin),
			fmt.Sprintf("https://%s:*", allowOrigin),
		)
	}
Michael Yang's avatar
Michael Yang committed
581

582
583
584
585
586
587
	workDir, err := os.MkdirTemp("", "ollama")
	if err != nil {
		return err
	}
	defer os.RemoveAll(workDir)

Bruce MacDonald's avatar
Bruce MacDonald committed
588
	r := gin.Default()
589
590
591
592
593
594
595
	r.Use(
		cors.New(config),
		func(c *gin.Context) {
			c.Set("workDir", workDir)
			c.Next()
		},
	)
Bruce MacDonald's avatar
Bruce MacDonald committed
596

597
598
	r.POST("/api/pull", PullModelHandler)
	r.POST("/api/generate", GenerateHandler)
Bruce MacDonald's avatar
Bruce MacDonald committed
599
	r.POST("/api/embeddings", EmbeddingHandler)
600
601
	r.POST("/api/create", CreateModelHandler)
	r.POST("/api/push", PushModelHandler)
Patrick Devine's avatar
Patrick Devine committed
602
	r.POST("/api/copy", CopyModelHandler)
603
	r.DELETE("/api/delete", DeleteModelHandler)
Patrick Devine's avatar
Patrick Devine committed
604
	r.POST("/api/show", ShowModelHandler)
Jeffrey Morgan's avatar
Jeffrey Morgan committed
605

Michael Yang's avatar
Michael Yang committed
606
607
608
609
610
611
612
613
	for _, method := range []string{http.MethodGet, http.MethodHead} {
		r.Handle(method, "/", func(c *gin.Context) {
			c.String(http.StatusOK, "Ollama is running")
		})

		r.Handle(method, "/api/tags", ListModelsHandler)
	}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
614
615
616
617
618
	log.Printf("Listening on %s", ln.Addr())
	s := &http.Server{
		Handler: r,
	}

619
620
	// listen for a ctrl+c and stop any loaded llm
	signals := make(chan os.Signal, 1)
621
	signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM)
622
623
	go func() {
		<-signals
624
625
626
		if loaded.llm != nil {
			loaded.llm.Close()
		}
627
		os.RemoveAll(workDir)
628
629
630
		os.Exit(0)
	}()

631
632
633
	if runtime.GOOS == "linux" {
		// check compatibility to log warnings
		if _, err := llm.CheckVRAM(); err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
634
			log.Printf("Warning: GPU support may not enabled, check you have installed install GPU drivers: %v", err)
635
636
637
		}
	}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
638
639
	return s.Serve(ln)
}
Michael Yang's avatar
Michael Yang committed
640

641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
// waitForStream consumes ch until the operation finishes and writes a single
// JSON response: the api.ProgressResponse whose status is "success" with 200,
// or a 500 error if an error message arrives, an unexpected value is received,
// or ch is closed before success. Other progress updates are discarded.
func waitForStream(c *gin.Context, ch chan interface{}) {
	c.Header("Content-Type", "application/json")

	fail := func(msg string) {
		c.JSON(http.StatusInternalServerError, gin.H{"error": msg})
	}

	for item := range ch {
		switch v := item.(type) {
		case api.ProgressResponse:
			if v.Status != "success" {
				// intermediate progress: keep waiting
				continue
			}
			c.JSON(http.StatusOK, v)
			return
		case gin.H:
			text, isString := v["error"].(string)
			if !isString {
				text = "unexpected error format in progress response"
			}
			fail(text)
			return
		default:
			fail("unexpected progress response")
			return
		}
	}

	fail("unexpected end of progress response")
}

Michael Yang's avatar
Michael Yang committed
666
func streamResponse(c *gin.Context, ch chan any) {
667
	c.Header("Content-Type", "application/x-ndjson")
Michael Yang's avatar
Michael Yang committed
668
669
670
671
672
673
674
675
	c.Stream(func(w io.Writer) bool {
		val, ok := <-ch
		if !ok {
			return false
		}

		bts, err := json.Marshal(val)
		if err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
676
			log.Printf("streamResponse: json.Marshal failed with %s", err)
Michael Yang's avatar
Michael Yang committed
677
678
679
			return false
		}

680
		// Delineate chunks with new-line delimiter
Michael Yang's avatar
Michael Yang committed
681
682
		bts = append(bts, '\n')
		if _, err := w.Write(bts); err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
683
			log.Printf("streamResponse: w.Write failed with %s", err)
Michael Yang's avatar
Michael Yang committed
684
685
686
687
688
689
			return false
		}

		return true
	})
}