routes.go 11.4 KB
Newer Older
Jeffrey Morgan's avatar
Jeffrey Morgan committed
1
2
3
package server

import (
4
	"context"
Michael Yang's avatar
Michael Yang committed
5
	"encoding/json"
6
	"fmt"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
7
8
9
10
	"io"
	"log"
	"net"
	"net/http"
11
	"os"
12
	"os/signal"
Michael Yang's avatar
Michael Yang committed
13
	"path/filepath"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
14
	"reflect"
Michael Yang's avatar
Michael Yang committed
15
	"strings"
Michael Yang's avatar
Michael Yang committed
16
	"sync"
17
	"syscall"
18
	"time"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
19

Michael Yang's avatar
Michael Yang committed
20
	"github.com/gin-contrib/cors"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
21
	"github.com/gin-gonic/gin"
Bruce MacDonald's avatar
Bruce MacDonald committed
22
	"gonum.org/v1/gonum/mat"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
23

Jeffrey Morgan's avatar
Jeffrey Morgan committed
24
	"github.com/jmorganca/ollama/api"
25
	"github.com/jmorganca/ollama/llm"
26
	"github.com/jmorganca/ollama/vector"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
27
28
)

Michael Yang's avatar
Michael Yang committed
29
30
31
32
33
34
35
36
37
38
39
40
41
42
// mode is the gin run mode applied at package initialisation.
var mode string = gin.DebugMode

// init normalises mode to one of the modes gin defines, falling back to
// gin.DebugMode for anything else, and then applies it.
func init() {
	recognised := mode == gin.DebugMode || mode == gin.ReleaseMode || mode == gin.TestMode
	if !recognised {
		mode = gin.DebugMode
	}

	gin.SetMode(mode)
}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
43
var loaded struct {
Michael Yang's avatar
Michael Yang committed
44
45
	mu sync.Mutex

46
	llm        llm.LLM
47
	Embeddings []vector.Embedding
Michael Yang's avatar
Michael Yang committed
48
49
50

	expireAt    time.Time
	expireTimer *time.Timer
Jeffrey Morgan's avatar
Jeffrey Morgan committed
51

52
53
	digest  string
	options api.Options
Michael Yang's avatar
Michael Yang committed
54
55
}

56
57
var defaultSessionDuration = 5 * time.Minute

Bruce MacDonald's avatar
Bruce MacDonald committed
58
// load a model into memory if it is not already loaded, it is up to the caller to lock loaded.mu before calling this function
59
func load(ctx context.Context, model *Model, reqOpts map[string]interface{}, sessionDuration time.Duration) error {
60
61
62
	opts := api.DefaultOptions()
	if err := opts.FromMap(model.Options); err != nil {
		log.Printf("could not load model options: %v", err)
Bruce MacDonald's avatar
Bruce MacDonald committed
63
		return err
64
65
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
66
	if err := opts.FromMap(reqOpts); err != nil {
67
		log.Printf("could not merge model options: %v", err)
Bruce MacDonald's avatar
Bruce MacDonald committed
68
		return err
69
70
	}

71
72
73
74
75
76
77
78
79
80
81
	// check if the loaded model is still running in a subprocess, in case something unexpected happened
	if loaded.llm != nil {
		if err := loaded.llm.Ping(ctx); err != nil {
			log.Print("loaded llm process not responding, closing now")
			// the subprocess is no longer running, so close it
			loaded.llm.Close()
			loaded.llm = nil
			loaded.digest = ""
		}
	}

82
	if model.Digest != loaded.digest || !reflect.DeepEqual(loaded.options, opts) {
Jeffrey Morgan's avatar
Jeffrey Morgan committed
83
		if loaded.llm != nil {
84
			log.Println("changing loaded model")
Jeffrey Morgan's avatar
Jeffrey Morgan committed
85
86
			loaded.llm.Close()
			loaded.llm = nil
87
			loaded.digest = ""
Michael Yang's avatar
Michael Yang committed
88
		}
Michael Yang's avatar
Michael Yang committed
89

90
91
92
93
94
		if model.Embeddings != nil && len(model.Embeddings) > 0 {
			opts.EmbeddingOnly = true // this is requried to generate embeddings, completions will still work
			loaded.Embeddings = model.Embeddings
		}

95
		llmModel, err := llm.New(model.ModelPath, model.AdapterPaths, opts)
Michael Yang's avatar
Michael Yang committed
96
		if err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
97
			return err
Michael Yang's avatar
Michael Yang committed
98
99
		}

100
101
102
103
104
		// set cache values before modifying opts
		loaded.llm = llmModel
		loaded.digest = model.Digest
		loaded.options = opts

105
		if opts.NumKeep < 0 {
Bruce MacDonald's avatar
Bruce MacDonald committed
106
			promptWithSystem, err := model.Prompt(api.GenerateRequest{}, "")
107
			if err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
108
				return err
109
110
			}

Bruce MacDonald's avatar
Bruce MacDonald committed
111
			promptNoSystem, err := model.Prompt(api.GenerateRequest{Context: []int{0}}, "")
112
			if err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
113
				return err
114
115
			}

116
117
118
119
120
121
122
123
			tokensWithSystem, err := llmModel.Encode(ctx, promptWithSystem)
			if err != nil {
				return err
			}
			tokensNoSystem, err := llmModel.Encode(ctx, promptNoSystem)
			if err != nil {
				return err
			}
124

125
			opts.NumKeep = len(tokensWithSystem) - len(tokensNoSystem) + 1
126

127
128
			llmModel.SetOptions(opts)
		}
Michael Yang's avatar
Michael Yang committed
129
	}
Jeffrey Morgan's avatar
Jeffrey Morgan committed
130
	loaded.expireAt = time.Now().Add(sessionDuration)
Bruce MacDonald's avatar
Bruce MacDonald committed
131

Jeffrey Morgan's avatar
Jeffrey Morgan committed
132
133
134
135
	if loaded.expireTimer == nil {
		loaded.expireTimer = time.AfterFunc(sessionDuration, func() {
			loaded.mu.Lock()
			defer loaded.mu.Unlock()
Michael Yang's avatar
Michael Yang committed
136

Jeffrey Morgan's avatar
Jeffrey Morgan committed
137
			if time.Now().Before(loaded.expireAt) {
Michael Yang's avatar
Michael Yang committed
138
139
140
				return
			}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
141
			if loaded.llm == nil {
Michael Yang's avatar
Michael Yang committed
142
143
144
				return
			}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
145
146
			loaded.llm.Close()
			loaded.llm = nil
147
			loaded.digest = ""
Michael Yang's avatar
Michael Yang committed
148
		})
Michael Yang's avatar
Michael Yang committed
149
	}
Jeffrey Morgan's avatar
Jeffrey Morgan committed
150
	loaded.expireTimer.Reset(sessionDuration)
Bruce MacDonald's avatar
Bruce MacDonald committed
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
	return nil
}

func GenerateHandler(c *gin.Context) {
	loaded.mu.Lock()
	defer loaded.mu.Unlock()

	checkpointStart := time.Now()

	var req api.GenerateRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	model, err := GetModel(req.Model)
	if err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

172
	sessionDuration := defaultSessionDuration // TODO: set this duration from the request if specified
173
	if err := load(c.Request.Context(), model, req.Options, sessionDuration); err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
174
175
176
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
Michael Yang's avatar
Michael Yang committed
177

Michael Yang's avatar
Michael Yang committed
178
179
	checkpointLoaded := time.Now()

Bruce MacDonald's avatar
Bruce MacDonald committed
180
181
	embedding := ""
	if model.Embeddings != nil && len(model.Embeddings) > 0 {
182
		promptEmbed, err := loaded.llm.Embedding(c.Request.Context(), req.Prompt)
Bruce MacDonald's avatar
Bruce MacDonald committed
183
184
185
186
187
188
189
190
191
192
193
194
195
		if err != nil {
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
			return
		}
		// TODO: set embed_top from specified parameters in modelfile
		embed_top := 3
		topK := vector.TopK(embed_top, mat.NewVecDense(len(promptEmbed), promptEmbed), loaded.Embeddings)
		for _, e := range topK {
			embedding = fmt.Sprintf("%s %s", embedding, e.Embedding.Data)
		}
	}

	prompt, err := model.Prompt(req, embedding)
Michael Yang's avatar
Michael Yang committed
196
197
198
199
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
Jeffrey Morgan's avatar
Jeffrey Morgan committed
200

Michael Yang's avatar
Michael Yang committed
201
202
203
	ch := make(chan any)
	go func() {
		defer close(ch)
Michael Yang's avatar
Michael Yang committed
204
		fn := func(r api.GenerateResponse) {
Jeffrey Morgan's avatar
Jeffrey Morgan committed
205
206
			loaded.expireAt = time.Now().Add(sessionDuration)
			loaded.expireTimer.Reset(sessionDuration)
Michael Yang's avatar
Michael Yang committed
207

Michael Yang's avatar
Michael Yang committed
208
209
210
			r.Model = req.Model
			r.CreatedAt = time.Now().UTC()
			if r.Done {
Michael Yang's avatar
Michael Yang committed
211
212
				r.TotalDuration = time.Since(checkpointStart)
				r.LoadDuration = checkpointLoaded.Sub(checkpointStart)
Michael Yang's avatar
Michael Yang committed
213
214
215
			}

			ch <- r
Michael Yang's avatar
Michael Yang committed
216
217
		}

218
		if err := loaded.llm.Predict(c.Request.Context(), req.Context, prompt, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
219
220
			ch <- gin.H{"error": err.Error()}
		}
Michael Yang's avatar
Michael Yang committed
221
	}()
Michael Yang's avatar
Michael Yang committed
222

Michael Yang's avatar
Michael Yang committed
223
	streamResponse(c, ch)
Michael Yang's avatar
Michael Yang committed
224
}
Michael Yang's avatar
Michael Yang committed
225

Bruce MacDonald's avatar
Bruce MacDonald committed
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
func EmbeddingHandler(c *gin.Context) {
	loaded.mu.Lock()
	defer loaded.mu.Unlock()

	var req api.EmbeddingRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	model, err := GetModel(req.Model)
	if err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}
241
	if err := load(c.Request.Context(), model, req.Options, 5*time.Minute); err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
242
243
244
245
246
247
248
249
250
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	if !loaded.options.EmbeddingOnly {
		c.JSON(http.StatusBadRequest, gin.H{"error": "embedding option must be set to true"})
		return
	}

251
	embedding, err := loaded.llm.Embedding(c.Request.Context(), req.Prompt)
Bruce MacDonald's avatar
Bruce MacDonald committed
252
253
254
255
256
257
258
259
260
261
262
263
	if err != nil {
		log.Printf("embedding generation failed: %v", err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate embedding"})
		return
	}

	resp := api.EmbeddingResponse{
		Embedding: embedding,
	}
	c.JSON(http.StatusOK, resp)
}

264
func PullModelHandler(c *gin.Context) {
Michael Yang's avatar
Michael Yang committed
265
266
267
268
269
270
	var req api.PullRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

271
272
273
	ch := make(chan any)
	go func() {
		defer close(ch)
274
275
		fn := func(r api.ProgressResponse) {
			ch <- r
276
		}
277

278
279
280
281
282
283
		regOpts := &RegistryOptions{
			Insecure: req.Insecure,
			Username: req.Username,
			Password: req.Password,
		}

284
285
286
287
		ctx, cancel := context.WithCancel(c.Request.Context())
		defer cancel()

		if err := PullModel(ctx, req.Name, regOpts, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
288
			ch <- gin.H{"error": err.Error()}
289
290
291
292
293
294
		}
	}()

	streamResponse(c, ch)
}

295
func PushModelHandler(c *gin.Context) {
296
297
298
	var req api.PushRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Michael Yang's avatar
Michael Yang committed
299
300
		return
	}
Michael Yang's avatar
Michael Yang committed
301

302
303
304
	ch := make(chan any)
	go func() {
		defer close(ch)
305
306
		fn := func(r api.ProgressResponse) {
			ch <- r
307
		}
308

309
310
311
312
313
314
		regOpts := &RegistryOptions{
			Insecure: req.Insecure,
			Username: req.Username,
			Password: req.Password,
		}

315
316
		ctx := context.Background()
		if err := PushModel(ctx, req.Name, regOpts, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
317
			ch <- gin.H{"error": err.Error()}
318
319
320
321
322
323
		}
	}()

	streamResponse(c, ch)
}

324
func CreateModelHandler(c *gin.Context) {
325
326
327
	var req api.CreateRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"message": err.Error()})
Michael Yang's avatar
Michael Yang committed
328
		return
329
330
	}

Michael Yang's avatar
Michael Yang committed
331
	ch := make(chan any)
Michael Yang's avatar
Michael Yang committed
332
333
	go func() {
		defer close(ch)
334
335
		fn := func(resp api.ProgressResponse) {
			ch <- resp
336
337
		}

338
339
340
341
		ctx, cancel := context.WithCancel(c.Request.Context())
		defer cancel()

		if err := CreateModel(ctx, req.Name, req.Path, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
342
			ch <- gin.H{"error": err.Error()}
343
		}
Michael Yang's avatar
Michael Yang committed
344
	}()
Michael Yang's avatar
Michael Yang committed
345

Michael Yang's avatar
Michael Yang committed
346
	streamResponse(c, ch)
Bruce MacDonald's avatar
Bruce MacDonald committed
347
348
}

349
350
351
352
353
354
355
// DeleteModelHandler serves DELETE /api/delete. It removes the model named in
// the request body, answering 404 when the model does not exist and 500 for
// any other failure. Nothing is written on success.
func DeleteModelHandler(c *gin.Context) {
	var req api.DeleteRequest
	if bindErr := c.ShouldBindJSON(&req); bindErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": bindErr.Error()})
		return
	}

	delErr := DeleteModel(req.Name)
	switch {
	case delErr == nil:
		// deleted; no body is sent
	case os.IsNotExist(delErr):
		c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Name)})
	default:
		c.JSON(http.StatusInternalServerError, gin.H{"error": delErr.Error()})
	}
}

func ListModelsHandler(c *gin.Context) {
367
	var models []api.ModelResponse
Patrick Devine's avatar
Patrick Devine committed
368
369
370
371
372
	fp, err := GetManifestPath()
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
Michael Yang's avatar
Michael Yang committed
373
374

	walkFunc := func(path string, info os.FileInfo, _ error) error {
Patrick Devine's avatar
Patrick Devine committed
375
		if !info.IsDir() {
Michael Yang's avatar
Michael Yang committed
376
377
378
			dir, file := filepath.Split(path)
			dir = strings.Trim(strings.TrimPrefix(dir, fp), string(os.PathSeparator))
			tag := strings.Join([]string{dir, file}, ":")
379

380
			mp := ParseModelPath(tag)
Patrick Devine's avatar
Patrick Devine committed
381
			manifest, digest, err := GetManifest(mp)
Patrick Devine's avatar
Patrick Devine committed
382
			if err != nil {
383
384
				log.Printf("skipping file: %s", fp)
				return nil
Patrick Devine's avatar
Patrick Devine committed
385
			}
Michael Yang's avatar
Michael Yang committed
386
387

			models = append(models, api.ModelResponse{
Patrick Devine's avatar
Patrick Devine committed
388
389
				Name:       mp.GetShortTagname(),
				Size:       manifest.GetTotalSize(),
Patrick Devine's avatar
Patrick Devine committed
390
				Digest:     digest,
Michael Yang's avatar
Michael Yang committed
391
392
				ModifiedAt: info.ModTime(),
			})
Patrick Devine's avatar
Patrick Devine committed
393
		}
Michael Yang's avatar
Michael Yang committed
394

Patrick Devine's avatar
Patrick Devine committed
395
		return nil
Michael Yang's avatar
Michael Yang committed
396
397
398
	}

	if err := filepath.Walk(fp, walkFunc); err != nil {
Patrick Devine's avatar
Patrick Devine committed
399
400
401
402
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

Michael Yang's avatar
Michael Yang committed
403
	c.JSON(http.StatusOK, api.ListResponse{Models: models})
Patrick Devine's avatar
Patrick Devine committed
404
405
}

Patrick Devine's avatar
Patrick Devine committed
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
// CopyModelHandler serves POST /api/copy. It copies req.Source to
// req.Destination, answering 404 when the source model does not exist and 500
// for any other failure. Nothing is written on success.
func CopyModelHandler(c *gin.Context) {
	var req api.CopyRequest
	if bindErr := c.ShouldBindJSON(&req); bindErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": bindErr.Error()})
		return
	}

	copyErr := CopyModel(req.Source, req.Destination)
	switch {
	case copyErr == nil:
		// copied; no body is sent
	case os.IsNotExist(copyErr):
		c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Source)})
	default:
		c.JSON(http.StatusInternalServerError, gin.H{"error": copyErr.Error()})
	}
}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
423
func Serve(ln net.Listener, origins []string) error {
Michael Yang's avatar
Michael Yang committed
424
425
	config := cors.DefaultConfig()
	config.AllowWildcard = true
Jeffrey Morgan's avatar
Jeffrey Morgan committed
426
	config.AllowOrigins = append(origins, []string{
Michael Yang's avatar
Michael Yang committed
427
428
429
430
431
432
433
434
		"http://localhost",
		"http://localhost:*",
		"https://localhost",
		"https://localhost:*",
		"http://127.0.0.1",
		"http://127.0.0.1:*",
		"https://127.0.0.1",
		"https://127.0.0.1:*",
435
436
437
438
		"http://0.0.0.0",
		"http://0.0.0.0:*",
		"https://0.0.0.0",
		"https://0.0.0.0:*",
Jeffrey Morgan's avatar
Jeffrey Morgan committed
439
	}...)
Michael Yang's avatar
Michael Yang committed
440

Bruce MacDonald's avatar
Bruce MacDonald committed
441
	r := gin.Default()
Michael Yang's avatar
Michael Yang committed
442
	r.Use(cors.New(config))
Bruce MacDonald's avatar
Bruce MacDonald committed
443

444
445
446
	r.GET("/", func(c *gin.Context) {
		c.String(http.StatusOK, "Ollama is running")
	})
Bruce MacDonald's avatar
Bruce MacDonald committed
447
448
449
	r.HEAD("/", func(c *gin.Context) {
		c.Status(http.StatusOK)
	})
450

451
452
	r.POST("/api/pull", PullModelHandler)
	r.POST("/api/generate", GenerateHandler)
Bruce MacDonald's avatar
Bruce MacDonald committed
453
	r.POST("/api/embeddings", EmbeddingHandler)
454
455
	r.POST("/api/create", CreateModelHandler)
	r.POST("/api/push", PushModelHandler)
Patrick Devine's avatar
Patrick Devine committed
456
	r.POST("/api/copy", CopyModelHandler)
457
458
	r.GET("/api/tags", ListModelsHandler)
	r.DELETE("/api/delete", DeleteModelHandler)
Jeffrey Morgan's avatar
Jeffrey Morgan committed
459
460
461
462
463
464

	log.Printf("Listening on %s", ln.Addr())
	s := &http.Server{
		Handler: r,
	}

465
466
467
468
469
470
471
472
473
474
475
	// listen for a ctrl+c and stop any loaded llm
	signals := make(chan os.Signal, 1)
	signal.Notify(signals, syscall.SIGINT)
	go func() {
		<-signals
		if loaded.llm != nil {
			loaded.llm.Close()
		}
		os.Exit(0)
	}()

Jeffrey Morgan's avatar
Jeffrey Morgan committed
476
477
	return s.Serve(ln)
}
Michael Yang's avatar
Michael Yang committed
478

Michael Yang's avatar
Michael Yang committed
479
func streamResponse(c *gin.Context, ch chan any) {
480
	c.Header("Content-Type", "application/x-ndjson")
Michael Yang's avatar
Michael Yang committed
481
482
483
484
485
486
487
488
	c.Stream(func(w io.Writer) bool {
		val, ok := <-ch
		if !ok {
			return false
		}

		bts, err := json.Marshal(val)
		if err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
489
			log.Printf("streamResponse: json.Marshal failed with %s", err)
Michael Yang's avatar
Michael Yang committed
490
491
492
493
494
			return false
		}

		bts = append(bts, '\n')
		if _, err := w.Write(bts); err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
495
			log.Printf("streamResponse: w.Write failed with %s", err)
Michael Yang's avatar
Michael Yang committed
496
497
498
499
500
501
			return false
		}

		return true
	})
}