routes.go 10.9 KB
Newer Older
Jeffrey Morgan's avatar
Jeffrey Morgan committed
1
2
3
package server

import (
4
	"context"
Michael Yang's avatar
Michael Yang committed
5
	"encoding/json"
6
	"errors"
7
	"fmt"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
8
9
10
11
	"io"
	"log"
	"net"
	"net/http"
12
	"os"
Michael Yang's avatar
Michael Yang committed
13
	"path/filepath"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
14
	"reflect"
Michael Yang's avatar
Michael Yang committed
15
	"strings"
Michael Yang's avatar
Michael Yang committed
16
	"sync"
17
	"time"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
18

Michael Yang's avatar
Michael Yang committed
19
	"github.com/gin-contrib/cors"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
20
	"github.com/gin-gonic/gin"
Bruce MacDonald's avatar
Bruce MacDonald committed
21
	"gonum.org/v1/gonum/mat"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
22

Jeffrey Morgan's avatar
Jeffrey Morgan committed
23
	"github.com/jmorganca/ollama/api"
24
	"github.com/jmorganca/ollama/llm"
25
	"github.com/jmorganca/ollama/vector"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
26
27
)

Michael Yang's avatar
Michael Yang committed
28
29
30
31
32
33
34
35
36
37
38
39
40
41
// mode is the gin run mode applied at start-up; it starts out as debug mode.
var mode string = gin.DebugMode

// init normalizes mode to one of gin's known modes, falling back to debug
// mode for any other value, and then applies it to gin.
func init() {
	known := mode == gin.DebugMode || mode == gin.ReleaseMode || mode == gin.TestMode
	if !known {
		mode = gin.DebugMode
	}

	gin.SetMode(mode)
}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
42
var loaded struct {
Michael Yang's avatar
Michael Yang committed
43
44
	mu sync.Mutex

45
	llm        llm.LLM
46
	Embeddings []vector.Embedding
Michael Yang's avatar
Michael Yang committed
47
48
49

	expireAt    time.Time
	expireTimer *time.Timer
Jeffrey Morgan's avatar
Jeffrey Morgan committed
50

51
52
	digest  string
	options api.Options
Michael Yang's avatar
Michael Yang committed
53
54
}

55
56
// defaultSessionDuration is the session duration GenerateHandler passes to
// load, i.e. how long a model stays loaded after it was last used.
var defaultSessionDuration = 5 * time.Minute

Bruce MacDonald's avatar
Bruce MacDonald committed
57
58
// load a model into memory if it is not already loaded, it is up to the caller to lock loaded.mu before calling this function
func load(model *Model, reqOpts map[string]interface{}, sessionDuration time.Duration) error {
59
60
61
	opts := api.DefaultOptions()
	if err := opts.FromMap(model.Options); err != nil {
		log.Printf("could not load model options: %v", err)
Bruce MacDonald's avatar
Bruce MacDonald committed
62
		return err
63
64
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
65
	if err := opts.FromMap(reqOpts); err != nil {
66
		log.Printf("could not merge model options: %v", err)
Bruce MacDonald's avatar
Bruce MacDonald committed
67
		return err
68
69
70
	}

	if model.Digest != loaded.digest || !reflect.DeepEqual(loaded.options, opts) {
Jeffrey Morgan's avatar
Jeffrey Morgan committed
71
72
73
		if loaded.llm != nil {
			loaded.llm.Close()
			loaded.llm = nil
74
			loaded.digest = ""
Michael Yang's avatar
Michael Yang committed
75
		}
Michael Yang's avatar
Michael Yang committed
76

77
78
79
80
81
		if model.Embeddings != nil && len(model.Embeddings) > 0 {
			opts.EmbeddingOnly = true // this is requried to generate embeddings, completions will still work
			loaded.Embeddings = model.Embeddings
		}

82
		llmModel, err := llm.New(model.ModelPath, model.AdapterPaths, opts)
Michael Yang's avatar
Michael Yang committed
83
		if err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
84
			return err
Michael Yang's avatar
Michael Yang committed
85
86
		}

87
88
89
90
91
		// set cache values before modifying opts
		loaded.llm = llmModel
		loaded.digest = model.Digest
		loaded.options = opts

92
		if opts.NumKeep < 0 {
Bruce MacDonald's avatar
Bruce MacDonald committed
93
			promptWithSystem, err := model.Prompt(api.GenerateRequest{}, "")
94
			if err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
95
				return err
96
97
			}

Bruce MacDonald's avatar
Bruce MacDonald committed
98
			promptNoSystem, err := model.Prompt(api.GenerateRequest{Context: []int{0}}, "")
99
			if err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
100
				return err
101
102
			}

103
104
			tokensWithSystem := llmModel.Encode(promptWithSystem)
			tokensNoSystem := llmModel.Encode(promptNoSystem)
105

106
			opts.NumKeep = len(tokensWithSystem) - len(tokensNoSystem) + 1
107

108
109
			llmModel.SetOptions(opts)
		}
Michael Yang's avatar
Michael Yang committed
110
	}
Jeffrey Morgan's avatar
Jeffrey Morgan committed
111
	loaded.expireAt = time.Now().Add(sessionDuration)
Bruce MacDonald's avatar
Bruce MacDonald committed
112

Jeffrey Morgan's avatar
Jeffrey Morgan committed
113
114
115
116
	if loaded.expireTimer == nil {
		loaded.expireTimer = time.AfterFunc(sessionDuration, func() {
			loaded.mu.Lock()
			defer loaded.mu.Unlock()
Michael Yang's avatar
Michael Yang committed
117

Jeffrey Morgan's avatar
Jeffrey Morgan committed
118
			if time.Now().Before(loaded.expireAt) {
Michael Yang's avatar
Michael Yang committed
119
120
121
				return
			}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
122
			if loaded.llm == nil {
Michael Yang's avatar
Michael Yang committed
123
124
125
				return
			}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
126
127
			loaded.llm.Close()
			loaded.llm = nil
128
			loaded.digest = ""
Michael Yang's avatar
Michael Yang committed
129
		})
Michael Yang's avatar
Michael Yang committed
130
	}
Jeffrey Morgan's avatar
Jeffrey Morgan committed
131
	loaded.expireTimer.Reset(sessionDuration)
Bruce MacDonald's avatar
Bruce MacDonald committed
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
	return nil
}

func GenerateHandler(c *gin.Context) {
	loaded.mu.Lock()
	defer loaded.mu.Unlock()

	checkpointStart := time.Now()

	var req api.GenerateRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	model, err := GetModel(req.Model)
	if err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

153
	sessionDuration := defaultSessionDuration // TODO: set this duration from the request if specified
Bruce MacDonald's avatar
Bruce MacDonald committed
154
155
156
157
	if err := load(model, req.Options, sessionDuration); err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
Michael Yang's avatar
Michael Yang committed
158

Michael Yang's avatar
Michael Yang committed
159
160
	checkpointLoaded := time.Now()

Bruce MacDonald's avatar
Bruce MacDonald committed
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
	embedding := ""
	if model.Embeddings != nil && len(model.Embeddings) > 0 {
		promptEmbed, err := loaded.llm.Embedding(req.Prompt)
		if err != nil {
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
			return
		}
		// TODO: set embed_top from specified parameters in modelfile
		embed_top := 3
		topK := vector.TopK(embed_top, mat.NewVecDense(len(promptEmbed), promptEmbed), loaded.Embeddings)
		for _, e := range topK {
			embedding = fmt.Sprintf("%s %s", embedding, e.Embedding.Data)
		}
	}

	prompt, err := model.Prompt(req, embedding)
Michael Yang's avatar
Michael Yang committed
177
178
179
180
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
Jeffrey Morgan's avatar
Jeffrey Morgan committed
181

Michael Yang's avatar
Michael Yang committed
182
183
184
	ch := make(chan any)
	go func() {
		defer close(ch)
Michael Yang's avatar
Michael Yang committed
185
		fn := func(r api.GenerateResponse) {
Jeffrey Morgan's avatar
Jeffrey Morgan committed
186
187
			loaded.expireAt = time.Now().Add(sessionDuration)
			loaded.expireTimer.Reset(sessionDuration)
Michael Yang's avatar
Michael Yang committed
188

Michael Yang's avatar
Michael Yang committed
189
190
191
			r.Model = req.Model
			r.CreatedAt = time.Now().UTC()
			if r.Done {
Michael Yang's avatar
Michael Yang committed
192
193
				r.TotalDuration = time.Since(checkpointStart)
				r.LoadDuration = checkpointLoaded.Sub(checkpointStart)
Michael Yang's avatar
Michael Yang committed
194
195
196
			}

			ch <- r
Michael Yang's avatar
Michael Yang committed
197
198
		}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
199
		if err := loaded.llm.Predict(req.Context, prompt, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
200
201
			ch <- gin.H{"error": err.Error()}
		}
Michael Yang's avatar
Michael Yang committed
202
	}()
Michael Yang's avatar
Michael Yang committed
203

Michael Yang's avatar
Michael Yang committed
204
	streamResponse(c, ch)
Michael Yang's avatar
Michael Yang committed
205
}
Michael Yang's avatar
Michael Yang committed
206

Bruce MacDonald's avatar
Bruce MacDonald committed
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
// EmbeddingHandler serves POST /api/embeddings. It loads the requested model
// and responds with the embedding of the request prompt.
//
// loaded.mu is held for the whole request. Binding, model lookup and load
// failures are reported as 400, as is a model loaded without the
// EmbeddingOnly option; a failing embedding computation is logged and
// reported as 500.
func EmbeddingHandler(c *gin.Context) {
	loaded.mu.Lock()
	defer loaded.mu.Unlock()

	var req api.EmbeddingRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	model, err := GetModel(req.Model)
	if err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	// use the shared default rather than a separate hard-coded duration so
	// both model-loading handlers keep the model for the same time
	if err := load(model, req.Options, defaultSessionDuration); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	if !loaded.options.EmbeddingOnly {
		c.JSON(http.StatusBadRequest, gin.H{"error": "embedding option must be set to true"})
		return
	}

	embedding, err := loaded.llm.Embedding(req.Prompt)
	if err != nil {
		log.Printf("embedding generation failed: %v", err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate embedding"})
		return
	}

	resp := api.EmbeddingResponse{
		Embedding: embedding,
	}
	c.JSON(http.StatusOK, resp)
}

245
func PullModelHandler(c *gin.Context) {
Michael Yang's avatar
Michael Yang committed
246
247
248
249
250
251
	var req api.PullRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

252
253
254
	ch := make(chan any)
	go func() {
		defer close(ch)
255
256
		fn := func(r api.ProgressResponse) {
			ch <- r
257
		}
258

259
260
261
262
263
264
		regOpts := &RegistryOptions{
			Insecure: req.Insecure,
			Username: req.Username,
			Password: req.Password,
		}

265
266
267
268
		ctx, cancel := context.WithCancel(c.Request.Context())
		defer cancel()

		if err := PullModel(ctx, req.Name, regOpts, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
269
			ch <- gin.H{"error": err.Error()}
270
271
272
273
274
275
		}
	}()

	streamResponse(c, ch)
}

276
func PushModelHandler(c *gin.Context) {
277
278
279
	var req api.PushRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Michael Yang's avatar
Michael Yang committed
280
281
		return
	}
Michael Yang's avatar
Michael Yang committed
282

283
284
285
	ch := make(chan any)
	go func() {
		defer close(ch)
286
287
		fn := func(r api.ProgressResponse) {
			ch <- r
288
		}
289

290
291
292
293
294
295
		regOpts := &RegistryOptions{
			Insecure: req.Insecure,
			Username: req.Username,
			Password: req.Password,
		}

296
297
		ctx := context.Background()
		if err := PushModel(ctx, req.Name, regOpts, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
298
			ch <- gin.H{"error": err.Error()}
299
300
301
302
303
304
		}
	}()

	streamResponse(c, ch)
}

305
func CreateModelHandler(c *gin.Context) {
306
307
308
	var req api.CreateRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"message": err.Error()})
Michael Yang's avatar
Michael Yang committed
309
		return
310
311
	}

Michael Yang's avatar
Michael Yang committed
312
	ch := make(chan any)
Michael Yang's avatar
Michael Yang committed
313
314
	go func() {
		defer close(ch)
315
316
		fn := func(resp api.ProgressResponse) {
			ch <- resp
317
318
		}

319
320
321
322
		ctx, cancel := context.WithCancel(c.Request.Context())
		defer cancel()

		if err := CreateModel(ctx, req.Name, req.Path, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
323
			ch <- gin.H{"error": err.Error()}
324
		}
Michael Yang's avatar
Michael Yang committed
325
	}()
Michael Yang's avatar
Michael Yang committed
326

Michael Yang's avatar
Michael Yang committed
327
	streamResponse(c, ch)
Bruce MacDonald's avatar
Bruce MacDonald committed
328
329
}

330
331
332
333
334
335
336
// DeleteModelHandler serves DELETE /api/delete. It removes the model named in
// the request; a malformed body yields 400, an unknown model 404 and any
// other failure 500. Nothing is written on success.
func DeleteModelHandler(c *gin.Context) {
	var req api.DeleteRequest
	if bindErr := c.ShouldBindJSON(&req); bindErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": bindErr.Error()})
		return
	}

	delErr := DeleteModel(req.Name)
	switch {
	case delErr == nil:
		// deleted; no response body is produced
	case os.IsNotExist(delErr):
		c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Name)})
	default:
		c.JSON(http.StatusInternalServerError, gin.H{"error": delErr.Error()})
	}
}

func ListModelsHandler(c *gin.Context) {
Patrick Devine's avatar
Patrick Devine committed
348
349
350
351
352
353
354
355
	var models []api.ListResponseModel
	fp, err := GetManifestPath()
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
	err = filepath.Walk(fp, func(path string, info os.FileInfo, err error) error {
		if err != nil {
356
357
358
359
			if errors.Is(err, os.ErrNotExist) {
				log.Printf("manifest file does not exist: %s", fp)
				return nil
			}
Patrick Devine's avatar
Patrick Devine committed
360
361
362
363
364
			return err
		}
		if !info.IsDir() {
			fi, err := os.Stat(path)
			if err != nil {
365
366
				log.Printf("skipping file: %s", fp)
				return nil
Patrick Devine's avatar
Patrick Devine committed
367
368
369
370
371
372
373
			}
			path := path[len(fp)+1:]
			slashIndex := strings.LastIndex(path, "/")
			if slashIndex == -1 {
				return nil
			}
			tag := path[:slashIndex] + ":" + path[slashIndex+1:]
374

375
			mp := ParseModelPath(tag)
Patrick Devine's avatar
Patrick Devine committed
376
			manifest, digest, err := GetManifest(mp)
Patrick Devine's avatar
Patrick Devine committed
377
			if err != nil {
378
379
				log.Printf("skipping file: %s", fp)
				return nil
Patrick Devine's avatar
Patrick Devine committed
380
381
382
383
			}
			model := api.ListResponseModel{
				Name:       mp.GetShortTagname(),
				Size:       manifest.GetTotalSize(),
Patrick Devine's avatar
Patrick Devine committed
384
				Digest:     digest,
Patrick Devine's avatar
Patrick Devine committed
385
386
387
388
389
390
391
392
393
394
395
				ModifiedAt: fi.ModTime(),
			}
			models = append(models, model)
		}
		return nil
	})
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

Michael Yang's avatar
Michael Yang committed
396
	c.JSON(http.StatusOK, api.ListResponse{Models: models})
Patrick Devine's avatar
Patrick Devine committed
397
398
}

Patrick Devine's avatar
Patrick Devine committed
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
// CopyModelHandler serves POST /api/copy. It copies the source model to the
// destination named in the request; a malformed body yields 400, an unknown
// source 404 and any other failure 500. Nothing is written on success.
func CopyModelHandler(c *gin.Context) {
	var req api.CopyRequest
	if bindErr := c.ShouldBindJSON(&req); bindErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": bindErr.Error()})
		return
	}

	copyErr := CopyModel(req.Source, req.Destination)
	switch {
	case copyErr == nil:
		// copied; no response body is produced
	case os.IsNotExist(copyErr):
		c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Source)})
	default:
		c.JSON(http.StatusInternalServerError, gin.H{"error": copyErr.Error()})
	}
}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
416
func Serve(ln net.Listener, origins []string) error {
Michael Yang's avatar
Michael Yang committed
417
418
	config := cors.DefaultConfig()
	config.AllowWildcard = true
Jeffrey Morgan's avatar
Jeffrey Morgan committed
419
	config.AllowOrigins = append(origins, []string{
Michael Yang's avatar
Michael Yang committed
420
421
422
423
424
425
426
427
		"http://localhost",
		"http://localhost:*",
		"https://localhost",
		"https://localhost:*",
		"http://127.0.0.1",
		"http://127.0.0.1:*",
		"https://127.0.0.1",
		"https://127.0.0.1:*",
428
429
430
431
		"http://0.0.0.0",
		"http://0.0.0.0:*",
		"https://0.0.0.0",
		"https://0.0.0.0:*",
Jeffrey Morgan's avatar
Jeffrey Morgan committed
432
	}...)
Michael Yang's avatar
Michael Yang committed
433

Bruce MacDonald's avatar
Bruce MacDonald committed
434
	r := gin.Default()
Michael Yang's avatar
Michael Yang committed
435
	r.Use(cors.New(config))
Bruce MacDonald's avatar
Bruce MacDonald committed
436

437
438
439
	r.GET("/", func(c *gin.Context) {
		c.String(http.StatusOK, "Ollama is running")
	})
Bruce MacDonald's avatar
Bruce MacDonald committed
440
441
442
	r.HEAD("/", func(c *gin.Context) {
		c.Status(http.StatusOK)
	})
443

444
445
	r.POST("/api/pull", PullModelHandler)
	r.POST("/api/generate", GenerateHandler)
Bruce MacDonald's avatar
Bruce MacDonald committed
446
	r.POST("/api/embeddings", EmbeddingHandler)
447
448
	r.POST("/api/create", CreateModelHandler)
	r.POST("/api/push", PushModelHandler)
Patrick Devine's avatar
Patrick Devine committed
449
	r.POST("/api/copy", CopyModelHandler)
450
451
	r.GET("/api/tags", ListModelsHandler)
	r.DELETE("/api/delete", DeleteModelHandler)
Jeffrey Morgan's avatar
Jeffrey Morgan committed
452
453
454
455
456
457
458
459

	log.Printf("Listening on %s", ln.Addr())
	s := &http.Server{
		Handler: r,
	}

	return s.Serve(ln)
}
Michael Yang's avatar
Michael Yang committed
460

Michael Yang's avatar
Michael Yang committed
461
func streamResponse(c *gin.Context, ch chan any) {
462
	c.Header("Content-Type", "application/x-ndjson")
Michael Yang's avatar
Michael Yang committed
463
464
465
466
467
468
469
470
	c.Stream(func(w io.Writer) bool {
		val, ok := <-ch
		if !ok {
			return false
		}

		bts, err := json.Marshal(val)
		if err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
471
			log.Printf("streamResponse: json.Marshal failed with %s", err)
Michael Yang's avatar
Michael Yang committed
472
473
474
475
476
			return false
		}

		bts = append(bts, '\n')
		if _, err := w.Write(bts); err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
477
			log.Printf("streamResponse: w.Write failed with %s", err)
Michael Yang's avatar
Michael Yang committed
478
479
480
481
482
483
			return false
		}

		return true
	})
}