routes.go 10.6 KB
Newer Older
Jeffrey Morgan's avatar
Jeffrey Morgan committed
1
2
3
package server

import (
4
	"context"
Michael Yang's avatar
Michael Yang committed
5
	"encoding/json"
6
	"errors"
7
	"fmt"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
8
9
10
11
	"io"
	"log"
	"net"
	"net/http"
12
	"os"
Michael Yang's avatar
Michael Yang committed
13
	"path/filepath"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
14
	"reflect"
Michael Yang's avatar
Michael Yang committed
15
	"strings"
Michael Yang's avatar
Michael Yang committed
16
	"sync"
17
	"time"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
18

Michael Yang's avatar
Michael Yang committed
19
	"github.com/gin-contrib/cors"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
20
	"github.com/gin-gonic/gin"
Bruce MacDonald's avatar
Bruce MacDonald committed
21
	"gonum.org/v1/gonum/mat"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
22

Jeffrey Morgan's avatar
Jeffrey Morgan committed
23
	"github.com/jmorganca/ollama/api"
24
	"github.com/jmorganca/ollama/llm"
25
	"github.com/jmorganca/ollama/vector"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
26
27
)

Jeffrey Morgan's avatar
Jeffrey Morgan committed
28
var loaded struct {
Michael Yang's avatar
Michael Yang committed
29
30
	mu sync.Mutex

31
	llm        llm.LLM
32
	Embeddings []vector.Embedding
Michael Yang's avatar
Michael Yang committed
33
34
35

	expireAt    time.Time
	expireTimer *time.Timer
Jeffrey Morgan's avatar
Jeffrey Morgan committed
36

37
38
	digest  string
	options api.Options
Michael Yang's avatar
Michael Yang committed
39
40
}

41
42
// defaultSessionDuration is the session length GenerateHandler passes to load,
// which uses it to schedule the unload of the resident model.
var defaultSessionDuration = 5 * time.Minute

Bruce MacDonald's avatar
Bruce MacDonald committed
43
44
// load a model into memory if it is not already loaded, it is up to the caller to lock loaded.mu before calling this function
func load(model *Model, reqOpts map[string]interface{}, sessionDuration time.Duration) error {
45
46
47
	opts := api.DefaultOptions()
	if err := opts.FromMap(model.Options); err != nil {
		log.Printf("could not load model options: %v", err)
Bruce MacDonald's avatar
Bruce MacDonald committed
48
		return err
49
50
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
51
	if err := opts.FromMap(reqOpts); err != nil {
52
		log.Printf("could not merge model options: %v", err)
Bruce MacDonald's avatar
Bruce MacDonald committed
53
		return err
54
55
56
	}

	if model.Digest != loaded.digest || !reflect.DeepEqual(loaded.options, opts) {
Jeffrey Morgan's avatar
Jeffrey Morgan committed
57
58
59
		if loaded.llm != nil {
			loaded.llm.Close()
			loaded.llm = nil
60
			loaded.digest = ""
Michael Yang's avatar
Michael Yang committed
61
		}
Michael Yang's avatar
Michael Yang committed
62

63
64
65
66
67
		if model.Embeddings != nil && len(model.Embeddings) > 0 {
			opts.EmbeddingOnly = true // this is requried to generate embeddings, completions will still work
			loaded.Embeddings = model.Embeddings
		}

68
		llmModel, err := llm.New(model.ModelPath, model.AdapterPaths, opts)
Michael Yang's avatar
Michael Yang committed
69
		if err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
70
			return err
Michael Yang's avatar
Michael Yang committed
71
72
		}

73
74
75
76
77
		// set cache values before modifying opts
		loaded.llm = llmModel
		loaded.digest = model.Digest
		loaded.options = opts

78
		if opts.NumKeep < 0 {
Bruce MacDonald's avatar
Bruce MacDonald committed
79
			promptWithSystem, err := model.Prompt(api.GenerateRequest{}, "")
80
			if err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
81
				return err
82
83
			}

Bruce MacDonald's avatar
Bruce MacDonald committed
84
			promptNoSystem, err := model.Prompt(api.GenerateRequest{Context: []int{0}}, "")
85
			if err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
86
				return err
87
88
			}

89
90
			tokensWithSystem := llmModel.Encode(promptWithSystem)
			tokensNoSystem := llmModel.Encode(promptNoSystem)
91

92
			opts.NumKeep = len(tokensWithSystem) - len(tokensNoSystem) + 1
93

94
95
			llmModel.SetOptions(opts)
		}
Michael Yang's avatar
Michael Yang committed
96
	}
Jeffrey Morgan's avatar
Jeffrey Morgan committed
97
	loaded.expireAt = time.Now().Add(sessionDuration)
Bruce MacDonald's avatar
Bruce MacDonald committed
98

Jeffrey Morgan's avatar
Jeffrey Morgan committed
99
100
101
102
	if loaded.expireTimer == nil {
		loaded.expireTimer = time.AfterFunc(sessionDuration, func() {
			loaded.mu.Lock()
			defer loaded.mu.Unlock()
Michael Yang's avatar
Michael Yang committed
103

Jeffrey Morgan's avatar
Jeffrey Morgan committed
104
			if time.Now().Before(loaded.expireAt) {
Michael Yang's avatar
Michael Yang committed
105
106
107
				return
			}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
108
			if loaded.llm == nil {
Michael Yang's avatar
Michael Yang committed
109
110
111
				return
			}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
112
113
			loaded.llm.Close()
			loaded.llm = nil
114
			loaded.digest = ""
Michael Yang's avatar
Michael Yang committed
115
		})
Michael Yang's avatar
Michael Yang committed
116
	}
Jeffrey Morgan's avatar
Jeffrey Morgan committed
117
	loaded.expireTimer.Reset(sessionDuration)
Bruce MacDonald's avatar
Bruce MacDonald committed
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
	return nil
}

func GenerateHandler(c *gin.Context) {
	loaded.mu.Lock()
	defer loaded.mu.Unlock()

	checkpointStart := time.Now()

	var req api.GenerateRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	model, err := GetModel(req.Model)
	if err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

139
	sessionDuration := defaultSessionDuration // TODO: set this duration from the request if specified
Bruce MacDonald's avatar
Bruce MacDonald committed
140
141
142
143
	if err := load(model, req.Options, sessionDuration); err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
Michael Yang's avatar
Michael Yang committed
144

Michael Yang's avatar
Michael Yang committed
145
146
	checkpointLoaded := time.Now()

Bruce MacDonald's avatar
Bruce MacDonald committed
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
	embedding := ""
	if model.Embeddings != nil && len(model.Embeddings) > 0 {
		promptEmbed, err := loaded.llm.Embedding(req.Prompt)
		if err != nil {
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
			return
		}
		// TODO: set embed_top from specified parameters in modelfile
		embed_top := 3
		topK := vector.TopK(embed_top, mat.NewVecDense(len(promptEmbed), promptEmbed), loaded.Embeddings)
		for _, e := range topK {
			embedding = fmt.Sprintf("%s %s", embedding, e.Embedding.Data)
		}
	}

	prompt, err := model.Prompt(req, embedding)
Michael Yang's avatar
Michael Yang committed
163
164
165
166
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
Jeffrey Morgan's avatar
Jeffrey Morgan committed
167

Michael Yang's avatar
Michael Yang committed
168
169
170
	ch := make(chan any)
	go func() {
		defer close(ch)
Michael Yang's avatar
Michael Yang committed
171
		fn := func(r api.GenerateResponse) {
Jeffrey Morgan's avatar
Jeffrey Morgan committed
172
173
			loaded.expireAt = time.Now().Add(sessionDuration)
			loaded.expireTimer.Reset(sessionDuration)
Michael Yang's avatar
Michael Yang committed
174

Michael Yang's avatar
Michael Yang committed
175
176
177
			r.Model = req.Model
			r.CreatedAt = time.Now().UTC()
			if r.Done {
Michael Yang's avatar
Michael Yang committed
178
179
				r.TotalDuration = time.Since(checkpointStart)
				r.LoadDuration = checkpointLoaded.Sub(checkpointStart)
Michael Yang's avatar
Michael Yang committed
180
181
182
			}

			ch <- r
Michael Yang's avatar
Michael Yang committed
183
184
		}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
185
		if err := loaded.llm.Predict(req.Context, prompt, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
186
187
			ch <- gin.H{"error": err.Error()}
		}
Michael Yang's avatar
Michael Yang committed
188
	}()
Michael Yang's avatar
Michael Yang committed
189

Michael Yang's avatar
Michael Yang committed
190
	streamResponse(c, ch)
Michael Yang's avatar
Michael Yang committed
191
}
Michael Yang's avatar
Michael Yang committed
192

Bruce MacDonald's avatar
Bruce MacDonald committed
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
// EmbeddingHandler binds an api.EmbeddingRequest, loads the requested model
// and responds with the embedding of the request prompt.
//
// loaded.mu is held for the whole call. The request is rejected when the
// loaded options do not have EmbeddingOnly set.
func EmbeddingHandler(c *gin.Context) {
	loaded.mu.Lock()
	defer loaded.mu.Unlock()

	var req api.EmbeddingRequest
	if bindErr := c.ShouldBindJSON(&req); bindErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": bindErr.Error()})
		return
	}

	model, lookupErr := GetModel(req.Model)
	if lookupErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": lookupErr.Error()})
		return
	}

	// NOTE(review): this literal duplicates the value of defaultSessionDuration
	// used by GenerateHandler; consider sharing the variable.
	if loadErr := load(model, req.Options, 5*time.Minute); loadErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": loadErr.Error()})
		return
	}

	if !loaded.options.EmbeddingOnly {
		c.JSON(http.StatusBadRequest, gin.H{"error": "embedding option must be set to true"})
		return
	}

	vec, embedErr := loaded.llm.Embedding(req.Prompt)
	if embedErr != nil {
		// The detailed error is logged; the client gets a generic message.
		log.Printf("embedding generation failed: %v", embedErr)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate embedding"})
		return
	}

	c.JSON(http.StatusOK, api.EmbeddingResponse{Embedding: vec})
}

231
func PullModelHandler(c *gin.Context) {
Michael Yang's avatar
Michael Yang committed
232
233
234
235
236
237
	var req api.PullRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

238
239
240
	ch := make(chan any)
	go func() {
		defer close(ch)
241
242
		fn := func(r api.ProgressResponse) {
			ch <- r
243
		}
244

245
246
247
248
249
250
		regOpts := &RegistryOptions{
			Insecure: req.Insecure,
			Username: req.Username,
			Password: req.Password,
		}

251
252
253
254
		ctx, cancel := context.WithCancel(c.Request.Context())
		defer cancel()

		if err := PullModel(ctx, req.Name, regOpts, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
255
			ch <- gin.H{"error": err.Error()}
256
257
258
259
260
261
		}
	}()

	streamResponse(c, ch)
}

262
func PushModelHandler(c *gin.Context) {
263
264
265
	var req api.PushRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Michael Yang's avatar
Michael Yang committed
266
267
		return
	}
Michael Yang's avatar
Michael Yang committed
268

269
270
271
	ch := make(chan any)
	go func() {
		defer close(ch)
272
273
		fn := func(r api.ProgressResponse) {
			ch <- r
274
		}
275

276
277
278
279
280
281
		regOpts := &RegistryOptions{
			Insecure: req.Insecure,
			Username: req.Username,
			Password: req.Password,
		}

282
283
		ctx := context.Background()
		if err := PushModel(ctx, req.Name, regOpts, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
284
			ch <- gin.H{"error": err.Error()}
285
286
287
288
289
290
		}
	}()

	streamResponse(c, ch)
}

291
func CreateModelHandler(c *gin.Context) {
292
293
294
	var req api.CreateRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"message": err.Error()})
Michael Yang's avatar
Michael Yang committed
295
		return
296
297
	}

Michael Yang's avatar
Michael Yang committed
298
	ch := make(chan any)
Michael Yang's avatar
Michael Yang committed
299
300
	go func() {
		defer close(ch)
301
302
		fn := func(resp api.ProgressResponse) {
			ch <- resp
303
304
		}

305
306
307
308
		ctx, cancel := context.WithCancel(c.Request.Context())
		defer cancel()

		if err := CreateModel(ctx, req.Name, req.Path, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
309
			ch <- gin.H{"error": err.Error()}
310
		}
Michael Yang's avatar
Michael Yang committed
311
	}()
Michael Yang's avatar
Michael Yang committed
312

Michael Yang's avatar
Michael Yang committed
313
	streamResponse(c, ch)
Bruce MacDonald's avatar
Bruce MacDonald committed
314
315
}

316
317
318
319
320
321
322
// DeleteModelHandler binds an api.DeleteRequest and deletes the named model.
//
// It responds with 400 for a malformed request, 404 when DeleteModel reports
// that the model does not exist, and 500 for any other failure. On success
// this handler writes no body and sets no status itself.
func DeleteModelHandler(c *gin.Context) {
	var req api.DeleteRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	err := DeleteModel(req.Name)
	switch {
	case err == nil:
		return
	case errors.Is(err, os.ErrNotExist):
		// errors.Is also matches a not-exist error that has been wrapped,
		// which os.IsNotExist would miss.
		c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Name)})
	default:
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
	}
}

func ListModelsHandler(c *gin.Context) {
Patrick Devine's avatar
Patrick Devine committed
334
335
336
337
338
339
340
341
	var models []api.ListResponseModel
	fp, err := GetManifestPath()
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
	err = filepath.Walk(fp, func(path string, info os.FileInfo, err error) error {
		if err != nil {
342
343
344
345
			if errors.Is(err, os.ErrNotExist) {
				log.Printf("manifest file does not exist: %s", fp)
				return nil
			}
Patrick Devine's avatar
Patrick Devine committed
346
347
348
349
350
			return err
		}
		if !info.IsDir() {
			fi, err := os.Stat(path)
			if err != nil {
351
352
				log.Printf("skipping file: %s", fp)
				return nil
Patrick Devine's avatar
Patrick Devine committed
353
354
355
356
357
358
359
360
361
362
			}
			path := path[len(fp)+1:]
			slashIndex := strings.LastIndex(path, "/")
			if slashIndex == -1 {
				return nil
			}
			tag := path[:slashIndex] + ":" + path[slashIndex+1:]
			mp := ParseModelPath(tag)
			manifest, err := GetManifest(mp)
			if err != nil {
363
364
				log.Printf("skipping file: %s", fp)
				return nil
Patrick Devine's avatar
Patrick Devine committed
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
			}
			model := api.ListResponseModel{
				Name:       mp.GetShortTagname(),
				Size:       manifest.GetTotalSize(),
				ModifiedAt: fi.ModTime(),
			}
			models = append(models, model)
		}
		return nil
	})
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

Michael Yang's avatar
Michael Yang committed
380
	c.JSON(http.StatusOK, api.ListResponse{Models: models})
Patrick Devine's avatar
Patrick Devine committed
381
382
}

Patrick Devine's avatar
Patrick Devine committed
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
// CopyModelHandler binds an api.CopyRequest and copies the source model to the
// destination name.
//
// It responds with 400 for a malformed request, 404 when CopyModel reports
// that the source does not exist, and 500 for any other failure. On success
// this handler writes no body and sets no status itself.
func CopyModelHandler(c *gin.Context) {
	var req api.CopyRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	err := CopyModel(req.Source, req.Destination)
	switch {
	case err == nil:
		return
	case errors.Is(err, os.ErrNotExist):
		// errors.Is also matches a not-exist error that has been wrapped,
		// which os.IsNotExist would miss.
		c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Source)})
	default:
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
	}
}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
400
func Serve(ln net.Listener, origins []string) error {
Michael Yang's avatar
Michael Yang committed
401
402
	config := cors.DefaultConfig()
	config.AllowWildcard = true
Jeffrey Morgan's avatar
Jeffrey Morgan committed
403
	config.AllowOrigins = append(origins, []string{
Michael Yang's avatar
Michael Yang committed
404
405
406
407
408
409
410
411
		"http://localhost",
		"http://localhost:*",
		"https://localhost",
		"https://localhost:*",
		"http://127.0.0.1",
		"http://127.0.0.1:*",
		"https://127.0.0.1",
		"https://127.0.0.1:*",
412
413
414
415
		"http://0.0.0.0",
		"http://0.0.0.0:*",
		"https://0.0.0.0",
		"https://0.0.0.0:*",
Jeffrey Morgan's avatar
Jeffrey Morgan committed
416
	}...)
Michael Yang's avatar
Michael Yang committed
417

Bruce MacDonald's avatar
Bruce MacDonald committed
418
	r := gin.Default()
Michael Yang's avatar
Michael Yang committed
419
	r.Use(cors.New(config))
Bruce MacDonald's avatar
Bruce MacDonald committed
420

421
422
423
	r.GET("/", func(c *gin.Context) {
		c.String(http.StatusOK, "Ollama is running")
	})
Bruce MacDonald's avatar
Bruce MacDonald committed
424
425
426
	r.HEAD("/", func(c *gin.Context) {
		c.Status(http.StatusOK)
	})
427

428
429
	r.POST("/api/pull", PullModelHandler)
	r.POST("/api/generate", GenerateHandler)
Bruce MacDonald's avatar
Bruce MacDonald committed
430
	r.POST("/api/embeddings", EmbeddingHandler)
431
432
	r.POST("/api/create", CreateModelHandler)
	r.POST("/api/push", PushModelHandler)
Patrick Devine's avatar
Patrick Devine committed
433
	r.POST("/api/copy", CopyModelHandler)
434
435
	r.GET("/api/tags", ListModelsHandler)
	r.DELETE("/api/delete", DeleteModelHandler)
Jeffrey Morgan's avatar
Jeffrey Morgan committed
436
437
438
439
440
441
442
443

	log.Printf("Listening on %s", ln.Addr())
	s := &http.Server{
		Handler: r,
	}

	return s.Serve(ln)
}
Michael Yang's avatar
Michael Yang committed
444

Michael Yang's avatar
Michael Yang committed
445
func streamResponse(c *gin.Context, ch chan any) {
446
	c.Header("Content-Type", "application/x-ndjson")
Michael Yang's avatar
Michael Yang committed
447
448
449
450
451
452
453
454
	c.Stream(func(w io.Writer) bool {
		val, ok := <-ch
		if !ok {
			return false
		}

		bts, err := json.Marshal(val)
		if err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
455
			log.Printf("streamResponse: json.Marshal failed with %s", err)
Michael Yang's avatar
Michael Yang committed
456
457
458
459
460
			return false
		}

		bts = append(bts, '\n')
		if _, err := w.Write(bts); err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
461
			log.Printf("streamResponse: w.Write failed with %s", err)
Michael Yang's avatar
Michael Yang committed
462
463
464
465
466
467
			return false
		}

		return true
	})
}