routes.go 10.6 KB
Newer Older
Jeffrey Morgan's avatar
Jeffrey Morgan committed
1
2
3
package server

import (
4
	"context"
Michael Yang's avatar
Michael Yang committed
5
	"encoding/json"
6
	"errors"
7
	"fmt"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
8
9
10
11
	"io"
	"log"
	"net"
	"net/http"
12
	"os"
Michael Yang's avatar
Michael Yang committed
13
	"path/filepath"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
14
	"reflect"
Michael Yang's avatar
Michael Yang committed
15
	"strings"
Michael Yang's avatar
Michael Yang committed
16
	"sync"
17
	"time"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
18

Michael Yang's avatar
Michael Yang committed
19
	"github.com/gin-contrib/cors"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
20
	"github.com/gin-gonic/gin"
Bruce MacDonald's avatar
Bruce MacDonald committed
21
	"gonum.org/v1/gonum/mat"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
22

Jeffrey Morgan's avatar
Jeffrey Morgan committed
23
	"github.com/jmorganca/ollama/api"
24
	"github.com/jmorganca/ollama/llm"
25
	"github.com/jmorganca/ollama/vector"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
26
27
)

Jeffrey Morgan's avatar
Jeffrey Morgan committed
28
var loaded struct {
Michael Yang's avatar
Michael Yang committed
29
30
	mu sync.Mutex

31
	llm        llm.LLM
32
	Embeddings []vector.Embedding
Michael Yang's avatar
Michael Yang committed
33
34
35

	expireAt    time.Time
	expireTimer *time.Timer
Jeffrey Morgan's avatar
Jeffrey Morgan committed
36

37
38
	digest  string
	options api.Options
Michael Yang's avatar
Michael Yang committed
39
40
}

Bruce MacDonald's avatar
Bruce MacDonald committed
41
42
// load a model into memory if it is not already loaded, it is up to the caller to lock loaded.mu before calling this function
func load(model *Model, reqOpts map[string]interface{}, sessionDuration time.Duration) error {
43
44
45
	opts := api.DefaultOptions()
	if err := opts.FromMap(model.Options); err != nil {
		log.Printf("could not load model options: %v", err)
Bruce MacDonald's avatar
Bruce MacDonald committed
46
		return err
47
48
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
49
	if err := opts.FromMap(reqOpts); err != nil {
50
		log.Printf("could not merge model options: %v", err)
Bruce MacDonald's avatar
Bruce MacDonald committed
51
		return err
52
53
54
	}

	if model.Digest != loaded.digest || !reflect.DeepEqual(loaded.options, opts) {
Jeffrey Morgan's avatar
Jeffrey Morgan committed
55
56
57
		if loaded.llm != nil {
			loaded.llm.Close()
			loaded.llm = nil
58
			loaded.digest = ""
Michael Yang's avatar
Michael Yang committed
59
		}
Michael Yang's avatar
Michael Yang committed
60

61
62
63
64
65
		if model.Embeddings != nil && len(model.Embeddings) > 0 {
			opts.EmbeddingOnly = true // this is requried to generate embeddings, completions will still work
			loaded.Embeddings = model.Embeddings
		}

66
		llmModel, err := llm.New(model.ModelPath, model.AdapterPaths, opts)
Michael Yang's avatar
Michael Yang committed
67
		if err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
68
			return err
Michael Yang's avatar
Michael Yang committed
69
70
		}

71
72
73
74
75
		// set cache values before modifying opts
		loaded.llm = llmModel
		loaded.digest = model.Digest
		loaded.options = opts

76
		if opts.NumKeep < 0 {
Bruce MacDonald's avatar
Bruce MacDonald committed
77
			promptWithSystem, err := model.Prompt(api.GenerateRequest{}, "")
78
			if err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
79
				return err
80
81
			}

Bruce MacDonald's avatar
Bruce MacDonald committed
82
			promptNoSystem, err := model.Prompt(api.GenerateRequest{Context: []int{0}}, "")
83
			if err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
84
				return err
85
86
			}

87
88
			tokensWithSystem := llmModel.Encode(promptWithSystem)
			tokensNoSystem := llmModel.Encode(promptNoSystem)
89

90
			opts.NumKeep = len(tokensWithSystem) - len(tokensNoSystem) + 1
91

92
93
			llmModel.SetOptions(opts)
		}
Michael Yang's avatar
Michael Yang committed
94
	}
Jeffrey Morgan's avatar
Jeffrey Morgan committed
95
	loaded.expireAt = time.Now().Add(sessionDuration)
Bruce MacDonald's avatar
Bruce MacDonald committed
96

Jeffrey Morgan's avatar
Jeffrey Morgan committed
97
98
99
100
	if loaded.expireTimer == nil {
		loaded.expireTimer = time.AfterFunc(sessionDuration, func() {
			loaded.mu.Lock()
			defer loaded.mu.Unlock()
Michael Yang's avatar
Michael Yang committed
101

Jeffrey Morgan's avatar
Jeffrey Morgan committed
102
			if time.Now().Before(loaded.expireAt) {
Michael Yang's avatar
Michael Yang committed
103
104
105
				return
			}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
106
			if loaded.llm == nil {
Michael Yang's avatar
Michael Yang committed
107
108
109
				return
			}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
110
111
			loaded.llm.Close()
			loaded.llm = nil
112
			loaded.digest = ""
Michael Yang's avatar
Michael Yang committed
113
		})
Michael Yang's avatar
Michael Yang committed
114
	}
Jeffrey Morgan's avatar
Jeffrey Morgan committed
115
	loaded.expireTimer.Reset(sessionDuration)
Bruce MacDonald's avatar
Bruce MacDonald committed
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
	return nil
}

func GenerateHandler(c *gin.Context) {
	loaded.mu.Lock()
	defer loaded.mu.Unlock()

	checkpointStart := time.Now()

	var req api.GenerateRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	model, err := GetModel(req.Model)
	if err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	sessionDuration := 5 * time.Minute
	if err := load(model, req.Options, sessionDuration); err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
Michael Yang's avatar
Michael Yang committed
142

Michael Yang's avatar
Michael Yang committed
143
144
	checkpointLoaded := time.Now()

Bruce MacDonald's avatar
Bruce MacDonald committed
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
	embedding := ""
	if model.Embeddings != nil && len(model.Embeddings) > 0 {
		promptEmbed, err := loaded.llm.Embedding(req.Prompt)
		if err != nil {
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
			return
		}
		// TODO: set embed_top from specified parameters in modelfile
		embed_top := 3
		topK := vector.TopK(embed_top, mat.NewVecDense(len(promptEmbed), promptEmbed), loaded.Embeddings)
		for _, e := range topK {
			embedding = fmt.Sprintf("%s %s", embedding, e.Embedding.Data)
		}
	}

	prompt, err := model.Prompt(req, embedding)
Michael Yang's avatar
Michael Yang committed
161
162
163
164
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
Jeffrey Morgan's avatar
Jeffrey Morgan committed
165

Michael Yang's avatar
Michael Yang committed
166
167
168
	ch := make(chan any)
	go func() {
		defer close(ch)
Michael Yang's avatar
Michael Yang committed
169
		fn := func(r api.GenerateResponse) {
Jeffrey Morgan's avatar
Jeffrey Morgan committed
170
171
			loaded.expireAt = time.Now().Add(sessionDuration)
			loaded.expireTimer.Reset(sessionDuration)
Michael Yang's avatar
Michael Yang committed
172

Michael Yang's avatar
Michael Yang committed
173
174
175
			r.Model = req.Model
			r.CreatedAt = time.Now().UTC()
			if r.Done {
Michael Yang's avatar
Michael Yang committed
176
177
				r.TotalDuration = time.Since(checkpointStart)
				r.LoadDuration = checkpointLoaded.Sub(checkpointStart)
Michael Yang's avatar
Michael Yang committed
178
179
180
			}

			ch <- r
Michael Yang's avatar
Michael Yang committed
181
182
		}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
183
		if err := loaded.llm.Predict(req.Context, prompt, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
184
185
			ch <- gin.H{"error": err.Error()}
		}
Michael Yang's avatar
Michael Yang committed
186
	}()
Michael Yang's avatar
Michael Yang committed
187

Michael Yang's avatar
Michael Yang committed
188
	streamResponse(c, ch)
Michael Yang's avatar
Michael Yang committed
189
}
Michael Yang's avatar
Michael Yang committed
190

Bruce MacDonald's avatar
Bruce MacDonald committed
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
// EmbeddingHandler decodes an api.EmbeddingRequest, loads the requested model
// and replies with the embedding of the request prompt. The loaded model must
// have been started with the EmbeddingOnly option set.
//
// loaded.mu is held for the whole request.
func EmbeddingHandler(c *gin.Context) {
	loaded.mu.Lock()
	defer loaded.mu.Unlock()

	// fail writes the JSON error body shared by every failure path below.
	fail := func(status int, msg string) {
		c.JSON(status, gin.H{"error": msg})
	}

	var req api.EmbeddingRequest
	if bindErr := c.ShouldBindJSON(&req); bindErr != nil {
		fail(http.StatusBadRequest, bindErr.Error())
		return
	}

	model, modelErr := GetModel(req.Model)
	if modelErr != nil {
		fail(http.StatusBadRequest, modelErr.Error())
		return
	}

	if loadErr := load(model, req.Options, 5*time.Minute); loadErr != nil {
		fail(http.StatusBadRequest, loadErr.Error())
		return
	}

	if !loaded.options.EmbeddingOnly {
		fail(http.StatusBadRequest, "embedding option must be set to true")
		return
	}

	vec, embedErr := loaded.llm.Embedding(req.Prompt)
	if embedErr != nil {
		// The detailed cause is logged; the client only gets a generic message.
		log.Printf("embedding generation failed: %v", embedErr)
		fail(http.StatusInternalServerError, "failed to generate embedding")
		return
	}

	c.JSON(http.StatusOK, api.EmbeddingResponse{Embedding: vec})
}

229
func PullModelHandler(c *gin.Context) {
Michael Yang's avatar
Michael Yang committed
230
231
232
233
234
235
	var req api.PullRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

236
237
238
	ch := make(chan any)
	go func() {
		defer close(ch)
239
240
		fn := func(r api.ProgressResponse) {
			ch <- r
241
		}
242

243
244
245
246
247
248
		regOpts := &RegistryOptions{
			Insecure: req.Insecure,
			Username: req.Username,
			Password: req.Password,
		}

249
250
251
252
		ctx, cancel := context.WithCancel(c.Request.Context())
		defer cancel()

		if err := PullModel(ctx, req.Name, regOpts, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
253
			ch <- gin.H{"error": err.Error()}
254
255
256
257
258
259
		}
	}()

	streamResponse(c, ch)
}

260
func PushModelHandler(c *gin.Context) {
261
262
263
	var req api.PushRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Michael Yang's avatar
Michael Yang committed
264
265
		return
	}
Michael Yang's avatar
Michael Yang committed
266

267
268
269
	ch := make(chan any)
	go func() {
		defer close(ch)
270
271
		fn := func(r api.ProgressResponse) {
			ch <- r
272
		}
273

274
275
276
277
278
279
280
		regOpts := &RegistryOptions{
			Insecure: req.Insecure,
			Username: req.Username,
			Password: req.Password,
		}

		if err := PushModel(req.Name, regOpts, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
281
			ch <- gin.H{"error": err.Error()}
282
283
284
285
286
287
		}
	}()

	streamResponse(c, ch)
}

288
func CreateModelHandler(c *gin.Context) {
289
290
291
	var req api.CreateRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"message": err.Error()})
Michael Yang's avatar
Michael Yang committed
292
		return
293
294
	}

Michael Yang's avatar
Michael Yang committed
295
	ch := make(chan any)
Michael Yang's avatar
Michael Yang committed
296
297
	go func() {
		defer close(ch)
298
299
		fn := func(resp api.ProgressResponse) {
			ch <- resp
300
301
		}

302
303
304
305
		ctx, cancel := context.WithCancel(c.Request.Context())
		defer cancel()

		if err := CreateModel(ctx, req.Name, req.Path, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
306
			ch <- gin.H{"error": err.Error()}
307
		}
Michael Yang's avatar
Michael Yang committed
308
	}()
Michael Yang's avatar
Michael Yang committed
309

Michael Yang's avatar
Michael Yang committed
310
	streamResponse(c, ch)
Bruce MacDonald's avatar
Bruce MacDonald committed
311
312
}

313
314
315
316
317
318
319
// DeleteModelHandler decodes an api.DeleteRequest and deletes the named model.
// It answers 400 for an undecodable request, 404 when DeleteModel reports that
// the model does not exist and 500 for any other failure. Nothing is written
// explicitly on success.
func DeleteModelHandler(c *gin.Context) {
	var req api.DeleteRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	if err := DeleteModel(req.Name); err != nil {
		// errors.Is also recognizes a not-exist error that has been wrapped
		// on its way up, which os.IsNotExist does not.
		if errors.Is(err, os.ErrNotExist) {
			c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Name)})
			return
		}
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
}

func ListModelsHandler(c *gin.Context) {
Patrick Devine's avatar
Patrick Devine committed
331
332
333
334
335
336
337
338
	var models []api.ListResponseModel
	fp, err := GetManifestPath()
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
	err = filepath.Walk(fp, func(path string, info os.FileInfo, err error) error {
		if err != nil {
339
340
341
342
			if errors.Is(err, os.ErrNotExist) {
				log.Printf("manifest file does not exist: %s", fp)
				return nil
			}
Patrick Devine's avatar
Patrick Devine committed
343
344
345
346
347
			return err
		}
		if !info.IsDir() {
			fi, err := os.Stat(path)
			if err != nil {
348
349
				log.Printf("skipping file: %s", fp)
				return nil
Patrick Devine's avatar
Patrick Devine committed
350
351
352
353
354
355
356
357
358
359
			}
			path := path[len(fp)+1:]
			slashIndex := strings.LastIndex(path, "/")
			if slashIndex == -1 {
				return nil
			}
			tag := path[:slashIndex] + ":" + path[slashIndex+1:]
			mp := ParseModelPath(tag)
			manifest, err := GetManifest(mp)
			if err != nil {
360
361
				log.Printf("skipping file: %s", fp)
				return nil
Patrick Devine's avatar
Patrick Devine committed
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
			}
			model := api.ListResponseModel{
				Name:       mp.GetShortTagname(),
				Size:       manifest.GetTotalSize(),
				ModifiedAt: fi.ModTime(),
			}
			models = append(models, model)
		}
		return nil
	})
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

Michael Yang's avatar
Michael Yang committed
377
	c.JSON(http.StatusOK, api.ListResponse{Models: models})
Patrick Devine's avatar
Patrick Devine committed
378
379
}

Patrick Devine's avatar
Patrick Devine committed
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
// CopyModelHandler decodes an api.CopyRequest and copies the source model to
// the destination name. It answers 400 for an undecodable request, 404 when
// CopyModel reports that the source does not exist and 500 for any other
// failure. Nothing is written explicitly on success.
func CopyModelHandler(c *gin.Context) {
	var req api.CopyRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	if err := CopyModel(req.Source, req.Destination); err != nil {
		// errors.Is also recognizes a not-exist error that has been wrapped
		// on its way up, which os.IsNotExist does not.
		if errors.Is(err, os.ErrNotExist) {
			c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Source)})
			return
		}
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
}

397
func Serve(ln net.Listener, extraOrigins []string) error {
Michael Yang's avatar
Michael Yang committed
398
399
	config := cors.DefaultConfig()
	config.AllowWildcard = true
400
	allowedOrigins := []string{
Michael Yang's avatar
Michael Yang committed
401
402
403
404
405
406
407
408
		"http://localhost",
		"http://localhost:*",
		"https://localhost",
		"https://localhost:*",
		"http://127.0.0.1",
		"http://127.0.0.1:*",
		"https://127.0.0.1",
		"https://127.0.0.1:*",
409
410
411
412
		"http://0.0.0.0",
		"http://0.0.0.0:*",
		"https://0.0.0.0",
		"https://0.0.0.0:*",
Michael Yang's avatar
Michael Yang committed
413
	}
414
415
	allowedOrigins = append(allowedOrigins, extraOrigins...)
	config.AllowOrigins = allowedOrigins
Michael Yang's avatar
Michael Yang committed
416

Bruce MacDonald's avatar
Bruce MacDonald committed
417
	r := gin.Default()
Michael Yang's avatar
Michael Yang committed
418
	r.Use(cors.New(config))
Bruce MacDonald's avatar
Bruce MacDonald committed
419

420
421
422
	r.GET("/", func(c *gin.Context) {
		c.String(http.StatusOK, "Ollama is running")
	})
Bruce MacDonald's avatar
Bruce MacDonald committed
423
424
425
	r.HEAD("/", func(c *gin.Context) {
		c.Status(http.StatusOK)
	})
426

427
428
	r.POST("/api/pull", PullModelHandler)
	r.POST("/api/generate", GenerateHandler)
Bruce MacDonald's avatar
Bruce MacDonald committed
429
	r.POST("/api/embeddings", EmbeddingHandler)
430
431
	r.POST("/api/create", CreateModelHandler)
	r.POST("/api/push", PushModelHandler)
Patrick Devine's avatar
Patrick Devine committed
432
	r.POST("/api/copy", CopyModelHandler)
433
434
	r.GET("/api/tags", ListModelsHandler)
	r.DELETE("/api/delete", DeleteModelHandler)
Jeffrey Morgan's avatar
Jeffrey Morgan committed
435
436
437
438
439
440
441
442

	log.Printf("Listening on %s", ln.Addr())
	s := &http.Server{
		Handler: r,
	}

	return s.Serve(ln)
}
Michael Yang's avatar
Michael Yang committed
443

Michael Yang's avatar
Michael Yang committed
444
func streamResponse(c *gin.Context, ch chan any) {
445
	c.Header("Content-Type", "application/x-ndjson")
Michael Yang's avatar
Michael Yang committed
446
447
448
449
450
451
452
453
	c.Stream(func(w io.Writer) bool {
		val, ok := <-ch
		if !ok {
			return false
		}

		bts, err := json.Marshal(val)
		if err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
454
			log.Printf("streamResponse: json.Marshal failed with %s", err)
Michael Yang's avatar
Michael Yang committed
455
456
457
458
459
			return false
		}

		bts = append(bts, '\n')
		if _, err := w.Write(bts); err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
460
			log.Printf("streamResponse: w.Write failed with %s", err)
Michael Yang's avatar
Michael Yang committed
461
462
463
464
465
466
			return false
		}

		return true
	})
}