routes.go 10.4 KB
Newer Older
Jeffrey Morgan's avatar
Jeffrey Morgan committed
1
2
3
package server

import (
4
	"context"
Michael Yang's avatar
Michael Yang committed
5
	"encoding/json"
6
	"errors"
7
	"fmt"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
8
9
10
11
	"io"
	"log"
	"net"
	"net/http"
12
	"os"
Michael Yang's avatar
Michael Yang committed
13
	"path/filepath"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
14
	"reflect"
Michael Yang's avatar
Michael Yang committed
15
	"strings"
Michael Yang's avatar
Michael Yang committed
16
	"sync"
17
	"time"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
18

Michael Yang's avatar
Michael Yang committed
19
	"github.com/gin-contrib/cors"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
20
	"github.com/gin-gonic/gin"
Bruce MacDonald's avatar
Bruce MacDonald committed
21
	"gonum.org/v1/gonum/mat"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
22

Jeffrey Morgan's avatar
Jeffrey Morgan committed
23
	"github.com/jmorganca/ollama/api"
Michael Yang's avatar
Michael Yang committed
24
	"github.com/jmorganca/ollama/llama"
25
	"github.com/jmorganca/ollama/vector"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
26
27
)

Jeffrey Morgan's avatar
Jeffrey Morgan committed
28
var loaded struct {
Michael Yang's avatar
Michael Yang committed
29
30
	mu sync.Mutex

31
32
	llm        *llama.LLM
	Embeddings []vector.Embedding
Michael Yang's avatar
Michael Yang committed
33
34
35

	expireAt    time.Time
	expireTimer *time.Timer
Jeffrey Morgan's avatar
Jeffrey Morgan committed
36

37
38
	digest  string
	options api.Options
Michael Yang's avatar
Michael Yang committed
39
40
}

Bruce MacDonald's avatar
Bruce MacDonald committed
41
42
// load a model into memory if it is not already loaded, it is up to the caller to lock loaded.mu before calling this function
func load(model *Model, reqOpts map[string]interface{}, sessionDuration time.Duration) error {
43
44
45
	opts := api.DefaultOptions()
	if err := opts.FromMap(model.Options); err != nil {
		log.Printf("could not load model options: %v", err)
Bruce MacDonald's avatar
Bruce MacDonald committed
46
		return err
47
48
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
49
	if err := opts.FromMap(reqOpts); err != nil {
50
		log.Printf("could not merge model options: %v", err)
Bruce MacDonald's avatar
Bruce MacDonald committed
51
		return err
52
53
54
	}

	if model.Digest != loaded.digest || !reflect.DeepEqual(loaded.options, opts) {
Jeffrey Morgan's avatar
Jeffrey Morgan committed
55
56
57
		if loaded.llm != nil {
			loaded.llm.Close()
			loaded.llm = nil
58
			loaded.digest = ""
Michael Yang's avatar
Michael Yang committed
59
		}
Michael Yang's avatar
Michael Yang committed
60

61
62
63
64
65
		if model.Embeddings != nil && len(model.Embeddings) > 0 {
			opts.EmbeddingOnly = true // this is requried to generate embeddings, completions will still work
			loaded.Embeddings = model.Embeddings
		}

Michael Yang's avatar
Michael Yang committed
66
67
		llm, err := llama.New(model.ModelPath, opts)
		if err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
68
			return err
Michael Yang's avatar
Michael Yang committed
69
70
		}

71
		if opts.NumKeep < 0 {
Bruce MacDonald's avatar
Bruce MacDonald committed
72
			promptWithSystem, err := model.Prompt(api.GenerateRequest{}, "")
73
			if err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
74
				return err
75
76
			}

Bruce MacDonald's avatar
Bruce MacDonald committed
77
			promptNoSystem, err := model.Prompt(api.GenerateRequest{Context: []int{0}}, "")
78
			if err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
79
				return err
80
81
82
83
84
85
86
87
			}

			tokensWithSystem := llm.Encode(promptWithSystem)
			tokensNoSystem := llm.Encode(promptNoSystem)

			llm.NumKeep = len(tokensWithSystem) - len(tokensNoSystem) + 1
		}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
88
89
		loaded.llm = llm
		loaded.digest = model.Digest
90
		loaded.options = opts
Michael Yang's avatar
Michael Yang committed
91
	}
Jeffrey Morgan's avatar
Jeffrey Morgan committed
92
	loaded.expireAt = time.Now().Add(sessionDuration)
Bruce MacDonald's avatar
Bruce MacDonald committed
93

Jeffrey Morgan's avatar
Jeffrey Morgan committed
94
95
96
97
	if loaded.expireTimer == nil {
		loaded.expireTimer = time.AfterFunc(sessionDuration, func() {
			loaded.mu.Lock()
			defer loaded.mu.Unlock()
Michael Yang's avatar
Michael Yang committed
98

Jeffrey Morgan's avatar
Jeffrey Morgan committed
99
			if time.Now().Before(loaded.expireAt) {
Michael Yang's avatar
Michael Yang committed
100
101
102
				return
			}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
103
			if loaded.llm == nil {
Michael Yang's avatar
Michael Yang committed
104
105
106
				return
			}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
107
108
			loaded.llm.Close()
			loaded.llm = nil
109
			loaded.digest = ""
Michael Yang's avatar
Michael Yang committed
110
		})
Michael Yang's avatar
Michael Yang committed
111
	}
Jeffrey Morgan's avatar
Jeffrey Morgan committed
112
	loaded.expireTimer.Reset(sessionDuration)
Bruce MacDonald's avatar
Bruce MacDonald committed
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
	return nil
}

func GenerateHandler(c *gin.Context) {
	loaded.mu.Lock()
	defer loaded.mu.Unlock()

	checkpointStart := time.Now()

	var req api.GenerateRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	model, err := GetModel(req.Model)
	if err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	sessionDuration := 5 * time.Minute
	if err := load(model, req.Options, sessionDuration); err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
Michael Yang's avatar
Michael Yang committed
139

Michael Yang's avatar
Michael Yang committed
140
141
	checkpointLoaded := time.Now()

Bruce MacDonald's avatar
Bruce MacDonald committed
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
	embedding := ""
	if model.Embeddings != nil && len(model.Embeddings) > 0 {
		promptEmbed, err := loaded.llm.Embedding(req.Prompt)
		if err != nil {
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
			return
		}
		// TODO: set embed_top from specified parameters in modelfile
		embed_top := 3
		topK := vector.TopK(embed_top, mat.NewVecDense(len(promptEmbed), promptEmbed), loaded.Embeddings)
		for _, e := range topK {
			embedding = fmt.Sprintf("%s %s", embedding, e.Embedding.Data)
		}
	}

	prompt, err := model.Prompt(req, embedding)
Michael Yang's avatar
Michael Yang committed
158
159
160
161
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
Jeffrey Morgan's avatar
Jeffrey Morgan committed
162

Michael Yang's avatar
Michael Yang committed
163
164
165
	ch := make(chan any)
	go func() {
		defer close(ch)
Michael Yang's avatar
Michael Yang committed
166
		fn := func(r api.GenerateResponse) {
Jeffrey Morgan's avatar
Jeffrey Morgan committed
167
168
			loaded.expireAt = time.Now().Add(sessionDuration)
			loaded.expireTimer.Reset(sessionDuration)
Michael Yang's avatar
Michael Yang committed
169

Michael Yang's avatar
Michael Yang committed
170
171
172
			r.Model = req.Model
			r.CreatedAt = time.Now().UTC()
			if r.Done {
Michael Yang's avatar
Michael Yang committed
173
174
				r.TotalDuration = time.Since(checkpointStart)
				r.LoadDuration = checkpointLoaded.Sub(checkpointStart)
Michael Yang's avatar
Michael Yang committed
175
176
177
			}

			ch <- r
Michael Yang's avatar
Michael Yang committed
178
179
		}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
180
		if err := loaded.llm.Predict(req.Context, prompt, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
181
182
			ch <- gin.H{"error": err.Error()}
		}
Michael Yang's avatar
Michael Yang committed
183
	}()
Michael Yang's avatar
Michael Yang committed
184

Michael Yang's avatar
Michael Yang committed
185
	streamResponse(c, ch)
Michael Yang's avatar
Michael Yang committed
186
}
Michael Yang's avatar
Michael Yang committed
187

Bruce MacDonald's avatar
Bruce MacDonald committed
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
// EmbeddingHandler handles POST /api/embeddings: it loads the requested model
// and responds with the embedding of the request prompt. loaded.mu is held for
// the duration of the request.
//
// Binding, model lookup and load failures are reported as 400, as is a runner
// whose options do not have EmbeddingOnly set. A failure while computing the
// embedding is logged and reported as 500 with a generic message.
func EmbeddingHandler(c *gin.Context) {
	loaded.mu.Lock()
	defer loaded.mu.Unlock()

	// fail writes the JSON error envelope shared by every failure path.
	fail := func(status int, msg string) {
		c.JSON(status, gin.H{"error": msg})
	}

	var request api.EmbeddingRequest
	if bindErr := c.ShouldBindJSON(&request); bindErr != nil {
		fail(http.StatusBadRequest, bindErr.Error())
		return
	}

	m, lookupErr := GetModel(request.Model)
	if lookupErr != nil {
		fail(http.StatusBadRequest, lookupErr.Error())
		return
	}

	const sessionDuration = 5 * time.Minute
	if loadErr := load(m, request.Options, sessionDuration); loadErr != nil {
		fail(http.StatusBadRequest, loadErr.Error())
		return
	}

	if !loaded.options.EmbeddingOnly {
		fail(http.StatusBadRequest, "embedding option must be set to true")
		return
	}

	vec, embedErr := loaded.llm.Embedding(request.Prompt)
	if embedErr != nil {
		log.Printf("embedding generation failed: %v", embedErr)
		fail(http.StatusInternalServerError, "failed to generate embedding")
		return
	}

	c.JSON(http.StatusOK, api.EmbeddingResponse{Embedding: vec})
}

226
func PullModelHandler(c *gin.Context) {
Michael Yang's avatar
Michael Yang committed
227
228
229
230
231
232
	var req api.PullRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

233
234
235
	ch := make(chan any)
	go func() {
		defer close(ch)
236
237
		fn := func(r api.ProgressResponse) {
			ch <- r
238
		}
239

240
241
242
243
244
245
		regOpts := &RegistryOptions{
			Insecure: req.Insecure,
			Username: req.Username,
			Password: req.Password,
		}

246
247
248
249
		ctx, cancel := context.WithCancel(c.Request.Context())
		defer cancel()

		if err := PullModel(ctx, req.Name, regOpts, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
250
			ch <- gin.H{"error": err.Error()}
251
252
253
254
255
256
		}
	}()

	streamResponse(c, ch)
}

257
func PushModelHandler(c *gin.Context) {
258
259
260
	var req api.PushRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Michael Yang's avatar
Michael Yang committed
261
262
		return
	}
Michael Yang's avatar
Michael Yang committed
263

264
265
266
	ch := make(chan any)
	go func() {
		defer close(ch)
267
268
		fn := func(r api.ProgressResponse) {
			ch <- r
269
		}
270

271
272
273
274
275
276
277
		regOpts := &RegistryOptions{
			Insecure: req.Insecure,
			Username: req.Username,
			Password: req.Password,
		}

		if err := PushModel(req.Name, regOpts, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
278
			ch <- gin.H{"error": err.Error()}
279
280
281
282
283
284
		}
	}()

	streamResponse(c, ch)
}

285
func CreateModelHandler(c *gin.Context) {
286
287
288
	var req api.CreateRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"message": err.Error()})
Michael Yang's avatar
Michael Yang committed
289
		return
290
291
	}

Michael Yang's avatar
Michael Yang committed
292
	ch := make(chan any)
Michael Yang's avatar
Michael Yang committed
293
294
	go func() {
		defer close(ch)
295
296
		fn := func(resp api.ProgressResponse) {
			ch <- resp
297
298
		}

299
300
301
302
		ctx, cancel := context.WithCancel(c.Request.Context())
		defer cancel()

		if err := CreateModel(ctx, req.Name, req.Path, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
303
			ch <- gin.H{"error": err.Error()}
304
		}
Michael Yang's avatar
Michael Yang committed
305
	}()
Michael Yang's avatar
Michael Yang committed
306

Michael Yang's avatar
Michael Yang committed
307
	streamResponse(c, ch)
Bruce MacDonald's avatar
Bruce MacDonald committed
308
309
}

310
311
312
313
314
315
316
// DeleteModelHandler handles DELETE /api/delete. It removes the model named
// in the request body. Nothing is written on success; a malformed body yields
// 400, a missing model 404, and any other failure 500.
func DeleteModelHandler(c *gin.Context) {
	var req api.DeleteRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	err := DeleteModel(req.Name)
	switch {
	case err == nil:
		// Nothing to report.
	case errors.Is(err, os.ErrNotExist):
		// errors.Is also matches a not-exist error that has been wrapped,
		// which os.IsNotExist would miss.
		c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Name)})
	default:
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
	}
}

func ListModelsHandler(c *gin.Context) {
Patrick Devine's avatar
Patrick Devine committed
328
329
330
331
332
333
334
335
	var models []api.ListResponseModel
	fp, err := GetManifestPath()
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
	err = filepath.Walk(fp, func(path string, info os.FileInfo, err error) error {
		if err != nil {
336
337
338
339
			if errors.Is(err, os.ErrNotExist) {
				log.Printf("manifest file does not exist: %s", fp)
				return nil
			}
Patrick Devine's avatar
Patrick Devine committed
340
341
342
343
344
			return err
		}
		if !info.IsDir() {
			fi, err := os.Stat(path)
			if err != nil {
345
346
				log.Printf("skipping file: %s", fp)
				return nil
Patrick Devine's avatar
Patrick Devine committed
347
348
349
350
351
352
353
354
355
356
			}
			path := path[len(fp)+1:]
			slashIndex := strings.LastIndex(path, "/")
			if slashIndex == -1 {
				return nil
			}
			tag := path[:slashIndex] + ":" + path[slashIndex+1:]
			mp := ParseModelPath(tag)
			manifest, err := GetManifest(mp)
			if err != nil {
357
358
				log.Printf("skipping file: %s", fp)
				return nil
Patrick Devine's avatar
Patrick Devine committed
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
			}
			model := api.ListResponseModel{
				Name:       mp.GetShortTagname(),
				Size:       manifest.GetTotalSize(),
				ModifiedAt: fi.ModTime(),
			}
			models = append(models, model)
		}
		return nil
	})
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

Michael Yang's avatar
Michael Yang committed
374
	c.JSON(http.StatusOK, api.ListResponse{Models: models})
Patrick Devine's avatar
Patrick Devine committed
375
376
}

Patrick Devine's avatar
Patrick Devine committed
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
// CopyModelHandler handles POST /api/copy. It copies the model req.Source to
// req.Destination. Nothing is written on success; a malformed body yields 400,
// a missing source model 404, and any other failure 500.
func CopyModelHandler(c *gin.Context) {
	var req api.CopyRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	err := CopyModel(req.Source, req.Destination)
	switch {
	case err == nil:
		// Nothing to report.
	case errors.Is(err, os.ErrNotExist):
		// errors.Is also matches a not-exist error that has been wrapped,
		// which os.IsNotExist would miss.
		c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Source)})
	default:
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
	}
}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
394
func Serve(ln net.Listener, origins []string) error {
Michael Yang's avatar
Michael Yang committed
395
396
	config := cors.DefaultConfig()
	config.AllowWildcard = true
Jeffrey Morgan's avatar
Jeffrey Morgan committed
397
	config.AllowOrigins = append(origins, []string{
Michael Yang's avatar
Michael Yang committed
398
399
400
401
402
403
404
405
		"http://localhost",
		"http://localhost:*",
		"https://localhost",
		"https://localhost:*",
		"http://127.0.0.1",
		"http://127.0.0.1:*",
		"https://127.0.0.1",
		"https://127.0.0.1:*",
406
407
408
409
		"http://0.0.0.0",
		"http://0.0.0.0:*",
		"https://0.0.0.0",
		"https://0.0.0.0:*",
Jeffrey Morgan's avatar
Jeffrey Morgan committed
410
	}...)
Michael Yang's avatar
Michael Yang committed
411

Bruce MacDonald's avatar
Bruce MacDonald committed
412
	r := gin.Default()
Michael Yang's avatar
Michael Yang committed
413
	r.Use(cors.New(config))
Bruce MacDonald's avatar
Bruce MacDonald committed
414

415
416
417
	r.GET("/", func(c *gin.Context) {
		c.String(http.StatusOK, "Ollama is running")
	})
Bruce MacDonald's avatar
Bruce MacDonald committed
418
419
420
	r.HEAD("/", func(c *gin.Context) {
		c.Status(http.StatusOK)
	})
421

422
423
	r.POST("/api/pull", PullModelHandler)
	r.POST("/api/generate", GenerateHandler)
Bruce MacDonald's avatar
Bruce MacDonald committed
424
	r.POST("/api/embeddings", EmbeddingHandler)
425
426
	r.POST("/api/create", CreateModelHandler)
	r.POST("/api/push", PushModelHandler)
Patrick Devine's avatar
Patrick Devine committed
427
	r.POST("/api/copy", CopyModelHandler)
428
429
	r.GET("/api/tags", ListModelsHandler)
	r.DELETE("/api/delete", DeleteModelHandler)
Jeffrey Morgan's avatar
Jeffrey Morgan committed
430
431
432
433
434
435
436
437

	log.Printf("Listening on %s", ln.Addr())
	s := &http.Server{
		Handler: r,
	}

	return s.Serve(ln)
}
Michael Yang's avatar
Michael Yang committed
438

Michael Yang's avatar
Michael Yang committed
439
func streamResponse(c *gin.Context, ch chan any) {
440
	c.Header("Content-Type", "application/x-ndjson")
Michael Yang's avatar
Michael Yang committed
441
442
443
444
445
446
447
448
	c.Stream(func(w io.Writer) bool {
		val, ok := <-ch
		if !ok {
			return false
		}

		bts, err := json.Marshal(val)
		if err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
449
			log.Printf("streamResponse: json.Marshal failed with %s", err)
Michael Yang's avatar
Michael Yang committed
450
451
452
453
454
			return false
		}

		bts = append(bts, '\n')
		if _, err := w.Write(bts); err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
455
			log.Printf("streamResponse: w.Write failed with %s", err)
Michael Yang's avatar
Michael Yang committed
456
457
458
459
460
461
			return false
		}

		return true
	})
}