routes.go 10.5 KB
Newer Older
Jeffrey Morgan's avatar
Jeffrey Morgan committed
1
2
3
package server

import (
4
	"context"
Michael Yang's avatar
Michael Yang committed
5
	"encoding/json"
6
	"errors"
7
	"fmt"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
8
9
10
11
	"io"
	"log"
	"net"
	"net/http"
12
	"os"
Michael Yang's avatar
Michael Yang committed
13
	"path/filepath"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
14
	"reflect"
Michael Yang's avatar
Michael Yang committed
15
	"strings"
Michael Yang's avatar
Michael Yang committed
16
	"sync"
17
	"time"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
18

Michael Yang's avatar
Michael Yang committed
19
	"github.com/gin-contrib/cors"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
20
	"github.com/gin-gonic/gin"
Bruce MacDonald's avatar
Bruce MacDonald committed
21
	"gonum.org/v1/gonum/mat"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
22

Jeffrey Morgan's avatar
Jeffrey Morgan committed
23
	"github.com/jmorganca/ollama/api"
24
	"github.com/jmorganca/ollama/llm"
25
	"github.com/jmorganca/ollama/vector"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
26
27
)

Jeffrey Morgan's avatar
Jeffrey Morgan committed
28
var loaded struct {
Michael Yang's avatar
Michael Yang committed
29
30
	mu sync.Mutex

31
	llm        llm.LLM
32
	Embeddings []vector.Embedding
Michael Yang's avatar
Michael Yang committed
33
34
35

	expireAt    time.Time
	expireTimer *time.Timer
Jeffrey Morgan's avatar
Jeffrey Morgan committed
36

37
38
	digest  string
	options api.Options
Michael Yang's avatar
Michael Yang committed
39
40
}

Bruce MacDonald's avatar
Bruce MacDonald committed
41
42
// load a model into memory if it is not already loaded, it is up to the caller to lock loaded.mu before calling this function
func load(model *Model, reqOpts map[string]interface{}, sessionDuration time.Duration) error {
43
44
45
	opts := api.DefaultOptions()
	if err := opts.FromMap(model.Options); err != nil {
		log.Printf("could not load model options: %v", err)
Bruce MacDonald's avatar
Bruce MacDonald committed
46
		return err
47
48
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
49
	if err := opts.FromMap(reqOpts); err != nil {
50
		log.Printf("could not merge model options: %v", err)
Bruce MacDonald's avatar
Bruce MacDonald committed
51
		return err
52
53
54
	}

	if model.Digest != loaded.digest || !reflect.DeepEqual(loaded.options, opts) {
Jeffrey Morgan's avatar
Jeffrey Morgan committed
55
56
57
		if loaded.llm != nil {
			loaded.llm.Close()
			loaded.llm = nil
58
			loaded.digest = ""
Michael Yang's avatar
Michael Yang committed
59
		}
Michael Yang's avatar
Michael Yang committed
60

61
62
63
64
65
		if model.Embeddings != nil && len(model.Embeddings) > 0 {
			opts.EmbeddingOnly = true // this is requried to generate embeddings, completions will still work
			loaded.Embeddings = model.Embeddings
		}

66
		llmModel, err := llm.New(model.ModelPath, model.AdapterPaths, opts)
Michael Yang's avatar
Michael Yang committed
67
		if err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
68
			return err
Michael Yang's avatar
Michael Yang committed
69
70
		}

71
72
73
74
75
		// set cache values before modifying opts
		loaded.llm = llmModel
		loaded.digest = model.Digest
		loaded.options = opts

76
		if opts.NumKeep < 0 {
Bruce MacDonald's avatar
Bruce MacDonald committed
77
			promptWithSystem, err := model.Prompt(api.GenerateRequest{}, "")
78
			if err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
79
				return err
80
81
			}

Bruce MacDonald's avatar
Bruce MacDonald committed
82
			promptNoSystem, err := model.Prompt(api.GenerateRequest{Context: []int{0}}, "")
83
			if err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
84
				return err
85
86
			}

87
88
			tokensWithSystem := llmModel.Encode(promptWithSystem)
			tokensNoSystem := llmModel.Encode(promptNoSystem)
89

90
			opts.NumKeep = len(tokensWithSystem) - len(tokensNoSystem) + 1
91

92
93
			llmModel.SetOptions(opts)
		}
Michael Yang's avatar
Michael Yang committed
94
	}
Jeffrey Morgan's avatar
Jeffrey Morgan committed
95
	loaded.expireAt = time.Now().Add(sessionDuration)
Bruce MacDonald's avatar
Bruce MacDonald committed
96

Jeffrey Morgan's avatar
Jeffrey Morgan committed
97
98
99
100
	if loaded.expireTimer == nil {
		loaded.expireTimer = time.AfterFunc(sessionDuration, func() {
			loaded.mu.Lock()
			defer loaded.mu.Unlock()
Michael Yang's avatar
Michael Yang committed
101

Jeffrey Morgan's avatar
Jeffrey Morgan committed
102
			if time.Now().Before(loaded.expireAt) {
Michael Yang's avatar
Michael Yang committed
103
104
105
				return
			}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
106
			if loaded.llm == nil {
Michael Yang's avatar
Michael Yang committed
107
108
109
				return
			}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
110
111
			loaded.llm.Close()
			loaded.llm = nil
112
			loaded.digest = ""
Michael Yang's avatar
Michael Yang committed
113
		})
Michael Yang's avatar
Michael Yang committed
114
	}
Jeffrey Morgan's avatar
Jeffrey Morgan committed
115
	loaded.expireTimer.Reset(sessionDuration)
Bruce MacDonald's avatar
Bruce MacDonald committed
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
	return nil
}

func GenerateHandler(c *gin.Context) {
	loaded.mu.Lock()
	defer loaded.mu.Unlock()

	checkpointStart := time.Now()

	var req api.GenerateRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	model, err := GetModel(req.Model)
	if err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	sessionDuration := 5 * time.Minute
	if err := load(model, req.Options, sessionDuration); err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
Michael Yang's avatar
Michael Yang committed
142

Michael Yang's avatar
Michael Yang committed
143
144
	checkpointLoaded := time.Now()

Bruce MacDonald's avatar
Bruce MacDonald committed
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
	embedding := ""
	if model.Embeddings != nil && len(model.Embeddings) > 0 {
		promptEmbed, err := loaded.llm.Embedding(req.Prompt)
		if err != nil {
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
			return
		}
		// TODO: set embed_top from specified parameters in modelfile
		embed_top := 3
		topK := vector.TopK(embed_top, mat.NewVecDense(len(promptEmbed), promptEmbed), loaded.Embeddings)
		for _, e := range topK {
			embedding = fmt.Sprintf("%s %s", embedding, e.Embedding.Data)
		}
	}

	prompt, err := model.Prompt(req, embedding)
Michael Yang's avatar
Michael Yang committed
161
162
163
164
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
Jeffrey Morgan's avatar
Jeffrey Morgan committed
165

Michael Yang's avatar
Michael Yang committed
166
167
168
	ch := make(chan any)
	go func() {
		defer close(ch)
Michael Yang's avatar
Michael Yang committed
169
		fn := func(r api.GenerateResponse) {
Jeffrey Morgan's avatar
Jeffrey Morgan committed
170
171
			loaded.expireAt = time.Now().Add(sessionDuration)
			loaded.expireTimer.Reset(sessionDuration)
Michael Yang's avatar
Michael Yang committed
172

Michael Yang's avatar
Michael Yang committed
173
174
175
			r.Model = req.Model
			r.CreatedAt = time.Now().UTC()
			if r.Done {
Michael Yang's avatar
Michael Yang committed
176
177
				r.TotalDuration = time.Since(checkpointStart)
				r.LoadDuration = checkpointLoaded.Sub(checkpointStart)
Michael Yang's avatar
Michael Yang committed
178
179
180
			}

			ch <- r
Michael Yang's avatar
Michael Yang committed
181
182
		}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
183
		if err := loaded.llm.Predict(req.Context, prompt, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
184
185
			ch <- gin.H{"error": err.Error()}
		}
Michael Yang's avatar
Michael Yang committed
186
	}()
Michael Yang's avatar
Michael Yang committed
187

Michael Yang's avatar
Michael Yang committed
188
	streamResponse(c, ch)
Michael Yang's avatar
Michael Yang committed
189
}
Michael Yang's avatar
Michael Yang committed
190

Bruce MacDonald's avatar
Bruce MacDonald committed
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
// EmbeddingHandler binds an api.EmbeddingRequest from the request body, loads
// the requested model and replies with the embedding of the request prompt.
//
// The request is rejected with 400 when the loaded options do not have
// EmbeddingOnly set. loaded.mu is held for the whole call.
func EmbeddingHandler(c *gin.Context) {
	loaded.mu.Lock()
	defer loaded.mu.Unlock()

	var req api.EmbeddingRequest
	if bindErr := c.ShouldBindJSON(&req); bindErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": bindErr.Error()})
		return
	}

	model, lookupErr := GetModel(req.Model)
	if lookupErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": lookupErr.Error()})
		return
	}

	const sessionDuration = 5 * time.Minute
	if loadErr := load(model, req.Options, sessionDuration); loadErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": loadErr.Error()})
		return
	}

	if !loaded.options.EmbeddingOnly {
		c.JSON(http.StatusBadRequest, gin.H{"error": "embedding option must be set to true"})
		return
	}

	vec, embedErr := loaded.llm.Embedding(req.Prompt)
	if embedErr != nil {
		// The detailed cause is logged; the client only gets a generic message.
		log.Printf("embedding generation failed: %v", embedErr)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate embedding"})
		return
	}

	c.JSON(http.StatusOK, api.EmbeddingResponse{Embedding: vec})
}

229
func PullModelHandler(c *gin.Context) {
Michael Yang's avatar
Michael Yang committed
230
231
232
233
234
235
	var req api.PullRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

236
237
238
	ch := make(chan any)
	go func() {
		defer close(ch)
239
240
		fn := func(r api.ProgressResponse) {
			ch <- r
241
		}
242

243
244
245
246
247
248
		regOpts := &RegistryOptions{
			Insecure: req.Insecure,
			Username: req.Username,
			Password: req.Password,
		}

249
250
251
252
		ctx, cancel := context.WithCancel(c.Request.Context())
		defer cancel()

		if err := PullModel(ctx, req.Name, regOpts, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
253
			ch <- gin.H{"error": err.Error()}
254
255
256
257
258
259
		}
	}()

	streamResponse(c, ch)
}

260
func PushModelHandler(c *gin.Context) {
261
262
263
	var req api.PushRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Michael Yang's avatar
Michael Yang committed
264
265
		return
	}
Michael Yang's avatar
Michael Yang committed
266

267
268
269
	ch := make(chan any)
	go func() {
		defer close(ch)
270
271
		fn := func(r api.ProgressResponse) {
			ch <- r
272
		}
273

274
275
276
277
278
279
		regOpts := &RegistryOptions{
			Insecure: req.Insecure,
			Username: req.Username,
			Password: req.Password,
		}

280
281
		ctx := context.Background()
		if err := PushModel(ctx, req.Name, regOpts, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
282
			ch <- gin.H{"error": err.Error()}
283
284
285
286
287
288
		}
	}()

	streamResponse(c, ch)
}

289
func CreateModelHandler(c *gin.Context) {
290
291
292
	var req api.CreateRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"message": err.Error()})
Michael Yang's avatar
Michael Yang committed
293
		return
294
295
	}

Michael Yang's avatar
Michael Yang committed
296
	ch := make(chan any)
Michael Yang's avatar
Michael Yang committed
297
298
	go func() {
		defer close(ch)
299
300
		fn := func(resp api.ProgressResponse) {
			ch <- resp
301
302
		}

303
304
305
306
		ctx, cancel := context.WithCancel(c.Request.Context())
		defer cancel()

		if err := CreateModel(ctx, req.Name, req.Path, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
307
			ch <- gin.H{"error": err.Error()}
308
		}
Michael Yang's avatar
Michael Yang committed
309
	}()
Michael Yang's avatar
Michael Yang committed
310

Michael Yang's avatar
Michael Yang committed
311
	streamResponse(c, ch)
Bruce MacDonald's avatar
Bruce MacDonald committed
312
313
}

314
315
316
317
318
319
320
// DeleteModelHandler binds an api.DeleteRequest from the request body and
// deletes the named model. It replies 404 when DeleteModel reports that the
// model does not exist and 500 for any other failure; nothing is written on
// success.
func DeleteModelHandler(c *gin.Context) {
	var req api.DeleteRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	if err := DeleteModel(req.Name); err != nil {
		// errors.Is also recognises a not-exist error that has been wrapped
		// on its way up, which os.IsNotExist does not; it is also what
		// ListModelsHandler uses.
		if errors.Is(err, os.ErrNotExist) {
			c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Name)})
		} else {
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		}
		return
	}
}

func ListModelsHandler(c *gin.Context) {
Patrick Devine's avatar
Patrick Devine committed
332
333
334
335
336
337
338
339
	var models []api.ListResponseModel
	fp, err := GetManifestPath()
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
	err = filepath.Walk(fp, func(path string, info os.FileInfo, err error) error {
		if err != nil {
340
341
342
343
			if errors.Is(err, os.ErrNotExist) {
				log.Printf("manifest file does not exist: %s", fp)
				return nil
			}
Patrick Devine's avatar
Patrick Devine committed
344
345
346
347
348
			return err
		}
		if !info.IsDir() {
			fi, err := os.Stat(path)
			if err != nil {
349
350
				log.Printf("skipping file: %s", fp)
				return nil
Patrick Devine's avatar
Patrick Devine committed
351
352
353
354
355
356
357
358
359
360
			}
			path := path[len(fp)+1:]
			slashIndex := strings.LastIndex(path, "/")
			if slashIndex == -1 {
				return nil
			}
			tag := path[:slashIndex] + ":" + path[slashIndex+1:]
			mp := ParseModelPath(tag)
			manifest, err := GetManifest(mp)
			if err != nil {
361
362
				log.Printf("skipping file: %s", fp)
				return nil
Patrick Devine's avatar
Patrick Devine committed
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
			}
			model := api.ListResponseModel{
				Name:       mp.GetShortTagname(),
				Size:       manifest.GetTotalSize(),
				ModifiedAt: fi.ModTime(),
			}
			models = append(models, model)
		}
		return nil
	})
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

Michael Yang's avatar
Michael Yang committed
378
	c.JSON(http.StatusOK, api.ListResponse{Models: models})
Patrick Devine's avatar
Patrick Devine committed
379
380
}

Patrick Devine's avatar
Patrick Devine committed
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
// CopyModelHandler binds an api.CopyRequest from the request body and copies
// the source model to the destination name. It replies 404 when CopyModel
// reports that the source does not exist and 500 for any other failure;
// nothing is written on success.
func CopyModelHandler(c *gin.Context) {
	var req api.CopyRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	if err := CopyModel(req.Source, req.Destination); err != nil {
		// errors.Is also recognises a not-exist error that has been wrapped
		// on its way up, which os.IsNotExist does not; it is also what
		// ListModelsHandler uses.
		if errors.Is(err, os.ErrNotExist) {
			c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Source)})
		} else {
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		}
		return
	}
}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
398
func Serve(ln net.Listener, origins []string) error {
Michael Yang's avatar
Michael Yang committed
399
400
	config := cors.DefaultConfig()
	config.AllowWildcard = true
Jeffrey Morgan's avatar
Jeffrey Morgan committed
401
	config.AllowOrigins = append(origins, []string{
Michael Yang's avatar
Michael Yang committed
402
403
404
405
406
407
408
409
		"http://localhost",
		"http://localhost:*",
		"https://localhost",
		"https://localhost:*",
		"http://127.0.0.1",
		"http://127.0.0.1:*",
		"https://127.0.0.1",
		"https://127.0.0.1:*",
410
411
412
413
		"http://0.0.0.0",
		"http://0.0.0.0:*",
		"https://0.0.0.0",
		"https://0.0.0.0:*",
Jeffrey Morgan's avatar
Jeffrey Morgan committed
414
	}...)
Michael Yang's avatar
Michael Yang committed
415

Bruce MacDonald's avatar
Bruce MacDonald committed
416
	r := gin.Default()
Michael Yang's avatar
Michael Yang committed
417
	r.Use(cors.New(config))
Bruce MacDonald's avatar
Bruce MacDonald committed
418

419
420
421
	r.GET("/", func(c *gin.Context) {
		c.String(http.StatusOK, "Ollama is running")
	})
Bruce MacDonald's avatar
Bruce MacDonald committed
422
423
424
	r.HEAD("/", func(c *gin.Context) {
		c.Status(http.StatusOK)
	})
425

426
427
	r.POST("/api/pull", PullModelHandler)
	r.POST("/api/generate", GenerateHandler)
Bruce MacDonald's avatar
Bruce MacDonald committed
428
	r.POST("/api/embeddings", EmbeddingHandler)
429
430
	r.POST("/api/create", CreateModelHandler)
	r.POST("/api/push", PushModelHandler)
Patrick Devine's avatar
Patrick Devine committed
431
	r.POST("/api/copy", CopyModelHandler)
432
433
	r.GET("/api/tags", ListModelsHandler)
	r.DELETE("/api/delete", DeleteModelHandler)
Jeffrey Morgan's avatar
Jeffrey Morgan committed
434
435
436
437
438
439
440
441

	log.Printf("Listening on %s", ln.Addr())
	s := &http.Server{
		Handler: r,
	}

	return s.Serve(ln)
}
Michael Yang's avatar
Michael Yang committed
442

Michael Yang's avatar
Michael Yang committed
443
func streamResponse(c *gin.Context, ch chan any) {
444
	c.Header("Content-Type", "application/x-ndjson")
Michael Yang's avatar
Michael Yang committed
445
446
447
448
449
450
451
452
	c.Stream(func(w io.Writer) bool {
		val, ok := <-ch
		if !ok {
			return false
		}

		bts, err := json.Marshal(val)
		if err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
453
			log.Printf("streamResponse: json.Marshal failed with %s", err)
Michael Yang's avatar
Michael Yang committed
454
455
456
457
458
			return false
		}

		bts = append(bts, '\n')
		if _, err := w.Write(bts); err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
459
			log.Printf("streamResponse: w.Write failed with %s", err)
Michael Yang's avatar
Michael Yang committed
460
461
462
463
464
465
			return false
		}

		return true
	})
}