routes.go 9.45 KB
Newer Older
Jeffrey Morgan's avatar
Jeffrey Morgan committed
1
2
3
package server

import (
4
	"context"
Michael Yang's avatar
Michael Yang committed
5
	"encoding/json"
6
	"errors"
7
	"fmt"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
8
9
10
11
	"io"
	"log"
	"net"
	"net/http"
12
	"os"
Michael Yang's avatar
Michael Yang committed
13
	"path/filepath"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
14
	"reflect"
Michael Yang's avatar
Michael Yang committed
15
	"strings"
Michael Yang's avatar
Michael Yang committed
16
	"sync"
17
	"time"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
18

Michael Yang's avatar
Michael Yang committed
19
	"github.com/gin-contrib/cors"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
20
	"github.com/gin-gonic/gin"
Bruce MacDonald's avatar
Bruce MacDonald committed
21
	"gonum.org/v1/gonum/mat"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
22

Jeffrey Morgan's avatar
Jeffrey Morgan committed
23
	"github.com/jmorganca/ollama/api"
Michael Yang's avatar
Michael Yang committed
24
	"github.com/jmorganca/ollama/llama"
25
	"github.com/jmorganca/ollama/vector"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
26
27
)

Jeffrey Morgan's avatar
Jeffrey Morgan committed
28
var loaded struct {
Michael Yang's avatar
Michael Yang committed
29
30
	mu sync.Mutex

31
32
	llm        *llama.LLM
	Embeddings []vector.Embedding
Michael Yang's avatar
Michael Yang committed
33
34
35

	expireAt    time.Time
	expireTimer *time.Timer
Jeffrey Morgan's avatar
Jeffrey Morgan committed
36

37
38
	digest  string
	options api.Options
Michael Yang's avatar
Michael Yang committed
39
40
}

41
func GenerateHandler(c *gin.Context) {
Jeffrey Morgan's avatar
Jeffrey Morgan committed
42
43
	loaded.mu.Lock()
	defer loaded.mu.Unlock()
Michael Yang's avatar
Michael Yang committed
44

Michael Yang's avatar
Michael Yang committed
45
	checkpointStart := time.Now()
46

Michael Yang's avatar
Michael Yang committed
47
	var req api.GenerateRequest
Bruce MacDonald's avatar
Bruce MacDonald committed
48
	if err := c.ShouldBindJSON(&req); err != nil {
49
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Bruce MacDonald's avatar
Bruce MacDonald committed
50
51
		return
	}
52

53
54
55
56
	model, err := GetModel(req.Model)
	if err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
Bruce MacDonald's avatar
Bruce MacDonald committed
57
	}
Michael Yang's avatar
Michael Yang committed
58

59
60
61
62
63
64
65
66
67
68
69
70
71
72
	opts := api.DefaultOptions()
	if err := opts.FromMap(model.Options); err != nil {
		log.Printf("could not load model options: %v", err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

	if err := opts.FromMap(req.Options); err != nil {
		log.Printf("could not merge model options: %v", err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

	if model.Digest != loaded.digest || !reflect.DeepEqual(loaded.options, opts) {
Jeffrey Morgan's avatar
Jeffrey Morgan committed
73
74
75
		if loaded.llm != nil {
			loaded.llm.Close()
			loaded.llm = nil
76
			loaded.digest = ""
Michael Yang's avatar
Michael Yang committed
77
		}
Michael Yang's avatar
Michael Yang committed
78

79
80
81
82
83
		if model.Embeddings != nil && len(model.Embeddings) > 0 {
			opts.EmbeddingOnly = true // this is requried to generate embeddings, completions will still work
			loaded.Embeddings = model.Embeddings
		}

Michael Yang's avatar
Michael Yang committed
84
85
86
87
88
89
		llm, err := llama.New(model.ModelPath, opts)
		if err != nil {
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
			return
		}

90
		if opts.NumKeep < 0 {
Bruce MacDonald's avatar
Bruce MacDonald committed
91
			promptWithSystem, err := model.Prompt(api.GenerateRequest{}, "")
92
93
94
95
96
			if err != nil {
				c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
				return
			}

Bruce MacDonald's avatar
Bruce MacDonald committed
97
			promptNoSystem, err := model.Prompt(api.GenerateRequest{Context: []int{0}}, "")
98
99
100
101
102
103
104
105
106
107
108
			if err != nil {
				c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
				return
			}

			tokensWithSystem := llm.Encode(promptWithSystem)
			tokensNoSystem := llm.Encode(promptNoSystem)

			llm.NumKeep = len(tokensWithSystem) - len(tokensNoSystem) + 1
		}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
109
110
		loaded.llm = llm
		loaded.digest = model.Digest
111
		loaded.options = opts
Michael Yang's avatar
Michael Yang committed
112
	}
Jeffrey Morgan's avatar
Jeffrey Morgan committed
113
	sessionDuration := 5 * time.Minute
Michael Yang's avatar
Michael Yang committed
114

Jeffrey Morgan's avatar
Jeffrey Morgan committed
115
116
117
118
119
	loaded.expireAt = time.Now().Add(sessionDuration)
	if loaded.expireTimer == nil {
		loaded.expireTimer = time.AfterFunc(sessionDuration, func() {
			loaded.mu.Lock()
			defer loaded.mu.Unlock()
Michael Yang's avatar
Michael Yang committed
120

Jeffrey Morgan's avatar
Jeffrey Morgan committed
121
			if time.Now().Before(loaded.expireAt) {
Michael Yang's avatar
Michael Yang committed
122
123
124
				return
			}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
125
			if loaded.llm == nil {
Michael Yang's avatar
Michael Yang committed
126
127
128
				return
			}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
129
130
			loaded.llm.Close()
			loaded.llm = nil
131
			loaded.digest = ""
Michael Yang's avatar
Michael Yang committed
132
		})
Michael Yang's avatar
Michael Yang committed
133
	}
Jeffrey Morgan's avatar
Jeffrey Morgan committed
134
	loaded.expireTimer.Reset(sessionDuration)
Michael Yang's avatar
Michael Yang committed
135

Michael Yang's avatar
Michael Yang committed
136
137
	checkpointLoaded := time.Now()

Bruce MacDonald's avatar
Bruce MacDonald committed
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
	embedding := ""
	if model.Embeddings != nil && len(model.Embeddings) > 0 {
		promptEmbed, err := loaded.llm.Embedding(req.Prompt)
		if err != nil {
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
			return
		}
		// TODO: set embed_top from specified parameters in modelfile
		embed_top := 3
		topK := vector.TopK(embed_top, mat.NewVecDense(len(promptEmbed), promptEmbed), loaded.Embeddings)
		for _, e := range topK {
			embedding = fmt.Sprintf("%s %s", embedding, e.Embedding.Data)
		}
	}

	prompt, err := model.Prompt(req, embedding)
Michael Yang's avatar
Michael Yang committed
154
155
156
157
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
Jeffrey Morgan's avatar
Jeffrey Morgan committed
158

Michael Yang's avatar
Michael Yang committed
159
160
161
	ch := make(chan any)
	go func() {
		defer close(ch)
Michael Yang's avatar
Michael Yang committed
162
		fn := func(r api.GenerateResponse) {
Jeffrey Morgan's avatar
Jeffrey Morgan committed
163
164
			loaded.expireAt = time.Now().Add(sessionDuration)
			loaded.expireTimer.Reset(sessionDuration)
Michael Yang's avatar
Michael Yang committed
165

Michael Yang's avatar
Michael Yang committed
166
167
168
			r.Model = req.Model
			r.CreatedAt = time.Now().UTC()
			if r.Done {
Michael Yang's avatar
Michael Yang committed
169
170
				r.TotalDuration = time.Since(checkpointStart)
				r.LoadDuration = checkpointLoaded.Sub(checkpointStart)
Michael Yang's avatar
Michael Yang committed
171
172
173
			}

			ch <- r
Michael Yang's avatar
Michael Yang committed
174
175
		}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
176
		if err := loaded.llm.Predict(req.Context, prompt, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
177
178
			ch <- gin.H{"error": err.Error()}
		}
Michael Yang's avatar
Michael Yang committed
179
	}()
Michael Yang's avatar
Michael Yang committed
180

Michael Yang's avatar
Michael Yang committed
181
	streamResponse(c, ch)
Michael Yang's avatar
Michael Yang committed
182
}
Michael Yang's avatar
Michael Yang committed
183

184
func PullModelHandler(c *gin.Context) {
Michael Yang's avatar
Michael Yang committed
185
186
187
188
189
190
	var req api.PullRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

191
192
193
	ch := make(chan any)
	go func() {
		defer close(ch)
194
195
		fn := func(r api.ProgressResponse) {
			ch <- r
196
		}
197

198
199
200
201
202
203
		regOpts := &RegistryOptions{
			Insecure: req.Insecure,
			Username: req.Username,
			Password: req.Password,
		}

204
205
206
207
		ctx, cancel := context.WithCancel(c.Request.Context())
		defer cancel()

		if err := PullModel(ctx, req.Name, regOpts, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
208
			ch <- gin.H{"error": err.Error()}
209
210
211
212
213
214
		}
	}()

	streamResponse(c, ch)
}

215
func PushModelHandler(c *gin.Context) {
216
217
218
	var req api.PushRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Michael Yang's avatar
Michael Yang committed
219
220
		return
	}
Michael Yang's avatar
Michael Yang committed
221

222
223
224
	ch := make(chan any)
	go func() {
		defer close(ch)
225
226
		fn := func(r api.ProgressResponse) {
			ch <- r
227
		}
228

229
230
231
232
233
234
235
		regOpts := &RegistryOptions{
			Insecure: req.Insecure,
			Username: req.Username,
			Password: req.Password,
		}

		if err := PushModel(req.Name, regOpts, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
236
			ch <- gin.H{"error": err.Error()}
237
238
239
240
241
242
		}
	}()

	streamResponse(c, ch)
}

243
func CreateModelHandler(c *gin.Context) {
244
245
246
	var req api.CreateRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"message": err.Error()})
Michael Yang's avatar
Michael Yang committed
247
		return
248
249
	}

Michael Yang's avatar
Michael Yang committed
250
	ch := make(chan any)
Michael Yang's avatar
Michael Yang committed
251
252
	go func() {
		defer close(ch)
253
254
		fn := func(resp api.ProgressResponse) {
			ch <- resp
255
256
		}

257
258
259
260
		ctx, cancel := context.WithCancel(c.Request.Context())
		defer cancel()

		if err := CreateModel(ctx, req.Name, req.Path, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
261
			ch <- gin.H{"error": err.Error()}
262
		}
Michael Yang's avatar
Michael Yang committed
263
	}()
Michael Yang's avatar
Michael Yang committed
264

Michael Yang's avatar
Michael Yang committed
265
	streamResponse(c, ch)
Bruce MacDonald's avatar
Bruce MacDonald committed
266
267
}

268
269
270
271
272
273
274
// DeleteModelHandler handles a delete request. It answers 400 for a
// malformed body, 404 when the named model does not exist, 500 for any other
// failure, and writes no body on success.
func DeleteModelHandler(c *gin.Context) {
	var req api.DeleteRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	err := DeleteModel(req.Name)
	switch {
	case err == nil:
		// Nothing to report.
	case errors.Is(err, os.ErrNotExist):
		// errors.Is also matches a not-exist error that was wrapped on its
		// way up, which os.IsNotExist would miss.
		c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Name)})
	default:
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
	}
}

func ListModelsHandler(c *gin.Context) {
Patrick Devine's avatar
Patrick Devine committed
286
287
288
289
290
291
292
293
	var models []api.ListResponseModel
	fp, err := GetManifestPath()
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
	err = filepath.Walk(fp, func(path string, info os.FileInfo, err error) error {
		if err != nil {
294
295
296
297
			if errors.Is(err, os.ErrNotExist) {
				log.Printf("manifest file does not exist: %s", fp)
				return nil
			}
Patrick Devine's avatar
Patrick Devine committed
298
299
300
301
302
			return err
		}
		if !info.IsDir() {
			fi, err := os.Stat(path)
			if err != nil {
303
304
				log.Printf("skipping file: %s", fp)
				return nil
Patrick Devine's avatar
Patrick Devine committed
305
306
307
308
309
310
311
312
313
314
			}
			path := path[len(fp)+1:]
			slashIndex := strings.LastIndex(path, "/")
			if slashIndex == -1 {
				return nil
			}
			tag := path[:slashIndex] + ":" + path[slashIndex+1:]
			mp := ParseModelPath(tag)
			manifest, err := GetManifest(mp)
			if err != nil {
315
316
				log.Printf("skipping file: %s", fp)
				return nil
Patrick Devine's avatar
Patrick Devine committed
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
			}
			model := api.ListResponseModel{
				Name:       mp.GetShortTagname(),
				Size:       manifest.GetTotalSize(),
				ModifiedAt: fi.ModTime(),
			}
			models = append(models, model)
		}
		return nil
	})
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

Michael Yang's avatar
Michael Yang committed
332
	c.JSON(http.StatusOK, api.ListResponse{Models: models})
Patrick Devine's avatar
Patrick Devine committed
333
334
}

Patrick Devine's avatar
Patrick Devine committed
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
// CopyModelHandler handles a copy request. It answers 400 for a malformed
// body, 404 when the source model does not exist, 500 for any other failure,
// and writes no body on success.
func CopyModelHandler(c *gin.Context) {
	var req api.CopyRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	err := CopyModel(req.Source, req.Destination)
	switch {
	case err == nil:
		// Nothing to report.
	case errors.Is(err, os.ErrNotExist):
		// errors.Is also matches a not-exist error that was wrapped on its
		// way up, which os.IsNotExist would miss.
		c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Source)})
	default:
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
	}
}

352
func Serve(ln net.Listener, extraOrigins []string) error {
Michael Yang's avatar
Michael Yang committed
353
354
	config := cors.DefaultConfig()
	config.AllowWildcard = true
355
	allowedOrigins := []string{
Michael Yang's avatar
Michael Yang committed
356
357
358
359
360
361
362
363
		"http://localhost",
		"http://localhost:*",
		"https://localhost",
		"https://localhost:*",
		"http://127.0.0.1",
		"http://127.0.0.1:*",
		"https://127.0.0.1",
		"https://127.0.0.1:*",
364
365
366
367
		"http://0.0.0.0",
		"http://0.0.0.0:*",
		"https://0.0.0.0",
		"https://0.0.0.0:*",
Michael Yang's avatar
Michael Yang committed
368
	}
369
370
	allowedOrigins = append(allowedOrigins, extraOrigins...)
	config.AllowOrigins = allowedOrigins
Michael Yang's avatar
Michael Yang committed
371

Bruce MacDonald's avatar
Bruce MacDonald committed
372
	r := gin.Default()
Michael Yang's avatar
Michael Yang committed
373
	r.Use(cors.New(config))
Bruce MacDonald's avatar
Bruce MacDonald committed
374

375
376
377
	r.GET("/", func(c *gin.Context) {
		c.String(http.StatusOK, "Ollama is running")
	})
Bruce MacDonald's avatar
Bruce MacDonald committed
378
379
380
	r.HEAD("/", func(c *gin.Context) {
		c.Status(http.StatusOK)
	})
381

382
383
384
385
	r.POST("/api/pull", PullModelHandler)
	r.POST("/api/generate", GenerateHandler)
	r.POST("/api/create", CreateModelHandler)
	r.POST("/api/push", PushModelHandler)
Patrick Devine's avatar
Patrick Devine committed
386
	r.POST("/api/copy", CopyModelHandler)
387
388
	r.GET("/api/tags", ListModelsHandler)
	r.DELETE("/api/delete", DeleteModelHandler)
Jeffrey Morgan's avatar
Jeffrey Morgan committed
389
390
391
392
393
394
395
396

	log.Printf("Listening on %s", ln.Addr())
	s := &http.Server{
		Handler: r,
	}

	return s.Serve(ln)
}
Michael Yang's avatar
Michael Yang committed
397

Michael Yang's avatar
Michael Yang committed
398
func streamResponse(c *gin.Context, ch chan any) {
399
	c.Header("Content-Type", "application/x-ndjson")
Michael Yang's avatar
Michael Yang committed
400
401
402
403
404
405
406
407
	c.Stream(func(w io.Writer) bool {
		val, ok := <-ch
		if !ok {
			return false
		}

		bts, err := json.Marshal(val)
		if err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
408
			log.Printf("streamResponse: json.Marshal failed with %s", err)
Michael Yang's avatar
Michael Yang committed
409
410
411
412
413
			return false
		}

		bts = append(bts, '\n')
		if _, err := w.Write(bts); err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
414
			log.Printf("streamResponse: w.Write failed with %s", err)
Michael Yang's avatar
Michael Yang committed
415
416
417
418
419
420
			return false
		}

		return true
	})
}