routes.go 9.28 KB
Newer Older
Jeffrey Morgan's avatar
Jeffrey Morgan committed
1
2
3
package server

import (
Michael Yang's avatar
Michael Yang committed
4
	"encoding/json"
5
	"errors"
6
	"fmt"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
7
8
9
10
	"io"
	"log"
	"net"
	"net/http"
11
	"os"
Michael Yang's avatar
Michael Yang committed
12
	"path/filepath"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
13
	"reflect"
Michael Yang's avatar
Michael Yang committed
14
	"strings"
Michael Yang's avatar
Michael Yang committed
15
	"sync"
16
	"time"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
17

Michael Yang's avatar
Michael Yang committed
18
	"github.com/gin-contrib/cors"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
19
	"github.com/gin-gonic/gin"
Bruce MacDonald's avatar
Bruce MacDonald committed
20
	"gonum.org/v1/gonum/mat"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
21

Jeffrey Morgan's avatar
Jeffrey Morgan committed
22
	"github.com/jmorganca/ollama/api"
Michael Yang's avatar
Michael Yang committed
23
	"github.com/jmorganca/ollama/llama"
24
	"github.com/jmorganca/ollama/vector"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
25
26
)

Jeffrey Morgan's avatar
Jeffrey Morgan committed
27
var loaded struct {
Michael Yang's avatar
Michael Yang committed
28
29
	mu sync.Mutex

30
31
	llm        *llama.LLM
	Embeddings []vector.Embedding
Michael Yang's avatar
Michael Yang committed
32
33
34

	expireAt    time.Time
	expireTimer *time.Timer
Jeffrey Morgan's avatar
Jeffrey Morgan committed
35

36
37
	digest  string
	options api.Options
Michael Yang's avatar
Michael Yang committed
38
39
}

40
func GenerateHandler(c *gin.Context) {
Jeffrey Morgan's avatar
Jeffrey Morgan committed
41
42
	loaded.mu.Lock()
	defer loaded.mu.Unlock()
Michael Yang's avatar
Michael Yang committed
43

Michael Yang's avatar
Michael Yang committed
44
	checkpointStart := time.Now()
45

Michael Yang's avatar
Michael Yang committed
46
	var req api.GenerateRequest
Bruce MacDonald's avatar
Bruce MacDonald committed
47
	if err := c.ShouldBindJSON(&req); err != nil {
48
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Bruce MacDonald's avatar
Bruce MacDonald committed
49
50
		return
	}
51

52
53
54
55
	model, err := GetModel(req.Model)
	if err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
Bruce MacDonald's avatar
Bruce MacDonald committed
56
	}
Michael Yang's avatar
Michael Yang committed
57

58
59
60
61
62
63
64
65
66
67
68
69
70
71
	opts := api.DefaultOptions()
	if err := opts.FromMap(model.Options); err != nil {
		log.Printf("could not load model options: %v", err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

	if err := opts.FromMap(req.Options); err != nil {
		log.Printf("could not merge model options: %v", err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

	if model.Digest != loaded.digest || !reflect.DeepEqual(loaded.options, opts) {
Jeffrey Morgan's avatar
Jeffrey Morgan committed
72
73
74
		if loaded.llm != nil {
			loaded.llm.Close()
			loaded.llm = nil
75
			loaded.digest = ""
Michael Yang's avatar
Michael Yang committed
76
		}
Michael Yang's avatar
Michael Yang committed
77

78
79
80
81
82
		if model.Embeddings != nil && len(model.Embeddings) > 0 {
			opts.EmbeddingOnly = true // this is requried to generate embeddings, completions will still work
			loaded.Embeddings = model.Embeddings
		}

Michael Yang's avatar
Michael Yang committed
83
84
85
86
87
88
		llm, err := llama.New(model.ModelPath, opts)
		if err != nil {
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
			return
		}

89
		if opts.NumKeep < 0 {
Bruce MacDonald's avatar
Bruce MacDonald committed
90
			promptWithSystem, err := model.Prompt(api.GenerateRequest{}, "")
91
92
93
94
95
			if err != nil {
				c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
				return
			}

Bruce MacDonald's avatar
Bruce MacDonald committed
96
			promptNoSystem, err := model.Prompt(api.GenerateRequest{Context: []int{0}}, "")
97
98
99
100
101
102
103
104
105
106
107
			if err != nil {
				c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
				return
			}

			tokensWithSystem := llm.Encode(promptWithSystem)
			tokensNoSystem := llm.Encode(promptNoSystem)

			llm.NumKeep = len(tokensWithSystem) - len(tokensNoSystem) + 1
		}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
108
109
		loaded.llm = llm
		loaded.digest = model.Digest
110
		loaded.options = opts
Michael Yang's avatar
Michael Yang committed
111
	}
Jeffrey Morgan's avatar
Jeffrey Morgan committed
112
	sessionDuration := 5 * time.Minute
Michael Yang's avatar
Michael Yang committed
113

Jeffrey Morgan's avatar
Jeffrey Morgan committed
114
115
116
117
118
	loaded.expireAt = time.Now().Add(sessionDuration)
	if loaded.expireTimer == nil {
		loaded.expireTimer = time.AfterFunc(sessionDuration, func() {
			loaded.mu.Lock()
			defer loaded.mu.Unlock()
Michael Yang's avatar
Michael Yang committed
119

Jeffrey Morgan's avatar
Jeffrey Morgan committed
120
			if time.Now().Before(loaded.expireAt) {
Michael Yang's avatar
Michael Yang committed
121
122
123
				return
			}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
124
			if loaded.llm == nil {
Michael Yang's avatar
Michael Yang committed
125
126
127
				return
			}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
128
129
			loaded.llm.Close()
			loaded.llm = nil
130
			loaded.digest = ""
Michael Yang's avatar
Michael Yang committed
131
		})
Michael Yang's avatar
Michael Yang committed
132
	}
Jeffrey Morgan's avatar
Jeffrey Morgan committed
133
	loaded.expireTimer.Reset(sessionDuration)
Michael Yang's avatar
Michael Yang committed
134

Michael Yang's avatar
Michael Yang committed
135
136
	checkpointLoaded := time.Now()

Bruce MacDonald's avatar
Bruce MacDonald committed
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
	embedding := ""
	if model.Embeddings != nil && len(model.Embeddings) > 0 {
		promptEmbed, err := loaded.llm.Embedding(req.Prompt)
		if err != nil {
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
			return
		}
		// TODO: set embed_top from specified parameters in modelfile
		embed_top := 3
		topK := vector.TopK(embed_top, mat.NewVecDense(len(promptEmbed), promptEmbed), loaded.Embeddings)
		for _, e := range topK {
			embedding = fmt.Sprintf("%s %s", embedding, e.Embedding.Data)
		}
	}

	prompt, err := model.Prompt(req, embedding)
Michael Yang's avatar
Michael Yang committed
153
154
155
156
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
Jeffrey Morgan's avatar
Jeffrey Morgan committed
157

Michael Yang's avatar
Michael Yang committed
158
159
160
	ch := make(chan any)
	go func() {
		defer close(ch)
Michael Yang's avatar
Michael Yang committed
161
		fn := func(r api.GenerateResponse) {
Jeffrey Morgan's avatar
Jeffrey Morgan committed
162
163
			loaded.expireAt = time.Now().Add(sessionDuration)
			loaded.expireTimer.Reset(sessionDuration)
Michael Yang's avatar
Michael Yang committed
164

Michael Yang's avatar
Michael Yang committed
165
166
167
			r.Model = req.Model
			r.CreatedAt = time.Now().UTC()
			if r.Done {
Michael Yang's avatar
Michael Yang committed
168
169
				r.TotalDuration = time.Since(checkpointStart)
				r.LoadDuration = checkpointLoaded.Sub(checkpointStart)
Michael Yang's avatar
Michael Yang committed
170
171
172
			}

			ch <- r
Michael Yang's avatar
Michael Yang committed
173
174
		}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
175
		if err := loaded.llm.Predict(req.Context, prompt, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
176
177
			ch <- gin.H{"error": err.Error()}
		}
Michael Yang's avatar
Michael Yang committed
178
	}()
Michael Yang's avatar
Michael Yang committed
179

Michael Yang's avatar
Michael Yang committed
180
	streamResponse(c, ch)
Michael Yang's avatar
Michael Yang committed
181
}
Michael Yang's avatar
Michael Yang committed
182

183
func PullModelHandler(c *gin.Context) {
Michael Yang's avatar
Michael Yang committed
184
185
186
187
188
189
	var req api.PullRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

190
191
192
	ch := make(chan any)
	go func() {
		defer close(ch)
193
194
		fn := func(r api.ProgressResponse) {
			ch <- r
195
		}
196

197
198
199
200
201
202
203
		regOpts := &RegistryOptions{
			Insecure: req.Insecure,
			Username: req.Username,
			Password: req.Password,
		}

		if err := PullModel(req.Name, regOpts, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
204
			ch <- gin.H{"error": err.Error()}
205
206
207
208
209
210
		}
	}()

	streamResponse(c, ch)
}

211
func PushModelHandler(c *gin.Context) {
212
213
214
	var req api.PushRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Michael Yang's avatar
Michael Yang committed
215
216
		return
	}
Michael Yang's avatar
Michael Yang committed
217

218
219
220
	ch := make(chan any)
	go func() {
		defer close(ch)
221
222
		fn := func(r api.ProgressResponse) {
			ch <- r
223
		}
224

225
226
227
228
229
230
231
		regOpts := &RegistryOptions{
			Insecure: req.Insecure,
			Username: req.Username,
			Password: req.Password,
		}

		if err := PushModel(req.Name, regOpts, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
232
			ch <- gin.H{"error": err.Error()}
233
234
235
236
237
238
		}
	}()

	streamResponse(c, ch)
}

239
func CreateModelHandler(c *gin.Context) {
240
241
242
	var req api.CreateRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"message": err.Error()})
Michael Yang's avatar
Michael Yang committed
243
		return
244
245
	}

Michael Yang's avatar
Michael Yang committed
246
	ch := make(chan any)
Michael Yang's avatar
Michael Yang committed
247
248
	go func() {
		defer close(ch)
249
250
		fn := func(resp api.ProgressResponse) {
			ch <- resp
251
252
		}

253
		if err := CreateModel(req.Name, req.Path, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
254
			ch <- gin.H{"error": err.Error()}
255
		}
Michael Yang's avatar
Michael Yang committed
256
	}()
Michael Yang's avatar
Michael Yang committed
257

Michael Yang's avatar
Michael Yang committed
258
	streamResponse(c, ch)
Bruce MacDonald's avatar
Bruce MacDonald committed
259
260
}

261
262
263
264
265
266
267
// DeleteModelHandler binds a delete request from the JSON body and removes the
// named model via DeleteModel. It answers 400 for an invalid body, 404 when
// the model does not exist, and 500 for any other failure; on success it
// writes no body.
func DeleteModelHandler(c *gin.Context) {
	var req api.DeleteRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	if err := DeleteModel(req.Name); err != nil {
		// errors.Is also matches a not-exist error that was wrapped on its way
		// up, which os.IsNotExist does not.
		if errors.Is(err, os.ErrNotExist) {
			c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Name)})
			return
		}
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
}

func ListModelsHandler(c *gin.Context) {
Patrick Devine's avatar
Patrick Devine committed
279
280
281
282
283
284
285
286
	var models []api.ListResponseModel
	fp, err := GetManifestPath()
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
	err = filepath.Walk(fp, func(path string, info os.FileInfo, err error) error {
		if err != nil {
287
288
289
290
			if errors.Is(err, os.ErrNotExist) {
				log.Printf("manifest file does not exist: %s", fp)
				return nil
			}
Patrick Devine's avatar
Patrick Devine committed
291
292
293
294
295
			return err
		}
		if !info.IsDir() {
			fi, err := os.Stat(path)
			if err != nil {
296
297
				log.Printf("skipping file: %s", fp)
				return nil
Patrick Devine's avatar
Patrick Devine committed
298
299
300
301
302
303
304
305
306
307
			}
			path := path[len(fp)+1:]
			slashIndex := strings.LastIndex(path, "/")
			if slashIndex == -1 {
				return nil
			}
			tag := path[:slashIndex] + ":" + path[slashIndex+1:]
			mp := ParseModelPath(tag)
			manifest, err := GetManifest(mp)
			if err != nil {
308
309
				log.Printf("skipping file: %s", fp)
				return nil
Patrick Devine's avatar
Patrick Devine committed
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
			}
			model := api.ListResponseModel{
				Name:       mp.GetShortTagname(),
				Size:       manifest.GetTotalSize(),
				ModifiedAt: fi.ModTime(),
			}
			models = append(models, model)
		}
		return nil
	})
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

Michael Yang's avatar
Michael Yang committed
325
	c.JSON(http.StatusOK, api.ListResponse{Models: models})
Patrick Devine's avatar
Patrick Devine committed
326
327
}

Patrick Devine's avatar
Patrick Devine committed
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
// CopyModelHandler binds a copy request from the JSON body and copies the
// source model to the destination via CopyModel. It answers 400 for an invalid
// body, 404 when the source model does not exist, and 500 for any other
// failure; on success it writes no body.
func CopyModelHandler(c *gin.Context) {
	var req api.CopyRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	if err := CopyModel(req.Source, req.Destination); err != nil {
		// errors.Is also matches a not-exist error that was wrapped on its way
		// up, which os.IsNotExist does not.
		if errors.Is(err, os.ErrNotExist) {
			c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Source)})
			return
		}
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
}

345
func Serve(ln net.Listener, extraOrigins []string) error {
Michael Yang's avatar
Michael Yang committed
346
347
	config := cors.DefaultConfig()
	config.AllowWildcard = true
348
	allowedOrigins := []string{
Michael Yang's avatar
Michael Yang committed
349
350
351
352
353
354
355
356
		"http://localhost",
		"http://localhost:*",
		"https://localhost",
		"https://localhost:*",
		"http://127.0.0.1",
		"http://127.0.0.1:*",
		"https://127.0.0.1",
		"https://127.0.0.1:*",
357
358
359
360
		"http://0.0.0.0",
		"http://0.0.0.0:*",
		"https://0.0.0.0",
		"https://0.0.0.0:*",
Michael Yang's avatar
Michael Yang committed
361
	}
362
363
	allowedOrigins = append(allowedOrigins, extraOrigins...)
	config.AllowOrigins = allowedOrigins
Michael Yang's avatar
Michael Yang committed
364

Bruce MacDonald's avatar
Bruce MacDonald committed
365
	r := gin.Default()
Michael Yang's avatar
Michael Yang committed
366
	r.Use(cors.New(config))
Bruce MacDonald's avatar
Bruce MacDonald committed
367

368
369
370
	r.GET("/", func(c *gin.Context) {
		c.String(http.StatusOK, "Ollama is running")
	})
Bruce MacDonald's avatar
Bruce MacDonald committed
371
372
373
	r.HEAD("/", func(c *gin.Context) {
		c.Status(http.StatusOK)
	})
374

375
376
377
378
	r.POST("/api/pull", PullModelHandler)
	r.POST("/api/generate", GenerateHandler)
	r.POST("/api/create", CreateModelHandler)
	r.POST("/api/push", PushModelHandler)
Patrick Devine's avatar
Patrick Devine committed
379
	r.POST("/api/copy", CopyModelHandler)
380
381
	r.GET("/api/tags", ListModelsHandler)
	r.DELETE("/api/delete", DeleteModelHandler)
Jeffrey Morgan's avatar
Jeffrey Morgan committed
382
383
384
385
386
387
388
389

	log.Printf("Listening on %s", ln.Addr())
	s := &http.Server{
		Handler: r,
	}

	return s.Serve(ln)
}
Michael Yang's avatar
Michael Yang committed
390

Michael Yang's avatar
Michael Yang committed
391
func streamResponse(c *gin.Context, ch chan any) {
392
	c.Header("Content-Type", "application/x-ndjson")
Michael Yang's avatar
Michael Yang committed
393
394
395
396
397
398
399
400
	c.Stream(func(w io.Writer) bool {
		val, ok := <-ch
		if !ok {
			return false
		}

		bts, err := json.Marshal(val)
		if err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
401
			log.Printf("streamResponse: json.Marshal failed with %s", err)
Michael Yang's avatar
Michael Yang committed
402
403
404
405
406
			return false
		}

		bts = append(bts, '\n')
		if _, err := w.Write(bts); err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
407
			log.Printf("streamResponse: w.Write failed with %s", err)
Michael Yang's avatar
Michael Yang committed
408
409
410
411
412
413
			return false
		}

		return true
	})
}