routes.go 8.64 KB
Newer Older
Jeffrey Morgan's avatar
Jeffrey Morgan committed
1
2
3
package server

import (
Michael Yang's avatar
Michael Yang committed
4
	"encoding/json"
5
	"errors"
6
	"fmt"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
7
8
9
10
	"io"
	"log"
	"net"
	"net/http"
11
	"os"
Michael Yang's avatar
Michael Yang committed
12
	"path/filepath"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
13
	"reflect"
Michael Yang's avatar
Michael Yang committed
14
	"strings"
Michael Yang's avatar
Michael Yang committed
15
	"sync"
16
	"time"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
17

Michael Yang's avatar
Michael Yang committed
18
	"github.com/gin-contrib/cors"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
19
	"github.com/gin-gonic/gin"
Bruce MacDonald's avatar
Bruce MacDonald committed
20
	"gonum.org/v1/gonum/mat"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
21

Jeffrey Morgan's avatar
Jeffrey Morgan committed
22
	"github.com/jmorganca/ollama/api"
Michael Yang's avatar
Michael Yang committed
23
	"github.com/jmorganca/ollama/llama"
24
	"github.com/jmorganca/ollama/vector"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
25
26
)

Jeffrey Morgan's avatar
Jeffrey Morgan committed
27
var loaded struct {
Michael Yang's avatar
Michael Yang committed
28
29
	mu sync.Mutex

30
31
	llm        *llama.LLM
	Embeddings []vector.Embedding
Michael Yang's avatar
Michael Yang committed
32
33
34

	expireAt    time.Time
	expireTimer *time.Timer
Jeffrey Morgan's avatar
Jeffrey Morgan committed
35

36
37
	digest  string
	options api.Options
Michael Yang's avatar
Michael Yang committed
38
39
}

40
func GenerateHandler(c *gin.Context) {
Jeffrey Morgan's avatar
Jeffrey Morgan committed
41
42
	loaded.mu.Lock()
	defer loaded.mu.Unlock()
Michael Yang's avatar
Michael Yang committed
43

Michael Yang's avatar
Michael Yang committed
44
	checkpointStart := time.Now()
45

Michael Yang's avatar
Michael Yang committed
46
	var req api.GenerateRequest
Bruce MacDonald's avatar
Bruce MacDonald committed
47
	if err := c.ShouldBindJSON(&req); err != nil {
48
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Bruce MacDonald's avatar
Bruce MacDonald committed
49
50
		return
	}
51

52
53
54
55
	model, err := GetModel(req.Model)
	if err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
Bruce MacDonald's avatar
Bruce MacDonald committed
56
	}
Michael Yang's avatar
Michael Yang committed
57

58
59
60
61
62
63
64
65
66
67
68
69
70
71
	opts := api.DefaultOptions()
	if err := opts.FromMap(model.Options); err != nil {
		log.Printf("could not load model options: %v", err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

	if err := opts.FromMap(req.Options); err != nil {
		log.Printf("could not merge model options: %v", err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

	if model.Digest != loaded.digest || !reflect.DeepEqual(loaded.options, opts) {
Jeffrey Morgan's avatar
Jeffrey Morgan committed
72
73
74
		if loaded.llm != nil {
			loaded.llm.Close()
			loaded.llm = nil
75
			loaded.digest = ""
Michael Yang's avatar
Michael Yang committed
76
		}
Michael Yang's avatar
Michael Yang committed
77

78
79
80
81
82
		if model.Embeddings != nil && len(model.Embeddings) > 0 {
			opts.EmbeddingOnly = true // this is requried to generate embeddings, completions will still work
			loaded.Embeddings = model.Embeddings
		}

Michael Yang's avatar
Michael Yang committed
83
84
85
86
87
88
		llm, err := llama.New(model.ModelPath, opts)
		if err != nil {
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
			return
		}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
89
90
		loaded.llm = llm
		loaded.digest = model.Digest
91
		loaded.options = opts
Michael Yang's avatar
Michael Yang committed
92
	}
Jeffrey Morgan's avatar
Jeffrey Morgan committed
93
	sessionDuration := 5 * time.Minute
Michael Yang's avatar
Michael Yang committed
94

Jeffrey Morgan's avatar
Jeffrey Morgan committed
95
96
97
98
99
	loaded.expireAt = time.Now().Add(sessionDuration)
	if loaded.expireTimer == nil {
		loaded.expireTimer = time.AfterFunc(sessionDuration, func() {
			loaded.mu.Lock()
			defer loaded.mu.Unlock()
Michael Yang's avatar
Michael Yang committed
100

Jeffrey Morgan's avatar
Jeffrey Morgan committed
101
			if time.Now().Before(loaded.expireAt) {
Michael Yang's avatar
Michael Yang committed
102
103
104
				return
			}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
105
			if loaded.llm == nil {
Michael Yang's avatar
Michael Yang committed
106
107
108
				return
			}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
109
110
			loaded.llm.Close()
			loaded.llm = nil
111
			loaded.digest = ""
Michael Yang's avatar
Michael Yang committed
112
		})
Michael Yang's avatar
Michael Yang committed
113
	}
Jeffrey Morgan's avatar
Jeffrey Morgan committed
114
	loaded.expireTimer.Reset(sessionDuration)
Michael Yang's avatar
Michael Yang committed
115

Michael Yang's avatar
Michael Yang committed
116
117
	checkpointLoaded := time.Now()

Bruce MacDonald's avatar
Bruce MacDonald committed
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
	embedding := ""
	if model.Embeddings != nil && len(model.Embeddings) > 0 {
		promptEmbed, err := loaded.llm.Embedding(req.Prompt)
		if err != nil {
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
			return
		}
		// TODO: set embed_top from specified parameters in modelfile
		embed_top := 3
		topK := vector.TopK(embed_top, mat.NewVecDense(len(promptEmbed), promptEmbed), loaded.Embeddings)
		for _, e := range topK {
			embedding = fmt.Sprintf("%s %s", embedding, e.Embedding.Data)
		}
	}

	prompt, err := model.Prompt(req, embedding)
Michael Yang's avatar
Michael Yang committed
134
135
136
137
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
Jeffrey Morgan's avatar
Jeffrey Morgan committed
138

Michael Yang's avatar
Michael Yang committed
139
140
141
	ch := make(chan any)
	go func() {
		defer close(ch)
Michael Yang's avatar
Michael Yang committed
142
		fn := func(r api.GenerateResponse) {
Jeffrey Morgan's avatar
Jeffrey Morgan committed
143
144
			loaded.expireAt = time.Now().Add(sessionDuration)
			loaded.expireTimer.Reset(sessionDuration)
Michael Yang's avatar
Michael Yang committed
145

Michael Yang's avatar
Michael Yang committed
146
147
148
			r.Model = req.Model
			r.CreatedAt = time.Now().UTC()
			if r.Done {
Michael Yang's avatar
Michael Yang committed
149
150
				r.TotalDuration = time.Since(checkpointStart)
				r.LoadDuration = checkpointLoaded.Sub(checkpointStart)
Michael Yang's avatar
Michael Yang committed
151
152
153
			}

			ch <- r
Michael Yang's avatar
Michael Yang committed
154
155
		}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
156
		if err := loaded.llm.Predict(req.Context, prompt, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
157
158
			ch <- gin.H{"error": err.Error()}
		}
Michael Yang's avatar
Michael Yang committed
159
	}()
Michael Yang's avatar
Michael Yang committed
160

Michael Yang's avatar
Michael Yang committed
161
	streamResponse(c, ch)
Michael Yang's avatar
Michael Yang committed
162
}
Michael Yang's avatar
Michael Yang committed
163

164
func PullModelHandler(c *gin.Context) {
Michael Yang's avatar
Michael Yang committed
165
166
167
168
169
170
	var req api.PullRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

171
172
173
	ch := make(chan any)
	go func() {
		defer close(ch)
174
175
		fn := func(r api.ProgressResponse) {
			ch <- r
176
		}
177

178
179
180
181
182
183
184
		regOpts := &RegistryOptions{
			Insecure: req.Insecure,
			Username: req.Username,
			Password: req.Password,
		}

		if err := PullModel(req.Name, regOpts, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
185
			ch <- gin.H{"error": err.Error()}
186
187
188
189
190
191
		}
	}()

	streamResponse(c, ch)
}

192
func PushModelHandler(c *gin.Context) {
193
194
195
	var req api.PushRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Michael Yang's avatar
Michael Yang committed
196
197
		return
	}
Michael Yang's avatar
Michael Yang committed
198

199
200
201
	ch := make(chan any)
	go func() {
		defer close(ch)
202
203
		fn := func(r api.ProgressResponse) {
			ch <- r
204
		}
205

206
207
208
209
210
211
212
		regOpts := &RegistryOptions{
			Insecure: req.Insecure,
			Username: req.Username,
			Password: req.Password,
		}

		if err := PushModel(req.Name, regOpts, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
213
			ch <- gin.H{"error": err.Error()}
214
215
216
217
218
219
		}
	}()

	streamResponse(c, ch)
}

220
func CreateModelHandler(c *gin.Context) {
221
222
223
	var req api.CreateRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"message": err.Error()})
Michael Yang's avatar
Michael Yang committed
224
		return
225
226
	}

Michael Yang's avatar
Michael Yang committed
227
	ch := make(chan any)
Michael Yang's avatar
Michael Yang committed
228
229
	go func() {
		defer close(ch)
230
231
		fn := func(resp api.ProgressResponse) {
			ch <- resp
232
233
		}

234
		if err := CreateModel(req.Name, req.Path, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
235
			ch <- gin.H{"error": err.Error()}
236
		}
Michael Yang's avatar
Michael Yang committed
237
	}()
Michael Yang's avatar
Michael Yang committed
238

Michael Yang's avatar
Michael Yang committed
239
	streamResponse(c, ch)
Bruce MacDonald's avatar
Bruce MacDonald committed
240
241
}

242
243
244
245
246
247
248
// DeleteModelHandler binds a delete request from the JSON body and calls
// DeleteModel with the requested name. A not-exist error is reported as 404,
// any other error as 500; on success nothing is written to the response.
func DeleteModelHandler(c *gin.Context) {
	var req api.DeleteRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	if err := DeleteModel(req.Name); err != nil {
		// errors.Is also matches a not-exist error that has been wrapped,
		// which os.IsNotExist does not.
		if errors.Is(err, os.ErrNotExist) {
			c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Name)})
			return
		}
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
}

func ListModelsHandler(c *gin.Context) {
Patrick Devine's avatar
Patrick Devine committed
260
261
262
263
264
265
266
267
	var models []api.ListResponseModel
	fp, err := GetManifestPath()
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
	err = filepath.Walk(fp, func(path string, info os.FileInfo, err error) error {
		if err != nil {
268
269
270
271
			if errors.Is(err, os.ErrNotExist) {
				log.Printf("manifest file does not exist: %s", fp)
				return nil
			}
Patrick Devine's avatar
Patrick Devine committed
272
273
274
275
276
			return err
		}
		if !info.IsDir() {
			fi, err := os.Stat(path)
			if err != nil {
277
278
				log.Printf("skipping file: %s", fp)
				return nil
Patrick Devine's avatar
Patrick Devine committed
279
280
281
282
283
284
285
286
287
288
			}
			path := path[len(fp)+1:]
			slashIndex := strings.LastIndex(path, "/")
			if slashIndex == -1 {
				return nil
			}
			tag := path[:slashIndex] + ":" + path[slashIndex+1:]
			mp := ParseModelPath(tag)
			manifest, err := GetManifest(mp)
			if err != nil {
289
290
				log.Printf("skipping file: %s", fp)
				return nil
Patrick Devine's avatar
Patrick Devine committed
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
			}
			model := api.ListResponseModel{
				Name:       mp.GetShortTagname(),
				Size:       manifest.GetTotalSize(),
				ModifiedAt: fi.ModTime(),
			}
			models = append(models, model)
		}
		return nil
	})
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

Michael Yang's avatar
Michael Yang committed
306
	c.JSON(http.StatusOK, api.ListResponse{Models: models})
Patrick Devine's avatar
Patrick Devine committed
307
308
}

Patrick Devine's avatar
Patrick Devine committed
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
// CopyModelHandler binds a copy request from the JSON body and calls
// CopyModel with its source and destination. A not-exist error is reported as
// 404, any other error as 500; on success nothing is written to the response.
func CopyModelHandler(c *gin.Context) {
	var req api.CopyRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	if err := CopyModel(req.Source, req.Destination); err != nil {
		// errors.Is also matches a not-exist error that has been wrapped,
		// which os.IsNotExist does not.
		if errors.Is(err, os.ErrNotExist) {
			c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Source)})
			return
		}
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
}

326
func Serve(ln net.Listener, extraOrigins []string) error {
Michael Yang's avatar
Michael Yang committed
327
328
329
	config := cors.DefaultConfig()
	config.AllowWildcard = true
	// only allow http/https from localhost
330
	allowedOrigins := []string{
Michael Yang's avatar
Michael Yang committed
331
332
333
334
335
336
337
338
339
		"http://localhost",
		"http://localhost:*",
		"https://localhost",
		"https://localhost:*",
		"http://127.0.0.1",
		"http://127.0.0.1:*",
		"https://127.0.0.1",
		"https://127.0.0.1:*",
	}
340
341
	allowedOrigins = append(allowedOrigins, extraOrigins...)
	config.AllowOrigins = allowedOrigins
Michael Yang's avatar
Michael Yang committed
342

Bruce MacDonald's avatar
Bruce MacDonald committed
343
	r := gin.Default()
Michael Yang's avatar
Michael Yang committed
344
	r.Use(cors.New(config))
Bruce MacDonald's avatar
Bruce MacDonald committed
345

346
347
348
	r.GET("/", func(c *gin.Context) {
		c.String(http.StatusOK, "Ollama is running")
	})
Bruce MacDonald's avatar
Bruce MacDonald committed
349
350
351
	r.HEAD("/", func(c *gin.Context) {
		c.Status(http.StatusOK)
	})
352

353
354
355
356
	r.POST("/api/pull", PullModelHandler)
	r.POST("/api/generate", GenerateHandler)
	r.POST("/api/create", CreateModelHandler)
	r.POST("/api/push", PushModelHandler)
Patrick Devine's avatar
Patrick Devine committed
357
	r.POST("/api/copy", CopyModelHandler)
358
359
	r.GET("/api/tags", ListModelsHandler)
	r.DELETE("/api/delete", DeleteModelHandler)
Jeffrey Morgan's avatar
Jeffrey Morgan committed
360
361
362
363
364
365
366
367

	log.Printf("Listening on %s", ln.Addr())
	s := &http.Server{
		Handler: r,
	}

	return s.Serve(ln)
}
Michael Yang's avatar
Michael Yang committed
368

Michael Yang's avatar
Michael Yang committed
369
func streamResponse(c *gin.Context, ch chan any) {
Michael Yang's avatar
Michael Yang committed
370
371
372
373
374
375
376
377
	c.Stream(func(w io.Writer) bool {
		val, ok := <-ch
		if !ok {
			return false
		}

		bts, err := json.Marshal(val)
		if err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
378
			log.Printf("streamResponse: json.Marshal failed with %s", err)
Michael Yang's avatar
Michael Yang committed
379
380
381
382
383
			return false
		}

		bts = append(bts, '\n')
		if _, err := w.Write(bts); err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
384
			log.Printf("streamResponse: w.Write failed with %s", err)
Michael Yang's avatar
Michael Yang committed
385
386
387
388
389
390
			return false
		}

		return true
	})
}