routes.go 8.36 KB
Newer Older
Jeffrey Morgan's avatar
Jeffrey Morgan committed
1
2
3
package server

import (
Michael Yang's avatar
Michael Yang committed
4
	"encoding/json"
5
	"errors"
6
	"fmt"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
7
8
9
10
	"io"
	"log"
	"net"
	"net/http"
11
	"os"
Michael Yang's avatar
Michael Yang committed
12
	"path/filepath"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
13
	"reflect"
Michael Yang's avatar
Michael Yang committed
14
	"strings"
Michael Yang's avatar
Michael Yang committed
15
	"sync"
16
	"time"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
17

Michael Yang's avatar
Michael Yang committed
18
	"github.com/gin-contrib/cors"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
19
20
	"github.com/gin-gonic/gin"

Jeffrey Morgan's avatar
Jeffrey Morgan committed
21
	"github.com/jmorganca/ollama/api"
Michael Yang's avatar
Michael Yang committed
22
	"github.com/jmorganca/ollama/llama"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
23
24
)

Jeffrey Morgan's avatar
Jeffrey Morgan committed
25
var loaded struct {
Michael Yang's avatar
Michael Yang committed
26
27
28
29
30
31
	mu sync.Mutex

	llm *llama.LLM

	expireAt    time.Time
	expireTimer *time.Timer
Jeffrey Morgan's avatar
Jeffrey Morgan committed
32

33
34
	digest  string
	options api.Options
Michael Yang's avatar
Michael Yang committed
35
36
}

37
func GenerateHandler(c *gin.Context) {
Jeffrey Morgan's avatar
Jeffrey Morgan committed
38
39
	loaded.mu.Lock()
	defer loaded.mu.Unlock()
Michael Yang's avatar
Michael Yang committed
40

Michael Yang's avatar
Michael Yang committed
41
	checkpointStart := time.Now()
42

Michael Yang's avatar
Michael Yang committed
43
	var req api.GenerateRequest
Bruce MacDonald's avatar
Bruce MacDonald committed
44
	if err := c.ShouldBindJSON(&req); err != nil {
45
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Bruce MacDonald's avatar
Bruce MacDonald committed
46
47
		return
	}
48

49
50
51
52
	model, err := GetModel(req.Model)
	if err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
Bruce MacDonald's avatar
Bruce MacDonald committed
53
	}
Michael Yang's avatar
Michael Yang committed
54

55
56
57
58
59
60
61
62
63
64
65
66
67
68
	opts := api.DefaultOptions()
	if err := opts.FromMap(model.Options); err != nil {
		log.Printf("could not load model options: %v", err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

	if err := opts.FromMap(req.Options); err != nil {
		log.Printf("could not merge model options: %v", err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

	if model.Digest != loaded.digest || !reflect.DeepEqual(loaded.options, opts) {
Jeffrey Morgan's avatar
Jeffrey Morgan committed
69
70
71
		if loaded.llm != nil {
			loaded.llm.Close()
			loaded.llm = nil
72
			loaded.digest = ""
Michael Yang's avatar
Michael Yang committed
73
		}
Michael Yang's avatar
Michael Yang committed
74

Michael Yang's avatar
Michael Yang committed
75
76
77
78
79
80
		llm, err := llama.New(model.ModelPath, opts)
		if err != nil {
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
			return
		}

81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
		if opts.NumKeep < 0 {
			promptWithSystem, err := model.Prompt(api.GenerateRequest{})
			if err != nil {
				c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
				return
			}

			promptNoSystem, err := model.Prompt(api.GenerateRequest{Context: []int{0}})
			if err != nil {
				c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
				return
			}

			tokensWithSystem := llm.Encode(promptWithSystem)
			tokensNoSystem := llm.Encode(promptNoSystem)

			llm.NumKeep = len(tokensWithSystem) - len(tokensNoSystem) + 1
		}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
100
101
		loaded.llm = llm
		loaded.digest = model.Digest
102
		loaded.options = opts
Michael Yang's avatar
Michael Yang committed
103
104
	}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
105
	sessionDuration := 5 * time.Minute
Michael Yang's avatar
Michael Yang committed
106

Jeffrey Morgan's avatar
Jeffrey Morgan committed
107
108
109
110
111
	loaded.expireAt = time.Now().Add(sessionDuration)
	if loaded.expireTimer == nil {
		loaded.expireTimer = time.AfterFunc(sessionDuration, func() {
			loaded.mu.Lock()
			defer loaded.mu.Unlock()
Michael Yang's avatar
Michael Yang committed
112

Jeffrey Morgan's avatar
Jeffrey Morgan committed
113
			if time.Now().Before(loaded.expireAt) {
Michael Yang's avatar
Michael Yang committed
114
115
116
				return
			}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
117
			if loaded.llm == nil {
Michael Yang's avatar
Michael Yang committed
118
119
120
				return
			}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
121
122
			loaded.llm.Close()
			loaded.llm = nil
123
			loaded.digest = ""
Michael Yang's avatar
Michael Yang committed
124
		})
Michael Yang's avatar
Michael Yang committed
125
	}
Jeffrey Morgan's avatar
Jeffrey Morgan committed
126
	loaded.expireTimer.Reset(sessionDuration)
Michael Yang's avatar
Michael Yang committed
127

Michael Yang's avatar
Michael Yang committed
128
129
	checkpointLoaded := time.Now()

Michael Yang's avatar
Michael Yang committed
130
	prompt, err := model.Prompt(req)
Michael Yang's avatar
Michael Yang committed
131
132
133
134
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
Jeffrey Morgan's avatar
Jeffrey Morgan committed
135

Michael Yang's avatar
Michael Yang committed
136
137
138
	ch := make(chan any)
	go func() {
		defer close(ch)
Michael Yang's avatar
Michael Yang committed
139
		fn := func(r api.GenerateResponse) {
Jeffrey Morgan's avatar
Jeffrey Morgan committed
140
141
			loaded.expireAt = time.Now().Add(sessionDuration)
			loaded.expireTimer.Reset(sessionDuration)
Michael Yang's avatar
Michael Yang committed
142

Michael Yang's avatar
Michael Yang committed
143
144
145
			r.Model = req.Model
			r.CreatedAt = time.Now().UTC()
			if r.Done {
Michael Yang's avatar
Michael Yang committed
146
147
				r.TotalDuration = time.Since(checkpointStart)
				r.LoadDuration = checkpointLoaded.Sub(checkpointStart)
Michael Yang's avatar
Michael Yang committed
148
149
150
			}

			ch <- r
Michael Yang's avatar
Michael Yang committed
151
152
		}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
153
		if err := loaded.llm.Predict(req.Context, prompt, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
154
155
			ch <- gin.H{"error": err.Error()}
		}
Michael Yang's avatar
Michael Yang committed
156
	}()
Michael Yang's avatar
Michael Yang committed
157

Michael Yang's avatar
Michael Yang committed
158
	streamResponse(c, ch)
Michael Yang's avatar
Michael Yang committed
159
}
Michael Yang's avatar
Michael Yang committed
160

161
func PullModelHandler(c *gin.Context) {
Michael Yang's avatar
Michael Yang committed
162
163
164
165
166
167
	var req api.PullRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

168
169
170
	ch := make(chan any)
	go func() {
		defer close(ch)
171
172
		fn := func(r api.ProgressResponse) {
			ch <- r
173
		}
174

175
176
177
178
179
180
181
		regOpts := &RegistryOptions{
			Insecure: req.Insecure,
			Username: req.Username,
			Password: req.Password,
		}

		if err := PullModel(req.Name, regOpts, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
182
			ch <- gin.H{"error": err.Error()}
183
184
185
186
187
188
		}
	}()

	streamResponse(c, ch)
}

189
func PushModelHandler(c *gin.Context) {
190
191
192
	var req api.PushRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Michael Yang's avatar
Michael Yang committed
193
194
		return
	}
Michael Yang's avatar
Michael Yang committed
195

196
197
198
	ch := make(chan any)
	go func() {
		defer close(ch)
199
200
		fn := func(r api.ProgressResponse) {
			ch <- r
201
		}
202

203
204
205
206
207
208
209
		regOpts := &RegistryOptions{
			Insecure: req.Insecure,
			Username: req.Username,
			Password: req.Password,
		}

		if err := PushModel(req.Name, regOpts, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
210
			ch <- gin.H{"error": err.Error()}
211
212
213
214
215
216
		}
	}()

	streamResponse(c, ch)
}

217
func CreateModelHandler(c *gin.Context) {
218
219
220
	var req api.CreateRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"message": err.Error()})
Michael Yang's avatar
Michael Yang committed
221
		return
222
223
	}

Michael Yang's avatar
Michael Yang committed
224
	ch := make(chan any)
Michael Yang's avatar
Michael Yang committed
225
226
	go func() {
		defer close(ch)
227
228
		fn := func(resp api.ProgressResponse) {
			ch <- resp
229
230
		}

231
		if err := CreateModel(req.Name, req.Path, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
232
			ch <- gin.H{"error": err.Error()}
233
		}
Michael Yang's avatar
Michael Yang committed
234
	}()
Michael Yang's avatar
Michael Yang committed
235

Michael Yang's avatar
Michael Yang committed
236
	streamResponse(c, ch)
Bruce MacDonald's avatar
Bruce MacDonald committed
237
238
}

239
240
241
242
243
244
245
// DeleteModelHandler decodes an api.DeleteRequest and calls DeleteModel for
// the named model. A not-exist error is reported as 404, any other error as
// 500; on success the handler writes no body of its own.
func DeleteModelHandler(c *gin.Context) {
	var req api.DeleteRequest
	if bindErr := c.ShouldBindJSON(&req); bindErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": bindErr.Error()})
		return
	}

	delErr := DeleteModel(req.Name)
	switch {
	case delErr == nil:
		// Deleted; nothing further to report.
	case os.IsNotExist(delErr):
		c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Name)})
	default:
		c.JSON(http.StatusInternalServerError, gin.H{"error": delErr.Error()})
	}
}

func ListModelsHandler(c *gin.Context) {
Patrick Devine's avatar
Patrick Devine committed
257
258
259
260
261
262
263
264
	var models []api.ListResponseModel
	fp, err := GetManifestPath()
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
	err = filepath.Walk(fp, func(path string, info os.FileInfo, err error) error {
		if err != nil {
265
266
267
268
			if errors.Is(err, os.ErrNotExist) {
				log.Printf("manifest file does not exist: %s", fp)
				return nil
			}
Patrick Devine's avatar
Patrick Devine committed
269
270
271
272
273
			return err
		}
		if !info.IsDir() {
			fi, err := os.Stat(path)
			if err != nil {
274
275
				log.Printf("skipping file: %s", fp)
				return nil
Patrick Devine's avatar
Patrick Devine committed
276
277
278
279
280
281
282
283
284
285
			}
			path := path[len(fp)+1:]
			slashIndex := strings.LastIndex(path, "/")
			if slashIndex == -1 {
				return nil
			}
			tag := path[:slashIndex] + ":" + path[slashIndex+1:]
			mp := ParseModelPath(tag)
			manifest, err := GetManifest(mp)
			if err != nil {
286
287
				log.Printf("skipping file: %s", fp)
				return nil
Patrick Devine's avatar
Patrick Devine committed
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
			}
			model := api.ListResponseModel{
				Name:       mp.GetShortTagname(),
				Size:       manifest.GetTotalSize(),
				ModifiedAt: fi.ModTime(),
			}
			models = append(models, model)
		}
		return nil
	})
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

Michael Yang's avatar
Michael Yang committed
303
	c.JSON(http.StatusOK, api.ListResponse{Models: models})
Patrick Devine's avatar
Patrick Devine committed
304
305
}

Patrick Devine's avatar
Patrick Devine committed
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
// CopyModelHandler decodes an api.CopyRequest and calls CopyModel with its
// source and destination. A not-exist error is reported as 404 for the
// source model, any other error as 500; on success the handler writes no
// body of its own.
func CopyModelHandler(c *gin.Context) {
	var req api.CopyRequest
	if bindErr := c.ShouldBindJSON(&req); bindErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": bindErr.Error()})
		return
	}

	copyErr := CopyModel(req.Source, req.Destination)
	switch {
	case copyErr == nil:
		// Copied; nothing further to report.
	case os.IsNotExist(copyErr):
		c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Source)})
	default:
		c.JSON(http.StatusInternalServerError, gin.H{"error": copyErr.Error()})
	}
}

323
func Serve(ln net.Listener, extraOrigins []string) error {
Michael Yang's avatar
Michael Yang committed
324
325
326
	config := cors.DefaultConfig()
	config.AllowWildcard = true
	// only allow http/https from localhost
327
	allowedOrigins := []string{
Michael Yang's avatar
Michael Yang committed
328
329
330
331
332
333
334
335
336
		"http://localhost",
		"http://localhost:*",
		"https://localhost",
		"https://localhost:*",
		"http://127.0.0.1",
		"http://127.0.0.1:*",
		"https://127.0.0.1",
		"https://127.0.0.1:*",
	}
337
338
	allowedOrigins = append(allowedOrigins, extraOrigins...)
	config.AllowOrigins = allowedOrigins
Michael Yang's avatar
Michael Yang committed
339

Bruce MacDonald's avatar
Bruce MacDonald committed
340
	r := gin.Default()
Michael Yang's avatar
Michael Yang committed
341
	r.Use(cors.New(config))
Bruce MacDonald's avatar
Bruce MacDonald committed
342

343
344
345
	r.GET("/", func(c *gin.Context) {
		c.String(http.StatusOK, "Ollama is running")
	})
Bruce MacDonald's avatar
Bruce MacDonald committed
346
347
348
	r.HEAD("/", func(c *gin.Context) {
		c.Status(http.StatusOK)
	})
349

350
351
352
353
	r.POST("/api/pull", PullModelHandler)
	r.POST("/api/generate", GenerateHandler)
	r.POST("/api/create", CreateModelHandler)
	r.POST("/api/push", PushModelHandler)
Patrick Devine's avatar
Patrick Devine committed
354
	r.POST("/api/copy", CopyModelHandler)
355
356
	r.GET("/api/tags", ListModelsHandler)
	r.DELETE("/api/delete", DeleteModelHandler)
Jeffrey Morgan's avatar
Jeffrey Morgan committed
357
358
359
360
361
362
363
364

	log.Printf("Listening on %s", ln.Addr())
	s := &http.Server{
		Handler: r,
	}

	return s.Serve(ln)
}
Michael Yang's avatar
Michael Yang committed
365

Michael Yang's avatar
Michael Yang committed
366
func streamResponse(c *gin.Context, ch chan any) {
Michael Yang's avatar
Michael Yang committed
367
368
369
370
371
372
373
374
	c.Stream(func(w io.Writer) bool {
		val, ok := <-ch
		if !ok {
			return false
		}

		bts, err := json.Marshal(val)
		if err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
375
			log.Printf("streamResponse: json.Marshal failed with %s", err)
Michael Yang's avatar
Michael Yang committed
376
377
378
379
380
			return false
		}

		bts = append(bts, '\n')
		if _, err := w.Write(bts); err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
381
			log.Printf("streamResponse: w.Write failed with %s", err)
Michael Yang's avatar
Michael Yang committed
382
383
384
385
386
387
			return false
		}

		return true
	})
}