routes.go 5.55 KB
Newer Older
Jeffrey Morgan's avatar
Jeffrey Morgan committed
1
2
3
package server

import (
Michael Yang's avatar
Michael Yang committed
4
	"embed"
Michael Yang's avatar
Michael Yang committed
5
	"encoding/json"
6
	"errors"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
7
8
	"io"
	"log"
Michael Yang's avatar
Michael Yang committed
9
	"math"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
10
11
	"net"
	"net/http"
12
	"os"
Michael Yang's avatar
Michael Yang committed
13
	"path"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
14
	"runtime"
Michael Yang's avatar
Michael Yang committed
15
16
	"strings"
	"text/template"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
17
18

	"github.com/gin-gonic/gin"
Michael Yang's avatar
Michael Yang committed
19
	"github.com/lithammer/fuzzysearch/fuzzy"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
20

Jeffrey Morgan's avatar
Jeffrey Morgan committed
21
	"github.com/jmorganca/ollama/api"
Michael Yang's avatar
Michael Yang committed
22
	"github.com/jmorganca/ollama/llama"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
23
24
)

Michael Yang's avatar
Michael Yang committed
25
26
27
// templatesFS holds every file under the templates/ directory, embedded into
// the binary by the go:embed directive below.
//
//go:embed templates/*
var templatesFS embed.FS
// templates is the template set parsed from the embedded files matching
// templates/*.prompt. template.Must panics during package initialization if
// parsing fails.
var templates = template.Must(template.ParseFS(templatesFS, "templates/*.prompt"))
Michael Yang's avatar
Michael Yang committed
28

29
30
31
32
33
34
35
36
37
// cacheDir returns the ".ollama" directory located directly under the current
// user's home directory. It panics when os.UserHomeDir reports an error, since
// the signature leaves no way to return it.
func cacheDir() string {
	homeDir, homeErr := os.UserHomeDir()
	if homeErr == nil {
		return path.Join(homeDir, ".ollama")
	}

	panic(homeErr)
}

Bruce MacDonald's avatar
Bruce MacDonald committed
38
39
func generate(c *gin.Context) {
	var req api.GenerateRequest
40
41
42
43
44
45
46
	if req.ModelOptions == nil {
		req.ModelOptions = &api.DefaultModelOptions
	}

	if req.PredictOptions == nil {
		req.PredictOptions = &api.DefaultPredictOptions
	}
Bruce MacDonald's avatar
Bruce MacDonald committed
47
	if err := c.ShouldBindJSON(&req); err != nil {
48
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Bruce MacDonald's avatar
Bruce MacDonald committed
49
50
		return
	}
51

Bruce MacDonald's avatar
Bruce MacDonald committed
52
	if remoteModel, _ := getRemote(req.Model); remoteModel != nil {
Michael Yang's avatar
Michael Yang committed
53
54
		req.Model = remoteModel.FullName()
	}
Bruce MacDonald's avatar
Bruce MacDonald committed
55
56
	if _, err := os.Stat(req.Model); err != nil {
		if !errors.Is(err, os.ErrNotExist) {
57
			c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Bruce MacDonald's avatar
Bruce MacDonald committed
58
59
60
61
			return
		}
		req.Model = path.Join(cacheDir(), "models", req.Model+".bin")
	}
Michael Yang's avatar
Michael Yang committed
62

63
	modelOpts := getModelOpts(req)
64
	modelOpts.NGPULayers = 1 // hard-code this for now
65
66

	model, err := llama.New(req.Model, modelOpts)
Bruce MacDonald's avatar
Bruce MacDonald committed
67
	if err != nil {
68
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Bruce MacDonald's avatar
Bruce MacDonald committed
69
70
		return
	}
Michael Yang's avatar
Michael Yang committed
71
	defer model.Free()
Jeffrey Morgan's avatar
Jeffrey Morgan committed
72

Michael Yang's avatar
Michael Yang committed
73
74
75
76
77
	templateNames := make([]string, 0, len(templates.Templates()))
	for _, template := range templates.Templates() {
		templateNames = append(templateNames, template.Name())
	}

Michael Yang's avatar
Michael Yang committed
78
	match, _ := matchRankOne(path.Base(req.Model), templateNames)
Michael Yang's avatar
Michael Yang committed
79
80
81
	if template := templates.Lookup(match); template != nil {
		var sb strings.Builder
		if err := template.Execute(&sb, req); err != nil {
82
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
Michael Yang's avatar
Michael Yang committed
83
84
85
86
87
88
			return
		}

		req.Prompt = sb.String()
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
89
	ch := make(chan string)
90
91
92
93
94
95
	model.SetTokenCallback(func(token string) bool {
		ch <- token
		return true
	})

	predictOpts := getPredictOpts(req)
Jeffrey Morgan's avatar
Jeffrey Morgan committed
96

Bruce MacDonald's avatar
Bruce MacDonald committed
97
98
	go func() {
		defer close(ch)
99
		_, err := model.Predict(req.Prompt, predictOpts)
Jeffrey Morgan's avatar
Jeffrey Morgan committed
100
		if err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
101
			panic(err)
Jeffrey Morgan's avatar
Jeffrey Morgan committed
102
		}
Bruce MacDonald's avatar
Bruce MacDonald committed
103
	}()
Jeffrey Morgan's avatar
Jeffrey Morgan committed
104

Bruce MacDonald's avatar
Bruce MacDonald committed
105
	c.Stream(func(w io.Writer) bool {
Michael Yang's avatar
Michael Yang committed
106
		token, ok := <-ch
Bruce MacDonald's avatar
Bruce MacDonald committed
107
108
109
		if !ok {
			return false
		}
Michael Yang's avatar
Michael Yang committed
110

Michael Yang's avatar
Michael Yang committed
111
112
		resp := api.GenerateResponse{
			Response: token,
Michael Yang's avatar
Michael Yang committed
113
114
115
116
117
118
119
120
121
122
123
124
		}

		bts, err := json.Marshal(resp)
		if err != nil {
			return false
		}

		bts = append(bts, '\n')
		if _, err := w.Write(bts); err != nil {
			return false
		}

Bruce MacDonald's avatar
Bruce MacDonald committed
125
		return true
Jeffrey Morgan's avatar
Jeffrey Morgan committed
126
	})
Bruce MacDonald's avatar
Bruce MacDonald committed
127
128
129
130
131
}

func Serve(ln net.Listener) error {
	r := gin.Default()

132
133
134
135
	r.GET("/", func(c *gin.Context) {
		c.String(http.StatusOK, "Ollama is running")
	})

Bruce MacDonald's avatar
Bruce MacDonald committed
136
137
138
	r.POST("api/pull", func(c *gin.Context) {
		var req api.PullRequest
		if err := c.ShouldBindJSON(&req); err != nil {
139
			c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Bruce MacDonald's avatar
Bruce MacDonald committed
140
141
142
			return
		}

Bruce MacDonald's avatar
Bruce MacDonald committed
143
		progressCh := make(chan api.PullProgress)
Bruce MacDonald's avatar
Bruce MacDonald committed
144
145
146
		go func() {
			defer close(progressCh)
			if err := pull(req.Model, progressCh); err != nil {
147
148
				var opError *net.OpError
				if errors.As(err, &opError) {
149
					c.JSON(http.StatusBadGateway, gin.H{"error": err.Error()})
150
151
					return
				}
152
				c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Bruce MacDonald's avatar
Bruce MacDonald committed
153
154
155
156
157
158
159
160
161
				return
			}
		}()

		c.Stream(func(w io.Writer) bool {
			progress, ok := <-progressCh
			if !ok {
				return false
			}
Michael Yang's avatar
Michael Yang committed
162
163
164
165
166
167
168
169
170
171
172

			bts, err := json.Marshal(progress)
			if err != nil {
				return false
			}

			bts = append(bts, '\n')
			if _, err := w.Write(bts); err != nil {
				return false
			}

Bruce MacDonald's avatar
Bruce MacDonald committed
173
174
175
			return true
		})
	})
Bruce MacDonald's avatar
Bruce MacDonald committed
176
177

	r.POST("/api/generate", generate)
Jeffrey Morgan's avatar
Jeffrey Morgan committed
178
179
180
181
182
183
184
185

	log.Printf("Listening on %s", ln.Addr())
	s := &http.Server{
		Handler: r,
	}

	return s.Serve(ln)
}
Michael Yang's avatar
Michael Yang committed
186
187

func matchRankOne(source string, targets []string) (bestMatch string, bestRank int) {
Michael Yang's avatar
Michael Yang committed
188
	bestRank = math.MaxInt
Michael Yang's avatar
Michael Yang committed
189
	for _, target := range targets {
Michael Yang's avatar
Michael Yang committed
190
		if rank := fuzzy.LevenshteinDistance(source, target); bestRank > rank {
Michael Yang's avatar
Michael Yang committed
191
192
193
194
195
196
197
			bestRank = rank
			bestMatch = target
		}
	}

	return
}
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247

// getModelOpts copies the model-loading settings carried by req.ModelOptions
// into a fresh llama.ModelOptions. req.ModelOptions is dereferenced
// unconditionally, so it must be non-nil.
func getModelOpts(req api.GenerateRequest) llama.ModelOptions {
	in := req.ModelOptions
	out := llama.ModelOptions{}

	// Context and memory behaviour.
	out.ContextSize = in.ContextSize
	out.Seed = in.Seed
	out.F16Memory = in.F16Memory
	out.MLock = in.MLock
	out.Embeddings = in.Embeddings
	out.MMap = in.MMap
	out.LowVRAM = in.LowVRAM

	// Batching and device placement.
	out.NBatch = in.NBatch
	out.VocabOnly = in.VocabOnly
	out.NUMA = in.NUMA
	out.NGPULayers = in.NGPULayers
	out.MainGPU = in.MainGPU
	out.TensorSplit = in.TensorSplit

	return out
}

// getPredictOpts copies the prediction settings carried by req.PredictOptions
// into a fresh llama.PredictOptions. A Threads value of -1 is replaced by
// runtime.NumCPU(). req.PredictOptions is dereferenced unconditionally, so it
// must be non-nil.
func getPredictOpts(req api.GenerateRequest) llama.PredictOptions {
	in := req.PredictOptions
	out := llama.PredictOptions{}

	// -1 means "use every CPU the runtime reports".
	threads := in.Threads
	if threads == -1 {
		threads = runtime.NumCPU()
	}
	out.Threads = threads

	out.Seed = in.Seed
	out.Tokens = in.Tokens
	out.Penalty = in.Penalty
	out.Repeat = in.Repeat
	out.Batch = in.Batch
	out.NKeep = in.NKeep
	out.TopK = in.TopK
	out.TopP = in.TopP
	out.TailFreeSamplingZ = in.TailFreeSamplingZ
	out.TypicalP = in.TypicalP
	out.Temperature = in.Temperature
	out.FrequencyPenalty = in.FrequencyPenalty
	out.PresencePenalty = in.PresencePenalty
	out.Mirostat = in.Mirostat
	out.MirostatTAU = in.MirostatTAU
	out.MirostatETA = in.MirostatETA
	out.MMap = in.MMap

	return out
}