routes.go 4.88 KB
Newer Older
Jeffrey Morgan's avatar
Jeffrey Morgan committed
1
2
3
package server

import (
Michael Yang's avatar
Michael Yang committed
4
	"embed"
Michael Yang's avatar
Michael Yang committed
5
	"encoding/json"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
6
7
8
	"fmt"
	"io"
	"log"
Michael Yang's avatar
Michael Yang committed
9
	"math"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
10
11
	"net"
	"net/http"
Michael Yang's avatar
Michael Yang committed
12
	"path"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
13
	"runtime"
Michael Yang's avatar
Michael Yang committed
14
15
	"strings"
	"text/template"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
16
17

	"github.com/gin-gonic/gin"
Michael Yang's avatar
Michael Yang committed
18
	"github.com/lithammer/fuzzysearch/fuzzy"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
19

Jeffrey Morgan's avatar
Jeffrey Morgan committed
20
	"github.com/jmorganca/ollama/api"
Michael Yang's avatar
Michael Yang committed
21
	"github.com/jmorganca/ollama/llama"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
22
23
)

Michael Yang's avatar
Michael Yang committed
24
25
26
// templatesFS holds the files matched by templates/*, embedded into the
// binary at build time.
//
//go:embed templates/*
var templatesFS embed.FS
// templates is the set of text templates parsed from the embedded
// templates/*.prompt files. template.Must panics during package
// initialization if parsing fails.
var templates = template.Must(template.ParseFS(templatesFS, "templates/*.prompt"))
Michael Yang's avatar
Michael Yang committed
27

Bruce MacDonald's avatar
Bruce MacDonald committed
28
29
func generate(c *gin.Context) {
	var req api.GenerateRequest
30
31
	req.ModelOptions = api.DefaultModelOptions
	req.PredictOptions = api.DefaultPredictOptions
Bruce MacDonald's avatar
Bruce MacDonald committed
32
33
34
35
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"message": err.Error()})
		return
	}
36

Michael Yang's avatar
Michael Yang committed
37
38
39
40
	if remoteModel, _ := getRemote(req.Model); remoteModel != nil {
		req.Model = remoteModel.FullName()
	}

41
42
43
44
	modelOpts := getModelOpts(req)
	modelOpts.NGPULayers = 1  // hard-code this for now

	model, err := llama.New(req.Model, modelOpts)
Bruce MacDonald's avatar
Bruce MacDonald committed
45
46
47
48
	if err != nil {
		fmt.Println("Loading the model failed:", err.Error())
		return
	}
Michael Yang's avatar
Michael Yang committed
49
	defer model.Free()
Jeffrey Morgan's avatar
Jeffrey Morgan committed
50

Michael Yang's avatar
Michael Yang committed
51
52
53
54
55
	templateNames := make([]string, 0, len(templates.Templates()))
	for _, template := range templates.Templates() {
		templateNames = append(templateNames, template.Name())
	}

Michael Yang's avatar
Michael Yang committed
56
	match, _ := matchRankOne(path.Base(req.Model), templateNames)
Michael Yang's avatar
Michael Yang committed
57
58
59
60
61
62
63
64
65
66
	if template := templates.Lookup(match); template != nil {
		var sb strings.Builder
		if err := template.Execute(&sb, req); err != nil {
			fmt.Println("Prompt template failed:", err.Error())
			return
		}

		req.Prompt = sb.String()
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
67
	ch := make(chan string)
68
69
70
71
72
73
	model.SetTokenCallback(func(token string) bool {
		ch <- token
		return true
	})

	predictOpts := getPredictOpts(req)
Jeffrey Morgan's avatar
Jeffrey Morgan committed
74

Bruce MacDonald's avatar
Bruce MacDonald committed
75
76
	go func() {
		defer close(ch)
77
		_, err := model.Predict(req.Prompt, predictOpts)
Jeffrey Morgan's avatar
Jeffrey Morgan committed
78
		if err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
79
			panic(err)
Jeffrey Morgan's avatar
Jeffrey Morgan committed
80
		}
Bruce MacDonald's avatar
Bruce MacDonald committed
81
	}()
Jeffrey Morgan's avatar
Jeffrey Morgan committed
82

Bruce MacDonald's avatar
Bruce MacDonald committed
83
	c.Stream(func(w io.Writer) bool {
Michael Yang's avatar
Michael Yang committed
84
		token, ok := <-ch
Bruce MacDonald's avatar
Bruce MacDonald committed
85
86
87
		if !ok {
			return false
		}
Michael Yang's avatar
Michael Yang committed
88

Michael Yang's avatar
Michael Yang committed
89
90
		resp := api.GenerateResponse{
			Response: token,
Michael Yang's avatar
Michael Yang committed
91
92
93
94
95
96
97
98
99
100
101
102
		}

		bts, err := json.Marshal(resp)
		if err != nil {
			return false
		}

		bts = append(bts, '\n')
		if _, err := w.Write(bts); err != nil {
			return false
		}

Bruce MacDonald's avatar
Bruce MacDonald committed
103
		return true
Jeffrey Morgan's avatar
Jeffrey Morgan committed
104
	})
Bruce MacDonald's avatar
Bruce MacDonald committed
105
106
107
108
109
}

func Serve(ln net.Listener) error {
	r := gin.Default()

Bruce MacDonald's avatar
Bruce MacDonald committed
110
111
112
113
114
115
116
	r.POST("api/pull", func(c *gin.Context) {
		var req api.PullRequest
		if err := c.ShouldBindJSON(&req); err != nil {
			c.JSON(http.StatusBadRequest, gin.H{"message": err.Error()})
			return
		}

Bruce MacDonald's avatar
Bruce MacDonald committed
117
		progressCh := make(chan api.PullProgress)
Bruce MacDonald's avatar
Bruce MacDonald committed
118
119
120
121
122
123
124
125
126
127
128
129
130
		go func() {
			defer close(progressCh)
			if err := pull(req.Model, progressCh); err != nil {
				c.JSON(http.StatusBadRequest, gin.H{"message": err.Error()})
				return
			}
		}()

		c.Stream(func(w io.Writer) bool {
			progress, ok := <-progressCh
			if !ok {
				return false
			}
Michael Yang's avatar
Michael Yang committed
131
132
133
134
135
136
137
138
139
140
141

			bts, err := json.Marshal(progress)
			if err != nil {
				return false
			}

			bts = append(bts, '\n')
			if _, err := w.Write(bts); err != nil {
				return false
			}

Bruce MacDonald's avatar
Bruce MacDonald committed
142
143
144
			return true
		})
	})
Bruce MacDonald's avatar
Bruce MacDonald committed
145
146

	r.POST("/api/generate", generate)
Jeffrey Morgan's avatar
Jeffrey Morgan committed
147
148
149
150
151
152
153
154

	log.Printf("Listening on %s", ln.Addr())
	s := &http.Server{
		Handler: r,
	}

	return s.Serve(ln)
}
Michael Yang's avatar
Michael Yang committed
155
156

func matchRankOne(source string, targets []string) (bestMatch string, bestRank int) {
Michael Yang's avatar
Michael Yang committed
157
	bestRank = math.MaxInt
Michael Yang's avatar
Michael Yang committed
158
	for _, target := range targets {
Michael Yang's avatar
Michael Yang committed
159
		if rank := fuzzy.LevenshteinDistance(source, target); bestRank > rank {
Michael Yang's avatar
Michael Yang committed
160
161
162
163
164
165
166
			bestRank = rank
			bestMatch = target
		}
	}

	return
}
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216

// getModelOpts copies the model-loading settings carried by req into a
// llama.ModelOptions value. Fields not listed here keep their zero value.
func getModelOpts(req api.GenerateRequest) llama.ModelOptions {
	src := req.ModelOptions

	return llama.ModelOptions{
		ContextSize: src.ContextSize,
		Seed:        src.Seed,
		F16Memory:   src.F16Memory,
		MLock:       src.MLock,
		Embeddings:  src.Embeddings,
		MMap:        src.MMap,
		LowVRAM:     src.LowVRAM,

		NBatch:      src.NBatch,
		VocabOnly:   src.VocabOnly,
		NUMA:        src.NUMA,
		NGPULayers:  src.NGPULayers,
		MainGPU:     src.MainGPU,
		TensorSplit: src.TensorSplit,
	}
}

// getPredictOpts copies the prediction settings carried by req into a
// llama.PredictOptions value. A requested thread count of -1 is replaced by
// the number of logical CPUs reported by runtime.NumCPU.
func getPredictOpts(req api.GenerateRequest) llama.PredictOptions {
	src := req.PredictOptions

	opts := llama.PredictOptions{
		Threads:           src.Threads,
		Seed:              src.Seed,
		Tokens:            src.Tokens,
		Penalty:           src.Penalty,
		Repeat:            src.Repeat,
		Batch:             src.Batch,
		NKeep:             src.NKeep,
		TopK:              src.TopK,
		TopP:              src.TopP,
		TailFreeSamplingZ: src.TailFreeSamplingZ,
		TypicalP:          src.TypicalP,
		Temperature:       src.Temperature,
		FrequencyPenalty:  src.FrequencyPenalty,
		PresencePenalty:   src.PresencePenalty,
		Mirostat:          src.Mirostat,
		MirostatTAU:       src.MirostatTAU,
		MirostatETA:       src.MirostatETA,
		MMap:              src.MMap,
	}

	// -1 means "use every available CPU".
	if src.Threads == -1 {
		opts.Threads = runtime.NumCPU()
	}

	return opts
}