routes.go 3.12 KB
Newer Older
Jeffrey Morgan's avatar
Jeffrey Morgan committed
1
2
3
package server

import (
Michael Yang's avatar
Michael Yang committed
4
	"embed"
Michael Yang's avatar
Michael Yang committed
5
	"encoding/json"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
6
7
8
9
10
	"fmt"
	"io"
	"log"
	"net"
	"net/http"
Michael Yang's avatar
Michael Yang committed
11
	"path"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
12
	"runtime"
Michael Yang's avatar
Michael Yang committed
13
14
	"strings"
	"text/template"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
15
16

	"github.com/gin-gonic/gin"
Michael Yang's avatar
Michael Yang committed
17
	"github.com/lithammer/fuzzysearch/fuzzy"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
18

Jeffrey Morgan's avatar
Jeffrey Morgan committed
19
	"github.com/jmorganca/ollama/api"
Michael Yang's avatar
Michael Yang committed
20
	"github.com/jmorganca/ollama/llama"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
21
22
)

Michael Yang's avatar
Michael Yang committed
23
24
25
// templatesFS holds the files matched by the embed pattern below, compiled
// into the binary.
//
//go:embed templates/*
var templatesFS embed.FS

// templates is the set of prompt templates parsed from the embedded files
// matching templates/*.prompt. template.Must panics during package
// initialization if parsing fails.
var templates = template.Must(template.ParseFS(templatesFS, "templates/*.prompt"))
Michael Yang's avatar
Michael Yang committed
26

Bruce MacDonald's avatar
Bruce MacDonald committed
27
func generate(c *gin.Context) {
Jeffrey Morgan's avatar
Jeffrey Morgan committed
28
	// TODO: these should be request parameters
Michael Yang's avatar
Michael Yang committed
29
	gpulayers := 1
Jeffrey Morgan's avatar
Jeffrey Morgan committed
30
31
32
	tokens := 512
	threads := runtime.NumCPU()

Bruce MacDonald's avatar
Bruce MacDonald committed
33
34
35
36
37
	var req api.GenerateRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"message": err.Error()})
		return
	}
38

Michael Yang's avatar
Michael Yang committed
39
	model, err := llama.New(req.Model, llama.EnableF16Memory, llama.SetContext(128), llama.EnableEmbeddings, llama.SetGPULayers(gpulayers))
Bruce MacDonald's avatar
Bruce MacDonald committed
40
41
42
43
	if err != nil {
		fmt.Println("Loading the model failed:", err.Error())
		return
	}
Michael Yang's avatar
Michael Yang committed
44
	defer model.Free()
Jeffrey Morgan's avatar
Jeffrey Morgan committed
45

Michael Yang's avatar
Michael Yang committed
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
	templateNames := make([]string, 0, len(templates.Templates()))
	for _, template := range templates.Templates() {
		templateNames = append(templateNames, template.Name())
	}

	match, _ := matchRankOne(path.Base(req.Prompt), templateNames)
	if template := templates.Lookup(match); template != nil {
		var sb strings.Builder
		if err := template.Execute(&sb, req); err != nil {
			fmt.Println("Prompt template failed:", err.Error())
			return
		}

		req.Prompt = sb.String()
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
62
	ch := make(chan string)
Jeffrey Morgan's avatar
Jeffrey Morgan committed
63

Bruce MacDonald's avatar
Bruce MacDonald committed
64
65
	go func() {
		defer close(ch)
Michael Yang's avatar
Michael Yang committed
66
		_, err := model.Predict(req.Prompt, llama.Debug, llama.SetTokenCallback(func(token string) bool {
Bruce MacDonald's avatar
Bruce MacDonald committed
67
68
69
			ch <- token
			return true
		}), llama.SetTokens(tokens), llama.SetThreads(threads), llama.SetTopK(90), llama.SetTopP(0.86), llama.SetStopWords("llama"))
Jeffrey Morgan's avatar
Jeffrey Morgan committed
70
		if err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
71
			panic(err)
Jeffrey Morgan's avatar
Jeffrey Morgan committed
72
		}
Bruce MacDonald's avatar
Bruce MacDonald committed
73
	}()
Jeffrey Morgan's avatar
Jeffrey Morgan committed
74

Bruce MacDonald's avatar
Bruce MacDonald committed
75
	c.Stream(func(w io.Writer) bool {
Michael Yang's avatar
Michael Yang committed
76
		token, ok := <-ch
Bruce MacDonald's avatar
Bruce MacDonald committed
77
78
79
		if !ok {
			return false
		}
Michael Yang's avatar
Michael Yang committed
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98

		resp := api.TokenResponse{
			Choices: []api.TokenResponseChoice{
				{
					Text: token,
				},
			},
		}

		bts, err := json.Marshal(resp)
		if err != nil {
			return false
		}

		bts = append(bts, '\n')
		if _, err := w.Write(bts); err != nil {
			return false
		}

Bruce MacDonald's avatar
Bruce MacDonald committed
99
		return true
Jeffrey Morgan's avatar
Jeffrey Morgan committed
100
	})
Bruce MacDonald's avatar
Bruce MacDonald committed
101
102
103
104
105
}

func Serve(ln net.Listener) error {
	r := gin.Default()

Bruce MacDonald's avatar
Bruce MacDonald committed
106
107
108
109
110
111
112
	r.POST("api/pull", func(c *gin.Context) {
		var req api.PullRequest
		if err := c.ShouldBindJSON(&req); err != nil {
			c.JSON(http.StatusBadRequest, gin.H{"message": err.Error()})
			return
		}

Bruce MacDonald's avatar
Bruce MacDonald committed
113
		progressCh := make(chan api.PullProgress)
Bruce MacDonald's avatar
Bruce MacDonald committed
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
		go func() {
			defer close(progressCh)
			if err := pull(req.Model, progressCh); err != nil {
				c.JSON(http.StatusBadRequest, gin.H{"message": err.Error()})
				return
			}
		}()

		c.Stream(func(w io.Writer) bool {
			progress, ok := <-progressCh
			if !ok {
				return false
			}
			c.SSEvent("progress", progress)
			return true
		})
	})
Bruce MacDonald's avatar
Bruce MacDonald committed
131
132

	r.POST("/api/generate", generate)
Jeffrey Morgan's avatar
Jeffrey Morgan committed
133
134
135
136
137
138
139
140

	log.Printf("Listening on %s", ln.Addr())
	s := &http.Server{
		Handler: r,
	}

	return s.Serve(ln)
}
Michael Yang's avatar
Michael Yang committed
141
142
143
144
145
146
147
148
149
150
151

// matchRankOne returns the entry of targets that is closest to source by
// Levenshtein (edit) distance, together with that distance. A smaller
// distance means a closer match, so the minimum wins and an exact match
// (distance 0) is always preferred; ties go to the earliest target.
//
// When targets is empty it returns "" and -1.
func matchRankOne(source string, targets []string) (bestMatch string, bestRank int) {
	bestRank = -1 // sentinel: no candidate seen yet
	for _, target := range targets {
		rank := fuzzy.LevenshteinDistance(source, target)
		if bestRank < 0 || rank < bestRank {
			bestRank = rank
			bestMatch = target
		}
	}

	return bestMatch, bestRank
}