routes.go 3.02 KB
Newer Older
Jeffrey Morgan's avatar
Jeffrey Morgan committed
1
2
3
package server

import (
Michael Yang's avatar
Michael Yang committed
4
	"encoding/json"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
5
6
7
8
9
	"fmt"
	"io"
	"log"
	"net"
	"net/http"
Michael Yang's avatar
Michael Yang committed
10
	"path"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
11
	"runtime"
Michael Yang's avatar
Michael Yang committed
12
13
	"strings"
	"text/template"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
14
15

	"github.com/gin-gonic/gin"
Michael Yang's avatar
Michael Yang committed
16
	"github.com/lithammer/fuzzysearch/fuzzy"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
17

Jeffrey Morgan's avatar
Jeffrey Morgan committed
18
	"github.com/jmorganca/ollama/api"
Michael Yang's avatar
Michael Yang committed
19
	"github.com/jmorganca/ollama/llama"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
20
21
)

Michael Yang's avatar
Michael Yang committed
22
23
// templates holds every prompt template matched by the glob
// "templates/*.prompt". The pattern is relative, so it is resolved against the
// process working directory. Parsing happens once, at package initialization;
// template.Must panics if ParseGlob returns an error (for example when the
// pattern matches no files).
var templates = template.Must(template.ParseGlob("templates/*.prompt"))

Bruce MacDonald's avatar
Bruce MacDonald committed
24
func generate(c *gin.Context) {
Jeffrey Morgan's avatar
Jeffrey Morgan committed
25
	// TODO: these should be request parameters
Michael Yang's avatar
Michael Yang committed
26
	gpulayers := 1
Jeffrey Morgan's avatar
Jeffrey Morgan committed
27
28
29
	tokens := 512
	threads := runtime.NumCPU()

Bruce MacDonald's avatar
Bruce MacDonald committed
30
31
32
33
34
	var req api.GenerateRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"message": err.Error()})
		return
	}
35

Bruce MacDonald's avatar
Bruce MacDonald committed
36
37
38
39
40
	l, err := llama.New(req.Model, llama.EnableF16Memory, llama.SetContext(128), llama.EnableEmbeddings, llama.SetGPULayers(gpulayers))
	if err != nil {
		fmt.Println("Loading the model failed:", err.Error())
		return
	}
Jeffrey Morgan's avatar
Jeffrey Morgan committed
41

Michael Yang's avatar
Michael Yang committed
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
	templateNames := make([]string, 0, len(templates.Templates()))
	for _, template := range templates.Templates() {
		templateNames = append(templateNames, template.Name())
	}

	match, _ := matchRankOne(path.Base(req.Prompt), templateNames)
	if template := templates.Lookup(match); template != nil {
		var sb strings.Builder
		if err := template.Execute(&sb, req); err != nil {
			fmt.Println("Prompt template failed:", err.Error())
			return
		}

		req.Prompt = sb.String()
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
58
	ch := make(chan string)
Jeffrey Morgan's avatar
Jeffrey Morgan committed
59

Bruce MacDonald's avatar
Bruce MacDonald committed
60
61
62
63
64
65
	go func() {
		defer close(ch)
		_, err := l.Predict(req.Prompt, llama.Debug, llama.SetTokenCallback(func(token string) bool {
			ch <- token
			return true
		}), llama.SetTokens(tokens), llama.SetThreads(threads), llama.SetTopK(90), llama.SetTopP(0.86), llama.SetStopWords("llama"))
Jeffrey Morgan's avatar
Jeffrey Morgan committed
66
		if err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
67
			panic(err)
Jeffrey Morgan's avatar
Jeffrey Morgan committed
68
		}
Bruce MacDonald's avatar
Bruce MacDonald committed
69
	}()
Jeffrey Morgan's avatar
Jeffrey Morgan committed
70

Bruce MacDonald's avatar
Bruce MacDonald committed
71
	c.Stream(func(w io.Writer) bool {
Michael Yang's avatar
Michael Yang committed
72
		token, ok := <-ch
Bruce MacDonald's avatar
Bruce MacDonald committed
73
74
75
		if !ok {
			return false
		}
Michael Yang's avatar
Michael Yang committed
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94

		resp := api.TokenResponse{
			Choices: []api.TokenResponseChoice{
				{
					Text: token,
				},
			},
		}

		bts, err := json.Marshal(resp)
		if err != nil {
			return false
		}

		bts = append(bts, '\n')
		if _, err := w.Write(bts); err != nil {
			return false
		}

Bruce MacDonald's avatar
Bruce MacDonald committed
95
		return true
Jeffrey Morgan's avatar
Jeffrey Morgan committed
96
	})
Bruce MacDonald's avatar
Bruce MacDonald committed
97
98
99
100
101
}

func Serve(ln net.Listener) error {
	r := gin.Default()

Bruce MacDonald's avatar
Bruce MacDonald committed
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
	r.POST("api/pull", func(c *gin.Context) {
		var req api.PullRequest
		if err := c.ShouldBindJSON(&req); err != nil {
			c.JSON(http.StatusBadRequest, gin.H{"message": err.Error()})
			return
		}

		progressCh := make(chan string)
		go func() {
			defer close(progressCh)
			if err := pull(req.Model, progressCh); err != nil {
				c.JSON(http.StatusBadRequest, gin.H{"message": err.Error()})
				return
			}
		}()

		c.Stream(func(w io.Writer) bool {
			progress, ok := <-progressCh
			if !ok {
				return false
			}
			c.SSEvent("progress", progress)
			return true
		})
	})
Bruce MacDonald's avatar
Bruce MacDonald committed
127
128

	r.POST("/api/generate", generate)
Jeffrey Morgan's avatar
Jeffrey Morgan committed
129
130
131
132
133
134
135
136

	log.Printf("Listening on %s", ln.Addr())
	s := &http.Server{
		Handler: r,
	}

	return s.Serve(ln)
}
Michael Yang's avatar
Michael Yang committed
137
138
139
140
141
142
143
144
145
146
147

// matchRankOne returns the entry of targets closest to source, together with
// its Levenshtein distance from source. A smaller distance means a closer
// match, so the target with the minimum distance wins; on a tie the earliest
// target is kept. With no targets it returns "" and 0.
func matchRankOne(source string, targets []string) (bestMatch string, bestRank int) {
	for i, target := range targets {
		rank := fuzzy.LevenshteinDistance(source, target)
		// The first candidate always seeds the result; otherwise the zero
		// value of bestRank would beat every real distance.
		if i == 0 || rank < bestRank {
			bestRank = rank
			bestMatch = target
		}
	}

	return bestMatch, bestRank
}