routes.go 3.07 KB
Newer Older
Jeffrey Morgan's avatar
Jeffrey Morgan committed
1
2
3
package server

import (
Michael Yang's avatar
Michael Yang committed
4
	"embed"
Michael Yang's avatar
Michael Yang committed
5
	"encoding/json"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
6
7
8
9
10
	"fmt"
	"io"
	"log"
	"net"
	"net/http"
Michael Yang's avatar
Michael Yang committed
11
	"path"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
12
	"runtime"
Michael Yang's avatar
Michael Yang committed
13
14
	"strings"
	"text/template"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
15
16

	"github.com/gin-gonic/gin"
Michael Yang's avatar
Michael Yang committed
17
	"github.com/lithammer/fuzzysearch/fuzzy"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
18

Jeffrey Morgan's avatar
Jeffrey Morgan committed
19
	"github.com/jmorganca/ollama/api"
Michael Yang's avatar
Michael Yang committed
20
	"github.com/jmorganca/ollama/llama"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
21
22
)

Michael Yang's avatar
Michael Yang committed
23
24
25
// templatesFS holds the files under templates/, embedded into the binary at
// build time by the directive below.
//
//go:embed templates/*
var templatesFS embed.FS
// templates is the set of prompt templates parsed from every embedded file
// matching templates/*.prompt. template.Must panics during package
// initialization if any of them fails to parse.
var templates = template.Must(template.ParseFS(templatesFS, "templates/*.prompt"))
Michael Yang's avatar
Michael Yang committed
26

Bruce MacDonald's avatar
Bruce MacDonald committed
27
func generate(c *gin.Context) {
Jeffrey Morgan's avatar
Jeffrey Morgan committed
28
	// TODO: these should be request parameters
Michael Yang's avatar
Michael Yang committed
29
	gpulayers := 1
Jeffrey Morgan's avatar
Jeffrey Morgan committed
30
31
32
	tokens := 512
	threads := runtime.NumCPU()

Bruce MacDonald's avatar
Bruce MacDonald committed
33
34
35
36
37
	var req api.GenerateRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"message": err.Error()})
		return
	}
38

Michael Yang's avatar
Michael Yang committed
39
	model, err := llama.New(req.Model, llama.EnableF16Memory, llama.SetContext(128), llama.EnableEmbeddings, llama.SetGPULayers(gpulayers))
Bruce MacDonald's avatar
Bruce MacDonald committed
40
41
42
43
	if err != nil {
		fmt.Println("Loading the model failed:", err.Error())
		return
	}
Michael Yang's avatar
Michael Yang committed
44
	defer model.Free()
Jeffrey Morgan's avatar
Jeffrey Morgan committed
45

Michael Yang's avatar
Michael Yang committed
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
	templateNames := make([]string, 0, len(templates.Templates()))
	for _, template := range templates.Templates() {
		templateNames = append(templateNames, template.Name())
	}

	match, _ := matchRankOne(path.Base(req.Prompt), templateNames)
	if template := templates.Lookup(match); template != nil {
		var sb strings.Builder
		if err := template.Execute(&sb, req); err != nil {
			fmt.Println("Prompt template failed:", err.Error())
			return
		}

		req.Prompt = sb.String()
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
62
	ch := make(chan string)
Jeffrey Morgan's avatar
Jeffrey Morgan committed
63

Bruce MacDonald's avatar
Bruce MacDonald committed
64
65
	go func() {
		defer close(ch)
Michael Yang's avatar
Michael Yang committed
66
		_, err := model.Predict(req.Prompt, llama.Debug, llama.SetTokenCallback(func(token string) bool {
Bruce MacDonald's avatar
Bruce MacDonald committed
67
68
69
			ch <- token
			return true
		}), llama.SetTokens(tokens), llama.SetThreads(threads), llama.SetTopK(90), llama.SetTopP(0.86), llama.SetStopWords("llama"))
Jeffrey Morgan's avatar
Jeffrey Morgan committed
70
		if err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
71
			panic(err)
Jeffrey Morgan's avatar
Jeffrey Morgan committed
72
		}
Bruce MacDonald's avatar
Bruce MacDonald committed
73
	}()
Jeffrey Morgan's avatar
Jeffrey Morgan committed
74

Bruce MacDonald's avatar
Bruce MacDonald committed
75
	c.Stream(func(w io.Writer) bool {
Michael Yang's avatar
Michael Yang committed
76
		token, ok := <-ch
Bruce MacDonald's avatar
Bruce MacDonald committed
77
78
79
		if !ok {
			return false
		}
Michael Yang's avatar
Michael Yang committed
80

Michael Yang's avatar
Michael Yang committed
81
82
		resp := api.GenerateResponse{
			Response: token,
Michael Yang's avatar
Michael Yang committed
83
84
85
86
87
88
89
90
91
92
93
94
		}

		bts, err := json.Marshal(resp)
		if err != nil {
			return false
		}

		bts = append(bts, '\n')
		if _, err := w.Write(bts); err != nil {
			return false
		}

Bruce MacDonald's avatar
Bruce MacDonald committed
95
		return true
Jeffrey Morgan's avatar
Jeffrey Morgan committed
96
	})
Bruce MacDonald's avatar
Bruce MacDonald committed
97
98
99
100
101
}

func Serve(ln net.Listener) error {
	r := gin.Default()

Bruce MacDonald's avatar
Bruce MacDonald committed
102
103
104
105
106
107
108
	r.POST("api/pull", func(c *gin.Context) {
		var req api.PullRequest
		if err := c.ShouldBindJSON(&req); err != nil {
			c.JSON(http.StatusBadRequest, gin.H{"message": err.Error()})
			return
		}

Bruce MacDonald's avatar
Bruce MacDonald committed
109
		progressCh := make(chan api.PullProgress)
Bruce MacDonald's avatar
Bruce MacDonald committed
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
		go func() {
			defer close(progressCh)
			if err := pull(req.Model, progressCh); err != nil {
				c.JSON(http.StatusBadRequest, gin.H{"message": err.Error()})
				return
			}
		}()

		c.Stream(func(w io.Writer) bool {
			progress, ok := <-progressCh
			if !ok {
				return false
			}
			c.SSEvent("progress", progress)
			return true
		})
	})
Bruce MacDonald's avatar
Bruce MacDonald committed
127
128

	r.POST("/api/generate", generate)
Jeffrey Morgan's avatar
Jeffrey Morgan committed
129
130
131
132
133
134
135
136

	log.Printf("Listening on %s", ln.Addr())
	s := &http.Server{
		Handler: r,
	}

	return s.Serve(ln)
}
Michael Yang's avatar
Michael Yang committed
137
138
139
140
141
142
143
144
145
146
147

// matchRankOne returns the entry of targets closest to source by Levenshtein
// distance, together with that distance (lower is closer). When several
// targets tie, the first one wins. For an empty targets slice it returns
// ("", 0).
func matchRankOne(source string, targets []string) (bestMatch string, bestRank int) {
	for i, target := range targets {
		rank := fuzzy.LevenshteinDistance(source, target)
		// Seed the result with the first candidate; afterwards keep only a
		// strictly smaller distance. Starting from the zero value and keeping
		// larger ranks would select the worst match instead of the best.
		if i == 0 || rank < bestRank {
			bestMatch, bestRank = target, rank
		}
	}

	return bestMatch, bestRank
}