routes.go 3.05 KB
Newer Older
Jeffrey Morgan's avatar
Jeffrey Morgan committed
1
2
3
package server

import (
Michael Yang's avatar
Michael Yang committed
4
	"encoding/json"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
5
6
7
8
9
	"fmt"
	"io"
	"log"
	"net"
	"net/http"
Michael Yang's avatar
Michael Yang committed
10
	"path"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
11
	"runtime"
Michael Yang's avatar
Michael Yang committed
12
13
	"strings"
	"text/template"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
14
15

	"github.com/gin-gonic/gin"
Michael Yang's avatar
Michael Yang committed
16
	"github.com/lithammer/fuzzysearch/fuzzy"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
17

Jeffrey Morgan's avatar
Jeffrey Morgan committed
18
	"github.com/jmorganca/ollama/api"
Michael Yang's avatar
Michael Yang committed
19
	"github.com/jmorganca/ollama/llama"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
20
21
)

Michael Yang's avatar
Michael Yang committed
22
23
// templates holds every prompt template whose file matches the relative glob
// "templates/*.prompt", parsed once when the package is initialized.
// template.Must panics if ParseGlob returns an error, so a parse failure
// aborts start-up instead of surfacing per request.
var templates = template.Must(template.ParseGlob("templates/*.prompt"))

Bruce MacDonald's avatar
Bruce MacDonald committed
24
func generate(c *gin.Context) {
Jeffrey Morgan's avatar
Jeffrey Morgan committed
25
	// TODO: these should be request parameters
Michael Yang's avatar
Michael Yang committed
26
	gpulayers := 1
Jeffrey Morgan's avatar
Jeffrey Morgan committed
27
28
29
	tokens := 512
	threads := runtime.NumCPU()

Bruce MacDonald's avatar
Bruce MacDonald committed
30
31
32
33
34
	var req api.GenerateRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"message": err.Error()})
		return
	}
35

Michael Yang's avatar
Michael Yang committed
36
	model, err := llama.New(req.Model, llama.EnableF16Memory, llama.SetContext(128), llama.EnableEmbeddings, llama.SetGPULayers(gpulayers))
Bruce MacDonald's avatar
Bruce MacDonald committed
37
38
39
40
	if err != nil {
		fmt.Println("Loading the model failed:", err.Error())
		return
	}
Michael Yang's avatar
Michael Yang committed
41
	defer model.Free()
Jeffrey Morgan's avatar
Jeffrey Morgan committed
42

Michael Yang's avatar
Michael Yang committed
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
	templateNames := make([]string, 0, len(templates.Templates()))
	for _, template := range templates.Templates() {
		templateNames = append(templateNames, template.Name())
	}

	match, _ := matchRankOne(path.Base(req.Prompt), templateNames)
	if template := templates.Lookup(match); template != nil {
		var sb strings.Builder
		if err := template.Execute(&sb, req); err != nil {
			fmt.Println("Prompt template failed:", err.Error())
			return
		}

		req.Prompt = sb.String()
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
59
	ch := make(chan string)
Jeffrey Morgan's avatar
Jeffrey Morgan committed
60

Bruce MacDonald's avatar
Bruce MacDonald committed
61
62
	go func() {
		defer close(ch)
Michael Yang's avatar
Michael Yang committed
63
		_, err := model.Predict(req.Prompt, llama.Debug, llama.SetTokenCallback(func(token string) bool {
Bruce MacDonald's avatar
Bruce MacDonald committed
64
65
66
			ch <- token
			return true
		}), llama.SetTokens(tokens), llama.SetThreads(threads), llama.SetTopK(90), llama.SetTopP(0.86), llama.SetStopWords("llama"))
Jeffrey Morgan's avatar
Jeffrey Morgan committed
67
		if err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
68
			panic(err)
Jeffrey Morgan's avatar
Jeffrey Morgan committed
69
		}
Bruce MacDonald's avatar
Bruce MacDonald committed
70
	}()
Jeffrey Morgan's avatar
Jeffrey Morgan committed
71

Bruce MacDonald's avatar
Bruce MacDonald committed
72
	c.Stream(func(w io.Writer) bool {
Michael Yang's avatar
Michael Yang committed
73
		token, ok := <-ch
Bruce MacDonald's avatar
Bruce MacDonald committed
74
75
76
		if !ok {
			return false
		}
Michael Yang's avatar
Michael Yang committed
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95

		resp := api.TokenResponse{
			Choices: []api.TokenResponseChoice{
				{
					Text: token,
				},
			},
		}

		bts, err := json.Marshal(resp)
		if err != nil {
			return false
		}

		bts = append(bts, '\n')
		if _, err := w.Write(bts); err != nil {
			return false
		}

Bruce MacDonald's avatar
Bruce MacDonald committed
96
		return true
Jeffrey Morgan's avatar
Jeffrey Morgan committed
97
	})
Bruce MacDonald's avatar
Bruce MacDonald committed
98
99
100
101
102
}

func Serve(ln net.Listener) error {
	r := gin.Default()

Bruce MacDonald's avatar
Bruce MacDonald committed
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
	r.POST("api/pull", func(c *gin.Context) {
		var req api.PullRequest
		if err := c.ShouldBindJSON(&req); err != nil {
			c.JSON(http.StatusBadRequest, gin.H{"message": err.Error()})
			return
		}

		progressCh := make(chan string)
		go func() {
			defer close(progressCh)
			if err := pull(req.Model, progressCh); err != nil {
				c.JSON(http.StatusBadRequest, gin.H{"message": err.Error()})
				return
			}
		}()

		c.Stream(func(w io.Writer) bool {
			progress, ok := <-progressCh
			if !ok {
				return false
			}
			c.SSEvent("progress", progress)
			return true
		})
	})
Bruce MacDonald's avatar
Bruce MacDonald committed
128
129

	r.POST("/api/generate", generate)
Jeffrey Morgan's avatar
Jeffrey Morgan committed
130
131
132
133
134
135
136
137

	log.Printf("Listening on %s", ln.Addr())
	s := &http.Server{
		Handler: r,
	}

	return s.Serve(ln)
}
Michael Yang's avatar
Michael Yang committed
138
139
140
141
142
143
144
145
146
147
148

// matchRankOne returns the entry of targets closest to source together with
// its Levenshtein distance (smaller means more similar; 0 is an exact match).
// On a tie the earliest target wins. For an empty targets slice it returns
// "" and 0.
func matchRankOne(source string, targets []string) (bestMatch string, bestRank int) {
	for i, target := range targets {
		// The first target seeds the result; after that only a strictly
		// smaller distance replaces it.
		if rank := fuzzy.LevenshteinDistance(source, target); i == 0 || rank < bestRank {
			bestRank = rank
			bestMatch = target
		}
	}

	return bestMatch, bestRank
}