Commit 580fe895 authored by Michael Yang, committed by Jeffrey Morgan

free llama model

parent 50792821
@@ -33,11 +33,12 @@ func generate(c *gin.Context) {
 		return
 	}
 
-	l, err := llama.New(req.Model, llama.EnableF16Memory, llama.SetContext(128), llama.EnableEmbeddings, llama.SetGPULayers(gpulayers))
+	model, err := llama.New(req.Model, llama.EnableF16Memory, llama.SetContext(128), llama.EnableEmbeddings, llama.SetGPULayers(gpulayers))
 	if err != nil {
 		fmt.Println("Loading the model failed:", err.Error())
 		return
 	}
+	defer model.Free()
 
 	templateNames := make([]string, 0, len(templates.Templates()))
 	for _, template := range templates.Templates() {
@@ -59,7 +60,7 @@ func generate(c *gin.Context) {
 	go func() {
 		defer close(ch)
 
-		_, err := l.Predict(req.Prompt, llama.Debug, llama.SetTokenCallback(func(token string) bool {
+		_, err := model.Predict(req.Prompt, llama.Debug, llama.SetTokenCallback(func(token string) bool {
			ch <- token
			return true
		}), llama.SetTokens(tokens), llama.SetThreads(threads), llama.SetTopK(90), llama.SetTopP(0.86), llama.SetStopWords("llama"))
...
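For context, the change renames the loaded model from l to model and pairs llama.New with a deferred model.Free, so the natively allocated model is released when the handler returns. Below is a minimal sketch of that pattern, assuming the same llama bindings shown in the diff; the import path and the runPrompt helper are placeholders, not code from this repository.

// Sketch only: pair llama.New with a deferred model.Free so the model's
// native memory is released when the function returns, including on error
// paths after a successful load. Import path is a placeholder.
package main

import (
	"fmt"

	"github.com/go-skynet/go-llama.cpp" // placeholder; use this repo's llama bindings
)

func runPrompt(modelPath, prompt string) error {
	model, err := llama.New(modelPath, llama.EnableF16Memory, llama.SetContext(128))
	if err != nil {
		return fmt.Errorf("loading the model failed: %w", err)
	}
	defer model.Free() // release the model once we are done with it

	_, err = model.Predict(prompt, llama.SetTokenCallback(func(token string) bool {
		fmt.Print(token) // stream tokens as they are generated
		return true      // keep generating
	}))
	return err
}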