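fix multibyte responses (buffer detokenized bytes and emit them only once they form valid UTF-8 or the buffer reaches utf8.UTFMax bytes)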

40c9dc0a · Michael Yang · 0142660b · 40c9dc0a
Commit 40c9dc0a authored Jul 14, 2023 by Michael Yang
Hide whitespace changes
Inline Side-by-side

Showing with 13 additions and 6 deletions

llama/llama.go llama/llama.go +13 -6

No files found.
--- a/llama/llama.go
+++ b/llama/llama.go
@@ -78,12 +78,14 @@ llama_token llama_sample(
 */
 import "C"
 import (
+	"bytes"
 	"errors"
 	"fmt"
 	"io"
 	"os"
 	"strings"
 	"time"
+	"unicode/utf8"
 	"unsafe"
 	"github.com/jmorganca/ollama/api"
@@ -204,6 +206,7 @@ func (llm *llama) generate(input []C.llama_token, fn func(api.GenerateResponse))
 		context.PushLeft(int(in))
 	}
+	var b bytes.Buffer
 	for C.llama_get_kv_cache_token_count(llm.ctx) < C.int(llm.NumCtx) {
 		if retval := C.llama_eval(llm.ctx, unsafe.SliceData(input), C.int(len(input)), C.llama_get_kv_cache_token_count(llm.ctx), C.int(llm.NumThread)); retval != 0 {
 			return errors.New("llama: eval")
@@ -216,13 +219,17 @@ func (llm *llama) generate(input []C.llama_token, fn func(api.GenerateResponse))
 			return err
 		}
-		// call the callback
+		b.WriteString(llm.detokenize(token))
-		fn(api.GenerateResponse{
+		if utf8.Valid(b.Bytes()) || b.Len() >= utf8.UTFMax {
-			Response: llm.detokenize(token),
+			// call the callback
-		})
+			fn(api.GenerateResponse{
+				Response: b.String(),
+			})
-		output.PushLeft(token)
+			output.PushLeft(token)
-		context.PushLeft(int(token))
+			context.PushLeft(int(token))
+			b.Reset()
+		}
 		input = []C.llama_token{token}
 	}