Commit 40c9dc0a authored by Michael Yang
Browse files

fix multibyte responses

parent 0142660b
...@@ -78,12 +78,14 @@ llama_token llama_sample( ...@@ -78,12 +78,14 @@ llama_token llama_sample(
*/ */
import "C" import "C"
import ( import (
"bytes"
"errors" "errors"
"fmt" "fmt"
"io" "io"
"os" "os"
"strings" "strings"
"time" "time"
"unicode/utf8"
"unsafe" "unsafe"
"github.com/jmorganca/ollama/api" "github.com/jmorganca/ollama/api"
...@@ -204,6 +206,7 @@ func (llm *llama) generate(input []C.llama_token, fn func(api.GenerateResponse)) ...@@ -204,6 +206,7 @@ func (llm *llama) generate(input []C.llama_token, fn func(api.GenerateResponse))
context.PushLeft(int(in)) context.PushLeft(int(in))
} }
var b bytes.Buffer
for C.llama_get_kv_cache_token_count(llm.ctx) < C.int(llm.NumCtx) { for C.llama_get_kv_cache_token_count(llm.ctx) < C.int(llm.NumCtx) {
if retval := C.llama_eval(llm.ctx, unsafe.SliceData(input), C.int(len(input)), C.llama_get_kv_cache_token_count(llm.ctx), C.int(llm.NumThread)); retval != 0 { if retval := C.llama_eval(llm.ctx, unsafe.SliceData(input), C.int(len(input)), C.llama_get_kv_cache_token_count(llm.ctx), C.int(llm.NumThread)); retval != 0 {
return errors.New("llama: eval") return errors.New("llama: eval")
...@@ -216,13 +219,17 @@ func (llm *llama) generate(input []C.llama_token, fn func(api.GenerateResponse)) ...@@ -216,13 +219,17 @@ func (llm *llama) generate(input []C.llama_token, fn func(api.GenerateResponse))
return err return err
} }
// call the callback b.WriteString(llm.detokenize(token))
fn(api.GenerateResponse{ if utf8.Valid(b.Bytes()) || b.Len() >= utf8.UTFMax {
Response: llm.detokenize(token), // call the callback
}) fn(api.GenerateResponse{
Response: b.String(),
})
output.PushLeft(token) output.PushLeft(token)
context.PushLeft(int(token)) context.PushLeft(int(token))
b.Reset()
}
input = []C.llama_token{token} input = []C.llama_token{token}
} }
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment