ollama · Commit 40c9dc0a

fix multibyte responses

Authored Jul 14, 2023 by Michael Yang
Parent: 0142660b
Showing 1 changed file with 13 additions and 6 deletions

llama/llama.go  +13 -6  (view file @ 40c9dc0a)
@@ -78,12 +78,14 @@ llama_token llama_sample(
 */
 import "C"
 import (
+	"bytes"
 	"errors"
 	"fmt"
 	"io"
 	"os"
 	"strings"
 	"time"
+	"unicode/utf8"
 	"unsafe"

 	"github.com/jmorganca/ollama/api"
@@ -204,6 +206,7 @@ func (llm *llama) generate(input []C.llama_token, fn func(api.GenerateResponse))
 		context.PushLeft(int(in))
 	}

+	var b bytes.Buffer
 	for C.llama_get_kv_cache_token_count(llm.ctx) < C.int(llm.NumCtx) {
 		if retval := C.llama_eval(llm.ctx, unsafe.SliceData(input), C.int(len(input)), C.llama_get_kv_cache_token_count(llm.ctx), C.int(llm.NumThread)); retval != 0 {
 			return errors.New("llama: eval")
@@ -216,13 +219,17 @@ func (llm *llama) generate(input []C.llama_token, fn func(api.GenerateResponse))
 			return err
 		}

-		// call the callback
-		fn(api.GenerateResponse{
-			Response: llm.detokenize(token),
-		})
+		b.WriteString(llm.detokenize(token))
+		if utf8.Valid(b.Bytes()) || b.Len() >= utf8.UTFMax {
+			// call the callback
+			fn(api.GenerateResponse{
+				Response: b.String(),
+			})

-		output.PushLeft(token)
-		context.PushLeft(int(token))
+			output.PushLeft(token)
+			context.PushLeft(int(token))
+			b.Reset()
+		}

 		input = []C.llama_token{token}
 	}
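Note on the change: before this commit, generate() passed llm.detokenize(token) straight to the callback, so a multibyte UTF-8 character whose bytes are split across tokens could be emitted as invalid partial byte sequences. The diff instead accumulates detokenized bytes in a bytes.Buffer and only invokes the callback once the buffer is valid UTF-8, or has reached utf8.UTFMax bytes so genuinely invalid input cannot stall output. The standalone sketch below illustrates that buffering rule outside the cgo context; emitWhenValid and the example chunks are hypothetical names made up here for illustration, not part of the ollama code.

package main

import (
	"bytes"
	"fmt"
	"unicode/utf8"
)

// emitWhenValid buffers incoming byte chunks and flushes them to fn only once
// the buffer holds valid UTF-8, or has grown to utf8.UTFMax bytes (the longest
// encoding of a single rune), mirroring the condition added in this commit.
func emitWhenValid(chunks [][]byte, fn func(string)) {
	var b bytes.Buffer
	for _, chunk := range chunks {
		b.Write(chunk)
		if utf8.Valid(b.Bytes()) || b.Len() >= utf8.UTFMax {
			fn(b.String())
			b.Reset()
		}
	}
}

func main() {
	// "héllo" with the two-byte encoding of 'é' (0xC3 0xA9) split across
	// chunks, the way a tokenizer can split one character across tokens.
	chunks := [][]byte{
		[]byte("h"),
		{0xC3},
		{0xA9},
		[]byte("llo"),
	}
	emitWhenValid(chunks, func(s string) { fmt.Printf("%q\n", s) })
	// Prints "h", then "é", then "llo": the lone 0xC3 byte is held back
	// until its continuation byte arrives.
}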