Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
f2074ed4
Unverified
Commit
f2074ed4
authored
Aug 08, 2023
by
Michael Yang
Committed by
GitHub
Aug 08, 2023
Browse files
Merge pull request #306 from jmorganca/default-keep-system
automatically set num_keep if num_keep < 0
parents
34a13a9d
4dc5b117
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
28 additions
and
14 deletions
+28
-14
api/types.go
api/types.go
+1
-0
llama/llama.go
llama/llama.go
+8
-14
server/routes.go
server/routes.go
+19
-0
No files found.
api/types.go
View file @
f2074ed4
...
...
@@ -266,6 +266,7 @@ func DefaultOptions() Options {
UseNUMA
:
false
,
NumCtx
:
2048
,
NumKeep
:
-
1
,
NumBatch
:
512
,
NumGPU
:
1
,
NumGQA
:
1
,
...
...
llama/llama.go
View file @
f2074ed4
...
...
@@ -189,10 +189,6 @@ func (llm *LLM) Predict(ctx []int, prompt string, fn func(api.GenerateResponse))
tokens
[
i
]
=
C
.
llama_token
(
ctx
[
i
])
}
if
len
(
tokens
)
==
0
{
tokens
=
llm
.
tokenize
(
" "
)
}
llm
.
marshalPrompt
(
tokens
,
prompt
)
C
.
llama_set_rng_seed
(
llm
.
ctx
,
C
.
uint
(
llm
.
Seed
))
...
...
@@ -208,7 +204,7 @@ func (llm *LLM) Predict(ctx []int, prompt string, fn func(api.GenerateResponse))
return
err
}
b
.
WriteString
(
llm
.
detokeniz
e
(
token
))
b
.
WriteString
(
llm
.
Decod
e
(
token
))
if
err
:=
llm
.
checkStopConditions
(
b
);
err
!=
nil
{
if
errors
.
Is
(
err
,
io
.
EOF
)
{
...
...
@@ -226,17 +222,15 @@ func (llm *LLM) Predict(ctx []int, prompt string, fn func(api.GenerateResponse))
}
}
last
:=
make
([]
int
,
0
,
len
(
llm
.
last
))
for
_
,
i
:=
range
llm
.
last
{
if
i
!=
0
{
last
=
append
(
last
,
int
(
i
))
}
embd
:=
make
([]
int
,
len
(
llm
.
embd
))
for
i
:=
range
llm
.
embd
{
embd
[
i
]
=
int
(
llm
.
embd
[
i
])
}
timings
:=
C
.
llama_get_timings
(
llm
.
ctx
)
fn
(
api
.
GenerateResponse
{
Done
:
true
,
Context
:
last
,
Context
:
embd
,
SampleCount
:
int
(
timings
.
n_sample
),
SampleDuration
:
parseDurationMs
(
float64
(
timings
.
t_sample_ms
)),
PromptEvalCount
:
int
(
timings
.
n_p_eval
),
...
...
@@ -261,7 +255,7 @@ func (llm *LLM) checkStopConditions(b bytes.Buffer) error {
}
func
(
llm
*
LLM
)
marshalPrompt
(
ctx
[]
C
.
llama_token
,
prompt
string
)
[]
C
.
llama_token
{
tokens
:=
append
(
ctx
,
llm
.
tokeniz
e
(
prompt
)
...
)
tokens
:=
append
(
ctx
,
llm
.
Encod
e
(
prompt
)
...
)
if
llm
.
NumKeep
<
0
{
llm
.
NumKeep
=
len
(
tokens
)
}
...
...
@@ -303,7 +297,7 @@ func (llm *LLM) marshalPrompt(ctx []C.llama_token, prompt string) []C.llama_toke
return
tokens
}
func
(
llm
*
LLM
)
tokeniz
e
(
prompt
string
)
[]
C
.
llama_token
{
func
(
llm
*
LLM
)
Encod
e
(
prompt
string
)
[]
C
.
llama_token
{
cPrompt
:=
C
.
CString
(
prompt
)
defer
C
.
free
(
unsafe
.
Pointer
(
cPrompt
))
...
...
@@ -315,7 +309,7 @@ func (llm *LLM) tokenize(prompt string) []C.llama_token {
return
nil
}
func
(
llm
*
LLM
)
detokeniz
e
(
tokens
...
C
.
llama_token
)
string
{
func
(
llm
*
LLM
)
Decod
e
(
tokens
...
C
.
llama_token
)
string
{
var
sb
strings
.
Builder
for
_
,
token
:=
range
tokens
{
sb
.
WriteString
(
C
.
GoString
(
C
.
llama_token_to_str
(
llm
.
ctx
,
token
)))
...
...
server/routes.go
View file @
f2074ed4
...
...
@@ -78,6 +78,25 @@ func GenerateHandler(c *gin.Context) {
return
}
if
opts
.
NumKeep
<
0
{
promptWithSystem
,
err
:=
model
.
Prompt
(
api
.
GenerateRequest
{})
if
err
!=
nil
{
c
.
JSON
(
http
.
StatusInternalServerError
,
gin
.
H
{
"error"
:
err
.
Error
()})
return
}
promptNoSystem
,
err
:=
model
.
Prompt
(
api
.
GenerateRequest
{
Context
:
[]
int
{
0
}})
if
err
!=
nil
{
c
.
JSON
(
http
.
StatusInternalServerError
,
gin
.
H
{
"error"
:
err
.
Error
()})
return
}
tokensWithSystem
:=
llm
.
Encode
(
promptWithSystem
)
tokensNoSystem
:=
llm
.
Encode
(
promptNoSystem
)
llm
.
NumKeep
=
len
(
tokensWithSystem
)
-
len
(
tokensNoSystem
)
+
1
}
loaded
.
llm
=
llm
loaded
.
digest
=
model
.
Digest
loaded
.
options
=
opts
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment