orangecat / ollama
"examples/vscode:/vscode.git/clone" did not exist on "7b100ce589b917d4c116c9e61a6ec46d4f2ab062"
Commit 3e226112 (Unverified)
Authored Mar 12, 2024 by Bruce MacDonald; committed by GitHub, Mar 12, 2024

token repeat limit for prediction requests (#3080)
Parent: a54d4a28
Showing 1 changed file with 27 additions and 7 deletions:
llm/dyn_ext_server.go (+27, -7)
llm/dyn_ext_server.go (view file @ 3e226112)

@@ -228,17 +228,14 @@ func (llm *dynExtServer) Predict(ctx context.Context, predict PredictOpts, fn fu
 	}
 
 	retryNeeded := false
+	// keep track of the last token generated, this is used to abort if the model starts looping
+	var lastToken string
+	var tokenRepeat int
 out:
 	for {
 		select {
 		case <-ctx.Done():
-			// This handles the request cancellation
-			C.dyn_llama_server_completion_cancel(llm.s, resp.id, &resp)
-			if resp.id < 0 {
-				return extServerResponseToErr(resp)
-			} else {
-				return nil
-			}
+			return cancelCompletion(llm, resp)
 		default:
 			var result C.ext_server_task_result_t
 			C.dyn_llama_server_completion_next_result(llm.s, resp.id, &result)
@@ -261,6 +258,20 @@ func (llm *dynExtServer) Predict(ctx context.Context, predict PredictOpts, fn fu
 				break out
 			}
 
+			switch {
+			case strings.TrimSpace(p.Content) == lastToken:
+				tokenRepeat++
+			default:
+				lastToken = strings.TrimSpace(p.Content)
+				tokenRepeat = 0
+			}
+
+			// 30 picked as an arbitrary max token repeat limit, modify as needed
+			if tokenRepeat > 30 {
+				slog.Debug("prediction aborted, token repeat limit reached")
+				return cancelCompletion(llm, resp)
+			}
+
 			if p.Content != "" {
 				fn(PredictResult{
 					Content: p.Content,
@@ -288,6 +299,15 @@ func (llm *dynExtServer) Predict(ctx context.Context, predict PredictOpts, fn fu
 	return fmt.Errorf("max retries exceeded")
 }
 
+func cancelCompletion(llm *dynExtServer, resp C.ext_server_resp_t) error {
+	C.dyn_llama_server_completion_cancel(llm.s, resp.id, &resp)
+	if resp.id < 0 {
+		return extServerResponseToErr(resp)
+	} else {
+		return nil
+	}
+}
+
 func (llm *dynExtServer) Encode(ctx context.Context, prompt string) ([]int, error) {
 	data, err := json.Marshal(TokenizeRequest{Content: prompt})
 	if err != nil {
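For context, the core of this change is a simple consecutive-repeat counter over the streamed tokens: each token is trimmed, compared against the previous one, and generation is cancelled once the same token repeats more than 30 times. Below is a minimal standalone sketch of that pattern, separate from the commit itself; the tokens channel, the consume function, and the tokenRepeatLimit constant are hypothetical stand-ins for illustration, not ollama APIs, while the trim/compare/reset logic and the limit of 30 mirror the diff above.

// A minimal standalone sketch of the repeat-detection pattern this commit adds to
// Predict: trim each streamed token, count consecutive repeats, and abort once the
// counter passes an arbitrary limit (30 in the diff above). The tokens channel,
// consume function, and tokenRepeatLimit constant are hypothetical stand-ins.
package main

import (
	"errors"
	"fmt"
	"strings"
)

const tokenRepeatLimit = 30 // same arbitrary cutoff used in the commit

// consume reads streamed tokens and stops early if the model starts looping.
func consume(tokens <-chan string) error {
	var lastToken string
	var tokenRepeat int

	for t := range tokens {
		switch {
		case strings.TrimSpace(t) == lastToken:
			tokenRepeat++
		default:
			lastToken = strings.TrimSpace(t)
			tokenRepeat = 0
		}

		if tokenRepeat > tokenRepeatLimit {
			return errors.New("prediction aborted, token repeat limit reached")
		}

		fmt.Print(t)
	}
	return nil
}

func main() {
	tokens := make(chan string)
	go func() {
		defer close(tokens)
		tokens <- "hello "
		for i := 0; i < 40; i++ { // simulate a looping model
			tokens <- "again "
		}
	}()

	if err := consume(tokens); err != nil {
		fmt.Println("\nerror:", err)
	}
}

Running this prints the first tokens and then aborts once "again" has repeated more than 30 times in a row, which is the behaviour the commit adds to Predict.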