Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
c0960e29
Unverified
Commit
c0960e29
authored
Dec 12, 2023
by
Bruce MacDonald
Committed by
GitHub
Dec 12, 2023
Browse files
retry on concurrent request failure (#1483)
- remove parallel
parent
5314fc9b
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
98 additions
and
77 deletions
+98
-77
llm/llama.go
llm/llama.go
+98
-77
No files found.
llm/llama.go
View file @
c0960e29
...
@@ -412,10 +412,6 @@ func newLlama(model string, adapters, projectors []string, runners []ModelRunner
...
@@ -412,10 +412,6 @@ func newLlama(model string, adapters, projectors []string, runners []ModelRunner
port
:=
rand
.
Intn
(
65535
-
49152
)
+
49152
// get a random port in the ephemeral range
port
:=
rand
.
Intn
(
65535
-
49152
)
+
49152
// get a random port in the ephemeral range
params
:=
append
(
params
,
"--port"
,
strconv
.
Itoa
(
port
))
params
:=
append
(
params
,
"--port"
,
strconv
.
Itoa
(
port
))
if
runner
.
Type
==
"gguf"
{
params
=
append
(
params
,
"--parallel"
,
"2"
)
}
ctx
,
cancel
:=
context
.
WithCancel
(
context
.
Background
())
ctx
,
cancel
:=
context
.
WithCancel
(
context
.
Background
())
cmd
:=
exec
.
CommandContext
(
cmd
:=
exec
.
CommandContext
(
ctx
,
ctx
,
...
@@ -549,6 +545,8 @@ type prediction struct {
...
@@ -549,6 +545,8 @@ type prediction struct {
}
}
const
maxBufferSize
=
512
*
format
.
KiloByte
const
maxBufferSize
=
512
*
format
.
KiloByte
// maxRetries is the upper bound on the retries counter in Predict's request
// loop, i.e. the total number of attempts made before giving up.
const
maxRetries
=
3
// retryDelay is how long Predict sleeps before each attempt after the first.
const
retryDelay
=
1
*
time
.
Second
type
PredictOpts
struct
{
type
PredictOpts
struct
{
Prompt
string
Prompt
string
...
@@ -570,6 +568,11 @@ type PredictResult struct {
...
@@ -570,6 +568,11 @@ type PredictResult struct {
EvalDuration
time
.
Duration
EvalDuration
time
.
Duration
}
}
// isRetryable reports whether line contains the "slot unavailable" marker.
//
// Predict calls it on each non-empty line scanned from the llm response
// stream; a true result flags the current attempt as needing a retry rather
// than being parsed as an event. The match is a case-sensitive substring
// search, and a nil or empty line is never retryable.
func isRetryable(line []byte) bool {
	return bytes.Contains(line, []byte("slot unavailable"))
}
func
(
llm
*
llama
)
Predict
(
ctx
context
.
Context
,
predict
PredictOpts
,
fn
func
(
PredictResult
))
error
{
func
(
llm
*
llama
)
Predict
(
ctx
context
.
Context
,
predict
PredictOpts
,
fn
func
(
PredictResult
))
error
{
imageData
:=
llm
.
ImageData
imageData
:=
llm
.
ImageData
if
len
(
predict
.
Images
)
>
0
{
if
len
(
predict
.
Images
)
>
0
{
...
@@ -607,6 +610,11 @@ func (llm *llama) Predict(ctx context.Context, predict PredictOpts, fn func(Pred
...
@@ -607,6 +610,11 @@ func (llm *llama) Predict(ctx context.Context, predict PredictOpts, fn func(Pred
request
[
"grammar"
]
=
jsonGrammar
request
[
"grammar"
]
=
jsonGrammar
}
}
for
retries
:=
0
;
retries
<
maxRetries
;
retries
++
{
if
retries
>
0
{
time
.
Sleep
(
retryDelay
)
// wait before retrying
}
// Handling JSON marshaling with special characters unescaped.
// Handling JSON marshaling with special characters unescaped.
buffer
:=
&
bytes
.
Buffer
{}
buffer
:=
&
bytes
.
Buffer
{}
enc
:=
json
.
NewEncoder
(
buffer
)
enc
:=
json
.
NewEncoder
(
buffer
)
...
@@ -642,6 +650,8 @@ func (llm *llama) Predict(ctx context.Context, predict PredictOpts, fn func(Pred
...
@@ -642,6 +650,8 @@ func (llm *llama) Predict(ctx context.Context, predict PredictOpts, fn func(Pred
// increase the buffer size to avoid running out of space
// increase the buffer size to avoid running out of space
buf
:=
make
([]
byte
,
0
,
maxBufferSize
)
buf
:=
make
([]
byte
,
0
,
maxBufferSize
)
scanner
.
Buffer
(
buf
,
maxBufferSize
)
scanner
.
Buffer
(
buf
,
maxBufferSize
)
retryNeeded
:=
false
for
scanner
.
Scan
()
{
for
scanner
.
Scan
()
{
select
{
select
{
case
<-
ctx
.
Done
()
:
case
<-
ctx
.
Done
()
:
...
@@ -653,6 +663,11 @@ func (llm *llama) Predict(ctx context.Context, predict PredictOpts, fn func(Pred
...
@@ -653,6 +663,11 @@ func (llm *llama) Predict(ctx context.Context, predict PredictOpts, fn func(Pred
continue
continue
}
}
if
isRetryable
(
line
)
{
retryNeeded
=
true
break
}
evt
,
ok
:=
bytes
.
CutPrefix
(
line
,
[]
byte
(
"data: "
))
evt
,
ok
:=
bytes
.
CutPrefix
(
line
,
[]
byte
(
"data: "
))
if
!
ok
{
if
!
ok
{
return
fmt
.
Errorf
(
"error parsing llm response stream: %s"
,
line
)
return
fmt
.
Errorf
(
"error parsing llm response stream: %s"
,
line
)
...
@@ -698,7 +713,13 @@ func (llm *llama) Predict(ctx context.Context, predict PredictOpts, fn func(Pred
...
@@ -698,7 +713,13 @@ func (llm *llama) Predict(ctx context.Context, predict PredictOpts, fn func(Pred
return
fmt
.
Errorf
(
"error reading llm response: %v"
,
err
)
return
fmt
.
Errorf
(
"error reading llm response: %v"
,
err
)
}
}
return
nil
if
!
retryNeeded
{
return
nil
// success
}
}
// reached only when every attempt ended with a retryable response
return
fmt
.
Errorf
(
"max retries exceeded"
)
}
}
type
TokenizeRequest
struct
{
type
TokenizeRequest
struct
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment