Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
565648f3
Unverified
Commit
565648f3
authored
Oct 18, 2023
by
Bruce MacDonald
Committed by
GitHub
Oct 18, 2023
Browse files
relay CUDA errors to the client (#825)
parent
3a247717
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
35 additions
and
12 deletions
+35
-12
llm/llama.go
llm/llama.go
+35
-12
No files found.
llm/llama.go
View file @
565648f3
...
@@ -188,7 +188,7 @@ type Running struct {
...
@@ -188,7 +188,7 @@ type Running struct {
Cancel
context
.
CancelFunc
Cancel
context
.
CancelFunc
exitOnce
sync
.
Once
exitOnce
sync
.
Once
exitCh
chan
error
// channel to receive the exit status of the subprocess
exitCh
chan
error
// channel to receive the exit status of the subprocess
exitErr
error
// error returned by the sub
process
*
StatusWriter
// captures error messages from the llama runner
process
}
}
type
llama
struct
{
type
llama
struct
{
...
@@ -260,6 +260,7 @@ func NumGPU(numLayer, fileSizeBytes int64, opts api.Options) int {
...
@@ -260,6 +260,7 @@ func NumGPU(numLayer, fileSizeBytes int64, opts api.Options) int {
// StatusWriter is a writer that captures error messages from the llama runner process
type StatusWriter struct {
	// ErrCh receives a wrapped error for each error line detected in the
	// runner's output (see Write).
	ErrCh chan error
	// LastErrMsg holds the most recently captured error message, so callers
	// can report a better reason than the subprocess exit status alone.
	LastErrMsg string
}
func
NewStatusWriter
()
*
StatusWriter
{
func
NewStatusWriter
()
*
StatusWriter
{
...
@@ -269,9 +270,18 @@ func NewStatusWriter() *StatusWriter {
...
@@ -269,9 +270,18 @@ func NewStatusWriter() *StatusWriter {
}
}
func
(
w
*
StatusWriter
)
Write
(
b
[]
byte
)
(
int
,
error
)
{
func
(
w
*
StatusWriter
)
Write
(
b
[]
byte
)
(
int
,
error
)
{
var
errMsg
string
if
_
,
after
,
ok
:=
bytes
.
Cut
(
b
,
[]
byte
(
"error:"
));
ok
{
if
_
,
after
,
ok
:=
bytes
.
Cut
(
b
,
[]
byte
(
"error:"
));
ok
{
w
.
ErrCh
<-
fmt
.
Errorf
(
"llama runner: %s"
,
bytes
.
TrimSpace
(
after
))
errMsg
=
string
(
bytes
.
TrimSpace
(
after
))
}
else
if
_
,
after
,
ok
:=
bytes
.
Cut
(
b
,
[]
byte
(
"CUDA error"
));
ok
{
errMsg
=
string
(
bytes
.
TrimSpace
(
after
))
}
}
if
errMsg
!=
""
{
w
.
LastErrMsg
=
errMsg
w
.
ErrCh
<-
fmt
.
Errorf
(
"llama runner: %s"
,
errMsg
)
}
return
os
.
Stderr
.
Write
(
b
)
return
os
.
Stderr
.
Write
(
b
)
}
}
...
@@ -359,7 +369,13 @@ func newLlama(model string, adapters []string, runners []ModelRunner, numLayers
...
@@ -359,7 +369,13 @@ func newLlama(model string, adapters []string, runners []ModelRunner, numLayers
// monitor the llama runner process and signal when it exits
// monitor the llama runner process and signal when it exits
go
func
()
{
go
func
()
{
err
:=
llm
.
Cmd
.
Wait
()
err
:=
llm
.
Cmd
.
Wait
()
llm
.
exitErr
=
err
// default to printing the exit message of the command process, it will probably just say 'exit status 1'
errMsg
:=
err
.
Error
()
// try to set a better error message if llama runner logs captured an error
if
statusWriter
.
LastErrMsg
!=
""
{
errMsg
=
statusWriter
.
LastErrMsg
}
log
.
Println
(
errMsg
)
// llm.Cmd.Wait() can only be called once, use this exit channel to signal that the process has exited
// llm.Cmd.Wait() can only be called once, use this exit channel to signal that the process has exited
llm
.
exitOnce
.
Do
(
func
()
{
llm
.
exitOnce
.
Do
(
func
()
{
close
(
llm
.
exitCh
)
close
(
llm
.
exitCh
)
...
@@ -429,10 +445,9 @@ func (llm *llama) Close() {
...
@@ -429,10 +445,9 @@ func (llm *llama) Close() {
// wait for the command to exit to prevent race conditions with the next run
// wait for the command to exit to prevent race conditions with the next run
<-
llm
.
exitCh
<-
llm
.
exitCh
err
:=
llm
.
exitErr
if
err
!=
nil
{
if
llm
.
StatusWriter
!=
nil
&&
llm
.
StatusWriter
.
LastErrMsg
!=
""
{
log
.
Printf
(
"llama runner stopped with error: %v"
,
err
)
log
.
Printf
(
"llama runner stopped with error: %v"
,
llm
.
StatusWriter
.
LastErrMsg
)
}
else
{
}
else
{
log
.
Print
(
"llama runner stopped successfully"
)
log
.
Print
(
"llama runner stopped successfully"
)
}
}
...
@@ -569,6 +584,14 @@ func (llm *llama) Predict(ctx context.Context, prevContext []int, prompt string,
...
@@ -569,6 +584,14 @@ func (llm *llama) Predict(ctx context.Context, prevContext []int, prompt string,
}
}
if
err
:=
scanner
.
Err
();
err
!=
nil
{
if
err
:=
scanner
.
Err
();
err
!=
nil
{
if
strings
.
Contains
(
err
.
Error
(),
"unexpected EOF"
)
{
// this means the llama runner subprocess crashed
llm
.
Close
()
if
llm
.
StatusWriter
!=
nil
&&
llm
.
StatusWriter
.
LastErrMsg
!=
""
{
return
fmt
.
Errorf
(
"llama runner exited: %v"
,
llm
.
StatusWriter
.
LastErrMsg
)
}
return
fmt
.
Errorf
(
"llama runner exited, you may not have enough available memory to run this model"
)
}
return
fmt
.
Errorf
(
"error reading llm response: %v"
,
err
)
return
fmt
.
Errorf
(
"error reading llm response: %v"
,
err
)
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment