Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
84ac7ce1
"vscode:/vscode.git/clone" did not exist on "11bf27e37190b320216c349e39b085fb33aefed1"
Commit
84ac7ce1
authored
May 09, 2024
by
Daniel Hiltgen
Browse files
Refine subprocess reaping
parent
920a4b07
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
23 additions
and
12 deletions
+23
-12
llm/server.go
llm/server.go
+23
-12
No files found.
llm/server.go
View file @
84ac7ce1
...
...
@@ -53,6 +53,7 @@ type llmServer struct {
estimatedTotal
uint64
// Total size of model
totalLayers
uint64
gpuCount
int
loadDuration
time
.
Duration
// Record how long it took the model to load
sem
*
semaphore
.
Weighted
}
...
...
@@ -291,6 +292,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
sem
:
semaphore
.
NewWeighted
(
int64
(
numParallel
)),
totalLayers
:
ggml
.
KV
()
.
BlockCount
()
+
1
,
gpuCount
:
gpuCount
,
done
:
make
(
chan
error
,
1
),
}
s
.
cmd
.
Env
=
os
.
Environ
()
...
...
@@ -339,6 +341,11 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
continue
}
// reap subprocess when it exits
go
func
()
{
s
.
done
<-
s
.
cmd
.
Wait
()
}()
return
s
,
nil
}
...
...
@@ -486,6 +493,7 @@ func (s *llmServer) WaitUntilRunning(ctx context.Context) error {
expiresAt
:=
time
.
Now
()
.
Add
(
10
*
time
.
Minute
)
// be generous with timeout, large models can take a while to load
slog
.
Info
(
"waiting for llama runner to start responding"
)
var
lastStatus
ServerStatus
=
-
1
for
{
select
{
...
...
@@ -500,12 +508,6 @@ func (s *llmServer) WaitUntilRunning(ctx context.Context) error {
return
fmt
.
Errorf
(
"llama runner process has terminated: %v %s"
,
err
,
msg
)
default
:
}
ctx
,
cancel
:=
context
.
WithTimeout
(
context
.
Background
(),
200
*
time
.
Millisecond
)
defer
cancel
()
status
,
err
:=
s
.
getServerStatus
(
ctx
)
if
err
!=
nil
{
slog
.
Debug
(
"server not yet available"
,
"error"
,
err
)
}
if
time
.
Now
()
.
After
(
expiresAt
)
{
// timeout
msg
:=
""
...
...
@@ -521,14 +523,20 @@ func (s *llmServer) WaitUntilRunning(ctx context.Context) error {
}
return
fmt
.
Errorf
(
"llama runner process no longer running: %d %s"
,
s
.
cmd
.
ProcessState
.
ExitCode
(),
msg
)
}
ctx
,
cancel
:=
context
.
WithTimeout
(
ctx
,
200
*
time
.
Millisecond
)
defer
cancel
()
status
,
_
:=
s
.
getServerStatus
(
ctx
)
if
lastStatus
!=
status
&&
status
!=
ServerStatusReady
{
// Only log on status changes
slog
.
Info
(
"waiting for server to become available"
,
"status"
,
status
.
ToString
())
}
switch
status
{
case
ServerStatusLoadingModel
:
time
.
Sleep
(
time
.
Millisecond
*
250
)
slog
.
Debug
(
"loading model"
)
case
ServerStatusReady
:
slog
.
Info
(
fmt
.
Sprintf
(
"llama runner started in %0.2f seconds"
,
time
.
Since
(
start
)
.
Seconds
()))
s
.
loadDuration
=
time
.
Since
(
start
)
slog
.
Info
(
fmt
.
Sprintf
(
"llama runner started in %0.2f seconds"
,
s
.
loadDuration
.
Seconds
()))
return
nil
default
:
lastStatus
=
status
time
.
Sleep
(
time
.
Millisecond
*
250
)
continue
}
...
...
@@ -930,8 +938,11 @@ func (s *llmServer) Close() error {
if
err
:=
s
.
cmd
.
Process
.
Kill
();
err
!=
nil
{
return
err
}
_
=
s
.
cmd
.
Wait
()
// if ProcessState is already populated, Wait already completed, no need to wait again
if
s
.
cmd
.
ProcessState
==
nil
{
slog
.
Debug
(
"waiting for llama server to exit"
)
<-
s
.
done
}
slog
.
Debug
(
"llama server stopped"
)
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment