Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
dfa2f32c
Unverified
Commit
dfa2f32c
authored
May 05, 2024
by
Jeffrey Morgan
Committed by
GitHub
May 05, 2024
Browse files
unload in critical section (#4187)
parent
840424a2
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
7 additions
and
10 deletions
+7
-10
server/sched.go
server/sched.go
+4
-4
server/sched_test.go
server/sched_test.go
+3
-6
No files found.
server/sched.go
View file @
dfa2f32c
...
@@ -116,7 +116,7 @@ func (s *Scheduler) processPending(ctx context.Context) {
...
@@ -116,7 +116,7 @@ func (s *Scheduler) processPending(ctx context.Context) {
}
}
}
else
if
envconfig
.
MaxRunners
>
0
&&
loadedCount
>=
envconfig
.
MaxRunners
{
}
else
if
envconfig
.
MaxRunners
>
0
&&
loadedCount
>=
envconfig
.
MaxRunners
{
slog
.
Debug
(
"max runners achieved, unloading one to make room"
,
"runner_count"
,
loadedCount
)
slog
.
Debug
(
"max runners achieved, unloading one to make room"
,
"runner_count"
,
loadedCount
)
runnerToExpire
=
s
.
findRunnerToUnload
(
pending
)
runnerToExpire
=
s
.
findRunnerToUnload
()
}
else
{
}
else
{
// Either no models are loaded or below envconfig.MaxRunners
// Either no models are loaded or below envconfig.MaxRunners
// Get a refreshed GPU list
// Get a refreshed GPU list
...
@@ -157,7 +157,7 @@ func (s *Scheduler) processPending(ctx context.Context) {
...
@@ -157,7 +157,7 @@ func (s *Scheduler) processPending(ctx context.Context) {
s
.
loadFn
(
pending
,
ggml
,
gpus
)
s
.
loadFn
(
pending
,
ggml
,
gpus
)
break
break
}
}
runnerToExpire
=
s
.
findRunnerToUnload
(
pending
)
runnerToExpire
=
s
.
findRunnerToUnload
()
}
}
if
runnerToExpire
==
nil
{
if
runnerToExpire
==
nil
{
...
@@ -257,9 +257,9 @@ func (s *Scheduler) processCompleted(ctx context.Context) {
...
@@ -257,9 +257,9 @@ func (s *Scheduler) processCompleted(ctx context.Context) {
continue
continue
}
}
s
.
loadedMu
.
Lock
()
slog
.
Debug
(
"got lock to unload"
,
"model"
,
runner
.
model
)
slog
.
Debug
(
"got lock to unload"
,
"model"
,
runner
.
model
)
runner
.
unload
()
runner
.
unload
()
s
.
loadedMu
.
Lock
()
delete
(
s
.
loaded
,
runner
.
model
)
delete
(
s
.
loaded
,
runner
.
model
)
s
.
loadedMu
.
Unlock
()
s
.
loadedMu
.
Unlock
()
slog
.
Debug
(
"runner released"
,
"model"
,
runner
.
model
)
slog
.
Debug
(
"runner released"
,
"model"
,
runner
.
model
)
...
@@ -504,7 +504,7 @@ func pickBestFitGPUs(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoList) gpu.
...
@@ -504,7 +504,7 @@ func pickBestFitGPUs(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoList) gpu.
}
}
// findRunnerToUnload finds a runner to unload to make room for a new model
// findRunnerToUnload finds a runner to unload to make room for a new model
func
(
s
*
Scheduler
)
findRunnerToUnload
(
req
*
LlmRequest
)
*
runnerRef
{
func
(
s
*
Scheduler
)
findRunnerToUnload
()
*
runnerRef
{
s
.
loadedMu
.
Lock
()
s
.
loadedMu
.
Lock
()
runnerList
:=
make
([]
*
runnerRef
,
0
,
len
(
s
.
loaded
))
runnerList
:=
make
([]
*
runnerRef
,
0
,
len
(
s
.
loaded
))
for
_
,
r
:=
range
s
.
loaded
{
for
_
,
r
:=
range
s
.
loaded
{
...
...
server/sched_test.go
View file @
dfa2f32c
...
@@ -473,10 +473,7 @@ func TestUpdateFreeSpace(t *testing.T) {
...
@@ -473,10 +473,7 @@ func TestUpdateFreeSpace(t *testing.T) {
func
TestFindRunnerToUnload
(
t
*
testing
.
T
)
{
func
TestFindRunnerToUnload
(
t
*
testing
.
T
)
{
ctx
,
done
:=
context
.
WithTimeout
(
context
.
Background
(),
100
*
time
.
Millisecond
)
ctx
,
done
:=
context
.
WithTimeout
(
context
.
Background
(),
100
*
time
.
Millisecond
)
defer
done
()
defer
done
()
req
:=
&
LlmRequest
{
ctx
:
ctx
,
opts
:
api
.
DefaultOptions
(),
}
r1
:=
&
runnerRef
{
refCount
:
1
,
sessionDuration
:
1
}
r1
:=
&
runnerRef
{
refCount
:
1
,
sessionDuration
:
1
}
r2
:=
&
runnerRef
{
sessionDuration
:
2
}
r2
:=
&
runnerRef
{
sessionDuration
:
2
}
...
@@ -486,10 +483,10 @@ func TestFindRunnerToUnload(t *testing.T) {
...
@@ -486,10 +483,10 @@ func TestFindRunnerToUnload(t *testing.T) {
s
.
loaded
[
"b"
]
=
r2
s
.
loaded
[
"b"
]
=
r2
s
.
loadedMu
.
Unlock
()
s
.
loadedMu
.
Unlock
()
resp
:=
s
.
findRunnerToUnload
(
req
)
resp
:=
s
.
findRunnerToUnload
()
require
.
Equal
(
t
,
r2
,
resp
)
require
.
Equal
(
t
,
r2
,
resp
)
r2
.
refCount
=
1
r2
.
refCount
=
1
resp
=
s
.
findRunnerToUnload
(
req
)
resp
=
s
.
findRunnerToUnload
()
require
.
Equal
(
t
,
r1
,
resp
)
require
.
Equal
(
t
,
r1
,
resp
)
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment