Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
dfa2f32c
"git@developer.sourcefind.cn:renzhc/diffusers_dcu.git" did not exist on "df267ee4e8500a2ef5960879f6d1ea49cc8ec40d"
Unverified
Commit
dfa2f32c
authored
May 05, 2024
by
Jeffrey Morgan
Committed by
GitHub
May 05, 2024
Browse files
unload in critical section (#4187)
parent
840424a2
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
7 additions
and
10 deletions
+7
-10
server/sched.go
server/sched.go
+4
-4
server/sched_test.go
server/sched_test.go
+3
-6
No files found.
server/sched.go
View file @
dfa2f32c
...
@@ -116,7 +116,7 @@ func (s *Scheduler) processPending(ctx context.Context) {
...
@@ -116,7 +116,7 @@ func (s *Scheduler) processPending(ctx context.Context) {
}
}
}
else
if
envconfig
.
MaxRunners
>
0
&&
loadedCount
>=
envconfig
.
MaxRunners
{
}
else
if
envconfig
.
MaxRunners
>
0
&&
loadedCount
>=
envconfig
.
MaxRunners
{
slog
.
Debug
(
"max runners achieved, unloading one to make room"
,
"runner_count"
,
loadedCount
)
slog
.
Debug
(
"max runners achieved, unloading one to make room"
,
"runner_count"
,
loadedCount
)
runnerToExpire
=
s
.
findRunnerToUnload
(
pending
)
runnerToExpire
=
s
.
findRunnerToUnload
()
}
else
{
}
else
{
// Either no models are loaded or below envconfig.MaxRunners
// Either no models are loaded or below envconfig.MaxRunners
// Get a refreshed GPU list
// Get a refreshed GPU list
...
@@ -157,7 +157,7 @@ func (s *Scheduler) processPending(ctx context.Context) {
...
@@ -157,7 +157,7 @@ func (s *Scheduler) processPending(ctx context.Context) {
s
.
loadFn
(
pending
,
ggml
,
gpus
)
s
.
loadFn
(
pending
,
ggml
,
gpus
)
break
break
}
}
runnerToExpire
=
s
.
findRunnerToUnload
(
pending
)
runnerToExpire
=
s
.
findRunnerToUnload
()
}
}
if
runnerToExpire
==
nil
{
if
runnerToExpire
==
nil
{
...
@@ -257,9 +257,9 @@ func (s *Scheduler) processCompleted(ctx context.Context) {
...
@@ -257,9 +257,9 @@ func (s *Scheduler) processCompleted(ctx context.Context) {
continue
continue
}
}
s
.
loadedMu
.
Lock
()
slog
.
Debug
(
"got lock to unload"
,
"model"
,
runner
.
model
)
slog
.
Debug
(
"got lock to unload"
,
"model"
,
runner
.
model
)
runner
.
unload
()
runner
.
unload
()
s
.
loadedMu
.
Lock
()
delete
(
s
.
loaded
,
runner
.
model
)
delete
(
s
.
loaded
,
runner
.
model
)
s
.
loadedMu
.
Unlock
()
s
.
loadedMu
.
Unlock
()
slog
.
Debug
(
"runner released"
,
"model"
,
runner
.
model
)
slog
.
Debug
(
"runner released"
,
"model"
,
runner
.
model
)
...
@@ -504,7 +504,7 @@ func pickBestFitGPUs(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoList) gpu.
...
@@ -504,7 +504,7 @@ func pickBestFitGPUs(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoList) gpu.
}
}
// findRunnerToUnload finds a runner to unload to make room for a new model
// findRunnerToUnload finds a runner to unload to make room for a new model
func
(
s
*
Scheduler
)
findRunnerToUnload
(
req
*
LlmRequest
)
*
runnerRef
{
func
(
s
*
Scheduler
)
findRunnerToUnload
()
*
runnerRef
{
s
.
loadedMu
.
Lock
()
s
.
loadedMu
.
Lock
()
runnerList
:=
make
([]
*
runnerRef
,
0
,
len
(
s
.
loaded
))
runnerList
:=
make
([]
*
runnerRef
,
0
,
len
(
s
.
loaded
))
for
_
,
r
:=
range
s
.
loaded
{
for
_
,
r
:=
range
s
.
loaded
{
...
...
server/sched_test.go
View file @
dfa2f32c
...
@@ -473,10 +473,7 @@ func TestUpdateFreeSpace(t *testing.T) {
...
@@ -473,10 +473,7 @@ func TestUpdateFreeSpace(t *testing.T) {
func
TestFindRunnerToUnload
(
t
*
testing
.
T
)
{
func
TestFindRunnerToUnload
(
t
*
testing
.
T
)
{
ctx
,
done
:=
context
.
WithTimeout
(
context
.
Background
(),
100
*
time
.
Millisecond
)
ctx
,
done
:=
context
.
WithTimeout
(
context
.
Background
(),
100
*
time
.
Millisecond
)
defer
done
()
defer
done
()
req
:=
&
LlmRequest
{
ctx
:
ctx
,
opts
:
api
.
DefaultOptions
(),
}
r1
:=
&
runnerRef
{
refCount
:
1
,
sessionDuration
:
1
}
r1
:=
&
runnerRef
{
refCount
:
1
,
sessionDuration
:
1
}
r2
:=
&
runnerRef
{
sessionDuration
:
2
}
r2
:=
&
runnerRef
{
sessionDuration
:
2
}
...
@@ -486,10 +483,10 @@ func TestFindRunnerToUnload(t *testing.T) {
...
@@ -486,10 +483,10 @@ func TestFindRunnerToUnload(t *testing.T) {
s
.
loaded
[
"b"
]
=
r2
s
.
loaded
[
"b"
]
=
r2
s
.
loadedMu
.
Unlock
()
s
.
loadedMu
.
Unlock
()
resp
:=
s
.
findRunnerToUnload
(
req
)
resp
:=
s
.
findRunnerToUnload
()
require
.
Equal
(
t
,
r2
,
resp
)
require
.
Equal
(
t
,
r2
,
resp
)
r2
.
refCount
=
1
r2
.
refCount
=
1
resp
=
s
.
findRunnerToUnload
(
req
)
resp
=
s
.
findRunnerToUnload
()
require
.
Equal
(
t
,
r1
,
resp
)
require
.
Equal
(
t
,
r1
,
resp
)
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment