Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
36a6dacc
Commit
36a6dacc
authored
Apr 24, 2024
by
Bryce Reitano
Browse files
Restructure loading conditional chain
parent
ceb0e26e
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
18 additions
and
19 deletions
+18
-19
server/sched.go
server/sched.go
+17
-18
server/sched_test.go
server/sched_test.go
+1
-1
No files found.
server/sched.go
View file @
36a6dacc
...
...
@@ -123,36 +123,35 @@ func (s *Scheduler) processPending(ctx context.Context) {
pending
.
useLoadedRunner
(
runner
,
s
.
finishedReqCh
)
break
}
}
else
if
loadedCount
==
0
{
slog
.
Debug
(
"loading first model"
,
"model"
,
pending
.
model
.
ModelPath
)
gpus
:=
s
.
getGpuFn
()
ggml
,
err
:=
llm
.
LoadModel
(
pending
.
model
.
ModelPath
)
if
err
!=
nil
{
pending
.
errCh
<-
err
break
}
g
:=
pickBestFitGPUs
(
pending
,
ggml
,
gpus
)
if
g
!=
nil
{
gpus
=
g
}
s
.
loadFn
(
pending
,
ggml
,
gpus
)
break
}
else
if
loadedMax
>
0
&&
loadedCount
>=
loadedMax
{
slog
.
Debug
(
"max runners achieved, unloading one to make room"
,
"runner_count"
,
loadedCount
)
runnerToExpire
=
s
.
findRunnerToUnload
(
pending
)
}
else
{
//
More than one loaded model, so we have to see if the new one fits
//
Either no models are loaded or below loadedMax
// Get a refreshed GPU list
gpus
:=
s
.
getGpuFn
()
// Update free memory from currently loaded models
s
.
updateFreeSpace
(
gpus
)
// Load model for fitting
ggml
,
err
:=
llm
.
LoadModel
(
pending
.
model
.
ModelPath
)
if
err
!=
nil
{
pending
.
errCh
<-
err
break
}
// No models loaded. Load the model but prefer the best fit.
if
loadedCount
==
0
{
slog
.
Debug
(
"loading first model"
,
"model"
,
pending
.
model
.
ModelPath
)
g
:=
pickBestFitGPUs
(
pending
,
ggml
,
gpus
)
if
g
!=
nil
{
gpus
=
g
}
s
.
loadFn
(
pending
,
ggml
,
gpus
)
break
}
// More than one loaded model, so we have to see if the new one fits
// Update free memory from currently loaded models
s
.
updateFreeSpace
(
gpus
)
gpus
=
pickBestFitGPUs
(
pending
,
ggml
,
gpus
)
if
gpus
!=
nil
{
slog
.
Debug
(
"new model fits with existing models, loading"
)
...
...
server/sched_test.go
View file @
36a6dacc
...
...
@@ -47,7 +47,7 @@ func TestLoad(t *testing.T) {
ctx
,
done
:=
context
.
WithTimeout
(
context
.
Background
(),
5
*
time
.
Millisecond
)
defer
done
()
s
:=
InitScheduler
(
ctx
)
ggml
:=
nil
// value not used in tests
var
ggml
*
llm
.
GGML
// value not used in tests
req
:=
&
LlmRequest
{
ctx
:
ctx
,
model
:
&
Model
{
ModelPath
:
"foo"
},
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment