Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
b123be5b
Commit
b123be5b
authored
Apr 25, 2024
by
Daniel Hiltgen
Browse files
Adjust context size for parallelism
parent
ddf5c09a
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
11 additions
and
0 deletions
+11
-0
server/sched.go
server/sched.go
+11
-0
No files found.
server/sched.go
View file @
b123be5b
...
...
@@ -46,6 +46,7 @@ type Scheduler struct {
// loadedMax defaults to 1.
// TODO set this to zero after a release or two, to enable multiple models by default
var
loadedMax
=
1
// loadedMax: maximum runners; < 1 maps to as many as will fit in VRAM (unlimited for CPU runners).
// InitScheduler may overwrite this default (presumably from OLLAMA_MAX_LOADED_MODELS; the
// parsing code is not fully visible here).
var
maxQueuedRequests
=
10
// maxQueuedRequests: buffer size used when InitScheduler creates the pendingReqCh channel.
// TODO configurable
//
// numParallel (below) is the factor GetRunner multiplies opts.NumCtx by. It defaults to 1;
// InitScheduler replaces it with the value of OLLAMA_NUM_PARALLEL when that variable is set
// and parses to an integer greater than zero, and logs an error and keeps the default otherwise.
var
numParallel
=
1
func
InitScheduler
(
ctx
context
.
Context
)
*
Scheduler
{
maxRunners
:=
os
.
Getenv
(
"OLLAMA_MAX_LOADED_MODELS"
)
...
...
@@ -57,6 +58,14 @@ func InitScheduler(ctx context.Context) *Scheduler {
loadedMax
=
m
}
}
if
onp
:=
os
.
Getenv
(
"OLLAMA_NUM_PARALLEL"
);
onp
!=
""
{
p
,
err
:=
strconv
.
Atoi
(
onp
)
if
err
!=
nil
||
p
<=
0
{
slog
.
Error
(
"invalid parallel setting, must be greater than zero"
,
"OLLAMA_NUM_PARALLEL"
,
onp
,
"error"
,
err
)
}
else
{
numParallel
=
p
}
}
sched
:=
&
Scheduler
{
pendingReqCh
:
make
(
chan
*
LlmRequest
,
maxQueuedRequests
),
...
...
@@ -81,6 +90,8 @@ func (s *Scheduler) GetRunner(c context.Context, model *Model, opts api.Options,
successCh
:
make
(
chan
*
runnerRef
),
errCh
:
make
(
chan
error
,
1
),
}
// context split across parallel threads
opts
.
NumCtx
=
opts
.
NumCtx
*
numParallel
select
{
case
s
.
pendingReqCh
<-
req
:
default
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment