Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
942c9792
"vscode:/vscode.git/clone" did not exist on "7fb1df536132e32c1947c9d908e82ab4e5b6c6c7"
Unverified
Commit
942c9792
authored
May 05, 2024
by
Jeffrey Morgan
Committed by
GitHub
May 05, 2024
Browse files
allocate a large enough kv cache for all parallel requests (#4162)
parent
06164911
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
4 additions
and
2 deletions
+4
-2
server/sched.go
server/sched.go
+4
-2
No files found.
server/sched.go
View file @
942c9792
...
@@ -93,6 +93,9 @@ func InitScheduler(ctx context.Context) *Scheduler {
...
@@ -93,6 +93,9 @@ func InitScheduler(ctx context.Context) *Scheduler {
// context must be canceled to decrement ref count and release the runner
// context must be canceled to decrement ref count and release the runner
func
(
s
*
Scheduler
)
GetRunner
(
c
context
.
Context
,
model
*
Model
,
opts
api
.
Options
,
sessionDuration
time
.
Duration
)
(
chan
*
runnerRef
,
chan
error
)
{
func
(
s
*
Scheduler
)
GetRunner
(
c
context
.
Context
,
model
*
Model
,
opts
api
.
Options
,
sessionDuration
time
.
Duration
)
(
chan
*
runnerRef
,
chan
error
)
{
// allocate a large enough kv cache for all parallel requests
opts
.
NumCtx
=
opts
.
NumCtx
*
numParallel
req
:=
&
LlmRequest
{
req
:=
&
LlmRequest
{
ctx
:
c
,
ctx
:
c
,
model
:
model
,
model
:
model
,
...
@@ -101,8 +104,7 @@ func (s *Scheduler) GetRunner(c context.Context, model *Model, opts api.Options,
...
@@ -101,8 +104,7 @@ func (s *Scheduler) GetRunner(c context.Context, model *Model, opts api.Options,
successCh
:
make
(
chan
*
runnerRef
),
successCh
:
make
(
chan
*
runnerRef
),
errCh
:
make
(
chan
error
,
1
),
errCh
:
make
(
chan
error
,
1
),
}
}
// context split across parallel threads
opts
.
NumCtx
=
opts
.
NumCtx
*
numParallel
select
{
select
{
case
s
.
pendingReqCh
<-
req
:
case
s
.
pendingReqCh
<-
req
:
default
:
default
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment