Unverified commit c9f98622, authored by Jeffrey Morgan, committed by GitHub
Browse files

Skip scheduling cancelled requests, always reload unloaded runners (#4189)

parent aa93423f
...@@ -100,6 +100,12 @@ func (s *Scheduler) processPending(ctx context.Context) { ...@@ -100,6 +100,12 @@ func (s *Scheduler) processPending(ctx context.Context) {
return return
case pending := <-s.pendingReqCh: case pending := <-s.pendingReqCh:
// Block other requests until we get this pending request running // Block other requests until we get this pending request running
if pending.ctx.Err() != nil {
slog.Debug("pending request cancelled or timed out, skipping scheduling")
continue
}
for { for {
var runnerToExpire *runnerRef var runnerToExpire *runnerRef
s.loadedMu.Lock() s.loadedMu.Lock()
...@@ -435,6 +441,10 @@ func (runner *runnerRef) needsReload(ctx context.Context, req *LlmRequest) bool ...@@ -435,6 +441,10 @@ func (runner *runnerRef) needsReload(ctx context.Context, req *LlmRequest) bool
timeout = 2 * time.Minute // Initial load can take a long time for big models on slow systems... timeout = 2 * time.Minute // Initial load can take a long time for big models on slow systems...
} }
if runner.Options == nil {
return true
}
// Don't reload runner if num_gpu=-1 was provided // Don't reload runner if num_gpu=-1 was provided
optsExisting := runner.Options.Runner optsExisting := runner.Options.Runner
optsNew := req.opts.Runner optsNew := req.opts.Runner
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment