Unverified commit d950ff12, authored by Daniel Hiltgen, committed by GitHub
Browse files

sched: fix runner leak during reloading unload (#10819)

When the same model is reloaded rapidly and client connections are
canceled before the model finishes loading, a queued unload event could
leak runners by deleting a different runner from the loaded list.
parent adff143b
......@@ -387,6 +387,17 @@ func (s *Scheduler) processCompleted(ctx context.Context) {
s.loadedMu.Unlock()
runner.refMu.Unlock()
slog.Debug("duplicate expired event, ignoring", "runner", runner)
} else if runner.pid != runnerToUnload.pid {
// If the pids do not match, we likely had multiple load
// failures for the same model in quick succession due to
// request context canceled and are draining the queue of
// events. Ensure the orphaned runner is properly shut down, but
// do not delete the mismatched loaded runner, or wait for VRAM
// convergence.
slog.Debug("orphaned runner shutting down", "orphan", runner, "loaded", runnerToUnload)
runner.unload()
s.loadedMu.Unlock()
runner.refMu.Unlock()
} else {
slog.Debug("starting background wait for VRAM recovery", "runner", runner)
finished := runner.waitForVRAMRecovery()
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment