Unverified commit 42ecb9f1, authored by Ire Gaddr, committed by GitHub
Browse files

fix(scheduler): make model unload order deterministic (#10185)

parent 5c0331fd
......@@ -667,13 +667,19 @@ func (runner *runnerRef) waitForVRAMRecovery() chan any {
return finished
}
type ByDuration []*runnerRef
func (a ByDuration) Len() int { return len(a) }
func (a ByDuration) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a ByDuration) Less(i, j int) bool {
// uint64 to turn negative time (never unload) to largest
return uint64(a[i].sessionDuration) < uint64(a[j].sessionDuration)
// ByDurationAndName implements sort.Interface for a slice of runners,
// ordering them by session duration first and by model path second so that
// runners with equal durations always come out in the same order.
type ByDurationAndName []*runnerRef

// Len reports how many runners are in the slice.
func (refs ByDurationAndName) Len() int {
	return len(refs)
}

// Swap exchanges the runners at positions i and j.
func (refs ByDurationAndName) Swap(i, j int) {
	refs[j], refs[i] = refs[i], refs[j]
}

// Less reports whether the runner at index i sorts before the runner at
// index j.
func (refs ByDurationAndName) Less(i, j int) bool {
	left, right := refs[i], refs[j]

	// Compare durations as uint64: a negative duration wraps around to a very
	// large value, so such runners sort after every non-negative duration.
	if lhs, rhs := uint64(left.sessionDuration), uint64(right.sessionDuration); lhs != rhs {
		return lhs < rhs
	}

	// Durations are equal: fall back to lexical order of the model path.
	return left.modelPath < right.modelPath
}
// TODO - future consideration to pick runners based on size
......@@ -775,7 +781,7 @@ func (s *Scheduler) findRunnerToUnload() *runnerRef {
// In the future we can enhance the algorithm to be smarter about picking the optimal runner to unload
// e.g., if we have multiple options, will one make room for the request?
sort.Sort(ByDuration(runnerList))
sort.Sort(ByDurationAndName(runnerList))
// First try to find a runner that's already idle
for _, runner := range runnerList {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment