"git@developer.sourcefind.cn:renzhc/diffusers_dcu.git" did not exist on "df267ee4e8500a2ef5960879f6d1ea49cc8ec40d"
Unverified Commit dfa2f32c authored by Jeffrey Morgan's avatar Jeffrey Morgan Committed by GitHub
Browse files

unload in critical section (#4187)

parent 840424a2
...@@ -116,7 +116,7 @@ func (s *Scheduler) processPending(ctx context.Context) { ...@@ -116,7 +116,7 @@ func (s *Scheduler) processPending(ctx context.Context) {
} }
} else if envconfig.MaxRunners > 0 && loadedCount >= envconfig.MaxRunners { } else if envconfig.MaxRunners > 0 && loadedCount >= envconfig.MaxRunners {
slog.Debug("max runners achieved, unloading one to make room", "runner_count", loadedCount) slog.Debug("max runners achieved, unloading one to make room", "runner_count", loadedCount)
runnerToExpire = s.findRunnerToUnload(pending) runnerToExpire = s.findRunnerToUnload()
} else { } else {
// Either no models are loaded or below envconfig.MaxRunners // Either no models are loaded or below envconfig.MaxRunners
// Get a refreshed GPU list // Get a refreshed GPU list
...@@ -157,7 +157,7 @@ func (s *Scheduler) processPending(ctx context.Context) { ...@@ -157,7 +157,7 @@ func (s *Scheduler) processPending(ctx context.Context) {
s.loadFn(pending, ggml, gpus) s.loadFn(pending, ggml, gpus)
break break
} }
runnerToExpire = s.findRunnerToUnload(pending) runnerToExpire = s.findRunnerToUnload()
} }
if runnerToExpire == nil { if runnerToExpire == nil {
...@@ -257,9 +257,9 @@ func (s *Scheduler) processCompleted(ctx context.Context) { ...@@ -257,9 +257,9 @@ func (s *Scheduler) processCompleted(ctx context.Context) {
continue continue
} }
s.loadedMu.Lock()
slog.Debug("got lock to unload", "model", runner.model) slog.Debug("got lock to unload", "model", runner.model)
runner.unload() runner.unload()
s.loadedMu.Lock()
delete(s.loaded, runner.model) delete(s.loaded, runner.model)
s.loadedMu.Unlock() s.loadedMu.Unlock()
slog.Debug("runner released", "model", runner.model) slog.Debug("runner released", "model", runner.model)
...@@ -504,7 +504,7 @@ func pickBestFitGPUs(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoList) gpu. ...@@ -504,7 +504,7 @@ func pickBestFitGPUs(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoList) gpu.
} }
// findRunnerToUnload finds a runner to unload to make room for a new model // findRunnerToUnload finds a runner to unload to make room for a new model
func (s *Scheduler) findRunnerToUnload(req *LlmRequest) *runnerRef { func (s *Scheduler) findRunnerToUnload() *runnerRef {
s.loadedMu.Lock() s.loadedMu.Lock()
runnerList := make([]*runnerRef, 0, len(s.loaded)) runnerList := make([]*runnerRef, 0, len(s.loaded))
for _, r := range s.loaded { for _, r := range s.loaded {
......
...@@ -473,10 +473,7 @@ func TestUpdateFreeSpace(t *testing.T) { ...@@ -473,10 +473,7 @@ func TestUpdateFreeSpace(t *testing.T) {
func TestFindRunnerToUnload(t *testing.T) { func TestFindRunnerToUnload(t *testing.T) {
ctx, done := context.WithTimeout(context.Background(), 100*time.Millisecond) ctx, done := context.WithTimeout(context.Background(), 100*time.Millisecond)
defer done() defer done()
req := &LlmRequest{
ctx: ctx,
opts: api.DefaultOptions(),
}
r1 := &runnerRef{refCount: 1, sessionDuration: 1} r1 := &runnerRef{refCount: 1, sessionDuration: 1}
r2 := &runnerRef{sessionDuration: 2} r2 := &runnerRef{sessionDuration: 2}
...@@ -486,10 +483,10 @@ func TestFindRunnerToUnload(t *testing.T) { ...@@ -486,10 +483,10 @@ func TestFindRunnerToUnload(t *testing.T) {
s.loaded["b"] = r2 s.loaded["b"] = r2
s.loadedMu.Unlock() s.loadedMu.Unlock()
resp := s.findRunnerToUnload(req) resp := s.findRunnerToUnload()
require.Equal(t, r2, resp) require.Equal(t, r2, resp)
r2.refCount = 1 r2.refCount = 1
resp = s.findRunnerToUnload(req) resp = s.findRunnerToUnload()
require.Equal(t, r1, resp) require.Equal(t, r1, resp)
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment