Unverified commit d7339fad, authored by Daniel Hiltgen, committed by GitHub
Browse files

Merge pull request #4682 from dhiltgen/more_time

Give the final model loading more time
parents 9db0996e 92c81e81
...@@ -519,11 +519,13 @@ func (s *llmServer) Ping(ctx context.Context) error { ...@@ -519,11 +519,13 @@ func (s *llmServer) Ping(ctx context.Context) error {
func (s *llmServer) WaitUntilRunning(ctx context.Context) error { func (s *llmServer) WaitUntilRunning(ctx context.Context) error {
start := time.Now() start := time.Now()
stallDuration := 60 * time.Second stallDuration := 5 * time.Minute // If no progress happens
stallTimer := time.Now().Add(stallDuration) // give up if we stall for finalLoadDuration := 5 * time.Minute // After we hit 100%, give the runner more time to come online
stallTimer := time.Now().Add(stallDuration) // give up if we stall
slog.Info("waiting for llama runner to start responding") slog.Info("waiting for llama runner to start responding")
var lastStatus ServerStatus = -1 var lastStatus ServerStatus = -1
fullyLoaded := false
for { for {
select { select {
...@@ -572,6 +574,10 @@ func (s *llmServer) WaitUntilRunning(ctx context.Context) error { ...@@ -572,6 +574,10 @@ func (s *llmServer) WaitUntilRunning(ctx context.Context) error {
if priorProgress != s.loadProgress { if priorProgress != s.loadProgress {
slog.Debug(fmt.Sprintf("model load progress %0.2f", s.loadProgress)) slog.Debug(fmt.Sprintf("model load progress %0.2f", s.loadProgress))
stallTimer = time.Now().Add(stallDuration) stallTimer = time.Now().Add(stallDuration)
} else if !fullyLoaded && int(s.loadProgress*100.0) >= 100 {
slog.Debug("model load completed, waiting for server to become available", "status", status.ToString())
stallTimer = time.Now().Add(finalLoadDuration)
fullyLoaded = true
} }
time.Sleep(time.Millisecond * 250) time.Sleep(time.Millisecond * 250)
continue continue
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment