refactor: use the built-in max/min to simplify the code (#12280)

Signed-off-by: russcoss <russcoss@outlook.com>

refactor: use the built-in max/min to simplify the code (#12280)
Signed-off-by: russcoss <russcoss@outlook.com>
05d53457 · russcoss · GitHub · b225508c · 05d53457 · 05d53457
Unverified commit 05d53457, authored Sep 16, 2025 by russcoss; committed by GitHub on Sep 16, 2025.
4 changed files
--- a/runner/llamarunner/cache.go
+++ b/runner/llamarunner/cache.go
@@ -204,13 +204,8 @@ func (c *InputCache) ShiftDiscard(inputLen int, numKeep int) int {
 	targetFree = max(targetFree, 1)
 	currentFree := c.numCtx - inputLen
-	discard := targetFree - currentFree
-	if discard < 0 {
+	return max(targetFree-currentFree, 0)
-		discard = 0
-	}
-	return discard
 }
 type ErrReprocessInputs struct {

--- a/runner/ollamarunner/cache.go
+++ b/runner/ollamarunner/cache.go
@@ -242,13 +242,8 @@ func (c *InputCache) ShiftDiscard(inputLen int32, numKeep int32) int32 {
 	targetFree = max(targetFree, 1)
 	currentFree := c.numCtx - inputLen
-	discard := targetFree - currentFree
-	if discard < 0 {
+	return max(targetFree-currentFree, 0)
-		discard = 0
-	}
-	return discard
 }
 type ErrReprocessInputs struct {

--- a/server/internal/internal/backoff/backoff.go
+++ b/server/internal/internal/backoff/backoff.go
@@ -25,10 +25,7 @@ func Loop(ctx context.Context, maxBackoff time.Duration) iter.Seq2[int, error] {
 			// n^2 backoff timer is a little smoother than the
 			// common choice of 2^n.
-			d := time.Duration(n*n) * 10 * time.Millisecond
+			d := min(time.Duration(n*n)*10*time.Millisecond, maxBackoff)
-			if d > maxBackoff {
-				d = maxBackoff
-			}
 			// Randomize the delay between 0.5-1.5 x msec, in order
 			// to prevent accidental "thundering herd" problems.
 			d = time.Duration(float64(d) * (rand.Float64() + 0.5))

--- a/server/sched.go
+++ b/server/sched.go
@@ -382,10 +382,7 @@ func (pending *LlmRequest) useLoadedRunner(runner *runnerRef, finished chan *Llm
 // load creates a new model based on req and loads it. If requireFull is true then the model must be loaded fully onto GPUs
 // (if any). Returns whether the scheduler needs to evict a model to make this one fit.
 func (s *Scheduler) load(req *LlmRequest, f *ggml.GGML, gpus discover.GpuInfoList, requireFull bool) bool {
-	numParallel := int(envconfig.NumParallel())
+	numParallel := max(int(envconfig.NumParallel()), 1)
-	if numParallel < 1 {
-		numParallel = 1
-	}
 	// Embedding models should always be loaded with parallel=1
 	if req.model.CheckCapabilities(model.CapabilityCompletion) != nil {