Unverified commit 05d53457, authored by russcoss and committed by GitHub
Browse files

refactor: use the built-in max/min to simplify the code (#12280)


Signed-off-by: russcoss <russcoss@outlook.com>
parent b225508c
...@@ -204,13 +204,8 @@ func (c *InputCache) ShiftDiscard(inputLen int, numKeep int) int { ...@@ -204,13 +204,8 @@ func (c *InputCache) ShiftDiscard(inputLen int, numKeep int) int {
targetFree = max(targetFree, 1) targetFree = max(targetFree, 1)
currentFree := c.numCtx - inputLen currentFree := c.numCtx - inputLen
discard := targetFree - currentFree
if discard < 0 { return max(targetFree-currentFree, 0)
discard = 0
}
return discard
} }
type ErrReprocessInputs struct { type ErrReprocessInputs struct {
......
...@@ -242,13 +242,8 @@ func (c *InputCache) ShiftDiscard(inputLen int32, numKeep int32) int32 { ...@@ -242,13 +242,8 @@ func (c *InputCache) ShiftDiscard(inputLen int32, numKeep int32) int32 {
targetFree = max(targetFree, 1) targetFree = max(targetFree, 1)
currentFree := c.numCtx - inputLen currentFree := c.numCtx - inputLen
discard := targetFree - currentFree
if discard < 0 { return max(targetFree-currentFree, 0)
discard = 0
}
return discard
} }
type ErrReprocessInputs struct { type ErrReprocessInputs struct {
......
...@@ -25,10 +25,7 @@ func Loop(ctx context.Context, maxBackoff time.Duration) iter.Seq2[int, error] { ...@@ -25,10 +25,7 @@ func Loop(ctx context.Context, maxBackoff time.Duration) iter.Seq2[int, error] {
// n^2 backoff timer is a little smoother than the // n^2 backoff timer is a little smoother than the
// common choice of 2^n. // common choice of 2^n.
d := time.Duration(n*n) * 10 * time.Millisecond d := min(time.Duration(n*n)*10*time.Millisecond, maxBackoff)
if d > maxBackoff {
d = maxBackoff
}
// Randomize the delay between 0.5-1.5 x msec, in order // Randomize the delay between 0.5-1.5 x msec, in order
// to prevent accidental "thundering herd" problems. // to prevent accidental "thundering herd" problems.
d = time.Duration(float64(d) * (rand.Float64() + 0.5)) d = time.Duration(float64(d) * (rand.Float64() + 0.5))
......
...@@ -382,10 +382,7 @@ func (pending *LlmRequest) useLoadedRunner(runner *runnerRef, finished chan *Llm ...@@ -382,10 +382,7 @@ func (pending *LlmRequest) useLoadedRunner(runner *runnerRef, finished chan *Llm
// load creates a new model based on req and loads it. If requireFull is true then the model must be loaded fully onto GPUs // load creates a new model based on req and loads it. If requireFull is true then the model must be loaded fully onto GPUs
// (if any). Returns whether the scheduler needs to evict a model to make this one fit. // (if any). Returns whether the scheduler needs to evict a model to make this one fit.
func (s *Scheduler) load(req *LlmRequest, f *ggml.GGML, gpus discover.GpuInfoList, requireFull bool) bool { func (s *Scheduler) load(req *LlmRequest, f *ggml.GGML, gpus discover.GpuInfoList, requireFull bool) bool {
numParallel := int(envconfig.NumParallel()) numParallel := max(int(envconfig.NumParallel()), 1)
if numParallel < 1 {
numParallel = 1
}
// Embedding models should always be loaded with parallel=1 // Embedding models should always be loaded with parallel=1
if req.model.CheckCapabilities(model.CapabilityCompletion) != nil { if req.model.CheckCapabilities(model.CapabilityCompletion) != nil {
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment