Commit ec231a79 authored by Daniel Hiltgen's avatar Daniel Hiltgen
Browse files

Remove VRAM convergence check for windows

The APIs we query are optimistic on free space, and windows pages
VRAM, so we don't have to wait to see reported usage recover on unload
parent 7ca71a6b
...@@ -6,6 +6,7 @@ import ( ...@@ -6,6 +6,7 @@ import (
"fmt" "fmt"
"log/slog" "log/slog"
"reflect" "reflect"
"runtime"
"sort" "sort"
"strings" "strings"
"sync" "sync"
...@@ -487,8 +488,8 @@ func (runner *runnerRef) needsReload(ctx context.Context, req *LlmRequest) bool ...@@ -487,8 +488,8 @@ func (runner *runnerRef) needsReload(ctx context.Context, req *LlmRequest) bool
func (runner *runnerRef) waitForVRAMRecovery() chan interface{} { func (runner *runnerRef) waitForVRAMRecovery() chan interface{} {
finished := make(chan interface{}, 1) finished := make(chan interface{}, 1)
// CPU or Metal don't need checking, so no waiting required // CPU or Metal don't need checking, so no waiting required, windows can page VRAM, and the APIs we query tend to be optimistic on free space
if len(runner.gpus) == 1 && (runner.gpus[0].Library == "cpu" || runner.gpus[0].Library == "metal") { if (len(runner.gpus) == 1 && (runner.gpus[0].Library == "cpu" || runner.gpus[0].Library == "metal")) || runtime.GOOS == "windows" {
finished <- struct{}{} finished <- struct{}{}
return finished return finished
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment