slog.Debug("new model will fit in available VRAM in single GPU, loading","model",req.model.ModelPath,"gpu",g.ID,"available",g.FreeMemory,"required",format.HumanBytes2(estimatedVRAM))
slog.Info("new model will fit in available VRAM in single GPU, loading","model",req.model.ModelPath,"gpu",g.ID,"parallel",p,"available",g.FreeMemory,"required",format.HumanBytes2(estimatedVRAM))
*numParallel=p
return[]gpu.GpuInfo{g}
return[]gpu.GpuInfo{g}
}
}
}
}
}
}
}
// TODO future refinements
// - if multiple Libraries, see if any single GPU in any Library will fit
// - try subsets of GPUs instead of just falling back to 1 or all in a family
slog.Debug("new model will fit in available VRAM, loading","model",req.model.ModelPath,"library",sgl[0].Library,"required",format.HumanBytes2(estimatedVRAM))
slog.Info("new model will fit in available VRAM, loading","model",req.model.ModelPath,"library",sgl[0].Library,"parallel",p,"required",format.HumanBytes2(estimatedVRAM))