slog.Debug("cpu inference mode, model fits in available system memory","model",format.HumanBytes2(estimate.TotalSize),"available",format.HumanBytes2(gpus[0].FreeMemory))
returnnil
}
// TODO - optimization: try to find CPU-only runners first, or partial offloads holding enough in system memory, to make room