Commit 8f4ec9ab, authored and committed by Jesse Gross
Browse files

discover: CPU supports flash attention

We already run flash attention on CPUs in cases where we have
partial offloading, but we were disabling it when running on pure CPU,
which is unnecessary.
parent dbfd7bd0
...@@ -171,7 +171,8 @@ func (si SystemInfo) GetOptimalThreadCount() int { ...@@ -171,7 +171,8 @@ func (si SystemInfo) GetOptimalThreadCount() int {
// For each GPU, check if it does NOT support flash attention // For each GPU, check if it does NOT support flash attention
func (l GpuInfoList) FlashAttentionSupported() bool { func (l GpuInfoList) FlashAttentionSupported() bool {
for _, gpu := range l { for _, gpu := range l {
supportsFA := gpu.Library == "metal" || supportsFA := gpu.Library == "cpu" ||
gpu.Library == "metal" ||
(gpu.Library == "cuda" && gpu.DriverMajor >= 7) || (gpu.Library == "cuda" && gpu.DriverMajor >= 7) ||
gpu.Library == "rocm" gpu.Library == "rocm"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment