llm: Enable flash attention by default for qwen3 and qwen3moe

0bda7289 · Jesse Gross · Jesse Gross · 55ca8272 · 0bda7289
Commit 0bda7289 authored Oct 02, 2025 by Jesse Gross; committed by Jesse Gross on Oct 02, 2025
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 2 additions and 0 deletions

fs/ggml/ggml.go fs/ggml/ggml.go +2 -0

No files found.
--- a/fs/ggml/ggml.go
+++ b/fs/ggml/ggml.go
@@ -899,6 +899,8 @@ func (f GGML) SupportsFlashAttention() bool {
 func (f GGML) FlashAttention() bool {
 	return slices.Contains([]string{
 		"gptoss", "gpt-oss",
+		"qwen3",
+		"qwen3moe",
 	}, f.KV().String("general.architecture"))
 }