Commit 0bda7289, authored and committed by Jesse Gross
Browse files

llm: Enable flash attention by default for qwen3 and qwen3moe

parent 55ca8272
@@ -899,6 +899,8 @@ func (f GGML) SupportsFlashAttention() bool {
func (f GGML) FlashAttention() bool { func (f GGML) FlashAttention() bool {
return slices.Contains([]string{ return slices.Contains([]string{
"gptoss", "gpt-oss", "gptoss", "gpt-oss",
"qwen3",
"qwen3moe",
}, f.KV().String("general.architecture")) }, f.KV().String("general.architecture"))
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment