Commit ce7e3565 authored by Daniël de Kok
Browse files

Use flashinfer for Gemma 2.

parent cf04a43f
...@@ -94,7 +94,7 @@ fn resolve_attention(config: &Option<Config>, lora_adapters: &Option<String>) -> ...@@ -94,7 +94,7 @@ fn resolve_attention(config: &Option<Config>, lora_adapters: &Option<String>) ->
prefix_caching = Some("0".to_string()); prefix_caching = Some("0".to_string());
} }
match config.model_type.as_deref() { match config.model_type.as_deref() {
Some("gemma2") | Some("falcon") | Some("deepseek_v2") => { Some("falcon") | Some("deepseek_v2") => {
// Required because gemma2 needs bfloat16 which is not supported by // Required because gemma2 needs bfloat16 which is not supported by
// flashinfer ? // flashinfer ?
if attention.is_none() { if attention.is_none() {
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment