Unverified Commit 8c672a7b authored by q.yao's avatar q.yao Committed by GitHub
Browse files

fix turbomind build on sm<80 (#754)

* fix

* fix lint
parent 4744b28c
@@ -14,7 +14,13 @@
// Flash-attention forward kernel entry point: dispatches the whole forward
// pass for this launch to flash::compute_attn with the compile-time
// configuration carried in the template parameters.
//
// requires SM80+: the compute path is only compiled for __CUDA_ARCH__ >= 800;
// on older architectures the kernel body degenerates to a device-side assert.
// NOTE(review): device assert is compiled out under NDEBUG, so a release build
// on sm<80 would silently do nothing here — confirm builds targeting sm<80
// keep assertions enabled or fail earlier on the host side.
template<typename Kernel_traits, bool Is_dropout, bool Is_causal, bool Is_even_N, bool Is_even_K, bool Return_softmax>
__global__ void flash_fwd_kernel(Flash_fwd_params params)
{
#if __CUDA_ARCH__ >= 800
    flash::compute_attn<Kernel_traits, Is_dropout, Is_causal, Is_even_N, Is_even_K, Return_softmax>(params);
#else
    // TODO: support flash attention2 on sm<80
    assert(false);
#endif
}
template<typename Kernel_traits, bool Is_dropout, bool Is_causal> template<typename Kernel_traits, bool Is_dropout, bool Is_causal>
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment