Unverified Commit 8c672a7b authored by q.yao's avatar q.yao Committed by GitHub
Browse files

fix turbomind build on sm<80 (#754)

* fix

* fix lint
parent 4744b28c
@@ -14,7 +14,13 @@
// Flash-attention forward kernel entry point: dispatches the whole forward
// pass for this launch to flash::compute_attn with the compile-time
// configuration carried in the template parameters.
//
// requires SM80+: the compute path is only compiled for __CUDA_ARCH__ >= 800;
// on older architectures the kernel body degenerates to a device-side assert.
// NOTE(review): device assert is compiled out under NDEBUG, so a release build
// on sm<80 would silently do nothing here — confirm builds targeting sm<80
// keep assertions enabled or fail earlier on the host side.
template<typename Kernel_traits, bool Is_dropout, bool Is_causal, bool Is_even_N, bool Is_even_K, bool Return_softmax>
__global__ void flash_fwd_kernel(Flash_fwd_params params)
{
#if __CUDA_ARCH__ >= 800
    flash::compute_attn<Kernel_traits, Is_dropout, Is_causal, Is_even_N, Is_even_K, Return_softmax>(params);
#else
    // TODO: support flash attention2 on sm<80
    assert(false);
#endif
}
template<typename Kernel_traits, bool Is_dropout, bool Is_causal> template<typename Kernel_traits, bool Is_dropout, bool Is_causal>
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment