"docs/source/en/using-diffusers/other-modalities.mdx" did not exist on "c3d78cd3067612175ac9f0f8b234abf5a2e1f510"
Commit 8166063a authored by Tri Dao
Browse files

Use block_size=128 for d=128 on SM86 to avoid exceeding smem limit

parent 13403e81
...@@ -133,7 +133,7 @@ void run_fmha_fp16_sm80(Launch_params<FMHA_fprop_params> &launch_params, ...@@ -133,7 +133,7 @@ void run_fmha_fp16_sm80(Launch_params<FMHA_fprop_params> &launch_params,
using Kernel_traits = FMHA_kernel_traits<128, 128, 16, 1, 4, 0x08u, elem_type>; using Kernel_traits = FMHA_kernel_traits<128, 128, 16, 1, 4, 0x08u, elem_type>;
run_fmha_fp16_sm80_loop_<Kernel_traits>(launch_params, configure); run_fmha_fp16_sm80_loop_<Kernel_traits>(launch_params, configure);
} else { } else {
if (dprops->major == 8 && dprops->minor >= 0 && !launch_params.is_dropout) { if (dprops->major == 8 && dprops->minor == 0 && !launch_params.is_dropout) {
// TD [2022-06-05] Keep K in registers to reduce register spilling // TD [2022-06-05] Keep K in registers to reduce register spilling
// Gives about 6% speedup compared to using block size 128. // Gives about 6% speedup compared to using block size 128.
using Kernel_traits = FMHA_kernel_traits<256, 128, 16, 1, 4, 0x18u, elem_type>; using Kernel_traits = FMHA_kernel_traits<256, 128, 16, 1, 4, 0x18u, elem_type>;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment