Commit 3b811287 authored by shenzhe
Browse files

Tune DSA MLS prefill dispatch thresholds

parent ae7d4f11
...@@ -21,15 +21,18 @@ bool can_run(const SparseAttnFwdParams& params) { ...@@ -21,15 +21,18 @@ bool can_run(const SparseAttnFwdParams& params) {
bool should_run(const SparseAttnFwdParams& params) { bool should_run(const SparseAttnFwdParams& params) {
if (!can_run(params)) return false; if (!can_run(params)) return false;
if (params.d_qk == 512 && if (params.d_qk == 512 && params.h_q == 64 && params.topk == 512 &&
((params.h_q == 64 && params.topk == 512) || params.s_kv >= 28672) {
(params.h_q == 128 && params.topk == 1024))) { return true;
}
if (params.d_qk == 512 && params.h_q == 128 && params.topk == 1024 &&
params.s_kv >= 26624) {
return true; return true;
} }
if (params.d_qk == 576 && params.topk == 2048 && if (params.d_qk == 576 && params.topk == 2048 &&
((params.h_q == 64 && params.s_kv >= 24576) || params.s_kv >= 24576) {
(params.h_q == 128 && params.s_kv >= 8192))) {
return true; return true;
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment