Update triton_attention.py

d90749d3 · Atream · GitHub · 1548c992 · d90749d3
Unverified commit d90749d3, authored Feb 15, 2025 by Atream; committed by GitHub on Feb 15, 2025.
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 7 additions and 1 deletion

ktransformers/operators/triton_attention.py ktransformers/operators/triton_attention.py +7 -1

No files found.
--- a/ktransformers/operators/triton_attention.py
+++ b/ktransformers/operators/triton_attention.py
+# Adapted from
+# https://github.com/sgl-project/sglang/blob/9f635ea50de920aa507f486daafba26a5b837574/python/sglang/srt/layers/attention/triton_ops/decode_attention.py
+# which was originally adapted from
+# https://github.com/ModelTC/lightllm/blob/96353e868a840db4d103138caf15ed9dbea8c186/lightllm/models/deepseek2/triton_kernel/gqa_flash_decoding_stage1.py
+# https://github.com/ModelTC/lightllm/blob/96353e868a840db4d103138caf15ed9dbea8c186/lightllm/models/deepseek2/triton_kernel/gqa_flash_decoding_stage2.py
+
 import triton
 import triton.language as tl

@@ -376,4 +382,4 @@ def decode_attention_fwd_grouped(
    )

    _decode_softmax_reducev_fwd(attn_logits, q, o, v_buffer, b_seq_len,
-                                num_kv_splits)
\ No newline at end of file
+                                num_kv_splits)