Commit bcd4dc84 authored by zhuwenwen
Browse files

update deepgemm interface

parent 65bb0ebc
......@@ -637,8 +637,8 @@ def sparse_attn_indexer(
 )
 else:
 logits = op.mqa_logits(
-q_fp8[chunk.token_start:chunk.token_end],
-(k_fp8, k_scale),
+q_fp8[chunk.token_start:chunk.token_end].half(),
+(k_fp8.half(), k_scale),
 weights[chunk.token_start:chunk.token_end],
 chunk.cu_seqlen_ks,
 chunk.cu_seqlen_ke,
......@@ -691,11 +691,9 @@ def sparse_attn_indexer(
 max_model_len=max_model_len,
 )
 else:
-padded_q_fp8_decode_tokens = padded_q_fp8_decode_tokens.half
-kv_cache = kv_cache.half
 logits = gemmopt.paged_mqa_logits(
-padded_q_fp8_decode_tokens,
-kv_cache,
+padded_q_fp8_decode_tokens.half(),
+kv_cache.half(),
 weights[:num_padded_tokens],
 decode_metadata.seq_lens,
 decode_metadata.block_table,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment