Commit bcd4dc84 authored by zhuwenwen
Browse files

update deepgemm interface

parent 65bb0ebc
...@@ -637,8 +637,8 @@ def sparse_attn_indexer( ...@@ -637,8 +637,8 @@ def sparse_attn_indexer(
) )
else: else:
logits = op.mqa_logits( logits = op.mqa_logits(
q_fp8[chunk.token_start:chunk.token_end], q_fp8[chunk.token_start:chunk.token_end].half(),
(k_fp8, k_scale), (k_fp8.half(), k_scale),
weights[chunk.token_start:chunk.token_end], weights[chunk.token_start:chunk.token_end],
chunk.cu_seqlen_ks, chunk.cu_seqlen_ks,
chunk.cu_seqlen_ke, chunk.cu_seqlen_ke,
...@@ -691,11 +691,9 @@ def sparse_attn_indexer( ...@@ -691,11 +691,9 @@ def sparse_attn_indexer(
max_model_len=max_model_len, max_model_len=max_model_len,
) )
else: else:
padded_q_fp8_decode_tokens = padded_q_fp8_decode_tokens.half
kv_cache = kv_cache.half
logits = gemmopt.paged_mqa_logits( logits = gemmopt.paged_mqa_logits(
padded_q_fp8_decode_tokens, padded_q_fp8_decode_tokens.half(),
kv_cache, kv_cache.half(),
weights[:num_padded_tokens], weights[:num_padded_tokens],
decode_metadata.seq_lens, decode_metadata.seq_lens,
decode_metadata.block_table, decode_metadata.block_table,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment