Commit 259605da authored by zhuwenwen
Browse files

update fp8_mqa_logits and fp8_paged_mqa_logits

parent a55b8f91
@@ -627,7 +627,7 @@ def sparse_attn_indexer(
chunk.cu_seq_lens,
chunk.num_reqs,
)
- if current_platform.is_rocm():
+ if not current_platform.is_rocm():
logits = fp8_mqa_logits(
q_fp8[chunk.token_start:chunk.token_end],
(k_fp8, k_scale),
@@ -680,7 +680,7 @@ def sparse_attn_indexer(
next_n = padded_q_fp8_decode_tokens.shape[1]
assert batch_size == decode_metadata.seq_lens.shape[0]
num_padded_tokens = batch_size * next_n
- if current_platform.is_rocm():
+ if not current_platform.is_rocm():
logits = fp8_paged_mqa_logits(
padded_q_fp8_decode_tokens,
kv_cache,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment