Commit c964b9ad authored by zhuwenwen
Browse files

skip indexer_k_cache

parent ac4f685b
...@@ -682,13 +682,13 @@ def sparse_attn_indexer( ...@@ -682,13 +682,13 @@ def sparse_attn_indexer(
quant_block_size, quant_block_size,
scale_fmt, scale_fmt,
) )
else: # else:
ops.indexer_k_cache( # ops.indexer_k_cache(
k, # k,
kv_cache, # kv_cache,
slot_mapping, # slot_mapping,
scale_fmt, # scale_fmt,
) # )
topk_indices_buffer[: hidden_states.shape[0]] = -1 topk_indices_buffer[: hidden_states.shape[0]] = -1
if has_prefill: if has_prefill:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment