Commit 2dd7d0c5 (unverified), authored by Lianmin Zheng, committed by GitHub
Browse files

Revert "Fix nightly-test CI" (#4065)

parent 0d4e3228
...@@ -427,10 +427,7 @@ class FlashInferAttnBackend(AttentionBackend): ...@@ -427,10 +427,7 @@ class FlashInferAttnBackend(AttentionBackend):
else: else:
o2, s2 = prefill_wrapper_paged.forward_return_lse( o2, s2 = prefill_wrapper_paged.forward_return_lse(
q.contiguous().view(-1, layer.tp_q_head_num, layer.head_dim), q.contiguous().view(-1, layer.tp_q_head_num, layer.head_dim),
self._to_dtype( forward_batch.token_to_kv_pool.get_kv_buffer(layer.layer_id),
forward_batch.token_to_kv_pool.get_kv_buffer(layer.layer_id),
q.dtype,
),
causal=False, causal=False,
sm_scale=layer.scaling, sm_scale=layer.scaling,
logits_soft_cap=layer.logit_cap, logits_soft_cap=layer.logit_cap,
...@@ -472,9 +469,7 @@ class FlashInferAttnBackend(AttentionBackend): ...@@ -472,9 +469,7 @@ class FlashInferAttnBackend(AttentionBackend):
o = decode_wrapper.forward( o = decode_wrapper.forward(
q.contiguous().view(-1, layer.tp_q_head_num, layer.head_dim), q.contiguous().view(-1, layer.tp_q_head_num, layer.head_dim),
self._to_dtype( forward_batch.token_to_kv_pool.get_kv_buffer(layer.layer_id),
forward_batch.token_to_kv_pool.get_kv_buffer(layer.layer_id), q.dtype
),
sm_scale=layer.scaling, sm_scale=layer.scaling,
logits_soft_cap=layer.logit_cap, logits_soft_cap=layer.logit_cap,
k_scale=layer.k_scale, k_scale=layer.k_scale,
...@@ -483,12 +478,6 @@ class FlashInferAttnBackend(AttentionBackend): ...@@ -483,12 +478,6 @@ class FlashInferAttnBackend(AttentionBackend):
return o.view(-1, layer.tp_q_head_num * layer.head_dim) return o.view(-1, layer.tp_q_head_num * layer.head_dim)
def _to_dtype(self, kv_tuple, dtype):
    """Return ``kv_tuple`` with every element converted to ``dtype``.

    Args:
        kv_tuple: Iterable of objects exposing ``.dtype`` and ``.to(dtype)``.
            NOTE(review): the visible call sites pass the result of
            ``token_to_kv_pool.get_kv_buffer(layer.layer_id)`` -- presumably a
            (key, value) tensor pair; confirm against the pool implementation.
        dtype: Target dtype (the visible call sites pass ``q.dtype``).

    Returns:
        ``kv_tuple`` itself, unchanged, when every element already has
        ``dtype`` (including the empty case); otherwise a new ``tuple`` whose
        elements are ``t.to(dtype)`` for each element of ``kv_tuple``.
    """
    # Inspect every element rather than only the first one: a pair whose
    # first entry already matches but whose second does not must still be
    # converted, and an empty input must not raise IndexError.
    if all(t.dtype == dtype for t in kv_tuple):
        return kv_tuple
    return tuple(t.to(dtype) for t in kv_tuple)
def _get_wrapper_idx(self, layer: RadixAttention): def _get_wrapper_idx(self, layer: RadixAttention):
if self.num_wrappers == 1: if self.num_wrappers == 1:
return 0 return 0
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment