Unverified commit 01d47a27, authored by chenxu140 and committed by GitHub
Browse files

[Bugfix] fix kv buffer register & dp attention & deepepmoe (#9327)

parent ecc9f3e4
...@@ -23,9 +23,7 @@ class AscendKVManager(MooncakeKVManager): ...@@ -23,9 +23,7 @@ class AscendKVManager(MooncakeKVManager):
) )
def register_buffer_to_engine(self): def register_buffer_to_engine(self):
self.engine.register( self.engine.batch_register(self.kv_args.kv_data_ptrs, self.kv_args.kv_data_lens)
self.kv_args.kv_data_ptrs[0], sum(self.kv_args.kv_data_lens)
)
# The Ascend backend optimize batch registration for small memory blocks. # The Ascend backend optimize batch registration for small memory blocks.
self.engine.batch_register( self.engine.batch_register(
self.kv_args.aux_data_ptrs, self.kv_args.aux_data_lens self.kv_args.aux_data_ptrs, self.kv_args.aux_data_lens
......
...@@ -234,7 +234,7 @@ def initialize_dp_attention( ...@@ -234,7 +234,7 @@ def initialize_dp_attention(
_DpGatheredBufferWrapper.set_metadata( _DpGatheredBufferWrapper.set_metadata(
hidden_size=model_config.hidden_size, hidden_size=model_config.hidden_size,
dtype=model_config.dtype, dtype=model_config.dtype,
device=torch.device("cuda"), device=torch.device(server_args.device),
) )
......
...@@ -736,7 +736,7 @@ class DeepEPMoE(EPMoE): ...@@ -736,7 +736,7 @@ class DeepEPMoE(EPMoE):
assert isinstance(dispatch_output, AscendDeepEPLLOutput) assert isinstance(dispatch_output, AscendDeepEPLLOutput)
hidden_states, topk_idx, topk_weights, _, seg_indptr, _ = dispatch_output hidden_states, topk_idx, topk_weights, _, seg_indptr, _ = dispatch_output
assert self.quant_method is not None assert self.quant_method is not None
assert self.activation == "silu" assert self.moe_runner_config.activation == "silu"
# NOTE: Ascend's Dispatch & Combine does not support FP16 # NOTE: Ascend's Dispatch & Combine does not support FP16
output_dtype = torch.bfloat16 output_dtype = torch.bfloat16
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment