"vscode:/vscode.git/clone" did not exist on "fc5616c8d0d722263d55a760dbe8b7641d88bca1"
Unverified Commit 01d47a27 authored by chenxu140's avatar chenxu140 Committed by GitHub
Browse files

[Bugfix] fix kv buffer register & dp attention & deepepmoe (#9327)

parent ecc9f3e4
......@@ -23,9 +23,7 @@ class AscendKVManager(MooncakeKVManager):
)
def register_buffer_to_engine(self):
self.engine.register(
self.kv_args.kv_data_ptrs[0], sum(self.kv_args.kv_data_lens)
)
self.engine.batch_register(self.kv_args.kv_data_ptrs, self.kv_args.kv_data_lens)
# The Ascend backend optimizes batch registration for small memory blocks.
self.engine.batch_register(
self.kv_args.aux_data_ptrs, self.kv_args.aux_data_lens
......
......@@ -234,7 +234,7 @@ def initialize_dp_attention(
_DpGatheredBufferWrapper.set_metadata(
hidden_size=model_config.hidden_size,
dtype=model_config.dtype,
device=torch.device("cuda"),
device=torch.device(server_args.device),
)
......
......@@ -736,7 +736,7 @@ class DeepEPMoE(EPMoE):
assert isinstance(dispatch_output, AscendDeepEPLLOutput)
hidden_states, topk_idx, topk_weights, _, seg_indptr, _ = dispatch_output
assert self.quant_method is not None
assert self.activation == "silu"
assert self.moe_runner_config.activation == "silu"
# NOTE: Ascend's Dispatch & Combine does not support FP16
output_dtype = torch.bfloat16
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment