Unverified Commit 86fe943b authored by fzyzcjy, committed by GitHub

Fix expert distribution dumping causes OOM (#6967)

parent 9ecb1856
@@ -703,6 +703,7 @@ class _StatAccumulator(_UtilizationRateAccumulatorMixin):
             dtype=torch.int32,
             device=self._server_args.device,
         )
+        self._first_dump = True

     def append(
         self,
@@ -727,9 +728,15 @@ class _StatAccumulator(_UtilizationRateAccumulatorMixin):
             num_logical_experts=self._expert_location_metadata.num_logical_experts,
             physical_to_logical_map=self._expert_location_metadata.physical_to_logical_map,
         )
+
+        if self._first_dump:
+            self._first_dump = False
+            torch.cuda.empty_cache()
+
         torch.distributed.all_reduce(
             logical_count_of_buffered_step, op=torch.distributed.ReduceOp.SUM
         )
+
         output = dict(
             rank=self._rank,
             logical_count=logical_count_of_buffered_step,
...
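
For context, here is a minimal standalone sketch of the pattern this commit applies: release cached allocator blocks once, right before the first collective, so the communication backend can allocate its workspace on a GPU whose memory is otherwise almost entirely held by PyTorch's caching allocator. This is not sglang's code; it assumes torch.distributed is already initialized with a CUDA-capable backend, and the function name dump_counts and the module-level _first_dump flag are hypothetical stand-ins for the instance attribute in the diff above.

import torch
import torch.distributed as dist

_first_dump = True  # hypothetical stand-in for self._first_dump in the diff

def dump_counts(logical_count: torch.Tensor) -> torch.Tensor:
    """All-reduce per-rank expert counts, freeing cached GPU memory first."""
    global _first_dump
    if _first_dump:
        _first_dump = False
        # NCCL allocates its communicator buffers outside PyTorch's caching
        # allocator; if the cache holds nearly all GPU memory, that allocation
        # can fail. Returning unused cached blocks to the driver avoids the OOM.
        torch.cuda.empty_cache()
    dist.all_reduce(logical_count, op=dist.ReduceOp.SUM)
    return logical_count

Doing this only on the first dump keeps the cost low: torch.cuda.empty_cache() synchronizes and slows subsequent allocations, so calling it on every dump would be wasteful once the collective's workspace exists.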