"""Distributed helpers for kvprune when embedded in vLLM (use TP process group).""" from __future__ import annotations import torch import torch.distributed as dist def broadcast_from_tp_rank0( tensor: torch.Tensor, *, use_tp_group: bool ) -> None: """Broadcast ``tensor`` from group-local rank 0. When ``use_tp_group`` is False (standalone compactor subprocesses), uses the default process group (world == tensor parallel size). When True (embedded in a vLLM worker), uses vLLM's tensor-parallel group so collectives do not accidentally involve DP/PP ranks if the default group is global. """ if not use_tp_group: dist.broadcast(tensor, src=0) return from vllm.distributed.parallel_state import get_tp_group get_tp_group().broadcast(tensor, src=0) def barrier_sync(*, use_tp_group: bool) -> None: """Barrier across either the default group or the TP group (see :func:`broadcast_from_tp_rank0`).""" if not use_tp_group: dist.barrier() return from vllm.distributed.parallel_state import get_tp_group get_tp_group().barrier()