Unverified Commit 733446dd authored by pansicheng, committed by GitHub

fix io group (#9154)


Co-authored-by: Zhiqiang Xie <xiezhq@stanford.edu>
parent 4c22897a
@@ -296,6 +296,9 @@ class HiCacheController:
             self.prefetch_tp_group = torch.distributed.new_group(
                 group_ranks, backend="gloo"
             )
+            self.prefetch_io_tp_group = torch.distributed.new_group(
+                group_ranks, backend="gloo"
+            )
             self.backup_tp_group = torch.distributed.new_group(
                 group_ranks, backend="gloo"
             )
@@ -602,7 +605,7 @@ class HiCacheController:
         if self.tp_world_size > 1:
             # to ensure all TP workers release the host memory at the same time
-            torch.distributed.barrier(group=self.prefetch_tp_group)
+            torch.distributed.barrier(group=self.prefetch_io_tp_group)
         # operation terminated by controller, release pre-allocated memory
         self.mem_pool_host.free(
             operation.host_indices[operation.completed_tokens :]
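The sketch below illustrates the pattern the patch adopts: giving the IO-release barrier its own gloo process group so it cannot interleave with collectives issued on the main prefetch group from another thread. This is a minimal, hypothetical example, not the project's code; the group names, port, and worker logic are illustrative assumptions.

# Minimal sketch of "one process group per concurrent caller".
# Assumption: two threads per rank issue collectives concurrently, mirroring
# prefetch_tp_group vs. prefetch_io_tp_group in the patch above.
import threading

import torch
import torch.distributed as dist
import torch.multiprocessing as mp


def run(rank: int, world_size: int) -> None:
    dist.init_process_group(
        backend="gloo",
        init_method="tcp://127.0.0.1:29501",  # illustrative rendezvous address
        rank=rank,
        world_size=world_size,
    )
    ranks = list(range(world_size))
    # One group per logical caller: collectives on separate groups use
    # separate communicators and cannot be interleaved with each other.
    prefetch_group = dist.new_group(ranks, backend="gloo")
    prefetch_io_group = dist.new_group(ranks, backend="gloo")

    def prefetch_worker() -> None:
        # Background thread: aggregates progress with an all_reduce on its
        # own group, roughly analogous to the prefetch data path.
        completed = torch.tensor([rank + 1])
        dist.all_reduce(completed, op=dist.ReduceOp.MIN, group=prefetch_group)

    t = threading.Thread(target=prefetch_worker)
    t.start()
    # Controller thread: synchronizes host-memory release on the dedicated
    # IO group, so it cannot collide with the worker's collective.
    dist.barrier(group=prefetch_io_group)
    t.join()
    dist.destroy_process_group()


if __name__ == "__main__":
    mp.spawn(run, args=(2,), nprocs=2)

If both callers shared one group, the two ranks could issue the all_reduce and the barrier in different orders and hang; a dedicated group for the IO barrier avoids that ordering hazard.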