otherwise, the resources will be released by the destructor.
Note: Releasing resources in the destructor may cause Python's exception handling process to hang.
enable_shrink: whether to enable shrink mode. When enabled, a mask buffer is allocated to support masking ranks dynamically.
enable_dispatch_ll_layered: Enable low-latency mode with hierarchical dispatch operators.
enable_combine_overlap: whether to overlap the DeepGEMM DOWN GEMM with the combine send.
"""
check_nvlink_connections(group)
...
...
@@ -72,6 +76,10 @@ class Buffer:
self.low_latency_mode=low_latency_mode
self.explicitly_destroy=explicitly_destroy
self.enable_shrink=enable_shrink
ifenable_dispatch_ll_layeredandenable_shrink:# Currently, the layered algorithm for ll dispatch has been optimized, so the shrink mode is no longer supported.
print("DeepEP [ERROR] not support shrink, disable it",flush=True)