Commit bd3c995f authored by lijian
Browse files

Add rocSHMEM environment variables to buffer.py.


Signed-off-by: lijian <34831075+lijian0711@users.noreply.github.com>
parent 1bc17d5d
...@@ -113,28 +113,21 @@ class Buffer: ...@@ -113,28 +113,21 @@ class Buffer:
self._setup_device_hca_mapping() self._setup_device_hca_mapping()
assert num_qps_per_rank > 0 assert num_qps_per_rank > 0
os.environ["DUSHMEM_DISABLE_P2P"] = "0" if allow_nvlink_for_low_latency_mode else "1" os.environ["DUSHMEM_DISABLE_P2P"] = "0" if allow_nvlink_for_low_latency_mode else "1"
# os.environ["DUSHMEM_IB_ENABLE_IBGDA"] = "1"
os.environ["DUSHMEM_IB_ENABLE_IBGDA"] = "0" # force_use_ibrc os.environ["DUSHMEM_IB_ENABLE_IBGDA"] = "0" # force_use_ibrc
os.environ["DUSHMEM_IBGDA_NIC_HANDLER"] = "gpu" os.environ["DUSHMEM_IBGDA_NIC_HANDLER"] = "gpu"
os.environ["DUSHMEM_IB_DISABLE_DMABUF"] = "1"
os.environ["DUSHMEM_ENABLE_NIC_PE_MAPPING"] = "1" os.environ["DUSHMEM_ENABLE_NIC_PE_MAPPING"] = "1"
os.environ["DUSHMEM_IBGDA_NUM_RC_PER_PE"] = f"{num_qps_per_rank}" os.environ["DUSHMEM_IBGDA_NUM_RC_PER_PE"] = f"{num_qps_per_rank}"
# Make sure QP depth is always larger than the number of on-flight WRs, so that we can skip WQ slot check
os.environ["DUSHMEM_QP_DEPTH"] = os.environ.get("DUSHMEM_QP_DEPTH", "1024")
# Reduce gpu memory usage
# 6 default teams + 1 extra team
os.environ["DUSHMEM_MAX_TEAMS"] = "7" os.environ["DUSHMEM_MAX_TEAMS"] = "7"
# Disable NVLink SHArP
os.environ["DUSHMEM_DISABLE_NVLS"] = "1" os.environ["DUSHMEM_DISABLE_NVLS"] = "1"
# NOTES: DUSHMEM initialization requires at least 256 MiB
os.environ["DUSHMEM_CUMEM_GRANULARITY"] = f"{2 ** 29}"
if not allow_mnnvl: self.gda_num_qps_per_pe = max(int(os.environ.get('ROCSHMEM_GDA_NUM_QPS_PER_PE_DEFAULT_CTX', str(num_qps_per_rank))), num_qps_per_rank)
# Disable multi-node NVLink detection os.environ["ROCSHMEM_GDA_NUM_QPS_DEFAULT_CTX"] = str(self.gda_num_qps_per_pe)
os.environ["DUSHMEM_DISABLE_MNNVL"] = "1" if self.num_rdma_bytes > 1073741824:
multiple = 2147483648
rocshmem_num_rdma_bytes = ((self.num_rdma_bytes + multiple - 1) // multiple) * multiple
os.environ["ROCSHMEM_HEAP_SIZE"] = str(rocshmem_num_rdma_bytes)
if self.group_size <= 8:
os.environ["ROCSHMEM_BACKEND"] = "ipc"
# Synchronize using the root ID # Synchronize using the root ID
dushmem_unique_ids = [ dushmem_unique_ids = [
......
rocshmem @ bea1a2e7
Subproject commit d8a4cb56d62694d1ac624f36a3db4325dd7c4d89 Subproject commit bea1a2e7dc6abaa40d4def4800bb1eef52735e2b
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment