Unverified commit 0e964ea5, authored by Soji Adeshina and committed by GitHub
Browse files

increase timeout for dgl.nn.NodeEmbedding TCPStore get/wait to 10 minutes (#2967)


Co-authored-by: xiang song(charlie.song) <classicxsong@gmail.com>
parent 2ae190d6
...@@ -82,7 +82,7 @@ class NodeEmbedding: # NodeEmbedding ...@@ -82,7 +82,7 @@ class NodeEmbedding: # NodeEmbedding
# embedding status synchronization across GPU processes # embedding status synchronization across GPU processes
if _STORE is None: if _STORE is None:
_STORE = th.distributed.TCPStore( _STORE = th.distributed.TCPStore(
host_name, port, world_size, True, timedelta(seconds=30)) host_name, port, world_size, True, timedelta(seconds=10*60))
for _ in range(1, world_size): for _ in range(1, world_size):
# send embs # send embs
_STORE.set(name, name) _STORE.set(name, name)
...@@ -90,7 +90,7 @@ class NodeEmbedding: # NodeEmbedding ...@@ -90,7 +90,7 @@ class NodeEmbedding: # NodeEmbedding
# receive # receive
if _STORE is None: if _STORE is None:
_STORE = th.distributed.TCPStore( _STORE = th.distributed.TCPStore(
host_name, port, world_size, False, timedelta(seconds=30)) host_name, port, world_size, False, timedelta(seconds=10*60))
_STORE.wait([name]) _STORE.wait([name])
emb = get_shared_mem_array(name, (num_embeddings, embedding_dim), th.float32) emb = get_shared_mem_array(name, (num_embeddings, embedding_dim), th.float32)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment