"...git@developer.sourcefind.cn:renzhc/diffusers_dcu.git" did not exist on "80fd9260bb12911bc702ab2886971a89b45399fc"
Unverified Commit abcc9cce authored by Rhett Ying's avatar Rhett Ying Committed by GitHub
Browse files

disable multiple groups tests due to random failure in CI (#4101)

parent 549df65a
...@@ -44,7 +44,8 @@ def start_server(server_id, ip_config, num_servers, num_clients, server_state, \ ...@@ -44,7 +44,8 @@ def start_server(server_id, ip_config, num_servers, num_clients, server_state, \
assert net_type == 'tensorpipe', \ assert net_type == 'tensorpipe', \
"net_type can only be 'tensorpipe' if 'keep_alive' is enabled." "net_type can only be 'tensorpipe' if 'keep_alive' is enabled."
print("As configured, this server will keep alive for multiple" print("As configured, this server will keep alive for multiple"
" client groups until force shutdown request is received.") " client groups until force shutdown request is received."
" [WARNING] This feature is experimental and not fully tested.")
# Register signal handler. # Register signal handler.
rpc.register_sig_handler() rpc.register_sig_handler()
# Register some basic services # Register some basic services
......
...@@ -586,10 +586,12 @@ def test_server_client(): ...@@ -586,10 +586,12 @@ def test_server_client():
check_server_client_hetero(False, 1, 1) check_server_client_hetero(False, 1, 1)
check_server_client(True, 1, 1) check_server_client(True, 1, 1)
check_server_client(False, 1, 1) check_server_client(False, 1, 1)
check_server_client(True, 2, 2) # [TODO][Rhett] Tests for multiple groups may fail sometimes and
check_server_client(True, 1, 1, 2) # root cause is unknown. Let's disable them for now.
check_server_client(False, 1, 1, 2) #check_server_client(True, 2, 2)
check_server_client(True, 2, 2, 2) #check_server_client(True, 1, 1, 2)
#check_server_client(False, 1, 1, 2)
#check_server_client(True, 2, 2, 2)
@unittest.skipIf(os.name == 'nt', reason='Do not support windows yet') @unittest.skipIf(os.name == 'nt', reason='Do not support windows yet')
@unittest.skipIf(dgl.backend.backend_name == "tensorflow", reason="TF doesn't support distributed DistEmbedding") @unittest.skipIf(dgl.backend.backend_name == "tensorflow", reason="TF doesn't support distributed DistEmbedding")
...@@ -599,10 +601,12 @@ def test_dist_emb_server_client(): ...@@ -599,10 +601,12 @@ def test_dist_emb_server_client():
os.environ['DGL_DIST_MODE'] = 'distributed' os.environ['DGL_DIST_MODE'] = 'distributed'
check_dist_emb_server_client(True, 1, 1) check_dist_emb_server_client(True, 1, 1)
check_dist_emb_server_client(False, 1, 1) check_dist_emb_server_client(False, 1, 1)
check_dist_emb_server_client(True, 2, 2) # [TODO][Rhett] Tests for multiple groups may fail sometimes and
check_dist_emb_server_client(True, 1, 1, 2) # root cause is unknown. Let's disable them for now.
check_dist_emb_server_client(False, 1, 1, 2) #check_dist_emb_server_client(True, 2, 2)
check_dist_emb_server_client(True, 2, 2, 2) #check_dist_emb_server_client(True, 1, 1, 2)
#check_dist_emb_server_client(False, 1, 1, 2)
#check_dist_emb_server_client(True, 2, 2, 2)
@unittest.skipIf(dgl.backend.backend_name == "tensorflow", reason="TF doesn't support some of operations in DistGraph") @unittest.skipIf(dgl.backend.backend_name == "tensorflow", reason="TF doesn't support some of operations in DistGraph")
@unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Turn off Mxnet support") @unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Turn off Mxnet support")
......
...@@ -826,7 +826,9 @@ def test_rpc_sampling_shuffle(num_server): ...@@ -826,7 +826,9 @@ def test_rpc_sampling_shuffle(num_server):
os.environ['DGL_DIST_MODE'] = 'distributed' os.environ['DGL_DIST_MODE'] = 'distributed'
with tempfile.TemporaryDirectory() as tmpdirname: with tempfile.TemporaryDirectory() as tmpdirname:
check_rpc_sampling_shuffle(Path(tmpdirname), num_server) check_rpc_sampling_shuffle(Path(tmpdirname), num_server)
check_rpc_sampling_shuffle(Path(tmpdirname), num_server, num_groups=2) # [TODO][Rhett] Tests for multiple groups may fail sometimes and
# root cause is unknown. Let's disable them for now.
#check_rpc_sampling_shuffle(Path(tmpdirname), num_server, num_groups=2)
check_rpc_hetero_sampling_shuffle(Path(tmpdirname), num_server) check_rpc_hetero_sampling_shuffle(Path(tmpdirname), num_server)
check_rpc_hetero_sampling_empty_shuffle(Path(tmpdirname), num_server) check_rpc_hetero_sampling_empty_shuffle(Path(tmpdirname), num_server)
check_rpc_hetero_etype_sampling_shuffle(Path(tmpdirname), num_server) check_rpc_hetero_etype_sampling_shuffle(Path(tmpdirname), num_server)
...@@ -1013,7 +1015,6 @@ if __name__ == "__main__": ...@@ -1013,7 +1015,6 @@ if __name__ == "__main__":
check_rpc_hetero_find_edges_shuffle(Path(tmpdirname), 2) check_rpc_hetero_find_edges_shuffle(Path(tmpdirname), 2)
check_rpc_in_subgraph_shuffle(Path(tmpdirname), 2) check_rpc_in_subgraph_shuffle(Path(tmpdirname), 2)
check_rpc_sampling_shuffle(Path(tmpdirname), 1) check_rpc_sampling_shuffle(Path(tmpdirname), 1)
check_rpc_sampling_shuffle(Path(tmpdirname), 2)
check_rpc_hetero_sampling_shuffle(Path(tmpdirname), 1) check_rpc_hetero_sampling_shuffle(Path(tmpdirname), 1)
check_rpc_hetero_sampling_shuffle(Path(tmpdirname), 2) check_rpc_hetero_sampling_shuffle(Path(tmpdirname), 2)
check_rpc_hetero_sampling_empty_shuffle(Path(tmpdirname), 1) check_rpc_hetero_sampling_empty_shuffle(Path(tmpdirname), 1)
......
...@@ -213,7 +213,7 @@ def check_neg_dataloader(g, tmpdir, num_server, num_workers): ...@@ -213,7 +213,7 @@ def check_neg_dataloader(g, tmpdir, num_server, num_workers):
@pytest.mark.parametrize("num_workers", [0, 4]) @pytest.mark.parametrize("num_workers", [0, 4])
@pytest.mark.parametrize("drop_last", [True, False]) @pytest.mark.parametrize("drop_last", [True, False])
@pytest.mark.parametrize("reshuffle", [True, False]) @pytest.mark.parametrize("reshuffle", [True, False])
@pytest.mark.parametrize("num_groups", [1, 2]) @pytest.mark.parametrize("num_groups", [1])
def test_dist_dataloader(tmpdir, num_server, num_workers, drop_last, reshuffle, num_groups): def test_dist_dataloader(tmpdir, num_server, num_workers, drop_last, reshuffle, num_groups):
reset_envs() reset_envs()
# No multiple partitions on single machine for # No multiple partitions on single machine for
...@@ -456,7 +456,7 @@ if __name__ == "__main__": ...@@ -456,7 +456,7 @@ if __name__ == "__main__":
test_dataloader(Path(tmpdirname), 3, 4, 'node') test_dataloader(Path(tmpdirname), 3, 4, 'node')
test_dataloader(Path(tmpdirname), 3, 4, 'edge') test_dataloader(Path(tmpdirname), 3, 4, 'edge')
test_neg_dataloader(Path(tmpdirname), 3, 4) test_neg_dataloader(Path(tmpdirname), 3, 4)
for num_groups in [1, 2]: for num_groups in [1]:
test_dist_dataloader(Path(tmpdirname), 3, 0, True, True, num_groups) test_dist_dataloader(Path(tmpdirname), 3, 0, True, True, num_groups)
test_dist_dataloader(Path(tmpdirname), 3, 4, True, True, num_groups) test_dist_dataloader(Path(tmpdirname), 3, 4, True, True, num_groups)
test_dist_dataloader(Path(tmpdirname), 3, 0, True, False, num_groups) test_dist_dataloader(Path(tmpdirname), 3, 0, True, False, num_groups)
......
...@@ -344,7 +344,7 @@ def test_multi_thread_rpc(): ...@@ -344,7 +344,7 @@ def test_multi_thread_rpc():
start_client_multithread("rpc_ip_config_multithread.txt") start_client_multithread("rpc_ip_config_multithread.txt")
pserver.join() pserver.join()
@unittest.skipIf(True, reason="Tests of multiple groups may fail and let's disable them for now.")
@unittest.skipIf(os.name == 'nt', reason='Do not support windows yet') @unittest.skipIf(os.name == 'nt', reason='Do not support windows yet')
def test_multi_client_groups(): def test_multi_client_groups():
reset_envs() reset_envs()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment