Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dgl
Commits
abcc9cce
"...git@developer.sourcefind.cn:renzhc/diffusers_dcu.git" did not exist on "80fd9260bb12911bc702ab2886971a89b45399fc"
Unverified
Commit
abcc9cce
authored
Jun 09, 2022
by
Rhett Ying
Committed by
GitHub
Jun 09, 2022
Browse files
disable multiple groups tests due to random failure in CI (#4101)
parent
549df65a
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
20 additions
and
14 deletions
+20
-14
python/dgl/distributed/rpc_server.py
python/dgl/distributed/rpc_server.py
+2
-1
tests/distributed/test_dist_graph_store.py
tests/distributed/test_dist_graph_store.py
+12
-8
tests/distributed/test_distributed_sampling.py
tests/distributed/test_distributed_sampling.py
+3
-2
tests/distributed/test_mp_dataloader.py
tests/distributed/test_mp_dataloader.py
+2
-2
tests/distributed/test_rpc.py
tests/distributed/test_rpc.py
+1
-1
No files found.
python/dgl/distributed/rpc_server.py
View file @
abcc9cce
...
@@ -44,7 +44,8 @@ def start_server(server_id, ip_config, num_servers, num_clients, server_state, \
...
@@ -44,7 +44,8 @@ def start_server(server_id, ip_config, num_servers, num_clients, server_state, \
assert
net_type
==
'tensorpipe'
,
\
assert
net_type
==
'tensorpipe'
,
\
"net_type can only be 'tensorpipe' if 'keep_alive' is enabled."
"net_type can only be 'tensorpipe' if 'keep_alive' is enabled."
print
(
"As configured, this server will keep alive for multiple"
print
(
"As configured, this server will keep alive for multiple"
" client groups until force shutdown request is received."
)
" client groups until force shutdown request is received."
" [WARNING] This feature is experimental and not fully tested."
)
# Register signal handler.
# Register signal handler.
rpc
.
register_sig_handler
()
rpc
.
register_sig_handler
()
# Register some basic services
# Register some basic services
...
...
tests/distributed/test_dist_graph_store.py
View file @
abcc9cce
...
@@ -586,10 +586,12 @@ def test_server_client():
...
@@ -586,10 +586,12 @@ def test_server_client():
check_server_client_hetero
(
False
,
1
,
1
)
check_server_client_hetero
(
False
,
1
,
1
)
check_server_client
(
True
,
1
,
1
)
check_server_client
(
True
,
1
,
1
)
check_server_client
(
False
,
1
,
1
)
check_server_client
(
False
,
1
,
1
)
check_server_client
(
True
,
2
,
2
)
# [TODO][Rhett] Tests for multiple groups may fail sometimes and
check_server_client
(
True
,
1
,
1
,
2
)
# root cause is unknown. Let's disable them for now.
check_server_client
(
False
,
1
,
1
,
2
)
#check_server_client(True, 2, 2)
check_server_client
(
True
,
2
,
2
,
2
)
#check_server_client(True, 1, 1, 2)
#check_server_client(False, 1, 1, 2)
#check_server_client(True, 2, 2, 2)
@
unittest
.
skipIf
(
os
.
name
==
'nt'
,
reason
=
'Do not support windows yet'
)
@
unittest
.
skipIf
(
os
.
name
==
'nt'
,
reason
=
'Do not support windows yet'
)
@
unittest
.
skipIf
(
dgl
.
backend
.
backend_name
==
"tensorflow"
,
reason
=
"TF doesn't support distributed DistEmbedding"
)
@
unittest
.
skipIf
(
dgl
.
backend
.
backend_name
==
"tensorflow"
,
reason
=
"TF doesn't support distributed DistEmbedding"
)
...
@@ -599,10 +601,12 @@ def test_dist_emb_server_client():
...
@@ -599,10 +601,12 @@ def test_dist_emb_server_client():
os
.
environ
[
'DGL_DIST_MODE'
]
=
'distributed'
os
.
environ
[
'DGL_DIST_MODE'
]
=
'distributed'
check_dist_emb_server_client
(
True
,
1
,
1
)
check_dist_emb_server_client
(
True
,
1
,
1
)
check_dist_emb_server_client
(
False
,
1
,
1
)
check_dist_emb_server_client
(
False
,
1
,
1
)
check_dist_emb_server_client
(
True
,
2
,
2
)
# [TODO][Rhett] Tests for multiple groups may fail sometimes and
check_dist_emb_server_client
(
True
,
1
,
1
,
2
)
# root cause is unknown. Let's disable them for now.
check_dist_emb_server_client
(
False
,
1
,
1
,
2
)
#check_dist_emb_server_client(True, 2, 2)
check_dist_emb_server_client
(
True
,
2
,
2
,
2
)
#check_dist_emb_server_client(True, 1, 1, 2)
#check_dist_emb_server_client(False, 1, 1, 2)
#check_dist_emb_server_client(True, 2, 2, 2)
@
unittest
.
skipIf
(
dgl
.
backend
.
backend_name
==
"tensorflow"
,
reason
=
"TF doesn't support some of operations in DistGraph"
)
@
unittest
.
skipIf
(
dgl
.
backend
.
backend_name
==
"tensorflow"
,
reason
=
"TF doesn't support some of operations in DistGraph"
)
@
unittest
.
skipIf
(
dgl
.
backend
.
backend_name
==
"mxnet"
,
reason
=
"Turn off Mxnet support"
)
@
unittest
.
skipIf
(
dgl
.
backend
.
backend_name
==
"mxnet"
,
reason
=
"Turn off Mxnet support"
)
...
...
tests/distributed/test_distributed_sampling.py
View file @
abcc9cce
...
@@ -826,7 +826,9 @@ def test_rpc_sampling_shuffle(num_server):
...
@@ -826,7 +826,9 @@ def test_rpc_sampling_shuffle(num_server):
os
.
environ
[
'DGL_DIST_MODE'
]
=
'distributed'
os
.
environ
[
'DGL_DIST_MODE'
]
=
'distributed'
with
tempfile
.
TemporaryDirectory
()
as
tmpdirname
:
with
tempfile
.
TemporaryDirectory
()
as
tmpdirname
:
check_rpc_sampling_shuffle
(
Path
(
tmpdirname
),
num_server
)
check_rpc_sampling_shuffle
(
Path
(
tmpdirname
),
num_server
)
check_rpc_sampling_shuffle
(
Path
(
tmpdirname
),
num_server
,
num_groups
=
2
)
# [TODO][Rhett] Tests for multiple groups may fail sometimes and
# root cause is unknown. Let's disable them for now.
#check_rpc_sampling_shuffle(Path(tmpdirname), num_server, num_groups=2)
check_rpc_hetero_sampling_shuffle
(
Path
(
tmpdirname
),
num_server
)
check_rpc_hetero_sampling_shuffle
(
Path
(
tmpdirname
),
num_server
)
check_rpc_hetero_sampling_empty_shuffle
(
Path
(
tmpdirname
),
num_server
)
check_rpc_hetero_sampling_empty_shuffle
(
Path
(
tmpdirname
),
num_server
)
check_rpc_hetero_etype_sampling_shuffle
(
Path
(
tmpdirname
),
num_server
)
check_rpc_hetero_etype_sampling_shuffle
(
Path
(
tmpdirname
),
num_server
)
...
@@ -1013,7 +1015,6 @@ if __name__ == "__main__":
...
@@ -1013,7 +1015,6 @@ if __name__ == "__main__":
check_rpc_hetero_find_edges_shuffle
(
Path
(
tmpdirname
),
2
)
check_rpc_hetero_find_edges_shuffle
(
Path
(
tmpdirname
),
2
)
check_rpc_in_subgraph_shuffle
(
Path
(
tmpdirname
),
2
)
check_rpc_in_subgraph_shuffle
(
Path
(
tmpdirname
),
2
)
check_rpc_sampling_shuffle
(
Path
(
tmpdirname
),
1
)
check_rpc_sampling_shuffle
(
Path
(
tmpdirname
),
1
)
check_rpc_sampling_shuffle
(
Path
(
tmpdirname
),
2
)
check_rpc_hetero_sampling_shuffle
(
Path
(
tmpdirname
),
1
)
check_rpc_hetero_sampling_shuffle
(
Path
(
tmpdirname
),
1
)
check_rpc_hetero_sampling_shuffle
(
Path
(
tmpdirname
),
2
)
check_rpc_hetero_sampling_shuffle
(
Path
(
tmpdirname
),
2
)
check_rpc_hetero_sampling_empty_shuffle
(
Path
(
tmpdirname
),
1
)
check_rpc_hetero_sampling_empty_shuffle
(
Path
(
tmpdirname
),
1
)
...
...
tests/distributed/test_mp_dataloader.py
View file @
abcc9cce
...
@@ -213,7 +213,7 @@ def check_neg_dataloader(g, tmpdir, num_server, num_workers):
...
@@ -213,7 +213,7 @@ def check_neg_dataloader(g, tmpdir, num_server, num_workers):
@
pytest
.
mark
.
parametrize
(
"num_workers"
,
[
0
,
4
])
@
pytest
.
mark
.
parametrize
(
"num_workers"
,
[
0
,
4
])
@
pytest
.
mark
.
parametrize
(
"drop_last"
,
[
True
,
False
])
@
pytest
.
mark
.
parametrize
(
"drop_last"
,
[
True
,
False
])
@
pytest
.
mark
.
parametrize
(
"reshuffle"
,
[
True
,
False
])
@
pytest
.
mark
.
parametrize
(
"reshuffle"
,
[
True
,
False
])
@
pytest
.
mark
.
parametrize
(
"num_groups"
,
[
1
,
2
])
@
pytest
.
mark
.
parametrize
(
"num_groups"
,
[
1
])
def
test_dist_dataloader
(
tmpdir
,
num_server
,
num_workers
,
drop_last
,
reshuffle
,
num_groups
):
def
test_dist_dataloader
(
tmpdir
,
num_server
,
num_workers
,
drop_last
,
reshuffle
,
num_groups
):
reset_envs
()
reset_envs
()
# No multiple partitions on single machine for
# No multiple partitions on single machine for
...
@@ -456,7 +456,7 @@ if __name__ == "__main__":
...
@@ -456,7 +456,7 @@ if __name__ == "__main__":
test_dataloader
(
Path
(
tmpdirname
),
3
,
4
,
'node'
)
test_dataloader
(
Path
(
tmpdirname
),
3
,
4
,
'node'
)
test_dataloader
(
Path
(
tmpdirname
),
3
,
4
,
'edge'
)
test_dataloader
(
Path
(
tmpdirname
),
3
,
4
,
'edge'
)
test_neg_dataloader
(
Path
(
tmpdirname
),
3
,
4
)
test_neg_dataloader
(
Path
(
tmpdirname
),
3
,
4
)
for
num_groups
in
[
1
,
2
]:
for
num_groups
in
[
1
]:
test_dist_dataloader
(
Path
(
tmpdirname
),
3
,
0
,
True
,
True
,
num_groups
)
test_dist_dataloader
(
Path
(
tmpdirname
),
3
,
0
,
True
,
True
,
num_groups
)
test_dist_dataloader
(
Path
(
tmpdirname
),
3
,
4
,
True
,
True
,
num_groups
)
test_dist_dataloader
(
Path
(
tmpdirname
),
3
,
4
,
True
,
True
,
num_groups
)
test_dist_dataloader
(
Path
(
tmpdirname
),
3
,
0
,
True
,
False
,
num_groups
)
test_dist_dataloader
(
Path
(
tmpdirname
),
3
,
0
,
True
,
False
,
num_groups
)
...
...
tests/distributed/test_rpc.py
View file @
abcc9cce
...
@@ -344,7 +344,7 @@ def test_multi_thread_rpc():
...
@@ -344,7 +344,7 @@ def test_multi_thread_rpc():
start_client_multithread
(
"rpc_ip_config_multithread.txt"
)
start_client_multithread
(
"rpc_ip_config_multithread.txt"
)
pserver
.
join
()
pserver
.
join
()
@
unittest
.
skipIf
(
True
,
reason
=
"Tests of multiple groups may fail and let's disable them for now."
)
@
unittest
.
skipIf
(
os
.
name
==
'nt'
,
reason
=
'Do not support windows yet'
)
@
unittest
.
skipIf
(
os
.
name
==
'nt'
,
reason
=
'Do not support windows yet'
)
def
test_multi_client_groups
():
def
test_multi_client_groups
():
reset_envs
()
reset_envs
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment