Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dgl
Commits
efa95d86
Unverified
Commit
efa95d86
authored
Aug 13, 2020
by
Da Zheng
Committed by
GitHub
Aug 13, 2020
Browse files
[Distributed] Fix bugs (#2009)
* fix. * add tests. * fix. * fix.
parent
1ad46fd0
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
20 additions
and
2 deletions
+20
-2
python/dgl/dataloading/neighbor.py
python/dgl/dataloading/neighbor.py
+3
-1
python/dgl/distributed/dist_tensor.py
python/dgl/distributed/dist_tensor.py
+9
-1
tests/distributed/test_dist_graph_store.py
tests/distributed/test_dist_graph_store.py
+8
-0
No files found.
python/dgl/dataloading/neighbor.py
View file @
efa95d86
...
@@ -61,7 +61,9 @@ class MultiLayerNeighborSampler(BlockSampler):
...
@@ -61,7 +61,9 @@ class MultiLayerNeighborSampler(BlockSampler):
fanout
=
self
.
fanouts
[
block_id
]
fanout
=
self
.
fanouts
[
block_id
]
if
isinstance
(
g
,
distributed
.
DistGraph
):
if
isinstance
(
g
,
distributed
.
DistGraph
):
if
fanout
is
None
:
if
fanout
is
None
:
frontier
=
distributed
.
in_subgraph
(
g
,
seed_nodes
)
# TODO(zhengda) There is a bug in the distributed version of in_subgraph.
# let's use sample_neighbors to replace in_subgraph for now.
frontier
=
distributed
.
sample_neighbors
(
g
,
seed_nodes
,
-
1
,
replace
=
False
)
else
:
else
:
frontier
=
distributed
.
sample_neighbors
(
g
,
seed_nodes
,
fanout
,
replace
=
self
.
replace
)
frontier
=
distributed
.
sample_neighbors
(
g
,
seed_nodes
,
fanout
,
replace
=
self
.
replace
)
else
:
else
:
...
...
python/dgl/distributed/dist_tensor.py
View file @
efa95d86
...
@@ -4,6 +4,7 @@ import os
...
@@ -4,6 +4,7 @@ import os
from
.dist_context
import
is_initialized
from
.dist_context
import
is_initialized
from
.kvstore
import
get_kvstore
from
.kvstore
import
get_kvstore
from
.role
import
get_role
from
..
import
utils
from
..
import
utils
from
..
import
backend
as
F
from
..
import
backend
as
F
...
@@ -17,6 +18,9 @@ def _get_data_name(name, part_policy):
...
@@ -17,6 +18,9 @@ def _get_data_name(name, part_policy):
def
_default_init_data
(
shape
,
dtype
):
def
_default_init_data
(
shape
,
dtype
):
return
F
.
zeros
(
shape
,
dtype
,
F
.
cpu
())
return
F
.
zeros
(
shape
,
dtype
,
F
.
cpu
())
# These Ids can identify the anonymous distributed tensors.
DIST_TENSOR_ID
=
0
class
DistTensor
:
class
DistTensor
:
''' Distributed tensor.
''' Distributed tensor.
...
@@ -80,7 +84,11 @@ class DistTensor:
...
@@ -80,7 +84,11 @@ class DistTensor:
# We need to generate the name in a deterministic way.
# We need to generate the name in a deterministic way.
if
name
is
None
:
if
name
is
None
:
assert
not
persistent
,
'We cannot generate anonymous persistent distributed tensors'
assert
not
persistent
,
'We cannot generate anonymous persistent distributed tensors'
name
=
'anonymous-'
+
str
(
len
(
exist_names
)
+
1
)
global
DIST_TENSOR_ID
# All processes of the same role should create DistTensor synchronously.
# Thus, all of them should have the same Ids.
name
=
'anonymous-'
+
get_role
()
+
'-'
+
str
(
DIST_TENSOR_ID
)
DIST_TENSOR_ID
+=
1
self
.
_name
=
_get_data_name
(
name
,
part_policy
.
policy_str
)
self
.
_name
=
_get_data_name
(
name
,
part_policy
.
policy_str
)
self
.
_persistent
=
persistent
self
.
_persistent
=
persistent
if
self
.
_name
not
in
exist_names
:
if
self
.
_name
not
in
exist_names
:
...
...
tests/distributed/test_dist_graph_store.py
View file @
efa95d86
...
@@ -108,6 +108,14 @@ def check_dist_graph(g, num_clients, num_nodes, num_edges):
...
@@ -108,6 +108,14 @@ def check_dist_graph(g, num_clients, num_nodes, num_edges):
test3
=
dgl
.
distributed
.
DistTensor
((
g
.
number_of_nodes
(),
3
),
F
.
float32
,
'test3'
)
test3
=
dgl
.
distributed
.
DistTensor
((
g
.
number_of_nodes
(),
3
),
F
.
float32
,
'test3'
)
del
test3
del
test3
# add tests for anonymous distributed tensor.
test3
=
dgl
.
distributed
.
DistTensor
(
new_shape
,
F
.
float32
,
init_func
=
rand_init
)
data
=
test3
[
0
:
10
]
test4
=
dgl
.
distributed
.
DistTensor
(
new_shape
,
F
.
float32
,
init_func
=
rand_init
)
del
test3
test5
=
dgl
.
distributed
.
DistTensor
(
new_shape
,
F
.
float32
,
init_func
=
rand_init
)
assert
np
.
sum
(
F
.
asnumpy
(
test5
[
0
:
10
]
!=
data
))
>
0
# test a persistent tensor
# test a persistent tensor
test4
=
dgl
.
distributed
.
DistTensor
(
new_shape
,
F
.
float32
,
'test4'
,
init_func
=
rand_init
,
test4
=
dgl
.
distributed
.
DistTensor
(
new_shape
,
F
.
float32
,
'test4'
,
init_func
=
rand_init
,
persistent
=
True
)
persistent
=
True
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment