OpenDAS / dgl, commit 3a2a5031 (Unverified)

[API Deprecation] Deprecate candidates in dgl.distributed (#5116)

Authored Jan 06, 2023 by peizhou001; committed by GitHub on Jan 06, 2023.
Parent: ab0c0ec6
Showing 10 changed files with 234 additions and 578 deletions (+234, -578):
python/dgl/distributed/dist_graph.py             +2    -2
python/dgl/distributed/graph_partition_book.py   +7    -271
python/dgl/distributed/partition.py              +119  -177
tests/dist/python/run_dist_objects.py            +1    -1
tests/distributed/test_dist_graph_store.py       +6    -6
tests/distributed/test_distributed_sampling.py   +26   -29
tests/distributed/test_mp_dataloader.py          +1    -6
tests/distributed/test_new_kvstore.py            +8    -6
tests/distributed/test_partition.py              +63   -79
tests/pytorch/test_dist_optim.py                 +1    -1
python/dgl/distributed/dist_graph.py
...
@@ -430,7 +430,7 @@ class DistGraph:
     The example shows the creation of ``DistGraph`` in the standalone mode.

     >>> dgl.distributed.partition_graph(g, 'graph_name', 1, num_hops=1, part_method='metis',
-    ...                                 out_path='output/', reshuffle=True)
+    ...                                 out_path='output/')
     >>> g = dgl.distributed.DistGraph('graph_name', part_config='output/graph_name.json')

     The example shows the creation of ``DistGraph`` in the distributed mode.
...
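For context, a minimal standalone-mode sketch under the new signature (dataset and file paths are placeholders; this mirrors the docstring above and the standalone tests touched further down):

    import os
    import dgl
    from dgl.data import CitationGraphDataset

    g = CitationGraphDataset('cora')[0]
    # No `reshuffle` argument any more; IDs are always shuffled during partitioning.
    dgl.distributed.partition_graph(g, 'graph_name', 1, num_hops=1,
                                    part_method='metis', out_path='output/')
    os.environ['DGL_DIST_MODE'] = 'standalone'
    dgl.distributed.initialize('ip_config.txt')
    g = dgl.distributed.DistGraph('graph_name', part_config='output/graph_name.json')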
@@ -513,7 +513,7 @@ class DistGraph:
         assert self._client is not None, \
             'Distributed module is not initialized. Please call dgl.distributed.initialize.'
         self._g = _get_graph_from_shared_mem(self.graph_name)
-        self._gpb = get_shared_mem_partition_book(self.graph_name, self._g)
+        self._gpb = get_shared_mem_partition_book(self.graph_name)
         if self._gpb is None:
             self._gpb = gpb
         self._client.map_shared_data(self._gpb)
...
python/dgl/distributed/graph_partition_book.py
...
@@ -8,7 +8,7 @@ import numpy as np
 from .. import backend as F
 from .. import utils
 from .._ffi.ndarray import empty_shared_mem
-from ..base import EID, NID, DGLError
+from ..base import DGLError
 from ..ndarray import exist_shared_mem_array
 from ..partition import NDArrayPartition
 from .constants import DEFAULT_ETYPE, DEFAULT_NTYPE
...
@@ -175,7 +175,7 @@ def _get_shared_mem_metadata(graph_name):
     return is_range_part, part_id, num_partitions, node_map, edge_map


-def get_shared_mem_partition_book(graph_name, graph_part):
+def get_shared_mem_partition_book(graph_name):
     """Get a graph partition book from shared memory.

     A graph partition book of a specific graph can be serialized to shared memory.
...
@@ -185,8 +185,6 @@ def get_shared_mem_partition_book(graph_name, graph_part):
     ----------
     graph_name : str
         The name of the graph.
-    graph_part : DGLGraph
-        The graph structure of a partition.

     Returns
     -------
...
@@ -225,9 +223,7 @@ def get_shared_mem_partition_book(graph_name, graph_part):
             part_id, num_parts, node_map, edge_map, ntypes, etypes
         )
-    else:
-        return BasicPartitionBook(
-            part_id, num_parts, node_map_data, edge_map_data, graph_part
-        )
+    raise TypeError("Only RangePartitionBook is supported currently.")


 def get_node_partition_from_book(book, device):
...
@@ -278,14 +274,10 @@ class GraphPartitionBook(ABC):
     * the node IDs and the edge IDs that a partition has.
     * the local IDs of nodes and edges in a partition.

-    Currently, there are two classes that implement ``GraphPartitionBook``:
-    ``BasicGraphPartitionBook`` and ``RangePartitionBook``. ``BasicGraphPartitionBook``
-    stores the mappings between every individual node/edge ID and partition ID on
-    every machine, which usually consumes a lot of memory, while ``RangePartitionBook``
-    calculates the mapping between node/edge IDs and partition IDs based on some small
-    metadata because nodes/edges have been relabeled to have IDs in the same partition
-    fall in a contiguous ID range. ``RangePartitionBook`` is usually a preferred way to
-    provide mappings between node/edge IDs and partition IDs.
+    Currently, only one class implements ``GraphPartitionBook``:
+    ``RangePartitionBook``. It calculates the mapping between node/edge IDs
+    and partition IDs based on some small metadata because nodes/edges have been
+    relabeled so that IDs in the same partition fall in a contiguous ID range.

     A graph partition book is constructed automatically when a graph is partitioned.
     When a graph partition is loaded, a graph partition book is loaded as well.
...
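To make the range-based lookup concrete: with contiguous per-partition ID ranges, mapping an ID to its partition is a binary search over the range boundaries rather than a lookup in a per-ID array. A NumPy toy model (the names `ends` and `nid2partid` here are illustrative, not DGL API):

    import numpy as np

    # End offsets of three contiguous partitions: part 0 owns IDs [0, 100),
    # part 1 owns [100, 250), part 2 owns [250, 400).
    ends = np.array([100, 250, 400])

    def nid2partid(nids):
        # searchsorted with side='right' returns the first partition whose
        # end offset is strictly greater than the ID.
        return np.searchsorted(ends, nids, side='right')

    print(nid2partid(np.array([0, 99, 100, 399])))  # -> [0 0 1 2]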
@@ -541,262 +533,6 @@ class GraphPartitionBook(ABC):
             Homogeneous edge IDs.
         """


-class BasicPartitionBook(GraphPartitionBook):
-    """This provides the most flexible way to store parition information.
-
-    The partition book maintains the mapping of every single node IDs and edge IDs to
-    partition IDs. This is very flexible at the coast of large memory consumption.
-    On a large graph, the mapping consumes significant memory and this partition book
-    is not recommended.
-
-    Parameters
-    ----------
-    part_id : int
-        partition ID of current partition book
-    num_parts : int
-        number of total partitions
-    node_map : tensor
-        global node ID mapping to partition ID
-    edge_map : tensor
-        global edge ID mapping to partition ID
-    part_graph : DGLGraph
-        The graph partition structure.
-    """
-
-    def __init__(self, part_id, num_parts, node_map, edge_map, part_graph):
-        assert part_id >= 0, "part_id cannot be a negative number."
-        assert num_parts > 0, "num_parts must be greater than zero."
-        self._part_id = int(part_id)
-        self._num_partitions = int(num_parts)
-        self._nid2partid = F.tensor(node_map)
-        assert (
-            F.dtype(self._nid2partid) == F.int64
-        ), "the node map must be stored in an integer array"
-        self._eid2partid = F.tensor(edge_map)
-        assert (
-            F.dtype(self._eid2partid) == F.int64
-        ), "the edge map must be stored in an integer array"
-        # Get meta data of the partition book.
-        self._partition_meta_data = []
-        _, nid_count = np.unique(
-            F.asnumpy(self._nid2partid), return_counts=True
-        )
-        _, eid_count = np.unique(
-            F.asnumpy(self._eid2partid), return_counts=True
-        )
-        for partid in range(self._num_partitions):
-            part_info = {}
-            part_info["machine_id"] = partid
-            part_info["num_nodes"] = int(nid_count[partid])
-            part_info["num_edges"] = int(eid_count[partid])
-            self._partition_meta_data.append(part_info)
-        # Get partid2nids
-        self._partid2nids = []
-        sorted_nid = F.tensor(np.argsort(F.asnumpy(self._nid2partid)))
-        start = 0
-        for offset in nid_count:
-            part_nids = sorted_nid[start : start + offset]
-            start += offset
-            self._partid2nids.append(part_nids)
-        # Get partid2eids
-        self._partid2eids = []
-        sorted_eid = F.tensor(np.argsort(F.asnumpy(self._eid2partid)))
-        start = 0
-        for offset in eid_count:
-            part_eids = sorted_eid[start : start + offset]
-            start += offset
-            self._partid2eids.append(part_eids)
-        # Get nidg2l
-        self._nidg2l = [None] * self._num_partitions
-        global_id = part_graph.ndata[NID]
-        max_global_id = np.amax(F.asnumpy(global_id))
-        # TODO(chao): support int32 index
-        g2l = F.zeros((max_global_id + 1), F.int64, F.context(global_id))
-        g2l = F.scatter_row(g2l, global_id, F.arange(0, len(global_id)))
-        self._nidg2l[self._part_id] = g2l
-        # Get eidg2l
-        self._eidg2l = [None] * self._num_partitions
-        global_id = part_graph.edata[EID]
-        max_global_id = np.amax(F.asnumpy(global_id))
-        # TODO(chao): support int32 index
-        g2l = F.zeros((max_global_id + 1), F.int64, F.context(global_id))
-        g2l = F.scatter_row(g2l, global_id, F.arange(0, len(global_id)))
-        self._eidg2l[self._part_id] = g2l
-        # node size and edge size
-        self._edge_size = len(self.partid2eids(self._part_id))
-        self._node_size = len(self.partid2nids(self._part_id))
-
-    def shared_memory(self, graph_name):
-        """Move data to shared memory."""
-        (
-            self._meta,
-            self._nid2partid,
-            self._eid2partid,
-        ) = _move_metadata_to_shared_mem(
-            graph_name,
-            self._num_nodes(),
-            self._num_edges(),
-            self._part_id,
-            self._num_partitions,
-            self._nid2partid,
-            self._eid2partid,
-            False,
-        )
-
-    def num_partitions(self):
-        """Return the number of partitions."""
-        return self._num_partitions
-
-    def metadata(self):
-        """Return the partition meta data."""
-        return self._partition_meta_data
-
-    def _num_nodes(self, ntype=DEFAULT_NTYPE):
-        """The total number of nodes"""
-        assert (
-            ntype == DEFAULT_NTYPE
-        ), "Base partition book only supports homogeneous graph."
-        return len(self._nid2partid)
-
-    def _num_edges(self, etype=DEFAULT_ETYPE):
-        """The total number of edges"""
-        assert etype in (
-            DEFAULT_ETYPE,
-            DEFAULT_ETYPE[1],
-        ), "Base partition book only supports homogeneous graph."
-        return len(self._eid2partid)
-
-    def map_to_per_ntype(self, ids):
-        """Map global homogeneous node IDs to node type IDs.
-        Returns
-            type_ids, per_type_ids
-        """
-        return F.zeros((len(ids),), F.int32, F.cpu()), ids
-
-    def map_to_per_etype(self, ids):
-        """Map global homogeneous edge IDs to edge type IDs.
-        Returns
-            type_ids, per_type_ids
-        """
-        return F.zeros((len(ids),), F.int32, F.cpu()), ids
-
-    def map_to_homo_nid(self, ids, ntype=DEFAULT_NTYPE):
-        """Map per-node-type IDs to global node IDs in the homogeneous format."""
-        assert (
-            ntype == DEFAULT_NTYPE
-        ), "Base partition book only supports homogeneous graph."
-        return ids
-
-    def map_to_homo_eid(self, ids, etype=DEFAULT_ETYPE):
-        """Map per-edge-type IDs to global edge IDs in the homoenegeous format."""
-        assert etype in (
-            DEFAULT_ETYPE,
-            DEFAULT_ETYPE[1],
-        ), "Base partition book only supports homogeneous graph."
-        return ids
-
-    def nid2partid(self, nids, ntype=DEFAULT_NTYPE):
-        """From global node IDs to partition IDs"""
-        assert (
-            ntype == DEFAULT_NTYPE
-        ), "Base partition book only supports homogeneous graph."
-        return F.gather_row(self._nid2partid, nids)
-
-    def eid2partid(self, eids, etype=DEFAULT_ETYPE):
-        """From global edge IDs to partition IDs"""
-        assert etype in (
-            DEFAULT_ETYPE,
-            DEFAULT_ETYPE[1],
-        ), "Base partition book only supports homogeneous graph."
-        return F.gather_row(self._eid2partid, eids)
-
-    def partid2nids(self, partid, ntype=DEFAULT_NTYPE):
-        """From partition id to global node IDs"""
-        assert (
-            ntype == DEFAULT_NTYPE
-        ), "Base partition book only supports homogeneous graph."
-        return self._partid2nids[partid]
-
-    def partid2eids(self, partid, etype=DEFAULT_ETYPE):
-        """From partition id to global edge IDs"""
-        assert etype in (
-            DEFAULT_ETYPE,
-            DEFAULT_ETYPE[1],
-        ), "Base partition book only supports homogeneous graph."
-        return self._partid2eids[partid]
-
-    def nid2localnid(self, nids, partid, ntype=DEFAULT_NTYPE):
-        """Get local node IDs within the given partition."""
-        assert (
-            ntype == DEFAULT_NTYPE
-        ), "Base partition book only supports homogeneous graph."
-        if partid != self._part_id:
-            raise RuntimeError(
-                "Now GraphPartitionBook does not support \
-                getting remote tensor of nid2localnid."
-            )
-        return F.gather_row(self._nidg2l[partid], nids)
-
-    def eid2localeid(self, eids, partid, etype=DEFAULT_ETYPE):
-        """Get the local edge ids within the given partition."""
-        assert etype in (
-            DEFAULT_ETYPE,
-            DEFAULT_ETYPE[1],
-        ), "Base partition book only supports homogeneous graph."
-        if partid != self._part_id:
-            raise RuntimeError(
-                "Now GraphPartitionBook does not support \
-                getting remote tensor of eid2localeid."
-            )
-        return F.gather_row(self._eidg2l[partid], eids)
-
-    @property
-    def partid(self):
-        """Get the current partition ID"""
-        return self._part_id
-
-    @property
-    def ntypes(self):
-        """Get the list of node types"""
-        return [DEFAULT_NTYPE]
-
-    @property
-    def etypes(self):
-        """Get the list of edge types"""
-        return [DEFAULT_ETYPE[1]]
-
-    @property
-    def canonical_etypes(self):
-        """Get the list of canonical edge types
-
-        Returns
-        -------
-        list[(str, str, str)]
-            A list of canonical etypes
-        """
-        return [DEFAULT_ETYPE]
-
-    def to_canonical_etype(self, etype):
-        """Convert an edge type to the corresponding canonical edge type.
-
-        Parameters
-        ----------
-        etype : str or (str, str, str)
-            The edge type
-
-        Returns
-        -------
-        (str, str, str)
-            The corresponding canonical edge type
-        """
-        assert etype in (
-            DEFAULT_ETYPE,
-            DEFAULT_ETYPE[1],
-        ), "Base partition book only supports homogeneous graph."
-        return self.canonical_etypes[0]


 class RangePartitionBook(GraphPartitionBook):
     """This partition book supports more efficient storage of partition information.
...
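With ``BasicPartitionBook`` gone, any code that built one directly has to construct a ``RangePartitionBook`` instead, as the ``test_new_kvstore.py`` change below does. A hedged sketch of the migration for a one-partition homogeneous graph (values mirror that test; this assumes node/edge IDs already fall in contiguous per-partition ranges):

    import torch as th
    import dgl

    # Per-type [start, end) ranges replace the old per-ID partition arrays.
    node_map = {'_N': th.tensor([[0, 6]], dtype=th.int64)}
    edge_map = {('_N', '_E', '_N'): th.tensor([[0, 7]], dtype=th.int64)}
    gpb = dgl.distributed.graph_partition_book.RangePartitionBook(
        part_id=0, num_parts=1, node_map=node_map, edge_map=edge_map,
        ntypes={'_N': 0}, etypes={('_N', '_E', '_N'): 0},
    )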
python/dgl/distributed/partition.py
...
@@ -6,7 +6,7 @@ import time
 import numpy as np

 from .. import backend as F
-from ..base import NID, EID, NTYPE, ETYPE, dgl_warning, DGLError
+from ..base import NID, EID, NTYPE, ETYPE, DGLError
 from ..convert import to_homogeneous
 from ..random import choice as random_choice
 from ..transforms import sort_csr_by_tag, sort_csc_by_tag
...
@@ -18,7 +18,6 @@ from ..partition import (
 )
 from .constants import DEFAULT_ETYPE, DEFAULT_NTYPE
 from .graph_partition_book import (
-    BasicPartitionBook,
     RangePartitionBook,
     _etype_tuple_to_str,
     _etype_str_to_tuple,
...
@@ -173,7 +172,7 @@ def load_partition(part_config, part_id, load_feats=True):
     assert NID in graph.ndata, "the partition graph should contain node mapping to global node ID"
     assert EID in graph.edata, "the partition graph should contain edge mapping to global edge ID"

-    gpb, graph_name, ntypes, etypes = load_partition_book(part_config, part_id, graph)
+    gpb, graph_name, ntypes, etypes = load_partition_book(part_config, part_id)
     ntypes_list = list(ntypes.keys())
     etypes_list = list(etypes.keys())
     if 'DGL_DIST_DEBUG' in os.environ:
...
@@ -268,7 +267,7 @@ def load_partition_feats(part_config, part_id, load_nodes=True, load_edges=True):
     return node_feats, edge_feats


-def load_partition_book(part_config, part_id, graph=None):
+def load_partition_book(part_config, part_id):
     '''Load a graph partition book from the partition config file.

     Parameters
...
@@ -277,8 +276,6 @@ def load_partition_book(part_config, part_id, graph=None):
         The path of the partition config file.
     part_id : int
         The partition ID.
-    graph : DGLGraph
-        The graph structure

     Returns
     -------
...
@@ -333,18 +330,15 @@ def load_partition_book(part_config, part_id, graph=None):
     for key in edge_map:
         assert key in etypes, 'The edge type {} is invalid'.format(key)

-    if is_range_part:
-        node_map = _get_part_ranges(node_map)
-        edge_map = _get_part_ranges(edge_map)
-        return RangePartitionBook(part_id, num_parts, node_map, edge_map, ntypes, etypes), \
-            part_metadata['graph_name'], ntypes, etypes
-    else:
-        node_map = np.load(node_map)
-        edge_map = np.load(edge_map)
-        return BasicPartitionBook(part_id, num_parts, node_map, edge_map, graph), \
-            part_metadata['graph_name'], ntypes, etypes
+    if not is_range_part:
+        raise TypeError("Only RangePartitionBook is supported currently.")
+    node_map = _get_part_ranges(node_map)
+    edge_map = _get_part_ranges(edge_map)
+    return RangePartitionBook(part_id, num_parts, node_map, edge_map, ntypes, etypes), \
+        part_metadata['graph_name'], ntypes, etypes


-def _get_orig_ids(g, sim_g, reshuffle, orig_nids, orig_eids):
+def _get_orig_ids(g, sim_g, orig_nids, orig_eids):
     '''Convert/construct the original node IDs and edge IDs.

     It handles multiple cases:
...
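Callers drop the third argument accordingly; a minimal sketch matching the updated test call sites in this commit (the JSON path is a placeholder):

    from dgl.distributed import load_partition_book

    # The `graph` argument is gone: only the config path and partition ID remain.
    gpb, graph_name, ntypes, etypes = load_partition_book(
        "/tmp/dist_graph/{}.json".format("graph_name"), part_id=0
    )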
@@ -361,8 +355,6 @@ def _get_orig_ids(g, sim_g, reshuffle, orig_nids, orig_eids):
         The input graph for partitioning.
     sim_g : DGLGraph
         The homogeneous version of the input graph.
-    reshuffle : bool
-        Whether the input graph is reshuffled during partitioning.
     orig_nids : tensor or None
         The original node IDs after the input graph is reshuffled.
     orig_eids : tensor or None
...
@@ -373,7 +365,7 @@ def _get_orig_ids(g, sim_g, reshuffle, orig_nids, orig_eids):
         tensor or dict of tensors, tensor or dict of tensors
     '''
     is_hetero = not g.is_homogeneous
-    if reshuffle and is_hetero:
+    if is_hetero:
         # Get the type IDs
         orig_ntype = F.gather_row(sim_g.ndata[NTYPE], orig_nids)
         orig_etype = F.gather_row(sim_g.edata[ETYPE], orig_eids)
...
@@ -384,12 +376,6 @@ def _get_orig_ids(g, sim_g, reshuffle, orig_nids, orig_eids):
                      for ntype in g.ntypes}
         orig_eids = {etype: F.boolean_mask(orig_eids, orig_etype == g.get_etype_id(etype)) \
                      for etype in g.canonical_etypes}
-    elif not reshuffle and not is_hetero:
-        orig_nids = F.arange(0, sim_g.number_of_nodes())
-        orig_eids = F.arange(0, sim_g.number_of_edges())
-    elif not reshuffle:
-        orig_nids = {ntype: F.arange(0, g.number_of_nodes(ntype)) for ntype in g.ntypes}
-        orig_eids = {etype: F.arange(0, g.number_of_edges(etype))
-                     for etype in g.canonical_etypes}
     return orig_nids, orig_eids


 def _set_trainer_ids(g, sim_g, node_parts):
...
@@ -425,9 +411,8 @@ def _set_trainer_ids(g, sim_g, node_parts):
         g.edges[c_etype].data['trainer_id'] = trainer_id


 def partition_graph(g, graph_name, num_parts, out_path, num_hops=1, part_method="metis",
-                    reshuffle=True, balance_ntypes=None, balance_edges=False,
-                    return_mapping=False, num_trainers_per_machine=1, objtype='cut',
-                    graph_formats=None):
+                    balance_ntypes=None, balance_edges=False,
+                    return_mapping=False, num_trainers_per_machine=1, objtype='cut',
+                    graph_formats=None):
     ''' Partition a graph for distributed training and store the partitions on files.

     The partitioning occurs in three steps: 1) run a partition algorithm (e.g., Metis) to
...
@@ -506,16 +491,7 @@ def partition_graph(g, graph_name, num_parts, out_path, num_hops=1, part_method=
     * ``num_edges`` is the number of edges in the global graph.

     * `part-*` stores the data of a partition.

-    If ``reshuffle=False``, node IDs and edge IDs of a partition do not fall into contiguous
-    ID ranges. In this case, DGL stores node/edge mappings (from
-    node/edge IDs to partition IDs) in separate files (node_map.npy and edge_map.npy).
-    The node/edge mappings are stored in numpy files.
-
-    .. warning::
-        this format is deprecated and will not be supported by the next release. In other words,
-        the future release will always shuffle node IDs and edge IDs when partitioning a graph.
-
-    If ``reshuffle=True``, ``node_map`` and ``edge_map`` contains the information
+    As node/edge IDs are reshuffled, ``node_map`` and ``edge_map`` contain the information
     for mapping between global node/edge IDs to partition-local node/edge IDs.
     For heterogeneous graphs, the information in ``node_map`` and ``edge_map`` can also be used
     to compute node types and edge types. The format of the data in ``node_map`` and ``edge_map``
...
@@ -583,10 +559,6 @@ def partition_graph(g, graph_name, num_parts, out_path, num_hops=1, part_method=
         The default value is 1.
     part_method : str, optional
         The partition method. It supports "random" and "metis". The default value is "metis".
-    reshuffle : bool, optional
-        Reshuffle nodes and edges so that nodes and edges in a partition are in
-        contiguous ID range. The default value is True. The argument is deprecated
-        and will be removed in the next release.
     balance_ntypes : tensor, optional
         Node type of each node. This is a 1D-array of integers. Its values indicates the node
         type of each node. This argument is used by Metis partition. When the argument is
...
@@ -597,8 +569,8 @@ def partition_graph(g, graph_name, num_parts, out_path, num_hops=1, part_method=
         Indicate whether to balance the edges in each partition. This argument is used by
         the Metis algorithm.
     return_mapping : bool
-        If `reshuffle=True`, this indicates to return the mapping between shuffled node/edge IDs
-        and the original node/edge IDs.
+        Indicate whether to return the mapping between shuffled node/edge IDs
+        and the original node/edge IDs.
     num_trainers_per_machine : int, optional
         The number of trainers per machine. If is not 1, the whole graph will be first partitioned
         to each trainer, that is num_parts*num_trainers_per_machine parts. And the trainer ids of
...
@@ -630,7 +602,7 @@ def partition_graph(g, graph_name, num_parts, out_path, num_hops=1, part_method=
     Examples
     --------
     >>> dgl.distributed.partition_graph(g, 'test', 4, num_hops=1, part_method='metis',
-    ...                                 out_path='output/', reshuffle=True,
+    ...                                 out_path='output/',
     ...                                 balance_ntypes=g.ndata['train_mask'],
     ...                                 balance_edges=True)
     >>> (
...
@@ -679,10 +651,6 @@ def partition_graph(g, graph_name, num_parts, out_path, num_hops=1, part_method=
     if objtype not in ['cut', 'vol']:
         raise ValueError

-    if not reshuffle:
-        dgl_warning("The argument reshuffle will be deprecated in the next release. "
-                    "For heterogeneous graphs, reshuffle must be enabled.")
-
     if num_parts == 1:
         start = time.time()
         sim_g, balance_ntypes = get_homogeneous(g, balance_ntypes)
...
@@ -708,11 +676,17 @@ def partition_graph(g, graph_name, num_parts, out_path, num_hops=1, part_method=
         orig_eids = parts[0].edata[EID] = F.arange(0, sim_g.number_of_edges())
         # For one partition, we don't really shuffle nodes and edges. We just need to simulate
         # it and set node data and edge data of orig_id.
-        if reshuffle:
-            parts[0].ndata['orig_id'] = orig_nids
-            parts[0].edata['orig_id'] = orig_eids
+        parts[0].ndata['orig_id'] = orig_nids
+        parts[0].edata['orig_id'] = orig_eids
         if return_mapping:
-            orig_nids, orig_eids = _get_orig_ids(g, sim_g, False, orig_nids, orig_eids)
+            if g.is_homogeneous:
+                orig_nids = F.arange(0, sim_g.number_of_nodes())
+                orig_eids = F.arange(0, sim_g.number_of_edges())
+            else:
+                orig_nids = {ntype: F.arange(0, g.number_of_nodes(ntype))
+                             for ntype in g.ntypes}
+                orig_eids = {etype: F.arange(0, g.number_of_edges(etype))
+                             for etype in g.canonical_etypes}
         parts[0].ndata['inner_node'] = F.ones((sim_g.number_of_nodes(),),
                                               RESERVED_FIELD_DTYPE['inner_node'], F.cpu())
         parts[0].edata['inner_edge'] = F.ones((sim_g.number_of_edges(),),
...
@@ -749,11 +723,11 @@ def partition_graph(g, graph_name, num_parts, out_path, num_hops=1, part_method=
             node_parts = random_choice(num_parts, sim_g.number_of_nodes())
         start = time.time()
         parts, orig_nids, orig_eids = partition_graph_with_halo(sim_g, node_parts, num_hops,
-                                                                reshuffle=reshuffle)
+                                                                reshuffle=True)
         print('Splitting the graph into partitions takes {:.3f}s, peak mem: {:.3f} GB'.format(
             time.time() - start, get_peak_mem()))
         if return_mapping:
-            orig_nids, orig_eids = _get_orig_ids(g, sim_g, reshuffle, orig_nids, orig_eids)
+            orig_nids, orig_eids = _get_orig_ids(g, sim_g, orig_nids, orig_eids)
     else:
         raise Exception('Unknown partitioning method: ' + part_method)
...
@@ -763,7 +737,6 @@ def partition_graph(g, graph_name, num_parts, out_path, num_hops=1, part_method=
     # orig_id: the global node IDs in the homogeneous version of input graph.
     # NID: the global node IDs in the reshuffled homogeneous version of the input graph.
     if not g.is_homogeneous:
-        if reshuffle:
-            for name in parts:
-                orig_ids = parts[name].ndata['orig_id']
-                ntype = F.gather_row(sim_g.ndata[NTYPE], orig_ids)
+        for name in parts:
+            orig_ids = parts[name].ndata['orig_id']
+            ntype = F.gather_row(sim_g.ndata[NTYPE], orig_ids)
...
@@ -797,34 +770,11 @@ def partition_graph(g, graph_name, num_parts, out_path, num_hops=1, part_method=
                 typed_eids = np.sort(F.asnumpy(F.boolean_mask(inner_eids, inner_etype_mask)))
                 assert np.all(typed_eids == np.arange(
                     int(typed_eids[0]), int(typed_eids[-1]) + 1))
-        else:
-            raise NotImplementedError('not shuffled case')

-    # Let's calculate edge assignment.
-    if not reshuffle:
-        start = time.time()
-        # We only optimize for reshuffled case. So it's fine to use int64 here.
-        edge_parts = np.zeros((g.number_of_edges(),), dtype=np.int64) - 1
-        for part_id in parts:
-            part = parts[part_id]
-            # To get the edges in the input graph, we should use original node IDs.
-            local_edges = F.boolean_mask(part.edata[EID], part.edata['inner_edge'])
-            edge_parts[F.asnumpy(local_edges)] = part_id
-        print('Calculate edge assignment: {:.3f} seconds'.format(time.time() - start))

     os.makedirs(out_path, mode=0o775, exist_ok=True)
     tot_num_inner_edges = 0
     out_path = os.path.abspath(out_path)

-    # Without reshuffling, we have to store the entire node/edge mapping in a file.
-    if not reshuffle:
-        node_part_file = os.path.join(out_path, "node_map")
-        edge_part_file = os.path.join(out_path, "edge_map")
-        np.save(node_part_file, F.asnumpy(node_parts), allow_pickle=False)
-        np.save(edge_part_file, edge_parts, allow_pickle=False)
-        node_map_val = node_part_file + ".npy"
-        edge_map_val = edge_part_file + ".npy"
-    else:
-        # With reshuffling, we can ensure that all nodes and edges are reshuffled
-        # and are in contiguous ID space.
-        if num_parts > 1:
+    # With reshuffling, we can ensure that all nodes and edges are reshuffled
+    # and are in contiguous ID space.
+    if num_parts > 1:
...
@@ -902,7 +852,7 @@ def partition_graph(g, graph_name, num_parts, out_path, num_hops=1, part_method=
             ntype_id = g.get_ntype_id(ntype)
             # To get the edges in the input graph, we should use original node IDs.
             # Both orig_id and NID stores the per-node-type IDs.
-            ndata_name = 'orig_id' if reshuffle else NID
+            ndata_name = 'orig_id'
             inner_node_mask = _get_inner_node_mask(part, ntype_id)
             # This is global node IDs.
             local_nodes = F.boolean_mask(part.ndata[ndata_name], inner_node_mask)
...
@@ -924,7 +874,7 @@ def partition_graph(g, graph_name, num_parts, out_path, num_hops=1, part_method=
             for etype in g.canonical_etypes:
                 etype_id = g.get_etype_id(etype)
-                edata_name = 'orig_id' if reshuffle else EID
+                edata_name = 'orig_id'
                 inner_edge_mask = _get_inner_edge_mask(part, etype_id)
                 # This is global edge IDs.
                 local_edges = F.boolean_mask(part.edata[edata_name], inner_edge_mask)
...
@@ -945,44 +895,36 @@ def partition_graph(g, graph_name, num_parts, out_path, num_hops=1, part_method=
                         g.edges[etype].data[name], local_edges)
         else:
             for ntype in g.ntypes:
-                if reshuffle and len(g.ntypes) > 1:
+                if len(g.ntypes) > 1:
                     ndata_name = 'orig_id'
                     ntype_id = g.get_ntype_id(ntype)
                     inner_node_mask = _get_inner_node_mask(part, ntype_id)
                     # This is global node IDs.
                     local_nodes = F.boolean_mask(part.ndata[ndata_name], inner_node_mask)
                     local_nodes = F.gather_row(sim_g.ndata[NID], local_nodes)
-                elif reshuffle:
+                else:
                     local_nodes = sim_g.ndata[NID]
                 for name in g.nodes[ntype].data:
                     if name in [NID, 'inner_node']:
                         continue
-                    if reshuffle:
-                        node_feats[ntype + '/' + name] = F.gather_row(
-                            g.nodes[ntype].data[name], local_nodes)
-                    else:
-                        node_feats[ntype + '/' + name] = g.nodes[ntype].data[name]
+                    node_feats[ntype + '/' + name] = F.gather_row(
+                        g.nodes[ntype].data[name], local_nodes)
             for etype in g.canonical_etypes:
-                if reshuffle and not g.is_homogeneous:
+                if not g.is_homogeneous:
                     edata_name = 'orig_id'
                     etype_id = g.get_etype_id(etype)
                     inner_edge_mask = _get_inner_edge_mask(part, etype_id)
                     # This is global edge IDs.
                     local_edges = F.boolean_mask(part.edata[edata_name], inner_edge_mask)
                     local_edges = F.gather_row(sim_g.edata[EID], local_edges)
-                elif reshuffle:
+                else:
                     local_edges = sim_g.edata[EID]
                 for name in g.edges[etype].data:
                     if name in [EID, 'inner_edge']:
                         continue
-                    if reshuffle:
-                        edge_feats[_etype_tuple_to_str(etype) + '/' + name] = F.gather_row(
-                            g.edges[etype].data[name], local_edges)
-                    else:
-                        edge_feats[_etype_tuple_to_str(etype) + '/' + name] = \
-                            g.edges[etype].data[name]
+                    edge_feats[_etype_tuple_to_str(etype) + '/' + name] = F.gather_row(
+                        g.edges[etype].data[name], local_edges)
         # delete `orig_id` from ndata/edata
-        if reshuffle:
-            del part.ndata['orig_id']
-            del part.edata['orig_id']
+        del part.ndata['orig_id']
+        del part.edata['orig_id']
...
tests/dist/python/run_dist_objects.py
...
@@ -682,7 +682,7 @@ elif mode == "client":
     dgl.distributed.initialize(ip_config, net_type=net_type)
     gpb, graph_name, _, _ = load_partition_book(
-        graph_path + "/{}.json".format(graph_name), part_id, None
+        graph_path + "/{}.json".format(graph_name), part_id
     )
     g = dgl.distributed.DistGraph(graph_name, gpb=gpb)
...
tests/distributed/test_dist_graph_store.py
...
@@ -119,7 +119,7 @@ def run_client_empty(
     os.environ["DGL_NUM_SERVER"] = str(server_count)
     dgl.distributed.initialize("kv_ip_config.txt")
     gpb, graph_name, _, _ = load_partition_book(
-        "/tmp/dist_graph/{}.json".format(graph_name), part_id, None
+        "/tmp/dist_graph/{}.json".format(graph_name), part_id
     )
     g = DistGraph(graph_name, gpb=gpb)
     check_dist_graph_empty(g, num_clients, num_nodes, num_edges)
...
@@ -187,7 +187,7 @@ def run_client(
     os.environ["DGL_GROUP_ID"] = str(group_id)
     dgl.distributed.initialize("kv_ip_config.txt")
     gpb, graph_name, _, _ = load_partition_book(
-        "/tmp/dist_graph/{}.json".format(graph_name), part_id, None
+        "/tmp/dist_graph/{}.json".format(graph_name), part_id
     )
     g = DistGraph(graph_name, gpb=gpb)
     check_dist_graph(g, num_clients, num_nodes, num_edges)
...
@@ -206,7 +206,7 @@ def run_emb_client(
     os.environ["DGL_GROUP_ID"] = str(group_id)
     dgl.distributed.initialize("kv_ip_config.txt")
     gpb, graph_name, _, _ = load_partition_book(
-        "/tmp/dist_graph/{}.json".format(graph_name), part_id, None
+        "/tmp/dist_graph/{}.json".format(graph_name), part_id
     )
     g = DistGraph(graph_name, gpb=gpb)
     check_dist_emb(g, num_clients, num_nodes, num_edges)
...
@@ -230,7 +230,7 @@ def run_optim_client(
         backend="gloo", rank=rank, world_size=world_size
     )
     gpb, graph_name, _, _ = load_partition_book(
-        "/tmp/dist_graph/{}.json".format(graph_name), part_id, None
+        "/tmp/dist_graph/{}.json".format(graph_name), part_id
     )
     g = DistGraph(graph_name, gpb=gpb)
     check_dist_optim_store(rank, num_nodes, optimizer_states, save)
...
@@ -279,7 +279,7 @@ def run_client_hierarchy(
     os.environ["DGL_NUM_SERVER"] = str(server_count)
     dgl.distributed.initialize("kv_ip_config.txt")
     gpb, graph_name, _, _ = load_partition_book(
-        "/tmp/dist_graph/{}.json".format(graph_name), part_id, None
+        "/tmp/dist_graph/{}.json".format(graph_name), part_id
     )
     g = DistGraph(graph_name, gpb=gpb)
     node_mask = F.tensor(node_mask)
...
@@ -687,7 +687,7 @@ def run_client_hetero(
     os.environ["DGL_NUM_SERVER"] = str(server_count)
     dgl.distributed.initialize("kv_ip_config.txt")
     gpb, graph_name, _, _ = load_partition_book(
-        "/tmp/dist_graph/{}.json".format(graph_name), part_id, None
+        "/tmp/dist_graph/{}.json".format(graph_name), part_id
     )
     g = DistGraph(graph_name, gpb=gpb)
     check_dist_graph_hetero(g, num_clients, num_nodes, num_edges)
...
tests/distributed/test_distributed_sampling.py
...
@@ -101,7 +101,7 @@ def check_rpc_sampling(tmpdir, num_server):
     num_hops = 1

     partition_graph(g, 'test_sampling', num_parts, tmpdir,
-                    num_hops=num_hops, part_method='metis', reshuffle=False)
+                    num_hops=num_hops, part_method='metis')

     pserver_list = []
     ctx = mp.get_context('spawn')
...
@@ -132,7 +132,7 @@ def check_rpc_find_edges_shuffle(tmpdir, num_server):
     orig_nid, orig_eid = partition_graph(g, 'test_find_edges', num_parts, tmpdir,
-                                         num_hops=1, part_method='metis',
-                                         reshuffle=True, return_mapping=True)
+                                         num_hops=1, part_method='metis',
+                                         return_mapping=True)

     pserver_list = []
     ctx = mp.get_context('spawn')
...
@@ -178,7 +178,7 @@ def check_rpc_hetero_find_edges_shuffle(tmpdir, num_server):
     orig_nid, orig_eid = partition_graph(g, 'test_find_edges', num_parts, tmpdir,
-                                         num_hops=1, part_method='metis',
-                                         reshuffle=True, return_mapping=True)
+                                         num_hops=1, part_method='metis',
+                                         return_mapping=True)

     pserver_list = []
     ctx = mp.get_context('spawn')
...
@@ -227,7 +227,7 @@ def check_rpc_get_degree_shuffle(tmpdir, num_server):
     num_parts = num_server

     orig_nid, _ = partition_graph(g, 'test_get_degrees', num_parts, tmpdir,
-                                  num_hops=1, part_method='metis',
-                                  reshuffle=True, return_mapping=True)
+                                  num_hops=1, part_method='metis',
+                                  return_mapping=True)

     pserver_list = []
     ctx = mp.get_context('spawn')
...
@@ -281,7 +281,7 @@ def check_rpc_sampling_shuffle(tmpdir, num_server, num_groups=1):
     num_hops = 1

     orig_nids, orig_eids = partition_graph(g, 'test_sampling', num_parts, tmpdir,
-                                           num_hops=num_hops, part_method='metis',
-                                           reshuffle=True, return_mapping=True)
+                                           num_hops=num_hops, part_method='metis',
+                                           return_mapping=True)

     pserver_list = []
     ctx = mp.get_context('spawn')
...
@@ -379,7 +379,7 @@ def check_rpc_hetero_sampling_shuffle(tmpdir, num_server):
     num_hops = 1

     orig_nid_map, orig_eid_map = partition_graph(g, 'test_sampling', num_parts, tmpdir,
-                                                 num_hops=num_hops, part_method='metis',
-                                                 reshuffle=True, return_mapping=True)
+                                                 num_hops=num_hops, part_method='metis',
+                                                 return_mapping=True)

     pserver_list = []
     ctx = mp.get_context('spawn')
...
@@ -431,7 +431,7 @@ def check_rpc_hetero_sampling_empty_shuffle(tmpdir, num_server):
     orig_nids, _ = partition_graph(g, 'test_sampling', num_parts, tmpdir,
-                                   num_hops=num_hops, part_method='metis',
-                                   reshuffle=True, return_mapping=True)
+                                   num_hops=num_hops, part_method='metis',
+                                   return_mapping=True)

     pserver_list = []
     ctx = mp.get_context('spawn')
...
@@ -461,7 +461,7 @@ def check_rpc_hetero_etype_sampling_shuffle(tmpdir, num_server, graph_formats=None):
     num_hops = 1

     orig_nid_map, orig_eid_map = partition_graph(g, 'test_sampling', num_parts, tmpdir,
-                                                 num_hops=num_hops, part_method='metis',
-                                                 reshuffle=True, return_mapping=True,
+                                                 num_hops=num_hops, part_method='metis',
+                                                 return_mapping=True,
                                                  graph_formats=graph_formats)

     pserver_list = []
...
@@ -515,7 +515,7 @@ def check_rpc_hetero_etype_sampling_empty_shuffle(tmpdir, num_server):
     orig_nids, _ = partition_graph(g, 'test_sampling', num_parts, tmpdir,
-                                   num_hops=num_hops, part_method='metis',
-                                   reshuffle=True, return_mapping=True)
+                                   num_hops=num_hops, part_method='metis',
+                                   return_mapping=True)

     pserver_list = []
     ctx = mp.get_context('spawn')
...
@@ -607,7 +607,7 @@ def check_rpc_bipartite_sampling_empty(tmpdir, num_server):
     num_hops = 1

     orig_nids, _ = partition_graph(g, 'test_sampling', num_parts, tmpdir,
-                                   num_hops=num_hops, part_method='metis',
-                                   reshuffle=True, return_mapping=True)
+                                   num_hops=num_hops, part_method='metis',
+                                   return_mapping=True)

     pserver_list = []
     ctx = mp.get_context('spawn')
...
@@ -641,7 +641,7 @@ def check_rpc_bipartite_sampling_shuffle(tmpdir, num_server):
     num_hops = 1

     orig_nid_map, orig_eid_map = partition_graph(g, 'test_sampling', num_parts, tmpdir,
-                                                 num_hops=num_hops, part_method='metis',
-                                                 reshuffle=True, return_mapping=True)
+                                                 num_hops=num_hops, part_method='metis',
+                                                 return_mapping=True)

     pserver_list = []
     ctx = mp.get_context('spawn')
...
@@ -692,7 +692,7 @@ def check_rpc_bipartite_etype_sampling_empty(tmpdir, num_server):
     num_hops = 1

     orig_nids, _ = partition_graph(g, 'test_sampling', num_parts, tmpdir,
-                                   num_hops=num_hops, part_method='metis',
-                                   reshuffle=True, return_mapping=True)
+                                   num_hops=num_hops, part_method='metis',
+                                   return_mapping=True)

     pserver_list = []
     ctx = mp.get_context('spawn')
...
@@ -727,7 +727,7 @@ def check_rpc_bipartite_etype_sampling_shuffle(tmpdir, num_server):
     num_hops = 1

     orig_nid_map, orig_eid_map = partition_graph(g, 'test_sampling', num_parts, tmpdir,
-                                                 num_hops=num_hops, part_method='metis',
-                                                 reshuffle=True, return_mapping=True)
+                                                 num_hops=num_hops, part_method='metis',
+                                                 return_mapping=True)

     pserver_list = []
     ctx = mp.get_context('spawn')
...
@@ -795,7 +795,7 @@ def test_rpc_sampling_shuffle(num_server):
         check_rpc_bipartite_etype_sampling_empty(Path(tmpdirname), num_server)
         check_rpc_bipartite_etype_sampling_shuffle(Path(tmpdirname), num_server)

-def check_standalone_sampling(tmpdir, reshuffle):
+def check_standalone_sampling(tmpdir):
     g = CitationGraphDataset("cora")[0]
     prob = np.maximum(np.random.randn(g.num_edges()), 0)
     mask = (prob > 0)
...
@@ -804,7 +804,7 @@ def check_standalone_sampling(tmpdir, reshuffle):
     num_parts = 1
     num_hops = 1
     partition_graph(g, 'test_sampling', num_parts, tmpdir,
-                    num_hops=num_hops, part_method='metis', reshuffle=reshuffle)
+                    num_hops=num_hops, part_method='metis')

     os.environ['DGL_DIST_MODE'] = 'standalone'
     dgl.distributed.initialize("rpc_ip_config.txt")
...
@@ -829,7 +829,7 @@ def check_standalone_sampling(tmpdir, reshuffle):
         assert (prob[eid] > 0).all()
     dgl.distributed.exit_client()

-def check_standalone_etype_sampling(tmpdir, reshuffle):
+def check_standalone_etype_sampling(tmpdir):
     hg = CitationGraphDataset('cora')[0]
     prob = np.maximum(np.random.randn(hg.num_edges()), 0)
     mask = (prob > 0)
...
@@ -839,7 +839,7 @@ def check_standalone_etype_sampling(tmpdir, reshuffle):
     num_hops = 1
     partition_graph(hg, 'test_sampling', num_parts, tmpdir,
-                    num_hops=num_hops, part_method='metis', reshuffle=reshuffle)
+                    num_hops=num_hops, part_method='metis')

     os.environ['DGL_DIST_MODE'] = 'standalone'
     dgl.distributed.initialize("rpc_ip_config.txt")
     dist_graph = DistGraph("test_sampling", part_config=tmpdir / 'test_sampling.json')
...
@@ -863,7 +863,7 @@ def check_standalone_etype_sampling(tmpdir, reshuffle):
         assert (prob[eid] > 0).all()
     dgl.distributed.exit_client()

-def check_standalone_etype_sampling_heterograph(tmpdir, reshuffle):
+def check_standalone_etype_sampling_heterograph(tmpdir):
     hg = CitationGraphDataset('cora')[0]
     num_parts = 1
     num_hops = 1
...
@@ -872,7 +872,7 @@ def check_standalone_etype_sampling_heterograph(tmpdir, reshuffle):
         ('paper', 'cite-by', 'paper'): (dst, src)},
         {'paper': hg.number_of_nodes()})
     partition_graph(new_hg, 'test_hetero_sampling', num_parts, tmpdir,
-                    num_hops=num_hops, part_method='metis', reshuffle=reshuffle)
+                    num_hops=num_hops, part_method='metis')

     os.environ['DGL_DIST_MODE'] = 'standalone'
     dgl.distributed.initialize("rpc_ip_config.txt")
     dist_graph = DistGraph("test_hetero_sampling", part_config=tmpdir / 'test_hetero_sampling.json')
...
@@ -892,8 +892,7 @@ def test_standalone_sampling():
     import tempfile
     os.environ['DGL_DIST_MODE'] = 'standalone'
     with tempfile.TemporaryDirectory() as tmpdirname:
-        check_standalone_sampling(Path(tmpdirname), False)
-        check_standalone_sampling(Path(tmpdirname), True)
+        check_standalone_sampling(Path(tmpdirname))

 def start_in_subgraph_client(rank, tmpdir, disable_shared_mem, nodes):
     gpb = None
...
@@ -917,7 +916,7 @@ def check_rpc_in_subgraph_shuffle(tmpdir, num_server):
     num_parts = num_server
     orig_nid, orig_eid = partition_graph(g, 'test_in_subgraph', num_parts, tmpdir,
-                                         num_hops=1, part_method='metis',
-                                         reshuffle=True, return_mapping=True)
+                                         num_hops=1, part_method='metis',
+                                         return_mapping=True)

     pserver_list = []
     ctx = mp.get_context('spawn')
...
@@ -964,23 +963,21 @@ def test_standalone_etype_sampling():
     import tempfile
     with tempfile.TemporaryDirectory() as tmpdirname:
         os.environ['DGL_DIST_MODE'] = 'standalone'
-        check_standalone_etype_sampling_heterograph(Path(tmpdirname), True)
+        check_standalone_etype_sampling_heterograph(Path(tmpdirname))
     with tempfile.TemporaryDirectory() as tmpdirname:
         os.environ['DGL_DIST_MODE'] = 'standalone'
-        check_standalone_etype_sampling(Path(tmpdirname), True)
+        check_standalone_etype_sampling(Path(tmpdirname))

 if __name__ == "__main__":
     import tempfile
     with tempfile.TemporaryDirectory() as tmpdirname:
         os.environ['DGL_DIST_MODE'] = 'standalone'
-        check_standalone_etype_sampling_heterograph(Path(tmpdirname), True)
+        check_standalone_etype_sampling_heterograph(Path(tmpdirname))
     with tempfile.TemporaryDirectory() as tmpdirname:
         os.environ['DGL_DIST_MODE'] = 'standalone'
-        check_standalone_etype_sampling(Path(tmpdirname), True)
-        check_standalone_etype_sampling(Path(tmpdirname), False)
-        check_standalone_sampling(Path(tmpdirname), True)
-        check_standalone_sampling(Path(tmpdirname), False)
+        check_standalone_etype_sampling(Path(tmpdirname))
+        check_standalone_sampling(Path(tmpdirname))
         os.environ['DGL_DIST_MODE'] = 'distributed'
         check_rpc_sampling(Path(tmpdirname), 2)
         check_rpc_sampling(Path(tmpdirname), 1)
...
tests/distributed/test_mp_dataloader.py
...
@@ -162,7 +162,6 @@ def test_standalone():
         test_dir,
         num_hops=num_hops,
         part_method="metis",
-        reshuffle=True,
         return_mapping=True,
     )
     part_config = os.path.join(test_dir, "test_sampling.json")
...
@@ -262,7 +261,6 @@ def check_neg_dataloader(g, num_server, num_workers):
         test_dir,
         num_hops=num_hops,
         part_method="metis",
-        reshuffle=True,
         return_mapping=True,
     )
     part_config = os.path.join(test_dir, "test_sampling.json")
...
@@ -317,10 +315,9 @@ def check_neg_dataloader(g, num_server, num_workers):
 @pytest.mark.parametrize("num_server", [3])
 @pytest.mark.parametrize("num_workers", [0, 4])
 @pytest.mark.parametrize("drop_last", [True, False])
-@pytest.mark.parametrize("reshuffle", [True, False])
 @pytest.mark.parametrize("num_groups", [1])
 def test_dist_dataloader(
-    num_server, num_workers, drop_last, reshuffle, num_groups
+    num_server, num_workers, drop_last, num_groups
 ):
     reset_envs()
     # No multiple partitions on single machine for
...
@@ -343,7 +340,6 @@ def test_dist_dataloader(
         test_dir,
         num_hops=num_hops,
         part_method="metis",
-        reshuffle=reshuffle,
         return_mapping=True,
     )
...
@@ -560,7 +556,6 @@ def check_dataloader(g, num_server, num_workers, dataloader_type):
         test_dir,
         num_hops=num_hops,
         part_method="metis",
-        reshuffle=True,
         return_mapping=True,
     )
     part_config = os.path.join(test_dir, "test_sampling.json")
...
tests/distributed/test_new_kvstore.py
...
@@ -18,8 +18,8 @@ if os.name != "nt":
 import struct

 # Create an one-part Graph
-node_map = F.tensor([0, 0, 0, 0, 0, 0], F.int64)
-edge_map = F.tensor([0, 0, 0, 0, 0, 0, 0], F.int64)
+node_map = {'_N': F.tensor([[0, 6]], F.int64)}
+edge_map = {('_N', '_E', '_N'): F.tensor([[0, 7]], F.int64)}
 global_nid = F.tensor([0, 1, 2, 3, 4, 5], F.int64)
 global_eid = F.tensor([0, 1, 2, 3, 4, 5, 6], F.int64)
...
@@ -36,8 +36,10 @@ g.add_edges(2, 5) # 6
 g.ndata[dgl.NID] = global_nid
 g.edata[dgl.EID] = global_eid
-gpb = dgl.distributed.graph_partition_book.BasicPartitionBook(
-    part_id=0, num_parts=1, node_map=node_map, edge_map=edge_map, part_graph=g
+gpb = dgl.distributed.graph_partition_book.RangePartitionBook(
+    part_id=0, num_parts=1, node_map=node_map, edge_map=edge_map,
+    ntypes={ntype: i for i, ntype in enumerate(g.ntypes)},
+    etypes={etype: i for i, etype in enumerate(g.canonical_etypes)}
 )
 node_policy = dgl.distributed.PartitionPolicy(
...
@@ -110,8 +112,8 @@ def test_partition_policy():
         F.asnumpy(eid_partid),
         F.asnumpy(F.tensor([0, 0, 0, 0, 0, 0, 0], F.int64)),
     )
-    assert node_policy.get_part_size() == len(node_map)
-    assert edge_policy.get_part_size() == len(edge_map)
+    assert node_policy.get_part_size() == len(local_nid)
+    assert edge_policy.get_part_size() == len(local_eid)

 def start_server(server_id, num_clients, num_servers):
...
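The switch above from per-ID arrays to per-type range dictionaries is the observable shape of the new format: a range book stores one [start, end) row per partition for each node/edge type, so ``len(node_map)`` now counts types rather than nodes, which appears to be why the ``get_part_size`` assertions were re-pointed at the local ID tensors. A toy sketch of the size difference (illustrative values only, not DGL API):

    # Old BasicPartitionBook input: one partition ID per node -- O(num_nodes).
    old_node_map = [0, 0, 0, 0, 0, 0]   # len == 6 == num_nodes

    # New RangePartitionBook input: one [start, end) row per partition and
    # node type -- O(num_parts * num_ntypes).
    new_node_map = {'_N': [[0, 6]]}     # len == 1 == num_ntypes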
tests/distributed/test_partition.py
...
@@ -3,18 +3,20 @@ import os

 import backend as F
+import torch as th
 import dgl
+import json
 import numpy as np
 import pytest
+import tempfile
 from dgl import function as fn
 from dgl.distributed import (
     load_partition,
     load_partition_book,
     load_partition_feats,
     partition_graph,
 )
 from dgl.distributed.graph_partition_book import (
     DEFAULT_ETYPE,
     DEFAULT_NTYPE,
-    BasicPartitionBook,
     EdgePartitionPolicy,
     HeteroDataName,
     NodePartitionPolicy,
...
@@ -226,7 +228,6 @@ def check_hetero_partition(
         "/tmp/partition",
         num_hops=num_hops,
         part_method=part_method,
-        reshuffle=True,
         return_mapping=True,
         num_trainers_per_machine=num_trainers_per_machine,
         graph_formats=graph_formats,
...
@@ -328,7 +329,6 @@ def check_hetero_partition(
 def check_partition(
     g,
     part_method,
-    reshuffle,
     num_parts=4,
     num_trainers_per_machine=1,
     load_feats=True,
...
@@ -352,7 +352,6 @@ def check_partition(
         "/tmp/partition",
         num_hops=num_hops,
         part_method=part_method,
-        reshuffle=reshuffle,
         return_mapping=True,
         num_trainers_per_machine=num_trainers_per_machine,
         graph_formats=graph_formats,
...
@@ -445,7 +444,6 @@ def check_partition(
         assert F.shape(orig_eids1)[0] == F.shape(orig_eids2)[0]
         assert np.all(F.asnumpy(orig_eids1) == F.asnumpy(orig_eids2))

-        if reshuffle:
-            local_orig_nids = orig_nids[part_g.ndata[dgl.NID]]
-            local_orig_eids = orig_eids[part_g.edata[dgl.EID]]
-            part_g.ndata["feats"] = F.gather_row(
+        local_orig_nids = orig_nids[part_g.ndata[dgl.NID]]
+        local_orig_eids = orig_eids[part_g.edata[dgl.EID]]
+        part_g.ndata["feats"] = F.gather_row(
...
@@ -456,13 +454,6 @@ def check_partition(
         )
         local_nodes = orig_nids[local_nodes]
         local_edges = orig_eids[local_edges]
-        else:
-            part_g.ndata["feats"] = F.gather_row(
-                g.ndata["feats"], part_g.ndata[dgl.NID]
-            )
-            part_g.edata["feats"] = F.gather_row(
-                g.edata["feats"], part_g.edata[dgl.NID]
-            )
         part_g.update_all(fn.copy_u("feats", "msg"), fn.sum("msg", "h"))
         part_g.update_all(fn.copy_e("feats", "msg"), fn.sum("msg", "eh"))
...
@@ -490,12 +481,10 @@ def check_partition(
         assert np.all(F.asnumpy(true_feats) == F.asnumpy(edata))

         # This only works if node/edge IDs are shuffled.
-        if reshuffle:
-            shuffled_labels.append(node_feats["_N/labels"])
-            shuffled_edata.append(edge_feats["_N:_E:_N/feats"])
+        shuffled_labels.append(node_feats["_N/labels"])
+        shuffled_edata.append(edge_feats["_N:_E:_N/feats"])

     # Verify that we can reconstruct node/edge data for original IDs.
-    if reshuffle:
-        shuffled_labels = F.asnumpy(F.cat(shuffled_labels, 0))
-        shuffled_edata = F.asnumpy(F.cat(shuffled_edata, 0))
-        orig_labels = np.zeros(
+    shuffled_labels = F.asnumpy(F.cat(shuffled_labels, 0))
+    shuffled_edata = F.asnumpy(F.cat(shuffled_edata, 0))
+    orig_labels = np.zeros(
...
@@ -507,7 +496,6 @@ def check_partition(
     assert np.all(orig_labels == F.asnumpy(g.ndata["labels"]))
     assert np.all(orig_edata == F.asnumpy(g.edata["feats"]))

-    if reshuffle:
     node_map = []
     edge_map = []
     for i, (num_nodes, num_edges) in enumerate(part_sizes):
...
@@ -524,7 +512,6 @@ def check_partition(
 @pytest.mark.parametrize("part_method", ["metis", "random"])
-@pytest.mark.parametrize("reshuffle", [True, False])
 @pytest.mark.parametrize("num_parts", [1, 4])
 @pytest.mark.parametrize("num_trainers_per_machine", [1, 4])
 @pytest.mark.parametrize("load_feats", [True, False])
...
@@ -533,7 +520,6 @@ def check_partition(
 )
 def test_partition(
     part_method,
-    reshuffle,
     num_parts,
     num_trainers_per_machine,
     load_feats,
...
@@ -546,7 +532,6 @@ def test_partition(
     check_partition(
         g,
         part_method,
-        reshuffle,
         num_parts,
         num_trainers_per_machine,
         load_feats,
...
@@ -563,31 +548,6 @@ def test_partition(
     )
     reset_envs()

-def test_BasicPartitionBook():
-    part_id = 0
-    num_parts = 2
-    node_map = np.random.choice(num_parts, 1000)
-    edge_map = np.random.choice(num_parts, 5000)
-    graph = dgl.rand_graph(1000, 5000)
-    graph = dgl.node_subgraph(graph, F.arange(0, graph.num_nodes()))
-    gpb = BasicPartitionBook(part_id, num_parts, node_map, edge_map, graph)
-    c_etype = ("_N", "_E", "_N")
-    assert gpb.etypes == ["_E"]
-    assert gpb.canonical_etypes == [c_etype]
-    node_policy = NodePartitionPolicy(gpb, "_N")
-    assert node_policy.type_name == "_N"
-    expect_except = False
-    try:
-        edge_policy = EdgePartitionPolicy(gpb, "_E")
-    except AssertionError:
-        expect_except = True
-    assert expect_except
-    edge_policy = EdgePartitionPolicy(gpb, c_etype)
-    assert edge_policy.type_name == c_etype

 def test_RangePartitionBook():
     part_id = 1
     num_parts = 2
...
@@ -699,3 +659,27 @@ def test_RangePartitionBook():
     assert expect_except
     data_name = HeteroDataName(False, c_etype, "feat")
     assert data_name.get_type() == c_etype

+def test_UnknownPartitionBook():
+    node_map = {'_N': {0: 0, 1: 1, 2: 2}}
+    edge_map = {'_N:_E:_N': {0: 0, 1: 1, 2: 2}}
+    part_metadata = {
+        "num_parts": 1,
+        "num_nodes": len(node_map),
+        "num_edges": len(edge_map),
+        "node_map": node_map,
+        "edge_map": edge_map,
+        "graph_name": "test_graph",
+    }
+    with tempfile.TemporaryDirectory() as test_dir:
+        part_config = os.path.join(test_dir, "test_graph.json")
+        with open(part_config, "w") as file:
+            json.dump(part_metadata, file, indent=4)
+        try:
+            load_partition_book(part_config, 0)
+        except Exception as e:
+            if not isinstance(e, TypeError):
+                raise e
tests/pytorch/test_dist_optim.py
...
@@ -92,7 +92,7 @@ def run_client(graph_name, cli_id, part_id, server_count):
     os.environ["DGL_NUM_SERVER"] = str(server_count)
     dgl.distributed.initialize("optim_ip_config.txt")
     gpb, graph_name, _, _ = load_partition_book(
-        "/tmp/dist_graph/{}.json".format(graph_name), part_id, None
+        "/tmp/dist_graph/{}.json".format(graph_name), part_id
    )
     g = DistGraph(graph_name, gpb=gpb)
     policy = dgl.distributed.PartitionPolicy("node", g.get_partition_book())
...