Unverified Commit 3a2a5031 authored by peizhou001's avatar peizhou001 Committed by GitHub
Browse files

[API Deprecation]Deprecate candidates in dgl.distributed (#5116)

parent ab0c0ec6
......@@ -430,7 +430,7 @@ class DistGraph:
The example shows the creation of ``DistGraph`` in the standalone mode.
>>> dgl.distributed.partition_graph(g, 'graph_name', 1, num_hops=1, part_method='metis',
... out_path='output/', reshuffle=True)
... out_path='output/')
>>> g = dgl.distributed.DistGraph('graph_name', part_config='output/graph_name.json')
The example shows the creation of ``DistGraph`` in the distributed mode.
......@@ -513,7 +513,7 @@ class DistGraph:
assert self._client is not None, \
'Distributed module is not initialized. Please call dgl.distributed.initialize.'
self._g = _get_graph_from_shared_mem(self.graph_name)
self._gpb = get_shared_mem_partition_book(self.graph_name, self._g)
self._gpb = get_shared_mem_partition_book(self.graph_name)
if self._gpb is None:
self._gpb = gpb
self._client.map_shared_data(self._gpb)
......
......@@ -8,7 +8,7 @@ import numpy as np
from .. import backend as F
from .. import utils
from .._ffi.ndarray import empty_shared_mem
from ..base import EID, NID, DGLError
from ..base import DGLError
from ..ndarray import exist_shared_mem_array
from ..partition import NDArrayPartition
from .constants import DEFAULT_ETYPE, DEFAULT_NTYPE
......@@ -175,7 +175,7 @@ def _get_shared_mem_metadata(graph_name):
return is_range_part, part_id, num_partitions, node_map, edge_map
def get_shared_mem_partition_book(graph_name, graph_part):
def get_shared_mem_partition_book(graph_name):
"""Get a graph partition book from shared memory.
A graph partition book of a specific graph can be serialized to shared memory.
......@@ -185,8 +185,6 @@ def get_shared_mem_partition_book(graph_name, graph_part):
----------
graph_name : str
The name of the graph.
graph_part : DGLGraph
The graph structure of a partition.
Returns
-------
......@@ -225,9 +223,7 @@ def get_shared_mem_partition_book(graph_name, graph_part):
part_id, num_parts, node_map, edge_map, ntypes, etypes
)
else:
return BasicPartitionBook(
part_id, num_parts, node_map_data, edge_map_data, graph_part
)
raise TypeError("Only RangePartitionBook is supported currently.")
def get_node_partition_from_book(book, device):
......@@ -278,14 +274,10 @@ class GraphPartitionBook(ABC):
* the node IDs and the edge IDs that a partition has.
* the local IDs of nodes and edges in a partition.
Currently, there are two classes that implement ``GraphPartitionBook``:
``BasicGraphPartitionBook`` and ``RangePartitionBook``. ``BasicGraphPartitionBook``
stores the mappings between every individual node/edge ID and partition ID on
every machine, which usually consumes a lot of memory, while ``RangePartitionBook``
calculates the mapping between node/edge IDs and partition IDs based on some small
metadata because nodes/edges have been relabeled to have IDs in the same partition
fall in a contiguous ID range. ``RangePartitionBook`` is usually a preferred way to
provide mappings between node/edge IDs and partition IDs.
Currently, only one class implements ``GraphPartitionBook``:
``RangePartitionBook``. It calculates the mapping between node/edge IDs
and partition IDs from a small amount of metadata, because nodes/edges have
been relabeled so that IDs in the same partition fall in a contiguous ID range.
A graph partition book is constructed automatically when a graph is partitioned.
When a graph partition is loaded, a graph partition book is loaded as well.
......@@ -541,262 +533,6 @@ class GraphPartitionBook(ABC):
Homogeneous edge IDs.
"""
class BasicPartitionBook(GraphPartitionBook):
    """The most flexible way to store partition information.

    The partition book maintains the mapping of every single node ID and
    edge ID to a partition ID. This is very flexible at the cost of large
    memory consumption. On a large graph, the mapping consumes significant
    memory and this partition book is not recommended.

    Parameters
    ----------
    part_id : int
        partition ID of current partition book
    num_parts : int
        number of total partitions
    node_map : tensor
        global node ID mapping to partition ID
    edge_map : tensor
        global edge ID mapping to partition ID
    part_graph : DGLGraph
        The graph partition structure.
    """

    def __init__(self, part_id, num_parts, node_map, edge_map, part_graph):
        assert part_id >= 0, "part_id cannot be a negative number."
        assert num_parts > 0, "num_parts must be greater than zero."
        self._part_id = int(part_id)
        self._num_partitions = int(num_parts)
        # Dense per-ID lookup tables: element i is the partition that owns ID i.
        self._nid2partid = F.tensor(node_map)
        assert (
            F.dtype(self._nid2partid) == F.int64
        ), "the node map must be stored in an integer array"
        self._eid2partid = F.tensor(edge_map)
        assert (
            F.dtype(self._eid2partid) == F.int64
        ), "the edge map must be stored in an integer array"
        # Get meta data of the partition book.
        # nid_count[i] / eid_count[i] hold the number of nodes/edges assigned
        # to partition i.
        # NOTE(review): np.unique drops values that never occur, so indexing
        # the counts by partition ID assumes every partition owns at least one
        # node and one edge — confirm for degenerate partitionings.
        self._partition_meta_data = []
        _, nid_count = np.unique(
            F.asnumpy(self._nid2partid), return_counts=True
        )
        _, eid_count = np.unique(
            F.asnumpy(self._eid2partid), return_counts=True
        )
        for partid in range(self._num_partitions):
            part_info = {}
            part_info["machine_id"] = partid
            part_info["num_nodes"] = int(nid_count[partid])
            part_info["num_edges"] = int(eid_count[partid])
            self._partition_meta_data.append(part_info)
        # Get partid2nids: sort all global node IDs by owning partition, then
        # slice the sorted array into one contiguous chunk per partition.
        self._partid2nids = []
        sorted_nid = F.tensor(np.argsort(F.asnumpy(self._nid2partid)))
        start = 0
        for offset in nid_count:
            part_nids = sorted_nid[start : start + offset]
            start += offset
            self._partid2nids.append(part_nids)
        # Get partid2eids (same slicing scheme as partid2nids, for edges).
        self._partid2eids = []
        sorted_eid = F.tensor(np.argsort(F.asnumpy(self._eid2partid)))
        start = 0
        for offset in eid_count:
            part_eids = sorted_eid[start : start + offset]
            start += offset
            self._partid2eids.append(part_eids)
        # Get nidg2l: global-to-local node ID mapping, materialized only for
        # the local partition; slots for remote partitions stay None.
        self._nidg2l = [None] * self._num_partitions
        global_id = part_graph.ndata[NID]
        max_global_id = np.amax(F.asnumpy(global_id))
        # TODO(chao): support int32 index
        g2l = F.zeros((max_global_id + 1), F.int64, F.context(global_id))
        g2l = F.scatter_row(g2l, global_id, F.arange(0, len(global_id)))
        self._nidg2l[self._part_id] = g2l
        # Get eidg2l: global-to-local edge ID mapping for the local partition.
        self._eidg2l = [None] * self._num_partitions
        global_id = part_graph.edata[EID]
        max_global_id = np.amax(F.asnumpy(global_id))
        # TODO(chao): support int32 index
        g2l = F.zeros((max_global_id + 1), F.int64, F.context(global_id))
        g2l = F.scatter_row(g2l, global_id, F.arange(0, len(global_id)))
        self._eidg2l[self._part_id] = g2l
        # node size and edge size
        self._edge_size = len(self.partid2eids(self._part_id))
        self._node_size = len(self.partid2nids(self._part_id))

    def shared_memory(self, graph_name):
        """Move data to shared memory.

        Copies the metadata and the node/edge ID-to-partition maps into
        shared memory (keyed by ``graph_name``) and rebinds the instance
        attributes to the shared copies.
        """
        (
            self._meta,
            self._nid2partid,
            self._eid2partid,
        ) = _move_metadata_to_shared_mem(
            graph_name,
            self._num_nodes(),
            self._num_edges(),
            self._part_id,
            self._num_partitions,
            self._nid2partid,
            self._eid2partid,
            # Not a range partition book (cf. is_range_part in the shared
            # metadata) — presumably read back on attach; verify against
            # _get_shared_mem_metadata.
            False,
        )

    def num_partitions(self):
        """Return the number of partitions."""
        return self._num_partitions

    def metadata(self):
        """Return the partition meta data.

        One dict per partition with keys ``machine_id``, ``num_nodes`` and
        ``num_edges`` (built in ``__init__``).
        """
        return self._partition_meta_data

    def _num_nodes(self, ntype=DEFAULT_NTYPE):
        """The total number of nodes."""
        assert (
            ntype == DEFAULT_NTYPE
        ), "Base partition book only supports homogeneous graph."
        return len(self._nid2partid)

    def _num_edges(self, etype=DEFAULT_ETYPE):
        """The total number of edges."""
        assert etype in (
            DEFAULT_ETYPE,
            DEFAULT_ETYPE[1],
        ), "Base partition book only supports homogeneous graph."
        return len(self._eid2partid)

    def map_to_per_ntype(self, ids):
        """Map global homogeneous node IDs to node type IDs.

        The graph is homogeneous, so the type ID is always 0 and the
        per-type IDs equal the input IDs.

        Returns
            type_ids, per_type_ids
        """
        return F.zeros((len(ids),), F.int32, F.cpu()), ids

    def map_to_per_etype(self, ids):
        """Map global homogeneous edge IDs to edge type IDs.

        The graph is homogeneous, so the type ID is always 0 and the
        per-type IDs equal the input IDs.

        Returns
            type_ids, per_type_ids
        """
        return F.zeros((len(ids),), F.int32, F.cpu()), ids

    def map_to_homo_nid(self, ids, ntype=DEFAULT_NTYPE):
        """Map per-node-type IDs to global node IDs in the homogeneous format.

        Identity mapping: the graph has a single node type.
        """
        assert (
            ntype == DEFAULT_NTYPE
        ), "Base partition book only supports homogeneous graph."
        return ids

    def map_to_homo_eid(self, ids, etype=DEFAULT_ETYPE):
        """Map per-edge-type IDs to global edge IDs in the homogeneous format.

        Identity mapping: the graph has a single edge type.
        """
        assert etype in (
            DEFAULT_ETYPE,
            DEFAULT_ETYPE[1],
        ), "Base partition book only supports homogeneous graph."
        return ids

    def nid2partid(self, nids, ntype=DEFAULT_NTYPE):
        """From global node IDs to partition IDs."""
        assert (
            ntype == DEFAULT_NTYPE
        ), "Base partition book only supports homogeneous graph."
        return F.gather_row(self._nid2partid, nids)

    def eid2partid(self, eids, etype=DEFAULT_ETYPE):
        """From global edge IDs to partition IDs."""
        assert etype in (
            DEFAULT_ETYPE,
            DEFAULT_ETYPE[1],
        ), "Base partition book only supports homogeneous graph."
        return F.gather_row(self._eid2partid, eids)

    def partid2nids(self, partid, ntype=DEFAULT_NTYPE):
        """From partition id to global node IDs."""
        assert (
            ntype == DEFAULT_NTYPE
        ), "Base partition book only supports homogeneous graph."
        return self._partid2nids[partid]

    def partid2eids(self, partid, etype=DEFAULT_ETYPE):
        """From partition id to global edge IDs."""
        assert etype in (
            DEFAULT_ETYPE,
            DEFAULT_ETYPE[1],
        ), "Base partition book only supports homogeneous graph."
        return self._partid2eids[partid]

    def nid2localnid(self, nids, partid, ntype=DEFAULT_NTYPE):
        """Get local node IDs within the given partition.

        Only the local partition's global-to-local map is materialized, so
        this raises for any remote ``partid``.
        """
        assert (
            ntype == DEFAULT_NTYPE
        ), "Base partition book only supports homogeneous graph."
        if partid != self._part_id:
            raise RuntimeError(
                "Now GraphPartitionBook does not support \
                getting remote tensor of nid2localnid."
            )
        return F.gather_row(self._nidg2l[partid], nids)

    def eid2localeid(self, eids, partid, etype=DEFAULT_ETYPE):
        """Get the local edge ids within the given partition.

        Only the local partition's global-to-local map is materialized, so
        this raises for any remote ``partid``.
        """
        assert etype in (
            DEFAULT_ETYPE,
            DEFAULT_ETYPE[1],
        ), "Base partition book only supports homogeneous graph."
        if partid != self._part_id:
            raise RuntimeError(
                "Now GraphPartitionBook does not support \
                getting remote tensor of eid2localeid."
            )
        return F.gather_row(self._eidg2l[partid], eids)

    @property
    def partid(self):
        """Get the current partition ID."""
        return self._part_id

    @property
    def ntypes(self):
        """Get the list of node types."""
        return [DEFAULT_NTYPE]

    @property
    def etypes(self):
        """Get the list of edge types."""
        return [DEFAULT_ETYPE[1]]

    @property
    def canonical_etypes(self):
        """Get the list of canonical edge types

        Returns
        -------
        list[(str, str, str)]
            A list of canonical etypes
        """
        return [DEFAULT_ETYPE]

    def to_canonical_etype(self, etype):
        """Convert an edge type to the corresponding canonical edge type.

        Parameters
        ----------
        etype : str or (str, str, str)
            The edge type

        Returns
        -------
        (str, str, str)
            The corresponding canonical edge type
        """
        assert etype in (
            DEFAULT_ETYPE,
            DEFAULT_ETYPE[1],
        ), "Base partition book only supports homogeneous graph."
        return self.canonical_etypes[0]
class RangePartitionBook(GraphPartitionBook):
"""This partition book supports more efficient storage of partition information.
......
This diff is collapsed.
......@@ -682,7 +682,7 @@ elif mode == "client":
dgl.distributed.initialize(ip_config, net_type=net_type)
gpb, graph_name, _, _ = load_partition_book(
graph_path + "/{}.json".format(graph_name), part_id, None
graph_path + "/{}.json".format(graph_name), part_id
)
g = dgl.distributed.DistGraph(graph_name, gpb=gpb)
......
......@@ -119,7 +119,7 @@ def run_client_empty(
os.environ["DGL_NUM_SERVER"] = str(server_count)
dgl.distributed.initialize("kv_ip_config.txt")
gpb, graph_name, _, _ = load_partition_book(
"/tmp/dist_graph/{}.json".format(graph_name), part_id, None
"/tmp/dist_graph/{}.json".format(graph_name), part_id
)
g = DistGraph(graph_name, gpb=gpb)
check_dist_graph_empty(g, num_clients, num_nodes, num_edges)
......@@ -187,7 +187,7 @@ def run_client(
os.environ["DGL_GROUP_ID"] = str(group_id)
dgl.distributed.initialize("kv_ip_config.txt")
gpb, graph_name, _, _ = load_partition_book(
"/tmp/dist_graph/{}.json".format(graph_name), part_id, None
"/tmp/dist_graph/{}.json".format(graph_name), part_id
)
g = DistGraph(graph_name, gpb=gpb)
check_dist_graph(g, num_clients, num_nodes, num_edges)
......@@ -206,7 +206,7 @@ def run_emb_client(
os.environ["DGL_GROUP_ID"] = str(group_id)
dgl.distributed.initialize("kv_ip_config.txt")
gpb, graph_name, _, _ = load_partition_book(
"/tmp/dist_graph/{}.json".format(graph_name), part_id, None
"/tmp/dist_graph/{}.json".format(graph_name), part_id
)
g = DistGraph(graph_name, gpb=gpb)
check_dist_emb(g, num_clients, num_nodes, num_edges)
......@@ -230,7 +230,7 @@ def run_optim_client(
backend="gloo", rank=rank, world_size=world_size
)
gpb, graph_name, _, _ = load_partition_book(
"/tmp/dist_graph/{}.json".format(graph_name), part_id, None
"/tmp/dist_graph/{}.json".format(graph_name), part_id
)
g = DistGraph(graph_name, gpb=gpb)
check_dist_optim_store(rank, num_nodes, optimizer_states, save)
......@@ -279,7 +279,7 @@ def run_client_hierarchy(
os.environ["DGL_NUM_SERVER"] = str(server_count)
dgl.distributed.initialize("kv_ip_config.txt")
gpb, graph_name, _, _ = load_partition_book(
"/tmp/dist_graph/{}.json".format(graph_name), part_id, None
"/tmp/dist_graph/{}.json".format(graph_name), part_id
)
g = DistGraph(graph_name, gpb=gpb)
node_mask = F.tensor(node_mask)
......@@ -687,7 +687,7 @@ def run_client_hetero(
os.environ["DGL_NUM_SERVER"] = str(server_count)
dgl.distributed.initialize("kv_ip_config.txt")
gpb, graph_name, _, _ = load_partition_book(
"/tmp/dist_graph/{}.json".format(graph_name), part_id, None
"/tmp/dist_graph/{}.json".format(graph_name), part_id
)
g = DistGraph(graph_name, gpb=gpb)
check_dist_graph_hetero(g, num_clients, num_nodes, num_edges)
......
......@@ -101,7 +101,7 @@ def check_rpc_sampling(tmpdir, num_server):
num_hops = 1
partition_graph(g, 'test_sampling', num_parts, tmpdir,
num_hops=num_hops, part_method='metis', reshuffle=False)
num_hops=num_hops, part_method='metis')
pserver_list = []
ctx = mp.get_context('spawn')
......@@ -132,7 +132,7 @@ def check_rpc_find_edges_shuffle(tmpdir, num_server):
orig_nid, orig_eid = partition_graph(g, 'test_find_edges', num_parts, tmpdir,
num_hops=1, part_method='metis',
reshuffle=True, return_mapping=True)
return_mapping=True)
pserver_list = []
ctx = mp.get_context('spawn')
......@@ -178,7 +178,7 @@ def check_rpc_hetero_find_edges_shuffle(tmpdir, num_server):
orig_nid, orig_eid = partition_graph(g, 'test_find_edges', num_parts, tmpdir,
num_hops=1, part_method='metis',
reshuffle=True, return_mapping=True)
return_mapping=True)
pserver_list = []
ctx = mp.get_context('spawn')
......@@ -227,7 +227,7 @@ def check_rpc_get_degree_shuffle(tmpdir, num_server):
num_parts = num_server
orig_nid, _ = partition_graph(g, 'test_get_degrees', num_parts, tmpdir,
num_hops=1, part_method='metis', reshuffle=True, return_mapping=True)
num_hops=1, part_method='metis', return_mapping=True)
pserver_list = []
ctx = mp.get_context('spawn')
......@@ -281,7 +281,7 @@ def check_rpc_sampling_shuffle(tmpdir, num_server, num_groups=1):
num_hops = 1
orig_nids, orig_eids = partition_graph(g, 'test_sampling', num_parts, tmpdir,
num_hops=num_hops, part_method='metis', reshuffle=True, return_mapping=True)
num_hops=num_hops, part_method='metis', return_mapping=True)
pserver_list = []
ctx = mp.get_context('spawn')
......@@ -379,7 +379,7 @@ def check_rpc_hetero_sampling_shuffle(tmpdir, num_server):
num_hops = 1
orig_nid_map, orig_eid_map = partition_graph(g, 'test_sampling', num_parts, tmpdir,
num_hops=num_hops, part_method='metis', reshuffle=True, return_mapping=True)
num_hops=num_hops, part_method='metis', return_mapping=True)
pserver_list = []
ctx = mp.get_context('spawn')
......@@ -431,7 +431,7 @@ def check_rpc_hetero_sampling_empty_shuffle(tmpdir, num_server):
orig_nids, _ = partition_graph(g, 'test_sampling', num_parts, tmpdir,
num_hops=num_hops, part_method='metis',
reshuffle=True, return_mapping=True)
return_mapping=True)
pserver_list = []
ctx = mp.get_context('spawn')
......@@ -461,7 +461,7 @@ def check_rpc_hetero_etype_sampling_shuffle(tmpdir, num_server, graph_formats=No
num_hops = 1
orig_nid_map, orig_eid_map = partition_graph(g, 'test_sampling', num_parts, tmpdir,
num_hops=num_hops, part_method='metis', reshuffle=True, return_mapping=True,
num_hops=num_hops, part_method='metis', return_mapping=True,
graph_formats=graph_formats)
pserver_list = []
......@@ -515,7 +515,7 @@ def check_rpc_hetero_etype_sampling_empty_shuffle(tmpdir, num_server):
orig_nids, _ = partition_graph(g, 'test_sampling', num_parts, tmpdir,
num_hops=num_hops, part_method='metis',
reshuffle=True, return_mapping=True)
return_mapping=True)
pserver_list = []
ctx = mp.get_context('spawn')
......@@ -607,7 +607,7 @@ def check_rpc_bipartite_sampling_empty(tmpdir, num_server):
num_hops = 1
orig_nids, _ = partition_graph(g, 'test_sampling', num_parts, tmpdir,
num_hops=num_hops, part_method='metis', reshuffle=True, return_mapping=True)
num_hops=num_hops, part_method='metis', return_mapping=True)
pserver_list = []
ctx = mp.get_context('spawn')
......@@ -641,7 +641,7 @@ def check_rpc_bipartite_sampling_shuffle(tmpdir, num_server):
num_hops = 1
orig_nid_map, orig_eid_map = partition_graph(g, 'test_sampling', num_parts, tmpdir,
num_hops=num_hops, part_method='metis', reshuffle=True, return_mapping=True)
num_hops=num_hops, part_method='metis', return_mapping=True)
pserver_list = []
ctx = mp.get_context('spawn')
......@@ -692,7 +692,7 @@ def check_rpc_bipartite_etype_sampling_empty(tmpdir, num_server):
num_hops = 1
orig_nids, _ = partition_graph(g, 'test_sampling', num_parts, tmpdir,
num_hops=num_hops, part_method='metis', reshuffle=True, return_mapping=True)
num_hops=num_hops, part_method='metis', return_mapping=True)
pserver_list = []
ctx = mp.get_context('spawn')
......@@ -727,7 +727,7 @@ def check_rpc_bipartite_etype_sampling_shuffle(tmpdir, num_server):
num_hops = 1
orig_nid_map, orig_eid_map = partition_graph(g, 'test_sampling', num_parts, tmpdir,
num_hops=num_hops, part_method='metis', reshuffle=True, return_mapping=True)
num_hops=num_hops, part_method='metis', return_mapping=True)
pserver_list = []
ctx = mp.get_context('spawn')
......@@ -795,7 +795,7 @@ def test_rpc_sampling_shuffle(num_server):
check_rpc_bipartite_etype_sampling_empty(Path(tmpdirname), num_server)
check_rpc_bipartite_etype_sampling_shuffle(Path(tmpdirname), num_server)
def check_standalone_sampling(tmpdir, reshuffle):
def check_standalone_sampling(tmpdir):
g = CitationGraphDataset("cora")[0]
prob = np.maximum(np.random.randn(g.num_edges()), 0)
mask = (prob > 0)
......@@ -804,7 +804,7 @@ def check_standalone_sampling(tmpdir, reshuffle):
num_parts = 1
num_hops = 1
partition_graph(g, 'test_sampling', num_parts, tmpdir,
num_hops=num_hops, part_method='metis', reshuffle=reshuffle)
num_hops=num_hops, part_method='metis')
os.environ['DGL_DIST_MODE'] = 'standalone'
dgl.distributed.initialize("rpc_ip_config.txt")
......@@ -829,7 +829,7 @@ def check_standalone_sampling(tmpdir, reshuffle):
assert (prob[eid] > 0).all()
dgl.distributed.exit_client()
def check_standalone_etype_sampling(tmpdir, reshuffle):
def check_standalone_etype_sampling(tmpdir):
hg = CitationGraphDataset('cora')[0]
prob = np.maximum(np.random.randn(hg.num_edges()), 0)
mask = (prob > 0)
......@@ -839,7 +839,7 @@ def check_standalone_etype_sampling(tmpdir, reshuffle):
num_hops = 1
partition_graph(hg, 'test_sampling', num_parts, tmpdir,
num_hops=num_hops, part_method='metis', reshuffle=reshuffle)
num_hops=num_hops, part_method='metis')
os.environ['DGL_DIST_MODE'] = 'standalone'
dgl.distributed.initialize("rpc_ip_config.txt")
dist_graph = DistGraph("test_sampling", part_config=tmpdir / 'test_sampling.json')
......@@ -863,7 +863,7 @@ def check_standalone_etype_sampling(tmpdir, reshuffle):
assert (prob[eid] > 0).all()
dgl.distributed.exit_client()
def check_standalone_etype_sampling_heterograph(tmpdir, reshuffle):
def check_standalone_etype_sampling_heterograph(tmpdir):
hg = CitationGraphDataset('cora')[0]
num_parts = 1
num_hops = 1
......@@ -872,7 +872,7 @@ def check_standalone_etype_sampling_heterograph(tmpdir, reshuffle):
('paper', 'cite-by', 'paper'): (dst, src)},
{'paper': hg.number_of_nodes()})
partition_graph(new_hg, 'test_hetero_sampling', num_parts, tmpdir,
num_hops=num_hops, part_method='metis', reshuffle=reshuffle)
num_hops=num_hops, part_method='metis')
os.environ['DGL_DIST_MODE'] = 'standalone'
dgl.distributed.initialize("rpc_ip_config.txt")
dist_graph = DistGraph("test_hetero_sampling", part_config=tmpdir / 'test_hetero_sampling.json')
......@@ -892,8 +892,7 @@ def test_standalone_sampling():
import tempfile
os.environ['DGL_DIST_MODE'] = 'standalone'
with tempfile.TemporaryDirectory() as tmpdirname:
check_standalone_sampling(Path(tmpdirname), False)
check_standalone_sampling(Path(tmpdirname), True)
check_standalone_sampling(Path(tmpdirname))
def start_in_subgraph_client(rank, tmpdir, disable_shared_mem, nodes):
gpb = None
......@@ -917,7 +916,7 @@ def check_rpc_in_subgraph_shuffle(tmpdir, num_server):
num_parts = num_server
orig_nid, orig_eid = partition_graph(g, 'test_in_subgraph', num_parts, tmpdir,
num_hops=1, part_method='metis', reshuffle=True, return_mapping=True)
num_hops=1, part_method='metis', return_mapping=True)
pserver_list = []
ctx = mp.get_context('spawn')
......@@ -964,23 +963,21 @@ def test_standalone_etype_sampling():
import tempfile
with tempfile.TemporaryDirectory() as tmpdirname:
os.environ['DGL_DIST_MODE'] = 'standalone'
check_standalone_etype_sampling_heterograph(Path(tmpdirname), True)
check_standalone_etype_sampling_heterograph(Path(tmpdirname))
with tempfile.TemporaryDirectory() as tmpdirname:
os.environ['DGL_DIST_MODE'] = 'standalone'
check_standalone_etype_sampling(Path(tmpdirname), True)
check_standalone_etype_sampling(Path(tmpdirname))
if __name__ == "__main__":
import tempfile
with tempfile.TemporaryDirectory() as tmpdirname:
os.environ['DGL_DIST_MODE'] = 'standalone'
check_standalone_etype_sampling_heterograph(Path(tmpdirname), True)
check_standalone_etype_sampling_heterograph(Path(tmpdirname))
with tempfile.TemporaryDirectory() as tmpdirname:
os.environ['DGL_DIST_MODE'] = 'standalone'
check_standalone_etype_sampling(Path(tmpdirname), True)
check_standalone_etype_sampling(Path(tmpdirname), False)
check_standalone_sampling(Path(tmpdirname), True)
check_standalone_sampling(Path(tmpdirname), False)
check_standalone_etype_sampling(Path(tmpdirname))
check_standalone_sampling(Path(tmpdirname))
os.environ['DGL_DIST_MODE'] = 'distributed'
check_rpc_sampling(Path(tmpdirname), 2)
check_rpc_sampling(Path(tmpdirname), 1)
......
......@@ -162,7 +162,6 @@ def test_standalone():
test_dir,
num_hops=num_hops,
part_method="metis",
reshuffle=True,
return_mapping=True,
)
part_config = os.path.join(test_dir, "test_sampling.json")
......@@ -262,7 +261,6 @@ def check_neg_dataloader(g, num_server, num_workers):
test_dir,
num_hops=num_hops,
part_method="metis",
reshuffle=True,
return_mapping=True,
)
part_config = os.path.join(test_dir, "test_sampling.json")
......@@ -317,10 +315,9 @@ def check_neg_dataloader(g, num_server, num_workers):
@pytest.mark.parametrize("num_server", [3])
@pytest.mark.parametrize("num_workers", [0, 4])
@pytest.mark.parametrize("drop_last", [True, False])
@pytest.mark.parametrize("reshuffle", [True, False])
@pytest.mark.parametrize("num_groups", [1])
def test_dist_dataloader(
num_server, num_workers, drop_last, reshuffle, num_groups
num_server, num_workers, drop_last, num_groups
):
reset_envs()
# No multiple partitions on single machine for
......@@ -343,7 +340,6 @@ def test_dist_dataloader(
test_dir,
num_hops=num_hops,
part_method="metis",
reshuffle=reshuffle,
return_mapping=True,
)
......@@ -560,7 +556,6 @@ def check_dataloader(g, num_server, num_workers, dataloader_type):
test_dir,
num_hops=num_hops,
part_method="metis",
reshuffle=True,
return_mapping=True,
)
part_config = os.path.join(test_dir, "test_sampling.json")
......
......@@ -18,8 +18,8 @@ if os.name != "nt":
import struct
# Create an one-part Graph
node_map = F.tensor([0, 0, 0, 0, 0, 0], F.int64)
edge_map = F.tensor([0, 0, 0, 0, 0, 0, 0], F.int64)
node_map = {'_N': F.tensor([[0, 6]], F.int64)}
edge_map = {('_N','_E','_N'): F.tensor([[0, 7]], F.int64)}
global_nid = F.tensor([0, 1, 2, 3, 4, 5], F.int64)
global_eid = F.tensor([0, 1, 2, 3, 4, 5, 6], F.int64)
......@@ -36,8 +36,10 @@ g.add_edges(2, 5) # 6
g.ndata[dgl.NID] = global_nid
g.edata[dgl.EID] = global_eid
gpb = dgl.distributed.graph_partition_book.BasicPartitionBook(
part_id=0, num_parts=1, node_map=node_map, edge_map=edge_map, part_graph=g
gpb = dgl.distributed.graph_partition_book.RangePartitionBook(
part_id=0, num_parts=1, node_map=node_map, edge_map=edge_map,
ntypes={ntype: i for i, ntype in enumerate(g.ntypes)},
etypes={etype: i for i, etype in enumerate(g.canonical_etypes)}
)
node_policy = dgl.distributed.PartitionPolicy(
......@@ -110,8 +112,8 @@ def test_partition_policy():
F.asnumpy(eid_partid),
F.asnumpy(F.tensor([0, 0, 0, 0, 0, 0, 0], F.int64)),
)
assert node_policy.get_part_size() == len(node_map)
assert edge_policy.get_part_size() == len(edge_map)
assert node_policy.get_part_size() == len(local_nid)
assert edge_policy.get_part_size() == len(local_eid)
def start_server(server_id, num_clients, num_servers):
......
......@@ -3,18 +3,20 @@ import os
import backend as F
import torch as th
import dgl
import json
import numpy as np
import pytest
import tempfile
from dgl import function as fn
from dgl.distributed import (
load_partition,
load_partition_book,
load_partition_feats,
partition_graph,
)
from dgl.distributed.graph_partition_book import (
DEFAULT_ETYPE,
DEFAULT_NTYPE,
BasicPartitionBook,
EdgePartitionPolicy,
HeteroDataName,
NodePartitionPolicy,
......@@ -226,7 +228,6 @@ def check_hetero_partition(
"/tmp/partition",
num_hops=num_hops,
part_method=part_method,
reshuffle=True,
return_mapping=True,
num_trainers_per_machine=num_trainers_per_machine,
graph_formats=graph_formats,
......@@ -328,7 +329,6 @@ def check_hetero_partition(
def check_partition(
g,
part_method,
reshuffle,
num_parts=4,
num_trainers_per_machine=1,
load_feats=True,
......@@ -352,7 +352,6 @@ def check_partition(
"/tmp/partition",
num_hops=num_hops,
part_method=part_method,
reshuffle=reshuffle,
return_mapping=True,
num_trainers_per_machine=num_trainers_per_machine,
graph_formats=graph_formats,
......@@ -445,24 +444,16 @@ def check_partition(
assert F.shape(orig_eids1)[0] == F.shape(orig_eids2)[0]
assert np.all(F.asnumpy(orig_eids1) == F.asnumpy(orig_eids2))
if reshuffle:
local_orig_nids = orig_nids[part_g.ndata[dgl.NID]]
local_orig_eids = orig_eids[part_g.edata[dgl.EID]]
part_g.ndata["feats"] = F.gather_row(
g.ndata["feats"], local_orig_nids
)
part_g.edata["feats"] = F.gather_row(
g.edata["feats"], local_orig_eids
)
local_nodes = orig_nids[local_nodes]
local_edges = orig_eids[local_edges]
else:
part_g.ndata["feats"] = F.gather_row(
g.ndata["feats"], part_g.ndata[dgl.NID]
)
part_g.edata["feats"] = F.gather_row(
g.edata["feats"], part_g.edata[dgl.NID]
)
local_orig_nids = orig_nids[part_g.ndata[dgl.NID]]
local_orig_eids = orig_eids[part_g.edata[dgl.EID]]
part_g.ndata["feats"] = F.gather_row(
g.ndata["feats"], local_orig_nids
)
part_g.edata["feats"] = F.gather_row(
g.edata["feats"], local_orig_eids
)
local_nodes = orig_nids[local_nodes]
local_edges = orig_eids[local_edges]
part_g.update_all(fn.copy_u("feats", "msg"), fn.sum("msg", "h"))
part_g.update_all(fn.copy_e("feats", "msg"), fn.sum("msg", "eh"))
......@@ -490,41 +481,37 @@ def check_partition(
assert np.all(F.asnumpy(true_feats) == F.asnumpy(edata))
# This only works if node/edge IDs are shuffled.
if reshuffle:
shuffled_labels.append(node_feats["_N/labels"])
shuffled_edata.append(edge_feats["_N:_E:_N/feats"])
shuffled_labels.append(node_feats["_N/labels"])
shuffled_edata.append(edge_feats["_N:_E:_N/feats"])
# Verify that we can reconstruct node/edge data for original IDs.
if reshuffle:
shuffled_labels = F.asnumpy(F.cat(shuffled_labels, 0))
shuffled_edata = F.asnumpy(F.cat(shuffled_edata, 0))
orig_labels = np.zeros(
shuffled_labels.shape, dtype=shuffled_labels.dtype
)
orig_edata = np.zeros(shuffled_edata.shape, dtype=shuffled_edata.dtype)
orig_labels[F.asnumpy(orig_nids)] = shuffled_labels
orig_edata[F.asnumpy(orig_eids)] = shuffled_edata
assert np.all(orig_labels == F.asnumpy(g.ndata["labels"]))
assert np.all(orig_edata == F.asnumpy(g.edata["feats"]))
if reshuffle:
node_map = []
edge_map = []
for i, (num_nodes, num_edges) in enumerate(part_sizes):
node_map.append(np.ones(num_nodes) * i)
edge_map.append(np.ones(num_edges) * i)
node_map = np.concatenate(node_map)
edge_map = np.concatenate(edge_map)
nid2pid = gpb.nid2partid(F.arange(0, len(node_map)))
assert F.dtype(nid2pid) in (F.int32, F.int64)
assert np.all(F.asnumpy(nid2pid) == node_map)
eid2pid = gpb.eid2partid(F.arange(0, len(edge_map)))
assert F.dtype(eid2pid) in (F.int32, F.int64)
assert np.all(F.asnumpy(eid2pid) == edge_map)
shuffled_labels = F.asnumpy(F.cat(shuffled_labels, 0))
shuffled_edata = F.asnumpy(F.cat(shuffled_edata, 0))
orig_labels = np.zeros(
shuffled_labels.shape, dtype=shuffled_labels.dtype
)
orig_edata = np.zeros(shuffled_edata.shape, dtype=shuffled_edata.dtype)
orig_labels[F.asnumpy(orig_nids)] = shuffled_labels
orig_edata[F.asnumpy(orig_eids)] = shuffled_edata
assert np.all(orig_labels == F.asnumpy(g.ndata["labels"]))
assert np.all(orig_edata == F.asnumpy(g.edata["feats"]))
node_map = []
edge_map = []
for i, (num_nodes, num_edges) in enumerate(part_sizes):
node_map.append(np.ones(num_nodes) * i)
edge_map.append(np.ones(num_edges) * i)
node_map = np.concatenate(node_map)
edge_map = np.concatenate(edge_map)
nid2pid = gpb.nid2partid(F.arange(0, len(node_map)))
assert F.dtype(nid2pid) in (F.int32, F.int64)
assert np.all(F.asnumpy(nid2pid) == node_map)
eid2pid = gpb.eid2partid(F.arange(0, len(edge_map)))
assert F.dtype(eid2pid) in (F.int32, F.int64)
assert np.all(F.asnumpy(eid2pid) == edge_map)
@pytest.mark.parametrize("part_method", ["metis", "random"])
@pytest.mark.parametrize("reshuffle", [True, False])
@pytest.mark.parametrize("num_parts", [1, 4])
@pytest.mark.parametrize("num_trainers_per_machine", [1, 4])
@pytest.mark.parametrize("load_feats", [True, False])
......@@ -533,7 +520,6 @@ def check_partition(
)
def test_partition(
part_method,
reshuffle,
num_parts,
num_trainers_per_machine,
load_feats,
......@@ -546,7 +532,6 @@ def test_partition(
check_partition(
g,
part_method,
reshuffle,
num_parts,
num_trainers_per_machine,
load_feats,
......@@ -563,31 +548,6 @@ def test_partition(
)
reset_envs()
def test_BasicPartitionBook():
    """Smoke-test BasicPartitionBook construction and its type/policy API.

    Builds a two-partition book over a random graph and checks the exposed
    edge types, then verifies that EdgePartitionPolicy rejects a plain etype
    string but accepts the canonical etype tuple.
    """
    part_id = 0
    num_parts = 2
    nid2pid = np.random.choice(num_parts, 1000)
    eid2pid = np.random.choice(num_parts, 5000)
    graph = dgl.rand_graph(1000, 5000)
    graph = dgl.node_subgraph(graph, F.arange(0, graph.num_nodes()))
    gpb = BasicPartitionBook(part_id, num_parts, nid2pid, eid2pid, graph)
    c_etype = ("_N", "_E", "_N")
    assert gpb.etypes == ["_E"]
    assert gpb.canonical_etypes == [c_etype]
    node_policy = NodePartitionPolicy(gpb, "_N")
    assert node_policy.type_name == "_N"
    # A bare etype string must be rejected; only the canonical tuple works.
    raised = False
    try:
        EdgePartitionPolicy(gpb, "_E")
    except AssertionError:
        raised = True
    assert raised
    edge_policy = EdgePartitionPolicy(gpb, c_etype)
    assert edge_policy.type_name == c_etype
def test_RangePartitionBook():
part_id = 1
num_parts = 2
......@@ -699,3 +659,27 @@ def test_RangePartitionBook():
assert expect_except
data_name = HeteroDataName(False, c_etype, "feat")
assert data_name.get_type() == c_etype
def test_UnknownPartitionBook():
    """load_partition_book() must raise TypeError for an unsupported book.

    Writes a partition config whose node/edge maps are in an unknown
    (dict-based) format and verifies that loading it fails with TypeError.
    Any other exception propagates unchanged.
    """
    node_map = {'_N': {0: 0, 1: 1, 2: 2}}
    edge_map = {'_N:_E:_N': {0: 0, 1: 1, 2: 2}}
    part_metadata = {
        "num_parts": 1,
        "num_nodes": len(node_map),
        "num_edges": len(edge_map),
        "node_map": node_map,
        "edge_map": edge_map,
        "graph_name": "test_graph",
    }
    with tempfile.TemporaryDirectory() as test_dir:
        part_config = os.path.join(test_dir, "test_graph.json")
        with open(part_config, "w") as file:
            json.dump(part_metadata, file, indent=4)
        # Bug fix: the original try/except passed silently when no exception
        # was raised at all; the test must assert TypeError actually occurs.
        raised = False
        try:
            load_partition_book(part_config, 0)
        except TypeError:
            raised = True
        assert raised, "Expected TypeError for an unknown partition book."
......@@ -92,7 +92,7 @@ def run_client(graph_name, cli_id, part_id, server_count):
os.environ["DGL_NUM_SERVER"] = str(server_count)
dgl.distributed.initialize("optim_ip_config.txt")
gpb, graph_name, _, _ = load_partition_book(
"/tmp/dist_graph/{}.json".format(graph_name), part_id, None
"/tmp/dist_graph/{}.json".format(graph_name), part_id
)
g = DistGraph(graph_name, gpb=gpb)
policy = dgl.distributed.PartitionPolicy("node", g.get_partition_book())
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment