Unverified commit d1cf5c38 authored by Da Zheng, committed by GitHub

[Distributed] adjust various APIs. (#1993)

* rename get_data_size.

* remove g from DistTensor.

* remove g from DistEmbedding.

* clean up API of graph partition book.

* fix DistGraph

* fix lint.

* collect all part policies.

* fix.

* fix.

* support distributed sampler.

* remove partition.py
parent a6b44e72
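
Taken together, the commits above remove the DistGraph/ip_config plumbing from the user-facing constructors: once dgl.distributed.initialize() has set up the RPC connection and the KVStore, DistGraph, DistTensor, and DistEmbedding no longer take the graph object or an IP config file. A before/after sketch (the graph name, config paths, and worker counts are placeholders, not values from this commit):

import dgl
import torch as th

# Old API (before this commit):
#   g = dgl.distributed.DistGraph('ip_config.txt', 'mygraph', part_config='mygraph.json')
#   h = dgl.distributed.DistTensor(g, (g.number_of_nodes(), 16), th.float32, 'h')
#   emb = dgl.distributed.DistEmbedding(g, g.number_of_nodes(), 16, 'emb')

# New API: initialize() establishes the connection once; the constructors
# find the KVStore themselves via get_kvstore().
dgl.distributed.initialize('ip_config.txt', 1, num_workers=4)
g = dgl.distributed.DistGraph('mygraph', part_config='mygraph.json')
h = dgl.distributed.DistTensor((g.number_of_nodes(), 16), th.float32, 'h')
emb = dgl.distributed.DistEmbedding(g.number_of_nodes(), 16, 'emb')
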
......@@ -97,11 +97,11 @@ class DistSAGE(nn.Module):
# TODO: can we standardize this?
nodes = dgl.distributed.node_split(np.arange(g.number_of_nodes()),
g.get_partition_book(), force_even=True)
y = dgl.distributed.DistTensor(g, (g.number_of_nodes(), self.n_hidden), th.float32, 'h',
y = dgl.distributed.DistTensor((g.number_of_nodes(), self.n_hidden), th.float32, 'h',
persistent=True)
for l, layer in enumerate(self.layers):
if l == len(self.layers) - 1:
y = dgl.distributed.DistTensor(g, (g.number_of_nodes(), self.n_classes),
y = dgl.distributed.DistTensor((g.number_of_nodes(), self.n_classes),
th.float32, 'h_last', persistent=True)
sampler = NeighborSampler(g, [-1], dgl.distributed.sample_neighbors, device)
......@@ -263,7 +263,7 @@ def main(args):
dgl.distributed.initialize(args.ip_config, args.num_servers, num_workers=args.num_workers)
if not args.standalone:
th.distributed.init_process_group(backend='gloo')
g = dgl.distributed.DistGraph(args.ip_config, args.graph_name, part_config=args.part_config)
g = dgl.distributed.DistGraph(args.graph_name, part_config=args.part_config)
print('rank:', g.rank())
pb = g.get_partition_book()
......
......@@ -195,11 +195,11 @@ class DistSAGE(SAGE):
# TODO: can we standardize this?
nodes = dgl.distributed.node_split(np.arange(g.number_of_nodes()),
g.get_partition_book(), force_even=True)
y = dgl.distributed.DistTensor(g, (g.number_of_nodes(), self.n_hidden), th.float32, 'h',
y = dgl.distributed.DistTensor((g.number_of_nodes(), self.n_hidden), th.float32, 'h',
persistent=True)
for l, layer in enumerate(self.layers):
if l == len(self.layers) - 1:
y = dgl.distributed.DistTensor(g, (g.number_of_nodes(), self.n_classes),
y = dgl.distributed.DistTensor((g.number_of_nodes(), self.n_classes),
th.float32, 'h_last', persistent=True)
sampler = PosNeighborSampler(g, [-1], dgl.distributed.sample_neighbors)
......@@ -421,7 +421,7 @@ def main(args):
dgl.distributed.initialize(args.ip_config, args.num_servers, num_workers=args.num_workers)
if not args.standalone:
th.distributed.init_process_group(backend='gloo')
g = dgl.distributed.DistGraph(args.ip_config, args.graph_name, part_config=args.part_config)
g = dgl.distributed.DistGraph(args.graph_name, part_config=args.part_config)
print('rank:', g.rank())
print('number of edges', g.number_of_edges())
......
"""Data loading components for neighbor sampling"""
from .dataloader import BlockSampler
-from .. import sampling, subgraph
+from .. import sampling, subgraph, distributed
class MultiLayerNeighborSampler(BlockSampler):
"""Sampler that builds computational dependency of node representations via
......@@ -59,10 +59,16 @@ class MultiLayerNeighborSampler(BlockSampler):
    def sample_frontier(self, block_id, g, seed_nodes):
        fanout = self.fanouts[block_id]
-       if fanout is None:
-           frontier = subgraph.in_subgraph(g, seed_nodes)
+       if isinstance(g, distributed.DistGraph):
+           if fanout is None:
+               frontier = distributed.in_subgraph(g, seed_nodes)
+           else:
+               frontier = distributed.sample_neighbors(g, seed_nodes, fanout, replace=self.replace)
        else:
-           frontier = sampling.sample_neighbors(g, seed_nodes, fanout, replace=self.replace)
+           if fanout is None:
+               frontier = subgraph.in_subgraph(g, seed_nodes)
+           else:
+               frontier = sampling.sample_neighbors(g, seed_nodes, fanout, replace=self.replace)
        return frontier
class MultiLayerFullNeighborSampler(MultiLayerNeighborSampler):
......
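
The sample_frontier() change above makes MultiLayerNeighborSampler graph-type aware: it routes to dgl.distributed.sample_neighbors/in_subgraph when given a DistGraph and to the local dgl.sampling/dgl.subgraph calls otherwise. A sketch of what that enables (the random graph is illustrative only):

import dgl
import torch as th

# One sampler definition now serves both local and distributed training.
sampler = dgl.dataloading.MultiLayerNeighborSampler([10, 25])

# Local case: falls through to dgl.sampling.sample_neighbors.
g = dgl.rand_graph(100, 1000)
frontier = sampler.sample_frontier(0, g, th.arange(10))

# Distributed case: if g were a dgl.distributed.DistGraph, the same call
# would dispatch to dgl.distributed.sample_neighbors with no code change.
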
......@@ -9,7 +9,6 @@ from .. import heterograph_index
from .. import backend as F
from ..base import NID, EID
from .kvstore import KVServer, get_kvstore
from .standalone_kvstore import KVClient as SA_KVClient
from .._ffi.ndarray import empty_shared_mem
from ..frame import infer_scheme
from .partition import load_partition, load_partition_book
......@@ -142,7 +141,7 @@ class NodeDataView(MutableMapping):
name1 = _get_data_name(name, policy.policy_str)
dtype, shape, _ = g._client.get_data_meta(name1)
# We create a wrapper on the existing tensor in the kvstore.
self._data[name] = DistTensor(g, shape, dtype, name, part_policy=policy)
self._data[name] = DistTensor(shape, dtype, name, part_policy=policy)
def _get_names(self):
return list(self._data.keys())
......@@ -188,7 +187,7 @@ class EdgeDataView(MutableMapping):
name1 = _get_data_name(name, policy.policy_str)
dtype, shape, _ = g._client.get_data_meta(name1)
# We create a wrapper on the existing tensor in the kvstore.
self._data[name] = DistTensor(g, shape, dtype, name, part_policy=policy)
self._data[name] = DistTensor(shape, dtype, name, part_policy=policy)
def _get_names(self):
return list(self._data.keys())
......@@ -321,8 +320,6 @@ class DistGraph:
Parameters
----------
ip_config : str
Path of IP configuration file.
graph_name : str
The name of the graph. This name has to be the same as the one used in DistGraphServer.
gpb : PartitionBook
......@@ -330,14 +327,13 @@ class DistGraph:
part_config : str
The partition config file. It's used in the standalone mode.
'''
-    def __init__(self, ip_config, graph_name, gpb=None, part_config=None):
-        self.ip_config = ip_config
+    def __init__(self, graph_name, gpb=None, part_config=None):
        self.graph_name = graph_name
        self._gpb_input = gpb
        if os.environ.get('DGL_DIST_MODE', 'standalone') == 'standalone':
            assert part_config is not None, \
                'When running in the standalone mode, the partition config file is required'
-           self._client = SA_KVClient()
+           self._client = get_kvstore()
            # Load graph partition data.
            g, node_feats, edge_feats, self._gpb, _ = load_partition(part_config, 0)
            assert self._gpb.num_partitions() == 1, \
......@@ -349,6 +345,7 @@ class DistGraph:
self._client.add_data(_get_data_name(name, NODE_PART_POLICY), node_feats[name])
for name in edge_feats:
self._client.add_data(_get_data_name(name, EDGE_PART_POLICY), edge_feats[name])
self._client.map_shared_data(self._gpb)
rpc.set_num_client(1)
else:
self._init()
......@@ -377,10 +374,10 @@ class DistGraph:
self._client.map_shared_data(self._gpb)
def __getstate__(self):
return self.ip_config, self.graph_name, self._gpb
return self.graph_name, self._gpb
def __setstate__(self, state):
self.ip_config, self.graph_name, self._gpb_input = state
self.graph_name, self._gpb_input = state
self._init()
self._ndata = NodeDataView(self)
......@@ -428,6 +425,43 @@ class DistGraph:
"""
return self._edata
@property
def idtype(self):
"""The dtype of graph index
Returns
-------
backend dtype object
th.int32/th.int64 or tf.int32/tf.int64 etc.
See Also
--------
long
int
"""
return self._g.idtype
@property
def device(self):
"""Get the device context of this graph.
Examples
--------
The following example uses PyTorch backend.
>>> g = dgl.bipartite(([0, 1, 1, 2], [0, 0, 2, 1]), 'user', 'plays', 'game')
>>> print(g.device)
device(type='cpu')
>>> g = g.to('cuda:0')
>>> print(g.device)
device(type='cuda', index=0)
Returns
-------
Device context object
"""
return self._g.device
@property
def ntypes(self):
"""Return the list of node types of this graph.
......@@ -439,7 +473,7 @@ class DistGraph:
Examples
--------
>>> g = DistGraph("ip_config.txt", "test")
>>> g = DistGraph("test")
>>> g.ntypes
['_U']
"""
......@@ -457,7 +491,7 @@ class DistGraph:
Examples
--------
>>> g = DistGraph("ip_config.txt", "test")
>>> g = DistGraph("test")
>>> g.etypes
['_E']
"""
......
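
A side effect of the slimmer __getstate__/__setstate__ above (graph name plus partition book, with no IP config path) is that a DistGraph pickles cleanly into sampler worker processes; the receiving process reconnects through _init(). A minimal sketch, assuming dgl.distributed.initialize() has run on the receiving side:

import pickle

# g is an existing dgl.distributed.DistGraph.
blob = pickle.dumps(g)    # serializes (graph_name, partition_book)
g2 = pickle.loads(blob)   # __setstate__ calls _init() to reconnect
assert g2.graph_name == g.graph_name
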
......@@ -2,9 +2,8 @@
import os
from .graph_partition_book import PartitionPolicy, NODE_PART_POLICY, EDGE_PART_POLICY
from .dist_context import is_initialized
from ..base import DGLError
from .kvstore import get_kvstore
from .. import utils
from .. import backend as F
......@@ -35,8 +34,6 @@ class DistTensor:
Parameters
----------
g : DistGraph
The distributed graph object.
shape : tuple
The shape of the tensor
dtype : dtype
......@@ -50,27 +47,34 @@ class DistTensor:
persistent : bool
Whether the created tensor is persistent.
'''
-   def __init__(self, g, shape, dtype, name=None, init_func=None, part_policy=None,
+   def __init__(self, shape, dtype, name=None, init_func=None, part_policy=None,
                 persistent=False):
-       self.kvstore = g._client
+       self.kvstore = get_kvstore()
        self._shape = shape
        self._dtype = dtype
+       part_policies = self.kvstore.all_possible_part_policy
        # If a user doesn't provide a partition policy, we should find one based on
        # the input shape.
        if part_policy is None:
-           assert shape[0] != g.number_of_nodes() or shape[0] != g.number_of_edges(), \
+           for policy_name in part_policies:
+               policy = part_policies[policy_name]
+               if policy.get_size() == shape[0]:
+                   # If multiple partition policies match the input shape, we cannot
+                   # decide which is the right one automatically. We should ask users
+                   # to provide one.
+                   assert part_policy is None, \
+                       'Multiple partition policies match the input shape. ' \
+                       + 'Please provide a partition policy explicitly.'
+                   part_policy = policy
+           assert part_policy is not None, \
+               'Cannot determine the partition policy. Please provide it.'
-           if shape[0] == g.number_of_nodes():
-               part_policy = PartitionPolicy(NODE_PART_POLICY, g.get_partition_book())
-           elif shape[0] == g.number_of_edges():
-               part_policy = PartitionPolicy(EDGE_PART_POLICY, g.get_partition_book())
-           else:
-               raise DGLError('Cannot determine the partition policy. Please provide it.')
        self._part_policy = part_policy
        if init_func is None:
            init_func = _default_init_data
-       exist_names = g._client.data_name_list()
+       exist_names = self.kvstore.data_name_list()
# If a user doesn't provide a name, we generate a name ourselves.
# We need to generate the name in a deterministic way.
if name is None:
......@@ -79,11 +83,11 @@ class DistTensor:
self._name = _get_data_name(name, part_policy.policy_str)
self._persistent = persistent
if self._name not in exist_names:
-           g._client.init_data(self._name, shape, dtype, part_policy, init_func)
+           self.kvstore.init_data(self._name, shape, dtype, part_policy, init_func)
            self._owner = True
        else:
            self._owner = False
-           dtype1, shape1, _ = g._client.get_data_meta(self._name)
+           dtype1, shape1, _ = self.kvstore.get_data_meta(self._name)
assert dtype == dtype1, 'The dtype does not match with the existing tensor'
assert shape == shape1, 'The shape does not match with the existing tensor'
......
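
The loop added above infers the partition policy by matching shape[0] against get_size() of every policy registered in the KVStore, instead of asking a DistGraph. A sketch of the resulting behavior, assuming an initialized setup with a DistGraph g:

import dgl
import torch as th

# shape[0] equals the node policy's get_size(), so the policy is inferred.
h = dgl.distributed.DistTensor((g.number_of_nodes(), 32), th.float32, 'h')

# If several policies matched (e.g. a graph with as many edges as nodes),
# the constructor asserts; pass the policy explicitly in that case:
policy = dgl.distributed.PartitionPolicy('edge', g.get_partition_book())
w = dgl.distributed.DistTensor((g.number_of_edges(), 1), th.float32, 'w',
                               part_policy=policy)
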
......@@ -306,41 +306,6 @@ class GraphPartitionBook:
getting remote tensor of eid2localeid.')
return F.gather_row(self._eidg2l[partid], eids)
def get_partition(self, partid):
"""Get the graph of one partition.
Parameters
----------
partid : int
Partition ID.
Returns
-------
DGLGraph
The graph of the partition.
"""
#TODO(zhengda) add implementation later.
def get_node_size(self):
"""Get the number of nodes in the current partition.
Return
------
int
The number of nodes in current partition
"""
return self._node_size
def get_edge_size(self):
"""Get the number of edges in the current partition.
Return
------
int
The number of edges in current partition
"""
return self._edge_size
@property
def partid(self):
"""Get the current partition id
......@@ -573,45 +538,6 @@ class RangePartitionBook:
return eids - int(start)
def get_partition(self, partid):
"""Get the graph of one partition.
Parameters
----------
partid : int
Partition ID.
Returns
-------
DGLGraph
The graph of the partition.
"""
#TODO(zhengda) add implementation later.
def get_node_size(self):
"""Get the number of nodes in the current partition.
Return
------
int
The number of nodes in current partition
"""
range_start = self._node_map[self._partid - 1] if self._partid > 0 else 0
range_end = self._node_map[self._partid]
return range_end - range_start
def get_edge_size(self):
"""Get the number of edges in the current partition.
Return
------
int
The number of edges in current partition
"""
range_start = self._edge_map[self._partid - 1] if self._partid > 0 else 0
range_end = self._edge_map[self._partid]
return range_end - range_start
@property
def partid(self):
"""Get the current partition id
......@@ -701,7 +627,7 @@ class PartitionPolicy(object):
else:
raise RuntimeError('Cannot support policy: %s ' % self._policy_str)
def get_data_size(self):
def get_part_size(self):
"""Get data size of current partition.
Returns
......@@ -715,3 +641,18 @@ class PartitionPolicy(object):
return len(self._partition_book.partid2nids(self._part_id))
else:
raise RuntimeError('Cannot support policy: %s ' % self._policy_str)
def get_size(self):
"""Get the full size of the data.
Returns
-------
int
data size
"""
if self._policy_str == EDGE_PART_POLICY:
return self._partition_book._num_edges()
elif self._policy_str == NODE_PART_POLICY:
return self._partition_book._num_nodes()
else:
raise RuntimeError('Cannot support policy: %s ' % self._policy_str)
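
After the rename, a policy reports two distinct sizes: get_part_size() is the number of rows stored on the current partition, while the new get_size() is the number of rows of the full distributed tensor, which is what DistTensor matches against shape[0]. A short sketch, assuming a DistGraph g:

policy = dgl.distributed.PartitionPolicy('node', g.get_partition_book())
assert policy.get_size() == g.number_of_nodes()     # rows of the full tensor
assert policy.get_part_size() <= policy.get_size()  # rows held locally
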
......@@ -738,11 +738,11 @@ class KVServer(object):
dlpack = shared_data.to_dlpack()
self._data_store[name] = F.zerocopy_from_dlpack(dlpack)
self._data_store[name][:] = data_tensor[:]
assert self._part_policy[name].get_data_size() == data_tensor.shape[0], \
assert self._part_policy[name].get_part_size() == data_tensor.shape[0], \
'kvserver expect partition {} for {} has {} rows, but gets {} rows'.format(
self._part_policy[name].part_id,
policy_str,
self._part_policy[name].get_data_size(),
self._part_policy[name].get_part_size(),
data_tensor.shape[0])
self._pull_handlers[name] = default_pull_handler
self._push_handlers[name] = default_push_handler
......@@ -821,6 +821,8 @@ class KVClient(object):
self._data_store = {}
# Store the partition information with specified data name
self._part_policy = {}
# This stores all unique partition policies in the kvstore. The key is the policy name.
self._all_possible_part_policy = {}
# Store the full data shape across kvserver
self._full_data_shape = {}
# Store all the data name
......@@ -840,6 +842,11 @@ class KVClient(object):
# register role on server-0
self._role = role
@property
def all_possible_part_policy(self):
"""Get all possible partition policies"""
return self._all_possible_part_policy
@property
def client_id(self):
"""Get client ID"""
......@@ -961,7 +968,7 @@ class KVClient(object):
# Send request to the servers to initialize data.
# The servers may handle the duplicated initializations.
part_shape = shape.copy()
part_shape[0] = part_policy.get_data_size()
part_shape[0] = part_policy.get_part_size()
request = InitDataRequest(name,
tuple(part_shape),
F.reverse_data_type_dict[dtype],
......@@ -978,7 +985,7 @@ class KVClient(object):
self.barrier()
# Create local shared-data
local_shape = shape.copy()
local_shape[0] = part_policy.get_data_size()
local_shape[0] = part_policy.get_part_size()
if name in self._part_policy:
raise RuntimeError("Policy %s has already exists!" % name)
if name in self._data_store:
......@@ -986,6 +993,7 @@ class KVClient(object):
if name in self._full_data_shape:
raise RuntimeError("Data shape %s has already exists!" % name)
self._part_policy[name] = part_policy
self._all_possible_part_policy[part_policy.policy_str] = part_policy
shared_data = empty_shared_mem(name+'-kvdata-', False, \
local_shape, F.reverse_data_type_dict[dtype])
dlpack = shared_data.to_dlpack()
......@@ -1062,6 +1070,7 @@ class KVClient(object):
dlpack = shared_data.to_dlpack()
self._data_store[name] = F.zerocopy_from_dlpack(dlpack)
self._part_policy[name] = PartitionPolicy(policy_str, partition_book)
self._all_possible_part_policy[policy_str] = self._part_policy[name]
self._pull_handlers[name] = default_pull_handler
self._push_handlers[name] = default_push_handler
# Get full data shape across servers
......
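
The registry populated above is what DistTensor's inference consumes: every init_data() and map_shared_data() records its PartitionPolicy under the policy name, so a kvstore holds at most one 'node' and one 'edge' policy. A sketch of inspecting it through the internal get_kvstore() accessor (illustrative only, not a public API):

from dgl.distributed.kvstore import get_kvstore

kv = get_kvstore()
for policy_str, policy in kv.all_possible_part_policy.items():
    print(policy_str, policy.get_size())  # e.g. 'node' 1000, 'edge' 5000
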
......@@ -3,17 +3,12 @@
from .. import backend as F
from .. import utils
from .dist_tensor import DistTensor
from .graph_partition_book import PartitionPolicy, NODE_PART_POLICY
class DistEmbedding:
'''Embeddings in the distributed training.
By default, the embeddings are created for nodes in the graph.
Parameters
----------
g : DistGraph
The distributed graph object.
num_embeddings : int
The number of embeddings
embedding_dim : int
......@@ -28,7 +23,7 @@ class DistEmbedding:
Examples
--------
>>> emb_init = lambda shape, dtype: F.zeros(shape, dtype, F.cpu())
>>> emb = dgl.distributed.DistEmbedding(g, g.number_of_nodes(), 10)
>>> emb = dgl.distributed.DistEmbedding(g.number_of_nodes(), 10)
>>> optimizer = dgl.distributed.SparseAdagrad([emb], lr=0.001)
>>> for blocks in dataloader:
>>> feats = emb(nids)
......@@ -36,12 +31,9 @@ class DistEmbedding:
>>> loss.backward()
>>> optimizer.step()
'''
-   def __init__(self, g, num_embeddings, embedding_dim, name=None,
+   def __init__(self, num_embeddings, embedding_dim, name=None,
                 init_func=None, part_policy=None):
-       if part_policy is None:
-           part_policy = PartitionPolicy(NODE_PART_POLICY, g.get_partition_book())
-       self._tensor = DistTensor(g, (num_embeddings, embedding_dim), F.float32, name,
+       self._tensor = DistTensor((num_embeddings, embedding_dim), F.float32, name,
                                  init_func, part_policy)
self._trace = []
......
......@@ -4,6 +4,7 @@ This kvstore is used when running in the standalone mode
"""
from .. import backend as F
from .graph_partition_book import PartitionPolicy, NODE_PART_POLICY, EDGE_PART_POLICY
class KVClient(object):
''' The fake KVStore client.
......@@ -13,9 +14,15 @@ class KVClient(object):
'''
def __init__(self):
self._data = {}
self._all_possible_part_policy = {}
self._push_handlers = {}
self._pull_handlers = {}
@property
def all_possible_part_policy(self):
"""Get all possible partition policies"""
return self._all_possible_part_policy
def barrier(self):
'''barrier'''
......@@ -31,9 +38,11 @@ class KVClient(object):
'''add data to the client'''
self._data[name] = tensor
def init_data(self, name, shape, dtype, _, init_func):
def init_data(self, name, shape, dtype, part_policy, init_func):
'''add new data to the client'''
self._data[name] = init_func(shape, dtype)
if part_policy.policy_str not in self._all_possible_part_policy:
self._all_possible_part_policy[part_policy.policy_str] = part_policy
def delete_data(self, name):
'''delete the data'''
......@@ -63,3 +72,7 @@ class KVClient(object):
def map_shared_data(self, partition_book):
'''Mapping shared-memory tensor from server to client.'''
self._all_possible_part_policy[NODE_PART_POLICY] = PartitionPolicy(NODE_PART_POLICY,
partition_book)
self._all_possible_part_policy[EDGE_PART_POLICY] = PartitionPolicy(EDGE_PART_POLICY,
partition_book)
......@@ -71,7 +71,7 @@ def run_client(graph_name, part_id, server_count, num_clients, num_nodes, num_ed
dgl.distributed.initialize("kv_ip_config.txt", server_count)
gpb, graph_name = load_partition_book('/tmp/dist_graph/{}.json'.format(graph_name),
part_id, None)
g = DistGraph("kv_ip_config.txt", graph_name, gpb=gpb)
g = DistGraph(graph_name, gpb=gpb)
check_dist_graph(g, num_clients, num_nodes, num_edges)
def check_dist_graph(g, num_clients, num_nodes, num_edges):
......@@ -93,34 +93,34 @@ def check_dist_graph(g, num_clients, num_nodes, num_edges):
# Test init node data
new_shape = (g.number_of_nodes(), 2)
g.ndata['test1'] = dgl.distributed.DistTensor(g, new_shape, F.int32)
g.ndata['test1'] = dgl.distributed.DistTensor(new_shape, F.int32)
feats = g.ndata['test1'][nids]
assert np.all(F.asnumpy(feats) == 0)
# reference a tensor that already exists
test2 = dgl.distributed.DistTensor(g, new_shape, F.float32, 'test2', init_func=rand_init)
test3 = dgl.distributed.DistTensor(g, new_shape, F.float32, 'test2')
test2 = dgl.distributed.DistTensor(new_shape, F.float32, 'test2', init_func=rand_init)
test3 = dgl.distributed.DistTensor(new_shape, F.float32, 'test2')
assert np.all(F.asnumpy(test2[nids]) == F.asnumpy(test3[nids]))
# create a tensor, destroy it, and create it again.
test3 = dgl.distributed.DistTensor(g, new_shape, F.float32, 'test3', init_func=rand_init)
test3 = dgl.distributed.DistTensor(new_shape, F.float32, 'test3', init_func=rand_init)
del test3
test3 = dgl.distributed.DistTensor(g, (g.number_of_nodes(), 3), F.float32, 'test3')
test3 = dgl.distributed.DistTensor((g.number_of_nodes(), 3), F.float32, 'test3')
del test3
# test a persistent tensor
test4 = dgl.distributed.DistTensor(g, new_shape, F.float32, 'test4', init_func=rand_init,
test4 = dgl.distributed.DistTensor(new_shape, F.float32, 'test4', init_func=rand_init,
persistent=True)
del test4
try:
test4 = dgl.distributed.DistTensor(g, (g.number_of_nodes(), 3), F.float32, 'test4')
test4 = dgl.distributed.DistTensor((g.number_of_nodes(), 3), F.float32, 'test4')
raise Exception('')
except:
pass
# Test sparse emb
try:
emb = DistEmbedding(g, g.number_of_nodes(), 1, 'emb1', emb_init)
emb = DistEmbedding(g.number_of_nodes(), 1, 'emb1', emb_init)
lr = 0.001
optimizer = SparseAdagrad([emb], lr=lr)
with F.record_grad():
......@@ -137,13 +137,13 @@ def check_dist_graph(g, num_clients, num_nodes, num_edges):
assert np.all(F.asnumpy(feats1) == np.zeros((len(rest), 1)))
policy = dgl.distributed.PartitionPolicy('node', g.get_partition_book())
grad_sum = dgl.distributed.DistTensor(g, (g.number_of_nodes(),), F.float32,
grad_sum = dgl.distributed.DistTensor((g.number_of_nodes(),), F.float32,
'emb1_sum', policy)
if num_clients == 1:
assert np.all(F.asnumpy(grad_sum[nids]) == np.ones((len(nids), 1)) * num_clients)
assert np.all(F.asnumpy(grad_sum[rest]) == np.zeros((len(rest), 1)))
emb = DistEmbedding(g, g.number_of_nodes(), 1, 'emb2', emb_init)
emb = DistEmbedding(g.number_of_nodes(), 1, 'emb2', emb_init)
with F.no_grad():
feats1 = emb(nids)
assert np.all(F.asnumpy(feats1) == 0)
......@@ -251,8 +251,7 @@ def test_standalone():
partition_graph(g, graph_name, num_parts, '/tmp/dist_graph')
dgl.distributed.initialize("kv_ip_config.txt")
dist_g = DistGraph("kv_ip_config.txt", graph_name,
part_config='/tmp/dist_graph/{}.json'.format(graph_name))
dist_g = DistGraph(graph_name, part_config='/tmp/dist_graph/{}.json'.format(graph_name))
check_dist_graph(dist_g, 1, g.number_of_nodes(), g.number_of_edges())
dgl.distributed.exit_client() # this is needed since there are two tests here in one process
......
......@@ -26,7 +26,7 @@ def start_sample_client(rank, tmpdir, disable_shared_mem):
if disable_shared_mem:
_, _, _, gpb, _ = load_partition(tmpdir / 'test_sampling.json', rank)
dgl.distributed.initialize("rpc_ip_config.txt", 1)
dist_graph = DistGraph("rpc_ip_config.txt", "test_sampling", gpb=gpb)
dist_graph = DistGraph("test_sampling", gpb=gpb)
sampled_graph = sample_neighbors(dist_graph, [0, 10, 99, 66, 1024, 2008], 3)
dgl.distributed.exit_client()
return sampled_graph
......@@ -36,7 +36,7 @@ def start_find_edges_client(rank, tmpdir, disable_shared_mem, eids):
if disable_shared_mem:
_, _, _, gpb, _ = load_partition(tmpdir / 'test_find_edges.json', rank)
dgl.distributed.initialize("rpc_ip_config.txt", 1)
dist_graph = DistGraph("rpc_ip_config.txt", "test_find_edges", gpb=gpb)
dist_graph = DistGraph("test_find_edges", gpb=gpb)
u, v = find_edges(dist_graph, eids)
dgl.distributed.exit_client()
return u, v
......@@ -176,7 +176,7 @@ def check_standalone_sampling(tmpdir):
os.environ['DGL_DIST_MODE'] = 'standalone'
dgl.distributed.initialize("rpc_ip_config.txt", 1)
dist_graph = DistGraph(None, "test_sampling", part_config=tmpdir / 'test_sampling.json')
dist_graph = DistGraph("test_sampling", part_config=tmpdir / 'test_sampling.json')
sampled_graph = sample_neighbors(dist_graph, [0, 10, 99, 66, 1024, 2008], 3)
src, dst = sampled_graph.edges()
......@@ -200,7 +200,7 @@ def start_in_subgraph_client(rank, tmpdir, disable_shared_mem, nodes):
dgl.distributed.initialize("rpc_ip_config.txt", 1)
if disable_shared_mem:
_, _, _, gpb, _ = load_partition(tmpdir / 'test_in_subgraph.json', rank)
dist_graph = DistGraph("rpc_ip_config.txt", "test_in_subgraph", gpb=gpb)
dist_graph = DistGraph("test_in_subgraph", gpb=gpb)
sampled_graph = dgl.distributed.in_subgraph(dist_graph, nodes)
dgl.distributed.exit_client()
return sampled_graph
......
......@@ -55,8 +55,7 @@ def start_client(rank, tmpdir, disable_shared_mem, num_workers, drop_last):
num_nodes_to_sample = 202
batch_size = 32
train_nid = th.arange(num_nodes_to_sample)
dist_graph = DistGraph("mp_ip_config.txt", "test_mp", gpb=gpb,
part_config=tmpdir / 'test_sampling.json')
dist_graph = DistGraph("test_mp", gpb=gpb, part_config=tmpdir / 'test_sampling.json')
# Create sampler
sampler = NeighborSampler(dist_graph, [5, 10],
......
......@@ -100,8 +100,8 @@ def test_partition_policy():
eid_partid = edge_policy.to_partid(F.tensor([0,1,2,3,4,5,6], F.int64))
assert_array_equal(F.asnumpy(nid_partid), F.asnumpy(F.tensor([0,0,0,0,0,0], F.int64)))
assert_array_equal(F.asnumpy(eid_partid), F.asnumpy(F.tensor([0,0,0,0,0,0,0], F.int64)))
assert node_policy.get_data_size() == len(node_map)
assert edge_policy.get_data_size() == len(edge_map)
assert node_policy.get_part_size() == len(node_map)
assert edge_policy.get_part_size() == len(edge_map)
def start_server(server_id, num_clients, num_servers):
# Init kvserver
......
import numpy as np
import argparse
import signal
import dgl
from dgl import backend as F
from dgl.data.utils import load_graphs, save_graphs
from dgl.contrib.dist_graph import partition_graph
import pickle
def main():
parser = argparse.ArgumentParser(description='Partition a graph')
parser.add_argument('--data', required=True, type=str,
help='The file path of the input graph in the DGL format.')
parser.add_argument('--graph-name', required=True, type=str,
help='The graph name')
parser.add_argument('-k', '--num-parts', required=True, type=int,
help='The number of partitions')
parser.add_argument('--num-hops', type=int, default=1,
help='The number of hops of HALO nodes we include in a partition')
parser.add_argument('-m', '--method', required=True, type=str,
help='The partitioning method: random, metis')
parser.add_argument('-o', '--output', required=True, type=str,
help='The output directory of the partitioned results')
args = parser.parse_args()
glist, _ = load_graphs(args.data)
g = glist[0]
partition_graph(g, args.graph_name, args.num_parts, args.output,
num_hops=args.num_hops, part_method=args.method)
if __name__ == '__main__':
main()
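
With the script above deleted ('remove partition.py' in the commit message), partitioning goes through the library API instead. A minimal sketch; the input file and output directory are placeholders:

import dgl
from dgl.data.utils import load_graphs

glist, _ = load_graphs('graph.dgl')  # placeholder input path
dgl.distributed.partition_graph(glist[0], 'mygraph', 2, '/tmp/parts',
                                num_hops=1, part_method='metis')
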