Unverified Commit 89a4cc4d authored by Hongzhi (Steve), Chen's avatar Hongzhi (Steve), Chen Committed by GitHub
Browse files

[Misc] Black auto fix. (#4694)


Co-authored-by: default avatarSteve <ubuntu@ip-172-31-34-29.ap-northeast-1.compute.internal>
parent 303b150f
import dgl
import numpy as np
import unittest
import backend as F
import networkx as nx
import unittest
import numpy as np
import pytest
from test_utils.graph_cases import get_cases
from test_utils import parametrize_idtype
from test_utils.graph_cases import get_cases
import dgl
@parametrize_idtype
def test_sum_case1(idtype):
    """Check dgl.sum_nodes on a single graph and a batched graph,
    with and without per-node weights."""
    # NOTE: If you want to update this test case, remember to update the docstring
    # example too!!!
    g1 = dgl.graph(([0, 1], [1, 0]), idtype=idtype, device=F.ctx())
    g1.ndata["h"] = F.tensor([1.0, 2.0])
    g2 = dgl.graph(([0, 1], [1, 2]), idtype=idtype, device=F.ctx())
    g2.ndata["h"] = F.tensor([1.0, 2.0, 3.0])
    bg = dgl.batch([g1, g2])
    # Node weights used for the weighted-sum readout below.
    bg.ndata["w"] = F.tensor([0.1, 0.2, 0.1, 0.5, 0.2])
    assert F.allclose(F.tensor([3.0]), dgl.sum_nodes(g1, "h"))
    # Batched readout returns one value per component graph.
    assert F.allclose(F.tensor([3.0, 6.0]), dgl.sum_nodes(bg, "h"))
    assert F.allclose(F.tensor([0.5, 1.7]), dgl.sum_nodes(bg, "h", "w"))
@parametrize_idtype
@pytest.mark.parametrize("g", get_cases(["homo"], exclude=["dglgraph"]))
@pytest.mark.parametrize("reducer", ["sum", "max", "mean"])
def test_reduce_readout(g, idtype, reducer):
    """Check that node/edge readout on a batched graph matches the
    per-component readout after dgl.unbatch, for each reducer."""
    g = g.astype(idtype).to(F.ctx())
    g.ndata["h"] = F.randn((g.number_of_nodes(), 3))
    g.edata["h"] = F.randn((g.number_of_edges(), 2))

    # Test.1: node readout
    x = dgl.readout_nodes(g, "h", op=reducer)
    # check correctness against per-subgraph readout
    subg = dgl.unbatch(g)
    subx = []
    for sg in subg:
        sx = dgl.readout_nodes(sg, "h", op=reducer)
        subx.append(sx)
    assert F.allclose(x, F.cat(subx, dim=0))

    # The named convenience functions (sum_nodes/max_nodes/mean_nodes)
    # must agree with readout_nodes.
    x = getattr(dgl, "{}_nodes".format(reducer))(g, "h")
    # check correctness
    subg = dgl.unbatch(g)
    subx = []
    for sg in subg:
        sx = getattr(dgl, "{}_nodes".format(reducer))(sg, "h")
        subx.append(sx)
    assert F.allclose(x, F.cat(subx, dim=0))

    # Test.2: edge readout
    x = dgl.readout_edges(g, "h", op=reducer)
    # check correctness
    subg = dgl.unbatch(g)
    subx = []
    for sg in subg:
        sx = dgl.readout_edges(sg, "h", op=reducer)
        subx.append(sx)
    assert F.allclose(x, F.cat(subx, dim=0))

    x = getattr(dgl, "{}_edges".format(reducer))(g, "h")
    # check correctness
    subg = dgl.unbatch(g)
    subx = []
    for sg in subg:
        sx = getattr(dgl, "{}_edges".format(reducer))(sg, "h")
        subx.append(sx)
    assert F.allclose(x, F.cat(subx, dim=0))
@parametrize_idtype
@pytest.mark.parametrize("g", get_cases(["homo"], exclude=["dglgraph"]))
@pytest.mark.parametrize("reducer", ["sum", "max", "mean"])
def test_weighted_reduce_readout(g, idtype, reducer):
    """Same as test_reduce_readout, but with a per-node/per-edge weight
    feature "w" applied to the readout."""
    g = g.astype(idtype).to(F.ctx())
    g.ndata["h"] = F.randn((g.number_of_nodes(), 3))
    g.ndata["w"] = F.randn((g.number_of_nodes(), 1))
    g.edata["h"] = F.randn((g.number_of_edges(), 2))
    g.edata["w"] = F.randn((g.number_of_edges(), 1))

    # Test.1: node readout
    x = dgl.readout_nodes(g, "h", "w", op=reducer)
    # check correctness against per-subgraph readout
    subg = dgl.unbatch(g)
    subx = []
    for sg in subg:
        sx = dgl.readout_nodes(sg, "h", "w", op=reducer)
        subx.append(sx)
    assert F.allclose(x, F.cat(subx, dim=0))

    # Named convenience functions must agree with readout_nodes.
    x = getattr(dgl, "{}_nodes".format(reducer))(g, "h", "w")
    # check correctness
    subg = dgl.unbatch(g)
    subx = []
    for sg in subg:
        sx = getattr(dgl, "{}_nodes".format(reducer))(sg, "h", "w")
        subx.append(sx)
    assert F.allclose(x, F.cat(subx, dim=0))

    # Test.2: edge readout
    x = dgl.readout_edges(g, "h", "w", op=reducer)
    # check correctness
    subg = dgl.unbatch(g)
    subx = []
    for sg in subg:
        sx = dgl.readout_edges(sg, "h", "w", op=reducer)
        subx.append(sx)
    assert F.allclose(x, F.cat(subx, dim=0))

    x = getattr(dgl, "{}_edges".format(reducer))(g, "h", "w")
    # check correctness
    subg = dgl.unbatch(g)
    subx = []
    for sg in subg:
        sx = getattr(dgl, "{}_edges".format(reducer))(sg, "h", "w")
        subx.append(sx)
    assert F.allclose(x, F.cat(subx, dim=0))
@parametrize_idtype
@pytest.mark.parametrize('g', get_cases(['homo'], exclude=['dglgraph']))
@pytest.mark.parametrize('descending', [True, False])
@pytest.mark.parametrize("g", get_cases(["homo"], exclude=["dglgraph"]))
@pytest.mark.parametrize("descending", [True, False])
def test_topk(g, idtype, descending):
g = g.astype(idtype).to(F.ctx())
g.ndata['x'] = F.randn((g.number_of_nodes(), 3))
g.ndata["x"] = F.randn((g.number_of_nodes(), 3))
# Test.1: to test the case where k > number of nodes.
dgl.topk_nodes(g, 'x', 100, sortby=-1)
dgl.topk_nodes(g, "x", 100, sortby=-1)
# Test.2: test correctness
min_nnodes = F.asnumpy(g.batch_num_nodes()).min()
if min_nnodes <= 1:
return
k = min_nnodes - 1
val, indices = dgl.topk_nodes(g, 'x', k, descending=descending, sortby=-1)
val, indices = dgl.topk_nodes(g, "x", k, descending=descending, sortby=-1)
print(k)
print(g.ndata['x'])
print('val', val)
print('indices', indices)
print(g.ndata["x"])
print("val", val)
print("indices", indices)
subg = dgl.unbatch(g)
subval, subidx = [], []
for sg in subg:
subx = F.asnumpy(sg.ndata['x'])
ai = np.argsort(subx[:,-1:].flatten())
subx = F.asnumpy(sg.ndata["x"])
ai = np.argsort(subx[:, -1:].flatten())
if descending:
ai = np.ascontiguousarray(ai[::-1])
subx = np.expand_dims(subx[ai[:k]], 0)
......@@ -150,28 +156,28 @@ def test_topk(g, idtype, descending):
assert F.allclose(indices, F.cat(subidx, dim=0))
# Test.3: sorby=None
dgl.topk_nodes(g, 'x', k, sortby=None)
dgl.topk_nodes(g, "x", k, sortby=None)
g.edata['x'] = F.randn((g.number_of_edges(), 3))
g.edata["x"] = F.randn((g.number_of_edges(), 3))
# Test.4: topk edges where k > number of edges.
dgl.topk_edges(g, 'x', 100, sortby=-1)
dgl.topk_edges(g, "x", 100, sortby=-1)
# Test.5: topk edges test correctness
min_nedges = F.asnumpy(g.batch_num_edges()).min()
if min_nedges <= 1:
return
k = min_nedges - 1
val, indices = dgl.topk_edges(g, 'x', k, descending=descending, sortby=-1)
val, indices = dgl.topk_edges(g, "x", k, descending=descending, sortby=-1)
print(k)
print(g.edata['x'])
print('val', val)
print('indices', indices)
print(g.edata["x"])
print("val", val)
print("indices", indices)
subg = dgl.unbatch(g)
subval, subidx = [], []
for sg in subg:
subx = F.asnumpy(sg.edata['x'])
ai = np.argsort(subx[:,-1:].flatten())
subx = F.asnumpy(sg.edata["x"])
ai = np.argsort(subx[:, -1:].flatten())
if descending:
ai = np.ascontiguousarray(ai[::-1])
subx = np.expand_dims(subx[ai[:k]], 0)
......@@ -181,45 +187,51 @@ def test_topk(g, idtype, descending):
assert F.allclose(val, F.cat(subval, dim=0))
assert F.allclose(indices, F.cat(subidx, dim=0))
@parametrize_idtype
@pytest.mark.parametrize("g", get_cases(["homo"], exclude=["dglgraph"]))
def test_softmax(g, idtype):
    """Check that softmax_nodes/softmax_edges on a batched graph equal a
    per-component softmax over the unbatched subgraphs."""
    g = g.astype(idtype).to(F.ctx())
    g.ndata["h"] = F.randn((g.number_of_nodes(), 3))
    g.edata["h"] = F.randn((g.number_of_edges(), 2))

    # Test.1: node readout
    x = dgl.softmax_nodes(g, "h")
    subg = dgl.unbatch(g)
    subx = []
    for sg in subg:
        # Softmax is taken independently within each component graph.
        subx.append(F.softmax(sg.ndata["h"], dim=0))
    assert F.allclose(x, F.cat(subx, dim=0))

    # Test.2: edge readout
    x = dgl.softmax_edges(g, "h")
    subg = dgl.unbatch(g)
    subx = []
    for sg in subg:
        subx.append(F.softmax(sg.edata["h"], dim=0))
    assert F.allclose(x, F.cat(subx, dim=0))
@parametrize_idtype
@pytest.mark.parametrize("g", get_cases(["homo"], exclude=["dglgraph"]))
def test_broadcast(idtype, g):
    """Check that broadcast_nodes/broadcast_edges copy each graph-level
    feature row to every node/edge of the corresponding component graph."""
    g = g.astype(idtype).to(F.ctx())
    # One 3-dim feature row per component graph in the batch.
    gfeat = F.randn((g.batch_size, 3))

    # Test.0: broadcast_nodes
    g.ndata["h"] = dgl.broadcast_nodes(g, gfeat)
    subg = dgl.unbatch(g)
    for i, sg in enumerate(subg):
        assert F.allclose(
            sg.ndata["h"],
            F.repeat(F.reshape(gfeat[i], (1, 3)), sg.number_of_nodes(), dim=0),
        )

    # Test.1: broadcast_edges
    g.edata["h"] = dgl.broadcast_edges(g, gfeat)
    subg = dgl.unbatch(g)
    for i, sg in enumerate(subg):
        assert F.allclose(
            sg.edata["h"],
            F.repeat(F.reshape(gfeat[i], (1, 3)), sg.number_of_edges(), dim=0),
        )
import backend as F
import numpy as np
import dgl
from test_utils import parametrize_idtype
import dgl
@parametrize_idtype
def test_node_removal(idtype):
g = dgl.DGLGraph()
......@@ -10,27 +12,30 @@ def test_node_removal(idtype):
g.add_nodes(10)
g.add_edge(0, 0)
assert g.number_of_nodes() == 10
g.ndata['id'] = F.arange(0, 10)
g.ndata["id"] = F.arange(0, 10)
# remove nodes
g.remove_nodes(range(4, 7))
assert g.number_of_nodes() == 7
assert F.array_equal(g.ndata['id'], F.tensor([0, 1, 2, 3, 7, 8, 9]))
assert F.array_equal(g.ndata["id"], F.tensor([0, 1, 2, 3, 7, 8, 9]))
assert dgl.NID not in g.ndata
assert dgl.EID not in g.edata
# add nodes
g.add_nodes(3)
assert g.number_of_nodes() == 10
assert F.array_equal(g.ndata['id'], F.tensor([0, 1, 2, 3, 7, 8, 9, 0, 0, 0]))
assert F.array_equal(
g.ndata["id"], F.tensor([0, 1, 2, 3, 7, 8, 9, 0, 0, 0])
)
# remove nodes
g.remove_nodes(range(1, 4), store_ids=True)
assert g.number_of_nodes() == 7
assert F.array_equal(g.ndata['id'], F.tensor([0, 7, 8, 9, 0, 0, 0]))
assert F.array_equal(g.ndata["id"], F.tensor([0, 7, 8, 9, 0, 0, 0]))
assert dgl.NID in g.ndata
assert dgl.EID in g.edata
@parametrize_idtype
def test_multigraph_node_removal(idtype):
g = dgl.DGLGraph()
......@@ -59,6 +64,7 @@ def test_multigraph_node_removal(idtype):
assert g.number_of_nodes() == 3
assert g.number_of_edges() == 6
@parametrize_idtype
def test_multigraph_edge_removal(idtype):
g = dgl.DGLGraph()
......@@ -86,6 +92,7 @@ def test_multigraph_edge_removal(idtype):
assert g.number_of_nodes() == 5
assert g.number_of_edges() == 8
@parametrize_idtype
def test_edge_removal(idtype):
g = dgl.DGLGraph()
......@@ -94,13 +101,15 @@ def test_edge_removal(idtype):
for i in range(5):
for j in range(5):
g.add_edge(i, j)
g.edata['id'] = F.arange(0, 25)
g.edata["id"] = F.arange(0, 25)
# remove edges
g.remove_edges(range(13, 20))
assert g.number_of_nodes() == 5
assert g.number_of_edges() == 18
assert F.array_equal(g.edata['id'], F.tensor(list(range(13)) + list(range(20, 25))))
assert F.array_equal(
g.edata["id"], F.tensor(list(range(13)) + list(range(20, 25)))
)
assert dgl.NID not in g.ndata
assert dgl.EID not in g.edata
......@@ -108,15 +117,20 @@ def test_edge_removal(idtype):
g.add_edge(3, 3)
assert g.number_of_nodes() == 5
assert g.number_of_edges() == 19
assert F.array_equal(g.edata['id'], F.tensor(list(range(13)) + list(range(20, 25)) + [0]))
assert F.array_equal(
g.edata["id"], F.tensor(list(range(13)) + list(range(20, 25)) + [0])
)
# remove edges
g.remove_edges(range(2, 10), store_ids=True)
assert g.number_of_nodes() == 5
assert g.number_of_edges() == 11
assert F.array_equal(g.edata['id'], F.tensor([0, 1, 10, 11, 12, 20, 21, 22, 23, 24, 0]))
assert F.array_equal(
g.edata["id"], F.tensor([0, 1, 10, 11, 12, 20, 21, 22, 23, 24, 0])
)
assert dgl.EID in g.edata
@parametrize_idtype
def test_node_and_edge_removal(idtype):
g = dgl.DGLGraph()
......@@ -125,7 +139,7 @@ def test_node_and_edge_removal(idtype):
for i in range(10):
for j in range(10):
g.add_edge(i, j)
g.edata['id'] = F.arange(0, 100)
g.edata["id"] = F.arange(0, 100)
assert g.number_of_nodes() == 10
assert g.number_of_edges() == 100
......@@ -156,6 +170,7 @@ def test_node_and_edge_removal(idtype):
assert g.number_of_nodes() == 10
assert g.number_of_edges() == 48
@parametrize_idtype
def test_node_frame(idtype):
g = dgl.DGLGraph()
......@@ -163,11 +178,12 @@ def test_node_frame(idtype):
g.add_nodes(10)
data = np.random.rand(10, 3)
new_data = data.take([0, 1, 2, 7, 8, 9], axis=0)
g.ndata['h'] = F.tensor(data)
g.ndata["h"] = F.tensor(data)
# remove nodes
g.remove_nodes(range(3, 7))
assert F.allclose(g.ndata['h'], F.tensor(new_data))
assert F.allclose(g.ndata["h"], F.tensor(new_data))
@parametrize_idtype
def test_edge_frame(idtype):
......@@ -177,11 +193,12 @@ def test_edge_frame(idtype):
g.add_edges(list(range(10)), list(range(1, 10)) + [0])
data = np.random.rand(10, 3)
new_data = data.take([0, 1, 2, 7, 8, 9], axis=0)
g.edata['h'] = F.tensor(data)
g.edata["h"] = F.tensor(data)
# remove edges
g.remove_edges(range(3, 7))
assert F.allclose(g.edata['h'], F.tensor(new_data))
assert F.allclose(g.edata["h"], F.tensor(new_data))
@parametrize_idtype
def test_issue1287(idtype):
......@@ -192,8 +209,8 @@ def test_issue1287(idtype):
g.add_nodes(5)
g.add_edges([0, 2, 3, 1, 1], [1, 0, 3, 1, 0])
g.remove_nodes([0, 1])
g.ndata['h'] = F.randn((g.number_of_nodes(), 3))
g.edata['h'] = F.randn((g.number_of_edges(), 2))
g.ndata["h"] = F.randn((g.number_of_nodes(), 3))
g.edata["h"] = F.randn((g.number_of_edges(), 2))
# remove edges
g = dgl.DGLGraph()
......@@ -202,10 +219,11 @@ def test_issue1287(idtype):
g.add_edges([0, 2, 3, 1, 1], [1, 0, 3, 1, 0])
g.remove_edges([0, 1])
g = g.to(F.ctx())
g.ndata['h'] = F.randn((g.number_of_nodes(), 3))
g.edata['h'] = F.randn((g.number_of_edges(), 2))
g.ndata["h"] = F.randn((g.number_of_nodes(), 3))
g.edata["h"] = F.randn((g.number_of_edges(), 2))
if __name__ == '__main__':
if __name__ == "__main__":
test_node_removal()
test_edge_removal()
test_multigraph_node_removal()
......
import os
import tempfile
import time
import unittest
import backend as F
import numpy as np
import scipy as sp
import time
import tempfile
import os
import pytest
import unittest
import scipy as sp
from dgl import DGLGraph
import dgl
import dgl.ndarray as nd
from dgl.data.utils import load_labels, save_tensors, load_tensors
from dgl import DGLGraph
from dgl.data.utils import load_labels, load_tensors, save_tensors
np.random.seed(44)
def generate_rand_graph(n, is_hetero):
arr = (sp.sparse.random(n, n, density=0.1,
format='coo') != 0).astype(np.int64)
arr = (sp.sparse.random(n, n, density=0.1, format="coo") != 0).astype(
np.int64
)
if is_hetero:
return dgl.from_scipy(arr)
else:
......@@ -28,15 +30,15 @@ def construct_graph(n, is_hetero):
g_list = []
for i in range(n):
g = generate_rand_graph(30, is_hetero)
g.edata['e1'] = F.randn((g.number_of_edges(), 32))
g.edata['e2'] = F.ones((g.number_of_edges(), 32))
g.ndata['n1'] = F.randn((g.number_of_nodes(), 64))
g.edata["e1"] = F.randn((g.number_of_edges(), 32))
g.edata["e2"] = F.ones((g.number_of_edges(), 32))
g.ndata["n1"] = F.randn((g.number_of_nodes(), 64))
g_list.append(g)
return g_list
@unittest.skipIf(F._default_context_str == 'gpu', reason="GPU not implemented")
@pytest.mark.parametrize('is_hetero', [True, False])
@unittest.skipIf(F._default_context_str == "gpu", reason="GPU not implemented")
@pytest.mark.parametrize("is_hetero", [True, False])
def test_graph_serialize_with_feature(is_hetero):
num_graphs = 100
......@@ -66,19 +68,19 @@ def test_graph_serialize_with_feature(is_hetero):
assert F.allclose(load_g.nodes(), g_list[idx].nodes())
load_edges = load_g.all_edges('uv', 'eid')
g_edges = g_list[idx].all_edges('uv', 'eid')
load_edges = load_g.all_edges("uv", "eid")
g_edges = g_list[idx].all_edges("uv", "eid")
assert F.allclose(load_edges[0], g_edges[0])
assert F.allclose(load_edges[1], g_edges[1])
assert F.allclose(load_g.edata['e1'], g_list[idx].edata['e1'])
assert F.allclose(load_g.edata['e2'], g_list[idx].edata['e2'])
assert F.allclose(load_g.ndata['n1'], g_list[idx].ndata['n1'])
assert F.allclose(load_g.edata["e1"], g_list[idx].edata["e1"])
assert F.allclose(load_g.edata["e2"], g_list[idx].edata["e2"])
assert F.allclose(load_g.ndata["n1"], g_list[idx].ndata["n1"])
os.unlink(path)
@unittest.skipIf(F._default_context_str == 'gpu', reason="GPU not implemented")
@pytest.mark.parametrize('is_hetero', [True, False])
@unittest.skipIf(F._default_context_str == "gpu", reason="GPU not implemented")
@pytest.mark.parametrize("is_hetero", [True, False])
def test_graph_serialize_without_feature(is_hetero):
num_graphs = 100
g_list = [generate_rand_graph(30, is_hetero) for _ in range(num_graphs)]
......@@ -98,15 +100,16 @@ def test_graph_serialize_without_feature(is_hetero):
assert F.allclose(load_g.nodes(), g_list[idx].nodes())
load_edges = load_g.all_edges('uv', 'eid')
g_edges = g_list[idx].all_edges('uv', 'eid')
load_edges = load_g.all_edges("uv", "eid")
g_edges = g_list[idx].all_edges("uv", "eid")
assert F.allclose(load_edges[0], g_edges[0])
assert F.allclose(load_edges[1], g_edges[1])
os.unlink(path)
@unittest.skipIf(F._default_context_str == 'gpu', reason="GPU not implemented")
@pytest.mark.parametrize('is_hetero', [True, False])
@unittest.skipIf(F._default_context_str == "gpu", reason="GPU not implemented")
@pytest.mark.parametrize("is_hetero", [True, False])
def test_graph_serialize_with_labels(is_hetero):
num_graphs = 100
g_list = [generate_rand_graph(30, is_hetero) for _ in range(num_graphs)]
......@@ -122,16 +125,16 @@ def test_graph_serialize_with_labels(is_hetero):
idx_list = np.random.permutation(np.arange(num_graphs)).tolist()
loadg_list, l_labels0 = dgl.load_graphs(path, idx_list)
l_labels = load_labels(path)
assert F.allclose(l_labels['label'], labels['label'])
assert F.allclose(l_labels0['label'], labels['label'])
assert F.allclose(l_labels["label"], labels["label"])
assert F.allclose(l_labels0["label"], labels["label"])
idx = idx_list[0]
load_g = loadg_list[0]
assert F.allclose(load_g.nodes(), g_list[idx].nodes())
load_edges = load_g.all_edges('uv', 'eid')
g_edges = g_list[idx].all_edges('uv', 'eid')
load_edges = load_g.all_edges("uv", "eid")
g_edges = g_list[idx].all_edges("uv", "eid")
assert F.allclose(load_edges[0], g_edges[0])
assert F.allclose(load_edges[1], g_edges[1])
......@@ -144,8 +147,10 @@ def test_serialize_tensors():
path = f.name
f.close()
tensor_dict = {"a": F.tensor(
[1, 3, -1, 0], dtype=F.int64), "1@1": F.tensor([1.5, 2], dtype=F.float32)}
tensor_dict = {
"a": F.tensor([1, 3, -1, 0], dtype=F.int64),
"1@1": F.tensor([1.5, 2], dtype=F.float32),
}
save_tensors(path, tensor_dict)
......@@ -154,7 +159,8 @@ def test_serialize_tensors():
for key in tensor_dict:
assert key in load_tensor_dict
assert np.array_equal(
F.asnumpy(load_tensor_dict[key]), F.asnumpy(tensor_dict[key]))
F.asnumpy(load_tensor_dict[key]), F.asnumpy(tensor_dict[key])
)
load_nd_dict = load_tensors(path, return_dgl_ndarray=True)
......@@ -162,7 +168,8 @@ def test_serialize_tensors():
assert key in load_nd_dict
assert isinstance(load_nd_dict[key], nd.NDArray)
assert np.array_equal(
load_nd_dict[key].asnumpy(), F.asnumpy(tensor_dict[key]))
load_nd_dict[key].asnumpy(), F.asnumpy(tensor_dict[key])
)
os.unlink(path)
......@@ -185,103 +192,120 @@ def test_serialize_empty_dict():
def test_load_old_files1():
    """Load a graph saved by an older DGL version (data/1.bin) and check its
    edges and features against the reference arrays in data/1.npy."""
    loadg_list, _ = dgl.load_graphs(
        os.path.join(os.path.dirname(__file__), "data/1.bin")
    )
    idx, num_nodes, edge0, edge1, edata_e1, edata_e2, ndata_n1 = np.load(
        os.path.join(os.path.dirname(__file__), "data/1.npy"), allow_pickle=True
    )
    load_g = loadg_list[idx]
    load_edges = load_g.all_edges("uv", "eid")

    assert np.allclose(F.asnumpy(load_edges[0]), edge0)
    assert np.allclose(F.asnumpy(load_edges[1]), edge1)
    assert np.allclose(F.asnumpy(load_g.edata["e1"]), edata_e1)
    assert np.allclose(F.asnumpy(load_g.edata["e2"]), edata_e2)
    assert np.allclose(F.asnumpy(load_g.ndata["n1"]), ndata_n1)
def test_load_old_files2():
    """Load a labeled graph file saved by an older DGL version (data/2.bin)
    and check labels and edges against the reference arrays in data/2.npy."""
    loadg_list, labels0 = dgl.load_graphs(
        os.path.join(os.path.dirname(__file__), "data/2.bin")
    )
    # Labels must be readable both via load_graphs and via load_labels.
    labels1 = load_labels(os.path.join(os.path.dirname(__file__), "data/2.bin"))
    idx, edges0, edges1, np_labels = np.load(
        os.path.join(os.path.dirname(__file__), "data/2.npy"), allow_pickle=True
    )

    assert np.allclose(F.asnumpy(labels0["label"]), np_labels)
    assert np.allclose(F.asnumpy(labels1["label"]), np_labels)

    load_g = loadg_list[idx]
    print(load_g)
    load_edges = load_g.all_edges("uv", "eid")

    assert np.allclose(F.asnumpy(load_edges[0]), edges0)
    assert np.allclose(F.asnumpy(load_edges[1]), edges1)
def create_heterographs(idtype):
    """Build test heterographs with the given idtype.

    Returns [g, g_x, g_y] where g_x has only the 'follows' relation, g_y has
    only the 'knows' relation (restricted to CSR format), and g combines both
    relations and carries the features of g_x and g_y.
    """
    g_x = dgl.heterograph(
        {("user", "follows", "user"): ([0, 1, 2], [1, 2, 3])}, idtype=idtype
    )
    g_y = dgl.heterograph(
        {("user", "knows", "user"): ([0, 2], [2, 3])}, idtype=idtype
    ).formats("csr")
    g_x.ndata["h"] = F.randn((4, 3))
    g_x.edata["w"] = F.randn((3, 2))
    g_y.ndata["hh"] = F.ones((4, 5))
    g_y.edata["ww"] = F.randn((2, 10))
    g = dgl.heterograph(
        {
            ("user", "follows", "user"): ([0, 1, 2], [1, 2, 3]),
            ("user", "knows", "user"): ([0, 2], [2, 3]),
        },
        idtype=idtype,
    )
    # Share the feature tensors with the single-relation graphs so that
    # serialization tests can compare them after a save/load round trip.
    g.nodes["user"].data["h"] = g_x.ndata["h"]
    g.nodes["user"].data["hh"] = g_y.ndata["hh"]
    g.edges["follows"].data["w"] = g_x.edata["w"]
    g.edges["knows"].data["ww"] = g_y.edata["ww"]
    return [g, g_x, g_y]
def create_heterographs2(idtype):
    """Build test heterographs with the given idtype, including a relation
    between two distinct node types.

    Returns [g, g_x, g_y, g_z] where g_z adds a ('user', 'knows', 'knowledge')
    relation and g combines all three relations with the features of g_x/g_y.
    """
    g_x = dgl.heterograph(
        {("user", "follows", "user"): ([0, 1, 2], [1, 2, 3])}, idtype=idtype
    )
    g_y = dgl.heterograph(
        {("user", "knows", "user"): ([0, 2], [2, 3])}, idtype=idtype
    ).formats("csr")
    g_z = dgl.heterograph(
        {("user", "knows", "knowledge"): ([0, 1, 3], [2, 3, 4])}, idtype=idtype
    )
    g_x.ndata["h"] = F.randn((4, 3))
    g_x.edata["w"] = F.randn((3, 2))
    g_y.ndata["hh"] = F.ones((4, 5))
    g_y.edata["ww"] = F.randn((2, 10))
    g = dgl.heterograph(
        {
            ("user", "follows", "user"): ([0, 1, 2], [1, 2, 3]),
            ("user", "knows", "user"): ([0, 2], [2, 3]),
            ("user", "knows", "knowledge"): ([0, 1, 3], [2, 3, 4]),
        },
        idtype=idtype,
    )
    g.nodes["user"].data["h"] = g_x.ndata["h"]
    g.edges["follows"].data["w"] = g_x.edata["w"]
    g.nodes["user"].data["hh"] = g_y.ndata["hh"]
    # 'knows' is ambiguous here (two relations share the etype name), so the
    # full canonical etype is required to address the edge data.
    g.edges[("user", "knows", "user")].data["ww"] = g_y.edata["ww"]
    return [g, g_x, g_y, g_z]
def test_deserialize_old_heterograph_file():
    """Load heterographs saved by an older DGL version (data/hetero1.bin) and
    check idtypes, node features, edges, and graph labels."""
    path = os.path.join(os.path.dirname(__file__), "data/hetero1.bin")
    g_list, label_dict = dgl.load_graphs(path)
    # The file stores int64 graphs followed by int32 graphs
    # (see create_old_heterograph_files).
    assert g_list[0].idtype == F.int64
    assert g_list[3].idtype == F.int32
    assert np.allclose(
        F.asnumpy(g_list[2].nodes["user"].data["hh"]), np.ones((4, 5))
    )
    assert np.allclose(
        F.asnumpy(g_list[5].nodes["user"].data["hh"]), np.ones((4, 5))
    )
    edges = g_list[0]["follows"].edges()
    assert np.allclose(F.asnumpy(edges[0]), np.array([0, 1, 2]))
    assert np.allclose(F.asnumpy(edges[1]), np.array([1, 2, 3]))
    assert F.allclose(label_dict["graph_label"], F.ones(54))
def create_old_heterograph_files():
    """Regenerate the data/hetero1.bin fixture consumed by
    test_deserialize_old_heterograph_file (not run in CI; invoked manually)."""
    path = os.path.join(os.path.dirname(__file__), "data/hetero1.bin")
    g_list0 = create_heterographs(F.int64) + create_heterographs(F.int32)
    labels_dict = {"graph_label": F.ones(54)}
    dgl.save_graphs(path, g_list0, labels_dict)
@unittest.skipIf(F._default_context_str == 'gpu', reason="GPU not implemented")
@unittest.skipIf(F._default_context_str == "gpu", reason="GPU not implemented")
def test_serialize_heterograph():
f = tempfile.NamedTemporaryFile(delete=False)
path = f.name
......@@ -295,15 +319,17 @@ def test_serialize_heterograph():
for i in range(len(g_list0)):
for j, etypes in enumerate(g_list0[i].canonical_etypes):
assert g_list[i].canonical_etypes[j] == etypes
#assert g_list[1].restrict_format() == 'any'
#assert g_list[2].restrict_format() == 'csr'
# assert g_list[1].restrict_format() == 'any'
# assert g_list[2].restrict_format() == 'csr'
assert g_list[4].idtype == F.int32
assert np.allclose(
F.asnumpy(g_list[2].nodes['user'].data['hh']), np.ones((4, 5)))
F.asnumpy(g_list[2].nodes["user"].data["hh"]), np.ones((4, 5))
)
assert np.allclose(
F.asnumpy(g_list[6].nodes['user'].data['hh']), np.ones((4, 5)))
edges = g_list[0]['follows'].edges()
F.asnumpy(g_list[6].nodes["user"].data["hh"]), np.ones((4, 5))
)
edges = g_list[0]["follows"].edges()
assert np.allclose(F.asnumpy(edges[0]), np.array([0, 1, 2]))
assert np.allclose(F.asnumpy(edges[1]), np.array([1, 2, 3]))
for i in range(len(g_list)):
......@@ -311,12 +337,13 @@ def test_serialize_heterograph():
assert g_list[i].etypes == g_list0[i].etypes
# test set feature after load_graph
g_list[3].nodes['user'].data['test'] = F.tensor([0, 1, 2, 4])
g_list[3].edata['test'] = F.tensor([0, 1, 2])
g_list[3].nodes["user"].data["test"] = F.tensor([0, 1, 2, 4])
g_list[3].edata["test"] = F.tensor([0, 1, 2])
os.unlink(path)
@unittest.skipIf(F._default_context_str == 'gpu', reason="GPU not implemented")
@unittest.skipIf(F._default_context_str == "gpu", reason="GPU not implemented")
@pytest.mark.skip(reason="lack of permission on CI")
def test_serialize_heterograph_s3():
path = "s3://dglci-data-test/graph2.bin"
......@@ -325,30 +352,31 @@ def test_serialize_heterograph_s3():
g_list = dgl.load_graphs(path, [0, 2, 5])
assert g_list[0].idtype == F.int64
#assert g_list[1].restrict_format() == 'csr'
# assert g_list[1].restrict_format() == 'csr'
assert np.allclose(
F.asnumpy(g_list[1].nodes['user'].data['hh']), np.ones((4, 5)))
F.asnumpy(g_list[1].nodes["user"].data["hh"]), np.ones((4, 5))
)
assert np.allclose(
F.asnumpy(g_list[2].nodes['user'].data['hh']), np.ones((4, 5)))
edges = g_list[0]['follows'].edges()
F.asnumpy(g_list[2].nodes["user"].data["hh"]), np.ones((4, 5))
)
edges = g_list[0]["follows"].edges()
assert np.allclose(F.asnumpy(edges[0]), np.array([0, 1, 2]))
assert np.allclose(F.asnumpy(edges[1]), np.array([1, 2, 3]))
if __name__ == "__main__":
    # Manual-run scaffold: uncomment individual tests as needed.
    pass
    # test_graph_serialize_with_feature(True)
    # test_graph_serialize_with_feature(False)
    # test_graph_serialize_without_feature(True)
    # test_graph_serialize_without_feature(False)
    # test_graph_serialize_with_labels(True)
    # test_graph_serialize_with_labels(False)
    # test_serialize_tensors()
    # test_serialize_empty_dict()
    # test_load_old_files1()
    test_load_old_files2()
    # test_serialize_heterograph()
    # test_serialize_heterograph_s3()
    # test_deserialize_old_heterograph_file()
    # create_old_heterograph_files()
import dgl
import dgl.function as fn
from collections import Counter
import numpy as np
import scipy.sparse as ssp
import itertools
import unittest
from collections import Counter
import backend as F
import networkx as nx
import unittest, pytest
from dgl import DGLError
import numpy as np
import pytest
import scipy.sparse as ssp
from test_utils import parametrize_idtype
import dgl
import dgl.function as fn
from dgl import DGLError
def create_test_heterograph(num_nodes, num_adj, idtype):
    """Create a random homogeneous dgl.graph for sorting tests.

    Each source node i gets a random out-degree drawn from the half-open range
    num_adj (an int n is treated as [n, n + 1), i.e. a fixed degree of n), with
    destination nodes sampled without replacement.
    """
    if isinstance(num_adj, int):
        num_adj = [num_adj, num_adj + 1]
    num_adj_list = list(
        np.random.choice(np.arange(num_adj[0], num_adj[1]), num_nodes)
    )
    src = np.concatenate([[i] * num_adj_list[i] for i in range(num_nodes)])
    dst = [
        np.random.choice(num_nodes, nadj, replace=False)
        for nadj in num_adj_list
    ]
    dst = np.concatenate(dst)
    return dgl.graph((src, dst), idtype=idtype)
def check_sort(spm, tag_arr=None, tag_pos=None):
if tag_arr is None:
tag_arr = np.arange(spm.shape[0])
......@@ -37,18 +47,20 @@ def check_sort(spm, tag_arr=None, tag_pos=None):
# `tag_pos_ptr` is the expected tag value. Here we check whether the
# tag value is equal to `tag_pos_ptr`
return False
if tag_arr[dst[j]] > tag_arr[dst[j+1]]:
if tag_arr[dst[j]] > tag_arr[dst[j + 1]]:
# The tag should be in descending order after sorting
return False
if tag_pos is not None and tag_arr[dst[j]] < tag_arr[dst[j+1]]:
if j+1 != int(tag_pos_row[tag_pos_ptr+1]):
if tag_pos is not None and tag_arr[dst[j]] < tag_arr[dst[j + 1]]:
if j + 1 != int(tag_pos_row[tag_pos_ptr + 1]):
# The boundary of tag should be consistent with `tag_pos`
return False
tag_pos_ptr = tag_arr[dst[j+1]]
tag_pos_ptr = tag_arr[dst[j + 1]]
return True
@unittest.skipIf(F._default_context_str == 'gpu', reason="GPU sorting by tag not implemented")
@unittest.skipIf(
F._default_context_str == "gpu", reason="GPU sorting by tag not implemented"
)
@parametrize_idtype
def test_sort_with_tag(idtype):
num_nodes, num_adj, num_tags = 200, [20, 50], 5
......@@ -58,42 +70,50 @@ def test_sort_with_tag(idtype):
edge_tag_dst = F.gather_row(tag, F.tensor(dst))
edge_tag_src = F.gather_row(tag, F.tensor(src))
for tag_type in ['node', 'edge']:
for tag_type in ["node", "edge"]:
new_g = dgl.sort_csr_by_tag(
g, tag if tag_type == 'node' else edge_tag_dst, tag_type=tag_type)
old_csr = g.adjacency_matrix(scipy_fmt='csr')
new_csr = new_g.adjacency_matrix(scipy_fmt='csr')
assert(check_sort(new_csr, tag, new_g.dstdata["_TAG_OFFSET"]))
assert(not check_sort(old_csr, tag)) # Check the original csr is not modified.
g, tag if tag_type == "node" else edge_tag_dst, tag_type=tag_type
)
old_csr = g.adjacency_matrix(scipy_fmt="csr")
new_csr = new_g.adjacency_matrix(scipy_fmt="csr")
assert check_sort(new_csr, tag, new_g.dstdata["_TAG_OFFSET"])
assert not check_sort(
old_csr, tag
) # Check the original csr is not modified.
for tag_type in ['node', 'edge']:
for tag_type in ["node", "edge"]:
new_g = dgl.sort_csc_by_tag(
g, tag if tag_type == 'node' else edge_tag_src, tag_type=tag_type)
old_csc = g.adjacency_matrix(transpose=True, scipy_fmt='csr')
new_csc = new_g.adjacency_matrix(transpose=True, scipy_fmt='csr')
assert(check_sort(new_csc, tag, new_g.srcdata["_TAG_OFFSET"]))
assert(not check_sort(old_csc, tag))
g, tag if tag_type == "node" else edge_tag_src, tag_type=tag_type
)
old_csc = g.adjacency_matrix(transpose=True, scipy_fmt="csr")
new_csc = new_g.adjacency_matrix(transpose=True, scipy_fmt="csr")
assert check_sort(new_csc, tag, new_g.srcdata["_TAG_OFFSET"])
assert not check_sort(old_csc, tag)
@unittest.skipIf(
    F._default_context_str == "gpu", reason="GPU sorting by tag not implemented"
)
@parametrize_idtype
def test_sort_with_tag_bipartite(idtype):
    """Check sort_csr_by_tag/sort_csc_by_tag on a bipartite graph.

    The sorted graph's CSR/CSC must pass ``check_sort`` against the tag
    array and the recorded ``_TAG_OFFSET`` boundaries, while the original
    graph must remain unsorted (i.e. not modified in place).
    """
    num_nodes, num_adj, num_tags = 200, [20, 50], 5
    g = create_test_heterograph(num_nodes, num_adj, idtype=idtype)
    g = dgl.heterograph({("_U", "_E", "_V"): g.edges()})
    utag = F.tensor(np.random.choice(num_tags, g.number_of_nodes("_U")))
    vtag = F.tensor(np.random.choice(num_tags, g.number_of_nodes("_V")))

    # CSR rows are _U nodes; neighbor lists are sorted by the _V-side tag.
    new_g = dgl.sort_csr_by_tag(g, vtag)
    old_csr = g.adjacency_matrix(scipy_fmt="csr")
    new_csr = new_g.adjacency_matrix(scipy_fmt="csr")
    assert check_sort(new_csr, vtag, new_g.nodes["_U"].data["_TAG_OFFSET"])
    assert not check_sort(old_csr, vtag)  # input graph must be untouched

    # CSC columns are _V nodes; in-neighbor lists sorted by the _U-side tag.
    new_g = dgl.sort_csc_by_tag(g, utag)
    old_csc = g.adjacency_matrix(transpose=True, scipy_fmt="csr")
    new_csc = new_g.adjacency_matrix(transpose=True, scipy_fmt="csr")
    assert check_sort(new_csc, utag, new_g.nodes["_V"].data["_TAG_OFFSET"])
    assert not check_sort(old_csc, utag)
if __name__ == "__main__":
test_sort_with_tag(F.int32)
......
from dgl.ops import gspmm, gsddmm, edge_softmax, segment_reduce
from test_utils.graph_cases import get_cases
from test_utils import parametrize_idtype
import dgl
import random
import pytest, unittest
import networkx as nx
import unittest
import backend as F
import networkx as nx
import numpy as np
import pytest
import torch
from test_utils import parametrize_idtype
from test_utils.graph_cases import get_cases
import dgl
from dgl.ops import edge_softmax, gsddmm, gspmm, segment_reduce
random.seed(42)
np.random.seed(42)
# Message-function UDFs mirroring the built-in binary message ops on
# (edges.src['x'], edges.data['w']); used as the reference for gspmm.
udf_msg = {
    "add": lambda edges: {"m": edges.src["x"] + edges.data["w"]},
    "sub": lambda edges: {"m": edges.src["x"] - edges.data["w"]},
    "mul": lambda edges: {"m": edges.src["x"] * edges.data["w"]},
    "div": lambda edges: {"m": edges.src["x"] / edges.data["w"]},
    "copy_lhs": lambda edges: {"m": edges.src["x"]},
    "copy_rhs": lambda edges: {"m": edges.data["w"]},
}
def select(target, src, edge, dst):
    """Pick one of *src*/*edge*/*dst* by the target code 'u'/'e'/'v'.

    Returns None for any other target code (callers only pass these three).
    """
    if target == "u":
        return src
    elif target == "v":
        return dst
    elif target == "e":
        return edge
def binary_op(msg, x, y):
    """Apply the named binary message op to operands *x* and *y*.

    Mirrors DGL's built-in binary ops; 'dot' reduces over the last axis
    (keeping the dimension), and the copy ops ignore one operand.
    """
    if msg == "add":
        return x + y
    elif msg == "sub":
        return x - y
    elif msg == "mul":
        return x * y
    elif msg == "div":
        return x / y
    elif msg == "dot":
        return F.sum(x * y, -1, keepdims=True)
    elif msg == "copy_lhs":
        return x
    elif msg == "copy_rhs":
        return y
def edge_func(lhs_target, rhs_target, msg):
    """Build an apply_edges UDF for the given operand targets and op.

    The returned closure reads feature 'x' from the lhs target and 'y'
    from the rhs target (each one of src/edge/dst, chosen via ``select``)
    and stores ``binary_op(msg, lhs, rhs)`` under edge feature 'm'.
    """

    def foo(edges):
        return {
            "m": binary_op(
                msg,
                select(lhs_target, edges.src, edges.data, edges.dst)["x"],
                select(rhs_target, edges.src, edges.data, edges.dst)["y"],
            )
        }

    return foo
# Reference apply_edges UDFs keyed by '<lhs>_<msg>_<rhs>' for every
# combination of operand targets and binary ops exercised by test_sddmm.
udf_apply_edges = {
    lhs_target
    + "_"
    + msg
    + "_"
    + rhs_target: edge_func(lhs_target, rhs_target, msg)
    for lhs_target in ["u", "v", "e"]
    for rhs_target in ["u", "v", "e"]
    for msg in ["add", "sub", "mul", "div", "dot", "copy_lhs", "copy_rhs"]
}
# Reduce-function UDFs mirroring the built-in sum/min/max reducers over
# the incoming mailbox; used as the reference for gspmm.
udf_reduce = {
    "sum": lambda nodes: {"v": F.sum(nodes.mailbox["m"], 1)},
    "min": lambda nodes: {"v": F.min(nodes.mailbox["m"], 1)},
    "max": lambda nodes: {"v": F.max(nodes.mailbox["m"], 1)},
}
# Graphs exercised by the parametrized gspmm/gsddmm tests below: one random
# homogeneous graph and one random bipartite graph.
graphs = [
    # dgl.rand_graph(30, 0),
    dgl.rand_graph(30, 100),
    dgl.rand_bipartite("_U", "_E", "_V", 30, 40, 300),
]
spmm_shapes = [
......@@ -81,7 +93,7 @@ spmm_shapes = [
((1,), (3,)),
((3,), (1,)),
((1,), (1,)),
((), ())
((), ()),
]
sddmm_shapes = [
......@@ -89,17 +101,18 @@ sddmm_shapes = [
((5, 3, 1, 7), (1, 3, 7, 7)),
((1, 3, 3), (4, 1, 3)),
((3,), (3,)),
((1,), (1,))
((1,), (1,)),
]
# Edge-feature shapes exercised by test_edge_softmax.
edge_softmax_shapes = [(1,), (1, 3), (3, 4, 5)]
@pytest.mark.parametrize('g', graphs)
@pytest.mark.parametrize('shp', spmm_shapes)
@pytest.mark.parametrize('msg', ['add', 'sub', 'mul', 'div', 'copy_lhs', 'copy_rhs'])
@pytest.mark.parametrize('reducer', ['sum', 'min', 'max'])
@pytest.mark.parametrize("g", graphs)
@pytest.mark.parametrize("shp", spmm_shapes)
@pytest.mark.parametrize(
"msg", ["add", "sub", "mul", "div", "copy_lhs", "copy_rhs"]
)
@pytest.mark.parametrize("reducer", ["sum", "min", "max"])
@parametrize_idtype
def test_spmm(idtype, g, shp, msg, reducer):
g = g.astype(idtype).to(F.ctx())
......@@ -108,64 +121,73 @@ def test_spmm(idtype, g, shp, msg, reducer):
hu = F.tensor(np.random.rand(*((g.number_of_src_nodes(),) + shp[0])) + 1)
he = F.tensor(np.random.rand(*((g.number_of_edges(),) + shp[1])) + 1)
print('u shape: {}, e shape: {}'.format(F.shape(hu), F.shape(he)))
print("u shape: {}, e shape: {}".format(F.shape(hu), F.shape(he)))
g.srcdata['x'] = F.attach_grad(F.clone(hu))
g.edata['w'] = F.attach_grad(F.clone(he))
print('SpMM(message func: {}, reduce func: {})'.format(msg, reducer))
g.srcdata["x"] = F.attach_grad(F.clone(hu))
g.edata["w"] = F.attach_grad(F.clone(he))
print("SpMM(message func: {}, reduce func: {})".format(msg, reducer))
u = F.attach_grad(F.clone(hu))
e = F.attach_grad(F.clone(he))
with F.record_grad():
v = gspmm(g, msg, reducer, u, e)
if reducer in ['max', 'min']:
if reducer in ["max", "min"]:
v = F.replace_inf_with_zero(v)
if g.number_of_edges() > 0:
F.backward(F.reduce_sum(v))
if msg != 'copy_rhs':
if msg != "copy_rhs":
grad_u = F.grad(u)
if msg != 'copy_lhs':
if msg != "copy_lhs":
grad_e = F.grad(e)
with F.record_grad():
g.update_all(udf_msg[msg], udf_reduce[reducer])
if g.number_of_edges() > 0:
v1 = g.dstdata['v']
v1 = g.dstdata["v"]
assert F.allclose(v, v1)
print('forward passed')
print("forward passed")
F.backward(F.reduce_sum(v1))
if msg != 'copy_rhs':
if reducer in ['min', 'max']: # there might be some numerical errors
rate = F.reduce_sum(F.abs(F.grad(g.srcdata['x']) - grad_u)) /\
F.reduce_sum(F.abs(grad_u))
if msg != "copy_rhs":
if reducer in [
"min",
"max",
]: # there might be some numerical errors
rate = F.reduce_sum(
F.abs(F.grad(g.srcdata["x"]) - grad_u)
) / F.reduce_sum(F.abs(grad_u))
assert F.as_scalar(rate) < 1e-2, rate
else:
assert F.allclose(F.grad(g.srcdata['x']), grad_u)
if msg != 'copy_lhs':
if reducer in ['min', 'max']:
rate = F.reduce_sum(F.abs(F.grad(g.edata['w']) - grad_e)) /\
F.reduce_sum(F.abs(grad_e))
assert F.allclose(F.grad(g.srcdata["x"]), grad_u)
if msg != "copy_lhs":
if reducer in ["min", "max"]:
rate = F.reduce_sum(
F.abs(F.grad(g.edata["w"]) - grad_e)
) / F.reduce_sum(F.abs(grad_e))
assert F.as_scalar(rate) < 1e-2, rate
else:
assert F.allclose(F.grad(g.edata['w']), grad_e)
print('backward passed')
g.srcdata.pop('x')
g.edata.pop('w')
if 'v' in g.dstdata: g.dstdata.pop('v')
@pytest.mark.parametrize('g', graphs)
@pytest.mark.parametrize('shp', sddmm_shapes)
@pytest.mark.parametrize('lhs_target', ['u', 'v', 'e'])
@pytest.mark.parametrize('rhs_target', ['u', 'v', 'e'])
@pytest.mark.parametrize('msg', ['add', 'sub', 'mul', 'div', 'dot', 'copy_lhs', 'copy_rhs'])
assert F.allclose(F.grad(g.edata["w"]), grad_e)
print("backward passed")
g.srcdata.pop("x")
g.edata.pop("w")
if "v" in g.dstdata:
g.dstdata.pop("v")
@pytest.mark.parametrize("g", graphs)
@pytest.mark.parametrize("shp", sddmm_shapes)
@pytest.mark.parametrize("lhs_target", ["u", "v", "e"])
@pytest.mark.parametrize("rhs_target", ["u", "v", "e"])
@pytest.mark.parametrize(
"msg", ["add", "sub", "mul", "div", "dot", "copy_lhs", "copy_rhs"]
)
@parametrize_idtype
def test_sddmm(g, shp, lhs_target, rhs_target, msg, idtype):
if lhs_target == rhs_target:
return
g = g.astype(idtype).to(F.ctx())
if dgl.backend.backend_name == 'mxnet' and g.number_of_edges() == 0:
if dgl.backend.backend_name == "mxnet" and g.number_of_edges() == 0:
pytest.skip() # mxnet do not support zero shape tensor
print(g)
print(g.idtype)
......@@ -174,37 +196,37 @@ def test_sddmm(g, shp, lhs_target, rhs_target, msg, idtype):
lhs_target,
g.number_of_src_nodes(),
g.number_of_edges(),
g.number_of_dst_nodes())
g.number_of_dst_nodes(),
)
lhs_shp = (len_lhs,) + shp[0]
len_rhs = select(
rhs_target,
g.number_of_src_nodes(),
g.number_of_edges(),
g.number_of_dst_nodes())
g.number_of_dst_nodes(),
)
rhs_shp = (len_rhs,) + shp[1]
feat_lhs = F.tensor(np.random.rand(*lhs_shp) + 1)
feat_rhs = F.tensor(np.random.rand(*rhs_shp) + 1)
print('lhs shape: {}, rhs shape: {}'.format(F.shape(feat_lhs), F.shape(feat_rhs)))
print(
"lhs shape: {}, rhs shape: {}".format(
F.shape(feat_lhs), F.shape(feat_rhs)
)
)
lhs_frame = select(
lhs_target,
g.srcdata,
g.edata,
g.dstdata)
rhs_frame = select(
rhs_target,
g.srcdata,
g.edata,
g.dstdata)
lhs_frame['x'] = F.attach_grad(F.clone(feat_lhs))
rhs_frame['y'] = F.attach_grad(F.clone(feat_rhs))
msg_func = lhs_target + '_' + msg + '_' + rhs_target
print('SDDMM(message func: {})'.format(msg_func))
lhs_frame = select(lhs_target, g.srcdata, g.edata, g.dstdata)
rhs_frame = select(rhs_target, g.srcdata, g.edata, g.dstdata)
lhs_frame["x"] = F.attach_grad(F.clone(feat_lhs))
rhs_frame["y"] = F.attach_grad(F.clone(feat_rhs))
msg_func = lhs_target + "_" + msg + "_" + rhs_target
print("SDDMM(message func: {})".format(msg_func))
lhs = F.attach_grad(F.clone(feat_lhs))
rhs = F.attach_grad(F.clone(feat_rhs))
with F.record_grad():
e = gsddmm(g, msg, lhs, rhs, lhs_target=lhs_target, rhs_target=rhs_target)
e = gsddmm(
g, msg, lhs, rhs, lhs_target=lhs_target, rhs_target=rhs_target
)
F.backward(F.reduce_sum(e))
grad_lhs = F.grad(lhs)
grad_rhs = F.grad(rhs)
......@@ -212,24 +234,26 @@ def test_sddmm(g, shp, lhs_target, rhs_target, msg, idtype):
with F.record_grad():
g.apply_edges(udf_apply_edges[msg_func])
if g.number_of_edges() > 0:
e1 = g.edata['m']
e1 = g.edata["m"]
assert F.allclose(e, e1)
print('forward passed')
print("forward passed")
F.backward(F.reduce_sum(e1))
if msg != 'copy_rhs':
assert F.allclose(F.grad(lhs_frame['x']), grad_lhs)
if msg != 'copy_lhs':
assert F.allclose(F.grad(rhs_frame['y']), grad_rhs)
print('backward passed')
lhs_frame.pop('x')
rhs_frame.pop('y')
if 'm' in g.edata: g.edata.pop('m')
@pytest.mark.parametrize('g', get_cases(['clique']))
@pytest.mark.parametrize('norm_by', ['src', 'dst'])
@pytest.mark.parametrize('shp', edge_softmax_shapes)
if msg != "copy_rhs":
assert F.allclose(F.grad(lhs_frame["x"]), grad_lhs)
if msg != "copy_lhs":
assert F.allclose(F.grad(rhs_frame["y"]), grad_rhs)
print("backward passed")
lhs_frame.pop("x")
rhs_frame.pop("y")
if "m" in g.edata:
g.edata.pop("m")
@pytest.mark.parametrize("g", get_cases(["clique"]))
@pytest.mark.parametrize("norm_by", ["src", "dst"])
@pytest.mark.parametrize("shp", edge_softmax_shapes)
@parametrize_idtype
def test_edge_softmax(g, norm_by, shp, idtype):
g = g.astype(idtype).to(F.ctx())
......@@ -244,21 +268,24 @@ def test_edge_softmax(g, norm_by, shp, idtype):
with F.record_grad():
e2 = F.attach_grad(F.clone(edata))
e2_2d = F.reshape(
e2, (g.number_of_src_nodes(), g.number_of_dst_nodes(), *e2.shape[1:]))
if norm_by == 'src':
e2,
(g.number_of_src_nodes(), g.number_of_dst_nodes(), *e2.shape[1:]),
)
if norm_by == "src":
score2 = F.softmax(e2_2d, 1)
score2 = F.reshape(score2, (-1, *e2.shape[1:]))
if norm_by == 'dst':
if norm_by == "dst":
score2 = F.softmax(e2_2d, 0)
score2 = F.reshape(score2, (-1, *e2.shape[1:]))
assert F.allclose(score1, score2)
print('forward passed')
print("forward passed")
F.backward(F.reduce_sum(score2))
assert F.allclose(F.grad(e2), grad_edata)
print('backward passed')
print("backward passed")
@pytest.mark.parametrize('reducer', ['sum', 'max', 'min', 'mean'])
@pytest.mark.parametrize("reducer", ["sum", "max", "min", "mean"])
def test_segment_reduce(reducer):
ctx = F.ctx()
value = F.tensor(np.random.rand(10, 5))
......@@ -266,14 +293,17 @@ def test_segment_reduce(reducer):
v2 = F.attach_grad(F.clone(value))
seglen = F.tensor([2, 3, 0, 4, 1, 0, 0])
u = F.copy_to(F.arange(0, F.shape(value)[0], F.int32), ctx)
v = F.repeat(F.copy_to(F.arange(0, len(seglen), F.int32), ctx),
seglen, dim=0)
v = F.repeat(
F.copy_to(F.arange(0, len(seglen), F.int32), ctx), seglen, dim=0
)
num_nodes = {'_U': len(u), '_V': len(seglen)}
g = dgl.convert.heterograph({('_U', '_E', '_V'): (u, v)}, num_nodes_dict=num_nodes)
num_nodes = {"_U": len(u), "_V": len(seglen)}
g = dgl.convert.heterograph(
{("_U", "_E", "_V"): (u, v)}, num_nodes_dict=num_nodes
)
with F.record_grad():
rst1 = gspmm(g, 'copy_lhs', reducer, v1, None)
if reducer in ['max', 'min']:
rst1 = gspmm(g, "copy_lhs", reducer, v1, None)
if reducer in ["max", "min"]:
rst1 = F.replace_inf_with_zero(rst1)
F.backward(F.reduce_sum(rst1))
grad1 = F.grad(v1)
......@@ -282,24 +312,36 @@ def test_segment_reduce(reducer):
rst2 = segment_reduce(seglen, v2, reducer=reducer)
F.backward(F.reduce_sum(rst2))
assert F.allclose(rst1, rst2)
print('forward passed')
print("forward passed")
grad2 = F.grad(v2)
assert F.allclose(grad1, grad2)
print('backward passed')
print("backward passed")
@unittest.skipIf(dgl.backend.backend_name != 'pytorch', reason='Only support PyTorch for now')
@unittest.skipIf(
dgl.backend.backend_name != "pytorch", reason="Only support PyTorch for now"
)
@parametrize_idtype
@pytest.mark.parametrize('feat_size', [1, 8, 16, 64, 256])
@pytest.mark.parametrize('dtype,tol', [(torch.float16,1e-2),(torch.float32,3e-3),(torch.float64,1e-4)])
@pytest.mark.parametrize("feat_size", [1, 8, 16, 64, 256])
@pytest.mark.parametrize(
"dtype,tol",
[(torch.float16, 1e-2), (torch.float32, 3e-3), (torch.float64, 1e-4)],
)
def test_segment_mm(idtype, feat_size, dtype, tol):
if F._default_context_str == 'cpu' and dtype == torch.float16:
pytest.skip("fp16 support for CPU linalg functions has been removed in PyTorch.")
if F._default_context_str == "cpu" and dtype == torch.float16:
pytest.skip(
"fp16 support for CPU linalg functions has been removed in PyTorch."
)
dev = F.ctx()
# input
a = torch.tensor(np.random.rand(100, feat_size)).to(dev).to(dtype)
a.requires_grad_()
b = torch.tensor(np.random.rand(10, feat_size, feat_size + 1)).to(dev).to(dtype)
b = (
torch.tensor(np.random.rand(10, feat_size, feat_size + 1))
.to(dev)
.to(dtype)
)
b.requires_grad_()
seglen_a = torch.tensor([10, 15, 8, 0, 1, 9, 18, 24, 15, 0])
dc = torch.tensor(np.random.rand(100, feat_size + 1)).to(dev).to(dtype)
......@@ -312,7 +354,7 @@ def test_segment_mm(idtype, feat_size, dtype, tol):
c_t = []
off = 0
for i, l in enumerate(seglen_a):
c_t.append(a[off:off+l] @ b[i])
c_t.append(a[off : off + l] @ b[i])
off += l
c_t = torch.cat(c_t).to(dtype)
a.grad.zero_()
......@@ -325,11 +367,15 @@ def test_segment_mm(idtype, feat_size, dtype, tol):
assert torch.allclose(da, da_t, atol=tol, rtol=tol)
assert torch.allclose(db, db_t, atol=tol, rtol=tol)
@unittest.skipIf(dgl.backend.backend_name != 'pytorch', reason='Only support PyTorch for now')
@unittest.skipIf(
dgl.backend.backend_name != "pytorch", reason="Only support PyTorch for now"
)
@parametrize_idtype
@pytest.mark.parametrize('feat_size', [1, 8, 16, 64, 256])
@pytest.mark.parametrize("feat_size", [1, 8, 16, 64, 256])
def test_gather_mm_idx_b(idtype, feat_size):
import torch
dev = F.ctx()
# input
a = torch.tensor(np.random.rand(100, feat_size)).to(dev)
......@@ -355,12 +401,16 @@ def test_gather_mm_idx_b(idtype, feat_size):
assert torch.allclose(da, da_t, atol=1e-4, rtol=1e-4)
assert torch.allclose(db, db_t, atol=1e-4, rtol=1e-4)
@unittest.skipIf(dgl.backend.backend_name != 'pytorch', reason='Only support PyTorch for now')
@unittest.skipIf(
dgl.backend.backend_name != "pytorch", reason="Only support PyTorch for now"
)
@parametrize_idtype
@pytest.mark.parametrize('feat_size', [1, 8, 16, 64, 256])
@pytest.mark.parametrize("feat_size", [1, 8, 16, 64, 256])
def _test_gather_mm_idx_a(idtype, feat_size):
# TODO(minjie): currently disabled due to bugs in the CUDA kernel. Need to fix it later.
import torch
dev = F.ctx()
# input
a = torch.tensor(np.random.rand(10, feat_size)).to(dev)
......@@ -386,10 +436,16 @@ def _test_gather_mm_idx_a(idtype, feat_size):
assert torch.allclose(da, da_t, atol=1e-4, rtol=1e-4)
assert torch.allclose(db, db_t, atol=1e-4, rtol=1e-4)
@unittest.skipIf(dgl.backend.backend_name != 'pytorch', reason='Only support PyTorch for now')
@unittest.skipIf(F._default_context_str == 'gpu', reason="Libxsmm only fit in CPU.")
@unittest.skipIf(
dgl.backend.backend_name != "pytorch", reason="Only support PyTorch for now"
)
@unittest.skipIf(
F._default_context_str == "gpu", reason="Libxsmm only fit in CPU."
)
def test_use_libxsmm_switch():
import torch
g = dgl.graph(([0, 0, 0, 1, 1, 2], [0, 1, 2, 1, 2, 2]))
x = torch.ones(3, 2, requires_grad=True)
y = torch.arange(1, 13).float().view(6, 2).requires_grad_()
......
import numpy as np
import networkx as nx
import unittest
import scipy.sparse as ssp
import pytest
import dgl
import backend as F
import networkx as nx
import numpy as np
import pytest
import scipy.sparse as ssp
from test_utils import parametrize_idtype
import dgl
D = 5
def generate_graph(grad=False, add_data=True):
g = dgl.DGLGraph().to(F.ctx())
g.add_nodes(10)
......@@ -25,10 +27,11 @@ def generate_graph(grad=False, add_data=True):
if grad:
ncol = F.attach_grad(ncol)
ecol = F.attach_grad(ecol)
g.ndata['h'] = ncol
g.edata['l'] = ecol
g.ndata["h"] = ncol
g.edata["l"] = ecol
return g
def test_edge_subgraph():
# Test when the graph has no node data and edge data.
g = generate_graph(add_data=False)
......@@ -36,22 +39,25 @@ def test_edge_subgraph():
# relabel=True
sg = g.edge_subgraph(eid)
assert F.array_equal(sg.ndata[dgl.NID], F.tensor([0, 2, 4, 5, 1, 9], g.idtype))
assert F.array_equal(
sg.ndata[dgl.NID], F.tensor([0, 2, 4, 5, 1, 9], g.idtype)
)
assert F.array_equal(sg.edata[dgl.EID], F.tensor(eid, g.idtype))
sg.ndata['h'] = F.arange(0, sg.number_of_nodes())
sg.edata['h'] = F.arange(0, sg.number_of_edges())
sg.ndata["h"] = F.arange(0, sg.number_of_nodes())
sg.edata["h"] = F.arange(0, sg.number_of_edges())
# relabel=False
sg = g.edge_subgraph(eid, relabel_nodes=False)
assert g.number_of_nodes() == sg.number_of_nodes()
assert F.array_equal(sg.edata[dgl.EID], F.tensor(eid, g.idtype))
sg.ndata['h'] = F.arange(0, sg.number_of_nodes())
sg.edata['h'] = F.arange(0, sg.number_of_edges())
sg.ndata["h"] = F.arange(0, sg.number_of_nodes())
sg.edata["h"] = F.arange(0, sg.number_of_edges())
def test_subgraph():
g = generate_graph()
h = g.ndata['h']
l = g.edata['l']
h = g.ndata["h"]
l = g.edata["l"]
nid = [0, 2, 3, 6, 7, 9]
sg = g.subgraph(nid)
eid = {2, 3, 4, 5, 10, 11, 12, 13, 16}
......@@ -60,9 +66,9 @@ def test_subgraph():
# the subgraph is empty initially except for NID/EID field
assert len(sg.ndata) == 2
assert len(sg.edata) == 2
sh = sg.ndata['h']
sh = sg.ndata["h"]
assert F.allclose(F.gather_row(h, F.tensor(nid)), sh)
'''
"""
s, d, eid
0, 1, 0
1, 9, 1
......@@ -81,12 +87,13 @@ def test_subgraph():
0, 8, 14
8, 9, 15 3
9, 0, 16 1
'''
assert F.allclose(F.gather_row(l, eid), sg.edata['l'])
"""
assert F.allclose(F.gather_row(l, eid), sg.edata["l"])
# update the node/edge features on the subgraph should NOT
# reflect to the parent graph.
sg.ndata['h'] = F.zeros((6, D))
assert F.allclose(h, g.ndata['h'])
sg.ndata["h"] = F.zeros((6, D))
assert F.allclose(h, g.ndata["h"])
def _test_map_to_subgraph():
g = dgl.DGLGraph()
......@@ -96,6 +103,7 @@ def _test_map_to_subgraph():
v = h.map_to_subgraph_nid([0, 8, 2])
assert np.array_equal(F.asnumpy(v), np.array([0, 4, 2]))
def create_test_heterograph(idtype):
# test heterograph from the docstring, plus a user -- wishes -- game relation
# 3 users, 2 games, 2 developers
......@@ -105,29 +113,37 @@ def create_test_heterograph(idtype):
# ('user', 'wishes', 'game'),
# ('developer', 'develops', 'game')])
g = dgl.heterograph({
('user', 'follows', 'user'): ([0, 1], [1, 2]),
('user', 'plays', 'game'): ([0, 1, 2, 1], [0, 0, 1, 1]),
('user', 'wishes', 'game'): ([0, 2], [1, 0]),
('developer', 'develops', 'game'): ([0, 1], [0, 1])
}, idtype=idtype, device=F.ctx())
g = dgl.heterograph(
{
("user", "follows", "user"): ([0, 1], [1, 2]),
("user", "plays", "game"): ([0, 1, 2, 1], [0, 0, 1, 1]),
("user", "wishes", "game"): ([0, 2], [1, 0]),
("developer", "develops", "game"): ([0, 1], [0, 1]),
},
idtype=idtype,
device=F.ctx(),
)
for etype in g.etypes:
g.edges[etype].data['weight'] = F.randn((g.num_edges(etype),))
g.edges[etype].data["weight"] = F.randn((g.num_edges(etype),))
assert g.idtype == idtype
assert g.device == F.ctx()
return g
@unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="MXNet doesn't support bool tensor")
@unittest.skipIf(
dgl.backend.backend_name == "mxnet",
reason="MXNet doesn't support bool tensor",
)
@parametrize_idtype
def test_subgraph_mask(idtype):
g = create_test_heterograph(idtype)
g_graph = g['follows']
g_bipartite = g['plays']
g_graph = g["follows"]
g_bipartite = g["plays"]
x = F.randn((3, 5))
y = F.randn((2, 4))
g.nodes['user'].data['h'] = x
g.edges['follows'].data['h'] = y
g.nodes["user"].data["h"] = x
g.edges["follows"].data["h"] = y
def _check_subgraph(g, sg):
assert sg.idtype == g.idtype
......@@ -135,39 +151,57 @@ def test_subgraph_mask(idtype):
assert sg.ntypes == g.ntypes
assert sg.etypes == g.etypes
assert sg.canonical_etypes == g.canonical_etypes
assert F.array_equal(F.tensor(sg.nodes['user'].data[dgl.NID]),
F.tensor([1, 2], idtype))
assert F.array_equal(F.tensor(sg.nodes['game'].data[dgl.NID]),
F.tensor([0], idtype))
assert F.array_equal(F.tensor(sg.edges['follows'].data[dgl.EID]),
F.tensor([1], idtype))
assert F.array_equal(F.tensor(sg.edges['plays'].data[dgl.EID]),
F.tensor([1], idtype))
assert F.array_equal(F.tensor(sg.edges['wishes'].data[dgl.EID]),
F.tensor([1], idtype))
assert sg.number_of_nodes('developer') == 0
assert sg.number_of_edges('develops') == 0
assert F.array_equal(sg.nodes['user'].data['h'], g.nodes['user'].data['h'][1:3])
assert F.array_equal(sg.edges['follows'].data['h'], g.edges['follows'].data['h'][1:2])
sg1 = g.subgraph({'user': F.tensor([False, True, True], dtype=F.bool),
'game': F.tensor([True, False, False, False], dtype=F.bool)})
assert F.array_equal(
F.tensor(sg.nodes["user"].data[dgl.NID]), F.tensor([1, 2], idtype)
)
assert F.array_equal(
F.tensor(sg.nodes["game"].data[dgl.NID]), F.tensor([0], idtype)
)
assert F.array_equal(
F.tensor(sg.edges["follows"].data[dgl.EID]), F.tensor([1], idtype)
)
assert F.array_equal(
F.tensor(sg.edges["plays"].data[dgl.EID]), F.tensor([1], idtype)
)
assert F.array_equal(
F.tensor(sg.edges["wishes"].data[dgl.EID]), F.tensor([1], idtype)
)
assert sg.number_of_nodes("developer") == 0
assert sg.number_of_edges("develops") == 0
assert F.array_equal(
sg.nodes["user"].data["h"], g.nodes["user"].data["h"][1:3]
)
assert F.array_equal(
sg.edges["follows"].data["h"], g.edges["follows"].data["h"][1:2]
)
sg1 = g.subgraph(
{
"user": F.tensor([False, True, True], dtype=F.bool),
"game": F.tensor([True, False, False, False], dtype=F.bool),
}
)
_check_subgraph(g, sg1)
sg2 = g.edge_subgraph({'follows': F.tensor([False, True], dtype=F.bool),
'plays': F.tensor([False, True, False, False], dtype=F.bool),
'wishes': F.tensor([False, True], dtype=F.bool)})
sg2 = g.edge_subgraph(
{
"follows": F.tensor([False, True], dtype=F.bool),
"plays": F.tensor([False, True, False, False], dtype=F.bool),
"wishes": F.tensor([False, True], dtype=F.bool),
}
)
_check_subgraph(g, sg2)
@parametrize_idtype
def test_subgraph1(idtype):
g = create_test_heterograph(idtype)
g_graph = g['follows']
g_bipartite = g['plays']
g_graph = g["follows"]
g_bipartite = g["plays"]
x = F.randn((3, 5))
y = F.randn((2, 4))
g.nodes['user'].data['h'] = x
g.edges['follows'].data['h'] = y
g.nodes["user"].data["h"] = x
g.edges["follows"].data["h"] = y
def _check_subgraph(g, sg):
assert sg.idtype == g.idtype
......@@ -175,42 +209,62 @@ def test_subgraph1(idtype):
assert sg.ntypes == g.ntypes
assert sg.etypes == g.etypes
assert sg.canonical_etypes == g.canonical_etypes
assert F.array_equal(F.tensor(sg.nodes['user'].data[dgl.NID]),
F.tensor([1, 2], g.idtype))
assert F.array_equal(F.tensor(sg.nodes['game'].data[dgl.NID]),
F.tensor([0], g.idtype))
assert F.array_equal(F.tensor(sg.edges['follows'].data[dgl.EID]),
F.tensor([1], g.idtype))
assert F.array_equal(F.tensor(sg.edges['plays'].data[dgl.EID]),
F.tensor([1], g.idtype))
assert F.array_equal(F.tensor(sg.edges['wishes'].data[dgl.EID]),
F.tensor([1], g.idtype))
assert sg.number_of_nodes('developer') == 0
assert sg.number_of_edges('develops') == 0
assert F.array_equal(sg.nodes['user'].data['h'], g.nodes['user'].data['h'][1:3])
assert F.array_equal(sg.edges['follows'].data['h'], g.edges['follows'].data['h'][1:2])
sg1 = g.subgraph({'user': [1, 2], 'game': [0]})
assert F.array_equal(
F.tensor(sg.nodes["user"].data[dgl.NID]), F.tensor([1, 2], g.idtype)
)
assert F.array_equal(
F.tensor(sg.nodes["game"].data[dgl.NID]), F.tensor([0], g.idtype)
)
assert F.array_equal(
F.tensor(sg.edges["follows"].data[dgl.EID]), F.tensor([1], g.idtype)
)
assert F.array_equal(
F.tensor(sg.edges["plays"].data[dgl.EID]), F.tensor([1], g.idtype)
)
assert F.array_equal(
F.tensor(sg.edges["wishes"].data[dgl.EID]), F.tensor([1], g.idtype)
)
assert sg.number_of_nodes("developer") == 0
assert sg.number_of_edges("develops") == 0
assert F.array_equal(
sg.nodes["user"].data["h"], g.nodes["user"].data["h"][1:3]
)
assert F.array_equal(
sg.edges["follows"].data["h"], g.edges["follows"].data["h"][1:2]
)
sg1 = g.subgraph({"user": [1, 2], "game": [0]})
_check_subgraph(g, sg1)
sg2 = g.edge_subgraph({'follows': [1], 'plays': [1], 'wishes': [1]})
sg2 = g.edge_subgraph({"follows": [1], "plays": [1], "wishes": [1]})
_check_subgraph(g, sg2)
# backend tensor input
sg1 = g.subgraph({'user': F.tensor([1, 2], dtype=idtype),
'game': F.tensor([0], dtype=idtype)})
sg1 = g.subgraph(
{
"user": F.tensor([1, 2], dtype=idtype),
"game": F.tensor([0], dtype=idtype),
}
)
_check_subgraph(g, sg1)
sg2 = g.edge_subgraph({'follows': F.tensor([1], dtype=idtype),
'plays': F.tensor([1], dtype=idtype),
'wishes': F.tensor([1], dtype=idtype)})
sg2 = g.edge_subgraph(
{
"follows": F.tensor([1], dtype=idtype),
"plays": F.tensor([1], dtype=idtype),
"wishes": F.tensor([1], dtype=idtype),
}
)
_check_subgraph(g, sg2)
# numpy input
sg1 = g.subgraph({'user': np.array([1, 2]),
'game': np.array([0])})
sg1 = g.subgraph({"user": np.array([1, 2]), "game": np.array([0])})
_check_subgraph(g, sg1)
sg2 = g.edge_subgraph({'follows': np.array([1]),
'plays': np.array([1]),
'wishes': np.array([1])})
sg2 = g.edge_subgraph(
{
"follows": np.array([1]),
"plays": np.array([1]),
"wishes": np.array([1]),
}
)
_check_subgraph(g, sg2)
def _check_subgraph_single_ntype(g, sg, preserve_nodes=False):
......@@ -221,18 +275,25 @@ def test_subgraph1(idtype):
assert sg.canonical_etypes == g.canonical_etypes
if not preserve_nodes:
assert F.array_equal(F.tensor(sg.nodes['user'].data[dgl.NID]),
F.tensor([1, 2], g.idtype))
assert F.array_equal(
F.tensor(sg.nodes["user"].data[dgl.NID]),
F.tensor([1, 2], g.idtype),
)
else:
for ntype in sg.ntypes:
assert g.number_of_nodes(ntype) == sg.number_of_nodes(ntype)
assert F.array_equal(F.tensor(sg.edges['follows'].data[dgl.EID]),
F.tensor([1], g.idtype))
assert F.array_equal(
F.tensor(sg.edges["follows"].data[dgl.EID]), F.tensor([1], g.idtype)
)
if not preserve_nodes:
assert F.array_equal(sg.nodes['user'].data['h'], g.nodes['user'].data['h'][1:3])
assert F.array_equal(sg.edges['follows'].data['h'], g.edges['follows'].data['h'][1:2])
assert F.array_equal(
sg.nodes["user"].data["h"], g.nodes["user"].data["h"][1:3]
)
assert F.array_equal(
sg.edges["follows"].data["h"], g.edges["follows"].data["h"][1:2]
)
def _check_subgraph_single_etype(g, sg, preserve_nodes=False):
assert sg.ntypes == g.ntypes
......@@ -240,16 +301,22 @@ def test_subgraph1(idtype):
assert sg.canonical_etypes == g.canonical_etypes
if not preserve_nodes:
assert F.array_equal(F.tensor(sg.nodes['user'].data[dgl.NID]),
F.tensor([0, 1], g.idtype))
assert F.array_equal(F.tensor(sg.nodes['game'].data[dgl.NID]),
F.tensor([0], g.idtype))
assert F.array_equal(
F.tensor(sg.nodes["user"].data[dgl.NID]),
F.tensor([0, 1], g.idtype),
)
assert F.array_equal(
F.tensor(sg.nodes["game"].data[dgl.NID]),
F.tensor([0], g.idtype),
)
else:
for ntype in sg.ntypes:
assert g.number_of_nodes(ntype) == sg.number_of_nodes(ntype)
assert F.array_equal(F.tensor(sg.edges['plays'].data[dgl.EID]),
F.tensor([0, 1], g.idtype))
assert F.array_equal(
F.tensor(sg.edges["plays"].data[dgl.EID]),
F.tensor([0, 1], g.idtype),
)
sg1_graph = g_graph.subgraph([1, 2])
_check_subgraph_single_ntype(g_graph, sg1_graph)
......@@ -265,222 +332,279 @@ def test_subgraph1(idtype):
def _check_typed_subgraph1(g, sg):
    """Check a type-induced subgraph that keeps the user/game node types.

    Verifies that ``sg`` preserves idtype, device, node/edge types and all
    edges of ``g``, and that node/edge features are *shared* with the
    parent: mutating ``g``'s features must be reflected in ``sg``.
    """
    assert g.idtype == sg.idtype
    assert g.device == sg.device
    assert set(sg.ntypes) == {"user", "game"}
    assert set(sg.etypes) == {"follows", "plays", "wishes"}
    for ntype in sg.ntypes:
        assert sg.number_of_nodes(ntype) == g.number_of_nodes(ntype)
    for etype in sg.etypes:
        src_sg, dst_sg = sg.all_edges(etype=etype, order="eid")
        src_g, dst_g = g.all_edges(etype=etype, order="eid")
        assert F.array_equal(src_sg, src_g)
        assert F.array_equal(dst_sg, dst_g)
    assert F.array_equal(
        sg.nodes["user"].data["h"], g.nodes["user"].data["h"]
    )
    assert F.array_equal(
        sg.edges["follows"].data["h"], g.edges["follows"].data["h"]
    )
    # Mutate the parent's features; the subgraph must observe the change.
    g.nodes["user"].data["h"] = F.scatter_row(
        g.nodes["user"].data["h"], F.tensor([2]), F.randn((1, 5))
    )
    g.edges["follows"].data["h"] = F.scatter_row(
        g.edges["follows"].data["h"], F.tensor([1]), F.randn((1, 4))
    )
    assert F.array_equal(
        sg.nodes["user"].data["h"], g.nodes["user"].data["h"]
    )
    assert F.array_equal(
        sg.edges["follows"].data["h"], g.edges["follows"].data["h"]
    )
def _check_typed_subgraph2(g, sg):
    """Check a type-induced subgraph restricted to the 'develops' relation.

    Verifies the expected node/edge types survive and that every edge of
    each kept relation is preserved in eid order.
    """
    assert set(sg.ntypes) == {"developer", "game"}
    assert set(sg.etypes) == {"develops"}
    for ntype in sg.ntypes:
        assert sg.number_of_nodes(ntype) == g.number_of_nodes(ntype)
    for etype in sg.etypes:
        src_sg, dst_sg = sg.all_edges(etype=etype, order="eid")
        src_g, dst_g = g.all_edges(etype=etype, order="eid")
        assert F.array_equal(src_sg, src_g)
        assert F.array_equal(dst_sg, dst_g)
sg3 = g.node_type_subgraph(['user', 'game'])
sg3 = g.node_type_subgraph(["user", "game"])
_check_typed_subgraph1(g, sg3)
sg4 = g.edge_type_subgraph(['develops'])
sg4 = g.edge_type_subgraph(["develops"])
_check_typed_subgraph2(g, sg4)
sg5 = g.edge_type_subgraph(['follows', 'plays', 'wishes'])
sg5 = g.edge_type_subgraph(["follows", "plays", "wishes"])
_check_typed_subgraph1(g, sg5)
# Test for restricted format
for fmt in ['csr', 'csc', 'coo']:
for fmt in ["csr", "csc", "coo"]:
g = dgl.graph(([0, 1], [1, 2])).formats(fmt)
sg = g.subgraph({g.ntypes[0]: [1, 0]})
nids = F.asnumpy(sg.ndata[dgl.NID])
assert np.array_equal(nids, np.array([1, 0]))
src, dst = sg.edges(order='eid')
src, dst = sg.edges(order="eid")
src = F.asnumpy(src)
dst = F.asnumpy(dst)
assert np.array_equal(src, np.array([1]))
@parametrize_idtype
def test_in_subgraph(idtype):
    """Test ``dgl.in_subgraph`` on a heterograph.

    Covers the default call (induced in-edges of the seed nodes), the
    ``store_ids=False`` variant (no NID/EID bookkeeping) and the
    ``relabel_nodes=True`` variant (compact node ids with NID mapping).
    """
    hg = dgl.heterograph(
        {
            ("user", "follow", "user"): (
                [1, 2, 3, 0, 2, 3, 0],
                [0, 0, 0, 1, 1, 1, 2],
            ),
            ("user", "play", "game"): ([0, 0, 1, 3], [0, 1, 2, 2]),
            ("game", "liked-by", "user"): (
                [2, 2, 2, 1, 1, 0],
                [0, 1, 2, 0, 3, 0],
            ),
            ("user", "flips", "coin"): ([0, 1, 2, 3], [0, 0, 0, 0]),
        },
        idtype=idtype,
        num_nodes_dict={"user": 5, "game": 10, "coin": 8},
    ).to(F.ctx())
    subg = dgl.in_subgraph(hg, {"user": [0, 1], "game": 0})
    assert subg.idtype == idtype
    assert len(subg.ntypes) == 3
    assert len(subg.etypes) == 4
    u, v = subg["follow"].edges()
    edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
    assert F.array_equal(
        hg["follow"].edge_ids(u, v), subg["follow"].edata[dgl.EID]
    )
    assert edge_set == {(1, 0), (2, 0), (3, 0), (0, 1), (2, 1), (3, 1)}
    u, v = subg["play"].edges()
    edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
    assert F.array_equal(hg["play"].edge_ids(u, v), subg["play"].edata[dgl.EID])
    assert edge_set == {(0, 0)}
    u, v = subg["liked-by"].edges()
    edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
    assert F.array_equal(
        hg["liked-by"].edge_ids(u, v), subg["liked-by"].edata[dgl.EID]
    )
    assert edge_set == {(2, 0), (2, 1), (1, 0), (0, 0)}
    # No seed is a 'coin' destination, so 'flips' contributes no edges.
    assert subg["flips"].number_of_edges() == 0
    for ntype in subg.ntypes:
        assert dgl.NID not in subg.nodes[ntype].data
    # Test store_ids
    subg = dgl.in_subgraph(hg, {"user": [0, 1], "game": 0}, store_ids=False)
    for etype in ["follow", "play", "liked-by"]:
        assert dgl.EID not in subg.edges[etype].data
    for ntype in subg.ntypes:
        assert dgl.NID not in subg.nodes[ntype].data
    # Test relabel nodes
    subg = dgl.in_subgraph(hg, {"user": [0, 1], "game": 0}, relabel_nodes=True)
    assert subg.idtype == idtype
    assert len(subg.ntypes) == 3
    assert len(subg.etypes) == 4
    u, v = subg["follow"].edges()
    old_u = F.gather_row(subg.nodes["user"].data[dgl.NID], u)
    old_v = F.gather_row(subg.nodes["user"].data[dgl.NID], v)
    assert F.array_equal(
        hg["follow"].edge_ids(old_u, old_v), subg["follow"].edata[dgl.EID]
    )
    edge_set = set(zip(list(F.asnumpy(old_u)), list(F.asnumpy(old_v))))
    assert edge_set == {(1, 0), (2, 0), (3, 0), (0, 1), (2, 1), (3, 1)}
    u, v = subg["play"].edges()
    old_u = F.gather_row(subg.nodes["user"].data[dgl.NID], u)
    old_v = F.gather_row(subg.nodes["game"].data[dgl.NID], v)
    assert F.array_equal(
        hg["play"].edge_ids(old_u, old_v), subg["play"].edata[dgl.EID]
    )
    edge_set = set(zip(list(F.asnumpy(old_u)), list(F.asnumpy(old_v))))
    assert edge_set == {(0, 0)}
    u, v = subg["liked-by"].edges()
    old_u = F.gather_row(subg.nodes["game"].data[dgl.NID], u)
    old_v = F.gather_row(subg.nodes["user"].data[dgl.NID], v)
    assert F.array_equal(
        hg["liked-by"].edge_ids(old_u, old_v), subg["liked-by"].edata[dgl.EID]
    )
    edge_set = set(zip(list(F.asnumpy(old_u)), list(F.asnumpy(old_v))))
    assert edge_set == {(2, 0), (2, 1), (1, 0), (0, 0)}
    assert subg.num_nodes("user") == 4
    assert subg.num_nodes("game") == 3
    assert subg.num_nodes("coin") == 0
    assert subg.num_edges("flips") == 0
@parametrize_idtype
def test_out_subgraph(idtype):
    """Test ``dgl.out_subgraph`` on a heterograph.

    Mirrors ``test_in_subgraph``: checks the induced out-edges of the seed
    nodes, the ``store_ids=False`` variant and the ``relabel_nodes=True``
    variant with NID-based edge verification.
    """
    hg = dgl.heterograph(
        {
            ("user", "follow", "user"): (
                [1, 2, 3, 0, 2, 3, 0],
                [0, 0, 0, 1, 1, 1, 2],
            ),
            ("user", "play", "game"): ([0, 0, 1, 3], [0, 1, 2, 2]),
            ("game", "liked-by", "user"): (
                [2, 2, 2, 1, 1, 0],
                [0, 1, 2, 0, 3, 0],
            ),
            ("user", "flips", "coin"): ([0, 1, 2, 3], [0, 0, 0, 0]),
        },
        idtype=idtype,
    ).to(F.ctx())
    subg = dgl.out_subgraph(hg, {"user": [0, 1], "game": 0})
    assert subg.idtype == idtype
    assert len(subg.ntypes) == 3
    assert len(subg.etypes) == 4
    u, v = subg["follow"].edges()
    edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
    assert edge_set == {(1, 0), (0, 1), (0, 2)}
    assert F.array_equal(
        hg["follow"].edge_ids(u, v), subg["follow"].edata[dgl.EID]
    )
    u, v = subg["play"].edges()
    edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
    assert edge_set == {(0, 0), (0, 1), (1, 2)}
    assert F.array_equal(hg["play"].edge_ids(u, v), subg["play"].edata[dgl.EID])
    u, v = subg["liked-by"].edges()
    edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
    assert edge_set == {(0, 0)}
    assert F.array_equal(
        hg["liked-by"].edge_ids(u, v), subg["liked-by"].edata[dgl.EID]
    )
    u, v = subg["flips"].edges()
    edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
    assert edge_set == {(0, 0), (1, 0)}
    assert F.array_equal(
        hg["flips"].edge_ids(u, v), subg["flips"].edata[dgl.EID]
    )
    for ntype in subg.ntypes:
        assert dgl.NID not in subg.nodes[ntype].data
    # Test store_ids
    subg = dgl.out_subgraph(hg, {"user": [0, 1], "game": 0}, store_ids=False)
    for etype in subg.canonical_etypes:
        assert dgl.EID not in subg.edges[etype].data
    for ntype in subg.ntypes:
        assert dgl.NID not in subg.nodes[ntype].data
    # Test relabel nodes
    subg = dgl.out_subgraph(hg, {"user": [1], "game": 0}, relabel_nodes=True)
    assert subg.idtype == idtype
    assert len(subg.ntypes) == 3
    assert len(subg.etypes) == 4
    u, v = subg["follow"].edges()
    old_u = F.gather_row(subg.nodes["user"].data[dgl.NID], u)
    old_v = F.gather_row(subg.nodes["user"].data[dgl.NID], v)
    edge_set = set(zip(list(F.asnumpy(old_u)), list(F.asnumpy(old_v))))
    assert edge_set == {(1, 0)}
    assert F.array_equal(
        hg["follow"].edge_ids(old_u, old_v), subg["follow"].edata[dgl.EID]
    )
    u, v = subg["play"].edges()
    old_u = F.gather_row(subg.nodes["user"].data[dgl.NID], u)
    old_v = F.gather_row(subg.nodes["game"].data[dgl.NID], v)
    edge_set = set(zip(list(F.asnumpy(old_u)), list(F.asnumpy(old_v))))
    assert edge_set == {(1, 2)}
    assert F.array_equal(
        hg["play"].edge_ids(old_u, old_v), subg["play"].edata[dgl.EID]
    )
    u, v = subg["liked-by"].edges()
    old_u = F.gather_row(subg.nodes["game"].data[dgl.NID], u)
    old_v = F.gather_row(subg.nodes["user"].data[dgl.NID], v)
    edge_set = set(zip(list(F.asnumpy(old_u)), list(F.asnumpy(old_v))))
    assert edge_set == {(0, 0)}
    assert F.array_equal(
        hg["liked-by"].edge_ids(old_u, old_v), subg["liked-by"].edata[dgl.EID]
    )
    u, v = subg["flips"].edges()
    old_u = F.gather_row(subg.nodes["user"].data[dgl.NID], u)
    old_v = F.gather_row(subg.nodes["coin"].data[dgl.NID], v)
    edge_set = set(zip(list(F.asnumpy(old_u)), list(F.asnumpy(old_v))))
    assert edge_set == {(1, 0)}
    assert F.array_equal(
        hg["flips"].edge_ids(old_u, old_v), subg["flips"].edata[dgl.EID]
    )
    assert subg.num_nodes("user") == 2
    assert subg.num_nodes("game") == 2
    assert subg.num_nodes("coin") == 1
def test_subgraph_message_passing():
    """Message passing on a subgraph must not crash (regression for PR #2055)."""
    g = dgl.graph(([0, 1, 2], [2, 3, 4])).to(F.cpu())
    g.ndata["x"] = F.copy_to(F.randn((5, 6)), F.cpu())
    sg = g.subgraph([1, 2, 3]).to(F.ctx())
    # Copy source features along edges and sum them at the destinations.
    sg.update_all(
        lambda edges: {"x": edges.src["x"]},
        lambda nodes: {"y": F.sum(nodes.mailbox["x"], 1)},
    )
@parametrize_idtype
def test_khop_in_subgraph(idtype):
g = dgl.graph(([1, 1, 2, 3, 4], [0, 2, 0, 4, 2]), idtype=idtype, device=F.ctx())
g.edata['w'] = F.tensor([
[0, 1],
[2, 3],
[4, 5],
[6, 7],
[8, 9]
])
g = dgl.graph(
([1, 1, 2, 3, 4], [0, 2, 0, 4, 2]), idtype=idtype, device=F.ctx()
)
g.edata["w"] = F.tensor([[0, 1], [2, 3], [4, 5], [6, 7], [8, 9]])
sg, inv = dgl.khop_in_subgraph(g, 0, k=2)
assert sg.idtype == g.idtype
u, v = sg.edges()
edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
assert edge_set == {(1,0), (1,2), (2,0), (3,2)}
assert F.array_equal(sg.edata[dgl.EID], F.tensor([0, 1, 2, 4], dtype=idtype))
assert F.array_equal(sg.edata['w'], F.tensor([
[0, 1],
[2, 3],
[4, 5],
[8, 9]
]))
assert edge_set == {(1, 0), (1, 2), (2, 0), (3, 2)}
assert F.array_equal(
sg.edata[dgl.EID], F.tensor([0, 1, 2, 4], dtype=idtype)
)
assert F.array_equal(
sg.edata["w"], F.tensor([[0, 1], [2, 3], [4, 5], [8, 9]])
)
assert F.array_equal(F.astype(inv, idtype), F.tensor([0], idtype))
# Test multiple nodes
......@@ -497,66 +621,70 @@ def test_khop_in_subgraph(idtype):
assert sg.num_edges() == 0
assert F.array_equal(F.astype(inv, idtype), F.tensor([0], idtype))
g = dgl.heterograph({
('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 2, 1]),
('user', 'follows', 'user'): ([0, 1, 1], [1, 2, 2]),
}, idtype=idtype, device=F.ctx())
sg, inv = dgl.khop_in_subgraph(g, {'game': 0}, k=2)
g = dgl.heterograph(
{
("user", "plays", "game"): ([0, 1, 1, 2], [0, 0, 2, 1]),
("user", "follows", "user"): ([0, 1, 1], [1, 2, 2]),
},
idtype=idtype,
device=F.ctx(),
)
sg, inv = dgl.khop_in_subgraph(g, {"game": 0}, k=2)
assert sg.idtype == idtype
assert sg.num_nodes('game') == 1
assert sg.num_nodes('user') == 2
assert sg.num_nodes("game") == 1
assert sg.num_nodes("user") == 2
assert len(sg.ntypes) == 2
assert len(sg.etypes) == 2
u, v = sg['follows'].edges()
u, v = sg["follows"].edges()
edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
assert edge_set == {(0, 1)}
u, v = sg['plays'].edges()
u, v = sg["plays"].edges()
edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
assert edge_set == {(0, 0), (1, 0)}
assert F.array_equal(F.astype(inv['game'], idtype), F.tensor([0], idtype))
assert F.array_equal(F.astype(inv["game"], idtype), F.tensor([0], idtype))
# Test isolated node
sg, inv = dgl.khop_in_subgraph(g, {'user': 0}, k=2)
sg, inv = dgl.khop_in_subgraph(g, {"user": 0}, k=2)
assert sg.idtype == idtype
assert sg.num_nodes('game') == 0
assert sg.num_nodes('user') == 1
assert sg.num_edges('follows') == 0
assert sg.num_edges('plays') == 0
assert F.array_equal(F.astype(inv['user'], idtype), F.tensor([0], idtype))
assert sg.num_nodes("game") == 0
assert sg.num_nodes("user") == 1
assert sg.num_edges("follows") == 0
assert sg.num_edges("plays") == 0
assert F.array_equal(F.astype(inv["user"], idtype), F.tensor([0], idtype))
# Test multiple nodes
sg, inv = dgl.khop_in_subgraph(g, {'user': F.tensor([0, 1], idtype), 'game': 0}, k=1)
u, v = sg['follows'].edges()
sg, inv = dgl.khop_in_subgraph(
g, {"user": F.tensor([0, 1], idtype), "game": 0}, k=1
)
u, v = sg["follows"].edges()
edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
assert edge_set == {(0, 1)}
u, v = sg['plays'].edges()
u, v = sg["plays"].edges()
edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
assert edge_set == {(0, 0), (1, 0)}
assert F.array_equal(F.astype(inv['user'], idtype), F.tensor([0, 1], idtype))
assert F.array_equal(F.astype(inv['game'], idtype), F.tensor([0], idtype))
assert F.array_equal(
F.astype(inv["user"], idtype), F.tensor([0, 1], idtype)
)
assert F.array_equal(F.astype(inv["game"], idtype), F.tensor([0], idtype))
@parametrize_idtype
def test_khop_out_subgraph(idtype):
g = dgl.graph(([0, 2, 0, 4, 2], [1, 1, 2, 3, 4]), idtype=idtype, device=F.ctx())
g.edata['w'] = F.tensor([
[0, 1],
[2, 3],
[4, 5],
[6, 7],
[8, 9]
])
g = dgl.graph(
([0, 2, 0, 4, 2], [1, 1, 2, 3, 4]), idtype=idtype, device=F.ctx()
)
g.edata["w"] = F.tensor([[0, 1], [2, 3], [4, 5], [6, 7], [8, 9]])
sg, inv = dgl.khop_out_subgraph(g, 0, k=2)
assert sg.idtype == g.idtype
u, v = sg.edges()
edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
assert edge_set == {(0,1), (2,1), (0,2), (2,3)}
assert F.array_equal(sg.edata[dgl.EID], F.tensor([0, 2, 1, 4], dtype=idtype))
assert F.array_equal(sg.edata['w'], F.tensor([
[0, 1],
[4, 5],
[2, 3],
[8, 9]
]))
assert edge_set == {(0, 1), (2, 1), (0, 2), (2, 3)}
assert F.array_equal(
sg.edata[dgl.EID], F.tensor([0, 2, 1, 4], dtype=idtype)
)
assert F.array_equal(
sg.edata["w"], F.tensor([[0, 1], [4, 5], [2, 3], [8, 9]])
)
assert F.array_equal(F.astype(inv, idtype), F.tensor([0], idtype))
# Test multiple nodes
......@@ -573,92 +701,111 @@ def test_khop_out_subgraph(idtype):
assert sg.num_edges() == 0
assert F.array_equal(F.astype(inv, idtype), F.tensor([0], idtype))
g = dgl.heterograph({
('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 2, 1]),
('user', 'follows', 'user'): ([0, 1], [1, 3]),
}, idtype=idtype, device=F.ctx())
sg, inv = dgl.khop_out_subgraph(g, {'user': 0}, k=2)
g = dgl.heterograph(
{
("user", "plays", "game"): ([0, 1, 1, 2], [0, 0, 2, 1]),
("user", "follows", "user"): ([0, 1], [1, 3]),
},
idtype=idtype,
device=F.ctx(),
)
sg, inv = dgl.khop_out_subgraph(g, {"user": 0}, k=2)
assert sg.idtype == idtype
assert sg.num_nodes('game') == 2
assert sg.num_nodes('user') == 3
assert sg.num_nodes("game") == 2
assert sg.num_nodes("user") == 3
assert len(sg.ntypes) == 2
assert len(sg.etypes) == 2
u, v = sg['follows'].edges()
u, v = sg["follows"].edges()
edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
assert edge_set == {(0, 1), (1, 2)}
u, v = sg['plays'].edges()
u, v = sg["plays"].edges()
edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
assert edge_set == {(0,0), (1,0), (1,1)}
assert F.array_equal(F.astype(inv['user'], idtype), F.tensor([0], idtype))
assert edge_set == {(0, 0), (1, 0), (1, 1)}
assert F.array_equal(F.astype(inv["user"], idtype), F.tensor([0], idtype))
# Test isolated node
sg, inv = dgl.khop_out_subgraph(g, {'user': 3}, k=2)
sg, inv = dgl.khop_out_subgraph(g, {"user": 3}, k=2)
assert sg.idtype == idtype
assert sg.num_nodes('game') == 0
assert sg.num_nodes('user') == 1
assert sg.num_edges('follows') == 0
assert sg.num_edges('plays') == 0
assert F.array_equal(F.astype(inv['user'], idtype), F.tensor([0], idtype))
assert sg.num_nodes("game") == 0
assert sg.num_nodes("user") == 1
assert sg.num_edges("follows") == 0
assert sg.num_edges("plays") == 0
assert F.array_equal(F.astype(inv["user"], idtype), F.tensor([0], idtype))
# Test multiple nodes
sg, inv = dgl.khop_out_subgraph(g, {'user': F.tensor([2], idtype), 'game': 0}, k=1)
assert sg.num_edges('follows') == 0
u, v = sg['plays'].edges()
sg, inv = dgl.khop_out_subgraph(
g, {"user": F.tensor([2], idtype), "game": 0}, k=1
)
assert sg.num_edges("follows") == 0
u, v = sg["plays"].edges()
edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
assert edge_set == {(0, 1)}
assert F.array_equal(F.astype(inv['user'], idtype), F.tensor([0], idtype))
assert F.array_equal(F.astype(inv['game'], idtype), F.tensor([0], idtype))
assert F.array_equal(F.astype(inv["user"], idtype), F.tensor([0], idtype))
assert F.array_equal(F.astype(inv["game"], idtype), F.tensor([0], idtype))
@unittest.skipIf(not F.gpu_ctx(), "only necessary with GPU")
@pytest.mark.parametrize(
    "parent_idx_device",
    [("cpu", F.cpu()), ("cuda", F.cuda()), ("uva", F.cpu()), ("uva", F.cuda())],
)
@pytest.mark.parametrize("child_device", [F.cpu(), F.cuda()])
def test_subframes(parent_idx_device, child_device):
    """Sampled subgraph frames must live on the subgraph's device.

    Exercises every combination of parent graph placement (cpu / cuda /
    pinned-memory UVA) and seed-index device against both child devices.
    """
    parent_device, idx_device = parent_idx_device
    g = dgl.graph(
        (F.tensor([1, 2, 3], dtype=F.int64), F.tensor([2, 3, 4], dtype=F.int64))
    )
    print(g.device)
    g.ndata["x"] = F.randn((5, 4))
    g.edata["a"] = F.randn((3, 6))
    idx = F.tensor([1, 2], dtype=F.int64)
    if parent_device == "cuda":
        g = g.to(F.cuda())
    elif parent_device == "uva":
        if F.backend_name != "pytorch":
            pytest.skip("UVA only supported for PyTorch")
        g = g.to(F.cpu())
        g.create_formats_()
        g.pin_memory_()
    elif parent_device == "cpu":
        g = g.to(F.cpu())
    idx = F.copy_to(idx, idx_device)
    sg = g.sample_neighbors(idx, 2).to(child_device)
    assert sg.device == F.context(sg.ndata["x"])
    assert sg.device == F.context(sg.edata["a"])
    assert sg.device == child_device
    if parent_device != "uva":
        # Moving the parent first then sampling must agree on devices too.
        sg = g.to(child_device).sample_neighbors(
            F.copy_to(idx, child_device), 2
        )
        assert sg.device == F.context(sg.ndata["x"])
        assert sg.device == F.context(sg.edata["a"])
        assert sg.device == child_device
    if parent_device == "uva":
        g.unpin_memory_()
@unittest.skipIf(F._default_context_str != "gpu", reason="UVA only available on GPU")
@pytest.mark.parametrize('device', [F.cpu(), F.cuda()])
@unittest.skipIf(dgl.backend.backend_name != "pytorch", reason="UVA only supported for PyTorch")
@unittest.skipIf(
F._default_context_str != "gpu", reason="UVA only available on GPU"
)
@pytest.mark.parametrize("device", [F.cpu(), F.cuda()])
@unittest.skipIf(
dgl.backend.backend_name != "pytorch",
reason="UVA only supported for PyTorch",
)
@parametrize_idtype
def test_uva_subgraph(idtype, device):
g = create_test_heterograph(idtype)
g = g.to(F.cpu())
g.create_formats_()
g.pin_memory_()
indices = {'user': F.copy_to(F.tensor([0], idtype), device)}
edge_indices = {'follows': F.copy_to(F.tensor([0], idtype), device)}
indices = {"user": F.copy_to(F.tensor([0], idtype), device)}
edge_indices = {"follows": F.copy_to(F.tensor([0], idtype), device)}
assert g.subgraph(indices).device == device
assert g.edge_subgraph(edge_indices).device == device
assert g.in_subgraph(indices).device == device
assert g.out_subgraph(indices).device == device
if dgl.backend.backend_name != 'tensorflow':
if dgl.backend.backend_name != "tensorflow":
# (BarclayII) Most of Tensorflow functions somehow do not preserve device: a CPU tensor
# becomes a GPU tensor after operations such as concat(), unique() or even sin().
# Not sure what should be the best fix.
......@@ -667,6 +814,7 @@ def test_uva_subgraph(idtype, device):
assert g.sample_neighbors(indices, 1).device == device
g.unpin_memory_()
if __name__ == "__main__":
    test_edge_subgraph()
    # test_uva_subgraph(F.int64, F.cpu())
import itertools
import random
import sys
import time
import unittest
import dgl
import backend as F
import networkx as nx
import numpy as np
import scipy.sparse as sp
import backend as F
import itertools
from test_utils import parametrize_idtype
import dgl
np.random.seed(42)
def toset(x):
    """Return the elements of a backend tensor as a Python set."""
    # F.zerocopy_to_numpy may return a int
    values = F.zerocopy_to_numpy(x).tolist()
    return set(values)
@parametrize_idtype
def test_bfs(idtype, n=100):
def _bfs_nx(g_nx, src):
......@@ -59,6 +61,7 @@ def test_bfs(idtype, n=100):
assert len(edges_dgl) == len(edges_nx)
assert all(toset(x) == y for x, y in zip(edges_dgl, edges_nx))
@parametrize_idtype
def test_topological_nodes(idtype, n=100):
a = sp.random(n, n, 3 / n, data_rvs=lambda n: np.ones(n))
......@@ -68,12 +71,13 @@ def test_topological_nodes(idtype, n=100):
layers_dgl = dgl.topological_nodes_generator(g)
adjmat = g.adjacency_matrix(transpose=True)
def tensor_topo_traverse():
n = g.number_of_nodes()
mask = F.copy_to(F.ones((n, 1)), F.cpu())
degree = F.spmm(adjmat, mask)
while F.reduce_sum(mask) != 0.:
v = F.astype((degree == 0.), F.float32)
while F.reduce_sum(mask) != 0.0:
v = F.astype((degree == 0.0), F.float32)
v = v * mask
mask = mask - v
frontier = F.copy_to(F.nonzero_1d(F.squeeze(v, 1)), F.cpu())
......@@ -85,14 +89,18 @@ def test_topological_nodes(idtype, n=100):
assert len(layers_dgl) == len(layers_spmv)
assert all(toset(x) == toset(y) for x, y in zip(layers_dgl, layers_spmv))
# Human-readable names for the three DFS edge labels produced by
# dgl.dfs_labeled_edges_generator.
DFS_LABEL_NAMES = ["forward", "reverse", "nontree"]
@parametrize_idtype
def test_dfs_labeled_edges(idtype, example=False):
dgl_g = dgl.DGLGraph().astype(idtype)
dgl_g.add_nodes(6)
dgl_g.add_edges([0, 1, 0, 3, 3], [1, 2, 2, 4, 5])
dgl_edges, dgl_labels = dgl.dfs_labeled_edges_generator(
dgl_g, [0, 3], has_reverse_edge=True, has_nontree_edge=True)
dgl_g, [0, 3], has_reverse_edge=True, has_nontree_edge=True
)
dgl_edges = [toset(t) for t in dgl_edges]
dgl_labels = [toset(t) for t in dgl_labels]
g1_solutions = [
......@@ -108,10 +116,14 @@ def test_dfs_labeled_edges(idtype, example=False):
def combine_frontiers(sol):
    """Merge per-source traversal traces into per-step frontier sets.

    ``sol`` is a sequence of ``(edges, labels)`` pairs, one per source.
    The traces are zipped step-wise across sources with ``zip_longest``
    (shorter traces are padded with ``None``, which is dropped), yielding
    one set of edges and one set of labels per traversal step.
    """
    es, ls = zip(*sol)
    es = [
        set(i for i in t if i is not None)
        for t in itertools.zip_longest(*es)
    ]
    ls = [
        set(i for i in t if i is not None)
        for t in itertools.zip_longest(*ls)
    ]
    return es, ls
for sol_set in itertools.product(g1_solutions, g2_solutions):
......@@ -121,7 +133,8 @@ def test_dfs_labeled_edges(idtype, example=False):
else:
assert False
if __name__ == "__main__":
    test_bfs(idtype="int32")
    test_topological_nodes(idtype="int32")
    test_dfs_labeled_edges(idtype="int32")
# NOTE(vibwu): Currently cugraph must be imported before torch to avoid a resource cleanup issue.
# See https://github.com/rapidsai/cugraph/issues/2718
import cugraph
import unittest
import backend as F
import dgl
import cugraph
import numpy as np
from dgl import DGLGraph
import unittest
import pytest
import dgl
from dgl import DGLGraph
def test_dummy():
    """Smoke test: constructing an empty cugraph Graph must succeed."""
    graph = cugraph.Graph()
    assert graph is not None
def test_to_cugraph_conversion():
    """Converting a DGL graph to cugraph preserves node/edge counts and edges."""
    g = dgl.graph((F.tensor([0, 1, 2, 3]), F.tensor([1, 0, 3, 2]))).to("cuda")
    cugraph_g = g.to_cugraph()
    assert cugraph_g.number_of_nodes() == g.number_of_nodes()
    assert cugraph_g.number_of_edges() == g.number_of_edges()
    assert cugraph_g.has_edge(0, 1)
    assert cugraph_g.has_edge(1, 0)
    assert cugraph_g.has_edge(3, 2)
def test_from_cugraph_conversion():
# cudf is a dependency of cugraph
import cudf
# directed graph conversion test
cugraph_g = cugraph.Graph(directed=True)
df = cudf.DataFrame({"source":[0, 1, 2, 3],
"destination":[1, 2, 3, 2]})
df = cudf.DataFrame({"source": [0, 1, 2, 3], "destination": [1, 2, 3, 2]})
cugraph_g.from_cudf_edgelist(df)
g = dgl.from_cugraph(cugraph_g)
assert g.device.type == 'cuda'
assert g.device.type == "cuda"
assert g.number_of_nodes() == cugraph_g.number_of_nodes()
assert g.number_of_edges() == cugraph_g.number_of_edges()
......@@ -50,14 +53,13 @@ def test_from_cugraph_conversion():
# undirected graph conversion test
cugraph_g = cugraph.Graph(directed=False)
df = cudf.DataFrame({"source":[0, 1, 2, 3],
"destination":[1, 2, 3, 2]})
df = cudf.DataFrame({"source": [0, 1, 2, 3], "destination": [1, 2, 3, 2]})
cugraph_g.from_cudf_edgelist(df)
g = dgl.from_cugraph(cugraph_g)
assert g.device.type == 'cuda'
assert g.device.type == "cuda"
assert g.number_of_nodes() == cugraph_g.number_of_nodes()
# assert reverse edges are present
assert g.has_edges_between(0, 1)
......
import os
import dgl
import backend as F
from numpy.testing import assert_array_equal
import dgl
INTEGER = 2
STR = 'hello world!'
STR = "hello world!"
HELLO_SERVICE_ID = 901231
TENSOR = F.zeros((1000, 1000), F.int64, F.cpu())
......@@ -47,25 +49,36 @@ class HelloRequest(dgl.distributed.Request):
return res
def start_server(
    server_id, ip_config, num_servers, num_clients, net_type, keep_alive
):
    """Launch a DGL distributed RPC server hosting the hello service.

    Registers ``HelloRequest``/``HelloResponse`` under ``HELLO_SERVICE_ID``
    and blocks in ``dgl.distributed.start_server`` serving ``num_clients``.
    """
    # No graph/partition data is served; this server only answers RPCs.
    server_state = dgl.distributed.ServerState(
        None, local_g=None, partition_book=None, keep_alive=keep_alive
    )
    dgl.distributed.register_service(
        HELLO_SERVICE_ID, HelloRequest, HelloResponse
    )
    print("Start server {}".format(server_id))
    dgl.distributed.start_server(
        server_id=server_id,
        ip_config=ip_config,
        num_servers=num_servers,
        num_clients=num_clients,
        server_state=server_state,
        net_type=net_type,
    )
def start_client(ip_config, num_servers, group_id, net_type):
dgl.distributed.register_service(
HELLO_SERVICE_ID, HelloRequest, HelloResponse)
HELLO_SERVICE_ID, HelloRequest, HelloResponse
)
dgl.distributed.connect_to_server(
ip_config=ip_config, num_servers=num_servers, group_id=group_id, net_type=net_type)
ip_config=ip_config,
num_servers=num_servers,
group_id=group_id,
net_type=net_type,
)
req = HelloRequest(STR, INTEGER, TENSOR, tensor_func)
server_namebook = dgl.distributed.read_ip_config(ip_config, num_servers)
for server_id in server_namebook.keys():
......@@ -102,19 +115,20 @@ def start_client(ip_config, num_servers, group_id, net_type):
def main():
ip_config = os.environ.get('DIST_DGL_TEST_IP_CONFIG')
num_servers = int(os.environ.get('DIST_DGL_TEST_NUM_SERVERS'))
net_type = os.environ.get('DIST_DGL_TEST_NET_TYPE', 'tensorpipe')
if os.environ.get('DIST_DGL_TEST_ROLE', 'server') == 'server':
server_id = int(os.environ.get('DIST_DGL_TEST_SERVER_ID'))
num_clients = int(os.environ.get('DIST_DGL_TEST_NUM_CLIENTS'))
keep_alive = 'DIST_DGL_TEST_KEEP_ALIVE' in os.environ
start_server(server_id, ip_config, num_servers,
num_clients, net_type, keep_alive)
ip_config = os.environ.get("DIST_DGL_TEST_IP_CONFIG")
num_servers = int(os.environ.get("DIST_DGL_TEST_NUM_SERVERS"))
net_type = os.environ.get("DIST_DGL_TEST_NET_TYPE", "tensorpipe")
if os.environ.get("DIST_DGL_TEST_ROLE", "server") == "server":
server_id = int(os.environ.get("DIST_DGL_TEST_SERVER_ID"))
num_clients = int(os.environ.get("DIST_DGL_TEST_NUM_CLIENTS"))
keep_alive = "DIST_DGL_TEST_KEEP_ALIVE" in os.environ
start_server(
server_id, ip_config, num_servers, num_clients, net_type, keep_alive
)
else:
group_id = int(os.environ.get('DIST_DGL_TEST_GROUP_ID', '0'))
group_id = int(os.environ.get("DIST_DGL_TEST_GROUP_ID", "0"))
start_client(ip_config, num_servers, group_id, net_type)
if __name__ == '__main__':
if __name__ == "__main__":
main()
import dgl
import os
import numpy as np
import dgl
import dgl.backend as F
from dgl.distributed import load_partition_book
mode = os.environ.get('DIST_DGL_TEST_MODE', "")
graph_name = os.environ.get('DIST_DGL_TEST_GRAPH_NAME', 'random_test_graph')
num_part = int(os.environ.get('DIST_DGL_TEST_NUM_PART'))
num_servers_per_machine = int(os.environ.get('DIST_DGL_TEST_NUM_SERVER'))
num_client_per_machine = int(os.environ.get('DIST_DGL_TEST_NUM_CLIENT'))
shared_workspace = os.environ.get('DIST_DGL_TEST_WORKSPACE')
graph_path = os.environ.get('DIST_DGL_TEST_GRAPH_PATH')
part_id = int(os.environ.get('DIST_DGL_TEST_PART_ID'))
net_type = os.environ.get('DIST_DGL_TEST_NET_TYPE')
ip_config = os.environ.get('DIST_DGL_TEST_IP_CONFIG', 'ip_config.txt')
mode = os.environ.get("DIST_DGL_TEST_MODE", "")
graph_name = os.environ.get("DIST_DGL_TEST_GRAPH_NAME", "random_test_graph")
num_part = int(os.environ.get("DIST_DGL_TEST_NUM_PART"))
num_servers_per_machine = int(os.environ.get("DIST_DGL_TEST_NUM_SERVER"))
num_client_per_machine = int(os.environ.get("DIST_DGL_TEST_NUM_CLIENT"))
shared_workspace = os.environ.get("DIST_DGL_TEST_WORKSPACE")
graph_path = os.environ.get("DIST_DGL_TEST_GRAPH_PATH")
part_id = int(os.environ.get("DIST_DGL_TEST_PART_ID"))
net_type = os.environ.get("DIST_DGL_TEST_NET_TYPE")
ip_config = os.environ.get("DIST_DGL_TEST_IP_CONFIG", "ip_config.txt")
os.environ["DGL_DIST_MODE"] = "distributed"
os.environ['DGL_DIST_MODE'] = 'distributed'
def zeros_init(shape, dtype):
return F.zeros(shape, dtype=dtype, ctx=F.cpu())
def run_server(graph_name, server_id, server_count, num_clients, shared_mem, keep_alive=False):
def run_server(
graph_name,
server_id,
server_count,
num_clients,
shared_mem,
keep_alive=False,
):
# server_count = num_servers_per_machine
g = dgl.distributed.DistGraphServer(server_id, ip_config,
server_count, num_clients,
graph_path + '/{}.json'.format(graph_name),
g = dgl.distributed.DistGraphServer(
server_id,
ip_config,
server_count,
num_clients,
graph_path + "/{}.json".format(graph_name),
disable_shared_mem=not shared_mem,
graph_format=['csc', 'coo'], keep_alive=keep_alive,
net_type=net_type)
print('start server', server_id)
graph_format=["csc", "coo"],
keep_alive=keep_alive,
net_type=net_type,
)
print("start server", server_id)
g.start()
##########################################
############### DistTensor ###############
##########################################
def dist_tensor_test_sanity(data_shape, name=None):
local_rank = dgl.distributed.get_rank() % num_client_per_machine
dist_ten = dgl.distributed.DistTensor(data_shape,
F.int32,
init_func=zeros_init,
name=name)
dist_ten = dgl.distributed.DistTensor(
data_shape, F.int32, init_func=zeros_init, name=name
)
# arbitrary value
stride = 3
pos = (part_id // 2) * num_client_per_machine + local_rank
if part_id % 2 == 0:
dist_ten[pos*stride:(pos+1)*stride] = F.ones((stride, 2), dtype=F.int32, ctx=F.cpu()) * (pos+1)
dist_ten[pos * stride : (pos + 1) * stride] = F.ones(
(stride, 2), dtype=F.int32, ctx=F.cpu()
) * (pos + 1)
dgl.distributed.client_barrier()
assert F.allclose(dist_ten[pos*stride:(pos+1)*stride],
F.ones((stride, 2), dtype=F.int32, ctx=F.cpu()) * (pos+1))
assert F.allclose(
dist_ten[pos * stride : (pos + 1) * stride],
F.ones((stride, 2), dtype=F.int32, ctx=F.cpu()) * (pos + 1),
)
def dist_tensor_test_destroy_recreate(data_shape, name):
dist_ten = dgl.distributed.DistTensor(data_shape, F.float32, name, init_func=zeros_init)
dist_ten = dgl.distributed.DistTensor(
data_shape, F.float32, name, init_func=zeros_init
)
del dist_ten
dgl.distributed.client_barrier()
new_shape = (data_shape[0], 4)
dist_ten = dgl.distributed.DistTensor(new_shape, F.float32, name, init_func=zeros_init)
dist_ten = dgl.distributed.DistTensor(
new_shape, F.float32, name, init_func=zeros_init
)
def dist_tensor_test_persistent(data_shape):
dist_ten_name = 'persistent_dist_tensor'
dist_ten = dgl.distributed.DistTensor(data_shape, F.float32, dist_ten_name, init_func=zeros_init,
persistent=True)
dist_ten_name = "persistent_dist_tensor"
dist_ten = dgl.distributed.DistTensor(
data_shape,
F.float32,
dist_ten_name,
init_func=zeros_init,
persistent=True,
)
del dist_ten
try:
dist_ten = dgl.distributed.DistTensor(data_shape, F.float32, dist_ten_name)
raise Exception('')
dist_ten = dgl.distributed.DistTensor(
data_shape, F.float32, dist_ten_name
)
raise Exception("")
except:
pass
......@@ -86,17 +119,20 @@ def test_dist_tensor(g):
############# DistEmbedding ##############
##########################################
def dist_embedding_check_sanity(num_nodes, optimizer, name=None):
local_rank = dgl.distributed.get_rank() % num_client_per_machine
emb = dgl.distributed.DistEmbedding(num_nodes, 1, name=name, init_func=zeros_init)
emb = dgl.distributed.DistEmbedding(
num_nodes, 1, name=name, init_func=zeros_init
)
lr = 0.001
optim = optimizer(params=[emb], lr=lr)
stride = 3
pos = (part_id // 2) * num_client_per_machine + local_rank
idx = F.arange(pos*stride, (pos+1)*stride)
idx = F.arange(pos * stride, (pos + 1) * stride)
if part_id % 2 == 0:
with F.record_grad():
......@@ -110,41 +146,62 @@ def dist_embedding_check_sanity(num_nodes, optimizer, name=None):
value = emb(idx)
F.allclose(value, F.ones((len(idx), 1), dtype=F.int32, ctx=F.cpu()) * -lr)
not_update_idx = F.arange(((num_part + 1) / 2) * num_client_per_machine * stride, num_nodes)
not_update_idx = F.arange(
((num_part + 1) / 2) * num_client_per_machine * stride, num_nodes
)
value = emb(not_update_idx)
assert np.all(F.asnumpy(value) == np.zeros((len(not_update_idx), 1)))
def dist_embedding_check_existing(num_nodes):
dist_emb_name = "UniqueEmb"
emb = dgl.distributed.DistEmbedding(num_nodes, 1, name=dist_emb_name, init_func=zeros_init)
emb = dgl.distributed.DistEmbedding(
num_nodes, 1, name=dist_emb_name, init_func=zeros_init
)
try:
emb1 = dgl.distributed.DistEmbedding(num_nodes, 2, name=dist_emb_name, init_func=zeros_init)
raise Exception('')
emb1 = dgl.distributed.DistEmbedding(
num_nodes, 2, name=dist_emb_name, init_func=zeros_init
)
raise Exception("")
except:
pass
def test_dist_embedding(g):
num_nodes = g.number_of_nodes(g.ntypes[0])
dist_embedding_check_sanity(num_nodes, dgl.distributed.optim.SparseAdagrad)
dist_embedding_check_sanity(num_nodes, dgl.distributed.optim.SparseAdagrad, name='SomeEmbedding')
dist_embedding_check_sanity(num_nodes, dgl.distributed.optim.SparseAdam, name='SomeEmbedding')
dist_embedding_check_sanity(
num_nodes, dgl.distributed.optim.SparseAdagrad, name="SomeEmbedding"
)
dist_embedding_check_sanity(
num_nodes, dgl.distributed.optim.SparseAdam, name="SomeEmbedding"
)
dist_embedding_check_existing(num_nodes)
if mode == "server":
shared_mem = bool(int(os.environ.get('DIST_DGL_TEST_SHARED_MEM')))
server_id = int(os.environ.get('DIST_DGL_TEST_SERVER_ID'))
run_server(graph_name, server_id, server_count=num_servers_per_machine,
num_clients=num_part*num_client_per_machine, shared_mem=shared_mem, keep_alive=False)
shared_mem = bool(int(os.environ.get("DIST_DGL_TEST_SHARED_MEM")))
server_id = int(os.environ.get("DIST_DGL_TEST_SERVER_ID"))
run_server(
graph_name,
server_id,
server_count=num_servers_per_machine,
num_clients=num_part * num_client_per_machine,
shared_mem=shared_mem,
keep_alive=False,
)
elif mode == "client":
os.environ['DGL_NUM_SERVER'] = str(num_servers_per_machine)
os.environ["DGL_NUM_SERVER"] = str(num_servers_per_machine)
dgl.distributed.initialize(ip_config, net_type=net_type)
gpb, graph_name, _, _ = load_partition_book(graph_path + '/{}.json'.format(graph_name), part_id, None)
gpb, graph_name, _, _ = load_partition_book(
graph_path + "/{}.json".format(graph_name), part_id, None
)
g = dgl.distributed.DistGraph(graph_name, gpb=gpb)
target_func_map = {"DistTensor": test_dist_tensor,
target_func_map = {
"DistTensor": test_dist_tensor,
"DistEmbedding": test_dist_embedding,
}
......@@ -158,4 +215,3 @@ elif mode == "client":
else:
print("DIST_DGL_TEST_MODE has to be either server or client")
exit(1)
import os
import unittest
from utils import execute_remote, get_ips
@unittest.skipIf(os.name == 'nt', reason='Do not support windows yet')
@unittest.skipIf(os.name == "nt", reason="Do not support windows yet")
def test_tensorpipe_comm():
base_dir = os.environ.get('DIST_DGL_TEST_CPP_BIN_DIR', '.')
ip_config = os.environ.get('DIST_DGL_TEST_IP_CONFIG', 'ip_config.txt')
client_bin = os.path.join(base_dir, 'rpc_client')
server_bin = os.path.join(base_dir, 'rpc_server')
base_dir = os.environ.get("DIST_DGL_TEST_CPP_BIN_DIR", ".")
ip_config = os.environ.get("DIST_DGL_TEST_IP_CONFIG", "ip_config.txt")
client_bin = os.path.join(base_dir, "rpc_client")
server_bin = os.path.join(base_dir, "rpc_server")
ips = get_ips(ip_config)
num_machines = len(ips)
procs = []
for ip in ips:
procs.append(execute_remote(server_bin + " " +
str(num_machines) + " " + ip, ip))
procs.append(
execute_remote(server_bin + " " + str(num_machines) + " " + ip, ip)
)
for ip in ips:
procs.append(execute_remote(client_bin + " " + ip_config, ip))
for p in procs:
......
import multiprocessing as mp
import os
import subprocess
import unittest
import numpy as np
import pytest
import multiprocessing as mp
import subprocess
import utils
import dgl
import numpy as np
import dgl.backend as F
from dgl.distributed import partition_graph
graph_name = os.environ.get('DIST_DGL_TEST_GRAPH_NAME', 'random_test_graph')
target = os.environ.get('DIST_DGL_TEST_OBJECT_TYPE', '')
shared_workspace = os.environ.get('DIST_DGL_TEST_WORKSPACE')
graph_name = os.environ.get("DIST_DGL_TEST_GRAPH_NAME", "random_test_graph")
target = os.environ.get("DIST_DGL_TEST_OBJECT_TYPE", "")
shared_workspace = os.environ.get("DIST_DGL_TEST_WORKSPACE")
def create_graph(num_part, dist_graph_path, hetero):
if not hetero:
g = dgl.rand_graph(10000, 42000)
g.ndata['feat'] = F.unsqueeze(F.arange(0, g.number_of_nodes()), 1)
g.edata['feat'] = F.unsqueeze(F.arange(0, g.number_of_edges()), 1)
g.ndata["feat"] = F.unsqueeze(F.arange(0, g.number_of_nodes()), 1)
g.edata["feat"] = F.unsqueeze(F.arange(0, g.number_of_edges()), 1)
partition_graph(g, graph_name, num_part, dist_graph_path)
else:
from scipy import sparse as spsp
num_nodes = {'n1': 10000, 'n2': 10010, 'n3': 10020}
etypes = [('n1', 'r1', 'n2'),
('n1', 'r2', 'n3'),
('n2', 'r3', 'n3')]
num_nodes = {"n1": 10000, "n2": 10010, "n3": 10020}
etypes = [("n1", "r1", "n2"), ("n1", "r2", "n3"), ("n2", "r3", "n3")]
edges = {}
for etype in etypes:
src_ntype, _, dst_ntype = etype
arr = spsp.random(num_nodes[src_ntype], num_nodes[dst_ntype], density=0.001, format='coo',
random_state=100)
arr = spsp.random(
num_nodes[src_ntype],
num_nodes[dst_ntype],
density=0.001,
format="coo",
random_state=100,
)
edges[etype] = (arr.row, arr.col)
g = dgl.heterograph(edges, num_nodes)
g.nodes['n1'].data['feat'] = F.unsqueeze(F.arange(0, g.number_of_nodes('n1')), 1)
g.edges['r1'].data['feat'] = F.unsqueeze(F.arange(0, g.number_of_edges('r1')), 1)
g.nodes["n1"].data["feat"] = F.unsqueeze(
F.arange(0, g.number_of_nodes("n1")), 1
)
g.edges["r1"].data["feat"] = F.unsqueeze(
F.arange(0, g.number_of_edges("r1")), 1
)
partition_graph(g, graph_name, num_part, dist_graph_path)
@unittest.skipIf(os.name == 'nt', reason='Do not support windows yet')
@pytest.mark.parametrize("net_type", ['tensorpipe', 'socket'])
@unittest.skipIf(os.name == "nt", reason="Do not support windows yet")
@pytest.mark.parametrize("net_type", ["tensorpipe", "socket"])
@pytest.mark.parametrize("num_servers", [1, 4])
@pytest.mark.parametrize("num_clients", [1, 4])
@pytest.mark.parametrize("hetero", [False, True])
@pytest.mark.parametrize("shared_mem", [False, True])
def test_dist_objects(net_type, num_servers, num_clients, hetero, shared_mem):
if not shared_mem and num_servers > 1:
pytest.skip(f"Backup servers are not supported when shared memory is disabled")
ip_config = os.environ.get('DIST_DGL_TEST_IP_CONFIG', 'ip_config.txt')
workspace = os.environ.get('DIST_DGL_TEST_WORKSPACE', '/shared_workspace/dgl_dist_tensor_test/')
pytest.skip(
f"Backup servers are not supported when shared memory is disabled"
)
ip_config = os.environ.get("DIST_DGL_TEST_IP_CONFIG", "ip_config.txt")
workspace = os.environ.get(
"DIST_DGL_TEST_WORKSPACE", "/shared_workspace/dgl_dist_tensor_test/"
)
ips = utils.get_ips(ip_config)
num_part = len(ips)
test_bin = os.path.join(os.environ.get(
'DIST_DGL_TEST_PY_BIN_DIR', '.'), 'run_dist_objects.py')
test_bin = os.path.join(
os.environ.get("DIST_DGL_TEST_PY_BIN_DIR", "."), "run_dist_objects.py"
)
dist_graph_path = os.path.join(workspace, 'hetero_dist_graph' if hetero else 'dist_graph')
dist_graph_path = os.path.join(
workspace, "hetero_dist_graph" if hetero else "dist_graph"
)
if not os.path.isdir(dist_graph_path):
create_graph(num_part, dist_graph_path, hetero)
base_envs = f"DIST_DGL_TEST_WORKSPACE={workspace} " \
f"DIST_DGL_TEST_NUM_PART={num_part} " \
f"DIST_DGL_TEST_NUM_SERVER={num_servers} " \
f"DIST_DGL_TEST_NUM_CLIENT={num_clients} " \
f"DIST_DGL_TEST_NET_TYPE={net_type} " \
f"DIST_DGL_TEST_GRAPH_PATH={dist_graph_path} " \
base_envs = (
f"DIST_DGL_TEST_WORKSPACE={workspace} "
f"DIST_DGL_TEST_NUM_PART={num_part} "
f"DIST_DGL_TEST_NUM_SERVER={num_servers} "
f"DIST_DGL_TEST_NUM_CLIENT={num_clients} "
f"DIST_DGL_TEST_NET_TYPE={net_type} "
f"DIST_DGL_TEST_GRAPH_PATH={dist_graph_path} "
f"DIST_DGL_TEST_IP_CONFIG={ip_config} "
)
procs = []
# Start server
server_id = 0
for part_id, ip in enumerate(ips):
for _ in range(num_servers):
cmd_envs = base_envs + \
f"DIST_DGL_TEST_SERVER_ID={server_id} " \
f"DIST_DGL_TEST_PART_ID={part_id} " \
f"DIST_DGL_TEST_SHARED_MEM={str(int(shared_mem))} " \
cmd_envs = (
base_envs + f"DIST_DGL_TEST_SERVER_ID={server_id} "
f"DIST_DGL_TEST_PART_ID={part_id} "
f"DIST_DGL_TEST_SHARED_MEM={str(int(shared_mem))} "
f"DIST_DGL_TEST_MODE=server "
procs.append(utils.execute_remote(
f"{cmd_envs} python3 {test_bin}",
ip))
)
procs.append(
utils.execute_remote(f"{cmd_envs} python3 {test_bin}", ip)
)
server_id += 1
# Start client processes
for part_id, ip in enumerate(ips):
for _ in range(num_clients):
cmd_envs = base_envs + \
f"DIST_DGL_TEST_PART_ID={part_id} " \
f"DIST_DGL_TEST_OBJECT_TYPE={target} " \
cmd_envs = (
base_envs + f"DIST_DGL_TEST_PART_ID={part_id} "
f"DIST_DGL_TEST_OBJECT_TYPE={target} "
f"DIST_DGL_TEST_MODE=client "
procs.append(utils.execute_remote(
f"{cmd_envs} python3 {test_bin}",
ip))
)
procs.append(
utils.execute_remote(f"{cmd_envs} python3 {test_bin}", ip)
)
for p in procs:
p.join()
assert p.exitcode == 0
import multiprocessing as mp
import os
import unittest
import pytest
import multiprocessing as mp
import utils
dgl_envs = f"PYTHONUNBUFFERED=1 DMLC_LOG_DEBUG=1 DGLBACKEND={os.environ.get('DGLBACKEND')} DGL_LIBRARY_PATH={os.environ.get('DGL_LIBRARY_PATH')} PYTHONPATH={os.environ.get('PYTHONPATH')} "
@unittest.skipIf(os.name == 'nt', reason='Do not support windows yet')
@pytest.mark.parametrize("net_type", ['socket', 'tensorpipe'])
@unittest.skipIf(os.name == "nt", reason="Do not support windows yet")
@pytest.mark.parametrize("net_type", ["socket", "tensorpipe"])
def test_rpc(net_type):
ip_config = os.environ.get('DIST_DGL_TEST_IP_CONFIG', 'ip_config.txt')
ip_config = os.environ.get("DIST_DGL_TEST_IP_CONFIG", "ip_config.txt")
num_clients = 1
num_servers = 1
ips = utils.get_ips(ip_config)
num_machines = len(ips)
test_bin = os.path.join(os.environ.get(
'DIST_DGL_TEST_PY_BIN_DIR', '.'), 'rpc_basic.py')
base_envs = dgl_envs + \
f" DGL_DIST_MODE=distributed DIST_DGL_TEST_IP_CONFIG={ip_config} DIST_DGL_TEST_NUM_SERVERS={num_servers} DIST_DGL_TEST_NET_TYPE={net_type} "
test_bin = os.path.join(
os.environ.get("DIST_DGL_TEST_PY_BIN_DIR", "."), "rpc_basic.py"
)
base_envs = (
dgl_envs
+ f" DGL_DIST_MODE=distributed DIST_DGL_TEST_IP_CONFIG={ip_config} DIST_DGL_TEST_NUM_SERVERS={num_servers} DIST_DGL_TEST_NET_TYPE={net_type} "
)
procs = []
# start server processes
server_id = 0
for ip in ips:
for _ in range(num_servers):
server_envs = base_envs + \
f" DIST_DGL_TEST_ROLE=server DIST_DGL_TEST_SERVER_ID={server_id} DIST_DGL_TEST_NUM_CLIENTS={num_clients * num_machines} "
procs.append(utils.execute_remote(
server_envs + " python3 " + test_bin, ip))
server_envs = (
base_envs
+ f" DIST_DGL_TEST_ROLE=server DIST_DGL_TEST_SERVER_ID={server_id} DIST_DGL_TEST_NUM_CLIENTS={num_clients * num_machines} "
)
procs.append(
utils.execute_remote(server_envs + " python3 " + test_bin, ip)
)
server_id += 1
# start client processes
client_envs = base_envs + " DIST_DGL_TEST_ROLE=client DIST_DGL_TEST_GROUP_ID=0 "
client_envs = (
base_envs + " DIST_DGL_TEST_ROLE=client DIST_DGL_TEST_GROUP_ID=0 "
)
for ip in ips:
for _ in range(num_clients):
procs.append(utils.execute_remote(
client_envs + " python3 "+test_bin, ip))
procs.append(
utils.execute_remote(client_envs + " python3 " + test_bin, ip)
)
for p in procs:
p.join()
assert p.exitcode == 0
import subprocess
import multiprocessing as mp
from typing import Optional
import os
import subprocess
from typing import Optional
def run(ssh_cmd):
subprocess.check_call(ssh_cmd, shell=True)
def execute_remote(
cmd: str,
ip: str,
port: Optional[int] = 22,
username: Optional[str] = ""
cmd: str, ip: str, port: Optional[int] = 22, username: Optional[str] = ""
) -> mp.Process:
"""Execute command line on remote machine via ssh.
......@@ -30,18 +28,18 @@ def execute_remote(
if username:
ip_prefix += "{username}@".format(username=username)
custom_port = os.getenv('DIST_DGL_TEST_SSH_PORT', '')
custom_port = os.getenv("DIST_DGL_TEST_SSH_PORT", "")
if custom_port:
port = custom_port
custom_ssh_key = os.getenv('DIST_DGL_TEST_SSH_KEY', '')
custom_ssh_key = os.getenv("DIST_DGL_TEST_SSH_KEY", "")
if custom_ssh_key:
custom_ssh_key = os.path.expanduser(custom_ssh_key)
custom_ssh_key = "-i " + custom_ssh_key
ssh_setup = os.getenv('DIST_DGL_TEST_SSH_SETUP', '')
ssh_setup = os.getenv("DIST_DGL_TEST_SSH_SETUP", "")
if ssh_setup:
cmd = ssh_setup + ';' + cmd
cmd = ssh_setup + ";" + cmd
# Construct ssh command that executes `cmd` on the remote host
ssh_cmd = "ssh -o StrictHostKeyChecking=no {ssh_key} -p {port} {ip_prefix}{ip} '{cmd}'".format(
ssh_key=custom_ssh_key,
......@@ -50,11 +48,12 @@ def execute_remote(
ip=ip,
cmd=cmd,
)
ctx = mp.get_context('spawn')
ctx = mp.get_context("spawn")
proc = ctx.Process(target=run, args=(ssh_cmd,))
proc.start()
return proc
def get_ips(ip_config):
ips = []
with open(ip_config) as f:
......@@ -62,6 +61,7 @@ def get_ips(ip_config):
result = line.strip().split()
if len(result) != 1:
raise RuntimeError(
"Invalid format of ip_config:{}".format(ip_config))
"Invalid format of ip_config:{}".format(ip_config)
)
ips.append(result[0])
return ips
import os
os.environ['OMP_NUM_THREADS'] = '1'
import dgl
os.environ["OMP_NUM_THREADS"] = "1"
import math
import multiprocessing as mp
import pickle
import socket
import sys
import numpy as np
import time
import socket
from scipy import sparse as spsp
from numpy.testing import assert_array_equal
from multiprocessing import Process, Manager, Condition, Value
import multiprocessing as mp
from dgl.heterograph_index import create_unitgraph_from_coo
from dgl.data.utils import load_graphs, save_graphs
from dgl.distributed import DistGraphServer, DistGraph
from dgl.distributed import partition_graph, load_partition, load_partition_book, node_split, edge_split
from numpy.testing import assert_almost_equal
import backend as F
import math
import unittest
import pickle
from utils import reset_envs, generate_ip_config, create_random_graph
from multiprocessing import Condition, Manager, Process, Value
import backend as F
import numpy as np
import pytest
from numpy.testing import assert_almost_equal, assert_array_equal
from scipy import sparse as spsp
from utils import create_random_graph, generate_ip_config, reset_envs
if os.name != 'nt':
import dgl
from dgl.data.utils import load_graphs, save_graphs
from dgl.distributed import (
DistGraph,
DistGraphServer,
edge_split,
load_partition,
load_partition_book,
node_split,
partition_graph,
)
from dgl.heterograph_index import create_unitgraph_from_coo
if os.name != "nt":
import fcntl
import struct
def run_server(graph_name, server_id, server_count, num_clients, shared_mem, keep_alive=False):
g = DistGraphServer(server_id, "kv_ip_config.txt", server_count, num_clients,
'/tmp/dist_graph/{}.json'.format(graph_name),
def run_server(
graph_name,
server_id,
server_count,
num_clients,
shared_mem,
keep_alive=False,
):
g = DistGraphServer(
server_id,
"kv_ip_config.txt",
server_count,
num_clients,
"/tmp/dist_graph/{}.json".format(graph_name),
disable_shared_mem=not shared_mem,
graph_format=['csc', 'coo'], keep_alive=keep_alive)
print('start server', server_id)
graph_format=["csc", "coo"],
keep_alive=keep_alive,
)
print("start server", server_id)
# verify dtype of underlying graph
cg = g.client_g
for k, dtype in dgl.distributed.dist_graph.FIELD_DICT.items():
if k in cg.ndata:
assert F.dtype(
cg.ndata[k]) == dtype, "Data type of {} in ndata should be {}.".format(k, dtype)
assert (
F.dtype(cg.ndata[k]) == dtype
), "Data type of {} in ndata should be {}.".format(k, dtype)
if k in cg.edata:
assert F.dtype(
cg.edata[k]) == dtype, "Data type of {} in edata should be {}.".format(k, dtype)
assert (
F.dtype(cg.edata[k]) == dtype
), "Data type of {} in edata should be {}.".format(k, dtype)
g.start()
def emb_init(shape, dtype):
return F.zeros(shape, dtype, F.cpu())
def rand_init(shape, dtype):
return F.tensor(np.random.normal(size=shape), F.float32)
def check_dist_graph_empty(g, num_clients, num_nodes, num_edges):
# Test API
assert g.number_of_nodes() == num_nodes
......@@ -55,60 +83,80 @@ def check_dist_graph_empty(g, num_clients, num_nodes, num_edges):
# Test init node data
new_shape = (g.number_of_nodes(), 2)
g.ndata['test1'] = dgl.distributed.DistTensor(new_shape, F.int32)
g.ndata["test1"] = dgl.distributed.DistTensor(new_shape, F.int32)
nids = F.arange(0, int(g.number_of_nodes() / 2))
feats = g.ndata['test1'][nids]
feats = g.ndata["test1"][nids]
assert np.all(F.asnumpy(feats) == 0)
# create a tensor and destroy a tensor and create it again.
test3 = dgl.distributed.DistTensor(new_shape, F.float32, 'test3', init_func=rand_init)
test3 = dgl.distributed.DistTensor(
new_shape, F.float32, "test3", init_func=rand_init
)
del test3
test3 = dgl.distributed.DistTensor((g.number_of_nodes(), 3), F.float32, 'test3')
test3 = dgl.distributed.DistTensor(
(g.number_of_nodes(), 3), F.float32, "test3"
)
del test3
# Test write data
new_feats = F.ones((len(nids), 2), F.int32, F.cpu())
g.ndata['test1'][nids] = new_feats
feats = g.ndata['test1'][nids]
g.ndata["test1"][nids] = new_feats
feats = g.ndata["test1"][nids]
assert np.all(F.asnumpy(feats) == 1)
# Test metadata operations.
assert g.node_attr_schemes()['test1'].dtype == F.int32
assert g.node_attr_schemes()["test1"].dtype == F.int32
print('end')
print("end")
def run_client_empty(graph_name, part_id, server_count, num_clients, num_nodes, num_edges):
os.environ['DGL_NUM_SERVER'] = str(server_count)
def run_client_empty(
graph_name, part_id, server_count, num_clients, num_nodes, num_edges
):
os.environ["DGL_NUM_SERVER"] = str(server_count)
dgl.distributed.initialize("kv_ip_config.txt")
gpb, graph_name, _, _ = load_partition_book('/tmp/dist_graph/{}.json'.format(graph_name),
part_id, None)
gpb, graph_name, _, _ = load_partition_book(
"/tmp/dist_graph/{}.json".format(graph_name), part_id, None
)
g = DistGraph(graph_name, gpb=gpb)
check_dist_graph_empty(g, num_clients, num_nodes, num_edges)
def check_server_client_empty(shared_mem, num_servers, num_clients):
prepare_dist(num_servers)
g = create_random_graph(10000)
# Partition the graph
num_parts = 1
graph_name = 'dist_graph_test_1'
partition_graph(g, graph_name, num_parts, '/tmp/dist_graph')
graph_name = "dist_graph_test_1"
partition_graph(g, graph_name, num_parts, "/tmp/dist_graph")
# let's just test on one partition for now.
# We cannot run multiple servers and clients on the same machine.
serv_ps = []
ctx = mp.get_context('spawn')
ctx = mp.get_context("spawn")
for serv_id in range(num_servers):
p = ctx.Process(target=run_server, args=(graph_name, serv_id, num_servers,
num_clients, shared_mem))
p = ctx.Process(
target=run_server,
args=(graph_name, serv_id, num_servers, num_clients, shared_mem),
)
serv_ps.append(p)
p.start()
cli_ps = []
for cli_id in range(num_clients):
print('start client', cli_id)
p = ctx.Process(target=run_client_empty, args=(graph_name, 0, num_servers, num_clients,
g.number_of_nodes(), g.number_of_edges()))
print("start client", cli_id)
p = ctx.Process(
target=run_client_empty,
args=(
graph_name,
0,
num_servers,
num_clients,
g.number_of_nodes(),
g.number_of_edges(),
),
)
p.start()
cli_ps.append(p)
......@@ -118,45 +166,79 @@ def check_server_client_empty(shared_mem, num_servers, num_clients):
for p in serv_ps:
p.join()
print('clients have terminated')
print("clients have terminated")
def run_client(graph_name, part_id, server_count, num_clients, num_nodes, num_edges, group_id):
os.environ['DGL_NUM_SERVER'] = str(server_count)
os.environ['DGL_GROUP_ID'] = str(group_id)
def run_client(
graph_name,
part_id,
server_count,
num_clients,
num_nodes,
num_edges,
group_id,
):
os.environ["DGL_NUM_SERVER"] = str(server_count)
os.environ["DGL_GROUP_ID"] = str(group_id)
dgl.distributed.initialize("kv_ip_config.txt")
gpb, graph_name, _, _ = load_partition_book('/tmp/dist_graph/{}.json'.format(graph_name),
part_id, None)
gpb, graph_name, _, _ = load_partition_book(
"/tmp/dist_graph/{}.json".format(graph_name), part_id, None
)
g = DistGraph(graph_name, gpb=gpb)
check_dist_graph(g, num_clients, num_nodes, num_edges)
def run_emb_client(graph_name, part_id, server_count, num_clients, num_nodes, num_edges, group_id):
os.environ['DGL_NUM_SERVER'] = str(server_count)
os.environ['DGL_GROUP_ID'] = str(group_id)
def run_emb_client(
graph_name,
part_id,
server_count,
num_clients,
num_nodes,
num_edges,
group_id,
):
os.environ["DGL_NUM_SERVER"] = str(server_count)
os.environ["DGL_GROUP_ID"] = str(group_id)
dgl.distributed.initialize("kv_ip_config.txt")
gpb, graph_name, _, _ = load_partition_book('/tmp/dist_graph/{}.json'.format(graph_name),
part_id, None)
gpb, graph_name, _, _ = load_partition_book(
"/tmp/dist_graph/{}.json".format(graph_name), part_id, None
)
g = DistGraph(graph_name, gpb=gpb)
check_dist_emb(g, num_clients, num_nodes, num_edges)
def run_client_hierarchy(graph_name, part_id, server_count, node_mask, edge_mask, return_dict):
os.environ['DGL_NUM_SERVER'] = str(server_count)
def run_client_hierarchy(
graph_name, part_id, server_count, node_mask, edge_mask, return_dict
):
os.environ["DGL_NUM_SERVER"] = str(server_count)
dgl.distributed.initialize("kv_ip_config.txt")
gpb, graph_name, _, _ = load_partition_book('/tmp/dist_graph/{}.json'.format(graph_name),
part_id, None)
gpb, graph_name, _, _ = load_partition_book(
"/tmp/dist_graph/{}.json".format(graph_name), part_id, None
)
g = DistGraph(graph_name, gpb=gpb)
node_mask = F.tensor(node_mask)
edge_mask = F.tensor(edge_mask)
nodes = node_split(node_mask, g.get_partition_book(), node_trainer_ids=g.ndata['trainer_id'])
edges = edge_split(edge_mask, g.get_partition_book(), edge_trainer_ids=g.edata['trainer_id'])
nodes = node_split(
node_mask,
g.get_partition_book(),
node_trainer_ids=g.ndata["trainer_id"],
)
edges = edge_split(
edge_mask,
g.get_partition_book(),
edge_trainer_ids=g.edata["trainer_id"],
)
rank = g.rank()
return_dict[rank] = (nodes, edges)
def check_dist_emb(g, num_clients, num_nodes, num_edges):
from dgl.distributed.optim import SparseAdagrad
from dgl.distributed import DistEmbedding
from dgl.distributed.optim import SparseAdagrad
# Test sparse emb
try:
emb = DistEmbedding(g.number_of_nodes(), 1, 'emb1', emb_init)
emb = DistEmbedding(g.number_of_nodes(), 1, "emb1", emb_init)
nids = F.arange(0, int(g.number_of_nodes()))
lr = 0.001
optimizer = SparseAdagrad([emb], lr=lr)
......@@ -173,14 +255,18 @@ def check_dist_emb(g, num_clients, num_nodes, num_edges):
feats1 = emb(rest)
assert np.all(F.asnumpy(feats1) == np.zeros((len(rest), 1)))
policy = dgl.distributed.PartitionPolicy('node', g.get_partition_book())
grad_sum = dgl.distributed.DistTensor((g.number_of_nodes(), 1), F.float32,
'emb1_sum', policy)
policy = dgl.distributed.PartitionPolicy("node", g.get_partition_book())
grad_sum = dgl.distributed.DistTensor(
(g.number_of_nodes(), 1), F.float32, "emb1_sum", policy
)
if num_clients == 1:
assert np.all(F.asnumpy(grad_sum[nids]) == np.ones((len(nids), 1)) * num_clients)
assert np.all(
F.asnumpy(grad_sum[nids])
== np.ones((len(nids), 1)) * num_clients
)
assert np.all(F.asnumpy(grad_sum[rest]) == np.zeros((len(rest), 1)))
emb = DistEmbedding(g.number_of_nodes(), 1, 'emb2', emb_init)
emb = DistEmbedding(g.number_of_nodes(), 1, "emb2", emb_init)
with F.no_grad():
feats1 = emb(nids)
assert np.all(F.asnumpy(feats1) == 0)
......@@ -197,7 +283,9 @@ def check_dist_emb(g, num_clients, num_nodes, num_edges):
with F.no_grad():
feats = emb(nids)
if num_clients == 1:
assert_almost_equal(F.asnumpy(feats), np.ones((len(nids), 1)) * 1 * -lr)
assert_almost_equal(
F.asnumpy(feats), np.ones((len(nids), 1)) * 1 * -lr
)
rest = np.setdiff1d(np.arange(g.number_of_nodes()), F.asnumpy(nids))
feats1 = emb(rest)
assert np.all(F.asnumpy(feats1) == np.zeros((len(rest), 1)))
......@@ -207,6 +295,7 @@ def check_dist_emb(g, num_clients, num_nodes, num_edges):
print(e)
sys.exit(-1)
def check_dist_graph(g, num_clients, num_nodes, num_edges):
# Test API
assert g.number_of_nodes() == num_nodes
......@@ -214,13 +303,13 @@ def check_dist_graph(g, num_clients, num_nodes, num_edges):
# Test reading node data
nids = F.arange(0, int(g.number_of_nodes() / 2))
feats1 = g.ndata['features'][nids]
feats1 = g.ndata["features"][nids]
feats = F.squeeze(feats1, 1)
assert np.all(F.asnumpy(feats == nids))
# Test reading edge data
eids = F.arange(0, int(g.number_of_edges() / 2))
feats1 = g.edata['features'][eids]
feats1 = g.edata["features"][eids]
feats = F.squeeze(feats1, 1)
assert np.all(F.asnumpy(feats == eids))
......@@ -232,53 +321,68 @@ def check_dist_graph(g, num_clients, num_nodes, num_edges):
# Test init node data
new_shape = (g.number_of_nodes(), 2)
test1 = dgl.distributed.DistTensor(new_shape, F.int32)
g.ndata['test1'] = test1
feats = g.ndata['test1'][nids]
g.ndata["test1"] = test1
feats = g.ndata["test1"][nids]
assert np.all(F.asnumpy(feats) == 0)
assert test1.count_nonzero() == 0
# reference to a one that exists
test2 = dgl.distributed.DistTensor(new_shape, F.float32, 'test2', init_func=rand_init)
test3 = dgl.distributed.DistTensor(new_shape, F.float32, 'test2')
test2 = dgl.distributed.DistTensor(
new_shape, F.float32, "test2", init_func=rand_init
)
test3 = dgl.distributed.DistTensor(new_shape, F.float32, "test2")
assert np.all(F.asnumpy(test2[nids]) == F.asnumpy(test3[nids]))
# create a tensor and destroy a tensor and create it again.
test3 = dgl.distributed.DistTensor(new_shape, F.float32, 'test3', init_func=rand_init)
test3 = dgl.distributed.DistTensor(
new_shape, F.float32, "test3", init_func=rand_init
)
del test3
test3 = dgl.distributed.DistTensor((g.number_of_nodes(), 3), F.float32, 'test3')
test3 = dgl.distributed.DistTensor(
(g.number_of_nodes(), 3), F.float32, "test3"
)
del test3
# add tests for anonymous distributed tensor.
test3 = dgl.distributed.DistTensor(new_shape, F.float32, init_func=rand_init)
test3 = dgl.distributed.DistTensor(
new_shape, F.float32, init_func=rand_init
)
data = test3[0:10]
test4 = dgl.distributed.DistTensor(new_shape, F.float32, init_func=rand_init)
test4 = dgl.distributed.DistTensor(
new_shape, F.float32, init_func=rand_init
)
del test3
test5 = dgl.distributed.DistTensor(new_shape, F.float32, init_func=rand_init)
test5 = dgl.distributed.DistTensor(
new_shape, F.float32, init_func=rand_init
)
assert np.sum(F.asnumpy(test5[0:10] != data)) > 0
# test a persistent tesnor
test4 = dgl.distributed.DistTensor(new_shape, F.float32, 'test4', init_func=rand_init,
persistent=True)
test4 = dgl.distributed.DistTensor(
new_shape, F.float32, "test4", init_func=rand_init, persistent=True
)
del test4
try:
test4 = dgl.distributed.DistTensor((g.number_of_nodes(), 3), F.float32, 'test4')
raise Exception('')
test4 = dgl.distributed.DistTensor(
(g.number_of_nodes(), 3), F.float32, "test4"
)
raise Exception("")
except:
pass
# Test write data
new_feats = F.ones((len(nids), 2), F.int32, F.cpu())
g.ndata['test1'][nids] = new_feats
feats = g.ndata['test1'][nids]
g.ndata["test1"][nids] = new_feats
feats = g.ndata["test1"][nids]
assert np.all(F.asnumpy(feats) == 1)
# Test metadata operations.
assert len(g.ndata['features']) == g.number_of_nodes()
assert g.ndata['features'].shape == (g.number_of_nodes(), 1)
assert g.ndata['features'].dtype == F.int64
assert g.node_attr_schemes()['features'].dtype == F.int64
assert g.node_attr_schemes()['test1'].dtype == F.int32
assert g.node_attr_schemes()['features'].shape == (1,)
assert len(g.ndata["features"]) == g.number_of_nodes()
assert g.ndata["features"].shape == (g.number_of_nodes(), 1)
assert g.ndata["features"].dtype == F.int64
assert g.node_attr_schemes()["features"].dtype == F.int64
assert g.node_attr_schemes()["test1"].dtype == F.int32
assert g.node_attr_schemes()["features"].shape == (1,)
selected_nodes = np.random.randint(0, 100, size=g.number_of_nodes()) > 30
# Test node split
......@@ -289,38 +393,60 @@ def check_dist_graph(g, num_clients, num_nodes, num_edges):
for n in nodes:
assert n in local_nids
print('end')
print("end")
def check_dist_emb_server_client(shared_mem, num_servers, num_clients, num_groups=1):
def check_dist_emb_server_client(
shared_mem, num_servers, num_clients, num_groups=1
):
prepare_dist(num_servers)
g = create_random_graph(10000)
# Partition the graph
num_parts = 1
graph_name = f'check_dist_emb_{shared_mem}_{num_servers}_{num_clients}_{num_groups}'
g.ndata['features'] = F.unsqueeze(F.arange(0, g.number_of_nodes()), 1)
g.edata['features'] = F.unsqueeze(F.arange(0, g.number_of_edges()), 1)
partition_graph(g, graph_name, num_parts, '/tmp/dist_graph')
graph_name = (
f"check_dist_emb_{shared_mem}_{num_servers}_{num_clients}_{num_groups}"
)
g.ndata["features"] = F.unsqueeze(F.arange(0, g.number_of_nodes()), 1)
g.edata["features"] = F.unsqueeze(F.arange(0, g.number_of_edges()), 1)
partition_graph(g, graph_name, num_parts, "/tmp/dist_graph")
# let's just test on one partition for now.
# We cannot run multiple servers and clients on the same machine.
serv_ps = []
ctx = mp.get_context('spawn')
ctx = mp.get_context("spawn")
keep_alive = num_groups > 1
for serv_id in range(num_servers):
p = ctx.Process(target=run_server, args=(graph_name, serv_id, num_servers,
num_clients, shared_mem, keep_alive))
p = ctx.Process(
target=run_server,
args=(
graph_name,
serv_id,
num_servers,
num_clients,
shared_mem,
keep_alive,
),
)
serv_ps.append(p)
p.start()
cli_ps = []
for cli_id in range(num_clients):
for group_id in range(num_groups):
print('start client[{}] for group[{}]'.format(cli_id, group_id))
p = ctx.Process(target=run_emb_client, args=(graph_name, 0, num_servers, num_clients,
print("start client[{}] for group[{}]".format(cli_id, group_id))
p = ctx.Process(
target=run_emb_client,
args=(
graph_name,
0,
num_servers,
num_clients,
g.number_of_nodes(),
g.number_of_edges(),
group_id))
group_id,
),
)
p.start()
time.sleep(1) # avoid race condition when instantiating DistGraph
cli_ps.append(p)
......@@ -337,7 +463,8 @@ def check_dist_emb_server_client(shared_mem, num_servers, num_clients, num_group
for p in serv_ps:
p.join()
print('clients have terminated')
print("clients have terminated")
def check_server_client(shared_mem, num_servers, num_clients, num_groups=1):
prepare_dist(num_servers)
......@@ -345,19 +472,28 @@ def check_server_client(shared_mem, num_servers, num_clients, num_groups=1):
# Partition the graph
num_parts = 1
graph_name = f'check_server_client_{shared_mem}_{num_servers}_{num_clients}_{num_groups}'
g.ndata['features'] = F.unsqueeze(F.arange(0, g.number_of_nodes()), 1)
g.edata['features'] = F.unsqueeze(F.arange(0, g.number_of_edges()), 1)
partition_graph(g, graph_name, num_parts, '/tmp/dist_graph')
graph_name = f"check_server_client_{shared_mem}_{num_servers}_{num_clients}_{num_groups}"
g.ndata["features"] = F.unsqueeze(F.arange(0, g.number_of_nodes()), 1)
g.edata["features"] = F.unsqueeze(F.arange(0, g.number_of_edges()), 1)
partition_graph(g, graph_name, num_parts, "/tmp/dist_graph")
# let's just test on one partition for now.
# We cannot run multiple servers and clients on the same machine.
serv_ps = []
ctx = mp.get_context('spawn')
ctx = mp.get_context("spawn")
keep_alive = num_groups > 1
for serv_id in range(num_servers):
p = ctx.Process(target=run_server, args=(graph_name, serv_id, num_servers,
num_clients, shared_mem, keep_alive))
p = ctx.Process(
target=run_server,
args=(
graph_name,
serv_id,
num_servers,
num_clients,
shared_mem,
keep_alive,
),
)
serv_ps.append(p)
p.start()
......@@ -365,9 +501,19 @@ def check_server_client(shared_mem, num_servers, num_clients, num_groups=1):
cli_ps = []
for cli_id in range(num_clients):
for group_id in range(num_groups):
print('start client[{}] for group[{}]'.format(cli_id, group_id))
p = ctx.Process(target=run_client, args=(graph_name, 0, num_servers, num_clients, g.number_of_nodes(),
g.number_of_edges(), group_id))
print("start client[{}] for group[{}]".format(cli_id, group_id))
p = ctx.Process(
target=run_client,
args=(
graph_name,
0,
num_servers,
num_clients,
g.number_of_nodes(),
g.number_of_edges(),
group_id,
),
)
p.start()
time.sleep(1) # avoid race condition when instantiating DistGraph
cli_ps.append(p)
......@@ -382,7 +528,8 @@ def check_server_client(shared_mem, num_servers, num_clients, num_groups=1):
for p in serv_ps:
p.join()
print('clients have terminated')
print("clients have terminated")
def check_server_client_hierarchy(shared_mem, num_servers, num_clients):
prepare_dist(num_servers)
......@@ -390,18 +537,26 @@ def check_server_client_hierarchy(shared_mem, num_servers, num_clients):
# Partition the graph
num_parts = 1
graph_name = 'dist_graph_test_2'
g.ndata['features'] = F.unsqueeze(F.arange(0, g.number_of_nodes()), 1)
g.edata['features'] = F.unsqueeze(F.arange(0, g.number_of_edges()), 1)
partition_graph(g, graph_name, num_parts, '/tmp/dist_graph', num_trainers_per_machine=num_clients)
graph_name = "dist_graph_test_2"
g.ndata["features"] = F.unsqueeze(F.arange(0, g.number_of_nodes()), 1)
g.edata["features"] = F.unsqueeze(F.arange(0, g.number_of_edges()), 1)
partition_graph(
g,
graph_name,
num_parts,
"/tmp/dist_graph",
num_trainers_per_machine=num_clients,
)
# let's just test on one partition for now.
# We cannot run multiple servers and clients on the same machine.
serv_ps = []
ctx = mp.get_context('spawn')
ctx = mp.get_context("spawn")
for serv_id in range(num_servers):
p = ctx.Process(target=run_server, args=(graph_name, serv_id, num_servers,
num_clients, shared_mem))
p = ctx.Process(
target=run_server,
args=(graph_name, serv_id, num_servers, num_clients, shared_mem),
)
serv_ps.append(p)
p.start()
......@@ -410,16 +565,29 @@ def check_server_client_hierarchy(shared_mem, num_servers, num_clients):
return_dict = manager.dict()
node_mask = np.zeros((g.number_of_nodes(),), np.int32)
edge_mask = np.zeros((g.number_of_edges(),), np.int32)
nodes = np.random.choice(g.number_of_nodes(), g.number_of_nodes() // 10, replace=False)
edges = np.random.choice(g.number_of_edges(), g.number_of_edges() // 10, replace=False)
nodes = np.random.choice(
g.number_of_nodes(), g.number_of_nodes() // 10, replace=False
)
edges = np.random.choice(
g.number_of_edges(), g.number_of_edges() // 10, replace=False
)
node_mask[nodes] = 1
edge_mask[edges] = 1
nodes = np.sort(nodes)
edges = np.sort(edges)
for cli_id in range(num_clients):
print('start client', cli_id)
p = ctx.Process(target=run_client_hierarchy, args=(graph_name, 0, num_servers,
node_mask, edge_mask, return_dict))
print("start client", cli_id)
p = ctx.Process(
target=run_client_hierarchy,
args=(
graph_name,
0,
num_servers,
node_mask,
edge_mask,
return_dict,
),
)
p.start()
cli_ps.append(p)
......@@ -438,33 +606,45 @@ def check_server_client_hierarchy(shared_mem, num_servers, num_clients):
assert np.all(F.asnumpy(nodes1) == nodes)
assert np.all(F.asnumpy(edges1) == edges)
print('clients have terminated')
print("clients have terminated")
def run_client_hetero(graph_name, part_id, server_count, num_clients, num_nodes, num_edges):
os.environ['DGL_NUM_SERVER'] = str(server_count)
def run_client_hetero(
graph_name, part_id, server_count, num_clients, num_nodes, num_edges
):
os.environ["DGL_NUM_SERVER"] = str(server_count)
dgl.distributed.initialize("kv_ip_config.txt")
gpb, graph_name, _, _ = load_partition_book('/tmp/dist_graph/{}.json'.format(graph_name),
part_id, None)
gpb, graph_name, _, _ = load_partition_book(
"/tmp/dist_graph/{}.json".format(graph_name), part_id, None
)
g = DistGraph(graph_name, gpb=gpb)
check_dist_graph_hetero(g, num_clients, num_nodes, num_edges)
def create_random_hetero():
num_nodes = {'n1': 10000, 'n2': 10010, 'n3': 10020}
etypes = [('n1', 'r1', 'n2'),
('n1', 'r2', 'n3'),
('n2', 'r3', 'n3')]
num_nodes = {"n1": 10000, "n2": 10010, "n3": 10020}
etypes = [("n1", "r1", "n2"), ("n1", "r2", "n3"), ("n2", "r3", "n3")]
edges = {}
for etype in etypes:
src_ntype, _, dst_ntype = etype
arr = spsp.random(num_nodes[src_ntype], num_nodes[dst_ntype], density=0.001, format='coo',
random_state=100)
arr = spsp.random(
num_nodes[src_ntype],
num_nodes[dst_ntype],
density=0.001,
format="coo",
random_state=100,
)
edges[etype] = (arr.row, arr.col)
g = dgl.heterograph(edges, num_nodes)
g.nodes['n1'].data['feat'] = F.unsqueeze(F.arange(0, g.number_of_nodes('n1')), 1)
g.edges['r1'].data['feat'] = F.unsqueeze(F.arange(0, g.number_of_edges('r1')), 1)
g.nodes["n1"].data["feat"] = F.unsqueeze(
F.arange(0, g.number_of_nodes("n1")), 1
)
g.edges["r1"].data["feat"] = F.unsqueeze(
F.arange(0, g.number_of_edges("r1")), 1
)
return g
def check_dist_graph_hetero(g, num_clients, num_nodes, num_edges):
# Test API
for ntype in num_nodes:
......@@ -473,9 +653,7 @@ def check_dist_graph_hetero(g, num_clients, num_nodes, num_edges):
for etype in num_edges:
assert etype in g.etypes
assert num_edges[etype] == g.number_of_edges(etype)
etypes = [('n1', 'r1', 'n2'),
('n1', 'r2', 'n3'),
('n2', 'r3', 'n3')]
etypes = [("n1", "r1", "n2"), ("n1", "r2", "n3"), ("n2", "r3", "n3")]
for i, etype in enumerate(g.canonical_etypes):
assert etype[0] == etypes[i][0]
assert etype[1] == etypes[i][1]
......@@ -484,76 +662,92 @@ def check_dist_graph_hetero(g, num_clients, num_nodes, num_edges):
assert g.number_of_edges() == sum([num_edges[etype] for etype in num_edges])
# Test reading node data
nids = F.arange(0, int(g.number_of_nodes('n1') / 2))
feats1 = g.nodes['n1'].data['feat'][nids]
nids = F.arange(0, int(g.number_of_nodes("n1") / 2))
feats1 = g.nodes["n1"].data["feat"][nids]
feats = F.squeeze(feats1, 1)
assert np.all(F.asnumpy(feats == nids))
# Test reading edge data
eids = F.arange(0, int(g.number_of_edges('r1') / 2))
feats1 = g.edges['r1'].data['feat'][eids]
eids = F.arange(0, int(g.number_of_edges("r1") / 2))
feats1 = g.edges["r1"].data["feat"][eids]
feats = F.squeeze(feats1, 1)
assert np.all(F.asnumpy(feats == eids))
# Test edge_subgraph
sg = g.edge_subgraph({'r1': eids})
sg = g.edge_subgraph({"r1": eids})
assert sg.num_edges() == len(eids)
assert F.array_equal(sg.edata[dgl.EID], eids)
sg = g.edge_subgraph({('n1', 'r1', 'n2'): eids})
sg = g.edge_subgraph({("n1", "r1", "n2"): eids})
assert sg.num_edges() == len(eids)
assert F.array_equal(sg.edata[dgl.EID], eids)
# Test init node data
new_shape = (g.number_of_nodes('n1'), 2)
g.nodes['n1'].data['test1'] = dgl.distributed.DistTensor(new_shape, F.int32)
feats = g.nodes['n1'].data['test1'][nids]
new_shape = (g.number_of_nodes("n1"), 2)
g.nodes["n1"].data["test1"] = dgl.distributed.DistTensor(new_shape, F.int32)
feats = g.nodes["n1"].data["test1"][nids]
assert np.all(F.asnumpy(feats) == 0)
# create a tensor and destroy a tensor and create it again.
test3 = dgl.distributed.DistTensor(new_shape, F.float32, 'test3', init_func=rand_init)
test3 = dgl.distributed.DistTensor(
new_shape, F.float32, "test3", init_func=rand_init
)
del test3
test3 = dgl.distributed.DistTensor((g.number_of_nodes('n1'), 3), F.float32, 'test3')
test3 = dgl.distributed.DistTensor(
(g.number_of_nodes("n1"), 3), F.float32, "test3"
)
del test3
# add tests for anonymous distributed tensor.
test3 = dgl.distributed.DistTensor(new_shape, F.float32, init_func=rand_init)
test3 = dgl.distributed.DistTensor(
new_shape, F.float32, init_func=rand_init
)
data = test3[0:10]
test4 = dgl.distributed.DistTensor(new_shape, F.float32, init_func=rand_init)
test4 = dgl.distributed.DistTensor(
new_shape, F.float32, init_func=rand_init
)
del test3
test5 = dgl.distributed.DistTensor(new_shape, F.float32, init_func=rand_init)
test5 = dgl.distributed.DistTensor(
new_shape, F.float32, init_func=rand_init
)
assert np.sum(F.asnumpy(test5[0:10] != data)) > 0
# test a persistent tesnor
test4 = dgl.distributed.DistTensor(new_shape, F.float32, 'test4', init_func=rand_init,
persistent=True)
test4 = dgl.distributed.DistTensor(
new_shape, F.float32, "test4", init_func=rand_init, persistent=True
)
del test4
try:
test4 = dgl.distributed.DistTensor((g.number_of_nodes('n1'), 3), F.float32, 'test4')
raise Exception('')
test4 = dgl.distributed.DistTensor(
(g.number_of_nodes("n1"), 3), F.float32, "test4"
)
raise Exception("")
except:
pass
# Test write data
new_feats = F.ones((len(nids), 2), F.int32, F.cpu())
g.nodes['n1'].data['test1'][nids] = new_feats
feats = g.nodes['n1'].data['test1'][nids]
g.nodes["n1"].data["test1"][nids] = new_feats
feats = g.nodes["n1"].data["test1"][nids]
assert np.all(F.asnumpy(feats) == 1)
# Test metadata operations.
assert len(g.nodes['n1'].data['feat']) == g.number_of_nodes('n1')
assert g.nodes['n1'].data['feat'].shape == (g.number_of_nodes('n1'), 1)
assert g.nodes['n1'].data['feat'].dtype == F.int64
assert len(g.nodes["n1"].data["feat"]) == g.number_of_nodes("n1")
assert g.nodes["n1"].data["feat"].shape == (g.number_of_nodes("n1"), 1)
assert g.nodes["n1"].data["feat"].dtype == F.int64
selected_nodes = np.random.randint(0, 100, size=g.number_of_nodes('n1')) > 30
selected_nodes = (
np.random.randint(0, 100, size=g.number_of_nodes("n1")) > 30
)
# Test node split
nodes = node_split(selected_nodes, g.get_partition_book(), ntype='n1')
nodes = node_split(selected_nodes, g.get_partition_book(), ntype="n1")
nodes = F.asnumpy(nodes)
# We only have one partition, so the local nodes are basically all nodes in the graph.
local_nids = np.arange(g.number_of_nodes('n1'))
local_nids = np.arange(g.number_of_nodes("n1"))
for n in nodes:
assert n in local_nids
print('end')
print("end")
def check_server_client_hetero(shared_mem, num_servers, num_clients):
prepare_dist(num_servers)
......@@ -561,16 +755,18 @@ def check_server_client_hetero(shared_mem, num_servers, num_clients):
# Partition the graph
num_parts = 1
graph_name = 'dist_graph_test_3'
partition_graph(g, graph_name, num_parts, '/tmp/dist_graph')
graph_name = "dist_graph_test_3"
partition_graph(g, graph_name, num_parts, "/tmp/dist_graph")
# let's just test on one partition for now.
# We cannot run multiple servers and clients on the same machine.
serv_ps = []
ctx = mp.get_context('spawn')
ctx = mp.get_context("spawn")
for serv_id in range(num_servers):
p = ctx.Process(target=run_server, args=(graph_name, serv_id, num_servers,
num_clients, shared_mem))
p = ctx.Process(
target=run_server,
args=(graph_name, serv_id, num_servers, num_clients, shared_mem),
)
serv_ps.append(p)
p.start()
......@@ -578,9 +774,18 @@ def check_server_client_hetero(shared_mem, num_servers, num_clients):
num_nodes = {ntype: g.number_of_nodes(ntype) for ntype in g.ntypes}
num_edges = {etype: g.number_of_edges(etype) for etype in g.etypes}
for cli_id in range(num_clients):
print('start client', cli_id)
p = ctx.Process(target=run_client_hetero, args=(graph_name, 0, num_servers, num_clients, num_nodes,
num_edges))
print("start client", cli_id)
p = ctx.Process(
target=run_client_hetero,
args=(
graph_name,
0,
num_servers,
num_clients,
num_nodes,
num_edges,
),
)
p.start()
cli_ps.append(p)
......@@ -590,14 +795,20 @@ def check_server_client_hetero(shared_mem, num_servers, num_clients):
for p in serv_ps:
p.join()
print('clients have terminated')
print("clients have terminated")
@unittest.skipIf(os.name == 'nt', reason='Do not support windows yet')
@unittest.skipIf(dgl.backend.backend_name == "tensorflow", reason="TF doesn't support some of operations in DistGraph")
@unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Turn off Mxnet support")
@unittest.skipIf(os.name == "nt", reason="Do not support windows yet")
@unittest.skipIf(
dgl.backend.backend_name == "tensorflow",
reason="TF doesn't support some of operations in DistGraph",
)
@unittest.skipIf(
dgl.backend.backend_name == "mxnet", reason="Turn off Mxnet support"
)
def test_server_client():
reset_envs()
os.environ['DGL_DIST_MODE'] = 'distributed'
os.environ["DGL_DIST_MODE"] = "distributed"
check_server_client_hierarchy(False, 1, 4)
check_server_client_empty(True, 1, 1)
check_server_client_hetero(True, 1, 1)
......@@ -606,78 +817,110 @@ def test_server_client():
check_server_client(False, 1, 1)
# [TODO][Rhett] Tests for multiple groups may fail sometimes and
# root cause is unknown. Let's disable them for now.
#check_server_client(True, 2, 2)
#check_server_client(True, 1, 1, 2)
#check_server_client(False, 1, 1, 2)
#check_server_client(True, 2, 2, 2)
@unittest.skipIf(os.name == 'nt', reason='Do not support windows yet')
@unittest.skipIf(dgl.backend.backend_name == "tensorflow", reason="TF doesn't support distributed DistEmbedding")
@unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Mxnet doesn't support distributed DistEmbedding")
# check_server_client(True, 2, 2)
# check_server_client(True, 1, 1, 2)
# check_server_client(False, 1, 1, 2)
# check_server_client(True, 2, 2, 2)
@unittest.skipIf(os.name == "nt", reason="Do not support windows yet")
@unittest.skipIf(
dgl.backend.backend_name == "tensorflow",
reason="TF doesn't support distributed DistEmbedding",
)
@unittest.skipIf(
dgl.backend.backend_name == "mxnet",
reason="Mxnet doesn't support distributed DistEmbedding",
)
def test_dist_emb_server_client():
reset_envs()
os.environ['DGL_DIST_MODE'] = 'distributed'
os.environ["DGL_DIST_MODE"] = "distributed"
check_dist_emb_server_client(True, 1, 1)
check_dist_emb_server_client(False, 1, 1)
# [TODO][Rhett] Tests for multiple groups may fail sometimes and
# root cause is unknown. Let's disable them for now.
#check_dist_emb_server_client(True, 2, 2)
#check_dist_emb_server_client(True, 1, 1, 2)
#check_dist_emb_server_client(False, 1, 1, 2)
#check_dist_emb_server_client(True, 2, 2, 2)
@unittest.skipIf(dgl.backend.backend_name == "tensorflow", reason="TF doesn't support some of operations in DistGraph")
@unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Turn off Mxnet support")
# check_dist_emb_server_client(True, 2, 2)
# check_dist_emb_server_client(True, 1, 1, 2)
# check_dist_emb_server_client(False, 1, 1, 2)
# check_dist_emb_server_client(True, 2, 2, 2)
@unittest.skipIf(
dgl.backend.backend_name == "tensorflow",
reason="TF doesn't support some of operations in DistGraph",
)
@unittest.skipIf(
dgl.backend.backend_name == "mxnet", reason="Turn off Mxnet support"
)
def test_standalone():
reset_envs()
os.environ['DGL_DIST_MODE'] = 'standalone'
os.environ["DGL_DIST_MODE"] = "standalone"
g = create_random_graph(10000)
# Partition the graph
num_parts = 1
graph_name = 'dist_graph_test_3'
g.ndata['features'] = F.unsqueeze(F.arange(0, g.number_of_nodes()), 1)
g.edata['features'] = F.unsqueeze(F.arange(0, g.number_of_edges()), 1)
partition_graph(g, graph_name, num_parts, '/tmp/dist_graph')
graph_name = "dist_graph_test_3"
g.ndata["features"] = F.unsqueeze(F.arange(0, g.number_of_nodes()), 1)
g.edata["features"] = F.unsqueeze(F.arange(0, g.number_of_edges()), 1)
partition_graph(g, graph_name, num_parts, "/tmp/dist_graph")
dgl.distributed.initialize("kv_ip_config.txt")
dist_g = DistGraph(graph_name, part_config='/tmp/dist_graph/{}.json'.format(graph_name))
dist_g = DistGraph(
graph_name, part_config="/tmp/dist_graph/{}.json".format(graph_name)
)
check_dist_graph(dist_g, 1, g.number_of_nodes(), g.number_of_edges())
dgl.distributed.exit_client() # this is needed since there's two test here in one process
@unittest.skipIf(dgl.backend.backend_name == "tensorflow", reason="TF doesn't support distributed DistEmbedding")
@unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Mxnet doesn't support distributed DistEmbedding")
@unittest.skipIf(
dgl.backend.backend_name == "tensorflow",
reason="TF doesn't support distributed DistEmbedding",
)
@unittest.skipIf(
dgl.backend.backend_name == "mxnet",
reason="Mxnet doesn't support distributed DistEmbedding",
)
def test_standalone_node_emb():
reset_envs()
os.environ['DGL_DIST_MODE'] = 'standalone'
os.environ["DGL_DIST_MODE"] = "standalone"
g = create_random_graph(10000)
# Partition the graph
num_parts = 1
graph_name = 'dist_graph_test_3'
g.ndata['features'] = F.unsqueeze(F.arange(0, g.number_of_nodes()), 1)
g.edata['features'] = F.unsqueeze(F.arange(0, g.number_of_edges()), 1)
partition_graph(g, graph_name, num_parts, '/tmp/dist_graph')
graph_name = "dist_graph_test_3"
g.ndata["features"] = F.unsqueeze(F.arange(0, g.number_of_nodes()), 1)
g.edata["features"] = F.unsqueeze(F.arange(0, g.number_of_edges()), 1)
partition_graph(g, graph_name, num_parts, "/tmp/dist_graph")
dgl.distributed.initialize("kv_ip_config.txt")
dist_g = DistGraph(graph_name, part_config='/tmp/dist_graph/{}.json'.format(graph_name))
dist_g = DistGraph(
graph_name, part_config="/tmp/dist_graph/{}.json".format(graph_name)
)
check_dist_emb(dist_g, 1, g.number_of_nodes(), g.number_of_edges())
dgl.distributed.exit_client() # this is needed since there's two test here in one process
@unittest.skipIf(os.name == 'nt', reason='Do not support windows yet')
@unittest.skipIf(os.name == "nt", reason="Do not support windows yet")
@pytest.mark.parametrize("hetero", [True, False])
def test_split(hetero):
if hetero:
g = create_random_hetero()
ntype = 'n1'
etype = 'r1'
ntype = "n1"
etype = "r1"
else:
g = create_random_graph(10000)
ntype = '_N'
etype = '_E'
ntype = "_N"
etype = "_E"
num_parts = 4
num_hops = 2
partition_graph(g, 'dist_graph_test', num_parts, '/tmp/dist_graph', num_hops=num_hops, part_method='metis')
partition_graph(
g,
"dist_graph_test",
num_parts,
"/tmp/dist_graph",
num_hops=num_hops,
part_method="metis",
)
node_mask = np.random.randint(0, 100, size=g.number_of_nodes(ntype)) > 30
edge_mask = np.random.randint(0, 100, size=g.number_of_edges(etype)) > 30
......@@ -688,14 +931,18 @@ def test_split(hetero):
# to determine how to split the workloads. Here is to simulate the multi-client
# use case.
def set_roles(num_clients):
dgl.distributed.role.CUR_ROLE = 'default'
dgl.distributed.role.GLOBAL_RANK = {i:i for i in range(num_clients)}
dgl.distributed.role.PER_ROLE_RANK['default'] = {i:i for i in range(num_clients)}
dgl.distributed.role.CUR_ROLE = "default"
dgl.distributed.role.GLOBAL_RANK = {i: i for i in range(num_clients)}
dgl.distributed.role.PER_ROLE_RANK["default"] = {
i: i for i in range(num_clients)
}
for i in range(num_parts):
set_roles(num_parts)
part_g, node_feats, edge_feats, gpb, _, _, _ = load_partition('/tmp/dist_graph/dist_graph_test.json', i)
local_nids = F.nonzero_1d(part_g.ndata['inner_node'])
part_g, node_feats, edge_feats, gpb, _, _, _ = load_partition(
"/tmp/dist_graph/dist_graph_test.json", i
)
local_nids = F.nonzero_1d(part_g.ndata["inner_node"])
local_nids = F.gather_row(part_g.ndata[dgl.NID], local_nids)
if hetero:
ntype_ids, nids = gpb.map_to_per_ntype(local_nids)
......@@ -703,19 +950,25 @@ def test_split(hetero):
else:
local_nids = F.asnumpy(local_nids)
nodes1 = np.intersect1d(selected_nodes, local_nids)
nodes2 = node_split(node_mask, gpb, ntype=ntype, rank=i, force_even=False)
nodes2 = node_split(
node_mask, gpb, ntype=ntype, rank=i, force_even=False
)
assert np.all(np.sort(nodes1) == np.sort(F.asnumpy(nodes2)))
for n in F.asnumpy(nodes2):
assert n in local_nids
set_roles(num_parts * 2)
nodes3 = node_split(node_mask, gpb, ntype=ntype, rank=i * 2, force_even=False)
nodes4 = node_split(node_mask, gpb, ntype=ntype, rank=i * 2 + 1, force_even=False)
nodes3 = node_split(
node_mask, gpb, ntype=ntype, rank=i * 2, force_even=False
)
nodes4 = node_split(
node_mask, gpb, ntype=ntype, rank=i * 2 + 1, force_even=False
)
nodes5 = F.cat([nodes3, nodes4], 0)
assert np.all(np.sort(nodes1) == np.sort(F.asnumpy(nodes5)))
set_roles(num_parts)
local_eids = F.nonzero_1d(part_g.edata['inner_edge'])
local_eids = F.nonzero_1d(part_g.edata["inner_edge"])
local_eids = F.gather_row(part_g.edata[dgl.EID], local_eids)
if hetero:
etype_ids, eids = gpb.map_to_per_etype(local_eids)
......@@ -723,23 +976,37 @@ def test_split(hetero):
else:
local_eids = F.asnumpy(local_eids)
edges1 = np.intersect1d(selected_edges, local_eids)
edges2 = edge_split(edge_mask, gpb, etype=etype, rank=i, force_even=False)
edges2 = edge_split(
edge_mask, gpb, etype=etype, rank=i, force_even=False
)
assert np.all(np.sort(edges1) == np.sort(F.asnumpy(edges2)))
for e in F.asnumpy(edges2):
assert e in local_eids
set_roles(num_parts * 2)
edges3 = edge_split(edge_mask, gpb, etype=etype, rank=i * 2, force_even=False)
edges4 = edge_split(edge_mask, gpb, etype=etype, rank=i * 2 + 1, force_even=False)
edges3 = edge_split(
edge_mask, gpb, etype=etype, rank=i * 2, force_even=False
)
edges4 = edge_split(
edge_mask, gpb, etype=etype, rank=i * 2 + 1, force_even=False
)
edges5 = F.cat([edges3, edges4], 0)
assert np.all(np.sort(edges1) == np.sort(F.asnumpy(edges5)))
@unittest.skipIf(os.name == 'nt', reason='Do not support windows yet')
@unittest.skipIf(os.name == "nt", reason="Do not support windows yet")
def test_split_even():
g = create_random_graph(10000)
num_parts = 4
num_hops = 2
partition_graph(g, 'dist_graph_test', num_parts, '/tmp/dist_graph', num_hops=num_hops, part_method='metis')
partition_graph(
g,
"dist_graph_test",
num_parts,
"/tmp/dist_graph",
num_hops=num_hops,
part_method="metis",
)
node_mask = np.random.randint(0, 100, size=g.number_of_nodes()) > 30
edge_mask = np.random.randint(0, 100, size=g.number_of_edges()) > 30
......@@ -754,19 +1021,27 @@ def test_split_even():
# to determine how to split the workloads. Here is to simulate the multi-client
# use case.
def set_roles(num_clients):
dgl.distributed.role.CUR_ROLE = 'default'
dgl.distributed.role.GLOBAL_RANK = {i:i for i in range(num_clients)}
dgl.distributed.role.PER_ROLE_RANK['default'] = {i:i for i in range(num_clients)}
dgl.distributed.role.CUR_ROLE = "default"
dgl.distributed.role.GLOBAL_RANK = {i: i for i in range(num_clients)}
dgl.distributed.role.PER_ROLE_RANK["default"] = {
i: i for i in range(num_clients)
}
for i in range(num_parts):
set_roles(num_parts)
part_g, node_feats, edge_feats, gpb, _, _, _ = load_partition('/tmp/dist_graph/dist_graph_test.json', i)
local_nids = F.nonzero_1d(part_g.ndata['inner_node'])
part_g, node_feats, edge_feats, gpb, _, _, _ = load_partition(
"/tmp/dist_graph/dist_graph_test.json", i
)
local_nids = F.nonzero_1d(part_g.ndata["inner_node"])
local_nids = F.gather_row(part_g.ndata[dgl.NID], local_nids)
nodes = node_split(node_mask, gpb, rank=i, force_even=True)
all_nodes1.append(nodes)
subset = np.intersect1d(F.asnumpy(nodes), F.asnumpy(local_nids))
print('part {} get {} nodes and {} are in the partition'.format(i, len(nodes), len(subset)))
print(
"part {} get {} nodes and {} are in the partition".format(
i, len(nodes), len(subset)
)
)
set_roles(num_parts * 2)
nodes1 = node_split(node_mask, gpb, rank=i * 2, force_even=True)
......@@ -774,15 +1049,19 @@ def test_split_even():
nodes3, _ = F.sort_1d(F.cat([nodes1, nodes2], 0))
all_nodes2.append(nodes3)
subset = np.intersect1d(F.asnumpy(nodes), F.asnumpy(nodes3))
print('intersection has', len(subset))
print("intersection has", len(subset))
set_roles(num_parts)
local_eids = F.nonzero_1d(part_g.edata['inner_edge'])
local_eids = F.nonzero_1d(part_g.edata["inner_edge"])
local_eids = F.gather_row(part_g.edata[dgl.EID], local_eids)
edges = edge_split(edge_mask, gpb, rank=i, force_even=True)
all_edges1.append(edges)
subset = np.intersect1d(F.asnumpy(edges), F.asnumpy(local_eids))
print('part {} get {} edges and {} are in the partition'.format(i, len(edges), len(subset)))
print(
"part {} get {} edges and {} are in the partition".format(
i, len(edges), len(subset)
)
)
set_roles(num_parts * 2)
edges1 = edge_split(edge_mask, gpb, rank=i * 2, force_even=True)
......@@ -790,7 +1069,7 @@ def test_split_even():
edges3, _ = F.sort_1d(F.cat([edges1, edges2], 0))
all_edges2.append(edges3)
subset = np.intersect1d(F.asnumpy(edges), F.asnumpy(edges3))
print('intersection has', len(subset))
print("intersection has", len(subset))
all_nodes1 = F.cat(all_nodes1, 0)
all_edges1 = F.cat(all_edges1, 0)
all_nodes2 = F.cat(all_nodes2, 0)
......@@ -802,11 +1081,13 @@ def test_split_even():
assert np.all(all_nodes == F.asnumpy(all_nodes2))
assert np.all(all_edges == F.asnumpy(all_edges2))
def prepare_dist(num_servers=1):
generate_ip_config("kv_ip_config.txt", 1, num_servers=num_servers)
if __name__ == '__main__':
os.makedirs('/tmp/dist_graph', exist_ok=True)
if __name__ == "__main__":
os.makedirs("/tmp/dist_graph", exist_ok=True)
test_dist_emb_server_client()
test_server_client()
test_split(True)
......
import multiprocessing as mp
import os
import socket
import time
import unittest
import backend as F
import numpy as np
import socket
from numpy.testing import assert_array_equal
from scipy import sparse as spsp
from utils import generate_ip_config, reset_envs
import dgl
import backend as F
import unittest
from dgl.graph_index import create_graph_index
import multiprocessing as mp
from numpy.testing import assert_array_equal
from utils import generate_ip_config, reset_envs
if os.name != 'nt':
if os.name != "nt":
import fcntl
import struct
# Create an one-part Graph
node_map = F.tensor([0,0,0,0,0,0], F.int64)
edge_map = F.tensor([0,0,0,0,0,0,0], F.int64)
global_nid = F.tensor([0,1,2,3,4,5], F.int64)
global_eid = F.tensor([0,1,2,3,4,5,6], F.int64)
node_map = F.tensor([0, 0, 0, 0, 0, 0], F.int64)
edge_map = F.tensor([0, 0, 0, 0, 0, 0, 0], F.int64)
global_nid = F.tensor([0, 1, 2, 3, 4, 5], F.int64)
global_eid = F.tensor([0, 1, 2, 3, 4, 5, 6], F.int64)
g = dgl.DGLGraph()
g.add_nodes(6)
......@@ -34,230 +36,274 @@ g.add_edges(2, 5) # 6
g.ndata[dgl.NID] = global_nid
g.edata[dgl.EID] = global_eid
gpb = dgl.distributed.graph_partition_book.BasicPartitionBook(part_id=0,
num_parts=1,
node_map=node_map,
edge_map=edge_map,
part_graph=g)
gpb = dgl.distributed.graph_partition_book.BasicPartitionBook(
part_id=0, num_parts=1, node_map=node_map, edge_map=edge_map, part_graph=g
)
node_policy = dgl.distributed.PartitionPolicy(policy_str='node:_N',
partition_book=gpb)
node_policy = dgl.distributed.PartitionPolicy(
policy_str="node:_N", partition_book=gpb
)
edge_policy = dgl.distributed.PartitionPolicy(policy_str='edge:_E',
partition_book=gpb)
edge_policy = dgl.distributed.PartitionPolicy(
policy_str="edge:_E", partition_book=gpb
)
data_0 = F.tensor(
[[1.0, 1.0], [1.0, 1.0], [1.0, 1.0], [1.0, 1.0], [1.0, 1.0], [1.0, 1.0]],
F.float32,
)
data_0_1 = F.tensor([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], F.float32)
data_0_2 = F.tensor([1, 2, 3, 4, 5, 6], F.int32)
data_0_3 = F.tensor([1, 2, 3, 4, 5, 6], F.int64)
data_1 = F.tensor(
[
[2.0, 2.0],
[2.0, 2.0],
[2.0, 2.0],
[2.0, 2.0],
[2.0, 2.0],
[2.0, 2.0],
[2.0, 2.0],
],
F.float32,
)
data_2 = F.tensor(
[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0]],
F.float32,
)
data_0 = F.tensor([[1.,1.],[1.,1.],[1.,1.],[1.,1.],[1.,1.],[1.,1.]], F.float32)
data_0_1 = F.tensor([1.,2.,3.,4.,5.,6.], F.float32)
data_0_2 = F.tensor([1,2,3,4,5,6], F.int32)
data_0_3 = F.tensor([1,2,3,4,5,6], F.int64)
data_1 = F.tensor([[2.,2.],[2.,2.],[2.,2.],[2.,2.],[2.,2.],[2.,2.],[2.,2.]], F.float32)
data_2 = F.tensor([[0.,0.],[0.,0.],[0.,0.],[0.,0.],[0.,0.],[0.,0.]], F.float32)
def init_zero_func(shape, dtype):
return F.zeros(shape, dtype, F.cpu())
def udf_push(target, name, id_tensor, data_tensor):
target[name][id_tensor] = data_tensor * data_tensor
def add_push(target, name, id_tensor, data_tensor):
target[name][id_tensor] += data_tensor
@unittest.skipIf(os.name == 'nt' or os.getenv('DGLBACKEND') == 'tensorflow', reason='Do not support windows and TF yet')
@unittest.skipIf(
os.name == "nt" or os.getenv("DGLBACKEND") == "tensorflow",
reason="Do not support windows and TF yet",
)
def test_partition_policy():
assert node_policy.part_id == 0
assert edge_policy.part_id == 0
local_nid = node_policy.to_local(F.tensor([0,1,2,3,4,5]))
local_eid = edge_policy.to_local(F.tensor([0,1,2,3,4,5,6]))
assert_array_equal(F.asnumpy(local_nid), F.asnumpy(F.tensor([0,1,2,3,4,5], F.int64)))
assert_array_equal(F.asnumpy(local_eid), F.asnumpy(F.tensor([0,1,2,3,4,5,6], F.int64)))
nid_partid = node_policy.to_partid(F.tensor([0,1,2,3,4,5], F.int64))
eid_partid = edge_policy.to_partid(F.tensor([0,1,2,3,4,5,6], F.int64))
assert_array_equal(F.asnumpy(nid_partid), F.asnumpy(F.tensor([0,0,0,0,0,0], F.int64)))
assert_array_equal(F.asnumpy(eid_partid), F.asnumpy(F.tensor([0,0,0,0,0,0,0], F.int64)))
local_nid = node_policy.to_local(F.tensor([0, 1, 2, 3, 4, 5]))
local_eid = edge_policy.to_local(F.tensor([0, 1, 2, 3, 4, 5, 6]))
assert_array_equal(
F.asnumpy(local_nid), F.asnumpy(F.tensor([0, 1, 2, 3, 4, 5], F.int64))
)
assert_array_equal(
F.asnumpy(local_eid),
F.asnumpy(F.tensor([0, 1, 2, 3, 4, 5, 6], F.int64)),
)
nid_partid = node_policy.to_partid(F.tensor([0, 1, 2, 3, 4, 5], F.int64))
eid_partid = edge_policy.to_partid(F.tensor([0, 1, 2, 3, 4, 5, 6], F.int64))
assert_array_equal(
F.asnumpy(nid_partid), F.asnumpy(F.tensor([0, 0, 0, 0, 0, 0], F.int64))
)
assert_array_equal(
F.asnumpy(eid_partid),
F.asnumpy(F.tensor([0, 0, 0, 0, 0, 0, 0], F.int64)),
)
assert node_policy.get_part_size() == len(node_map)
assert edge_policy.get_part_size() == len(edge_map)
def start_server(server_id, num_clients, num_servers):
# Init kvserver
print("Sleep 5 seconds to test client re-connect.")
time.sleep(5)
kvserver = dgl.distributed.KVServer(server_id=server_id,
ip_config='kv_ip_config.txt',
kvserver = dgl.distributed.KVServer(
server_id=server_id,
ip_config="kv_ip_config.txt",
num_servers=num_servers,
num_clients=num_clients)
num_clients=num_clients,
)
kvserver.add_part_policy(node_policy)
kvserver.add_part_policy(edge_policy)
if kvserver.is_backup_server():
kvserver.init_data('data_0', 'node:_N')
kvserver.init_data('data_0_1', 'node:_N')
kvserver.init_data('data_0_2', 'node:_N')
kvserver.init_data('data_0_3', 'node:_N')
kvserver.init_data("data_0", "node:_N")
kvserver.init_data("data_0_1", "node:_N")
kvserver.init_data("data_0_2", "node:_N")
kvserver.init_data("data_0_3", "node:_N")
else:
kvserver.init_data('data_0', 'node:_N', data_0)
kvserver.init_data('data_0_1', 'node:_N', data_0_1)
kvserver.init_data('data_0_2', 'node:_N', data_0_2)
kvserver.init_data('data_0_3', 'node:_N', data_0_3)
kvserver.init_data("data_0", "node:_N", data_0)
kvserver.init_data("data_0_1", "node:_N", data_0_1)
kvserver.init_data("data_0_2", "node:_N", data_0_2)
kvserver.init_data("data_0_3", "node:_N", data_0_3)
# start server
server_state = dgl.distributed.ServerState(kv_store=kvserver, local_g=None, partition_book=None)
dgl.distributed.start_server(server_id=server_id,
ip_config='kv_ip_config.txt',
server_state = dgl.distributed.ServerState(
kv_store=kvserver, local_g=None, partition_book=None
)
dgl.distributed.start_server(
server_id=server_id,
ip_config="kv_ip_config.txt",
num_servers=num_servers,
num_clients=num_clients,
server_state=server_state)
server_state=server_state,
)
def start_server_mul_role(server_id, num_clients, num_servers):
# Init kvserver
kvserver = dgl.distributed.KVServer(server_id=server_id,
ip_config='kv_ip_mul_config.txt',
kvserver = dgl.distributed.KVServer(
server_id=server_id,
ip_config="kv_ip_mul_config.txt",
num_servers=num_servers,
num_clients=num_clients)
num_clients=num_clients,
)
kvserver.add_part_policy(node_policy)
if kvserver.is_backup_server():
kvserver.init_data('data_0', 'node:_N')
kvserver.init_data("data_0", "node:_N")
else:
kvserver.init_data('data_0', 'node:_N', data_0)
kvserver.init_data("data_0", "node:_N", data_0)
# start server
server_state = dgl.distributed.ServerState(kv_store=kvserver, local_g=None, partition_book=None)
dgl.distributed.start_server(server_id=server_id,
ip_config='kv_ip_mul_config.txt',
server_state = dgl.distributed.ServerState(
kv_store=kvserver, local_g=None, partition_book=None
)
dgl.distributed.start_server(
server_id=server_id,
ip_config="kv_ip_mul_config.txt",
num_servers=num_servers,
num_clients=num_clients,
server_state=server_state)
server_state=server_state,
)
def start_client(num_clients, num_servers):
os.environ['DGL_DIST_MODE'] = 'distributed'
os.environ["DGL_DIST_MODE"] = "distributed"
# Note: connect to server first !
dgl.distributed.initialize(ip_config='kv_ip_config.txt')
dgl.distributed.initialize(ip_config="kv_ip_config.txt")
# Init kvclient
kvclient = dgl.distributed.KVClient(ip_config='kv_ip_config.txt', num_servers=num_servers)
kvclient = dgl.distributed.KVClient(
ip_config="kv_ip_config.txt", num_servers=num_servers
)
kvclient.map_shared_data(partition_book=gpb)
assert dgl.distributed.get_num_client() == num_clients
kvclient.init_data(name='data_1',
kvclient.init_data(
name="data_1",
shape=F.shape(data_1),
dtype=F.dtype(data_1),
part_policy=edge_policy,
init_func=init_zero_func)
kvclient.init_data(name='data_2',
init_func=init_zero_func,
)
kvclient.init_data(
name="data_2",
shape=F.shape(data_2),
dtype=F.dtype(data_2),
part_policy=node_policy,
init_func=init_zero_func)
init_func=init_zero_func,
)
# Test data_name_list
name_list = kvclient.data_name_list()
print(name_list)
assert 'data_0' in name_list
assert 'data_0_1' in name_list
assert 'data_0_2' in name_list
assert 'data_0_3' in name_list
assert 'data_1' in name_list
assert 'data_2' in name_list
assert "data_0" in name_list
assert "data_0_1" in name_list
assert "data_0_2" in name_list
assert "data_0_3" in name_list
assert "data_1" in name_list
assert "data_2" in name_list
# Test get_meta_data
meta = kvclient.get_data_meta('data_0')
meta = kvclient.get_data_meta("data_0")
dtype, shape, policy = meta
assert dtype == F.dtype(data_0)
assert shape == F.shape(data_0)
assert policy.policy_str == 'node:_N'
assert policy.policy_str == "node:_N"
meta = kvclient.get_data_meta('data_0_1')
meta = kvclient.get_data_meta("data_0_1")
dtype, shape, policy = meta
assert dtype == F.dtype(data_0_1)
assert shape == F.shape(data_0_1)
assert policy.policy_str == 'node:_N'
assert policy.policy_str == "node:_N"
meta = kvclient.get_data_meta('data_0_2')
meta = kvclient.get_data_meta("data_0_2")
dtype, shape, policy = meta
assert dtype == F.dtype(data_0_2)
assert shape == F.shape(data_0_2)
assert policy.policy_str == 'node:_N'
assert policy.policy_str == "node:_N"
meta = kvclient.get_data_meta('data_0_3')
meta = kvclient.get_data_meta("data_0_3")
dtype, shape, policy = meta
assert dtype == F.dtype(data_0_3)
assert shape == F.shape(data_0_3)
assert policy.policy_str == 'node:_N'
assert policy.policy_str == "node:_N"
meta = kvclient.get_data_meta('data_1')
meta = kvclient.get_data_meta("data_1")
dtype, shape, policy = meta
assert dtype == F.dtype(data_1)
assert shape == F.shape(data_1)
assert policy.policy_str == 'edge:_E'
assert policy.policy_str == "edge:_E"
meta = kvclient.get_data_meta('data_2')
meta = kvclient.get_data_meta("data_2")
dtype, shape, policy = meta
assert dtype == F.dtype(data_2)
assert shape == F.shape(data_2)
assert policy.policy_str == 'node:_N'
assert policy.policy_str == "node:_N"
# Test push and pull
id_tensor = F.tensor([0,2,4], F.int64)
data_tensor = F.tensor([[6.,6.],[6.,6.],[6.,6.]], F.float32)
kvclient.push(name='data_0',
id_tensor=id_tensor,
data_tensor=data_tensor)
kvclient.push(name='data_1',
id_tensor=id_tensor,
data_tensor=data_tensor)
kvclient.push(name='data_2',
id_tensor=id_tensor,
data_tensor=data_tensor)
res = kvclient.pull(name='data_0', id_tensor=id_tensor)
id_tensor = F.tensor([0, 2, 4], F.int64)
data_tensor = F.tensor([[6.0, 6.0], [6.0, 6.0], [6.0, 6.0]], F.float32)
kvclient.push(name="data_0", id_tensor=id_tensor, data_tensor=data_tensor)
kvclient.push(name="data_1", id_tensor=id_tensor, data_tensor=data_tensor)
kvclient.push(name="data_2", id_tensor=id_tensor, data_tensor=data_tensor)
res = kvclient.pull(name="data_0", id_tensor=id_tensor)
assert_array_equal(F.asnumpy(res), F.asnumpy(data_tensor))
res = kvclient.pull(name='data_1', id_tensor=id_tensor)
res = kvclient.pull(name="data_1", id_tensor=id_tensor)
assert_array_equal(F.asnumpy(res), F.asnumpy(data_tensor))
res = kvclient.pull(name='data_2', id_tensor=id_tensor)
res = kvclient.pull(name="data_2", id_tensor=id_tensor)
assert_array_equal(F.asnumpy(res), F.asnumpy(data_tensor))
# Register new push handler
kvclient.register_push_handler('data_0', udf_push)
kvclient.register_push_handler('data_1', udf_push)
kvclient.register_push_handler('data_2', udf_push)
kvclient.register_push_handler("data_0", udf_push)
kvclient.register_push_handler("data_1", udf_push)
kvclient.register_push_handler("data_2", udf_push)
# Test push and pull
kvclient.push(name='data_0',
id_tensor=id_tensor,
data_tensor=data_tensor)
kvclient.push(name='data_1',
id_tensor=id_tensor,
data_tensor=data_tensor)
kvclient.push(name='data_2',
id_tensor=id_tensor,
data_tensor=data_tensor)
kvclient.push(name="data_0", id_tensor=id_tensor, data_tensor=data_tensor)
kvclient.push(name="data_1", id_tensor=id_tensor, data_tensor=data_tensor)
kvclient.push(name="data_2", id_tensor=id_tensor, data_tensor=data_tensor)
kvclient.barrier()
data_tensor = data_tensor * data_tensor
res = kvclient.pull(name='data_0', id_tensor=id_tensor)
res = kvclient.pull(name="data_0", id_tensor=id_tensor)
assert_array_equal(F.asnumpy(res), F.asnumpy(data_tensor))
res = kvclient.pull(name='data_1', id_tensor=id_tensor)
res = kvclient.pull(name="data_1", id_tensor=id_tensor)
assert_array_equal(F.asnumpy(res), F.asnumpy(data_tensor))
res = kvclient.pull(name='data_2', id_tensor=id_tensor)
res = kvclient.pull(name="data_2", id_tensor=id_tensor)
assert_array_equal(F.asnumpy(res), F.asnumpy(data_tensor))
# Test delete data
kvclient.delete_data('data_0')
kvclient.delete_data('data_1')
kvclient.delete_data('data_2')
kvclient.delete_data("data_0")
kvclient.delete_data("data_1")
kvclient.delete_data("data_2")
# Register new push handler
kvclient.init_data(name='data_3',
kvclient.init_data(
name="data_3",
shape=F.shape(data_2),
dtype=F.dtype(data_2),
part_policy=node_policy,
init_func=init_zero_func)
kvclient.register_push_handler('data_3', add_push)
data_tensor = F.tensor([[6.,6.],[6.,6.],[6.,6.]], F.float32)
init_func=init_zero_func,
)
kvclient.register_push_handler("data_3", add_push)
data_tensor = F.tensor([[6.0, 6.0], [6.0, 6.0], [6.0, 6.0]], F.float32)
kvclient.barrier()
time.sleep(kvclient.client_id + 1)
print("add...")
kvclient.push(name='data_3',
id_tensor=id_tensor,
data_tensor=data_tensor)
kvclient.push(name="data_3", id_tensor=id_tensor, data_tensor=data_tensor)
kvclient.barrier()
res = kvclient.pull(name='data_3', id_tensor=id_tensor)
res = kvclient.pull(name="data_3", id_tensor=id_tensor)
data_tensor = data_tensor * num_clients
assert_array_equal(F.asnumpy(res), F.asnumpy(data_tensor))
def start_client_mul_role(i):
os.environ['DGL_DIST_MODE'] = 'distributed'
os.environ["DGL_DIST_MODE"] = "distributed"
# Initialize creates kvstore !
dgl.distributed.initialize(ip_config='kv_ip_mul_config.txt')
dgl.distributed.initialize(ip_config="kv_ip_mul_config.txt")
if i == 0: # block one trainer
time.sleep(5)
kvclient = dgl.distributed.kvstore.get_kvstore()
......@@ -266,26 +312,39 @@ def start_client_mul_role(i):
assert dgl.distributed.role.get_num_trainers() == 2
assert dgl.distributed.role.get_trainer_rank() < 2
print('trainer rank: %d, global rank: %d' % (dgl.distributed.role.get_trainer_rank(),
dgl.distributed.role.get_global_rank()))
print(
"trainer rank: %d, global rank: %d"
% (
dgl.distributed.role.get_trainer_rank(),
dgl.distributed.role.get_global_rank(),
)
)
dgl.distributed.exit_client()
@unittest.skipIf(os.name == 'nt' or os.getenv('DGLBACKEND') == 'tensorflow', reason='Do not support windows and TF yet')
@unittest.skipIf(
os.name == "nt" or os.getenv("DGLBACKEND") == "tensorflow",
reason="Do not support windows and TF yet",
)
def test_kv_store():
reset_envs()
num_servers = 2
num_clients = 2
generate_ip_config("kv_ip_config.txt", 1, num_servers)
ctx = mp.get_context('spawn')
ctx = mp.get_context("spawn")
pserver_list = []
pclient_list = []
os.environ['DGL_NUM_SERVER'] = str(num_servers)
os.environ["DGL_NUM_SERVER"] = str(num_servers)
for i in range(num_servers):
pserver = ctx.Process(target=start_server, args=(i, num_clients, num_servers))
pserver = ctx.Process(
target=start_server, args=(i, num_clients, num_servers)
)
pserver.start()
pserver_list.append(pserver)
for i in range(num_clients):
pclient = ctx.Process(target=start_client, args=(num_clients, num_servers))
pclient = ctx.Process(
target=start_client, args=(num_clients, num_servers)
)
pclient.start()
pclient_list.append(pclient)
for i in range(num_clients):
......@@ -293,7 +352,11 @@ def test_kv_store():
for i in range(num_servers):
pserver_list[i].join()
@unittest.skipIf(os.name == 'nt' or os.getenv('DGLBACKEND') == 'tensorflow', reason='Do not support windows and TF yet')
@unittest.skipIf(
os.name == "nt" or os.getenv("DGLBACKEND") == "tensorflow",
reason="Do not support windows and TF yet",
)
def test_kv_multi_role():
reset_envs()
num_servers = 2
......@@ -302,13 +365,15 @@ def test_kv_multi_role():
generate_ip_config("kv_ip_mul_config.txt", 1, num_servers)
# There are two trainer processes and each trainer process has two sampler processes.
num_clients = num_trainers * (1 + num_samplers)
ctx = mp.get_context('spawn')
ctx = mp.get_context("spawn")
pserver_list = []
pclient_list = []
os.environ['DGL_NUM_SAMPLER'] = str(num_samplers)
os.environ['DGL_NUM_SERVER'] = str(num_servers)
os.environ["DGL_NUM_SAMPLER"] = str(num_samplers)
os.environ["DGL_NUM_SERVER"] = str(num_servers)
for i in range(num_servers):
pserver = ctx.Process(target=start_server_mul_role, args=(i, num_clients, num_servers))
pserver = ctx.Process(
target=start_server_mul_role, args=(i, num_clients, num_servers)
)
pserver.start()
pserver_list.append(pserver)
for i in range(num_trainers):
......@@ -320,7 +385,8 @@ def test_kv_multi_role():
for i in range(num_servers):
pserver_list[i].join()
if __name__ == '__main__':
if __name__ == "__main__":
test_partition_policy()
test_kv_store()
test_kv_multi_role()
import multiprocessing as mp
import os
import time
import socket
import time
import unittest
import dgl
import backend as F
import unittest, pytest
import multiprocessing as mp
import pytest
from numpy.testing import assert_array_equal
from utils import reset_envs, generate_ip_config
from utils import generate_ip_config, reset_envs
if os.name != 'nt':
import dgl
if os.name != "nt":
import fcntl
import struct
INTEGER = 2
STR = 'hello world!'
STR = "hello world!"
HELLO_SERVICE_ID = 901231
TENSOR = F.zeros((1000, 1000), F.int64, F.cpu())
def foo(x, y):
assert x == 123
assert y == "abc"
class MyRequest(dgl.distributed.Request):
def __init__(self):
self.x = 123
......@@ -38,6 +42,7 @@ class MyRequest(dgl.distributed.Request):
def process_request(self, server_state):
pass
class MyResponse(dgl.distributed.Response):
def __init__(self):
self.x = 432
......@@ -48,9 +53,11 @@ class MyResponse(dgl.distributed.Response):
def __setstate__(self, state):
self.x = state
def simple_func(tensor):
return tensor
class HelloResponse(dgl.distributed.Response):
def __init__(self, hello_str, integer, tensor):
self.hello_str = hello_str
......@@ -63,6 +70,7 @@ class HelloResponse(dgl.distributed.Response):
def __setstate__(self, state):
self.hello_str, self.integer, self.tensor = state
class HelloRequest(dgl.distributed.Request):
def __init__(self, hello_str, integer, tensor, func):
self.hello_str = hello_str
......@@ -85,7 +93,7 @@ class HelloRequest(dgl.distributed.Request):
TIMEOUT_SERVICE_ID = 123456789
TIMEOUT_META = 'timeout_test'
TIMEOUT_META = "timeout_test"
class TimeoutResponse(dgl.distributed.Response):
......@@ -114,33 +122,53 @@ class TimeoutRequest(dgl.distributed.Request):
def process_request(self, server_state):
assert self.meta == TIMEOUT_META
# convert from milliseconds to seconds
time.sleep(self.timeout/1000)
time.sleep(self.timeout / 1000)
if not self.response:
return None
res = TimeoutResponse(self.meta)
return res
def start_server(num_clients, ip_config, server_id=0, keep_alive=False, num_servers=1, net_type='tensorpipe'):
def start_server(
num_clients,
ip_config,
server_id=0,
keep_alive=False,
num_servers=1,
net_type="tensorpipe",
):
print("Sleep 1 seconds to test client re-connect.")
time.sleep(1)
server_state = dgl.distributed.ServerState(
None, local_g=None, partition_book=None, keep_alive=keep_alive)
None, local_g=None, partition_book=None, keep_alive=keep_alive
)
dgl.distributed.register_service(
HELLO_SERVICE_ID, HelloRequest, HelloResponse)
HELLO_SERVICE_ID, HelloRequest, HelloResponse
)
dgl.distributed.register_service(
TIMEOUT_SERVICE_ID, TimeoutRequest, TimeoutResponse)
TIMEOUT_SERVICE_ID, TimeoutRequest, TimeoutResponse
)
print("Start server {}".format(server_id))
dgl.distributed.start_server(server_id=server_id,
dgl.distributed.start_server(
server_id=server_id,
ip_config=ip_config,
num_servers=num_servers,
num_clients=num_clients,
server_state=server_state,
net_type=net_type)
net_type=net_type,
)
def start_client(ip_config, group_id=0, num_servers=1, net_type='tensorpipe'):
dgl.distributed.register_service(HELLO_SERVICE_ID, HelloRequest, HelloResponse)
def start_client(ip_config, group_id=0, num_servers=1, net_type="tensorpipe"):
dgl.distributed.register_service(
HELLO_SERVICE_ID, HelloRequest, HelloResponse
)
dgl.distributed.connect_to_server(
ip_config=ip_config, num_servers=num_servers, group_id=group_id, net_type=net_type)
ip_config=ip_config,
num_servers=num_servers,
group_id=group_id,
net_type=net_type,
)
req = HelloRequest(STR, INTEGER, TENSOR, simple_func)
# test send and recv
dgl.distributed.send_request(0, req)
......@@ -174,16 +202,23 @@ def start_client(ip_config, group_id=0, num_servers=1, net_type='tensorpipe'):
assert_array_equal(F.asnumpy(res.tensor), F.asnumpy(TENSOR))
def start_client_timeout(ip_config, group_id=0, num_servers=1, net_type='tensorpipe'):
def start_client_timeout(
ip_config, group_id=0, num_servers=1, net_type="tensorpipe"
):
dgl.distributed.register_service(
TIMEOUT_SERVICE_ID, TimeoutRequest, TimeoutResponse)
TIMEOUT_SERVICE_ID, TimeoutRequest, TimeoutResponse
)
dgl.distributed.connect_to_server(
ip_config=ip_config, num_servers=num_servers, group_id=group_id, net_type=net_type)
ip_config=ip_config,
num_servers=num_servers,
group_id=group_id,
net_type=net_type,
)
timeout = 1 * 1000 # milliseconds
req = TimeoutRequest(TIMEOUT_META, timeout)
# test send and recv
dgl.distributed.send_request(0, req)
res = dgl.distributed.recv_response(timeout=int(timeout/2))
res = dgl.distributed.recv_response(timeout=int(timeout / 2))
assert res is None
res = dgl.distributed.recv_response()
assert res.meta == TIMEOUT_META
......@@ -195,14 +230,15 @@ def start_client_timeout(ip_config, group_id=0, num_servers=1, net_type='tensorp
expect_except = False
try:
res_list = dgl.distributed.remote_call(
target_and_requests, timeout=int(timeout/2))
target_and_requests, timeout=int(timeout / 2)
)
except dgl.DGLError:
expect_except = True
assert expect_except
# test send_request_to_machine
req = TimeoutRequest(TIMEOUT_META, timeout)
dgl.distributed.send_request_to_machine(0, req)
res = dgl.distributed.recv_response(timeout=int(timeout/2))
res = dgl.distributed.recv_response(timeout=int(timeout / 2))
assert res is None
res = dgl.distributed.recv_response()
assert res.meta == TIMEOUT_META
......@@ -214,30 +250,41 @@ def start_client_timeout(ip_config, group_id=0, num_servers=1, net_type='tensorp
expect_except = False
try:
res_list = dgl.distributed.remote_call_to_machine(
target_and_requests, timeout=int(timeout/2))
target_and_requests, timeout=int(timeout / 2)
)
except dgl.DGLError:
expect_except = True
assert expect_except
@unittest.skipIf(os.name == 'nt', reason='Do not support windows yet')
@pytest.mark.parametrize("net_type", ['socket', 'tensorpipe'])
@unittest.skipIf(os.name == "nt", reason="Do not support windows yet")
@pytest.mark.parametrize("net_type", ["socket", "tensorpipe"])
def test_rpc_timeout(net_type):
reset_envs()
os.environ['DGL_DIST_MODE'] = 'distributed'
os.environ["DGL_DIST_MODE"] = "distributed"
ip_config = "rpc_ip_config.txt"
generate_ip_config(ip_config, 1, 1)
ctx = mp.get_context('spawn')
pserver = ctx.Process(target=start_server, args=(1, ip_config, 0, False, 1, net_type))
pclient = ctx.Process(target=start_client_timeout, args=(ip_config, 0, 1, net_type))
ctx = mp.get_context("spawn")
pserver = ctx.Process(
target=start_server, args=(1, ip_config, 0, False, 1, net_type)
)
pclient = ctx.Process(
target=start_client_timeout, args=(ip_config, 0, 1, net_type)
)
pserver.start()
pclient.start()
pserver.join()
pclient.join()
def test_serialize():
reset_envs()
os.environ['DGL_DIST_MODE'] = 'distributed'
from dgl.distributed.rpc import serialize_to_payload, deserialize_from_payload
os.environ["DGL_DIST_MODE"] = "distributed"
from dgl.distributed.rpc import (
deserialize_from_payload,
serialize_to_payload,
)
SERVICE_ID = 12345
dgl.distributed.register_service(SERVICE_ID, MyRequest, MyResponse)
req = MyRequest()
......@@ -253,10 +300,16 @@ def test_serialize():
res1 = deserialize_from_payload(MyResponse, data, tensors)
assert res.x == res1.x
def test_rpc_msg():
reset_envs()
os.environ['DGL_DIST_MODE'] = 'distributed'
from dgl.distributed.rpc import serialize_to_payload, deserialize_from_payload, RPCMessage
os.environ["DGL_DIST_MODE"] = "distributed"
from dgl.distributed.rpc import (
RPCMessage,
deserialize_from_payload,
serialize_to_payload,
)
SERVICE_ID = 32452
dgl.distributed.register_service(SERVICE_ID, MyRequest, MyResponse)
req = MyRequest()
......@@ -270,33 +323,45 @@ def test_rpc_msg():
assert len(rpcmsg.tensors) == 1
assert F.array_equal(rpcmsg.tensors[0], req.z)
@unittest.skipIf(os.name == 'nt', reason='Do not support windows yet')
@pytest.mark.parametrize("net_type", ['tensorpipe'])
@unittest.skipIf(os.name == "nt", reason="Do not support windows yet")
@pytest.mark.parametrize("net_type", ["tensorpipe"])
def test_rpc(net_type):
reset_envs()
os.environ['DGL_DIST_MODE'] = 'distributed'
os.environ["DGL_DIST_MODE"] = "distributed"
generate_ip_config("rpc_ip_config.txt", 1, 1)
ctx = mp.get_context('spawn')
pserver = ctx.Process(target=start_server, args=(1, "rpc_ip_config.txt", 0, False, 1, net_type))
pclient = ctx.Process(target=start_client, args=("rpc_ip_config.txt", 0, 1, net_type))
ctx = mp.get_context("spawn")
pserver = ctx.Process(
target=start_server,
args=(1, "rpc_ip_config.txt", 0, False, 1, net_type),
)
pclient = ctx.Process(
target=start_client, args=("rpc_ip_config.txt", 0, 1, net_type)
)
pserver.start()
pclient.start()
pserver.join()
pclient.join()
@unittest.skipIf(os.name == 'nt', reason='Do not support windows yet')
@pytest.mark.parametrize("net_type", ['socket', 'tensorpipe'])
@unittest.skipIf(os.name == "nt", reason="Do not support windows yet")
@pytest.mark.parametrize("net_type", ["socket", "tensorpipe"])
def test_multi_client(net_type):
reset_envs()
os.environ['DGL_DIST_MODE'] = 'distributed'
os.environ["DGL_DIST_MODE"] = "distributed"
ip_config = "rpc_ip_config_mul_client.txt"
generate_ip_config(ip_config, 1, 1)
ctx = mp.get_context('spawn')
ctx = mp.get_context("spawn")
num_clients = 20
pserver = ctx.Process(target=start_server, args=(num_clients, ip_config, 0, False, 1, net_type))
pserver = ctx.Process(
target=start_server,
args=(num_clients, ip_config, 0, False, 1, net_type),
)
pclient_list = []
for i in range(num_clients):
pclient = ctx.Process(target=start_client, args=(ip_config, 0, 1, net_type))
pclient = ctx.Process(
target=start_client, args=(ip_config, 0, 1, net_type)
)
pclient_list.append(pclient)
pserver.start()
for i in range(num_clients):
......@@ -306,24 +371,32 @@ def test_multi_client(net_type):
pserver.join()
@unittest.skipIf(os.name == 'nt', reason='Do not support windows yet')
@pytest.mark.parametrize("net_type", ['socket', 'tensorpipe'])
@unittest.skipIf(os.name == "nt", reason="Do not support windows yet")
@pytest.mark.parametrize("net_type", ["socket", "tensorpipe"])
def test_multi_thread_rpc(net_type):
reset_envs()
os.environ['DGL_DIST_MODE'] = 'distributed'
os.environ["DGL_DIST_MODE"] = "distributed"
num_servers = 2
ip_config = "rpc_ip_config_multithread.txt"
generate_ip_config(ip_config, num_servers, num_servers)
ctx = mp.get_context('spawn')
ctx = mp.get_context("spawn")
pserver_list = []
for i in range(num_servers):
pserver = ctx.Process(target=start_server, args=(1, ip_config, i, False, 1, net_type))
pserver = ctx.Process(
target=start_server, args=(1, ip_config, i, False, 1, net_type)
)
pserver.start()
pserver_list.append(pserver)
def start_client_multithread(ip_config):
import threading
dgl.distributed.connect_to_server(ip_config=ip_config, num_servers=1, net_type=net_type)
dgl.distributed.register_service(HELLO_SERVICE_ID, HelloRequest, HelloResponse)
dgl.distributed.connect_to_server(
ip_config=ip_config, num_servers=1, net_type=net_type
)
dgl.distributed.register_service(
HELLO_SERVICE_ID, HelloRequest, HelloResponse
)
req = HelloRequest(STR, INTEGER, TENSOR, simple_func)
dgl.distributed.send_request(0, req)
......@@ -332,7 +405,6 @@ def test_multi_thread_rpc(net_type):
req = HelloRequest(STR, INTEGER, TENSOR, simple_func)
dgl.distributed.send_request(server_id, req)
subthread = threading.Thread(target=subthread_call, args=(1,))
subthread.start()
subthread.join()
......@@ -347,11 +419,15 @@ def test_multi_thread_rpc(net_type):
start_client_multithread(ip_config)
pserver.join()
@unittest.skipIf(True, reason="Tests of multiple groups may fail and let's disable them for now.")
@unittest.skipIf(os.name == 'nt', reason='Do not support windows yet')
@unittest.skipIf(
True,
reason="Tests of multiple groups may fail and let's disable them for now.",
)
@unittest.skipIf(os.name == "nt", reason="Do not support windows yet")
def test_multi_client_groups():
reset_envs()
os.environ['DGL_DIST_MODE'] = 'distributed'
os.environ["DGL_DIST_MODE"] = "distributed"
ip_config = "rpc_ip_config_mul_client_groups.txt"
num_machines = 5
# should test with larger number but due to possible port in-use issue.
......@@ -360,16 +436,21 @@ def test_multi_client_groups():
# presssue test
num_clients = 2
num_groups = 2
ctx = mp.get_context('spawn')
ctx = mp.get_context("spawn")
pserver_list = []
for i in range(num_servers*num_machines):
pserver = ctx.Process(target=start_server, args=(num_clients, ip_config, i, True, num_servers))
for i in range(num_servers * num_machines):
pserver = ctx.Process(
target=start_server,
args=(num_clients, ip_config, i, True, num_servers),
)
pserver.start()
pserver_list.append(pserver)
pclient_list = []
for i in range(num_clients):
for group_id in range(num_groups):
pclient = ctx.Process(target=start_client, args=(ip_config, group_id, num_servers))
pclient = ctx.Process(
target=start_client, args=(ip_config, group_id, num_servers)
)
pclient.start()
pclient_list.append(pclient)
for p in pclient_list:
......@@ -381,19 +462,23 @@ def test_multi_client_groups():
for p in pserver_list:
p.join()
@unittest.skipIf(os.name == 'nt', reason='Do not support windows yet')
@pytest.mark.parametrize("net_type", ['socket', 'tensorpipe'])
@unittest.skipIf(os.name == "nt", reason="Do not support windows yet")
@pytest.mark.parametrize("net_type", ["socket", "tensorpipe"])
def test_multi_client_connect(net_type):
reset_envs()
os.environ['DGL_DIST_MODE'] = 'distributed'
os.environ["DGL_DIST_MODE"] = "distributed"
ip_config = "rpc_ip_config_mul_client.txt"
generate_ip_config(ip_config, 1, 1)
ctx = mp.get_context('spawn')
ctx = mp.get_context("spawn")
num_clients = 1
pserver = ctx.Process(target=start_server, args=(num_clients, ip_config, 0, False, 1, net_type))
pserver = ctx.Process(
target=start_server,
args=(num_clients, ip_config, 0, False, 1, net_type),
)
# small max try times
os.environ['DGL_DIST_MAX_TRY_TIMES'] = '1'
os.environ["DGL_DIST_MAX_TRY_TIMES"] = "1"
expect_except = False
try:
start_client(ip_config, 0, 1, net_type)
......@@ -403,7 +488,7 @@ def test_multi_client_connect(net_type):
assert expect_except
# large max try times
os.environ['DGL_DIST_MAX_TRY_TIMES'] = '1024'
os.environ["DGL_DIST_MAX_TRY_TIMES"] = "1024"
pclient = ctx.Process(target=start_client, args=(ip_config, 0, 1, net_type))
pclient.start()
pserver.start()
......@@ -411,12 +496,13 @@ def test_multi_client_connect(net_type):
pserver.join()
reset_envs()
if __name__ == '__main__':
if __name__ == "__main__":
test_serialize()
test_rpc_msg()
test_rpc()
test_multi_client('socket')
test_multi_client('tesnsorpipe')
test_multi_client("socket")
test_multi_client("tesnsorpipe")
test_multi_thread_rpc()
test_multi_client_connect('socket')
test_multi_client_connect('tensorpipe')
test_multi_client_connect("socket")
test_multi_client_connect("tensorpipe")
import socket
import os
import random
import scipy.sparse as spsp
import socket
import numpy as np
import scipy.sparse as spsp
import dgl
......@@ -13,10 +14,10 @@ def generate_ip_config(file_name, num_machines, num_servers):
sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
try:
# doesn't even have to be reachable
sock.connect(('10.255.255.255', 1))
sock.connect(("10.255.255.255", 1))
ip = sock.getsockname()[0]
except ValueError:
ip = '127.0.0.1'
ip = "127.0.0.1"
finally:
sock.close()
......@@ -35,16 +36,23 @@ def generate_ip_config(file_name, num_machines, num_servers):
sock.close()
if len(ports) < num_machines * num_servers:
raise RuntimeError(
"Failed to get available IP/PORT with required numbers.")
with open(file_name, 'w') as f:
"Failed to get available IP/PORT with required numbers."
)
with open(file_name, "w") as f:
for i in range(num_machines):
f.write('{} {}\n'.format(ip, ports[i*num_servers]))
f.write("{} {}\n".format(ip, ports[i * num_servers]))
def reset_envs():
"""Reset common environment variable which are set in tests. """
for key in ['DGL_ROLE', 'DGL_NUM_SAMPLER', 'DGL_NUM_SERVER', \
'DGL_DIST_MODE', 'DGL_NUM_CLIENT', 'DGL_DIST_MAX_TRY_TIMES']:
"""Reset common environment variable which are set in tests."""
for key in [
"DGL_ROLE",
"DGL_NUM_SAMPLER",
"DGL_NUM_SERVER",
"DGL_DIST_MODE",
"DGL_NUM_CLIENT",
"DGL_DIST_MAX_TRY_TIMES",
]:
if key in os.environ:
os.environ.pop(key)
......
import dgl
import pytest
import torch
from dglgo.model import *
from test_utils.graph_cases import get_cases
@pytest.mark.parametrize('g', get_cases(['has_scalar_e_feature']))
import dgl
from dglgo.model import *
@pytest.mark.parametrize("g", get_cases(["has_scalar_e_feature"]))
def test_gcn(g):
data_info = {
'num_nodes': g.num_nodes(),
'out_size': 7
}
data_info = {"num_nodes": g.num_nodes(), "out_size": 7}
node_feat = None
edge_feat = g.edata['scalar_w']
edge_feat = g.edata["scalar_w"]
# node embedding + not use_edge_weight
model = GCN(data_info, embed_size=10, use_edge_weight=False)
......@@ -21,8 +20,8 @@ def test_gcn(g):
model = GCN(data_info, embed_size=10, use_edge_weight=True)
model(g, node_feat, edge_feat)
data_info['in_size'] = g.ndata['h'].shape[-1]
node_feat = g.ndata['h']
data_info["in_size"] = g.ndata["h"].shape[-1]
node_feat = g.ndata["h"]
# node feat + not use_edge_weight
model = GCN(data_info, embed_size=-1, use_edge_weight=False)
......@@ -32,15 +31,13 @@ def test_gcn(g):
model = GCN(data_info, embed_size=-1, use_edge_weight=True)
model(g, node_feat, edge_feat)
@pytest.mark.parametrize('g', get_cases(['block-bipartite']))
@pytest.mark.parametrize("g", get_cases(["block-bipartite"]))
def test_gcn_block(g):
data_info = {
'in_size': 10,
'out_size': 7
}
data_info = {"in_size": 10, "out_size": 7}
blocks = [g]
node_feat = torch.randn(g.num_src_nodes(), data_info['in_size'])
node_feat = torch.randn(g.num_src_nodes(), data_info["in_size"])
edge_feat = torch.abs(torch.randn(g.num_edges()))
# not use_edge_weight
model = GCN(data_info, use_edge_weight=False)
......@@ -50,12 +47,10 @@ def test_gcn_block(g):
model = GCN(data_info, use_edge_weight=True)
model.forward_block(blocks, node_feat, edge_feat)
@pytest.mark.parametrize('g', get_cases(['has_scalar_e_feature']))
@pytest.mark.parametrize("g", get_cases(["has_scalar_e_feature"]))
def test_gat(g):
data_info = {
'num_nodes': g.num_nodes(),
'out_size': 7
}
data_info = {"num_nodes": g.num_nodes(), "out_size": 7}
node_feat = None
# node embedding
......@@ -63,29 +58,25 @@ def test_gat(g):
model(g, node_feat)
# node feat
data_info['in_size'] = g.ndata['h'].shape[-1]
node_feat = g.ndata['h']
data_info["in_size"] = g.ndata["h"].shape[-1]
node_feat = g.ndata["h"]
model = GAT(data_info, embed_size=-1)
model(g, node_feat)
@pytest.mark.parametrize('g', get_cases(['block-bipartite']))
@pytest.mark.parametrize("g", get_cases(["block-bipartite"]))
def test_gat_block(g):
data_info = {
'in_size': 10,
'out_size': 7
}
data_info = {"in_size": 10, "out_size": 7}
blocks = [g]
node_feat = torch.randn(g.num_src_nodes(), data_info['in_size'])
node_feat = torch.randn(g.num_src_nodes(), data_info["in_size"])
model = GAT(data_info, num_layers=1, heads=[8])
model.forward_block(blocks, node_feat)
@pytest.mark.parametrize('g', get_cases(['has_scalar_e_feature']))
@pytest.mark.parametrize("g", get_cases(["has_scalar_e_feature"]))
def test_gin(g):
data_info = {
'num_nodes': g.num_nodes(),
'out_size': 7
}
data_info = {"num_nodes": g.num_nodes(), "out_size": 7}
node_feat = None
# node embedding
......@@ -93,19 +84,17 @@ def test_gin(g):
model(g, node_feat)
# node feat
data_info['in_size'] = g.ndata['h'].shape[-1]
node_feat = g.ndata['h']
data_info["in_size"] = g.ndata["h"].shape[-1]
node_feat = g.ndata["h"]
model = GIN(data_info, embed_size=-1)
model(g, node_feat)
@pytest.mark.parametrize('g', get_cases(['has_scalar_e_feature']))
@pytest.mark.parametrize("g", get_cases(["has_scalar_e_feature"]))
def test_sage(g):
data_info = {
'num_nodes': g.num_nodes(),
'out_size': 7
}
data_info = {"num_nodes": g.num_nodes(), "out_size": 7}
node_feat = None
edge_feat = g.edata['scalar_w']
edge_feat = g.edata["scalar_w"]
# node embedding
model = GraphSAGE(data_info, embed_size=10)
......@@ -113,32 +102,28 @@ def test_sage(g):
model(g, node_feat, edge_feat)
# node feat
data_info['in_size'] = g.ndata['h'].shape[-1]
node_feat = g.ndata['h']
data_info["in_size"] = g.ndata["h"].shape[-1]
node_feat = g.ndata["h"]
model = GraphSAGE(data_info, embed_size=-1)
model(g, node_feat)
model(g, node_feat, edge_feat)
@pytest.mark.parametrize('g', get_cases(['block-bipartite']))
@pytest.mark.parametrize("g", get_cases(["block-bipartite"]))
def test_sage_block(g):
data_info = {
'in_size': 10,
'out_size': 7
}
data_info = {"in_size": 10, "out_size": 7}
blocks = [g]
node_feat = torch.randn(g.num_src_nodes(), data_info['in_size'])
node_feat = torch.randn(g.num_src_nodes(), data_info["in_size"])
edge_feat = torch.abs(torch.randn(g.num_edges()))
model = GraphSAGE(data_info, embed_size=-1)
model.forward_block(blocks, node_feat)
model.forward_block(blocks, node_feat, edge_feat)
@pytest.mark.parametrize('g', get_cases(['has_scalar_e_feature']))
@pytest.mark.parametrize("g", get_cases(["has_scalar_e_feature"]))
def test_sgc(g):
data_info = {
'num_nodes': g.num_nodes(),
'out_size': 7
}
data_info = {"num_nodes": g.num_nodes(), "out_size": 7}
node_feat = None
# node embedding
......@@ -146,44 +131,37 @@ def test_sgc(g):
model(g, node_feat)
# node feat
data_info['in_size'] = g.ndata['h'].shape[-1]
node_feat = g.ndata['h']
data_info["in_size"] = g.ndata["h"].shape[-1]
node_feat = g.ndata["h"]
model = SGC(data_info, embed_size=-1)
model(g, node_feat)
def test_bilinear():
data_info = {
'in_size': 10,
'out_size': 1
}
data_info = {"in_size": 10, "out_size": 1}
model = BilinearPredictor(data_info)
num_pairs = 10
h_src = torch.randn(num_pairs, data_info['in_size'])
h_dst = torch.randn(num_pairs, data_info['in_size'])
h_src = torch.randn(num_pairs, data_info["in_size"])
h_dst = torch.randn(num_pairs, data_info["in_size"])
model(h_src, h_dst)
def test_ele():
data_info = {
'in_size': 10,
'out_size': 1
}
data_info = {"in_size": 10, "out_size": 1}
model = ElementWiseProductPredictor(data_info)
num_pairs = 10
h_src = torch.randn(num_pairs, data_info['in_size'])
h_dst = torch.randn(num_pairs, data_info['in_size'])
h_src = torch.randn(num_pairs, data_info["in_size"])
h_dst = torch.randn(num_pairs, data_info["in_size"])
model(h_src, h_dst)
@pytest.mark.parametrize('virtual_node', [True, False])
@pytest.mark.parametrize("virtual_node", [True, False])
def test_ogbg_gin(virtual_node):
# Test for ogbg-mol datasets
data_info = {
'name': 'ogbg-molhiv',
'out_size': 1
}
model = OGBGGIN(data_info,
embed_size=10,
num_layers=2,
virtual_node=virtual_node)
data_info = {"name": "ogbg-molhiv", "out_size": 1}
model = OGBGGIN(
data_info, embed_size=10, num_layers=2, virtual_node=virtual_node
)
num_nodes = 5
num_edges = 15
g1 = dgl.rand_graph(num_nodes, num_edges)
......@@ -197,29 +175,23 @@ def test_ogbg_gin(virtual_node):
# Test for non-ogbg-mol datasets
data_info = {
'name': 'a_dataset',
'out_size': 1,
'node_feat_size': 15,
'edge_feat_size': 5
"name": "a_dataset",
"out_size": 1,
"node_feat_size": 15,
"edge_feat_size": 5,
}
model = OGBGGIN(data_info,
embed_size=10,
num_layers=2,
virtual_node=virtual_node)
nfeat = torch.randn(num_nodes, data_info['node_feat_size'])
efeat = torch.randn(num_edges, data_info['edge_feat_size'])
model = OGBGGIN(
data_info, embed_size=10, num_layers=2, virtual_node=virtual_node
)
nfeat = torch.randn(num_nodes, data_info["node_feat_size"])
efeat = torch.randn(num_edges, data_info["edge_feat_size"])
model(g, nfeat, efeat)
def test_pna():
# Test for ogbg-mol datasets
data_info = {
'name': 'ogbg-molhiv',
'delta': 1,
'out_size': 1
}
model = PNA(data_info,
embed_size=10,
num_layers=2)
data_info = {"name": "ogbg-molhiv", "delta": 1, "out_size": 1}
model = PNA(data_info, embed_size=10, num_layers=2)
num_nodes = 5
num_edges = 15
g = dgl.rand_graph(num_nodes, num_edges)
......@@ -228,13 +200,11 @@ def test_pna():
# Test for non-ogbg-mol datasets
data_info = {
'name': 'a_dataset',
'node_feat_size': 15,
'delta': 1,
'out_size': 1
"name": "a_dataset",
"node_feat_size": 15,
"delta": 1,
"out_size": 1,
}
model = PNA(data_info,
embed_size=10,
num_layers=2)
nfeat = torch.randn(num_nodes, data_info['node_feat_size'])
model = PNA(data_info, embed_size=10, num_layers=2)
nfeat = torch.randn(num_nodes, data_info["node_feat_size"])
model(g, nfeat)
import os
import pytest
@pytest.mark.parametrize('data', ['cora', 'citeseer', 'pubmed', 'csv', 'reddit',
'co-buy-computer', 'ogbn-arxiv', 'ogbn-products'])
@pytest.mark.parametrize(
"data",
[
"cora",
"citeseer",
"pubmed",
"csv",
"reddit",
"co-buy-computer",
"ogbn-arxiv",
"ogbn-products",
],
)
def test_nodepred_data(data):
os.system(f'dgl configure nodepred --data {data} --model gcn')
assert os.path.exists(f'nodepred_{data}_gcn.yaml')
os.system(f"dgl configure nodepred --data {data} --model gcn")
assert os.path.exists(f"nodepred_{data}_gcn.yaml")
custom_cfg = f'custom_{data}_gcn.yaml'
os.system(f'dgl configure nodepred --data {data} --model gcn --cfg {custom_cfg}')
custom_cfg = f"custom_{data}_gcn.yaml"
os.system(
f"dgl configure nodepred --data {data} --model gcn --cfg {custom_cfg}"
)
assert os.path.exists(custom_cfg)
custom_script = f'{data}_gcn.py'
os.system(f'dgl export --cfg {custom_cfg} --output {custom_script}')
custom_script = f"{data}_gcn.py"
os.system(f"dgl export --cfg {custom_cfg} --output {custom_script}")
assert os.path.exists(custom_script)
@pytest.mark.parametrize('model', ['gcn', 'gat', 'sage', 'sgc', 'gin'])
@pytest.mark.parametrize("model", ["gcn", "gat", "sage", "sgc", "gin"])
def test_nodepred_model(model):
os.system(f'dgl configure nodepred --data cora --model {model}')
assert os.path.exists(f'nodepred_cora_{model}.yaml')
os.system(f"dgl configure nodepred --data cora --model {model}")
assert os.path.exists(f"nodepred_cora_{model}.yaml")
custom_cfg = f'custom_cora_{model}.yaml'
os.system(f'dgl configure nodepred --data cora --model {model} --cfg {custom_cfg}')
custom_cfg = f"custom_cora_{model}.yaml"
os.system(
f"dgl configure nodepred --data cora --model {model} --cfg {custom_cfg}"
)
assert os.path.exists(custom_cfg)
custom_script = f'cora_{model}.py'
os.system(f'dgl export --cfg {custom_cfg} --output {custom_script}')
custom_script = f"cora_{model}.py"
os.system(f"dgl export --cfg {custom_cfg} --output {custom_script}")
assert os.path.exists(custom_script)
@pytest.mark.parametrize('data', ['cora', 'citeseer', 'pubmed', 'csv', 'reddit',
'co-buy-computer', 'ogbn-arxiv', 'ogbn-products'])
@pytest.mark.parametrize(
"data",
[
"cora",
"citeseer",
"pubmed",
"csv",
"reddit",
"co-buy-computer",
"ogbn-arxiv",
"ogbn-products",
],
)
def test_nodepred_ns_data(data):
os.system(f'dgl configure nodepred-ns --data {data} --model gcn')
assert os.path.exists(f'nodepred-ns_{data}_gcn.yaml')
os.system(f"dgl configure nodepred-ns --data {data} --model gcn")
assert os.path.exists(f"nodepred-ns_{data}_gcn.yaml")
custom_cfg = f'ns-custom_{data}_gcn.yaml'
os.system(f'dgl configure nodepred-ns --data {data} --model gcn --cfg {custom_cfg}')
custom_cfg = f"ns-custom_{data}_gcn.yaml"
os.system(
f"dgl configure nodepred-ns --data {data} --model gcn --cfg {custom_cfg}"
)
assert os.path.exists(custom_cfg)
custom_script = f'ns-{data}_gcn.py'
os.system(f'dgl export --cfg {custom_cfg} --output {custom_script}')
custom_script = f"ns-{data}_gcn.py"
os.system(f"dgl export --cfg {custom_cfg} --output {custom_script}")
assert os.path.exists(custom_script)
@pytest.mark.parametrize('model', ['gcn', 'gat', 'sage'])
@pytest.mark.parametrize("model", ["gcn", "gat", "sage"])
def test_nodepred_ns_model(model):
os.system(f'dgl configure nodepred-ns --data cora --model {model}')
assert os.path.exists(f'nodepred-ns_cora_{model}.yaml')
os.system(f"dgl configure nodepred-ns --data cora --model {model}")
assert os.path.exists(f"nodepred-ns_cora_{model}.yaml")
custom_cfg = f'ns-custom_cora_{model}.yaml'
os.system(f'dgl configure nodepred-ns --data cora --model {model} --cfg {custom_cfg}')
custom_cfg = f"ns-custom_cora_{model}.yaml"
os.system(
f"dgl configure nodepred-ns --data cora --model {model} --cfg {custom_cfg}"
)
assert os.path.exists(custom_cfg)
custom_script = f'ns-cora_{model}.py'
os.system(f'dgl export --cfg {custom_cfg} --output {custom_script}')
custom_script = f"ns-cora_{model}.py"
os.system(f"dgl export --cfg {custom_cfg} --output {custom_script}")
assert os.path.exists(custom_script)
@pytest.mark.parametrize('data', ['cora', 'citeseer', 'pubmed', 'csv', 'reddit',
'co-buy-computer', 'ogbn-arxiv', 'ogbn-products', 'ogbl-collab',
'ogbl-citation2'])
@pytest.mark.parametrize(
"data",
[
"cora",
"citeseer",
"pubmed",
"csv",
"reddit",
"co-buy-computer",
"ogbn-arxiv",
"ogbn-products",
"ogbl-collab",
"ogbl-citation2",
],
)
def test_linkpred_data(data):
node_model = 'gcn'
edge_model = 'ele'
neg_sampler = 'global'
custom_cfg = '_'.join([data, node_model, edge_model, neg_sampler]) + '.yaml'
os.system('dgl configure linkpred --data {} --node-model {} --edge-model {} --neg-sampler {} --cfg {}'.format(
data, node_model, edge_model, neg_sampler, custom_cfg))
node_model = "gcn"
edge_model = "ele"
neg_sampler = "global"
custom_cfg = "_".join([data, node_model, edge_model, neg_sampler]) + ".yaml"
os.system(
"dgl configure linkpred --data {} --node-model {} --edge-model {} --neg-sampler {} --cfg {}".format(
data, node_model, edge_model, neg_sampler, custom_cfg
)
)
assert os.path.exists(custom_cfg)
custom_script = '_'.join([data, node_model, edge_model, neg_sampler]) + '.py'
os.system('dgl export --cfg {} --output {}'.format(custom_cfg, custom_script))
custom_script = (
"_".join([data, node_model, edge_model, neg_sampler]) + ".py"
)
os.system(
"dgl export --cfg {} --output {}".format(custom_cfg, custom_script)
)
assert os.path.exists(custom_script)
@pytest.mark.parametrize('node_model', ['gcn' ,'gat', 'sage', 'sgc', 'gin'])
@pytest.mark.parametrize("node_model", ["gcn", "gat", "sage", "sgc", "gin"])
def test_linkpred_node_model(node_model):
data = 'cora'
edge_model = 'ele'
neg_sampler = 'global'
custom_cfg = '_'.join([data, node_model, edge_model, neg_sampler]) + '.yaml'
os.system('dgl configure linkpred --data {} --node-model {} --edge-model {} --neg-sampler {} --cfg {}'.format(
data, node_model, edge_model, neg_sampler, custom_cfg))
data = "cora"
edge_model = "ele"
neg_sampler = "global"
custom_cfg = "_".join([data, node_model, edge_model, neg_sampler]) + ".yaml"
os.system(
"dgl configure linkpred --data {} --node-model {} --edge-model {} --neg-sampler {} --cfg {}".format(
data, node_model, edge_model, neg_sampler, custom_cfg
)
)
assert os.path.exists(custom_cfg)
custom_script = '_'.join([data, node_model, edge_model, neg_sampler]) + '.py'
os.system('dgl export --cfg {} --output {}'.format(custom_cfg, custom_script))
custom_script = (
"_".join([data, node_model, edge_model, neg_sampler]) + ".py"
)
os.system(
"dgl export --cfg {} --output {}".format(custom_cfg, custom_script)
)
assert os.path.exists(custom_script)
@pytest.mark.parametrize('edge_model', ['ele', 'bilinear'])
@pytest.mark.parametrize("edge_model", ["ele", "bilinear"])
def test_linkpred_edge_model(edge_model):
data = 'cora'
node_model = 'gcn'
neg_sampler = 'global'
custom_cfg = '_'.join([data, node_model, edge_model, neg_sampler]) + '.yaml'
os.system('dgl configure linkpred --data {} --node-model {} --edge-model {} --neg-sampler {} --cfg {}'.format(
data, node_model, edge_model, neg_sampler, custom_cfg))
data = "cora"
node_model = "gcn"
neg_sampler = "global"
custom_cfg = "_".join([data, node_model, edge_model, neg_sampler]) + ".yaml"
os.system(
"dgl configure linkpred --data {} --node-model {} --edge-model {} --neg-sampler {} --cfg {}".format(
data, node_model, edge_model, neg_sampler, custom_cfg
)
)
assert os.path.exists(custom_cfg)
custom_script = '_'.join([data, node_model, edge_model, neg_sampler]) + '.py'
os.system('dgl export --cfg {} --output {}'.format(custom_cfg, custom_script))
custom_script = (
"_".join([data, node_model, edge_model, neg_sampler]) + ".py"
)
os.system(
"dgl export --cfg {} --output {}".format(custom_cfg, custom_script)
)
assert os.path.exists(custom_script)
@pytest.mark.parametrize('neg_sampler', ['global', 'persource', ''])
@pytest.mark.parametrize("neg_sampler", ["global", "persource", ""])
def test_linkpred_neg_sampler(neg_sampler):
data = 'cora'
node_model = 'gcn'
edge_model = 'ele'
custom_cfg = f'{data}_{node_model}_{edge_model}_{neg_sampler}.yaml'
if neg_sampler == '':
os.system('dgl configure linkpred --data {} --node-model {} --edge-model {} --cfg {}'.format(
data, node_model, edge_model, custom_cfg))
data = "cora"
node_model = "gcn"
edge_model = "ele"
custom_cfg = f"{data}_{node_model}_{edge_model}_{neg_sampler}.yaml"
if neg_sampler == "":
os.system(
"dgl configure linkpred --data {} --node-model {} --edge-model {} --cfg {}".format(
data, node_model, edge_model, custom_cfg
)
)
else:
os.system('dgl configure linkpred --data {} --node-model {} --edge-model {} --neg-sampler {} --cfg {}'.format(
data, node_model, edge_model, neg_sampler, custom_cfg))
os.system(
"dgl configure linkpred --data {} --node-model {} --edge-model {} --neg-sampler {} --cfg {}".format(
data, node_model, edge_model, neg_sampler, custom_cfg
)
)
assert os.path.exists(custom_cfg)
custom_script = f'{data}_{node_model}_{edge_model}_{neg_sampler}.py'
os.system('dgl export --cfg {} --output {}'.format(custom_cfg, custom_script))
custom_script = f"{data}_{node_model}_{edge_model}_{neg_sampler}.py"
os.system(
"dgl export --cfg {} --output {}".format(custom_cfg, custom_script)
)
assert os.path.exists(custom_script)
@pytest.mark.parametrize('data', ['csv', 'ogbg-molhiv', 'ogbg-molpcba'])
@pytest.mark.parametrize('model', ['gin', 'pna'])
@pytest.mark.parametrize("data", ["csv", "ogbg-molhiv", "ogbg-molpcba"])
@pytest.mark.parametrize("model", ["gin", "pna"])
def test_graphpred(data, model):
os.system('dgl configure graphpred --data {} --model {}'.format(data, model))
assert os.path.exists('graphpred_{}_{}.yaml'.format(data, model))
os.system(
"dgl configure graphpred --data {} --model {}".format(data, model)
)
assert os.path.exists("graphpred_{}_{}.yaml".format(data, model))
custom_cfg = 'custom_{}_{}.yaml'.format(data, model)
os.system('dgl configure graphpred --data {} --model {} --cfg {}'.format(data, model,
custom_cfg))
custom_cfg = "custom_{}_{}.yaml".format(data, model)
os.system(
"dgl configure graphpred --data {} --model {} --cfg {}".format(
data, model, custom_cfg
)
)
assert os.path.exists(custom_cfg)
custom_script = '_'.join([data, model]) + '.py'
os.system('dgl export --cfg {} --output {}'.format(custom_cfg, custom_script))
custom_script = "_".join([data, model]) + ".py"
os.system(
"dgl export --cfg {} --output {}".format(custom_cfg, custom_script)
)
assert os.path.exists(custom_script)
@pytest.mark.parametrize('recipe',
['graphpred_hiv_gin.yaml',
'graphpred_hiv_pna.yaml',
'graphpred_pcba_gin.yaml',
'linkpred_cora_sage.yaml',
'linkpred_citation2_sage.yaml',
'linkpred_collab_sage.yaml',
'nodepred_citeseer_gat.yaml',
'nodepred_citeseer_gcn.yaml',
'nodepred_citeseer_sage.yaml',
'nodepred_cora_gat.yaml',
'nodepred_cora_gcn.yaml',
'nodepred_cora_sage.yaml',
'nodepred_pubmed_gat.yaml',
'nodepred_pubmed_gcn.yaml',
'nodepred_pubmed_sage.yaml',
'nodepred-ns_arxiv_gcn.yaml',
'nodepred-ns_product_sage.yaml'])
@pytest.mark.parametrize(
"recipe",
[
"graphpred_hiv_gin.yaml",
"graphpred_hiv_pna.yaml",
"graphpred_pcba_gin.yaml",
"linkpred_cora_sage.yaml",
"linkpred_citation2_sage.yaml",
"linkpred_collab_sage.yaml",
"nodepred_citeseer_gat.yaml",
"nodepred_citeseer_gcn.yaml",
"nodepred_citeseer_sage.yaml",
"nodepred_cora_gat.yaml",
"nodepred_cora_gcn.yaml",
"nodepred_cora_sage.yaml",
"nodepred_pubmed_gat.yaml",
"nodepred_pubmed_gcn.yaml",
"nodepred_pubmed_sage.yaml",
"nodepred-ns_arxiv_gcn.yaml",
"nodepred-ns_product_sage.yaml",
],
)
def test_recipe(recipe):
# Remove all generated yaml files
current_dir = os.listdir("./")
......@@ -160,19 +250,22 @@ def test_recipe(recipe):
if item.endswith(".yaml"):
os.remove(item)
os.system('dgl recipe get {}'.format(recipe))
os.system("dgl recipe get {}".format(recipe))
assert os.path.exists(recipe)
def test_node_cora():
os.system('dgl configure nodepred --data cora --model gcn')
os.system('dgl train --cfg nodepred_cora_gcn.yaml')
assert os.path.exists('results')
assert os.path.exists('results/run_0.pth')
os.system('dgl configure-apply nodepred --cpt results/run_0.pth')
assert os.path.exists('apply_nodepred_cora_gcn.yaml')
os.system('dgl configure-apply nodepred --data cora --cpt results/run_0.pth --cfg apply.yaml')
assert os.path.exists('apply.yaml')
os.system('dgl apply --cfg apply.yaml')
assert os.path.exists('apply_results/output.csv')
os.system('dgl export --cfg apply.yaml --output apply.py')
assert os.path.exists('apply.py')
os.system("dgl configure nodepred --data cora --model gcn")
os.system("dgl train --cfg nodepred_cora_gcn.yaml")
assert os.path.exists("results")
assert os.path.exists("results/run_0.pth")
os.system("dgl configure-apply nodepred --cpt results/run_0.pth")
assert os.path.exists("apply_nodepred_cora_gcn.yaml")
os.system(
"dgl configure-apply nodepred --data cora --cpt results/run_0.pth --cfg apply.yaml"
)
assert os.path.exists("apply.yaml")
os.system("dgl apply --cfg apply.yaml")
assert os.path.exists("apply_results/output.csv")
os.system("dgl export --cfg apply.yaml --output apply.py")
assert os.path.exists("apply.py")
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment