Unverified Commit 89a4cc4d authored by Hongzhi (Steve), Chen, committed by GitHub

[Misc] Black auto fix. (#4694)


Co-authored-by: Steve <ubuntu@ip-172-31-34-29.ap-northeast-1.compute.internal>
parent 303b150f
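
This commit is a mechanical reformat produced by the Black code formatter: string literals are normalized to double quotes, long calls are reflowed across multiple lines, and blank lines between top-level definitions are normalized; no test logic changes. As a rough sketch, the change corresponds to running Black over the test files with the project's configuration (the exact command and settings are an assumption, not recorded in this commit):

    python -m black tests/
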
import unittest

import backend as F
import networkx as nx
import numpy as np
import pytest
from test_utils import parametrize_idtype
from test_utils.graph_cases import get_cases

import dgl

@parametrize_idtype
def test_sum_case1(idtype):
    # NOTE: If you want to update this test case, remember to update the docstring
    # example too!!!
    g1 = dgl.graph(([0, 1], [1, 0]), idtype=idtype, device=F.ctx())
    g1.ndata["h"] = F.tensor([1.0, 2.0])
    g2 = dgl.graph(([0, 1], [1, 2]), idtype=idtype, device=F.ctx())
    g2.ndata["h"] = F.tensor([1.0, 2.0, 3.0])
    bg = dgl.batch([g1, g2])
    bg.ndata["w"] = F.tensor([0.1, 0.2, 0.1, 0.5, 0.2])
    assert F.allclose(F.tensor([3.0]), dgl.sum_nodes(g1, "h"))
    assert F.allclose(F.tensor([3.0, 6.0]), dgl.sum_nodes(bg, "h"))
    assert F.allclose(F.tensor([0.5, 1.7]), dgl.sum_nodes(bg, "h", "w"))
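
# Worked example (added for clarity, not part of the upstream test): with the
# weight field "w", dgl.sum_nodes computes a per-graph weighted sum of "h".
# For the batch above this gives
#   graph 1: 1.0 * 0.1 + 2.0 * 0.2 = 0.5
#   graph 2: 1.0 * 0.1 + 2.0 * 0.5 + 3.0 * 0.2 = 1.7
# which matches the expected tensor [0.5, 1.7] in the last assertion.
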
@parametrize_idtype
@pytest.mark.parametrize("g", get_cases(["homo"], exclude=["dglgraph"]))
@pytest.mark.parametrize("reducer", ["sum", "max", "mean"])
def test_reduce_readout(g, idtype, reducer):
    g = g.astype(idtype).to(F.ctx())
    g.ndata["h"] = F.randn((g.number_of_nodes(), 3))
    g.edata["h"] = F.randn((g.number_of_edges(), 2))
    # Test.1: node readout
    x = dgl.readout_nodes(g, "h", op=reducer)
    # check correctness
    subg = dgl.unbatch(g)
    subx = []
    for sg in subg:
        sx = dgl.readout_nodes(sg, "h", op=reducer)
        subx.append(sx)
    assert F.allclose(x, F.cat(subx, dim=0))
    x = getattr(dgl, "{}_nodes".format(reducer))(g, "h")
    # check correctness
    subg = dgl.unbatch(g)
    subx = []
    for sg in subg:
        sx = getattr(dgl, "{}_nodes".format(reducer))(sg, "h")
        subx.append(sx)
    assert F.allclose(x, F.cat(subx, dim=0))
    # Test.2: edge readout
    x = dgl.readout_edges(g, "h", op=reducer)
    # check correctness
    subg = dgl.unbatch(g)
    subx = []
    for sg in subg:
        sx = dgl.readout_edges(sg, "h", op=reducer)
        subx.append(sx)
    assert F.allclose(x, F.cat(subx, dim=0))
    x = getattr(dgl, "{}_edges".format(reducer))(g, "h")
    # check correctness
    subg = dgl.unbatch(g)
    subx = []
    for sg in subg:
        sx = getattr(dgl, "{}_edges".format(reducer))(sg, "h")
        subx.append(sx)
    assert F.allclose(x, F.cat(subx, dim=0))
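
# Note (added for clarity): the pattern above recurs throughout these tests —
# a readout computed on the batched graph must equal the concatenation of the
# same readout computed on each unbatched component, which is what the
# dgl.unbatch loop plus F.cat comparison verifies.
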
@parametrize_idtype
@pytest.mark.parametrize("g", get_cases(["homo"], exclude=["dglgraph"]))
@pytest.mark.parametrize("reducer", ["sum", "max", "mean"])
def test_weighted_reduce_readout(g, idtype, reducer):
    g = g.astype(idtype).to(F.ctx())
    g.ndata["h"] = F.randn((g.number_of_nodes(), 3))
    g.ndata["w"] = F.randn((g.number_of_nodes(), 1))
    g.edata["h"] = F.randn((g.number_of_edges(), 2))
    g.edata["w"] = F.randn((g.number_of_edges(), 1))
    # Test.1: node readout
    x = dgl.readout_nodes(g, "h", "w", op=reducer)
    # check correctness
    subg = dgl.unbatch(g)
    subx = []
    for sg in subg:
        sx = dgl.readout_nodes(sg, "h", "w", op=reducer)
        subx.append(sx)
    assert F.allclose(x, F.cat(subx, dim=0))
    x = getattr(dgl, "{}_nodes".format(reducer))(g, "h", "w")
    # check correctness
    subg = dgl.unbatch(g)
    subx = []
    for sg in subg:
        sx = getattr(dgl, "{}_nodes".format(reducer))(sg, "h", "w")
        subx.append(sx)
    assert F.allclose(x, F.cat(subx, dim=0))
    # Test.2: edge readout
    x = dgl.readout_edges(g, "h", "w", op=reducer)
    # check correctness
    subg = dgl.unbatch(g)
    subx = []
    for sg in subg:
        sx = dgl.readout_edges(sg, "h", "w", op=reducer)
        subx.append(sx)
    assert F.allclose(x, F.cat(subx, dim=0))
    x = getattr(dgl, "{}_edges".format(reducer))(g, "h", "w")
    # check correctness
    subg = dgl.unbatch(g)
    subx = []
    for sg in subg:
        sx = getattr(dgl, "{}_edges".format(reducer))(sg, "h", "w")
        subx.append(sx)
    assert F.allclose(x, F.cat(subx, dim=0))

@parametrize_idtype
@pytest.mark.parametrize("g", get_cases(["homo"], exclude=["dglgraph"]))
@pytest.mark.parametrize("descending", [True, False])
def test_topk(g, idtype, descending):
    g = g.astype(idtype).to(F.ctx())
    g.ndata["x"] = F.randn((g.number_of_nodes(), 3))
    # Test.1: to test the case where k > number of nodes.
    dgl.topk_nodes(g, "x", 100, sortby=-1)
    # Test.2: test correctness
    min_nnodes = F.asnumpy(g.batch_num_nodes()).min()
    if min_nnodes <= 1:
        return
    k = min_nnodes - 1
    val, indices = dgl.topk_nodes(g, "x", k, descending=descending, sortby=-1)
    print(k)
    print(g.ndata["x"])
    print("val", val)
    print("indices", indices)
    subg = dgl.unbatch(g)
    subval, subidx = [], []
    for sg in subg:
        subx = F.asnumpy(sg.ndata["x"])
        ai = np.argsort(subx[:, -1:].flatten())
        if descending:
            ai = np.ascontiguousarray(ai[::-1])
        subx = np.expand_dims(subx[ai[:k]], 0)

@@ -150,28 +156,28 @@ def test_topk(g, idtype, descending):

    assert F.allclose(indices, F.cat(subidx, dim=0))
    # Test.3: sorby=None
    dgl.topk_nodes(g, "x", k, sortby=None)
    g.edata["x"] = F.randn((g.number_of_edges(), 3))
    # Test.4: topk edges where k > number of edges.
    dgl.topk_edges(g, "x", 100, sortby=-1)
    # Test.5: topk edges test correctness
    min_nedges = F.asnumpy(g.batch_num_edges()).min()
    if min_nedges <= 1:
        return
    k = min_nedges - 1
    val, indices = dgl.topk_edges(g, "x", k, descending=descending, sortby=-1)
    print(k)
    print(g.edata["x"])
    print("val", val)
    print("indices", indices)
    subg = dgl.unbatch(g)
    subval, subidx = [], []
    for sg in subg:
        subx = F.asnumpy(sg.edata["x"])
        ai = np.argsort(subx[:, -1:].flatten())
        if descending:
            ai = np.ascontiguousarray(ai[::-1])
        subx = np.expand_dims(subx[ai[:k]], 0)

@@ -181,45 +187,51 @@ def test_topk(g, idtype, descending):

    assert F.allclose(val, F.cat(subval, dim=0))
    assert F.allclose(indices, F.cat(subidx, dim=0))

@parametrize_idtype
@pytest.mark.parametrize("g", get_cases(["homo"], exclude=["dglgraph"]))
def test_softmax(g, idtype):
    g = g.astype(idtype).to(F.ctx())
    g.ndata["h"] = F.randn((g.number_of_nodes(), 3))
    g.edata["h"] = F.randn((g.number_of_edges(), 2))
    # Test.1: node readout
    x = dgl.softmax_nodes(g, "h")
    subg = dgl.unbatch(g)
    subx = []
    for sg in subg:
        subx.append(F.softmax(sg.ndata["h"], dim=0))
    assert F.allclose(x, F.cat(subx, dim=0))
    # Test.2: edge readout
    x = dgl.softmax_edges(g, "h")
    subg = dgl.unbatch(g)
    subx = []
    for sg in subg:
        subx.append(F.softmax(sg.edata["h"], dim=0))
    assert F.allclose(x, F.cat(subx, dim=0))

@parametrize_idtype
@pytest.mark.parametrize("g", get_cases(["homo"], exclude=["dglgraph"]))
def test_broadcast(idtype, g):
    g = g.astype(idtype).to(F.ctx())
    gfeat = F.randn((g.batch_size, 3))
    # Test.0: broadcast_nodes
    g.ndata["h"] = dgl.broadcast_nodes(g, gfeat)
    subg = dgl.unbatch(g)
    for i, sg in enumerate(subg):
        assert F.allclose(
            sg.ndata["h"],
            F.repeat(F.reshape(gfeat[i], (1, 3)), sg.number_of_nodes(), dim=0),
        )
    # Test.1: broadcast_edges
    g.edata["h"] = dgl.broadcast_edges(g, gfeat)
    subg = dgl.unbatch(g)
    for i, sg in enumerate(subg):
        assert F.allclose(
            sg.edata["h"],
            F.repeat(F.reshape(gfeat[i], (1, 3)), sg.number_of_edges(), dim=0),
        )

import backend as F
import numpy as np
from test_utils import parametrize_idtype

import dgl

@parametrize_idtype
def test_node_removal(idtype):
    g = dgl.DGLGraph()

@@ -10,27 +12,30 @@ def test_node_removal(idtype):

    g.add_nodes(10)
    g.add_edge(0, 0)
    assert g.number_of_nodes() == 10
    g.ndata["id"] = F.arange(0, 10)
    # remove nodes
    g.remove_nodes(range(4, 7))
    assert g.number_of_nodes() == 7
    assert F.array_equal(g.ndata["id"], F.tensor([0, 1, 2, 3, 7, 8, 9]))
    assert dgl.NID not in g.ndata
    assert dgl.EID not in g.edata
    # add nodes
    g.add_nodes(3)
    assert g.number_of_nodes() == 10
    assert F.array_equal(
        g.ndata["id"], F.tensor([0, 1, 2, 3, 7, 8, 9, 0, 0, 0])
    )
    # remove nodes
    g.remove_nodes(range(1, 4), store_ids=True)
    assert g.number_of_nodes() == 7
    assert F.array_equal(g.ndata["id"], F.tensor([0, 7, 8, 9, 0, 0, 0]))
    assert dgl.NID in g.ndata
    assert dgl.EID in g.edata

@parametrize_idtype
def test_multigraph_node_removal(idtype):
    g = dgl.DGLGraph()

@@ -59,6 +64,7 @@ def test_multigraph_node_removal(idtype):

    assert g.number_of_nodes() == 3
    assert g.number_of_edges() == 6


@parametrize_idtype
def test_multigraph_edge_removal(idtype):
    g = dgl.DGLGraph()

@@ -86,6 +92,7 @@ def test_multigraph_edge_removal(idtype):

    assert g.number_of_nodes() == 5
    assert g.number_of_edges() == 8

@parametrize_idtype
def test_edge_removal(idtype):
    g = dgl.DGLGraph()

@@ -94,13 +101,15 @@ def test_edge_removal(idtype):

    for i in range(5):
        for j in range(5):
            g.add_edge(i, j)
    g.edata["id"] = F.arange(0, 25)
    # remove edges
    g.remove_edges(range(13, 20))
    assert g.number_of_nodes() == 5
    assert g.number_of_edges() == 18
    assert F.array_equal(
        g.edata["id"], F.tensor(list(range(13)) + list(range(20, 25)))
    )
    assert dgl.NID not in g.ndata
    assert dgl.EID not in g.edata

@@ -108,15 +117,20 @@ def test_edge_removal(idtype):

    g.add_edge(3, 3)
    assert g.number_of_nodes() == 5
    assert g.number_of_edges() == 19
    assert F.array_equal(
        g.edata["id"], F.tensor(list(range(13)) + list(range(20, 25)) + [0])
    )
    # remove edges
    g.remove_edges(range(2, 10), store_ids=True)
    assert g.number_of_nodes() == 5
    assert g.number_of_edges() == 11
    assert F.array_equal(
        g.edata["id"], F.tensor([0, 1, 10, 11, 12, 20, 21, 22, 23, 24, 0])
    )
    assert dgl.EID in g.edata

@parametrize_idtype
def test_node_and_edge_removal(idtype):
    g = dgl.DGLGraph()

@@ -125,7 +139,7 @@ def test_node_and_edge_removal(idtype):

    for i in range(10):
        for j in range(10):
            g.add_edge(i, j)
    g.edata["id"] = F.arange(0, 100)
    assert g.number_of_nodes() == 10
    assert g.number_of_edges() == 100

@@ -156,6 +170,7 @@ def test_node_and_edge_removal(idtype):

    assert g.number_of_nodes() == 10
    assert g.number_of_edges() == 48

@parametrize_idtype
def test_node_frame(idtype):
    g = dgl.DGLGraph()

@@ -163,11 +178,12 @@ def test_node_frame(idtype):

    g.add_nodes(10)
    data = np.random.rand(10, 3)
    new_data = data.take([0, 1, 2, 7, 8, 9], axis=0)
    g.ndata["h"] = F.tensor(data)
    # remove nodes
    g.remove_nodes(range(3, 7))
    assert F.allclose(g.ndata["h"], F.tensor(new_data))


@parametrize_idtype
def test_edge_frame(idtype):

@@ -177,11 +193,12 @@ def test_edge_frame(idtype):

    g.add_edges(list(range(10)), list(range(1, 10)) + [0])
    data = np.random.rand(10, 3)
    new_data = data.take([0, 1, 2, 7, 8, 9], axis=0)
    g.edata["h"] = F.tensor(data)
    # remove edges
    g.remove_edges(range(3, 7))
    assert F.allclose(g.edata["h"], F.tensor(new_data))

@parametrize_idtype
def test_issue1287(idtype):

@@ -192,20 +209,21 @@ def test_issue1287(idtype):

    g.add_nodes(5)
    g.add_edges([0, 2, 3, 1, 1], [1, 0, 3, 1, 0])
    g.remove_nodes([0, 1])
    g.ndata["h"] = F.randn((g.number_of_nodes(), 3))
    g.edata["h"] = F.randn((g.number_of_edges(), 2))
    # remove edges
    g = dgl.DGLGraph()
    g = g.astype(idtype).to(F.ctx())
    g.add_nodes(5)
    g.add_edges([0, 2, 3, 1, 1], [1, 0, 3, 1, 0])
    g.remove_edges([0, 1])
    g = g.to(F.ctx())
    g.ndata["h"] = F.randn((g.number_of_nodes(), 3))
    g.edata["h"] = F.randn((g.number_of_edges(), 2))


if __name__ == "__main__":
    test_node_removal()
    test_edge_removal()
    test_multigraph_node_removal()

import os
import tempfile
import time
import unittest

import backend as F
import numpy as np
import pytest
import scipy as sp

import dgl
import dgl.ndarray as nd
from dgl import DGLGraph
from dgl.data.utils import load_labels, load_tensors, save_tensors

np.random.seed(44)


def generate_rand_graph(n, is_hetero):
    arr = (sp.sparse.random(n, n, density=0.1, format="coo") != 0).astype(
        np.int64
    )
    if is_hetero:
        return dgl.from_scipy(arr)
    else:

@@ -28,15 +30,15 @@ def construct_graph(n, is_hetero):

    g_list = []
    for i in range(n):
        g = generate_rand_graph(30, is_hetero)
        g.edata["e1"] = F.randn((g.number_of_edges(), 32))
        g.edata["e2"] = F.ones((g.number_of_edges(), 32))
        g.ndata["n1"] = F.randn((g.number_of_nodes(), 64))
        g_list.append(g)
    return g_list

@unittest.skipIf(F._default_context_str == "gpu", reason="GPU not implemented")
@pytest.mark.parametrize("is_hetero", [True, False])
def test_graph_serialize_with_feature(is_hetero):
    num_graphs = 100

@@ -66,19 +68,19 @@ def test_graph_serialize_with_feature(is_hetero):

    assert F.allclose(load_g.nodes(), g_list[idx].nodes())

    load_edges = load_g.all_edges("uv", "eid")
    g_edges = g_list[idx].all_edges("uv", "eid")
    assert F.allclose(load_edges[0], g_edges[0])
    assert F.allclose(load_edges[1], g_edges[1])
    assert F.allclose(load_g.edata["e1"], g_list[idx].edata["e1"])
    assert F.allclose(load_g.edata["e2"], g_list[idx].edata["e2"])
    assert F.allclose(load_g.ndata["n1"], g_list[idx].ndata["n1"])

    os.unlink(path)

@unittest.skipIf(F._default_context_str == "gpu", reason="GPU not implemented")
@pytest.mark.parametrize("is_hetero", [True, False])
def test_graph_serialize_without_feature(is_hetero):
    num_graphs = 100
    g_list = [generate_rand_graph(30, is_hetero) for _ in range(num_graphs)]

@@ -98,15 +100,16 @@ def test_graph_serialize_without_feature(is_hetero):

    assert F.allclose(load_g.nodes(), g_list[idx].nodes())

    load_edges = load_g.all_edges("uv", "eid")
    g_edges = g_list[idx].all_edges("uv", "eid")
    assert F.allclose(load_edges[0], g_edges[0])
    assert F.allclose(load_edges[1], g_edges[1])

    os.unlink(path)

@unittest.skipIf(F._default_context_str == "gpu", reason="GPU not implemented")
@pytest.mark.parametrize("is_hetero", [True, False])
def test_graph_serialize_with_labels(is_hetero):
    num_graphs = 100
    g_list = [generate_rand_graph(30, is_hetero) for _ in range(num_graphs)]

@@ -122,16 +125,16 @@ def test_graph_serialize_with_labels(is_hetero):

    idx_list = np.random.permutation(np.arange(num_graphs)).tolist()
    loadg_list, l_labels0 = dgl.load_graphs(path, idx_list)
    l_labels = load_labels(path)
    assert F.allclose(l_labels["label"], labels["label"])
    assert F.allclose(l_labels0["label"], labels["label"])

    idx = idx_list[0]
    load_g = loadg_list[0]
    assert F.allclose(load_g.nodes(), g_list[idx].nodes())

    load_edges = load_g.all_edges("uv", "eid")
    g_edges = g_list[idx].all_edges("uv", "eid")
    assert F.allclose(load_edges[0], g_edges[0])
    assert F.allclose(load_edges[1], g_edges[1])

@@ -144,8 +147,10 @@ def test_serialize_tensors():

    path = f.name
    f.close()

    tensor_dict = {
        "a": F.tensor([1, 3, -1, 0], dtype=F.int64),
        "1@1": F.tensor([1.5, 2], dtype=F.float32),
    }

    save_tensors(path, tensor_dict)

@@ -154,7 +159,8 @@ def test_serialize_tensors():

    for key in tensor_dict:
        assert key in load_tensor_dict
        assert np.array_equal(
            F.asnumpy(load_tensor_dict[key]), F.asnumpy(tensor_dict[key])
        )

    load_nd_dict = load_tensors(path, return_dgl_ndarray=True)

@@ -162,7 +168,8 @@ def test_serialize_tensors():

        assert key in load_nd_dict
        assert isinstance(load_nd_dict[key], nd.NDArray)
        assert np.array_equal(
            load_nd_dict[key].asnumpy(), F.asnumpy(tensor_dict[key])
        )

    os.unlink(path)

@@ -185,103 +192,120 @@ def test_serialize_empty_dict():

def test_load_old_files1():
    loadg_list, _ = dgl.load_graphs(
        os.path.join(os.path.dirname(__file__), "data/1.bin")
    )
    idx, num_nodes, edge0, edge1, edata_e1, edata_e2, ndata_n1 = np.load(
        os.path.join(os.path.dirname(__file__), "data/1.npy"), allow_pickle=True
    )
    load_g = loadg_list[idx]
    load_edges = load_g.all_edges("uv", "eid")

    assert np.allclose(F.asnumpy(load_edges[0]), edge0)
    assert np.allclose(F.asnumpy(load_edges[1]), edge1)
    assert np.allclose(F.asnumpy(load_g.edata["e1"]), edata_e1)
    assert np.allclose(F.asnumpy(load_g.edata["e2"]), edata_e2)
    assert np.allclose(F.asnumpy(load_g.ndata["n1"]), ndata_n1)

def test_load_old_files2():
    loadg_list, labels0 = dgl.load_graphs(
        os.path.join(os.path.dirname(__file__), "data/2.bin")
    )
    labels1 = load_labels(os.path.join(os.path.dirname(__file__), "data/2.bin"))
    idx, edges0, edges1, np_labels = np.load(
        os.path.join(os.path.dirname(__file__), "data/2.npy"), allow_pickle=True
    )
    assert np.allclose(F.asnumpy(labels0["label"]), np_labels)
    assert np.allclose(F.asnumpy(labels1["label"]), np_labels)

    load_g = loadg_list[idx]
    print(load_g)
    load_edges = load_g.all_edges("uv", "eid")

    assert np.allclose(F.asnumpy(load_edges[0]), edges0)
    assert np.allclose(F.asnumpy(load_edges[1]), edges1)

def create_heterographs(idtype):
    g_x = dgl.heterograph(
        {("user", "follows", "user"): ([0, 1, 2], [1, 2, 3])}, idtype=idtype
    )
    g_y = dgl.heterograph(
        {("user", "knows", "user"): ([0, 2], [2, 3])}, idtype=idtype
    ).formats("csr")
    g_x.ndata["h"] = F.randn((4, 3))
    g_x.edata["w"] = F.randn((3, 2))
    g_y.ndata["hh"] = F.ones((4, 5))
    g_y.edata["ww"] = F.randn((2, 10))
    g = dgl.heterograph(
        {
            ("user", "follows", "user"): ([0, 1, 2], [1, 2, 3]),
            ("user", "knows", "user"): ([0, 2], [2, 3]),
        },
        idtype=idtype,
    )
    g.nodes["user"].data["h"] = g_x.ndata["h"]
    g.nodes["user"].data["hh"] = g_y.ndata["hh"]
    g.edges["follows"].data["w"] = g_x.edata["w"]
    g.edges["knows"].data["ww"] = g_y.edata["ww"]
    return [g, g_x, g_y]

def create_heterographs2(idtype):
    g_x = dgl.heterograph(
        {("user", "follows", "user"): ([0, 1, 2], [1, 2, 3])}, idtype=idtype
    )
    g_y = dgl.heterograph(
        {("user", "knows", "user"): ([0, 2], [2, 3])}, idtype=idtype
    ).formats("csr")
    g_z = dgl.heterograph(
        {("user", "knows", "knowledge"): ([0, 1, 3], [2, 3, 4])}, idtype=idtype
    )
    g_x.ndata["h"] = F.randn((4, 3))
    g_x.edata["w"] = F.randn((3, 2))
    g_y.ndata["hh"] = F.ones((4, 5))
    g_y.edata["ww"] = F.randn((2, 10))
    g = dgl.heterograph(
        {
            ("user", "follows", "user"): ([0, 1, 2], [1, 2, 3]),
            ("user", "knows", "user"): ([0, 2], [2, 3]),
            ("user", "knows", "knowledge"): ([0, 1, 3], [2, 3, 4]),
        },
        idtype=idtype,
    )
    g.nodes["user"].data["h"] = g_x.ndata["h"]
    g.edges["follows"].data["w"] = g_x.edata["w"]
    g.nodes["user"].data["hh"] = g_y.ndata["hh"]
    g.edges[("user", "knows", "user")].data["ww"] = g_y.edata["ww"]
    return [g, g_x, g_y, g_z]

def test_deserialize_old_heterograph_file():
    path = os.path.join(os.path.dirname(__file__), "data/hetero1.bin")
    g_list, label_dict = dgl.load_graphs(path)
    assert g_list[0].idtype == F.int64
    assert g_list[3].idtype == F.int32
    assert np.allclose(
        F.asnumpy(g_list[2].nodes["user"].data["hh"]), np.ones((4, 5))
    )
    assert np.allclose(
        F.asnumpy(g_list[5].nodes["user"].data["hh"]), np.ones((4, 5))
    )
    edges = g_list[0]["follows"].edges()
    assert np.allclose(F.asnumpy(edges[0]), np.array([0, 1, 2]))
    assert np.allclose(F.asnumpy(edges[1]), np.array([1, 2, 3]))
    assert F.allclose(label_dict["graph_label"], F.ones(54))

def create_old_heterograph_files():
    path = os.path.join(os.path.dirname(__file__), "data/hetero1.bin")
    g_list0 = create_heterographs(F.int64) + create_heterographs(F.int32)
    labels_dict = {"graph_label": F.ones(54)}
    dgl.save_graphs(path, g_list0, labels_dict)

@unittest.skipIf(F._default_context_str == "gpu", reason="GPU not implemented")
def test_serialize_heterograph():
    f = tempfile.NamedTemporaryFile(delete=False)
    path = f.name

@@ -295,15 +319,17 @@ def test_serialize_heterograph():

    for i in range(len(g_list0)):
        for j, etypes in enumerate(g_list0[i].canonical_etypes):
            assert g_list[i].canonical_etypes[j] == etypes
    # assert g_list[1].restrict_format() == 'any'
    # assert g_list[2].restrict_format() == 'csr'
    assert g_list[4].idtype == F.int32
    assert np.allclose(
        F.asnumpy(g_list[2].nodes["user"].data["hh"]), np.ones((4, 5))
    )
    assert np.allclose(
        F.asnumpy(g_list[6].nodes["user"].data["hh"]), np.ones((4, 5))
    )
    edges = g_list[0]["follows"].edges()
    assert np.allclose(F.asnumpy(edges[0]), np.array([0, 1, 2]))
    assert np.allclose(F.asnumpy(edges[1]), np.array([1, 2, 3]))
    for i in range(len(g_list)):

@@ -311,12 +337,13 @@ def test_serialize_heterograph():

        assert g_list[i].etypes == g_list0[i].etypes

    # test set feature after load_graph
    g_list[3].nodes["user"].data["test"] = F.tensor([0, 1, 2, 4])
    g_list[3].edata["test"] = F.tensor([0, 1, 2])

    os.unlink(path)

@unittest.skipIf(F._default_context_str == "gpu", reason="GPU not implemented")
@pytest.mark.skip(reason="lack of permission on CI")
def test_serialize_heterograph_s3():
    path = "s3://dglci-data-test/graph2.bin"

@@ -325,30 +352,31 @@ def test_serialize_heterograph_s3():

    g_list = dgl.load_graphs(path, [0, 2, 5])
    assert g_list[0].idtype == F.int64
    # assert g_list[1].restrict_format() == 'csr'
    assert np.allclose(
        F.asnumpy(g_list[1].nodes["user"].data["hh"]), np.ones((4, 5))
    )
    assert np.allclose(
        F.asnumpy(g_list[2].nodes["user"].data["hh"]), np.ones((4, 5))
    )
    edges = g_list[0]["follows"].edges()
    assert np.allclose(F.asnumpy(edges[0]), np.array([0, 1, 2]))
    assert np.allclose(F.asnumpy(edges[1]), np.array([1, 2, 3]))

if __name__ == "__main__":
    pass
    # test_graph_serialize_with_feature(True)
    # test_graph_serialize_with_feature(False)
    # test_graph_serialize_without_feature(True)
    # test_graph_serialize_without_feature(False)
    # test_graph_serialize_with_labels(True)
    # test_graph_serialize_with_labels(False)
    # test_serialize_tensors()
    # test_serialize_empty_dict()
    # test_load_old_files1()
    test_load_old_files2()
    # test_serialize_heterograph()
    # test_serialize_heterograph_s3()
    # test_deserialize_old_heterograph_file()
    # create_old_heterograph_files()

import itertools
import unittest
from collections import Counter

import backend as F
import networkx as nx
import numpy as np
import pytest
import scipy.sparse as ssp
from test_utils import parametrize_idtype

import dgl
import dgl.function as fn
from dgl import DGLError

def create_test_heterograph(num_nodes, num_adj, idtype):
    if isinstance(num_adj, int):
        num_adj = [num_adj, num_adj + 1]
    num_adj_list = list(
        np.random.choice(np.arange(num_adj[0], num_adj[1]), num_nodes)
    )
    src = np.concatenate([[i] * num_adj_list[i] for i in range(num_nodes)])
    dst = [
        np.random.choice(num_nodes, nadj, replace=False)
        for nadj in num_adj_list
    ]
    dst = np.concatenate(dst)
    return dgl.graph((src, dst), idtype=idtype)

def check_sort(spm, tag_arr=None, tag_pos=None):
    if tag_arr is None:
        tag_arr = np.arange(spm.shape[0])

@@ -37,18 +47,20 @@ def check_sort(spm, tag_arr=None, tag_pos=None):

                    # `tag_pos_ptr` is the expected tag value. Here we check whether the
                    # tag value is equal to `tag_pos_ptr`
                    return False
            if tag_arr[dst[j]] > tag_arr[dst[j + 1]]:
                # The tag should be in descending order after sorting
                return False
            if tag_pos is not None and tag_arr[dst[j]] < tag_arr[dst[j + 1]]:
                if j + 1 != int(tag_pos_row[tag_pos_ptr + 1]):
                    # The boundary of tag should be consistent with `tag_pos`
                    return False
                tag_pos_ptr = tag_arr[dst[j + 1]]
    return True
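
# Note (added for clarity): check_sort walks each CSR row and returns False if
# any pair of consecutive neighbors has tags out of order, or if a tag boundary
# disagrees with the recorded "_TAG_OFFSET" positions. The tests below use it
# both positively (on the graph returned by sort_csr_by_tag / sort_csc_by_tag)
# and negatively (the original adjacency must remain unsorted).
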
@unittest.skipIf(
    F._default_context_str == "gpu", reason="GPU sorting by tag not implemented"
)
@parametrize_idtype
def test_sort_with_tag(idtype):
    num_nodes, num_adj, num_tags = 200, [20, 50], 5

@@ -58,42 +70,50 @@ def test_sort_with_tag(idtype):

    edge_tag_dst = F.gather_row(tag, F.tensor(dst))
    edge_tag_src = F.gather_row(tag, F.tensor(src))

    for tag_type in ["node", "edge"]:
        new_g = dgl.sort_csr_by_tag(
            g, tag if tag_type == "node" else edge_tag_dst, tag_type=tag_type
        )
        old_csr = g.adjacency_matrix(scipy_fmt="csr")
        new_csr = new_g.adjacency_matrix(scipy_fmt="csr")
        assert check_sort(new_csr, tag, new_g.dstdata["_TAG_OFFSET"])
        assert not check_sort(
            old_csr, tag
        )  # Check the original csr is not modified.

    for tag_type in ["node", "edge"]:
        new_g = dgl.sort_csc_by_tag(
            g, tag if tag_type == "node" else edge_tag_src, tag_type=tag_type
        )
        old_csc = g.adjacency_matrix(transpose=True, scipy_fmt="csr")
        new_csc = new_g.adjacency_matrix(transpose=True, scipy_fmt="csr")
        assert check_sort(new_csc, tag, new_g.srcdata["_TAG_OFFSET"])
        assert not check_sort(old_csc, tag)

@unittest.skipIf(
    F._default_context_str == "gpu", reason="GPU sorting by tag not implemented"
)
@parametrize_idtype
def test_sort_with_tag_bipartite(idtype):
    num_nodes, num_adj, num_tags = 200, [20, 50], 5
    g = create_test_heterograph(num_nodes, num_adj, idtype=idtype)
    g = dgl.heterograph({("_U", "_E", "_V"): g.edges()})
    utag = F.tensor(np.random.choice(num_tags, g.number_of_nodes("_U")))
    vtag = F.tensor(np.random.choice(num_tags, g.number_of_nodes("_V")))

    new_g = dgl.sort_csr_by_tag(g, vtag)
    old_csr = g.adjacency_matrix(scipy_fmt="csr")
    new_csr = new_g.adjacency_matrix(scipy_fmt="csr")
    assert check_sort(new_csr, vtag, new_g.nodes["_U"].data["_TAG_OFFSET"])
    assert not check_sort(old_csr, vtag)

    new_g = dgl.sort_csc_by_tag(g, utag)
    old_csc = g.adjacency_matrix(transpose=True, scipy_fmt="csr")
    new_csc = new_g.adjacency_matrix(transpose=True, scipy_fmt="csr")
    assert check_sort(new_csc, utag, new_g.nodes["_V"].data["_TAG_OFFSET"])
    assert not check_sort(old_csc, utag)

if __name__ == "__main__":
    test_sort_with_tag(F.int32)

import random
import unittest

import backend as F
import networkx as nx
import numpy as np
import pytest
import torch
from test_utils import parametrize_idtype
from test_utils.graph_cases import get_cases

import dgl
from dgl.ops import edge_softmax, gsddmm, gspmm, segment_reduce

random.seed(42)
np.random.seed(42)

udf_msg = {
    "add": lambda edges: {"m": edges.src["x"] + edges.data["w"]},
    "sub": lambda edges: {"m": edges.src["x"] - edges.data["w"]},
    "mul": lambda edges: {"m": edges.src["x"] * edges.data["w"]},
    "div": lambda edges: {"m": edges.src["x"] / edges.data["w"]},
    "copy_lhs": lambda edges: {"m": edges.src["x"]},
    "copy_rhs": lambda edges: {"m": edges.data["w"]},
}

def select(target, src, edge, dst):
    if target == "u":
        return src
    elif target == "v":
        return dst
    elif target == "e":
        return edge


def binary_op(msg, x, y):
    if msg == "add":
        return x + y
    elif msg == "sub":
        return x - y
    elif msg == "mul":
        return x * y
    elif msg == "div":
        return x / y
    elif msg == "dot":
        return F.sum(x * y, -1, keepdims=True)
    elif msg == "copy_lhs":
        return x
    elif msg == "copy_rhs":
        return y

def edge_func(lhs_target, rhs_target, msg):
    def foo(edges):
        return {
            "m": binary_op(
                msg,
                select(lhs_target, edges.src, edges.data, edges.dst)["x"],
                select(rhs_target, edges.src, edges.data, edges.dst)["y"],
            )
        }

    return foo


udf_apply_edges = {
    lhs_target
    + "_"
    + msg
    + "_"
    + rhs_target: edge_func(lhs_target, rhs_target, msg)
    for lhs_target in ["u", "v", "e"]
    for rhs_target in ["u", "v", "e"]
    for msg in ["add", "sub", "mul", "div", "dot", "copy_lhs", "copy_rhs"]
}

udf_reduce = {
    "sum": lambda nodes: {"v": F.sum(nodes.mailbox["m"], 1)},
    "min": lambda nodes: {"v": F.min(nodes.mailbox["m"], 1)},
    "max": lambda nodes: {"v": F.max(nodes.mailbox["m"], 1)},
}
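
# Note (added for clarity): udf_msg, udf_apply_edges, and udf_reduce are plain
# Python reference implementations of the message, edge, and reduce functions.
# The tests below run the fused kernels (gspmm / gsddmm) and then replay the
# same computation through g.update_all / g.apply_edges with these UDFs,
# comparing both the outputs and the gradients.
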
graphs = [
    # dgl.rand_graph(30, 0),
    dgl.rand_graph(30, 100),
    dgl.rand_bipartite("_U", "_E", "_V", 30, 40, 300),
]

spmm_shapes = [

@@ -81,7 +93,7 @@ spmm_shapes = [

    ((1,), (3,)),
    ((3,), (1,)),
    ((1,), (1,)),
    ((), ()),
]

sddmm_shapes = [

@@ -89,17 +101,18 @@ sddmm_shapes = [

    ((5, 3, 1, 7), (1, 3, 7, 7)),
    ((1, 3, 3), (4, 1, 3)),
    ((3,), (3,)),
    ((1,), (1,)),
]

edge_softmax_shapes = [(1,), (1, 3), (3, 4, 5)]

@pytest.mark.parametrize("g", graphs)
@pytest.mark.parametrize("shp", spmm_shapes)
@pytest.mark.parametrize(
    "msg", ["add", "sub", "mul", "div", "copy_lhs", "copy_rhs"]
)
@pytest.mark.parametrize("reducer", ["sum", "min", "max"])
@parametrize_idtype
def test_spmm(idtype, g, shp, msg, reducer):
    g = g.astype(idtype).to(F.ctx())

@@ -108,65 +121,74 @@ def test_spmm(idtype, g, shp, msg, reducer):

    hu = F.tensor(np.random.rand(*((g.number_of_src_nodes(),) + shp[0])) + 1)
    he = F.tensor(np.random.rand(*((g.number_of_edges(),) + shp[1])) + 1)
    print("u shape: {}, e shape: {}".format(F.shape(hu), F.shape(he)))

    g.srcdata["x"] = F.attach_grad(F.clone(hu))
    g.edata["w"] = F.attach_grad(F.clone(he))
    print("SpMM(message func: {}, reduce func: {})".format(msg, reducer))

    u = F.attach_grad(F.clone(hu))
    e = F.attach_grad(F.clone(he))
    with F.record_grad():
        v = gspmm(g, msg, reducer, u, e)
        if reducer in ["max", "min"]:
            v = F.replace_inf_with_zero(v)
        if g.number_of_edges() > 0:
            F.backward(F.reduce_sum(v))
            if msg != "copy_rhs":
                grad_u = F.grad(u)
            if msg != "copy_lhs":
                grad_e = F.grad(e)

    with F.record_grad():
        g.update_all(udf_msg[msg], udf_reduce[reducer])
        if g.number_of_edges() > 0:
            v1 = g.dstdata["v"]
            assert F.allclose(v, v1)
            print("forward passed")

            F.backward(F.reduce_sum(v1))
            if msg != "copy_rhs":
                if reducer in [
                    "min",
                    "max",
                ]:  # there might be some numerical errors
                    rate = F.reduce_sum(
                        F.abs(F.grad(g.srcdata["x"]) - grad_u)
                    ) / F.reduce_sum(F.abs(grad_u))
                    assert F.as_scalar(rate) < 1e-2, rate
                else:
                    assert F.allclose(F.grad(g.srcdata["x"]), grad_u)
            if msg != "copy_lhs":
                if reducer in ["min", "max"]:
                    rate = F.reduce_sum(
                        F.abs(F.grad(g.edata["w"]) - grad_e)
                    ) / F.reduce_sum(F.abs(grad_e))
                    assert F.as_scalar(rate) < 1e-2, rate
                else:
                    assert F.allclose(F.grad(g.edata["w"]), grad_e)
            print("backward passed")

    g.srcdata.pop("x")
    g.edata.pop("w")
    if "v" in g.dstdata:
        g.dstdata.pop("v")
@pytest.mark.parametrize('g', graphs)
@pytest.mark.parametrize('shp', sddmm_shapes)
@pytest.mark.parametrize('lhs_target', ['u', 'v', 'e']) @pytest.mark.parametrize("g", graphs)
@pytest.mark.parametrize('rhs_target', ['u', 'v', 'e']) @pytest.mark.parametrize("shp", sddmm_shapes)
@pytest.mark.parametrize('msg', ['add', 'sub', 'mul', 'div', 'dot', 'copy_lhs', 'copy_rhs']) @pytest.mark.parametrize("lhs_target", ["u", "v", "e"])
@pytest.mark.parametrize("rhs_target", ["u", "v", "e"])
@pytest.mark.parametrize(
"msg", ["add", "sub", "mul", "div", "dot", "copy_lhs", "copy_rhs"]
)
@parametrize_idtype @parametrize_idtype
def test_sddmm(g, shp, lhs_target, rhs_target, msg, idtype): def test_sddmm(g, shp, lhs_target, rhs_target, msg, idtype):
if lhs_target == rhs_target: if lhs_target == rhs_target:
return return
g = g.astype(idtype).to(F.ctx()) g = g.astype(idtype).to(F.ctx())
if dgl.backend.backend_name == 'mxnet' and g.number_of_edges() == 0: if dgl.backend.backend_name == "mxnet" and g.number_of_edges() == 0:
pytest.skip() # mxnet do not support zero shape tensor pytest.skip() # mxnet do not support zero shape tensor
print(g) print(g)
print(g.idtype) print(g.idtype)
...@@ -174,37 +196,37 @@ def test_sddmm(g, shp, lhs_target, rhs_target, msg, idtype): ...@@ -174,37 +196,37 @@ def test_sddmm(g, shp, lhs_target, rhs_target, msg, idtype):
lhs_target, lhs_target,
g.number_of_src_nodes(), g.number_of_src_nodes(),
g.number_of_edges(), g.number_of_edges(),
g.number_of_dst_nodes()) g.number_of_dst_nodes(),
)
lhs_shp = (len_lhs,) + shp[0] lhs_shp = (len_lhs,) + shp[0]
len_rhs = select( len_rhs = select(
rhs_target, rhs_target,
g.number_of_src_nodes(), g.number_of_src_nodes(),
g.number_of_edges(), g.number_of_edges(),
g.number_of_dst_nodes()) g.number_of_dst_nodes(),
)
rhs_shp = (len_rhs,) + shp[1] rhs_shp = (len_rhs,) + shp[1]
feat_lhs = F.tensor(np.random.rand(*lhs_shp) + 1) feat_lhs = F.tensor(np.random.rand(*lhs_shp) + 1)
feat_rhs = F.tensor(np.random.rand(*rhs_shp) + 1) feat_rhs = F.tensor(np.random.rand(*rhs_shp) + 1)
print('lhs shape: {}, rhs shape: {}'.format(F.shape(feat_lhs), F.shape(feat_rhs))) print(
"lhs shape: {}, rhs shape: {}".format(
lhs_frame = select( F.shape(feat_lhs), F.shape(feat_rhs)
lhs_target, )
g.srcdata, )
g.edata,
g.dstdata) lhs_frame = select(lhs_target, g.srcdata, g.edata, g.dstdata)
rhs_frame = select( rhs_frame = select(rhs_target, g.srcdata, g.edata, g.dstdata)
rhs_target, lhs_frame["x"] = F.attach_grad(F.clone(feat_lhs))
g.srcdata, rhs_frame["y"] = F.attach_grad(F.clone(feat_rhs))
g.edata, msg_func = lhs_target + "_" + msg + "_" + rhs_target
g.dstdata) print("SDDMM(message func: {})".format(msg_func))
lhs_frame['x'] = F.attach_grad(F.clone(feat_lhs))
rhs_frame['y'] = F.attach_grad(F.clone(feat_rhs))
msg_func = lhs_target + '_' + msg + '_' + rhs_target
print('SDDMM(message func: {})'.format(msg_func))
lhs = F.attach_grad(F.clone(feat_lhs)) lhs = F.attach_grad(F.clone(feat_lhs))
rhs = F.attach_grad(F.clone(feat_rhs)) rhs = F.attach_grad(F.clone(feat_rhs))
with F.record_grad(): with F.record_grad():
e = gsddmm(g, msg, lhs, rhs, lhs_target=lhs_target, rhs_target=rhs_target) e = gsddmm(
g, msg, lhs, rhs, lhs_target=lhs_target, rhs_target=rhs_target
)
F.backward(F.reduce_sum(e)) F.backward(F.reduce_sum(e))
grad_lhs = F.grad(lhs) grad_lhs = F.grad(lhs)
grad_rhs = F.grad(rhs) grad_rhs = F.grad(rhs)
...@@ -212,24 +234,26 @@ def test_sddmm(g, shp, lhs_target, rhs_target, msg, idtype): ...@@ -212,24 +234,26 @@ def test_sddmm(g, shp, lhs_target, rhs_target, msg, idtype):
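# Reference path: recompute the edge features with a UDF via apply_edges and
# compare forward outputs and gradients against gsddmm.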
with F.record_grad(): with F.record_grad():
g.apply_edges(udf_apply_edges[msg_func]) g.apply_edges(udf_apply_edges[msg_func])
if g.number_of_edges() > 0: if g.number_of_edges() > 0:
e1 = g.edata['m'] e1 = g.edata["m"]
assert F.allclose(e, e1) assert F.allclose(e, e1)
print('forward passed') print("forward passed")
F.backward(F.reduce_sum(e1)) F.backward(F.reduce_sum(e1))
if msg != 'copy_rhs': if msg != "copy_rhs":
assert F.allclose(F.grad(lhs_frame['x']), grad_lhs) assert F.allclose(F.grad(lhs_frame["x"]), grad_lhs)
if msg != 'copy_lhs': if msg != "copy_lhs":
assert F.allclose(F.grad(rhs_frame['y']), grad_rhs) assert F.allclose(F.grad(rhs_frame["y"]), grad_rhs)
print('backward passed') print("backward passed")
lhs_frame.pop('x') lhs_frame.pop("x")
rhs_frame.pop('y') rhs_frame.pop("y")
if 'm' in g.edata: g.edata.pop('m') if "m" in g.edata:
g.edata.pop("m")
@pytest.mark.parametrize('g', get_cases(['clique']))
@pytest.mark.parametrize('norm_by', ['src', 'dst'])
@pytest.mark.parametrize('shp', edge_softmax_shapes) @pytest.mark.parametrize("g", get_cases(["clique"]))
@pytest.mark.parametrize("norm_by", ["src", "dst"])
@pytest.mark.parametrize("shp", edge_softmax_shapes)
@parametrize_idtype @parametrize_idtype
def test_edge_softmax(g, norm_by, shp, idtype): def test_edge_softmax(g, norm_by, shp, idtype):
g = g.astype(idtype).to(F.ctx()) g = g.astype(idtype).to(F.ctx())
...@@ -244,21 +268,24 @@ def test_edge_softmax(g, norm_by, shp, idtype): ...@@ -244,21 +268,24 @@ def test_edge_softmax(g, norm_by, shp, idtype):
with F.record_grad(): with F.record_grad():
e2 = F.attach_grad(F.clone(edata)) e2 = F.attach_grad(F.clone(edata))
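# Reference: the clique test graphs have an edge for every (src, dst) pair, so the
# scores reshape into a dense (num_src, num_dst, ...) tensor and edge_softmax
# becomes a plain softmax over one of the first two axes.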
e2_2d = F.reshape( e2_2d = F.reshape(
e2, (g.number_of_src_nodes(), g.number_of_dst_nodes(), *e2.shape[1:])) e2,
if norm_by == 'src': (g.number_of_src_nodes(), g.number_of_dst_nodes(), *e2.shape[1:]),
)
if norm_by == "src":
score2 = F.softmax(e2_2d, 1) score2 = F.softmax(e2_2d, 1)
score2 = F.reshape(score2, (-1, *e2.shape[1:])) score2 = F.reshape(score2, (-1, *e2.shape[1:]))
if norm_by == 'dst': if norm_by == "dst":
score2 = F.softmax(e2_2d, 0) score2 = F.softmax(e2_2d, 0)
score2 = F.reshape(score2, (-1, *e2.shape[1:])) score2 = F.reshape(score2, (-1, *e2.shape[1:]))
assert F.allclose(score1, score2) assert F.allclose(score1, score2)
print('forward passed') print("forward passed")
F.backward(F.reduce_sum(score2)) F.backward(F.reduce_sum(score2))
assert F.allclose(F.grad(e2), grad_edata) assert F.allclose(F.grad(e2), grad_edata)
print('backward passed') print("backward passed")
@pytest.mark.parametrize('reducer', ['sum', 'max', 'min', 'mean']) @pytest.mark.parametrize("reducer", ["sum", "max", "min", "mean"])
def test_segment_reduce(reducer): def test_segment_reduce(reducer):
ctx = F.ctx() ctx = F.ctx()
value = F.tensor(np.random.rand(10, 5)) value = F.tensor(np.random.rand(10, 5))
...@@ -266,14 +293,17 @@ def test_segment_reduce(reducer): ...@@ -266,14 +293,17 @@ def test_segment_reduce(reducer):
v2 = F.attach_grad(F.clone(value)) v2 = F.attach_grad(F.clone(value))
seglen = F.tensor([2, 3, 0, 4, 1, 0, 0]) seglen = F.tensor([2, 3, 0, 4, 1, 0, 0])
u = F.copy_to(F.arange(0, F.shape(value)[0], F.int32), ctx) u = F.copy_to(F.arange(0, F.shape(value)[0], F.int32), ctx)
v = F.repeat(F.copy_to(F.arange(0, len(seglen), F.int32), ctx), v = F.repeat(
seglen, dim=0) F.copy_to(F.arange(0, len(seglen), F.int32), ctx), seglen, dim=0
)
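# Build a bipartite '_U' -> '_V' graph where destination i receives exactly the i-th
# segment of rows, so gspmm with copy_lhs reproduces segment_reduce over seglen.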
num_nodes = {'_U': len(u), '_V': len(seglen)}
g = dgl.convert.heterograph({('_U', '_E', '_V'): (u, v)}, num_nodes_dict=num_nodes) num_nodes = {"_U": len(u), "_V": len(seglen)}
g = dgl.convert.heterograph(
{("_U", "_E", "_V"): (u, v)}, num_nodes_dict=num_nodes
)
with F.record_grad(): with F.record_grad():
rst1 = gspmm(g, 'copy_lhs', reducer, v1, None) rst1 = gspmm(g, "copy_lhs", reducer, v1, None)
if reducer in ['max', 'min']: if reducer in ["max", "min"]:
rst1 = F.replace_inf_with_zero(rst1) rst1 = F.replace_inf_with_zero(rst1)
F.backward(F.reduce_sum(rst1)) F.backward(F.reduce_sum(rst1))
grad1 = F.grad(v1) grad1 = F.grad(v1)
...@@ -282,24 +312,36 @@ def test_segment_reduce(reducer): ...@@ -282,24 +312,36 @@ def test_segment_reduce(reducer):
rst2 = segment_reduce(seglen, v2, reducer=reducer) rst2 = segment_reduce(seglen, v2, reducer=reducer)
F.backward(F.reduce_sum(rst2)) F.backward(F.reduce_sum(rst2))
assert F.allclose(rst1, rst2) assert F.allclose(rst1, rst2)
print('forward passed') print("forward passed")
grad2 = F.grad(v2) grad2 = F.grad(v2)
assert F.allclose(grad1, grad2) assert F.allclose(grad1, grad2)
print('backward passed') print("backward passed")
@unittest.skipIf(dgl.backend.backend_name != 'pytorch', reason='Only support PyTorch for now') @unittest.skipIf(
dgl.backend.backend_name != "pytorch", reason="Only support PyTorch for now"
)
@parametrize_idtype @parametrize_idtype
@pytest.mark.parametrize('feat_size', [1, 8, 16, 64, 256]) @pytest.mark.parametrize("feat_size", [1, 8, 16, 64, 256])
@pytest.mark.parametrize('dtype,tol', [(torch.float16,1e-2),(torch.float32,3e-3),(torch.float64,1e-4)]) @pytest.mark.parametrize(
"dtype,tol",
[(torch.float16, 1e-2), (torch.float32, 3e-3), (torch.float64, 1e-4)],
)
def test_segment_mm(idtype, feat_size, dtype, tol): def test_segment_mm(idtype, feat_size, dtype, tol):
if F._default_context_str == 'cpu' and dtype == torch.float16: if F._default_context_str == "cpu" and dtype == torch.float16:
pytest.skip("fp16 support for CPU linalg functions has been removed in PyTorch.") pytest.skip(
"fp16 support for CPU linalg functions has been removed in PyTorch."
)
dev = F.ctx() dev = F.ctx()
# input # input
a = torch.tensor(np.random.rand(100, feat_size)).to(dev).to(dtype) a = torch.tensor(np.random.rand(100, feat_size)).to(dev).to(dtype)
a.requires_grad_() a.requires_grad_()
b = torch.tensor(np.random.rand(10, feat_size, feat_size + 1)).to(dev).to(dtype) b = (
torch.tensor(np.random.rand(10, feat_size, feat_size + 1))
.to(dev)
.to(dtype)
)
b.requires_grad_() b.requires_grad_()
seglen_a = torch.tensor([10, 15, 8, 0, 1, 9, 18, 24, 15, 0]) seglen_a = torch.tensor([10, 15, 8, 0, 1, 9, 18, 24, 15, 0])
dc = torch.tensor(np.random.rand(100, feat_size + 1)).to(dev).to(dtype) dc = torch.tensor(np.random.rand(100, feat_size + 1)).to(dev).to(dtype)
...@@ -312,7 +354,7 @@ def test_segment_mm(idtype, feat_size, dtype, tol): ...@@ -312,7 +354,7 @@ def test_segment_mm(idtype, feat_size, dtype, tol):
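# Reference: rows of `a` are split into consecutive segments of lengths seglen_a and
# segment i is multiplied by b[i].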
c_t = [] c_t = []
off = 0 off = 0
for i, l in enumerate(seglen_a): for i, l in enumerate(seglen_a):
c_t.append(a[off:off+l] @ b[i]) c_t.append(a[off : off + l] @ b[i])
off += l off += l
c_t = torch.cat(c_t).to(dtype) c_t = torch.cat(c_t).to(dtype)
a.grad.zero_() a.grad.zero_()
...@@ -325,11 +367,15 @@ def test_segment_mm(idtype, feat_size, dtype, tol): ...@@ -325,11 +367,15 @@ def test_segment_mm(idtype, feat_size, dtype, tol):
assert torch.allclose(da, da_t, atol=tol, rtol=tol) assert torch.allclose(da, da_t, atol=tol, rtol=tol)
assert torch.allclose(db, db_t, atol=tol, rtol=tol) assert torch.allclose(db, db_t, atol=tol, rtol=tol)
@unittest.skipIf(dgl.backend.backend_name != 'pytorch', reason='Only support PyTorch for now')
@unittest.skipIf(
dgl.backend.backend_name != "pytorch", reason="Only support PyTorch for now"
)
@parametrize_idtype @parametrize_idtype
@pytest.mark.parametrize('feat_size', [1, 8, 16, 64, 256]) @pytest.mark.parametrize("feat_size", [1, 8, 16, 64, 256])
def test_gather_mm_idx_b(idtype, feat_size): def test_gather_mm_idx_b(idtype, feat_size):
import torch import torch
dev = F.ctx() dev = F.ctx()
# input # input
a = torch.tensor(np.random.rand(100, feat_size)).to(dev) a = torch.tensor(np.random.rand(100, feat_size)).to(dev)
...@@ -355,12 +401,16 @@ def test_gather_mm_idx_b(idtype, feat_size): ...@@ -355,12 +401,16 @@ def test_gather_mm_idx_b(idtype, feat_size):
assert torch.allclose(da, da_t, atol=1e-4, rtol=1e-4) assert torch.allclose(da, da_t, atol=1e-4, rtol=1e-4)
assert torch.allclose(db, db_t, atol=1e-4, rtol=1e-4) assert torch.allclose(db, db_t, atol=1e-4, rtol=1e-4)
@unittest.skipIf(dgl.backend.backend_name != 'pytorch', reason='Only support PyTorch for now')
@unittest.skipIf(
dgl.backend.backend_name != "pytorch", reason="Only support PyTorch for now"
)
@parametrize_idtype @parametrize_idtype
@pytest.mark.parametrize('feat_size', [1, 8, 16, 64, 256]) @pytest.mark.parametrize("feat_size", [1, 8, 16, 64, 256])
def _test_gather_mm_idx_a(idtype, feat_size): def _test_gather_mm_idx_a(idtype, feat_size):
# TODO(minjie): currently disabled due to bugs in the CUDA kernel. Need to fix it later. # TODO(minjie): currently disabled due to bugs in the CUDA kernel. Need to fix it later.
import torch import torch
dev = F.ctx() dev = F.ctx()
# input # input
a = torch.tensor(np.random.rand(10, feat_size)).to(dev) a = torch.tensor(np.random.rand(10, feat_size)).to(dev)
...@@ -386,10 +436,16 @@ def _test_gather_mm_idx_a(idtype, feat_size): ...@@ -386,10 +436,16 @@ def _test_gather_mm_idx_a(idtype, feat_size):
assert torch.allclose(da, da_t, atol=1e-4, rtol=1e-4) assert torch.allclose(da, da_t, atol=1e-4, rtol=1e-4)
assert torch.allclose(db, db_t, atol=1e-4, rtol=1e-4) assert torch.allclose(db, db_t, atol=1e-4, rtol=1e-4)
@unittest.skipIf(dgl.backend.backend_name != 'pytorch', reason='Only support PyTorch for now')
@unittest.skipIf(F._default_context_str == 'gpu', reason="Libxsmm is only available on CPU.") @unittest.skipIf(
dgl.backend.backend_name != "pytorch", reason="Only support PyTorch for now"
)
@unittest.skipIf(
F._default_context_str == "gpu", reason="Libxsmm is only available on CPU."
)
def test_use_libxsmm_switch(): def test_use_libxsmm_switch():
import torch import torch
g = dgl.graph(([0, 0, 0, 1, 1, 2], [0, 1, 2, 1, 2, 2])) g = dgl.graph(([0, 0, 0, 1, 1, 2], [0, 1, 2, 1, 2, 2]))
x = torch.ones(3, 2, requires_grad=True) x = torch.ones(3, 2, requires_grad=True)
y = torch.arange(1, 13).float().view(6, 2).requires_grad_() y = torch.arange(1, 13).float().view(6, 2).requires_grad_()
......
import numpy as np
import networkx as nx
import unittest import unittest
import scipy.sparse as ssp
import pytest
import dgl
import backend as F import backend as F
import networkx as nx
import numpy as np
import pytest
import scipy.sparse as ssp
from test_utils import parametrize_idtype from test_utils import parametrize_idtype
import dgl
D = 5 D = 5
def generate_graph(grad=False, add_data=True): def generate_graph(grad=False, add_data=True):
g = dgl.DGLGraph().to(F.ctx()) g = dgl.DGLGraph().to(F.ctx())
g.add_nodes(10) g.add_nodes(10)
...@@ -25,10 +27,11 @@ def generate_graph(grad=False, add_data=True): ...@@ -25,10 +27,11 @@ def generate_graph(grad=False, add_data=True):
if grad: if grad:
ncol = F.attach_grad(ncol) ncol = F.attach_grad(ncol)
ecol = F.attach_grad(ecol) ecol = F.attach_grad(ecol)
g.ndata['h'] = ncol g.ndata["h"] = ncol
g.edata['l'] = ecol g.edata["l"] = ecol
return g return g
def test_edge_subgraph(): def test_edge_subgraph():
# Test when the graph has no node data and no edge data. # Test when the graph has no node data and no edge data.
g = generate_graph(add_data=False) g = generate_graph(add_data=False)
...@@ -36,22 +39,25 @@ def test_edge_subgraph(): ...@@ -36,22 +39,25 @@ def test_edge_subgraph():
# relabel=True # relabel=True
sg = g.edge_subgraph(eid) sg = g.edge_subgraph(eid)
assert F.array_equal(sg.ndata[dgl.NID], F.tensor([0, 2, 4, 5, 1, 9], g.idtype)) assert F.array_equal(
sg.ndata[dgl.NID], F.tensor([0, 2, 4, 5, 1, 9], g.idtype)
)
assert F.array_equal(sg.edata[dgl.EID], F.tensor(eid, g.idtype)) assert F.array_equal(sg.edata[dgl.EID], F.tensor(eid, g.idtype))
sg.ndata['h'] = F.arange(0, sg.number_of_nodes()) sg.ndata["h"] = F.arange(0, sg.number_of_nodes())
sg.edata['h'] = F.arange(0, sg.number_of_edges()) sg.edata["h"] = F.arange(0, sg.number_of_edges())
# relabel=False # relabel=False
sg = g.edge_subgraph(eid, relabel_nodes=False) sg = g.edge_subgraph(eid, relabel_nodes=False)
assert g.number_of_nodes() == sg.number_of_nodes() assert g.number_of_nodes() == sg.number_of_nodes()
assert F.array_equal(sg.edata[dgl.EID], F.tensor(eid, g.idtype)) assert F.array_equal(sg.edata[dgl.EID], F.tensor(eid, g.idtype))
sg.ndata['h'] = F.arange(0, sg.number_of_nodes()) sg.ndata["h"] = F.arange(0, sg.number_of_nodes())
sg.edata['h'] = F.arange(0, sg.number_of_edges()) sg.edata["h"] = F.arange(0, sg.number_of_edges())
def test_subgraph(): def test_subgraph():
g = generate_graph() g = generate_graph()
h = g.ndata['h'] h = g.ndata["h"]
l = g.edata['l'] l = g.edata["l"]
nid = [0, 2, 3, 6, 7, 9] nid = [0, 2, 3, 6, 7, 9]
sg = g.subgraph(nid) sg = g.subgraph(nid)
eid = {2, 3, 4, 5, 10, 11, 12, 13, 16} eid = {2, 3, 4, 5, 10, 11, 12, 13, 16}
...@@ -60,9 +66,9 @@ def test_subgraph(): ...@@ -60,9 +66,9 @@ def test_subgraph():
# the subgraph data is initially empty except for the NID/EID fields # the subgraph data is initially empty except for the NID/EID fields
assert len(sg.ndata) == 2 assert len(sg.ndata) == 2
assert len(sg.edata) == 2 assert len(sg.edata) == 2
sh = sg.ndata['h'] sh = sg.ndata["h"]
assert F.allclose(F.gather_row(h, F.tensor(nid)), sh) assert F.allclose(F.gather_row(h, F.tensor(nid)), sh)
''' """
s, d, eid s, d, eid
0, 1, 0 0, 1, 0
1, 9, 1 1, 9, 1
...@@ -81,12 +87,13 @@ def test_subgraph(): ...@@ -81,12 +87,13 @@ def test_subgraph():
0, 8, 14 0, 8, 14
8, 9, 15 3 8, 9, 15 3
9, 0, 16 1 9, 0, 16 1
''' """
assert F.allclose(F.gather_row(l, eid), sg.edata['l']) assert F.allclose(F.gather_row(l, eid), sg.edata["l"])
# Updating the node/edge features on the subgraph should NOT # Updating the node/edge features on the subgraph should NOT
# be reflected in the parent graph. # be reflected in the parent graph.
sg.ndata['h'] = F.zeros((6, D)) sg.ndata["h"] = F.zeros((6, D))
assert F.allclose(h, g.ndata['h']) assert F.allclose(h, g.ndata["h"])
def _test_map_to_subgraph(): def _test_map_to_subgraph():
g = dgl.DGLGraph() g = dgl.DGLGraph()
...@@ -96,6 +103,7 @@ def _test_map_to_subgraph(): ...@@ -96,6 +103,7 @@ def _test_map_to_subgraph():
v = h.map_to_subgraph_nid([0, 8, 2]) v = h.map_to_subgraph_nid([0, 8, 2])
assert np.array_equal(F.asnumpy(v), np.array([0, 4, 2])) assert np.array_equal(F.asnumpy(v), np.array([0, 4, 2]))
def create_test_heterograph(idtype): def create_test_heterograph(idtype):
# test heterograph from the docstring, plus a user -- wishes -- game relation # test heterograph from the docstring, plus a user -- wishes -- game relation
# 3 users, 2 games, 2 developers # 3 users, 2 games, 2 developers
...@@ -105,29 +113,37 @@ def create_test_heterograph(idtype): ...@@ -105,29 +113,37 @@ def create_test_heterograph(idtype):
# ('user', 'wishes', 'game'), # ('user', 'wishes', 'game'),
# ('developer', 'develops', 'game')]) # ('developer', 'develops', 'game')])
g = dgl.heterograph({ g = dgl.heterograph(
('user', 'follows', 'user'): ([0, 1], [1, 2]), {
('user', 'plays', 'game'): ([0, 1, 2, 1], [0, 0, 1, 1]), ("user", "follows", "user"): ([0, 1], [1, 2]),
('user', 'wishes', 'game'): ([0, 2], [1, 0]), ("user", "plays", "game"): ([0, 1, 2, 1], [0, 0, 1, 1]),
('developer', 'develops', 'game'): ([0, 1], [0, 1]) ("user", "wishes", "game"): ([0, 2], [1, 0]),
}, idtype=idtype, device=F.ctx()) ("developer", "develops", "game"): ([0, 1], [0, 1]),
},
idtype=idtype,
device=F.ctx(),
)
for etype in g.etypes: for etype in g.etypes:
g.edges[etype].data['weight'] = F.randn((g.num_edges(etype),)) g.edges[etype].data["weight"] = F.randn((g.num_edges(etype),))
assert g.idtype == idtype assert g.idtype == idtype
assert g.device == F.ctx() assert g.device == F.ctx()
return g return g
@unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="MXNet doesn't support bool tensor")
@unittest.skipIf(
dgl.backend.backend_name == "mxnet",
reason="MXNet doesn't support bool tensor",
)
@parametrize_idtype @parametrize_idtype
def test_subgraph_mask(idtype): def test_subgraph_mask(idtype):
g = create_test_heterograph(idtype) g = create_test_heterograph(idtype)
g_graph = g['follows'] g_graph = g["follows"]
g_bipartite = g['plays'] g_bipartite = g["plays"]
x = F.randn((3, 5)) x = F.randn((3, 5))
y = F.randn((2, 4)) y = F.randn((2, 4))
g.nodes['user'].data['h'] = x g.nodes["user"].data["h"] = x
g.edges['follows'].data['h'] = y g.edges["follows"].data["h"] = y
def _check_subgraph(g, sg): def _check_subgraph(g, sg):
assert sg.idtype == g.idtype assert sg.idtype == g.idtype
...@@ -135,39 +151,57 @@ def test_subgraph_mask(idtype): ...@@ -135,39 +151,57 @@ def test_subgraph_mask(idtype):
assert sg.ntypes == g.ntypes assert sg.ntypes == g.ntypes
assert sg.etypes == g.etypes assert sg.etypes == g.etypes
assert sg.canonical_etypes == g.canonical_etypes assert sg.canonical_etypes == g.canonical_etypes
assert F.array_equal(F.tensor(sg.nodes['user'].data[dgl.NID]), assert F.array_equal(
F.tensor([1, 2], idtype)) F.tensor(sg.nodes["user"].data[dgl.NID]), F.tensor([1, 2], idtype)
assert F.array_equal(F.tensor(sg.nodes['game'].data[dgl.NID]), )
F.tensor([0], idtype)) assert F.array_equal(
assert F.array_equal(F.tensor(sg.edges['follows'].data[dgl.EID]), F.tensor(sg.nodes["game"].data[dgl.NID]), F.tensor([0], idtype)
F.tensor([1], idtype)) )
assert F.array_equal(F.tensor(sg.edges['plays'].data[dgl.EID]), assert F.array_equal(
F.tensor([1], idtype)) F.tensor(sg.edges["follows"].data[dgl.EID]), F.tensor([1], idtype)
assert F.array_equal(F.tensor(sg.edges['wishes'].data[dgl.EID]), )
F.tensor([1], idtype)) assert F.array_equal(
assert sg.number_of_nodes('developer') == 0 F.tensor(sg.edges["plays"].data[dgl.EID]), F.tensor([1], idtype)
assert sg.number_of_edges('develops') == 0 )
assert F.array_equal(sg.nodes['user'].data['h'], g.nodes['user'].data['h'][1:3]) assert F.array_equal(
assert F.array_equal(sg.edges['follows'].data['h'], g.edges['follows'].data['h'][1:2]) F.tensor(sg.edges["wishes"].data[dgl.EID]), F.tensor([1], idtype)
)
sg1 = g.subgraph({'user': F.tensor([False, True, True], dtype=F.bool), assert sg.number_of_nodes("developer") == 0
'game': F.tensor([True, False, False, False], dtype=F.bool)}) assert sg.number_of_edges("develops") == 0
assert F.array_equal(
sg.nodes["user"].data["h"], g.nodes["user"].data["h"][1:3]
)
assert F.array_equal(
sg.edges["follows"].data["h"], g.edges["follows"].data["h"][1:2]
)
sg1 = g.subgraph(
{
"user": F.tensor([False, True, True], dtype=F.bool),
"game": F.tensor([True, False, False, False], dtype=F.bool),
}
)
_check_subgraph(g, sg1) _check_subgraph(g, sg1)
sg2 = g.edge_subgraph({'follows': F.tensor([False, True], dtype=F.bool), sg2 = g.edge_subgraph(
'plays': F.tensor([False, True, False, False], dtype=F.bool), {
'wishes': F.tensor([False, True], dtype=F.bool)}) "follows": F.tensor([False, True], dtype=F.bool),
"plays": F.tensor([False, True, False, False], dtype=F.bool),
"wishes": F.tensor([False, True], dtype=F.bool),
}
)
_check_subgraph(g, sg2) _check_subgraph(g, sg2)
@parametrize_idtype @parametrize_idtype
def test_subgraph1(idtype): def test_subgraph1(idtype):
g = create_test_heterograph(idtype) g = create_test_heterograph(idtype)
g_graph = g['follows'] g_graph = g["follows"]
g_bipartite = g['plays'] g_bipartite = g["plays"]
x = F.randn((3, 5)) x = F.randn((3, 5))
y = F.randn((2, 4)) y = F.randn((2, 4))
g.nodes['user'].data['h'] = x g.nodes["user"].data["h"] = x
g.edges['follows'].data['h'] = y g.edges["follows"].data["h"] = y
def _check_subgraph(g, sg): def _check_subgraph(g, sg):
assert sg.idtype == g.idtype assert sg.idtype == g.idtype
...@@ -175,42 +209,62 @@ def test_subgraph1(idtype): ...@@ -175,42 +209,62 @@ def test_subgraph1(idtype):
assert sg.ntypes == g.ntypes assert sg.ntypes == g.ntypes
assert sg.etypes == g.etypes assert sg.etypes == g.etypes
assert sg.canonical_etypes == g.canonical_etypes assert sg.canonical_etypes == g.canonical_etypes
assert F.array_equal(F.tensor(sg.nodes['user'].data[dgl.NID]), assert F.array_equal(
F.tensor([1, 2], g.idtype)) F.tensor(sg.nodes["user"].data[dgl.NID]), F.tensor([1, 2], g.idtype)
assert F.array_equal(F.tensor(sg.nodes['game'].data[dgl.NID]), )
F.tensor([0], g.idtype)) assert F.array_equal(
assert F.array_equal(F.tensor(sg.edges['follows'].data[dgl.EID]), F.tensor(sg.nodes["game"].data[dgl.NID]), F.tensor([0], g.idtype)
F.tensor([1], g.idtype)) )
assert F.array_equal(F.tensor(sg.edges['plays'].data[dgl.EID]), assert F.array_equal(
F.tensor([1], g.idtype)) F.tensor(sg.edges["follows"].data[dgl.EID]), F.tensor([1], g.idtype)
assert F.array_equal(F.tensor(sg.edges['wishes'].data[dgl.EID]), )
F.tensor([1], g.idtype)) assert F.array_equal(
assert sg.number_of_nodes('developer') == 0 F.tensor(sg.edges["plays"].data[dgl.EID]), F.tensor([1], g.idtype)
assert sg.number_of_edges('develops') == 0 )
assert F.array_equal(sg.nodes['user'].data['h'], g.nodes['user'].data['h'][1:3]) assert F.array_equal(
assert F.array_equal(sg.edges['follows'].data['h'], g.edges['follows'].data['h'][1:2]) F.tensor(sg.edges["wishes"].data[dgl.EID]), F.tensor([1], g.idtype)
)
sg1 = g.subgraph({'user': [1, 2], 'game': [0]}) assert sg.number_of_nodes("developer") == 0
assert sg.number_of_edges("develops") == 0
assert F.array_equal(
sg.nodes["user"].data["h"], g.nodes["user"].data["h"][1:3]
)
assert F.array_equal(
sg.edges["follows"].data["h"], g.edges["follows"].data["h"][1:2]
)
sg1 = g.subgraph({"user": [1, 2], "game": [0]})
_check_subgraph(g, sg1) _check_subgraph(g, sg1)
sg2 = g.edge_subgraph({'follows': [1], 'plays': [1], 'wishes': [1]}) sg2 = g.edge_subgraph({"follows": [1], "plays": [1], "wishes": [1]})
_check_subgraph(g, sg2) _check_subgraph(g, sg2)
# backend tensor input # backend tensor input
sg1 = g.subgraph({'user': F.tensor([1, 2], dtype=idtype), sg1 = g.subgraph(
'game': F.tensor([0], dtype=idtype)}) {
"user": F.tensor([1, 2], dtype=idtype),
"game": F.tensor([0], dtype=idtype),
}
)
_check_subgraph(g, sg1) _check_subgraph(g, sg1)
sg2 = g.edge_subgraph({'follows': F.tensor([1], dtype=idtype), sg2 = g.edge_subgraph(
'plays': F.tensor([1], dtype=idtype), {
'wishes': F.tensor([1], dtype=idtype)}) "follows": F.tensor([1], dtype=idtype),
"plays": F.tensor([1], dtype=idtype),
"wishes": F.tensor([1], dtype=idtype),
}
)
_check_subgraph(g, sg2) _check_subgraph(g, sg2)
# numpy input # numpy input
sg1 = g.subgraph({'user': np.array([1, 2]), sg1 = g.subgraph({"user": np.array([1, 2]), "game": np.array([0])})
'game': np.array([0])})
_check_subgraph(g, sg1) _check_subgraph(g, sg1)
sg2 = g.edge_subgraph({'follows': np.array([1]), sg2 = g.edge_subgraph(
'plays': np.array([1]), {
'wishes': np.array([1])}) "follows": np.array([1]),
"plays": np.array([1]),
"wishes": np.array([1]),
}
)
_check_subgraph(g, sg2) _check_subgraph(g, sg2)
def _check_subgraph_single_ntype(g, sg, preserve_nodes=False): def _check_subgraph_single_ntype(g, sg, preserve_nodes=False):
...@@ -221,18 +275,25 @@ def test_subgraph1(idtype): ...@@ -221,18 +275,25 @@ def test_subgraph1(idtype):
assert sg.canonical_etypes == g.canonical_etypes assert sg.canonical_etypes == g.canonical_etypes
if not preserve_nodes: if not preserve_nodes:
assert F.array_equal(F.tensor(sg.nodes['user'].data[dgl.NID]), assert F.array_equal(
F.tensor([1, 2], g.idtype)) F.tensor(sg.nodes["user"].data[dgl.NID]),
F.tensor([1, 2], g.idtype),
)
else: else:
for ntype in sg.ntypes: for ntype in sg.ntypes:
assert g.number_of_nodes(ntype) == sg.number_of_nodes(ntype) assert g.number_of_nodes(ntype) == sg.number_of_nodes(ntype)
assert F.array_equal(F.tensor(sg.edges['follows'].data[dgl.EID]), assert F.array_equal(
F.tensor([1], g.idtype)) F.tensor(sg.edges["follows"].data[dgl.EID]), F.tensor([1], g.idtype)
)
if not preserve_nodes: if not preserve_nodes:
assert F.array_equal(sg.nodes['user'].data['h'], g.nodes['user'].data['h'][1:3]) assert F.array_equal(
assert F.array_equal(sg.edges['follows'].data['h'], g.edges['follows'].data['h'][1:2]) sg.nodes["user"].data["h"], g.nodes["user"].data["h"][1:3]
)
assert F.array_equal(
sg.edges["follows"].data["h"], g.edges["follows"].data["h"][1:2]
)
def _check_subgraph_single_etype(g, sg, preserve_nodes=False): def _check_subgraph_single_etype(g, sg, preserve_nodes=False):
assert sg.ntypes == g.ntypes assert sg.ntypes == g.ntypes
...@@ -240,16 +301,22 @@ def test_subgraph1(idtype): ...@@ -240,16 +301,22 @@ def test_subgraph1(idtype):
assert sg.canonical_etypes == g.canonical_etypes assert sg.canonical_etypes == g.canonical_etypes
if not preserve_nodes: if not preserve_nodes:
assert F.array_equal(F.tensor(sg.nodes['user'].data[dgl.NID]), assert F.array_equal(
F.tensor([0, 1], g.idtype)) F.tensor(sg.nodes["user"].data[dgl.NID]),
assert F.array_equal(F.tensor(sg.nodes['game'].data[dgl.NID]), F.tensor([0, 1], g.idtype),
F.tensor([0], g.idtype)) )
assert F.array_equal(
F.tensor(sg.nodes["game"].data[dgl.NID]),
F.tensor([0], g.idtype),
)
else: else:
for ntype in sg.ntypes: for ntype in sg.ntypes:
assert g.number_of_nodes(ntype) == sg.number_of_nodes(ntype) assert g.number_of_nodes(ntype) == sg.number_of_nodes(ntype)
assert F.array_equal(F.tensor(sg.edges['plays'].data[dgl.EID]), assert F.array_equal(
F.tensor([0, 1], g.idtype)) F.tensor(sg.edges["plays"].data[dgl.EID]),
F.tensor([0, 1], g.idtype),
)
sg1_graph = g_graph.subgraph([1, 2]) sg1_graph = g_graph.subgraph([1, 2])
_check_subgraph_single_ntype(g_graph, sg1_graph) _check_subgraph_single_ntype(g_graph, sg1_graph)
...@@ -265,222 +332,279 @@ def test_subgraph1(idtype): ...@@ -265,222 +332,279 @@ def test_subgraph1(idtype):
def _check_typed_subgraph1(g, sg): def _check_typed_subgraph1(g, sg):
assert g.idtype == sg.idtype assert g.idtype == sg.idtype
assert g.device == sg.device assert g.device == sg.device
assert set(sg.ntypes) == {'user', 'game'} assert set(sg.ntypes) == {"user", "game"}
assert set(sg.etypes) == {'follows', 'plays', 'wishes'} assert set(sg.etypes) == {"follows", "plays", "wishes"}
for ntype in sg.ntypes: for ntype in sg.ntypes:
assert sg.number_of_nodes(ntype) == g.number_of_nodes(ntype) assert sg.number_of_nodes(ntype) == g.number_of_nodes(ntype)
for etype in sg.etypes: for etype in sg.etypes:
src_sg, dst_sg = sg.all_edges(etype=etype, order='eid') src_sg, dst_sg = sg.all_edges(etype=etype, order="eid")
src_g, dst_g = g.all_edges(etype=etype, order='eid') src_g, dst_g = g.all_edges(etype=etype, order="eid")
assert F.array_equal(src_sg, src_g) assert F.array_equal(src_sg, src_g)
assert F.array_equal(dst_sg, dst_g) assert F.array_equal(dst_sg, dst_g)
assert F.array_equal(sg.nodes['user'].data['h'], g.nodes['user'].data['h']) assert F.array_equal(
assert F.array_equal(sg.edges['follows'].data['h'], g.edges['follows'].data['h']) sg.nodes["user"].data["h"], g.nodes["user"].data["h"]
g.nodes['user'].data['h'] = F.scatter_row(g.nodes['user'].data['h'], F.tensor([2]), F.randn((1, 5))) )
g.edges['follows'].data['h'] = F.scatter_row(g.edges['follows'].data['h'], F.tensor([1]), F.randn((1, 4))) assert F.array_equal(
assert F.array_equal(sg.nodes['user'].data['h'], g.nodes['user'].data['h']) sg.edges["follows"].data["h"], g.edges["follows"].data["h"]
assert F.array_equal(sg.edges['follows'].data['h'], g.edges['follows'].data['h']) )
g.nodes["user"].data["h"] = F.scatter_row(
g.nodes["user"].data["h"], F.tensor([2]), F.randn((1, 5))
)
g.edges["follows"].data["h"] = F.scatter_row(
g.edges["follows"].data["h"], F.tensor([1]), F.randn((1, 4))
)
assert F.array_equal(
sg.nodes["user"].data["h"], g.nodes["user"].data["h"]
)
assert F.array_equal(
sg.edges["follows"].data["h"], g.edges["follows"].data["h"]
)
def _check_typed_subgraph2(g, sg): def _check_typed_subgraph2(g, sg):
assert set(sg.ntypes) == {'developer', 'game'} assert set(sg.ntypes) == {"developer", "game"}
assert set(sg.etypes) == {'develops'} assert set(sg.etypes) == {"develops"}
for ntype in sg.ntypes: for ntype in sg.ntypes:
assert sg.number_of_nodes(ntype) == g.number_of_nodes(ntype) assert sg.number_of_nodes(ntype) == g.number_of_nodes(ntype)
for etype in sg.etypes: for etype in sg.etypes:
src_sg, dst_sg = sg.all_edges(etype=etype, order='eid') src_sg, dst_sg = sg.all_edges(etype=etype, order="eid")
src_g, dst_g = g.all_edges(etype=etype, order='eid') src_g, dst_g = g.all_edges(etype=etype, order="eid")
assert F.array_equal(src_sg, src_g) assert F.array_equal(src_sg, src_g)
assert F.array_equal(dst_sg, dst_g) assert F.array_equal(dst_sg, dst_g)
sg3 = g.node_type_subgraph(['user', 'game']) sg3 = g.node_type_subgraph(["user", "game"])
_check_typed_subgraph1(g, sg3) _check_typed_subgraph1(g, sg3)
sg4 = g.edge_type_subgraph(['develops']) sg4 = g.edge_type_subgraph(["develops"])
_check_typed_subgraph2(g, sg4) _check_typed_subgraph2(g, sg4)
sg5 = g.edge_type_subgraph(['follows', 'plays', 'wishes']) sg5 = g.edge_type_subgraph(["follows", "plays", "wishes"])
_check_typed_subgraph1(g, sg5) _check_typed_subgraph1(g, sg5)
# Test for restricted format # Test for restricted format
for fmt in ['csr', 'csc', 'coo']: for fmt in ["csr", "csc", "coo"]:
g = dgl.graph(([0, 1], [1, 2])).formats(fmt) g = dgl.graph(([0, 1], [1, 2])).formats(fmt)
sg = g.subgraph({g.ntypes[0]: [1, 0]}) sg = g.subgraph({g.ntypes[0]: [1, 0]})
nids = F.asnumpy(sg.ndata[dgl.NID]) nids = F.asnumpy(sg.ndata[dgl.NID])
assert np.array_equal(nids, np.array([1, 0])) assert np.array_equal(nids, np.array([1, 0]))
src, dst = sg.edges(order='eid') src, dst = sg.edges(order="eid")
src = F.asnumpy(src) src = F.asnumpy(src)
dst = F.asnumpy(dst) dst = F.asnumpy(dst)
assert np.array_equal(src, np.array([1])) assert np.array_equal(src, np.array([1]))
@parametrize_idtype @parametrize_idtype
def test_in_subgraph(idtype): def test_in_subgraph(idtype):
hg = dgl.heterograph({ hg = dgl.heterograph(
('user', 'follow', 'user'): ([1, 2, 3, 0, 2, 3, 0], [0, 0, 0, 1, 1, 1, 2]), {
('user', 'play', 'game'): ([0, 0, 1, 3], [0, 1, 2, 2]), ("user", "follow", "user"): (
('game', 'liked-by', 'user'): ([2, 2, 2, 1, 1, 0], [0, 1, 2, 0, 3, 0]), [1, 2, 3, 0, 2, 3, 0],
('user', 'flips', 'coin'): ([0, 1, 2, 3], [0, 0, 0, 0]) [0, 0, 0, 1, 1, 1, 2],
}, idtype=idtype, num_nodes_dict={'user': 5, 'game': 10, 'coin': 8}).to(F.ctx()) ),
subg = dgl.in_subgraph(hg, {'user' : [0,1], 'game' : 0}) ("user", "play", "game"): ([0, 0, 1, 3], [0, 1, 2, 2]),
("game", "liked-by", "user"): (
[2, 2, 2, 1, 1, 0],
[0, 1, 2, 0, 3, 0],
),
("user", "flips", "coin"): ([0, 1, 2, 3], [0, 0, 0, 0]),
},
idtype=idtype,
num_nodes_dict={"user": 5, "game": 10, "coin": 8},
).to(F.ctx())
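# dgl.in_subgraph keeps the edges whose destination is one of the seed nodes (per
# node type); the asserts below enumerate exactly the in-edges of users 0/1 and game 0.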
subg = dgl.in_subgraph(hg, {"user": [0, 1], "game": 0})
assert subg.idtype == idtype assert subg.idtype == idtype
assert len(subg.ntypes) == 3 assert len(subg.ntypes) == 3
assert len(subg.etypes) == 4 assert len(subg.etypes) == 4
u, v = subg['follow'].edges() u, v = subg["follow"].edges()
edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v)))) edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
assert F.array_equal(hg['follow'].edge_ids(u, v), subg['follow'].edata[dgl.EID]) assert F.array_equal(
assert edge_set == {(1,0),(2,0),(3,0),(0,1),(2,1),(3,1)} hg["follow"].edge_ids(u, v), subg["follow"].edata[dgl.EID]
u, v = subg['play'].edges() )
assert edge_set == {(1, 0), (2, 0), (3, 0), (0, 1), (2, 1), (3, 1)}
u, v = subg["play"].edges()
edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v)))) edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
assert F.array_equal(hg['play'].edge_ids(u, v), subg['play'].edata[dgl.EID]) assert F.array_equal(hg["play"].edge_ids(u, v), subg["play"].edata[dgl.EID])
assert edge_set == {(0,0)} assert edge_set == {(0, 0)}
u, v = subg['liked-by'].edges() u, v = subg["liked-by"].edges()
edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v)))) edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
assert F.array_equal(hg['liked-by'].edge_ids(u, v), subg['liked-by'].edata[dgl.EID]) assert F.array_equal(
assert edge_set == {(2,0),(2,1),(1,0),(0,0)} hg["liked-by"].edge_ids(u, v), subg["liked-by"].edata[dgl.EID]
assert subg['flips'].number_of_edges() == 0 )
assert edge_set == {(2, 0), (2, 1), (1, 0), (0, 0)}
assert subg["flips"].number_of_edges() == 0
for ntype in subg.ntypes: for ntype in subg.ntypes:
assert dgl.NID not in subg.nodes[ntype].data assert dgl.NID not in subg.nodes[ntype].data
# Test store_ids # Test store_ids
subg = dgl.in_subgraph(hg, {'user': [0, 1], 'game': 0}, store_ids=False) subg = dgl.in_subgraph(hg, {"user": [0, 1], "game": 0}, store_ids=False)
for etype in ['follow', 'play', 'liked-by']: for etype in ["follow", "play", "liked-by"]:
assert dgl.EID not in subg.edges[etype].data assert dgl.EID not in subg.edges[etype].data
for ntype in subg.ntypes: for ntype in subg.ntypes:
assert dgl.NID not in subg.nodes[ntype].data assert dgl.NID not in subg.nodes[ntype].data
# Test relabel nodes # Test relabel nodes
subg = dgl.in_subgraph(hg, {'user': [0, 1], 'game': 0}, relabel_nodes=True) subg = dgl.in_subgraph(hg, {"user": [0, 1], "game": 0}, relabel_nodes=True)
assert subg.idtype == idtype assert subg.idtype == idtype
assert len(subg.ntypes) == 3 assert len(subg.ntypes) == 3
assert len(subg.etypes) == 4 assert len(subg.etypes) == 4
u, v = subg['follow'].edges() u, v = subg["follow"].edges()
old_u = F.gather_row(subg.nodes['user'].data[dgl.NID], u) old_u = F.gather_row(subg.nodes["user"].data[dgl.NID], u)
old_v = F.gather_row(subg.nodes['user'].data[dgl.NID], v) old_v = F.gather_row(subg.nodes["user"].data[dgl.NID], v)
assert F.array_equal(hg['follow'].edge_ids(old_u, old_v), subg['follow'].edata[dgl.EID]) assert F.array_equal(
hg["follow"].edge_ids(old_u, old_v), subg["follow"].edata[dgl.EID]
)
edge_set = set(zip(list(F.asnumpy(old_u)), list(F.asnumpy(old_v)))) edge_set = set(zip(list(F.asnumpy(old_u)), list(F.asnumpy(old_v))))
assert edge_set == {(1,0),(2,0),(3,0),(0,1),(2,1),(3,1)} assert edge_set == {(1, 0), (2, 0), (3, 0), (0, 1), (2, 1), (3, 1)}
u, v = subg['play'].edges() u, v = subg["play"].edges()
old_u = F.gather_row(subg.nodes['user'].data[dgl.NID], u) old_u = F.gather_row(subg.nodes["user"].data[dgl.NID], u)
old_v = F.gather_row(subg.nodes['game'].data[dgl.NID], v) old_v = F.gather_row(subg.nodes["game"].data[dgl.NID], v)
assert F.array_equal(hg['play'].edge_ids(old_u, old_v), subg['play'].edata[dgl.EID]) assert F.array_equal(
hg["play"].edge_ids(old_u, old_v), subg["play"].edata[dgl.EID]
)
edge_set = set(zip(list(F.asnumpy(old_u)), list(F.asnumpy(old_v)))) edge_set = set(zip(list(F.asnumpy(old_u)), list(F.asnumpy(old_v))))
assert edge_set == {(0,0)} assert edge_set == {(0, 0)}
u, v = subg['liked-by'].edges() u, v = subg["liked-by"].edges()
old_u = F.gather_row(subg.nodes['game'].data[dgl.NID], u) old_u = F.gather_row(subg.nodes["game"].data[dgl.NID], u)
old_v = F.gather_row(subg.nodes['user'].data[dgl.NID], v) old_v = F.gather_row(subg.nodes["user"].data[dgl.NID], v)
assert F.array_equal(hg['liked-by'].edge_ids(old_u, old_v), subg['liked-by'].edata[dgl.EID]) assert F.array_equal(
hg["liked-by"].edge_ids(old_u, old_v), subg["liked-by"].edata[dgl.EID]
)
edge_set = set(zip(list(F.asnumpy(old_u)), list(F.asnumpy(old_v)))) edge_set = set(zip(list(F.asnumpy(old_u)), list(F.asnumpy(old_v))))
assert edge_set == {(2,0),(2,1),(1,0),(0,0)} assert edge_set == {(2, 0), (2, 1), (1, 0), (0, 0)}
assert subg.num_nodes("user") == 4
assert subg.num_nodes("game") == 3
assert subg.num_nodes("coin") == 0
assert subg.num_edges("flips") == 0
assert subg.num_nodes('user') == 4
assert subg.num_nodes('game') == 3
assert subg.num_nodes('coin') == 0
assert subg.num_edges('flips') == 0
@parametrize_idtype @parametrize_idtype
def test_out_subgraph(idtype): def test_out_subgraph(idtype):
hg = dgl.heterograph({ hg = dgl.heterograph(
('user', 'follow', 'user'): ([1, 2, 3, 0, 2, 3, 0], [0, 0, 0, 1, 1, 1, 2]), {
('user', 'play', 'game'): ([0, 0, 1, 3], [0, 1, 2, 2]), ("user", "follow", "user"): (
('game', 'liked-by', 'user'): ([2, 2, 2, 1, 1, 0], [0, 1, 2, 0, 3, 0]), [1, 2, 3, 0, 2, 3, 0],
('user', 'flips', 'coin'): ([0, 1, 2, 3], [0, 0, 0, 0]) [0, 0, 0, 1, 1, 1, 2],
}, idtype=idtype).to(F.ctx()) ),
subg = dgl.out_subgraph(hg, {'user' : [0,1], 'game' : 0}) ("user", "play", "game"): ([0, 0, 1, 3], [0, 1, 2, 2]),
("game", "liked-by", "user"): (
[2, 2, 2, 1, 1, 0],
[0, 1, 2, 0, 3, 0],
),
("user", "flips", "coin"): ([0, 1, 2, 3], [0, 0, 0, 0]),
},
idtype=idtype,
).to(F.ctx())
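# dgl.out_subgraph is the mirror image: it keeps the edges whose source is one of
# the seed nodes (per node type).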
subg = dgl.out_subgraph(hg, {"user": [0, 1], "game": 0})
assert subg.idtype == idtype assert subg.idtype == idtype
assert len(subg.ntypes) == 3 assert len(subg.ntypes) == 3
assert len(subg.etypes) == 4 assert len(subg.etypes) == 4
u, v = subg['follow'].edges() u, v = subg["follow"].edges()
edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v)))) edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
assert edge_set == {(1,0),(0,1),(0,2)} assert edge_set == {(1, 0), (0, 1), (0, 2)}
assert F.array_equal(hg['follow'].edge_ids(u, v), subg['follow'].edata[dgl.EID]) assert F.array_equal(
u, v = subg['play'].edges() hg["follow"].edge_ids(u, v), subg["follow"].edata[dgl.EID]
)
u, v = subg["play"].edges()
edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v)))) edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
assert edge_set == {(0,0),(0,1),(1,2)} assert edge_set == {(0, 0), (0, 1), (1, 2)}
assert F.array_equal(hg['play'].edge_ids(u, v), subg['play'].edata[dgl.EID]) assert F.array_equal(hg["play"].edge_ids(u, v), subg["play"].edata[dgl.EID])
u, v = subg['liked-by'].edges() u, v = subg["liked-by"].edges()
edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v)))) edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
assert edge_set == {(0,0)} assert edge_set == {(0, 0)}
assert F.array_equal(hg['liked-by'].edge_ids(u, v), subg['liked-by'].edata[dgl.EID]) assert F.array_equal(
u, v = subg['flips'].edges() hg["liked-by"].edge_ids(u, v), subg["liked-by"].edata[dgl.EID]
)
u, v = subg["flips"].edges()
edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v)))) edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
assert edge_set == {(0,0),(1,0)} assert edge_set == {(0, 0), (1, 0)}
assert F.array_equal(hg['flips'].edge_ids(u, v), subg['flips'].edata[dgl.EID]) assert F.array_equal(
hg["flips"].edge_ids(u, v), subg["flips"].edata[dgl.EID]
)
for ntype in subg.ntypes: for ntype in subg.ntypes:
assert dgl.NID not in subg.nodes[ntype].data assert dgl.NID not in subg.nodes[ntype].data
# Test store_ids # Test store_ids
subg = dgl.out_subgraph(hg, {'user' : [0,1], 'game' : 0}, store_ids=False) subg = dgl.out_subgraph(hg, {"user": [0, 1], "game": 0}, store_ids=False)
for etype in subg.canonical_etypes: for etype in subg.canonical_etypes:
assert dgl.EID not in subg.edges[etype].data assert dgl.EID not in subg.edges[etype].data
for ntype in subg.ntypes: for ntype in subg.ntypes:
assert dgl.NID not in subg.nodes[ntype].data assert dgl.NID not in subg.nodes[ntype].data
# Test relabel nodes # Test relabel nodes
subg = dgl.out_subgraph(hg, {'user': [1], 'game': 0}, relabel_nodes=True) subg = dgl.out_subgraph(hg, {"user": [1], "game": 0}, relabel_nodes=True)
assert subg.idtype == idtype assert subg.idtype == idtype
assert len(subg.ntypes) == 3 assert len(subg.ntypes) == 3
assert len(subg.etypes) == 4 assert len(subg.etypes) == 4
u, v = subg['follow'].edges() u, v = subg["follow"].edges()
old_u = F.gather_row(subg.nodes['user'].data[dgl.NID], u) old_u = F.gather_row(subg.nodes["user"].data[dgl.NID], u)
old_v = F.gather_row(subg.nodes['user'].data[dgl.NID], v) old_v = F.gather_row(subg.nodes["user"].data[dgl.NID], v)
edge_set = set(zip(list(F.asnumpy(old_u)), list(F.asnumpy(old_v)))) edge_set = set(zip(list(F.asnumpy(old_u)), list(F.asnumpy(old_v))))
assert edge_set == {(1, 0)} assert edge_set == {(1, 0)}
assert F.array_equal(hg['follow'].edge_ids(old_u, old_v), subg['follow'].edata[dgl.EID]) assert F.array_equal(
hg["follow"].edge_ids(old_u, old_v), subg["follow"].edata[dgl.EID]
)
u, v = subg['play'].edges() u, v = subg["play"].edges()
old_u = F.gather_row(subg.nodes['user'].data[dgl.NID], u) old_u = F.gather_row(subg.nodes["user"].data[dgl.NID], u)
old_v = F.gather_row(subg.nodes['game'].data[dgl.NID], v) old_v = F.gather_row(subg.nodes["game"].data[dgl.NID], v)
edge_set = set(zip(list(F.asnumpy(old_u)), list(F.asnumpy(old_v)))) edge_set = set(zip(list(F.asnumpy(old_u)), list(F.asnumpy(old_v))))
assert edge_set == {(1, 2)} assert edge_set == {(1, 2)}
assert F.array_equal(hg['play'].edge_ids(old_u, old_v), subg['play'].edata[dgl.EID]) assert F.array_equal(
hg["play"].edge_ids(old_u, old_v), subg["play"].edata[dgl.EID]
)
u, v = subg['liked-by'].edges() u, v = subg["liked-by"].edges()
old_u = F.gather_row(subg.nodes['game'].data[dgl.NID], u) old_u = F.gather_row(subg.nodes["game"].data[dgl.NID], u)
old_v = F.gather_row(subg.nodes['user'].data[dgl.NID], v) old_v = F.gather_row(subg.nodes["user"].data[dgl.NID], v)
edge_set = set(zip(list(F.asnumpy(old_u)), list(F.asnumpy(old_v)))) edge_set = set(zip(list(F.asnumpy(old_u)), list(F.asnumpy(old_v))))
assert edge_set == {(0,0)} assert edge_set == {(0, 0)}
assert F.array_equal(hg['liked-by'].edge_ids(old_u, old_v), subg['liked-by'].edata[dgl.EID]) assert F.array_equal(
hg["liked-by"].edge_ids(old_u, old_v), subg["liked-by"].edata[dgl.EID]
u, v = subg['flips'].edges() )
old_u = F.gather_row(subg.nodes['user'].data[dgl.NID], u)
old_v = F.gather_row(subg.nodes['coin'].data[dgl.NID], v) u, v = subg["flips"].edges()
old_u = F.gather_row(subg.nodes["user"].data[dgl.NID], u)
old_v = F.gather_row(subg.nodes["coin"].data[dgl.NID], v)
edge_set = set(zip(list(F.asnumpy(old_u)), list(F.asnumpy(old_v)))) edge_set = set(zip(list(F.asnumpy(old_u)), list(F.asnumpy(old_v))))
assert edge_set == {(1,0)} assert edge_set == {(1, 0)}
assert F.array_equal(hg['flips'].edge_ids(old_u, old_v), subg['flips'].edata[dgl.EID]) assert F.array_equal(
assert subg.num_nodes('user') == 2 hg["flips"].edge_ids(old_u, old_v), subg["flips"].edata[dgl.EID]
assert subg.num_nodes('game') == 2 )
assert subg.num_nodes('coin') == 1 assert subg.num_nodes("user") == 2
assert subg.num_nodes("game") == 2
assert subg.num_nodes("coin") == 1
def test_subgraph_message_passing(): def test_subgraph_message_passing():
# Unit test for PR #2055 # Unit test for PR #2055
g = dgl.graph(([0, 1, 2], [2, 3, 4])).to(F.cpu()) g = dgl.graph(([0, 1, 2], [2, 3, 4])).to(F.cpu())
g.ndata['x'] = F.copy_to(F.randn((5, 6)), F.cpu()) g.ndata["x"] = F.copy_to(F.randn((5, 6)), F.cpu())
sg = g.subgraph([1, 2, 3]).to(F.ctx()) sg = g.subgraph([1, 2, 3]).to(F.ctx())
sg.update_all(lambda edges: {'x': edges.src['x']}, lambda nodes: {'y': F.sum(nodes.mailbox['x'], 1)}) sg.update_all(
lambda edges: {"x": edges.src["x"]},
lambda nodes: {"y": F.sum(nodes.mailbox["x"], 1)},
)
@parametrize_idtype @parametrize_idtype
def test_khop_in_subgraph(idtype): def test_khop_in_subgraph(idtype):
g = dgl.graph(([1, 1, 2, 3, 4], [0, 2, 0, 4, 2]), idtype=idtype, device=F.ctx()) g = dgl.graph(
g.edata['w'] = F.tensor([ ([1, 1, 2, 3, 4], [0, 2, 0, 4, 2]), idtype=idtype, device=F.ctx()
[0, 1], )
[2, 3], g.edata["w"] = F.tensor([[0, 1], [2, 3], [4, 5], [6, 7], [8, 9]])
[4, 5],
[6, 7],
[8, 9]
])
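# khop_in_subgraph returns the subgraph reachable within k hops along in-edges of
# the seed node(s), plus `inv`, the IDs of the seeds inside the new subgraph.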
sg, inv = dgl.khop_in_subgraph(g, 0, k=2) sg, inv = dgl.khop_in_subgraph(g, 0, k=2)
assert sg.idtype == g.idtype assert sg.idtype == g.idtype
u, v = sg.edges() u, v = sg.edges()
edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v)))) edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
assert edge_set == {(1,0), (1,2), (2,0), (3,2)} assert edge_set == {(1, 0), (1, 2), (2, 0), (3, 2)}
assert F.array_equal(sg.edata[dgl.EID], F.tensor([0, 1, 2, 4], dtype=idtype)) assert F.array_equal(
assert F.array_equal(sg.edata['w'], F.tensor([ sg.edata[dgl.EID], F.tensor([0, 1, 2, 4], dtype=idtype)
[0, 1], )
[2, 3], assert F.array_equal(
[4, 5], sg.edata["w"], F.tensor([[0, 1], [2, 3], [4, 5], [8, 9]])
[8, 9] )
]))
assert F.array_equal(F.astype(inv, idtype), F.tensor([0], idtype)) assert F.array_equal(F.astype(inv, idtype), F.tensor([0], idtype))
# Test multiple nodes # Test multiple nodes
...@@ -497,66 +621,70 @@ def test_khop_in_subgraph(idtype): ...@@ -497,66 +621,70 @@ def test_khop_in_subgraph(idtype):
assert sg.num_edges() == 0 assert sg.num_edges() == 0
assert F.array_equal(F.astype(inv, idtype), F.tensor([0], idtype)) assert F.array_equal(F.astype(inv, idtype), F.tensor([0], idtype))
g = dgl.heterograph({ g = dgl.heterograph(
('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 2, 1]), {
('user', 'follows', 'user'): ([0, 1, 1], [1, 2, 2]), ("user", "plays", "game"): ([0, 1, 1, 2], [0, 0, 2, 1]),
}, idtype=idtype, device=F.ctx()) ("user", "follows", "user"): ([0, 1, 1], [1, 2, 2]),
sg, inv = dgl.khop_in_subgraph(g, {'game': 0}, k=2) },
idtype=idtype,
device=F.ctx(),
)
sg, inv = dgl.khop_in_subgraph(g, {"game": 0}, k=2)
assert sg.idtype == idtype assert sg.idtype == idtype
assert sg.num_nodes('game') == 1 assert sg.num_nodes("game") == 1
assert sg.num_nodes('user') == 2 assert sg.num_nodes("user") == 2
assert len(sg.ntypes) == 2 assert len(sg.ntypes) == 2
assert len(sg.etypes) == 2 assert len(sg.etypes) == 2
u, v = sg['follows'].edges() u, v = sg["follows"].edges()
edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v)))) edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
assert edge_set == {(0, 1)} assert edge_set == {(0, 1)}
u, v = sg['plays'].edges() u, v = sg["plays"].edges()
edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v)))) edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
assert edge_set == {(0, 0), (1, 0)} assert edge_set == {(0, 0), (1, 0)}
assert F.array_equal(F.astype(inv['game'], idtype), F.tensor([0], idtype)) assert F.array_equal(F.astype(inv["game"], idtype), F.tensor([0], idtype))
# Test isolated node # Test isolated node
sg, inv = dgl.khop_in_subgraph(g, {'user': 0}, k=2) sg, inv = dgl.khop_in_subgraph(g, {"user": 0}, k=2)
assert sg.idtype == idtype assert sg.idtype == idtype
assert sg.num_nodes('game') == 0 assert sg.num_nodes("game") == 0
assert sg.num_nodes('user') == 1 assert sg.num_nodes("user") == 1
assert sg.num_edges('follows') == 0 assert sg.num_edges("follows") == 0
assert sg.num_edges('plays') == 0 assert sg.num_edges("plays") == 0
assert F.array_equal(F.astype(inv['user'], idtype), F.tensor([0], idtype)) assert F.array_equal(F.astype(inv["user"], idtype), F.tensor([0], idtype))
# Test multiple nodes # Test multiple nodes
sg, inv = dgl.khop_in_subgraph(g, {'user': F.tensor([0, 1], idtype), 'game': 0}, k=1) sg, inv = dgl.khop_in_subgraph(
u, v = sg['follows'].edges() g, {"user": F.tensor([0, 1], idtype), "game": 0}, k=1
)
u, v = sg["follows"].edges()
edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v)))) edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
assert edge_set == {(0, 1)} assert edge_set == {(0, 1)}
u, v = sg['plays'].edges() u, v = sg["plays"].edges()
edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v)))) edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
assert edge_set == {(0, 0), (1, 0)} assert edge_set == {(0, 0), (1, 0)}
assert F.array_equal(F.astype(inv['user'], idtype), F.tensor([0, 1], idtype)) assert F.array_equal(
assert F.array_equal(F.astype(inv['game'], idtype), F.tensor([0], idtype)) F.astype(inv["user"], idtype), F.tensor([0, 1], idtype)
)
assert F.array_equal(F.astype(inv["game"], idtype), F.tensor([0], idtype))
@parametrize_idtype @parametrize_idtype
def test_khop_out_subgraph(idtype): def test_khop_out_subgraph(idtype):
g = dgl.graph(([0, 2, 0, 4, 2], [1, 1, 2, 3, 4]), idtype=idtype, device=F.ctx()) g = dgl.graph(
g.edata['w'] = F.tensor([ ([0, 2, 0, 4, 2], [1, 1, 2, 3, 4]), idtype=idtype, device=F.ctx()
[0, 1], )
[2, 3], g.edata["w"] = F.tensor([[0, 1], [2, 3], [4, 5], [6, 7], [8, 9]])
[4, 5],
[6, 7],
[8, 9]
])
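# khop_out_subgraph is the outgoing counterpart: nodes reachable within k hops from
# the seed(s) along out-edges, with `inv` again mapping seeds to their new IDs.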
sg, inv = dgl.khop_out_subgraph(g, 0, k=2) sg, inv = dgl.khop_out_subgraph(g, 0, k=2)
assert sg.idtype == g.idtype assert sg.idtype == g.idtype
u, v = sg.edges() u, v = sg.edges()
edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v)))) edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
assert edge_set == {(0,1), (2,1), (0,2), (2,3)} assert edge_set == {(0, 1), (2, 1), (0, 2), (2, 3)}
assert F.array_equal(sg.edata[dgl.EID], F.tensor([0, 2, 1, 4], dtype=idtype)) assert F.array_equal(
assert F.array_equal(sg.edata['w'], F.tensor([ sg.edata[dgl.EID], F.tensor([0, 2, 1, 4], dtype=idtype)
[0, 1], )
[4, 5], assert F.array_equal(
[2, 3], sg.edata["w"], F.tensor([[0, 1], [4, 5], [2, 3], [8, 9]])
[8, 9] )
]))
assert F.array_equal(F.astype(inv, idtype), F.tensor([0], idtype)) assert F.array_equal(F.astype(inv, idtype), F.tensor([0], idtype))
# Test multiple nodes # Test multiple nodes
...@@ -573,92 +701,111 @@ def test_khop_out_subgraph(idtype): ...@@ -573,92 +701,111 @@ def test_khop_out_subgraph(idtype):
assert sg.num_edges() == 0 assert sg.num_edges() == 0
assert F.array_equal(F.astype(inv, idtype), F.tensor([0], idtype)) assert F.array_equal(F.astype(inv, idtype), F.tensor([0], idtype))
g = dgl.heterograph({ g = dgl.heterograph(
('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 2, 1]), {
('user', 'follows', 'user'): ([0, 1], [1, 3]), ("user", "plays", "game"): ([0, 1, 1, 2], [0, 0, 2, 1]),
}, idtype=idtype, device=F.ctx()) ("user", "follows", "user"): ([0, 1], [1, 3]),
sg, inv = dgl.khop_out_subgraph(g, {'user': 0}, k=2) },
idtype=idtype,
device=F.ctx(),
)
sg, inv = dgl.khop_out_subgraph(g, {"user": 0}, k=2)
assert sg.idtype == idtype assert sg.idtype == idtype
assert sg.num_nodes('game') == 2 assert sg.num_nodes("game") == 2
assert sg.num_nodes('user') == 3 assert sg.num_nodes("user") == 3
assert len(sg.ntypes) == 2 assert len(sg.ntypes) == 2
assert len(sg.etypes) == 2 assert len(sg.etypes) == 2
u, v = sg['follows'].edges() u, v = sg["follows"].edges()
edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v)))) edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
assert edge_set == {(0, 1), (1, 2)} assert edge_set == {(0, 1), (1, 2)}
u, v = sg['plays'].edges() u, v = sg["plays"].edges()
edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v)))) edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
assert edge_set == {(0,0), (1,0), (1,1)} assert edge_set == {(0, 0), (1, 0), (1, 1)}
assert F.array_equal(F.astype(inv['user'], idtype), F.tensor([0], idtype)) assert F.array_equal(F.astype(inv["user"], idtype), F.tensor([0], idtype))
# Test isolated node # Test isolated node
sg, inv = dgl.khop_out_subgraph(g, {'user': 3}, k=2) sg, inv = dgl.khop_out_subgraph(g, {"user": 3}, k=2)
assert sg.idtype == idtype assert sg.idtype == idtype
assert sg.num_nodes('game') == 0 assert sg.num_nodes("game") == 0
assert sg.num_nodes('user') == 1 assert sg.num_nodes("user") == 1
assert sg.num_edges('follows') == 0 assert sg.num_edges("follows") == 0
assert sg.num_edges('plays') == 0 assert sg.num_edges("plays") == 0
assert F.array_equal(F.astype(inv['user'], idtype), F.tensor([0], idtype)) assert F.array_equal(F.astype(inv["user"], idtype), F.tensor([0], idtype))
# Test multiple nodes # Test multiple nodes
sg, inv = dgl.khop_out_subgraph(g, {'user': F.tensor([2], idtype), 'game': 0}, k=1) sg, inv = dgl.khop_out_subgraph(
assert sg.num_edges('follows') == 0 g, {"user": F.tensor([2], idtype), "game": 0}, k=1
u, v = sg['plays'].edges() )
assert sg.num_edges("follows") == 0
u, v = sg["plays"].edges()
edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v)))) edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
assert edge_set == {(0, 1)} assert edge_set == {(0, 1)}
assert F.array_equal(F.astype(inv['user'], idtype), F.tensor([0], idtype)) assert F.array_equal(F.astype(inv["user"], idtype), F.tensor([0], idtype))
assert F.array_equal(F.astype(inv['game'], idtype), F.tensor([0], idtype)) assert F.array_equal(F.astype(inv["game"], idtype), F.tensor([0], idtype))
@unittest.skipIf(not F.gpu_ctx(), 'only necessary with GPU')
@unittest.skipIf(not F.gpu_ctx(), "only necessary with GPU")
@pytest.mark.parametrize( @pytest.mark.parametrize(
'parent_idx_device', [('cpu', F.cpu()), ('cuda', F.cuda()), ('uva', F.cpu()), ('uva', F.cuda())]) "parent_idx_device",
@pytest.mark.parametrize('child_device', [F.cpu(), F.cuda()]) [("cpu", F.cpu()), ("cuda", F.cuda()), ("uva", F.cpu()), ("uva", F.cuda())],
)
@pytest.mark.parametrize("child_device", [F.cpu(), F.cuda()])
def test_subframes(parent_idx_device, child_device): def test_subframes(parent_idx_device, child_device):
parent_device, idx_device = parent_idx_device parent_device, idx_device = parent_idx_device
g = dgl.graph((F.tensor([1,2,3], dtype=F.int64), F.tensor([2,3,4], dtype=F.int64))) g = dgl.graph(
(F.tensor([1, 2, 3], dtype=F.int64), F.tensor([2, 3, 4], dtype=F.int64))
)
print(g.device) print(g.device)
g.ndata['x'] = F.randn((5, 4)) g.ndata["x"] = F.randn((5, 4))
g.edata['a'] = F.randn((3, 6)) g.edata["a"] = F.randn((3, 6))
idx = F.tensor([1, 2], dtype=F.int64) idx = F.tensor([1, 2], dtype=F.int64)
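# The 'uva' case keeps the graph on CPU but pins it (pin_memory_) so GPU kernels can
# access it directly via unified virtual addressing; only supported with PyTorch.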
if parent_device == 'cuda': if parent_device == "cuda":
g = g.to(F.cuda()) g = g.to(F.cuda())
elif parent_device == 'uva': elif parent_device == "uva":
if F.backend_name != 'pytorch': if F.backend_name != "pytorch":
pytest.skip("UVA only supported for PyTorch") pytest.skip("UVA only supported for PyTorch")
g = g.to(F.cpu()) g = g.to(F.cpu())
g.create_formats_() g.create_formats_()
g.pin_memory_() g.pin_memory_()
elif parent_device == 'cpu': elif parent_device == "cpu":
g = g.to(F.cpu()) g = g.to(F.cpu())
idx = F.copy_to(idx, idx_device) idx = F.copy_to(idx, idx_device)
sg = g.sample_neighbors(idx, 2).to(child_device) sg = g.sample_neighbors(idx, 2).to(child_device)
assert sg.device == F.context(sg.ndata['x']) assert sg.device == F.context(sg.ndata["x"])
assert sg.device == F.context(sg.edata['a']) assert sg.device == F.context(sg.edata["a"])
assert sg.device == child_device assert sg.device == child_device
if parent_device != 'uva': if parent_device != "uva":
sg = g.to(child_device).sample_neighbors(F.copy_to(idx, child_device), 2) sg = g.to(child_device).sample_neighbors(
assert sg.device == F.context(sg.ndata['x']) F.copy_to(idx, child_device), 2
assert sg.device == F.context(sg.edata['a']) )
assert sg.device == F.context(sg.ndata["x"])
assert sg.device == F.context(sg.edata["a"])
assert sg.device == child_device assert sg.device == child_device
if parent_device == 'uva': if parent_device == "uva":
g.unpin_memory_() g.unpin_memory_()
@unittest.skipIf(F._default_context_str != "gpu", reason="UVA only available on GPU")
@pytest.mark.parametrize('device', [F.cpu(), F.cuda()]) @unittest.skipIf(
@unittest.skipIf(dgl.backend.backend_name != "pytorch", reason="UVA only supported for PyTorch") F._default_context_str != "gpu", reason="UVA only available on GPU"
)
@pytest.mark.parametrize("device", [F.cpu(), F.cuda()])
@unittest.skipIf(
dgl.backend.backend_name != "pytorch",
reason="UVA only supported for PyTorch",
)
@parametrize_idtype @parametrize_idtype
def test_uva_subgraph(idtype, device): def test_uva_subgraph(idtype, device):
g = create_test_heterograph(idtype) g = create_test_heterograph(idtype)
g = g.to(F.cpu()) g = g.to(F.cpu())
g.create_formats_() g.create_formats_()
g.pin_memory_() g.pin_memory_()
indices = {'user': F.copy_to(F.tensor([0], idtype), device)} indices = {"user": F.copy_to(F.tensor([0], idtype), device)}
edge_indices = {'follows': F.copy_to(F.tensor([0], idtype), device)} edge_indices = {"follows": F.copy_to(F.tensor([0], idtype), device)}
assert g.subgraph(indices).device == device assert g.subgraph(indices).device == device
assert g.edge_subgraph(edge_indices).device == device assert g.edge_subgraph(edge_indices).device == device
assert g.in_subgraph(indices).device == device assert g.in_subgraph(indices).device == device
assert g.out_subgraph(indices).device == device assert g.out_subgraph(indices).device == device
if dgl.backend.backend_name != 'tensorflow': if dgl.backend.backend_name != "tensorflow":
# (BarclayII) Most TensorFlow functions somehow do not preserve device: a CPU tensor # (BarclayII) Most TensorFlow functions somehow do not preserve device: a CPU tensor
# becomes a GPU tensor after operations such as concat(), unique() or even sin(). # becomes a GPU tensor after operations such as concat(), unique() or even sin().
# Not sure what the best fix would be. # Not sure what the best fix would be.
...@@ -667,6 +814,7 @@ def test_uva_subgraph(idtype, device): ...@@ -667,6 +814,7 @@ def test_uva_subgraph(idtype, device):
assert g.sample_neighbors(indices, 1).device == device assert g.sample_neighbors(indices, 1).device == device
g.unpin_memory_() g.unpin_memory_()
if __name__ == '__main__':
if __name__ == "__main__":
test_edge_subgraph() test_edge_subgraph()
# test_uva_subgraph(F.int64, F.cpu()) # test_uva_subgraph(F.int64, F.cpu())
import itertools
import random import random
import sys import sys
import time import time
import unittest import unittest
import dgl import backend as F
import networkx as nx import networkx as nx
import numpy as np import numpy as np
import scipy.sparse as sp import scipy.sparse as sp
import backend as F
import itertools
from test_utils import parametrize_idtype from test_utils import parametrize_idtype
import dgl
np.random.seed(42) np.random.seed(42)
def toset(x): def toset(x):
# F.zerocopy_to_numpy may return an int # F.zerocopy_to_numpy may return an int
return set(F.zerocopy_to_numpy(x).tolist()) return set(F.zerocopy_to_numpy(x).tolist())
@parametrize_idtype @parametrize_idtype
def test_bfs(idtype, n=100): def test_bfs(idtype, n=100):
def _bfs_nx(g_nx, src): def _bfs_nx(g_nx, src):
...@@ -59,6 +61,7 @@ def test_bfs(idtype, n=100): ...@@ -59,6 +61,7 @@ def test_bfs(idtype, n=100):
assert len(edges_dgl) == len(edges_nx) assert len(edges_dgl) == len(edges_nx)
assert all(toset(x) == y for x, y in zip(edges_dgl, edges_nx)) assert all(toset(x) == y for x, y in zip(edges_dgl, edges_nx))
@parametrize_idtype @parametrize_idtype
def test_topological_nodes(idtype, n=100): def test_topological_nodes(idtype, n=100):
a = sp.random(n, n, 3 / n, data_rvs=lambda n: np.ones(n)) a = sp.random(n, n, 3 / n, data_rvs=lambda n: np.ones(n))
...@@ -68,12 +71,13 @@ def test_topological_nodes(idtype, n=100): ...@@ -68,12 +71,13 @@ def test_topological_nodes(idtype, n=100):
layers_dgl = dgl.topological_nodes_generator(g) layers_dgl = dgl.topological_nodes_generator(g)
adjmat = g.adjacency_matrix(transpose=True) adjmat = g.adjacency_matrix(transpose=True)
def tensor_topo_traverse(): def tensor_topo_traverse():
n = g.number_of_nodes() n = g.number_of_nodes()
mask = F.copy_to(F.ones((n, 1)), F.cpu()) mask = F.copy_to(F.ones((n, 1)), F.cpu())
degree = F.spmm(adjmat, mask) degree = F.spmm(adjmat, mask)
while F.reduce_sum(mask) != 0.: while F.reduce_sum(mask) != 0.0:
v = F.astype((degree == 0.), F.float32) v = F.astype((degree == 0.0), F.float32)
v = v * mask v = v * mask
mask = mask - v mask = mask - v
frontier = F.copy_to(F.nonzero_1d(F.squeeze(v, 1)), F.cpu()) frontier = F.copy_to(F.nonzero_1d(F.squeeze(v, 1)), F.cpu())
...@@ -85,33 +89,41 @@ def test_topological_nodes(idtype, n=100): ...@@ -85,33 +89,41 @@ def test_topological_nodes(idtype, n=100):
assert len(layers_dgl) == len(layers_spmv) assert len(layers_dgl) == len(layers_spmv)
assert all(toset(x) == toset(y) for x, y in zip(layers_dgl, layers_spmv)) assert all(toset(x) == toset(y) for x, y in zip(layers_dgl, layers_spmv))
DFS_LABEL_NAMES = ['forward', 'reverse', 'nontree']
DFS_LABEL_NAMES = ["forward", "reverse", "nontree"]
@parametrize_idtype @parametrize_idtype
def test_dfs_labeled_edges(idtype, example=False): def test_dfs_labeled_edges(idtype, example=False):
dgl_g = dgl.DGLGraph().astype(idtype) dgl_g = dgl.DGLGraph().astype(idtype)
dgl_g.add_nodes(6) dgl_g.add_nodes(6)
dgl_g.add_edges([0, 1, 0, 3, 3], [1, 2, 2, 4, 5]) dgl_g.add_edges([0, 1, 0, 3, 3], [1, 2, 2, 4, 5])
dgl_edges, dgl_labels = dgl.dfs_labeled_edges_generator( dgl_edges, dgl_labels = dgl.dfs_labeled_edges_generator(
dgl_g, [0, 3], has_reverse_edge=True, has_nontree_edge=True) dgl_g, [0, 3], has_reverse_edge=True, has_nontree_edge=True
)
dgl_edges = [toset(t) for t in dgl_edges] dgl_edges = [toset(t) for t in dgl_edges]
dgl_labels = [toset(t) for t in dgl_labels] dgl_labels = [toset(t) for t in dgl_labels]
g1_solutions = [ g1_solutions = [
# edges labels # edges labels
[[0, 1, 1, 0, 2], [0, 0, 1, 1, 2]], [[0, 1, 1, 0, 2], [0, 0, 1, 1, 2]],
[[2, 2, 0, 1, 0], [0, 1, 0, 2, 1]], [[2, 2, 0, 1, 0], [0, 1, 0, 2, 1]],
] ]
g2_solutions = [ g2_solutions = [
# edges labels # edges labels
[[3, 3, 4, 4], [0, 1, 0, 1]], [[3, 3, 4, 4], [0, 1, 0, 1]],
[[4, 4, 3, 3], [0, 1, 0, 1]], [[4, 4, 3, 3], [0, 1, 0, 1]],
] ]
def combine_frontiers(sol): def combine_frontiers(sol):
es, ls = zip(*sol) es, ls = zip(*sol)
es = [set(i for i in t if i is not None) es = [
for t in itertools.zip_longest(*es)] set(i for i in t if i is not None)
ls = [set(i for i in t if i is not None) for t in itertools.zip_longest(*es)
for t in itertools.zip_longest(*ls)] ]
ls = [
set(i for i in t if i is not None)
for t in itertools.zip_longest(*ls)
]
return es, ls return es, ls
for sol_set in itertools.product(g1_solutions, g2_solutions): for sol_set in itertools.product(g1_solutions, g2_solutions):
...@@ -121,7 +133,8 @@ def test_dfs_labeled_edges(idtype, example=False): ...@@ -121,7 +133,8 @@ def test_dfs_labeled_edges(idtype, example=False):
else: else:
assert False assert False
if __name__ == '__main__':
test_bfs(idtype='int32') if __name__ == "__main__":
test_topological_nodes(idtype='int32') test_bfs(idtype="int32")
test_dfs_labeled_edges(idtype='int32') test_topological_nodes(idtype="int32")
test_dfs_labeled_edges(idtype="int32")
# NOTE(vibwu): Currently cugraph must be imported before torch to avoid a resource cleanup issue. # NOTE(vibwu): Currently cugraph must be imported before torch to avoid a resource cleanup issue.
# See https://github.com/rapidsai/cugraph/issues/2718 # See https://github.com/rapidsai/cugraph/issues/2718
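# A minimal sketch of the import ordering the note above describes (assuming a CUDA
# machine with cugraph and the PyTorch backend available; illustrative only):
#
#     import cugraph  # must come first, per rapidsai/cugraph#2718
#     import torch    # importing torch afterwards avoids the resource cleanup issue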
import cugraph import unittest
import backend as F import backend as F
import dgl import cugraph
import numpy as np import numpy as np
from dgl import DGLGraph
import unittest
import pytest import pytest
import dgl
from dgl import DGLGraph
def test_dummy(): def test_dummy():
cg = cugraph.Graph() cg = cugraph.Graph()
assert cg is not None assert cg is not None
def test_to_cugraph_conversion(): def test_to_cugraph_conversion():
g = dgl.graph((F.tensor([0, 1, 2, 3]), F.tensor([1, 0, 3, 2]))).to('cuda') g = dgl.graph((F.tensor([0, 1, 2, 3]), F.tensor([1, 0, 3, 2]))).to("cuda")
cugraph_g = g.to_cugraph() cugraph_g = g.to_cugraph()
assert cugraph_g.number_of_nodes()==g.number_of_nodes() assert cugraph_g.number_of_nodes() == g.number_of_nodes()
assert cugraph_g.number_of_edges()==g.number_of_edges() assert cugraph_g.number_of_edges() == g.number_of_edges()
assert cugraph_g.has_edge(0, 1) assert cugraph_g.has_edge(0, 1)
assert cugraph_g.has_edge(1, 0) assert cugraph_g.has_edge(1, 0)
assert cugraph_g.has_edge(3, 2) assert cugraph_g.has_edge(3, 2)
def test_from_cugraph_conversion(): def test_from_cugraph_conversion():
# cudf is a dependency of cugraph # cudf is a dependency of cugraph
import cudf import cudf
# directed graph conversion test # directed graph conversion test
cugraph_g = cugraph.Graph(directed=True) cugraph_g = cugraph.Graph(directed=True)
df = cudf.DataFrame({"source":[0, 1, 2, 3], df = cudf.DataFrame({"source": [0, 1, 2, 3], "destination": [1, 2, 3, 2]})
"destination":[1, 2, 3, 2]})
cugraph_g.from_cudf_edgelist(df) cugraph_g.from_cudf_edgelist(df)
g = dgl.from_cugraph(cugraph_g) g = dgl.from_cugraph(cugraph_g)
assert g.device.type == 'cuda' assert g.device.type == "cuda"
assert g.number_of_nodes() == cugraph_g.number_of_nodes() assert g.number_of_nodes() == cugraph_g.number_of_nodes()
assert g.number_of_edges() == cugraph_g.number_of_edges() assert g.number_of_edges() == cugraph_g.number_of_edges()
...@@ -50,14 +53,13 @@ def test_from_cugraph_conversion(): ...@@ -50,14 +53,13 @@ def test_from_cugraph_conversion():
# undirected graph conversion test # undirected graph conversion test
cugraph_g = cugraph.Graph(directed=False) cugraph_g = cugraph.Graph(directed=False)
df = cudf.DataFrame({"source":[0, 1, 2, 3], df = cudf.DataFrame({"source": [0, 1, 2, 3], "destination": [1, 2, 3, 2]})
"destination":[1, 2, 3, 2]})
cugraph_g.from_cudf_edgelist(df) cugraph_g.from_cudf_edgelist(df)
g = dgl.from_cugraph(cugraph_g) g = dgl.from_cugraph(cugraph_g)
assert g.device.type == 'cuda' assert g.device.type == "cuda"
assert g.number_of_nodes() == cugraph_g.number_of_nodes() assert g.number_of_nodes() == cugraph_g.number_of_nodes()
# assert reverse edges are present # assert reverse edges are present
assert g.has_edges_between(0, 1) assert g.has_edges_between(0, 1)
......
import os import os
import dgl
import backend as F import backend as F
from numpy.testing import assert_array_equal from numpy.testing import assert_array_equal
import dgl
INTEGER = 2 INTEGER = 2
STR = 'hello world!' STR = "hello world!"
HELLO_SERVICE_ID = 901231 HELLO_SERVICE_ID = 901231
TENSOR = F.zeros((1000, 1000), F.int64, F.cpu()) TENSOR = F.zeros((1000, 1000), F.int64, F.cpu())
...@@ -47,25 +49,36 @@ class HelloRequest(dgl.distributed.Request): ...@@ -47,25 +49,36 @@ class HelloRequest(dgl.distributed.Request):
return res return res
def start_server(server_id, ip_config, num_servers, num_clients, net_type, keep_alive): def start_server(
server_id, ip_config, num_servers, num_clients, net_type, keep_alive
):
server_state = dgl.distributed.ServerState( server_state = dgl.distributed.ServerState(
None, local_g=None, partition_book=None, keep_alive=keep_alive) None, local_g=None, partition_book=None, keep_alive=keep_alive
)
dgl.distributed.register_service( dgl.distributed.register_service(
HELLO_SERVICE_ID, HelloRequest, HelloResponse) HELLO_SERVICE_ID, HelloRequest, HelloResponse
)
print("Start server {}".format(server_id)) print("Start server {}".format(server_id))
dgl.distributed.start_server(server_id=server_id, dgl.distributed.start_server(
ip_config=ip_config, server_id=server_id,
num_servers=num_servers, ip_config=ip_config,
num_clients=num_clients, num_servers=num_servers,
server_state=server_state, num_clients=num_clients,
net_type=net_type) server_state=server_state,
net_type=net_type,
)
def start_client(ip_config, num_servers, group_id, net_type): def start_client(ip_config, num_servers, group_id, net_type):
dgl.distributed.register_service( dgl.distributed.register_service(
HELLO_SERVICE_ID, HelloRequest, HelloResponse) HELLO_SERVICE_ID, HelloRequest, HelloResponse
)
dgl.distributed.connect_to_server( dgl.distributed.connect_to_server(
ip_config=ip_config, num_servers=num_servers, group_id=group_id, net_type=net_type) ip_config=ip_config,
num_servers=num_servers,
group_id=group_id,
net_type=net_type,
)
req = HelloRequest(STR, INTEGER, TENSOR, tensor_func) req = HelloRequest(STR, INTEGER, TENSOR, tensor_func)
server_namebook = dgl.distributed.read_ip_config(ip_config, num_servers) server_namebook = dgl.distributed.read_ip_config(ip_config, num_servers)
for server_id in server_namebook.keys(): for server_id in server_namebook.keys():
...@@ -102,19 +115,20 @@ def start_client(ip_config, num_servers, group_id, net_type): ...@@ -102,19 +115,20 @@ def start_client(ip_config, num_servers, group_id, net_type):
def main(): def main():
ip_config = os.environ.get('DIST_DGL_TEST_IP_CONFIG') ip_config = os.environ.get("DIST_DGL_TEST_IP_CONFIG")
num_servers = int(os.environ.get('DIST_DGL_TEST_NUM_SERVERS')) num_servers = int(os.environ.get("DIST_DGL_TEST_NUM_SERVERS"))
net_type = os.environ.get('DIST_DGL_TEST_NET_TYPE', 'tensorpipe') net_type = os.environ.get("DIST_DGL_TEST_NET_TYPE", "tensorpipe")
if os.environ.get('DIST_DGL_TEST_ROLE', 'server') == 'server': if os.environ.get("DIST_DGL_TEST_ROLE", "server") == "server":
server_id = int(os.environ.get('DIST_DGL_TEST_SERVER_ID')) server_id = int(os.environ.get("DIST_DGL_TEST_SERVER_ID"))
num_clients = int(os.environ.get('DIST_DGL_TEST_NUM_CLIENTS')) num_clients = int(os.environ.get("DIST_DGL_TEST_NUM_CLIENTS"))
keep_alive = 'DIST_DGL_TEST_KEEP_ALIVE' in os.environ keep_alive = "DIST_DGL_TEST_KEEP_ALIVE" in os.environ
start_server(server_id, ip_config, num_servers, start_server(
num_clients, net_type, keep_alive) server_id, ip_config, num_servers, num_clients, net_type, keep_alive
)
else: else:
group_id = int(os.environ.get('DIST_DGL_TEST_GROUP_ID', '0')) group_id = int(os.environ.get("DIST_DGL_TEST_GROUP_ID", "0"))
start_client(ip_config, num_servers, group_id, net_type) start_client(ip_config, num_servers, group_id, net_type)
if __name__ == '__main__': if __name__ == "__main__":
main() main()
import dgl
import os import os
import numpy as np import numpy as np
import dgl
import dgl.backend as F import dgl.backend as F
from dgl.distributed import load_partition_book from dgl.distributed import load_partition_book
mode = os.environ.get('DIST_DGL_TEST_MODE', "") mode = os.environ.get("DIST_DGL_TEST_MODE", "")
graph_name = os.environ.get('DIST_DGL_TEST_GRAPH_NAME', 'random_test_graph') graph_name = os.environ.get("DIST_DGL_TEST_GRAPH_NAME", "random_test_graph")
num_part = int(os.environ.get('DIST_DGL_TEST_NUM_PART')) num_part = int(os.environ.get("DIST_DGL_TEST_NUM_PART"))
num_servers_per_machine = int(os.environ.get('DIST_DGL_TEST_NUM_SERVER')) num_servers_per_machine = int(os.environ.get("DIST_DGL_TEST_NUM_SERVER"))
num_client_per_machine = int(os.environ.get('DIST_DGL_TEST_NUM_CLIENT')) num_client_per_machine = int(os.environ.get("DIST_DGL_TEST_NUM_CLIENT"))
shared_workspace = os.environ.get('DIST_DGL_TEST_WORKSPACE') shared_workspace = os.environ.get("DIST_DGL_TEST_WORKSPACE")
graph_path = os.environ.get('DIST_DGL_TEST_GRAPH_PATH') graph_path = os.environ.get("DIST_DGL_TEST_GRAPH_PATH")
part_id = int(os.environ.get('DIST_DGL_TEST_PART_ID')) part_id = int(os.environ.get("DIST_DGL_TEST_PART_ID"))
net_type = os.environ.get('DIST_DGL_TEST_NET_TYPE') net_type = os.environ.get("DIST_DGL_TEST_NET_TYPE")
ip_config = os.environ.get('DIST_DGL_TEST_IP_CONFIG', 'ip_config.txt') ip_config = os.environ.get("DIST_DGL_TEST_IP_CONFIG", "ip_config.txt")
os.environ["DGL_DIST_MODE"] = "distributed"
os.environ['DGL_DIST_MODE'] = 'distributed'
def zeros_init(shape, dtype): def zeros_init(shape, dtype):
return F.zeros(shape, dtype=dtype, ctx=F.cpu()) return F.zeros(shape, dtype=dtype, ctx=F.cpu())
def run_server(graph_name, server_id, server_count, num_clients, shared_mem, keep_alive=False):
def run_server(
graph_name,
server_id,
server_count,
num_clients,
shared_mem,
keep_alive=False,
):
# server_count = num_servers_per_machine # server_count = num_servers_per_machine
g = dgl.distributed.DistGraphServer(server_id, ip_config, g = dgl.distributed.DistGraphServer(
server_count, num_clients, server_id,
graph_path + '/{}.json'.format(graph_name), ip_config,
disable_shared_mem=not shared_mem, server_count,
graph_format=['csc', 'coo'], keep_alive=keep_alive, num_clients,
net_type=net_type) graph_path + "/{}.json".format(graph_name),
print('start server', server_id) disable_shared_mem=not shared_mem,
graph_format=["csc", "coo"],
keep_alive=keep_alive,
net_type=net_type,
)
print("start server", server_id)
g.start() g.start()
########################################## ##########################################
############### DistTensor ############### ############### DistTensor ###############
########################################## ##########################################
def dist_tensor_test_sanity(data_shape, name=None): def dist_tensor_test_sanity(data_shape, name=None):
local_rank = dgl.distributed.get_rank() % num_client_per_machine local_rank = dgl.distributed.get_rank() % num_client_per_machine
dist_ten = dgl.distributed.DistTensor(data_shape, dist_ten = dgl.distributed.DistTensor(
F.int32, data_shape, F.int32, init_func=zeros_init, name=name
init_func=zeros_init, )
name=name)
# arbitrary value # arbitrary value
stride = 3 stride = 3
pos = (part_id // 2) * num_client_per_machine + local_rank pos = (part_id // 2) * num_client_per_machine + local_rank
if part_id % 2 == 0: if part_id % 2 == 0:
dist_ten[pos*stride:(pos+1)*stride] = F.ones((stride, 2), dtype=F.int32, ctx=F.cpu()) * (pos+1) dist_ten[pos * stride : (pos + 1) * stride] = F.ones(
(stride, 2), dtype=F.int32, ctx=F.cpu()
) * (pos + 1)
dgl.distributed.client_barrier() dgl.distributed.client_barrier()
assert F.allclose(dist_ten[pos*stride:(pos+1)*stride], assert F.allclose(
F.ones((stride, 2), dtype=F.int32, ctx=F.cpu()) * (pos+1)) dist_ten[pos * stride : (pos + 1) * stride],
F.ones((stride, 2), dtype=F.int32, ctx=F.cpu()) * (pos + 1),
)
def dist_tensor_test_destroy_recreate(data_shape, name): def dist_tensor_test_destroy_recreate(data_shape, name):
dist_ten = dgl.distributed.DistTensor(data_shape, F.float32, name, init_func=zeros_init) dist_ten = dgl.distributed.DistTensor(
data_shape, F.float32, name, init_func=zeros_init
)
del dist_ten del dist_ten
dgl.distributed.client_barrier() dgl.distributed.client_barrier()
new_shape = (data_shape[0], 4) new_shape = (data_shape[0], 4)
dist_ten = dgl.distributed.DistTensor(new_shape, F.float32, name, init_func=zeros_init) dist_ten = dgl.distributed.DistTensor(
new_shape, F.float32, name, init_func=zeros_init
)
def dist_tensor_test_persistent(data_shape): def dist_tensor_test_persistent(data_shape):
dist_ten_name = 'persistent_dist_tensor' dist_ten_name = "persistent_dist_tensor"
dist_ten = dgl.distributed.DistTensor(data_shape, F.float32, dist_ten_name, init_func=zeros_init, dist_ten = dgl.distributed.DistTensor(
persistent=True) data_shape,
F.float32,
dist_ten_name,
init_func=zeros_init,
persistent=True,
)
del dist_ten del dist_ten
try: try:
dist_ten = dgl.distributed.DistTensor(data_shape, F.float32, dist_ten_name) dist_ten = dgl.distributed.DistTensor(
raise Exception('') data_shape, F.float32, dist_ten_name
)
raise Exception("")
except: except:
pass pass
...@@ -86,17 +119,20 @@ def test_dist_tensor(g): ...@@ -86,17 +119,20 @@ def test_dist_tensor(g):
############# DistEmbedding ############## ############# DistEmbedding ##############
########################################## ##########################################
def dist_embedding_check_sanity(num_nodes, optimizer, name=None): def dist_embedding_check_sanity(num_nodes, optimizer, name=None):
local_rank = dgl.distributed.get_rank() % num_client_per_machine local_rank = dgl.distributed.get_rank() % num_client_per_machine
emb = dgl.distributed.DistEmbedding(num_nodes, 1, name=name, init_func=zeros_init) emb = dgl.distributed.DistEmbedding(
num_nodes, 1, name=name, init_func=zeros_init
)
lr = 0.001 lr = 0.001
optim = optimizer(params=[emb], lr=lr) optim = optimizer(params=[emb], lr=lr)
stride = 3 stride = 3
pos = (part_id // 2) * num_client_per_machine + local_rank pos = (part_id // 2) * num_client_per_machine + local_rank
idx = F.arange(pos*stride, (pos+1)*stride) idx = F.arange(pos * stride, (pos + 1) * stride)
if part_id % 2 == 0: if part_id % 2 == 0:
with F.record_grad(): with F.record_grad():
...@@ -110,43 +146,64 @@ def dist_embedding_check_sanity(num_nodes, optimizer, name=None): ...@@ -110,43 +146,64 @@ def dist_embedding_check_sanity(num_nodes, optimizer, name=None):
value = emb(idx) value = emb(idx)
F.allclose(value, F.ones((len(idx), 1), dtype=F.int32, ctx=F.cpu()) * -lr) F.allclose(value, F.ones((len(idx), 1), dtype=F.int32, ctx=F.cpu()) * -lr)
not_update_idx = F.arange(((num_part + 1) / 2) * num_client_per_machine * stride, num_nodes) not_update_idx = F.arange(
((num_part + 1) / 2) * num_client_per_machine * stride, num_nodes
)
value = emb(not_update_idx) value = emb(not_update_idx)
assert np.all(F.asnumpy(value) == np.zeros((len(not_update_idx), 1))) assert np.all(F.asnumpy(value) == np.zeros((len(not_update_idx), 1)))
def dist_embedding_check_existing(num_nodes): def dist_embedding_check_existing(num_nodes):
dist_emb_name = "UniqueEmb" dist_emb_name = "UniqueEmb"
emb = dgl.distributed.DistEmbedding(num_nodes, 1, name=dist_emb_name, init_func=zeros_init) emb = dgl.distributed.DistEmbedding(
num_nodes, 1, name=dist_emb_name, init_func=zeros_init
)
try: try:
emb1 = dgl.distributed.DistEmbedding(num_nodes, 2, name=dist_emb_name, init_func=zeros_init) emb1 = dgl.distributed.DistEmbedding(
raise Exception('') num_nodes, 2, name=dist_emb_name, init_func=zeros_init
)
raise Exception("")
except: except:
pass pass
def test_dist_embedding(g): def test_dist_embedding(g):
num_nodes = g.number_of_nodes(g.ntypes[0]) num_nodes = g.number_of_nodes(g.ntypes[0])
dist_embedding_check_sanity(num_nodes, dgl.distributed.optim.SparseAdagrad) dist_embedding_check_sanity(num_nodes, dgl.distributed.optim.SparseAdagrad)
dist_embedding_check_sanity(num_nodes, dgl.distributed.optim.SparseAdagrad, name='SomeEmbedding') dist_embedding_check_sanity(
dist_embedding_check_sanity(num_nodes, dgl.distributed.optim.SparseAdam, name='SomeEmbedding') num_nodes, dgl.distributed.optim.SparseAdagrad, name="SomeEmbedding"
)
dist_embedding_check_sanity(
num_nodes, dgl.distributed.optim.SparseAdam, name="SomeEmbedding"
)
dist_embedding_check_existing(num_nodes) dist_embedding_check_existing(num_nodes)
if mode == "server": if mode == "server":
shared_mem = bool(int(os.environ.get('DIST_DGL_TEST_SHARED_MEM'))) shared_mem = bool(int(os.environ.get("DIST_DGL_TEST_SHARED_MEM")))
server_id = int(os.environ.get('DIST_DGL_TEST_SERVER_ID')) server_id = int(os.environ.get("DIST_DGL_TEST_SERVER_ID"))
run_server(graph_name, server_id, server_count=num_servers_per_machine, run_server(
num_clients=num_part*num_client_per_machine, shared_mem=shared_mem, keep_alive=False) graph_name,
server_id,
server_count=num_servers_per_machine,
num_clients=num_part * num_client_per_machine,
shared_mem=shared_mem,
keep_alive=False,
)
elif mode == "client": elif mode == "client":
os.environ['DGL_NUM_SERVER'] = str(num_servers_per_machine) os.environ["DGL_NUM_SERVER"] = str(num_servers_per_machine)
dgl.distributed.initialize(ip_config, net_type=net_type) dgl.distributed.initialize(ip_config, net_type=net_type)
gpb, graph_name, _, _ = load_partition_book(graph_path + '/{}.json'.format(graph_name), part_id, None) gpb, graph_name, _, _ = load_partition_book(
graph_path + "/{}.json".format(graph_name), part_id, None
)
g = dgl.distributed.DistGraph(graph_name, gpb=gpb) g = dgl.distributed.DistGraph(graph_name, gpb=gpb)
target_func_map = {"DistTensor": test_dist_tensor, target_func_map = {
"DistEmbedding": test_dist_embedding, "DistTensor": test_dist_tensor,
} "DistEmbedding": test_dist_embedding,
}
target = os.environ.get("DIST_DGL_TEST_OBJECT_TYPE", "") target = os.environ.get("DIST_DGL_TEST_OBJECT_TYPE", "")
if target not in target_func_map: if target not in target_func_map:
...@@ -158,4 +215,3 @@ elif mode == "client": ...@@ -158,4 +215,3 @@ elif mode == "client":
else: else:
print("DIST_DGL_TEST_MODE has to be either server or client") print("DIST_DGL_TEST_MODE has to be either server or client")
exit(1) exit(1)
import os import os
import unittest import unittest
from utils import execute_remote, get_ips from utils import execute_remote, get_ips
@unittest.skipIf(os.name == 'nt', reason='Do not support windows yet') @unittest.skipIf(os.name == "nt", reason="Do not support windows yet")
def test_tensorpipe_comm(): def test_tensorpipe_comm():
base_dir = os.environ.get('DIST_DGL_TEST_CPP_BIN_DIR', '.') base_dir = os.environ.get("DIST_DGL_TEST_CPP_BIN_DIR", ".")
ip_config = os.environ.get('DIST_DGL_TEST_IP_CONFIG', 'ip_config.txt') ip_config = os.environ.get("DIST_DGL_TEST_IP_CONFIG", "ip_config.txt")
client_bin = os.path.join(base_dir, 'rpc_client') client_bin = os.path.join(base_dir, "rpc_client")
server_bin = os.path.join(base_dir, 'rpc_server') server_bin = os.path.join(base_dir, "rpc_server")
ips = get_ips(ip_config) ips = get_ips(ip_config)
num_machines = len(ips) num_machines = len(ips)
procs = [] procs = []
for ip in ips: for ip in ips:
procs.append(execute_remote(server_bin + " " + procs.append(
str(num_machines) + " " + ip, ip)) execute_remote(server_bin + " " + str(num_machines) + " " + ip, ip)
)
for ip in ips: for ip in ips:
procs.append(execute_remote(client_bin + " " + ip_config, ip)) procs.append(execute_remote(client_bin + " " + ip_config, ip))
for p in procs: for p in procs:
......
import multiprocessing as mp
import os import os
import subprocess
import unittest import unittest
import numpy as np
import pytest import pytest
import multiprocessing as mp
import subprocess
import utils import utils
import dgl import dgl
import numpy as np
import dgl.backend as F import dgl.backend as F
from dgl.distributed import partition_graph from dgl.distributed import partition_graph
graph_name = os.environ.get('DIST_DGL_TEST_GRAPH_NAME', 'random_test_graph') graph_name = os.environ.get("DIST_DGL_TEST_GRAPH_NAME", "random_test_graph")
target = os.environ.get('DIST_DGL_TEST_OBJECT_TYPE', '') target = os.environ.get("DIST_DGL_TEST_OBJECT_TYPE", "")
shared_workspace = os.environ.get('DIST_DGL_TEST_WORKSPACE') shared_workspace = os.environ.get("DIST_DGL_TEST_WORKSPACE")
def create_graph(num_part, dist_graph_path, hetero): def create_graph(num_part, dist_graph_path, hetero):
if not hetero: if not hetero:
g = dgl.rand_graph(10000, 42000) g = dgl.rand_graph(10000, 42000)
g.ndata['feat'] = F.unsqueeze(F.arange(0, g.number_of_nodes()), 1) g.ndata["feat"] = F.unsqueeze(F.arange(0, g.number_of_nodes()), 1)
g.edata['feat'] = F.unsqueeze(F.arange(0, g.number_of_edges()), 1) g.edata["feat"] = F.unsqueeze(F.arange(0, g.number_of_edges()), 1)
partition_graph(g, graph_name, num_part, dist_graph_path) partition_graph(g, graph_name, num_part, dist_graph_path)
else: else:
from scipy import sparse as spsp from scipy import sparse as spsp
num_nodes = {'n1': 10000, 'n2': 10010, 'n3': 10020}
etypes = [('n1', 'r1', 'n2'), num_nodes = {"n1": 10000, "n2": 10010, "n3": 10020}
('n1', 'r2', 'n3'), etypes = [("n1", "r1", "n2"), ("n1", "r2", "n3"), ("n2", "r3", "n3")]
('n2', 'r3', 'n3')]
edges = {} edges = {}
for etype in etypes: for etype in etypes:
src_ntype, _, dst_ntype = etype src_ntype, _, dst_ntype = etype
arr = spsp.random(num_nodes[src_ntype], num_nodes[dst_ntype], density=0.001, format='coo', arr = spsp.random(
random_state=100) num_nodes[src_ntype],
num_nodes[dst_ntype],
density=0.001,
format="coo",
random_state=100,
)
edges[etype] = (arr.row, arr.col) edges[etype] = (arr.row, arr.col)
g = dgl.heterograph(edges, num_nodes) g = dgl.heterograph(edges, num_nodes)
g.nodes['n1'].data['feat'] = F.unsqueeze(F.arange(0, g.number_of_nodes('n1')), 1) g.nodes["n1"].data["feat"] = F.unsqueeze(
g.edges['r1'].data['feat'] = F.unsqueeze(F.arange(0, g.number_of_edges('r1')), 1) F.arange(0, g.number_of_nodes("n1")), 1
)
g.edges["r1"].data["feat"] = F.unsqueeze(
F.arange(0, g.number_of_edges("r1")), 1
)
partition_graph(g, graph_name, num_part, dist_graph_path) partition_graph(g, graph_name, num_part, dist_graph_path)
@unittest.skipIf(os.name == 'nt', reason='Do not support windows yet') @unittest.skipIf(os.name == "nt", reason="Do not support windows yet")
@pytest.mark.parametrize("net_type", ['tensorpipe', 'socket']) @pytest.mark.parametrize("net_type", ["tensorpipe", "socket"])
@pytest.mark.parametrize("num_servers", [1, 4]) @pytest.mark.parametrize("num_servers", [1, 4])
@pytest.mark.parametrize("num_clients", [1, 4]) @pytest.mark.parametrize("num_clients", [1, 4])
@pytest.mark.parametrize("hetero", [False, True]) @pytest.mark.parametrize("hetero", [False, True])
@pytest.mark.parametrize("shared_mem", [False, True]) @pytest.mark.parametrize("shared_mem", [False, True])
def test_dist_objects(net_type, num_servers, num_clients, hetero, shared_mem): def test_dist_objects(net_type, num_servers, num_clients, hetero, shared_mem):
if not shared_mem and num_servers > 1: if not shared_mem and num_servers > 1:
pytest.skip(f"Backup servers are not supported when shared memory is disabled") pytest.skip(
ip_config = os.environ.get('DIST_DGL_TEST_IP_CONFIG', 'ip_config.txt') f"Backup servers are not supported when shared memory is disabled"
workspace = os.environ.get('DIST_DGL_TEST_WORKSPACE', '/shared_workspace/dgl_dist_tensor_test/') )
ip_config = os.environ.get("DIST_DGL_TEST_IP_CONFIG", "ip_config.txt")
workspace = os.environ.get(
"DIST_DGL_TEST_WORKSPACE", "/shared_workspace/dgl_dist_tensor_test/"
)
ips = utils.get_ips(ip_config) ips = utils.get_ips(ip_config)
num_part = len(ips) num_part = len(ips)
test_bin = os.path.join(os.environ.get( test_bin = os.path.join(
'DIST_DGL_TEST_PY_BIN_DIR', '.'), 'run_dist_objects.py') os.environ.get("DIST_DGL_TEST_PY_BIN_DIR", "."), "run_dist_objects.py"
)
dist_graph_path = os.path.join(workspace, 'hetero_dist_graph' if hetero else 'dist_graph') dist_graph_path = os.path.join(
workspace, "hetero_dist_graph" if hetero else "dist_graph"
)
if not os.path.isdir(dist_graph_path): if not os.path.isdir(dist_graph_path):
create_graph(num_part, dist_graph_path, hetero) create_graph(num_part, dist_graph_path, hetero)
base_envs = f"DIST_DGL_TEST_WORKSPACE={workspace} " \ base_envs = (
f"DIST_DGL_TEST_NUM_PART={num_part} " \ f"DIST_DGL_TEST_WORKSPACE={workspace} "
f"DIST_DGL_TEST_NUM_SERVER={num_servers} " \ f"DIST_DGL_TEST_NUM_PART={num_part} "
f"DIST_DGL_TEST_NUM_CLIENT={num_clients} " \ f"DIST_DGL_TEST_NUM_SERVER={num_servers} "
f"DIST_DGL_TEST_NET_TYPE={net_type} " \ f"DIST_DGL_TEST_NUM_CLIENT={num_clients} "
f"DIST_DGL_TEST_GRAPH_PATH={dist_graph_path} " \ f"DIST_DGL_TEST_NET_TYPE={net_type} "
f"DIST_DGL_TEST_IP_CONFIG={ip_config} " f"DIST_DGL_TEST_GRAPH_PATH={dist_graph_path} "
f"DIST_DGL_TEST_IP_CONFIG={ip_config} "
)
procs = [] procs = []
# Start server # Start server
server_id = 0 server_id = 0
for part_id, ip in enumerate(ips): for part_id, ip in enumerate(ips):
for _ in range(num_servers): for _ in range(num_servers):
cmd_envs = base_envs + \ cmd_envs = (
f"DIST_DGL_TEST_SERVER_ID={server_id} " \ base_envs + f"DIST_DGL_TEST_SERVER_ID={server_id} "
f"DIST_DGL_TEST_PART_ID={part_id} " \ f"DIST_DGL_TEST_PART_ID={part_id} "
f"DIST_DGL_TEST_SHARED_MEM={str(int(shared_mem))} " \ f"DIST_DGL_TEST_SHARED_MEM={str(int(shared_mem))} "
f"DIST_DGL_TEST_MODE=server " f"DIST_DGL_TEST_MODE=server "
procs.append(utils.execute_remote( )
f"{cmd_envs} python3 {test_bin}", procs.append(
ip)) utils.execute_remote(f"{cmd_envs} python3 {test_bin}", ip)
)
server_id += 1 server_id += 1
# Start client processes # Start client processes
for part_id, ip in enumerate(ips): for part_id, ip in enumerate(ips):
for _ in range(num_clients): for _ in range(num_clients):
cmd_envs = base_envs + \ cmd_envs = (
f"DIST_DGL_TEST_PART_ID={part_id} " \ base_envs + f"DIST_DGL_TEST_PART_ID={part_id} "
f"DIST_DGL_TEST_OBJECT_TYPE={target} " \ f"DIST_DGL_TEST_OBJECT_TYPE={target} "
f"DIST_DGL_TEST_MODE=client " f"DIST_DGL_TEST_MODE=client "
procs.append(utils.execute_remote( )
f"{cmd_envs} python3 {test_bin}", procs.append(
ip)) utils.execute_remote(f"{cmd_envs} python3 {test_bin}", ip)
)
for p in procs: for p in procs:
p.join() p.join()
assert p.exitcode == 0 assert p.exitcode == 0
import multiprocessing as mp
import os import os
import unittest import unittest
import pytest import pytest
import multiprocessing as mp
import utils import utils
dgl_envs = f"PYTHONUNBUFFERED=1 DMLC_LOG_DEBUG=1 DGLBACKEND={os.environ.get('DGLBACKEND')} DGL_LIBRARY_PATH={os.environ.get('DGL_LIBRARY_PATH')} PYTHONPATH={os.environ.get('PYTHONPATH')} " dgl_envs = f"PYTHONUNBUFFERED=1 DMLC_LOG_DEBUG=1 DGLBACKEND={os.environ.get('DGLBACKEND')} DGL_LIBRARY_PATH={os.environ.get('DGL_LIBRARY_PATH')} PYTHONPATH={os.environ.get('PYTHONPATH')} "
@unittest.skipIf(os.name == 'nt', reason='Do not support windows yet') @unittest.skipIf(os.name == "nt", reason="Do not support windows yet")
@pytest.mark.parametrize("net_type", ['socket', 'tensorpipe']) @pytest.mark.parametrize("net_type", ["socket", "tensorpipe"])
def test_rpc(net_type): def test_rpc(net_type):
ip_config = os.environ.get('DIST_DGL_TEST_IP_CONFIG', 'ip_config.txt') ip_config = os.environ.get("DIST_DGL_TEST_IP_CONFIG", "ip_config.txt")
num_clients = 1 num_clients = 1
num_servers = 1 num_servers = 1
ips = utils.get_ips(ip_config) ips = utils.get_ips(ip_config)
num_machines = len(ips) num_machines = len(ips)
test_bin = os.path.join(os.environ.get( test_bin = os.path.join(
'DIST_DGL_TEST_PY_BIN_DIR', '.'), 'rpc_basic.py') os.environ.get("DIST_DGL_TEST_PY_BIN_DIR", "."), "rpc_basic.py"
base_envs = dgl_envs + \ )
f" DGL_DIST_MODE=distributed DIST_DGL_TEST_IP_CONFIG={ip_config} DIST_DGL_TEST_NUM_SERVERS={num_servers} DIST_DGL_TEST_NET_TYPE={net_type} " base_envs = (
dgl_envs
+ f" DGL_DIST_MODE=distributed DIST_DGL_TEST_IP_CONFIG={ip_config} DIST_DGL_TEST_NUM_SERVERS={num_servers} DIST_DGL_TEST_NET_TYPE={net_type} "
)
procs = [] procs = []
# start server processes # start server processes
server_id = 0 server_id = 0
for ip in ips: for ip in ips:
for _ in range(num_servers): for _ in range(num_servers):
server_envs = base_envs + \ server_envs = (
f" DIST_DGL_TEST_ROLE=server DIST_DGL_TEST_SERVER_ID={server_id} DIST_DGL_TEST_NUM_CLIENTS={num_clients * num_machines} " base_envs
procs.append(utils.execute_remote( + f" DIST_DGL_TEST_ROLE=server DIST_DGL_TEST_SERVER_ID={server_id} DIST_DGL_TEST_NUM_CLIENTS={num_clients * num_machines} "
server_envs + " python3 " + test_bin, ip)) )
procs.append(
utils.execute_remote(server_envs + " python3 " + test_bin, ip)
)
server_id += 1 server_id += 1
# start client processes # start client processes
client_envs = base_envs + " DIST_DGL_TEST_ROLE=client DIST_DGL_TEST_GROUP_ID=0 " client_envs = (
base_envs + " DIST_DGL_TEST_ROLE=client DIST_DGL_TEST_GROUP_ID=0 "
)
for ip in ips: for ip in ips:
for _ in range(num_clients): for _ in range(num_clients):
procs.append(utils.execute_remote( procs.append(
client_envs + " python3 "+test_bin, ip)) utils.execute_remote(client_envs + " python3 " + test_bin, ip)
)
for p in procs: for p in procs:
p.join() p.join()
assert p.exitcode == 0 assert p.exitcode == 0
import subprocess
import multiprocessing as mp import multiprocessing as mp
from typing import Optional
import os import os
import subprocess
from typing import Optional
def run(ssh_cmd): def run(ssh_cmd):
subprocess.check_call(ssh_cmd, shell=True) subprocess.check_call(ssh_cmd, shell=True)
def execute_remote( def execute_remote(
cmd: str, cmd: str, ip: str, port: Optional[int] = 22, username: Optional[str] = ""
ip: str,
port: Optional[int] = 22,
username: Optional[str] = ""
) -> mp.Process: ) -> mp.Process:
"""Execute command line on remote machine via ssh. """Execute command line on remote machine via ssh.
...@@ -30,18 +28,18 @@ def execute_remote( ...@@ -30,18 +28,18 @@ def execute_remote(
if username: if username:
ip_prefix += "{username}@".format(username=username) ip_prefix += "{username}@".format(username=username)
custom_port = os.getenv('DIST_DGL_TEST_SSH_PORT', '') custom_port = os.getenv("DIST_DGL_TEST_SSH_PORT", "")
if custom_port: if custom_port:
port = custom_port port = custom_port
custom_ssh_key = os.getenv('DIST_DGL_TEST_SSH_KEY', '') custom_ssh_key = os.getenv("DIST_DGL_TEST_SSH_KEY", "")
if custom_ssh_key: if custom_ssh_key:
custom_ssh_key = os.path.expanduser(custom_ssh_key) custom_ssh_key = os.path.expanduser(custom_ssh_key)
custom_ssh_key = "-i " + custom_ssh_key custom_ssh_key = "-i " + custom_ssh_key
ssh_setup = os.getenv('DIST_DGL_TEST_SSH_SETUP', '') ssh_setup = os.getenv("DIST_DGL_TEST_SSH_SETUP", "")
if ssh_setup: if ssh_setup:
cmd = ssh_setup + ';' + cmd cmd = ssh_setup + ";" + cmd
# Construct ssh command that executes `cmd` on the remote host # Construct ssh command that executes `cmd` on the remote host
ssh_cmd = "ssh -o StrictHostKeyChecking=no {ssh_key} -p {port} {ip_prefix}{ip} '{cmd}'".format( ssh_cmd = "ssh -o StrictHostKeyChecking=no {ssh_key} -p {port} {ip_prefix}{ip} '{cmd}'".format(
ssh_key=custom_ssh_key, ssh_key=custom_ssh_key,
...@@ -50,11 +48,12 @@ def execute_remote( ...@@ -50,11 +48,12 @@ def execute_remote(
ip=ip, ip=ip,
cmd=cmd, cmd=cmd,
) )
ctx = mp.get_context('spawn') ctx = mp.get_context("spawn")
proc = ctx.Process(target=run, args=(ssh_cmd,)) proc = ctx.Process(target=run, args=(ssh_cmd,))
proc.start() proc.start()
return proc return proc
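# A minimal usage sketch for the execute_remote helper above (assuming passwordless
# SSH access to the target host; the command and IP below are hypothetical):
#
#     proc = execute_remote("echo hello", "127.0.0.1")
#     proc.join()
#     assert proc.exitcode == 0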
def get_ips(ip_config): def get_ips(ip_config):
ips = [] ips = []
with open(ip_config) as f: with open(ip_config) as f:
...@@ -62,6 +61,7 @@ def get_ips(ip_config): ...@@ -62,6 +61,7 @@ def get_ips(ip_config):
result = line.strip().split() result = line.strip().split()
if len(result) != 1: if len(result) != 1:
raise RuntimeError( raise RuntimeError(
"Invalid format of ip_config:{}".format(ip_config)) "Invalid format of ip_config:{}".format(ip_config)
)
ips.append(result[0]) ips.append(result[0])
return ips return ips
import os import os
os.environ['OMP_NUM_THREADS'] = '1'
import dgl os.environ["OMP_NUM_THREADS"] = "1"
import math
import multiprocessing as mp
import pickle
import socket
import sys import sys
import numpy as np
import time import time
import socket
from scipy import sparse as spsp
from numpy.testing import assert_array_equal
from multiprocessing import Process, Manager, Condition, Value
import multiprocessing as mp
from dgl.heterograph_index import create_unitgraph_from_coo
from dgl.data.utils import load_graphs, save_graphs
from dgl.distributed import DistGraphServer, DistGraph
from dgl.distributed import partition_graph, load_partition, load_partition_book, node_split, edge_split
from numpy.testing import assert_almost_equal
import backend as F
import math
import unittest import unittest
import pickle from multiprocessing import Condition, Manager, Process, Value
from utils import reset_envs, generate_ip_config, create_random_graph
import backend as F
import numpy as np
import pytest import pytest
from numpy.testing import assert_almost_equal, assert_array_equal
from scipy import sparse as spsp
from utils import create_random_graph, generate_ip_config, reset_envs
if os.name != 'nt': import dgl
from dgl.data.utils import load_graphs, save_graphs
from dgl.distributed import (
DistGraph,
DistGraphServer,
edge_split,
load_partition,
load_partition_book,
node_split,
partition_graph,
)
from dgl.heterograph_index import create_unitgraph_from_coo
if os.name != "nt":
import fcntl import fcntl
import struct import struct
def run_server(graph_name, server_id, server_count, num_clients, shared_mem, keep_alive=False):
g = DistGraphServer(server_id, "kv_ip_config.txt", server_count, num_clients, def run_server(
'/tmp/dist_graph/{}.json'.format(graph_name), graph_name,
disable_shared_mem=not shared_mem, server_id,
graph_format=['csc', 'coo'], keep_alive=keep_alive) server_count,
print('start server', server_id) num_clients,
shared_mem,
keep_alive=False,
):
g = DistGraphServer(
server_id,
"kv_ip_config.txt",
server_count,
num_clients,
"/tmp/dist_graph/{}.json".format(graph_name),
disable_shared_mem=not shared_mem,
graph_format=["csc", "coo"],
keep_alive=keep_alive,
)
print("start server", server_id)
# verify dtype of underlying graph # verify dtype of underlying graph
cg = g.client_g cg = g.client_g
for k, dtype in dgl.distributed.dist_graph.FIELD_DICT.items(): for k, dtype in dgl.distributed.dist_graph.FIELD_DICT.items():
if k in cg.ndata: if k in cg.ndata:
assert F.dtype( assert (
cg.ndata[k]) == dtype, "Data type of {} in ndata should be {}.".format(k, dtype) F.dtype(cg.ndata[k]) == dtype
), "Data type of {} in ndata should be {}.".format(k, dtype)
if k in cg.edata: if k in cg.edata:
assert F.dtype( assert (
cg.edata[k]) == dtype, "Data type of {} in edata should be {}.".format(k, dtype) F.dtype(cg.edata[k]) == dtype
), "Data type of {} in edata should be {}.".format(k, dtype)
g.start() g.start()
def emb_init(shape, dtype): def emb_init(shape, dtype):
return F.zeros(shape, dtype, F.cpu()) return F.zeros(shape, dtype, F.cpu())
def rand_init(shape, dtype): def rand_init(shape, dtype):
return F.tensor(np.random.normal(size=shape), F.float32) return F.tensor(np.random.normal(size=shape), F.float32)
def check_dist_graph_empty(g, num_clients, num_nodes, num_edges): def check_dist_graph_empty(g, num_clients, num_nodes, num_edges):
# Test API # Test API
assert g.number_of_nodes() == num_nodes assert g.number_of_nodes() == num_nodes
...@@ -55,60 +83,80 @@ def check_dist_graph_empty(g, num_clients, num_nodes, num_edges): ...@@ -55,60 +83,80 @@ def check_dist_graph_empty(g, num_clients, num_nodes, num_edges):
# Test init node data # Test init node data
new_shape = (g.number_of_nodes(), 2) new_shape = (g.number_of_nodes(), 2)
g.ndata['test1'] = dgl.distributed.DistTensor(new_shape, F.int32) g.ndata["test1"] = dgl.distributed.DistTensor(new_shape, F.int32)
nids = F.arange(0, int(g.number_of_nodes() / 2)) nids = F.arange(0, int(g.number_of_nodes() / 2))
feats = g.ndata['test1'][nids] feats = g.ndata["test1"][nids]
assert np.all(F.asnumpy(feats) == 0) assert np.all(F.asnumpy(feats) == 0)
# create a tensor, destroy it, and create it again. # create a tensor, destroy it, and create it again.
test3 = dgl.distributed.DistTensor(new_shape, F.float32, 'test3', init_func=rand_init) test3 = dgl.distributed.DistTensor(
new_shape, F.float32, "test3", init_func=rand_init
)
del test3 del test3
test3 = dgl.distributed.DistTensor((g.number_of_nodes(), 3), F.float32, 'test3') test3 = dgl.distributed.DistTensor(
(g.number_of_nodes(), 3), F.float32, "test3"
)
del test3 del test3
# Test write data # Test write data
new_feats = F.ones((len(nids), 2), F.int32, F.cpu()) new_feats = F.ones((len(nids), 2), F.int32, F.cpu())
g.ndata['test1'][nids] = new_feats g.ndata["test1"][nids] = new_feats
feats = g.ndata['test1'][nids] feats = g.ndata["test1"][nids]
assert np.all(F.asnumpy(feats) == 1) assert np.all(F.asnumpy(feats) == 1)
# Test metadata operations. # Test metadata operations.
assert g.node_attr_schemes()['test1'].dtype == F.int32 assert g.node_attr_schemes()["test1"].dtype == F.int32
print('end') print("end")
def run_client_empty(graph_name, part_id, server_count, num_clients, num_nodes, num_edges):
os.environ['DGL_NUM_SERVER'] = str(server_count) def run_client_empty(
graph_name, part_id, server_count, num_clients, num_nodes, num_edges
):
os.environ["DGL_NUM_SERVER"] = str(server_count)
dgl.distributed.initialize("kv_ip_config.txt") dgl.distributed.initialize("kv_ip_config.txt")
gpb, graph_name, _, _ = load_partition_book('/tmp/dist_graph/{}.json'.format(graph_name), gpb, graph_name, _, _ = load_partition_book(
part_id, None) "/tmp/dist_graph/{}.json".format(graph_name), part_id, None
)
g = DistGraph(graph_name, gpb=gpb) g = DistGraph(graph_name, gpb=gpb)
check_dist_graph_empty(g, num_clients, num_nodes, num_edges) check_dist_graph_empty(g, num_clients, num_nodes, num_edges)
def check_server_client_empty(shared_mem, num_servers, num_clients): def check_server_client_empty(shared_mem, num_servers, num_clients):
prepare_dist(num_servers) prepare_dist(num_servers)
g = create_random_graph(10000) g = create_random_graph(10000)
# Partition the graph # Partition the graph
num_parts = 1 num_parts = 1
graph_name = 'dist_graph_test_1' graph_name = "dist_graph_test_1"
partition_graph(g, graph_name, num_parts, '/tmp/dist_graph') partition_graph(g, graph_name, num_parts, "/tmp/dist_graph")
# let's just test on one partition for now. # let's just test on one partition for now.
# We cannot run multiple servers and clients on the same machine. # We cannot run multiple servers and clients on the same machine.
serv_ps = [] serv_ps = []
ctx = mp.get_context('spawn') ctx = mp.get_context("spawn")
for serv_id in range(num_servers): for serv_id in range(num_servers):
p = ctx.Process(target=run_server, args=(graph_name, serv_id, num_servers, p = ctx.Process(
num_clients, shared_mem)) target=run_server,
args=(graph_name, serv_id, num_servers, num_clients, shared_mem),
)
serv_ps.append(p) serv_ps.append(p)
p.start() p.start()
cli_ps = [] cli_ps = []
for cli_id in range(num_clients): for cli_id in range(num_clients):
print('start client', cli_id) print("start client", cli_id)
p = ctx.Process(target=run_client_empty, args=(graph_name, 0, num_servers, num_clients, p = ctx.Process(
g.number_of_nodes(), g.number_of_edges())) target=run_client_empty,
args=(
graph_name,
0,
num_servers,
num_clients,
g.number_of_nodes(),
g.number_of_edges(),
),
)
p.start() p.start()
cli_ps.append(p) cli_ps.append(p)
...@@ -118,45 +166,79 @@ def check_server_client_empty(shared_mem, num_servers, num_clients): ...@@ -118,45 +166,79 @@ def check_server_client_empty(shared_mem, num_servers, num_clients):
for p in serv_ps: for p in serv_ps:
p.join() p.join()
print('clients have terminated') print("clients have terminated")
def run_client(graph_name, part_id, server_count, num_clients, num_nodes, num_edges, group_id):
os.environ['DGL_NUM_SERVER'] = str(server_count) def run_client(
os.environ['DGL_GROUP_ID'] = str(group_id) graph_name,
part_id,
server_count,
num_clients,
num_nodes,
num_edges,
group_id,
):
os.environ["DGL_NUM_SERVER"] = str(server_count)
os.environ["DGL_GROUP_ID"] = str(group_id)
dgl.distributed.initialize("kv_ip_config.txt") dgl.distributed.initialize("kv_ip_config.txt")
gpb, graph_name, _, _ = load_partition_book('/tmp/dist_graph/{}.json'.format(graph_name), gpb, graph_name, _, _ = load_partition_book(
part_id, None) "/tmp/dist_graph/{}.json".format(graph_name), part_id, None
)
g = DistGraph(graph_name, gpb=gpb) g = DistGraph(graph_name, gpb=gpb)
check_dist_graph(g, num_clients, num_nodes, num_edges) check_dist_graph(g, num_clients, num_nodes, num_edges)
def run_emb_client(graph_name, part_id, server_count, num_clients, num_nodes, num_edges, group_id):
os.environ['DGL_NUM_SERVER'] = str(server_count) def run_emb_client(
os.environ['DGL_GROUP_ID'] = str(group_id) graph_name,
part_id,
server_count,
num_clients,
num_nodes,
num_edges,
group_id,
):
os.environ["DGL_NUM_SERVER"] = str(server_count)
os.environ["DGL_GROUP_ID"] = str(group_id)
dgl.distributed.initialize("kv_ip_config.txt") dgl.distributed.initialize("kv_ip_config.txt")
gpb, graph_name, _, _ = load_partition_book('/tmp/dist_graph/{}.json'.format(graph_name), gpb, graph_name, _, _ = load_partition_book(
part_id, None) "/tmp/dist_graph/{}.json".format(graph_name), part_id, None
)
g = DistGraph(graph_name, gpb=gpb) g = DistGraph(graph_name, gpb=gpb)
check_dist_emb(g, num_clients, num_nodes, num_edges) check_dist_emb(g, num_clients, num_nodes, num_edges)
def run_client_hierarchy(graph_name, part_id, server_count, node_mask, edge_mask, return_dict):
os.environ['DGL_NUM_SERVER'] = str(server_count) def run_client_hierarchy(
graph_name, part_id, server_count, node_mask, edge_mask, return_dict
):
os.environ["DGL_NUM_SERVER"] = str(server_count)
dgl.distributed.initialize("kv_ip_config.txt") dgl.distributed.initialize("kv_ip_config.txt")
gpb, graph_name, _, _ = load_partition_book('/tmp/dist_graph/{}.json'.format(graph_name), gpb, graph_name, _, _ = load_partition_book(
part_id, None) "/tmp/dist_graph/{}.json".format(graph_name), part_id, None
)
g = DistGraph(graph_name, gpb=gpb) g = DistGraph(graph_name, gpb=gpb)
node_mask = F.tensor(node_mask) node_mask = F.tensor(node_mask)
edge_mask = F.tensor(edge_mask) edge_mask = F.tensor(edge_mask)
nodes = node_split(node_mask, g.get_partition_book(), node_trainer_ids=g.ndata['trainer_id']) nodes = node_split(
edges = edge_split(edge_mask, g.get_partition_book(), edge_trainer_ids=g.edata['trainer_id']) node_mask,
g.get_partition_book(),
node_trainer_ids=g.ndata["trainer_id"],
)
edges = edge_split(
edge_mask,
g.get_partition_book(),
edge_trainer_ids=g.edata["trainer_id"],
)
rank = g.rank() rank = g.rank()
return_dict[rank] = (nodes, edges) return_dict[rank] = (nodes, edges)
def check_dist_emb(g, num_clients, num_nodes, num_edges): def check_dist_emb(g, num_clients, num_nodes, num_edges):
from dgl.distributed.optim import SparseAdagrad
from dgl.distributed import DistEmbedding from dgl.distributed import DistEmbedding
from dgl.distributed.optim import SparseAdagrad
# Test sparse emb # Test sparse emb
try: try:
emb = DistEmbedding(g.number_of_nodes(), 1, 'emb1', emb_init) emb = DistEmbedding(g.number_of_nodes(), 1, "emb1", emb_init)
nids = F.arange(0, int(g.number_of_nodes())) nids = F.arange(0, int(g.number_of_nodes()))
lr = 0.001 lr = 0.001
optimizer = SparseAdagrad([emb], lr=lr) optimizer = SparseAdagrad([emb], lr=lr)
...@@ -173,14 +255,18 @@ def check_dist_emb(g, num_clients, num_nodes, num_edges): ...@@ -173,14 +255,18 @@ def check_dist_emb(g, num_clients, num_nodes, num_edges):
feats1 = emb(rest) feats1 = emb(rest)
assert np.all(F.asnumpy(feats1) == np.zeros((len(rest), 1))) assert np.all(F.asnumpy(feats1) == np.zeros((len(rest), 1)))
policy = dgl.distributed.PartitionPolicy('node', g.get_partition_book()) policy = dgl.distributed.PartitionPolicy("node", g.get_partition_book())
grad_sum = dgl.distributed.DistTensor((g.number_of_nodes(), 1), F.float32, grad_sum = dgl.distributed.DistTensor(
'emb1_sum', policy) (g.number_of_nodes(), 1), F.float32, "emb1_sum", policy
)
if num_clients == 1: if num_clients == 1:
assert np.all(F.asnumpy(grad_sum[nids]) == np.ones((len(nids), 1)) * num_clients) assert np.all(
F.asnumpy(grad_sum[nids])
== np.ones((len(nids), 1)) * num_clients
)
assert np.all(F.asnumpy(grad_sum[rest]) == np.zeros((len(rest), 1))) assert np.all(F.asnumpy(grad_sum[rest]) == np.zeros((len(rest), 1)))
emb = DistEmbedding(g.number_of_nodes(), 1, 'emb2', emb_init) emb = DistEmbedding(g.number_of_nodes(), 1, "emb2", emb_init)
with F.no_grad(): with F.no_grad():
feats1 = emb(nids) feats1 = emb(nids)
assert np.all(F.asnumpy(feats1) == 0) assert np.all(F.asnumpy(feats1) == 0)
...@@ -197,7 +283,9 @@ def check_dist_emb(g, num_clients, num_nodes, num_edges): ...@@ -197,7 +283,9 @@ def check_dist_emb(g, num_clients, num_nodes, num_edges):
with F.no_grad(): with F.no_grad():
feats = emb(nids) feats = emb(nids)
if num_clients == 1: if num_clients == 1:
assert_almost_equal(F.asnumpy(feats), np.ones((len(nids), 1)) * 1 * -lr) assert_almost_equal(
F.asnumpy(feats), np.ones((len(nids), 1)) * 1 * -lr
)
rest = np.setdiff1d(np.arange(g.number_of_nodes()), F.asnumpy(nids)) rest = np.setdiff1d(np.arange(g.number_of_nodes()), F.asnumpy(nids))
feats1 = emb(rest) feats1 = emb(rest)
assert np.all(F.asnumpy(feats1) == np.zeros((len(rest), 1))) assert np.all(F.asnumpy(feats1) == np.zeros((len(rest), 1)))
...@@ -207,6 +295,7 @@ def check_dist_emb(g, num_clients, num_nodes, num_edges): ...@@ -207,6 +295,7 @@ def check_dist_emb(g, num_clients, num_nodes, num_edges):
print(e) print(e)
sys.exit(-1) sys.exit(-1)
def check_dist_graph(g, num_clients, num_nodes, num_edges): def check_dist_graph(g, num_clients, num_nodes, num_edges):
# Test API # Test API
assert g.number_of_nodes() == num_nodes assert g.number_of_nodes() == num_nodes
...@@ -214,13 +303,13 @@ def check_dist_graph(g, num_clients, num_nodes, num_edges): ...@@ -214,13 +303,13 @@ def check_dist_graph(g, num_clients, num_nodes, num_edges):
# Test reading node data # Test reading node data
nids = F.arange(0, int(g.number_of_nodes() / 2)) nids = F.arange(0, int(g.number_of_nodes() / 2))
feats1 = g.ndata['features'][nids] feats1 = g.ndata["features"][nids]
feats = F.squeeze(feats1, 1) feats = F.squeeze(feats1, 1)
assert np.all(F.asnumpy(feats == nids)) assert np.all(F.asnumpy(feats == nids))
# Test reading edge data # Test reading edge data
eids = F.arange(0, int(g.number_of_edges() / 2)) eids = F.arange(0, int(g.number_of_edges() / 2))
feats1 = g.edata['features'][eids] feats1 = g.edata["features"][eids]
feats = F.squeeze(feats1, 1) feats = F.squeeze(feats1, 1)
assert np.all(F.asnumpy(feats == eids)) assert np.all(F.asnumpy(feats == eids))
...@@ -232,53 +321,68 @@ def check_dist_graph(g, num_clients, num_nodes, num_edges): ...@@ -232,53 +321,68 @@ def check_dist_graph(g, num_clients, num_nodes, num_edges):
# Test init node data # Test init node data
new_shape = (g.number_of_nodes(), 2) new_shape = (g.number_of_nodes(), 2)
test1 = dgl.distributed.DistTensor(new_shape, F.int32) test1 = dgl.distributed.DistTensor(new_shape, F.int32)
g.ndata['test1'] = test1 g.ndata["test1"] = test1
feats = g.ndata['test1'][nids] feats = g.ndata["test1"][nids]
assert np.all(F.asnumpy(feats) == 0) assert np.all(F.asnumpy(feats) == 0)
assert test1.count_nonzero() == 0 assert test1.count_nonzero() == 0
# reference to one that exists # reference to one that exists
test2 = dgl.distributed.DistTensor(new_shape, F.float32, 'test2', init_func=rand_init) test2 = dgl.distributed.DistTensor(
test3 = dgl.distributed.DistTensor(new_shape, F.float32, 'test2') new_shape, F.float32, "test2", init_func=rand_init
)
test3 = dgl.distributed.DistTensor(new_shape, F.float32, "test2")
assert np.all(F.asnumpy(test2[nids]) == F.asnumpy(test3[nids])) assert np.all(F.asnumpy(test2[nids]) == F.asnumpy(test3[nids]))
# create a tensor, destroy it, and create it again. # create a tensor, destroy it, and create it again.
test3 = dgl.distributed.DistTensor(new_shape, F.float32, 'test3', init_func=rand_init) test3 = dgl.distributed.DistTensor(
new_shape, F.float32, "test3", init_func=rand_init
)
del test3 del test3
test3 = dgl.distributed.DistTensor((g.number_of_nodes(), 3), F.float32, 'test3') test3 = dgl.distributed.DistTensor(
(g.number_of_nodes(), 3), F.float32, "test3"
)
del test3 del test3
# add tests for anonymous distributed tensor. # add tests for anonymous distributed tensor.
test3 = dgl.distributed.DistTensor(new_shape, F.float32, init_func=rand_init) test3 = dgl.distributed.DistTensor(
new_shape, F.float32, init_func=rand_init
)
data = test3[0:10] data = test3[0:10]
test4 = dgl.distributed.DistTensor(new_shape, F.float32, init_func=rand_init) test4 = dgl.distributed.DistTensor(
new_shape, F.float32, init_func=rand_init
)
del test3 del test3
test5 = dgl.distributed.DistTensor(new_shape, F.float32, init_func=rand_init) test5 = dgl.distributed.DistTensor(
new_shape, F.float32, init_func=rand_init
)
assert np.sum(F.asnumpy(test5[0:10] != data)) > 0 assert np.sum(F.asnumpy(test5[0:10] != data)) > 0
# test a persistent tensor # test a persistent tensor
test4 = dgl.distributed.DistTensor(new_shape, F.float32, 'test4', init_func=rand_init, test4 = dgl.distributed.DistTensor(
persistent=True) new_shape, F.float32, "test4", init_func=rand_init, persistent=True
)
del test4 del test4
try: try:
test4 = dgl.distributed.DistTensor((g.number_of_nodes(), 3), F.float32, 'test4') test4 = dgl.distributed.DistTensor(
raise Exception('') (g.number_of_nodes(), 3), F.float32, "test4"
)
raise Exception("")
except: except:
pass pass
# Test write data # Test write data
new_feats = F.ones((len(nids), 2), F.int32, F.cpu()) new_feats = F.ones((len(nids), 2), F.int32, F.cpu())
g.ndata['test1'][nids] = new_feats g.ndata["test1"][nids] = new_feats
feats = g.ndata['test1'][nids] feats = g.ndata["test1"][nids]
assert np.all(F.asnumpy(feats) == 1) assert np.all(F.asnumpy(feats) == 1)
# Test metadata operations. # Test metadata operations.
assert len(g.ndata['features']) == g.number_of_nodes() assert len(g.ndata["features"]) == g.number_of_nodes()
assert g.ndata['features'].shape == (g.number_of_nodes(), 1) assert g.ndata["features"].shape == (g.number_of_nodes(), 1)
assert g.ndata['features'].dtype == F.int64 assert g.ndata["features"].dtype == F.int64
assert g.node_attr_schemes()['features'].dtype == F.int64 assert g.node_attr_schemes()["features"].dtype == F.int64
assert g.node_attr_schemes()['test1'].dtype == F.int32 assert g.node_attr_schemes()["test1"].dtype == F.int32
assert g.node_attr_schemes()['features'].shape == (1,) assert g.node_attr_schemes()["features"].shape == (1,)
selected_nodes = np.random.randint(0, 100, size=g.number_of_nodes()) > 30 selected_nodes = np.random.randint(0, 100, size=g.number_of_nodes()) > 30
# Test node split # Test node split
...@@ -289,40 +393,62 @@ def check_dist_graph(g, num_clients, num_nodes, num_edges): ...@@ -289,40 +393,62 @@ def check_dist_graph(g, num_clients, num_nodes, num_edges):
for n in nodes: for n in nodes:
assert n in local_nids assert n in local_nids
print('end') print("end")
def check_dist_emb_server_client(shared_mem, num_servers, num_clients, num_groups=1): def check_dist_emb_server_client(
shared_mem, num_servers, num_clients, num_groups=1
):
prepare_dist(num_servers) prepare_dist(num_servers)
g = create_random_graph(10000) g = create_random_graph(10000)
# Partition the graph # Partition the graph
num_parts = 1 num_parts = 1
graph_name = f'check_dist_emb_{shared_mem}_{num_servers}_{num_clients}_{num_groups}' graph_name = (
g.ndata['features'] = F.unsqueeze(F.arange(0, g.number_of_nodes()), 1) f"check_dist_emb_{shared_mem}_{num_servers}_{num_clients}_{num_groups}"
g.edata['features'] = F.unsqueeze(F.arange(0, g.number_of_edges()), 1) )
partition_graph(g, graph_name, num_parts, '/tmp/dist_graph') g.ndata["features"] = F.unsqueeze(F.arange(0, g.number_of_nodes()), 1)
g.edata["features"] = F.unsqueeze(F.arange(0, g.number_of_edges()), 1)
partition_graph(g, graph_name, num_parts, "/tmp/dist_graph")
# let's just test on one partition for now. # let's just test on one partition for now.
# We cannot run multiple servers and clients on the same machine. # We cannot run multiple servers and clients on the same machine.
serv_ps = [] serv_ps = []
ctx = mp.get_context('spawn') ctx = mp.get_context("spawn")
keep_alive = num_groups > 1 keep_alive = num_groups > 1
for serv_id in range(num_servers): for serv_id in range(num_servers):
p = ctx.Process(target=run_server, args=(graph_name, serv_id, num_servers, p = ctx.Process(
num_clients, shared_mem, keep_alive)) target=run_server,
args=(
graph_name,
serv_id,
num_servers,
num_clients,
shared_mem,
keep_alive,
),
)
serv_ps.append(p) serv_ps.append(p)
p.start() p.start()
cli_ps = [] cli_ps = []
for cli_id in range(num_clients): for cli_id in range(num_clients):
for group_id in range(num_groups): for group_id in range(num_groups):
print('start client[{}] for group[{}]'.format(cli_id, group_id)) print("start client[{}] for group[{}]".format(cli_id, group_id))
p = ctx.Process(target=run_emb_client, args=(graph_name, 0, num_servers, num_clients, p = ctx.Process(
g.number_of_nodes(), target=run_emb_client,
g.number_of_edges(), args=(
group_id)) graph_name,
0,
num_servers,
num_clients,
g.number_of_nodes(),
g.number_of_edges(),
group_id,
),
)
p.start() p.start()
time.sleep(1) # avoid race condition when instantiating DistGraph time.sleep(1) # avoid race condition when instantiating DistGraph
cli_ps.append(p) cli_ps.append(p)
for p in cli_ps: for p in cli_ps:
...@@ -337,7 +463,8 @@ def check_dist_emb_server_client(shared_mem, num_servers, num_clients, num_group ...@@ -337,7 +463,8 @@ def check_dist_emb_server_client(shared_mem, num_servers, num_clients, num_group
for p in serv_ps: for p in serv_ps:
p.join() p.join()
print('clients have terminated') print("clients have terminated")
def check_server_client(shared_mem, num_servers, num_clients, num_groups=1): def check_server_client(shared_mem, num_servers, num_clients, num_groups=1):
prepare_dist(num_servers) prepare_dist(num_servers)
...@@ -345,19 +472,28 @@ def check_server_client(shared_mem, num_servers, num_clients, num_groups=1): ...@@ -345,19 +472,28 @@ def check_server_client(shared_mem, num_servers, num_clients, num_groups=1):
# Partition the graph # Partition the graph
num_parts = 1 num_parts = 1
graph_name = f'check_server_client_{shared_mem}_{num_servers}_{num_clients}_{num_groups}' graph_name = f"check_server_client_{shared_mem}_{num_servers}_{num_clients}_{num_groups}"
g.ndata['features'] = F.unsqueeze(F.arange(0, g.number_of_nodes()), 1) g.ndata["features"] = F.unsqueeze(F.arange(0, g.number_of_nodes()), 1)
g.edata['features'] = F.unsqueeze(F.arange(0, g.number_of_edges()), 1) g.edata["features"] = F.unsqueeze(F.arange(0, g.number_of_edges()), 1)
partition_graph(g, graph_name, num_parts, '/tmp/dist_graph') partition_graph(g, graph_name, num_parts, "/tmp/dist_graph")
# let's just test on one partition for now. # let's just test on one partition for now.
# We cannot run multiple servers and clients on the same machine. # We cannot run multiple servers and clients on the same machine.
serv_ps = [] serv_ps = []
ctx = mp.get_context('spawn') ctx = mp.get_context("spawn")
keep_alive = num_groups > 1 keep_alive = num_groups > 1
for serv_id in range(num_servers): for serv_id in range(num_servers):
p = ctx.Process(target=run_server, args=(graph_name, serv_id, num_servers, p = ctx.Process(
num_clients, shared_mem, keep_alive)) target=run_server,
args=(
graph_name,
serv_id,
num_servers,
num_clients,
shared_mem,
keep_alive,
),
)
serv_ps.append(p) serv_ps.append(p)
p.start() p.start()
...@@ -365,11 +501,21 @@ def check_server_client(shared_mem, num_servers, num_clients, num_groups=1): ...@@ -365,11 +501,21 @@ def check_server_client(shared_mem, num_servers, num_clients, num_groups=1):
cli_ps = [] cli_ps = []
for cli_id in range(num_clients): for cli_id in range(num_clients):
for group_id in range(num_groups): for group_id in range(num_groups):
print('start client[{}] for group[{}]'.format(cli_id, group_id)) print("start client[{}] for group[{}]".format(cli_id, group_id))
p = ctx.Process(target=run_client, args=(graph_name, 0, num_servers, num_clients, g.number_of_nodes(), p = ctx.Process(
g.number_of_edges(), group_id)) target=run_client,
args=(
graph_name,
0,
num_servers,
num_clients,
g.number_of_nodes(),
g.number_of_edges(),
group_id,
),
)
p.start() p.start()
time.sleep(1) # avoid race condition when instantiating DistGraph time.sleep(1) # avoid race condition when instantiating DistGraph
cli_ps.append(p) cli_ps.append(p)
for p in cli_ps: for p in cli_ps:
p.join() p.join()
...@@ -382,7 +528,8 @@ def check_server_client(shared_mem, num_servers, num_clients, num_groups=1): ...@@ -382,7 +528,8 @@ def check_server_client(shared_mem, num_servers, num_clients, num_groups=1):
for p in serv_ps: for p in serv_ps:
p.join() p.join()
print('clients have terminated') print("clients have terminated")
def check_server_client_hierarchy(shared_mem, num_servers, num_clients): def check_server_client_hierarchy(shared_mem, num_servers, num_clients):
prepare_dist(num_servers) prepare_dist(num_servers)
...@@ -390,18 +537,26 @@ def check_server_client_hierarchy(shared_mem, num_servers, num_clients): ...@@ -390,18 +537,26 @@ def check_server_client_hierarchy(shared_mem, num_servers, num_clients):
# Partition the graph # Partition the graph
num_parts = 1 num_parts = 1
graph_name = 'dist_graph_test_2' graph_name = "dist_graph_test_2"
g.ndata['features'] = F.unsqueeze(F.arange(0, g.number_of_nodes()), 1) g.ndata["features"] = F.unsqueeze(F.arange(0, g.number_of_nodes()), 1)
g.edata['features'] = F.unsqueeze(F.arange(0, g.number_of_edges()), 1) g.edata["features"] = F.unsqueeze(F.arange(0, g.number_of_edges()), 1)
partition_graph(g, graph_name, num_parts, '/tmp/dist_graph', num_trainers_per_machine=num_clients) partition_graph(
g,
graph_name,
num_parts,
"/tmp/dist_graph",
num_trainers_per_machine=num_clients,
)
# let's just test on one partition for now. # let's just test on one partition for now.
# We cannot run multiple servers and clients on the same machine. # We cannot run multiple servers and clients on the same machine.
serv_ps = [] serv_ps = []
ctx = mp.get_context('spawn') ctx = mp.get_context("spawn")
for serv_id in range(num_servers): for serv_id in range(num_servers):
p = ctx.Process(target=run_server, args=(graph_name, serv_id, num_servers, p = ctx.Process(
num_clients, shared_mem)) target=run_server,
args=(graph_name, serv_id, num_servers, num_clients, shared_mem),
)
serv_ps.append(p) serv_ps.append(p)
p.start() p.start()
...@@ -410,16 +565,29 @@ def check_server_client_hierarchy(shared_mem, num_servers, num_clients): ...@@ -410,16 +565,29 @@ def check_server_client_hierarchy(shared_mem, num_servers, num_clients):
return_dict = manager.dict() return_dict = manager.dict()
node_mask = np.zeros((g.number_of_nodes(),), np.int32) node_mask = np.zeros((g.number_of_nodes(),), np.int32)
edge_mask = np.zeros((g.number_of_edges(),), np.int32) edge_mask = np.zeros((g.number_of_edges(),), np.int32)
nodes = np.random.choice(g.number_of_nodes(), g.number_of_nodes() // 10, replace=False) nodes = np.random.choice(
edges = np.random.choice(g.number_of_edges(), g.number_of_edges() // 10, replace=False) g.number_of_nodes(), g.number_of_nodes() // 10, replace=False
)
edges = np.random.choice(
g.number_of_edges(), g.number_of_edges() // 10, replace=False
)
node_mask[nodes] = 1 node_mask[nodes] = 1
edge_mask[edges] = 1 edge_mask[edges] = 1
nodes = np.sort(nodes) nodes = np.sort(nodes)
edges = np.sort(edges) edges = np.sort(edges)
for cli_id in range(num_clients): for cli_id in range(num_clients):
print('start client', cli_id) print("start client", cli_id)
p = ctx.Process(target=run_client_hierarchy, args=(graph_name, 0, num_servers, p = ctx.Process(
node_mask, edge_mask, return_dict)) target=run_client_hierarchy,
args=(
graph_name,
0,
num_servers,
node_mask,
edge_mask,
return_dict,
),
)
p.start() p.start()
cli_ps.append(p) cli_ps.append(p)
...@@ -438,33 +606,45 @@ def check_server_client_hierarchy(shared_mem, num_servers, num_clients): ...@@ -438,33 +606,45 @@ def check_server_client_hierarchy(shared_mem, num_servers, num_clients):
assert np.all(F.asnumpy(nodes1) == nodes) assert np.all(F.asnumpy(nodes1) == nodes)
assert np.all(F.asnumpy(edges1) == edges) assert np.all(F.asnumpy(edges1) == edges)
print('clients have terminated') print("clients have terminated")
def run_client_hetero(graph_name, part_id, server_count, num_clients, num_nodes, num_edges): def run_client_hetero(
os.environ['DGL_NUM_SERVER'] = str(server_count) graph_name, part_id, server_count, num_clients, num_nodes, num_edges
):
os.environ["DGL_NUM_SERVER"] = str(server_count)
dgl.distributed.initialize("kv_ip_config.txt") dgl.distributed.initialize("kv_ip_config.txt")
gpb, graph_name, _, _ = load_partition_book('/tmp/dist_graph/{}.json'.format(graph_name), gpb, graph_name, _, _ = load_partition_book(
part_id, None) "/tmp/dist_graph/{}.json".format(graph_name), part_id, None
)
g = DistGraph(graph_name, gpb=gpb) g = DistGraph(graph_name, gpb=gpb)
check_dist_graph_hetero(g, num_clients, num_nodes, num_edges) check_dist_graph_hetero(g, num_clients, num_nodes, num_edges)
def create_random_hetero(): def create_random_hetero():
num_nodes = {'n1': 10000, 'n2': 10010, 'n3': 10020} num_nodes = {"n1": 10000, "n2": 10010, "n3": 10020}
etypes = [('n1', 'r1', 'n2'), etypes = [("n1", "r1", "n2"), ("n1", "r2", "n3"), ("n2", "r3", "n3")]
('n1', 'r2', 'n3'),
('n2', 'r3', 'n3')]
edges = {} edges = {}
for etype in etypes: for etype in etypes:
src_ntype, _, dst_ntype = etype src_ntype, _, dst_ntype = etype
arr = spsp.random(num_nodes[src_ntype], num_nodes[dst_ntype], density=0.001, format='coo', arr = spsp.random(
random_state=100) num_nodes[src_ntype],
num_nodes[dst_ntype],
density=0.001,
format="coo",
random_state=100,
)
edges[etype] = (arr.row, arr.col) edges[etype] = (arr.row, arr.col)
g = dgl.heterograph(edges, num_nodes) g = dgl.heterograph(edges, num_nodes)
g.nodes['n1'].data['feat'] = F.unsqueeze(F.arange(0, g.number_of_nodes('n1')), 1) g.nodes["n1"].data["feat"] = F.unsqueeze(
g.edges['r1'].data['feat'] = F.unsqueeze(F.arange(0, g.number_of_edges('r1')), 1) F.arange(0, g.number_of_nodes("n1")), 1
)
g.edges["r1"].data["feat"] = F.unsqueeze(
F.arange(0, g.number_of_edges("r1")), 1
)
return g return g
def check_dist_graph_hetero(g, num_clients, num_nodes, num_edges): def check_dist_graph_hetero(g, num_clients, num_nodes, num_edges):
# Test API # Test API
for ntype in num_nodes: for ntype in num_nodes:
...@@ -473,9 +653,7 @@ def check_dist_graph_hetero(g, num_clients, num_nodes, num_edges): ...@@ -473,9 +653,7 @@ def check_dist_graph_hetero(g, num_clients, num_nodes, num_edges):
for etype in num_edges: for etype in num_edges:
assert etype in g.etypes assert etype in g.etypes
assert num_edges[etype] == g.number_of_edges(etype) assert num_edges[etype] == g.number_of_edges(etype)
etypes = [('n1', 'r1', 'n2'), etypes = [("n1", "r1", "n2"), ("n1", "r2", "n3"), ("n2", "r3", "n3")]
('n1', 'r2', 'n3'),
('n2', 'r3', 'n3')]
for i, etype in enumerate(g.canonical_etypes): for i, etype in enumerate(g.canonical_etypes):
assert etype[0] == etypes[i][0] assert etype[0] == etypes[i][0]
assert etype[1] == etypes[i][1] assert etype[1] == etypes[i][1]
...@@ -484,76 +662,92 @@ def check_dist_graph_hetero(g, num_clients, num_nodes, num_edges): ...@@ -484,76 +662,92 @@ def check_dist_graph_hetero(g, num_clients, num_nodes, num_edges):
assert g.number_of_edges() == sum([num_edges[etype] for etype in num_edges]) assert g.number_of_edges() == sum([num_edges[etype] for etype in num_edges])
# Test reading node data # Test reading node data
nids = F.arange(0, int(g.number_of_nodes('n1') / 2)) nids = F.arange(0, int(g.number_of_nodes("n1") / 2))
feats1 = g.nodes['n1'].data['feat'][nids] feats1 = g.nodes["n1"].data["feat"][nids]
feats = F.squeeze(feats1, 1) feats = F.squeeze(feats1, 1)
assert np.all(F.asnumpy(feats == nids)) assert np.all(F.asnumpy(feats == nids))
# Test reading edge data # Test reading edge data
eids = F.arange(0, int(g.number_of_edges('r1') / 2)) eids = F.arange(0, int(g.number_of_edges("r1") / 2))
feats1 = g.edges['r1'].data['feat'][eids] feats1 = g.edges["r1"].data["feat"][eids]
feats = F.squeeze(feats1, 1) feats = F.squeeze(feats1, 1)
assert np.all(F.asnumpy(feats == eids)) assert np.all(F.asnumpy(feats == eids))
# Test edge_subgraph # Test edge_subgraph
sg = g.edge_subgraph({'r1': eids}) sg = g.edge_subgraph({"r1": eids})
assert sg.num_edges() == len(eids) assert sg.num_edges() == len(eids)
assert F.array_equal(sg.edata[dgl.EID], eids) assert F.array_equal(sg.edata[dgl.EID], eids)
sg = g.edge_subgraph({('n1', 'r1', 'n2'): eids}) sg = g.edge_subgraph({("n1", "r1", "n2"): eids})
assert sg.num_edges() == len(eids) assert sg.num_edges() == len(eids)
assert F.array_equal(sg.edata[dgl.EID], eids) assert F.array_equal(sg.edata[dgl.EID], eids)
# Test init node data # Test init node data
new_shape = (g.number_of_nodes('n1'), 2) new_shape = (g.number_of_nodes("n1"), 2)
g.nodes['n1'].data['test1'] = dgl.distributed.DistTensor(new_shape, F.int32) g.nodes["n1"].data["test1"] = dgl.distributed.DistTensor(new_shape, F.int32)
feats = g.nodes['n1'].data['test1'][nids] feats = g.nodes["n1"].data["test1"][nids]
assert np.all(F.asnumpy(feats) == 0) assert np.all(F.asnumpy(feats) == 0)
# create a tensor, destroy it, and create it again. # create a tensor, destroy it, and create it again.
test3 = dgl.distributed.DistTensor(new_shape, F.float32, 'test3', init_func=rand_init) test3 = dgl.distributed.DistTensor(
new_shape, F.float32, "test3", init_func=rand_init
)
del test3 del test3
test3 = dgl.distributed.DistTensor((g.number_of_nodes('n1'), 3), F.float32, 'test3') test3 = dgl.distributed.DistTensor(
(g.number_of_nodes("n1"), 3), F.float32, "test3"
)
del test3 del test3
# add tests for anonymous distributed tensor. # add tests for anonymous distributed tensor.
test3 = dgl.distributed.DistTensor(new_shape, F.float32, init_func=rand_init) test3 = dgl.distributed.DistTensor(
new_shape, F.float32, init_func=rand_init
)
data = test3[0:10] data = test3[0:10]
test4 = dgl.distributed.DistTensor(new_shape, F.float32, init_func=rand_init) test4 = dgl.distributed.DistTensor(
new_shape, F.float32, init_func=rand_init
)
del test3 del test3
test5 = dgl.distributed.DistTensor(new_shape, F.float32, init_func=rand_init) test5 = dgl.distributed.DistTensor(
new_shape, F.float32, init_func=rand_init
)
assert np.sum(F.asnumpy(test5[0:10] != data)) > 0 assert np.sum(F.asnumpy(test5[0:10] != data)) > 0
# test a persistent tensor # test a persistent tensor
test4 = dgl.distributed.DistTensor(new_shape, F.float32, 'test4', init_func=rand_init, test4 = dgl.distributed.DistTensor(
persistent=True) new_shape, F.float32, "test4", init_func=rand_init, persistent=True
)
del test4 del test4
try: try:
test4 = dgl.distributed.DistTensor((g.number_of_nodes('n1'), 3), F.float32, 'test4') test4 = dgl.distributed.DistTensor(
raise Exception('') (g.number_of_nodes("n1"), 3), F.float32, "test4"
)
raise Exception("")
except: except:
pass pass
# Test write data # Test write data
new_feats = F.ones((len(nids), 2), F.int32, F.cpu()) new_feats = F.ones((len(nids), 2), F.int32, F.cpu())
g.nodes['n1'].data['test1'][nids] = new_feats g.nodes["n1"].data["test1"][nids] = new_feats
feats = g.nodes['n1'].data['test1'][nids] feats = g.nodes["n1"].data["test1"][nids]
assert np.all(F.asnumpy(feats) == 1) assert np.all(F.asnumpy(feats) == 1)
# Test metadata operations. # Test metadata operations.
assert len(g.nodes['n1'].data['feat']) == g.number_of_nodes('n1') assert len(g.nodes["n1"].data["feat"]) == g.number_of_nodes("n1")
assert g.nodes['n1'].data['feat'].shape == (g.number_of_nodes('n1'), 1) assert g.nodes["n1"].data["feat"].shape == (g.number_of_nodes("n1"), 1)
assert g.nodes['n1'].data['feat'].dtype == F.int64 assert g.nodes["n1"].data["feat"].dtype == F.int64
selected_nodes = np.random.randint(0, 100, size=g.number_of_nodes('n1')) > 30 selected_nodes = (
np.random.randint(0, 100, size=g.number_of_nodes("n1")) > 30
)
# Test node split # Test node split
nodes = node_split(selected_nodes, g.get_partition_book(), ntype='n1') nodes = node_split(selected_nodes, g.get_partition_book(), ntype="n1")
nodes = F.asnumpy(nodes) nodes = F.asnumpy(nodes)
# We only have one partition, so the local nodes are basically all nodes in the graph. # We only have one partition, so the local nodes are basically all nodes in the graph.
local_nids = np.arange(g.number_of_nodes('n1')) local_nids = np.arange(g.number_of_nodes("n1"))
for n in nodes: for n in nodes:
assert n in local_nids assert n in local_nids
print('end') print("end")
def check_server_client_hetero(shared_mem, num_servers, num_clients): def check_server_client_hetero(shared_mem, num_servers, num_clients):
prepare_dist(num_servers) prepare_dist(num_servers)
...@@ -561,16 +755,18 @@ def check_server_client_hetero(shared_mem, num_servers, num_clients): ...@@ -561,16 +755,18 @@ def check_server_client_hetero(shared_mem, num_servers, num_clients):
# Partition the graph # Partition the graph
num_parts = 1 num_parts = 1
graph_name = 'dist_graph_test_3' graph_name = "dist_graph_test_3"
partition_graph(g, graph_name, num_parts, '/tmp/dist_graph') partition_graph(g, graph_name, num_parts, "/tmp/dist_graph")
# let's just test on one partition for now. # let's just test on one partition for now.
# We cannot run multiple servers and clients on the same machine. # We cannot run multiple servers and clients on the same machine.
serv_ps = [] serv_ps = []
ctx = mp.get_context('spawn') ctx = mp.get_context("spawn")
for serv_id in range(num_servers): for serv_id in range(num_servers):
p = ctx.Process(target=run_server, args=(graph_name, serv_id, num_servers, p = ctx.Process(
num_clients, shared_mem)) target=run_server,
args=(graph_name, serv_id, num_servers, num_clients, shared_mem),
)
serv_ps.append(p) serv_ps.append(p)
p.start() p.start()
...@@ -578,9 +774,18 @@ def check_server_client_hetero(shared_mem, num_servers, num_clients): ...@@ -578,9 +774,18 @@ def check_server_client_hetero(shared_mem, num_servers, num_clients):
num_nodes = {ntype: g.number_of_nodes(ntype) for ntype in g.ntypes} num_nodes = {ntype: g.number_of_nodes(ntype) for ntype in g.ntypes}
num_edges = {etype: g.number_of_edges(etype) for etype in g.etypes} num_edges = {etype: g.number_of_edges(etype) for etype in g.etypes}
for cli_id in range(num_clients): for cli_id in range(num_clients):
print('start client', cli_id) print("start client", cli_id)
p = ctx.Process(target=run_client_hetero, args=(graph_name, 0, num_servers, num_clients, num_nodes, p = ctx.Process(
num_edges)) target=run_client_hetero,
args=(
graph_name,
0,
num_servers,
num_clients,
num_nodes,
num_edges,
),
)
p.start() p.start()
cli_ps.append(p) cli_ps.append(p)
...@@ -590,14 +795,20 @@ def check_server_client_hetero(shared_mem, num_servers, num_clients): ...@@ -590,14 +795,20 @@ def check_server_client_hetero(shared_mem, num_servers, num_clients):
for p in serv_ps: for p in serv_ps:
p.join() p.join()
print('clients have terminated') print("clients have terminated")
@unittest.skipIf(os.name == 'nt', reason='Do not support windows yet') @unittest.skipIf(os.name == "nt", reason="Do not support windows yet")
@unittest.skipIf(dgl.backend.backend_name == "tensorflow", reason="TF doesn't support some of operations in DistGraph") @unittest.skipIf(
@unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Turn off Mxnet support") dgl.backend.backend_name == "tensorflow",
reason="TF doesn't support some of operations in DistGraph",
)
@unittest.skipIf(
dgl.backend.backend_name == "mxnet", reason="Turn off Mxnet support"
)
def test_server_client(): def test_server_client():
reset_envs() reset_envs()
os.environ['DGL_DIST_MODE'] = 'distributed' os.environ["DGL_DIST_MODE"] = "distributed"
check_server_client_hierarchy(False, 1, 4) check_server_client_hierarchy(False, 1, 4)
check_server_client_empty(True, 1, 1) check_server_client_empty(True, 1, 1)
check_server_client_hetero(True, 1, 1) check_server_client_hetero(True, 1, 1)
...@@ -606,78 +817,110 @@ def test_server_client(): ...@@ -606,78 +817,110 @@ def test_server_client():
check_server_client(False, 1, 1) check_server_client(False, 1, 1)
# [TODO][Rhett] Tests for multiple groups may fail sometimes and # [TODO][Rhett] Tests for multiple groups may fail sometimes and
# the root cause is unknown. Let's disable them for now. # the root cause is unknown. Let's disable them for now.
#check_server_client(True, 2, 2) # check_server_client(True, 2, 2)
#check_server_client(True, 1, 1, 2) # check_server_client(True, 1, 1, 2)
#check_server_client(False, 1, 1, 2) # check_server_client(False, 1, 1, 2)
#check_server_client(True, 2, 2, 2) # check_server_client(True, 2, 2, 2)
@unittest.skipIf(os.name == 'nt', reason='Do not support windows yet')
@unittest.skipIf(dgl.backend.backend_name == "tensorflow", reason="TF doesn't support distributed DistEmbedding") @unittest.skipIf(os.name == "nt", reason="Do not support windows yet")
@unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Mxnet doesn't support distributed DistEmbedding") @unittest.skipIf(
dgl.backend.backend_name == "tensorflow",
reason="TF doesn't support distributed DistEmbedding",
)
@unittest.skipIf(
dgl.backend.backend_name == "mxnet",
reason="Mxnet doesn't support distributed DistEmbedding",
)
def test_dist_emb_server_client(): def test_dist_emb_server_client():
reset_envs() reset_envs()
os.environ['DGL_DIST_MODE'] = 'distributed' os.environ["DGL_DIST_MODE"] = "distributed"
check_dist_emb_server_client(True, 1, 1) check_dist_emb_server_client(True, 1, 1)
check_dist_emb_server_client(False, 1, 1) check_dist_emb_server_client(False, 1, 1)
# [TODO][Rhett] Tests for multiple groups may fail sometimes and # [TODO][Rhett] Tests for multiple groups may fail sometimes and
# the root cause is unknown. Let's disable them for now. # the root cause is unknown. Let's disable them for now.
#check_dist_emb_server_client(True, 2, 2) # check_dist_emb_server_client(True, 2, 2)
#check_dist_emb_server_client(True, 1, 1, 2) # check_dist_emb_server_client(True, 1, 1, 2)
#check_dist_emb_server_client(False, 1, 1, 2) # check_dist_emb_server_client(False, 1, 1, 2)
#check_dist_emb_server_client(True, 2, 2, 2) # check_dist_emb_server_client(True, 2, 2, 2)
@unittest.skipIf(dgl.backend.backend_name == "tensorflow", reason="TF doesn't support some of operations in DistGraph")
@unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Turn off Mxnet support") @unittest.skipIf(
dgl.backend.backend_name == "tensorflow",
reason="TF doesn't support some of operations in DistGraph",
)
@unittest.skipIf(
dgl.backend.backend_name == "mxnet", reason="Turn off Mxnet support"
)
def test_standalone(): def test_standalone():
reset_envs() reset_envs()
os.environ['DGL_DIST_MODE'] = 'standalone' os.environ["DGL_DIST_MODE"] = "standalone"
g = create_random_graph(10000) g = create_random_graph(10000)
# Partition the graph # Partition the graph
num_parts = 1 num_parts = 1
graph_name = 'dist_graph_test_3' graph_name = "dist_graph_test_3"
g.ndata['features'] = F.unsqueeze(F.arange(0, g.number_of_nodes()), 1) g.ndata["features"] = F.unsqueeze(F.arange(0, g.number_of_nodes()), 1)
g.edata['features'] = F.unsqueeze(F.arange(0, g.number_of_edges()), 1) g.edata["features"] = F.unsqueeze(F.arange(0, g.number_of_edges()), 1)
partition_graph(g, graph_name, num_parts, '/tmp/dist_graph') partition_graph(g, graph_name, num_parts, "/tmp/dist_graph")
dgl.distributed.initialize("kv_ip_config.txt") dgl.distributed.initialize("kv_ip_config.txt")
dist_g = DistGraph(graph_name, part_config='/tmp/dist_graph/{}.json'.format(graph_name)) dist_g = DistGraph(
graph_name, part_config="/tmp/dist_graph/{}.json".format(graph_name)
)
check_dist_graph(dist_g, 1, g.number_of_nodes(), g.number_of_edges()) check_dist_graph(dist_g, 1, g.number_of_nodes(), g.number_of_edges())
dgl.distributed.exit_client() # this is needed since there are two tests here in one process dgl.distributed.exit_client()  # this is needed since there are two tests here in one process
@unittest.skipIf(dgl.backend.backend_name == "tensorflow", reason="TF doesn't support distributed DistEmbedding") @unittest.skipIf(
@unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Mxnet doesn't support distributed DistEmbedding") dgl.backend.backend_name == "tensorflow",
reason="TF doesn't support distributed DistEmbedding",
)
@unittest.skipIf(
dgl.backend.backend_name == "mxnet",
reason="Mxnet doesn't support distributed DistEmbedding",
)
def test_standalone_node_emb(): def test_standalone_node_emb():
reset_envs() reset_envs()
os.environ['DGL_DIST_MODE'] = 'standalone' os.environ["DGL_DIST_MODE"] = "standalone"
g = create_random_graph(10000) g = create_random_graph(10000)
# Partition the graph # Partition the graph
num_parts = 1 num_parts = 1
graph_name = 'dist_graph_test_3' graph_name = "dist_graph_test_3"
g.ndata['features'] = F.unsqueeze(F.arange(0, g.number_of_nodes()), 1) g.ndata["features"] = F.unsqueeze(F.arange(0, g.number_of_nodes()), 1)
g.edata['features'] = F.unsqueeze(F.arange(0, g.number_of_edges()), 1) g.edata["features"] = F.unsqueeze(F.arange(0, g.number_of_edges()), 1)
partition_graph(g, graph_name, num_parts, '/tmp/dist_graph') partition_graph(g, graph_name, num_parts, "/tmp/dist_graph")
dgl.distributed.initialize("kv_ip_config.txt") dgl.distributed.initialize("kv_ip_config.txt")
dist_g = DistGraph(graph_name, part_config='/tmp/dist_graph/{}.json'.format(graph_name)) dist_g = DistGraph(
graph_name, part_config="/tmp/dist_graph/{}.json".format(graph_name)
)
check_dist_emb(dist_g, 1, g.number_of_nodes(), g.number_of_edges()) check_dist_emb(dist_g, 1, g.number_of_nodes(), g.number_of_edges())
dgl.distributed.exit_client() # this is needed since there are two tests here in one process dgl.distributed.exit_client()  # this is needed since there are two tests here in one process
@unittest.skipIf(os.name == 'nt', reason='Do not support windows yet') @unittest.skipIf(os.name == "nt", reason="Do not support windows yet")
@pytest.mark.parametrize("hetero", [True, False]) @pytest.mark.parametrize("hetero", [True, False])
def test_split(hetero): def test_split(hetero):
if hetero: if hetero:
g = create_random_hetero() g = create_random_hetero()
ntype = 'n1' ntype = "n1"
etype = 'r1' etype = "r1"
else: else:
g = create_random_graph(10000) g = create_random_graph(10000)
ntype = '_N' ntype = "_N"
etype = '_E' etype = "_E"
num_parts = 4 num_parts = 4
num_hops = 2 num_hops = 2
partition_graph(g, 'dist_graph_test', num_parts, '/tmp/dist_graph', num_hops=num_hops, part_method='metis') partition_graph(
g,
"dist_graph_test",
num_parts,
"/tmp/dist_graph",
num_hops=num_hops,
part_method="metis",
)
node_mask = np.random.randint(0, 100, size=g.number_of_nodes(ntype)) > 30 node_mask = np.random.randint(0, 100, size=g.number_of_nodes(ntype)) > 30
edge_mask = np.random.randint(0, 100, size=g.number_of_edges(etype)) > 30 edge_mask = np.random.randint(0, 100, size=g.number_of_edges(etype)) > 30
...@@ -688,14 +931,18 @@ def test_split(hetero): ...@@ -688,14 +931,18 @@ def test_split(hetero):
# to determine how to split the workloads. Here we simulate the multi-client # to determine how to split the workloads. Here we simulate the multi-client
# use case. # use case.
def set_roles(num_clients): def set_roles(num_clients):
dgl.distributed.role.CUR_ROLE = 'default' dgl.distributed.role.CUR_ROLE = "default"
dgl.distributed.role.GLOBAL_RANK = {i:i for i in range(num_clients)} dgl.distributed.role.GLOBAL_RANK = {i: i for i in range(num_clients)}
dgl.distributed.role.PER_ROLE_RANK['default'] = {i:i for i in range(num_clients)} dgl.distributed.role.PER_ROLE_RANK["default"] = {
i: i for i in range(num_clients)
}
for i in range(num_parts): for i in range(num_parts):
set_roles(num_parts) set_roles(num_parts)
part_g, node_feats, edge_feats, gpb, _, _, _ = load_partition('/tmp/dist_graph/dist_graph_test.json', i) part_g, node_feats, edge_feats, gpb, _, _, _ = load_partition(
local_nids = F.nonzero_1d(part_g.ndata['inner_node']) "/tmp/dist_graph/dist_graph_test.json", i
)
local_nids = F.nonzero_1d(part_g.ndata["inner_node"])
local_nids = F.gather_row(part_g.ndata[dgl.NID], local_nids) local_nids = F.gather_row(part_g.ndata[dgl.NID], local_nids)
if hetero: if hetero:
ntype_ids, nids = gpb.map_to_per_ntype(local_nids) ntype_ids, nids = gpb.map_to_per_ntype(local_nids)
...@@ -703,19 +950,25 @@ def test_split(hetero): ...@@ -703,19 +950,25 @@ def test_split(hetero):
else: else:
local_nids = F.asnumpy(local_nids) local_nids = F.asnumpy(local_nids)
nodes1 = np.intersect1d(selected_nodes, local_nids) nodes1 = np.intersect1d(selected_nodes, local_nids)
nodes2 = node_split(node_mask, gpb, ntype=ntype, rank=i, force_even=False) nodes2 = node_split(
node_mask, gpb, ntype=ntype, rank=i, force_even=False
)
assert np.all(np.sort(nodes1) == np.sort(F.asnumpy(nodes2))) assert np.all(np.sort(nodes1) == np.sort(F.asnumpy(nodes2)))
for n in F.asnumpy(nodes2): for n in F.asnumpy(nodes2):
assert n in local_nids assert n in local_nids
set_roles(num_parts * 2) set_roles(num_parts * 2)
nodes3 = node_split(node_mask, gpb, ntype=ntype, rank=i * 2, force_even=False) nodes3 = node_split(
nodes4 = node_split(node_mask, gpb, ntype=ntype, rank=i * 2 + 1, force_even=False) node_mask, gpb, ntype=ntype, rank=i * 2, force_even=False
)
nodes4 = node_split(
node_mask, gpb, ntype=ntype, rank=i * 2 + 1, force_even=False
)
nodes5 = F.cat([nodes3, nodes4], 0) nodes5 = F.cat([nodes3, nodes4], 0)
assert np.all(np.sort(nodes1) == np.sort(F.asnumpy(nodes5))) assert np.all(np.sort(nodes1) == np.sort(F.asnumpy(nodes5)))
set_roles(num_parts) set_roles(num_parts)
local_eids = F.nonzero_1d(part_g.edata['inner_edge']) local_eids = F.nonzero_1d(part_g.edata["inner_edge"])
local_eids = F.gather_row(part_g.edata[dgl.EID], local_eids) local_eids = F.gather_row(part_g.edata[dgl.EID], local_eids)
if hetero: if hetero:
etype_ids, eids = gpb.map_to_per_etype(local_eids) etype_ids, eids = gpb.map_to_per_etype(local_eids)
...@@ -723,23 +976,37 @@ def test_split(hetero): ...@@ -723,23 +976,37 @@ def test_split(hetero):
else: else:
local_eids = F.asnumpy(local_eids) local_eids = F.asnumpy(local_eids)
edges1 = np.intersect1d(selected_edges, local_eids) edges1 = np.intersect1d(selected_edges, local_eids)
edges2 = edge_split(edge_mask, gpb, etype=etype, rank=i, force_even=False) edges2 = edge_split(
edge_mask, gpb, etype=etype, rank=i, force_even=False
)
assert np.all(np.sort(edges1) == np.sort(F.asnumpy(edges2))) assert np.all(np.sort(edges1) == np.sort(F.asnumpy(edges2)))
for e in F.asnumpy(edges2): for e in F.asnumpy(edges2):
assert e in local_eids assert e in local_eids
set_roles(num_parts * 2) set_roles(num_parts * 2)
edges3 = edge_split(edge_mask, gpb, etype=etype, rank=i * 2, force_even=False) edges3 = edge_split(
edges4 = edge_split(edge_mask, gpb, etype=etype, rank=i * 2 + 1, force_even=False) edge_mask, gpb, etype=etype, rank=i * 2, force_even=False
)
edges4 = edge_split(
edge_mask, gpb, etype=etype, rank=i * 2 + 1, force_even=False
)
edges5 = F.cat([edges3, edges4], 0) edges5 = F.cat([edges3, edges4], 0)
assert np.all(np.sort(edges1) == np.sort(F.asnumpy(edges5))) assert np.all(np.sort(edges1) == np.sort(F.asnumpy(edges5)))
@unittest.skipIf(os.name == 'nt', reason='Do not support windows yet')
@unittest.skipIf(os.name == "nt", reason="Do not support windows yet")
def test_split_even(): def test_split_even():
g = create_random_graph(10000) g = create_random_graph(10000)
num_parts = 4 num_parts = 4
num_hops = 2 num_hops = 2
partition_graph(g, 'dist_graph_test', num_parts, '/tmp/dist_graph', num_hops=num_hops, part_method='metis') partition_graph(
g,
"dist_graph_test",
num_parts,
"/tmp/dist_graph",
num_hops=num_hops,
part_method="metis",
)
node_mask = np.random.randint(0, 100, size=g.number_of_nodes()) > 30 node_mask = np.random.randint(0, 100, size=g.number_of_nodes()) > 30
edge_mask = np.random.randint(0, 100, size=g.number_of_edges()) > 30 edge_mask = np.random.randint(0, 100, size=g.number_of_edges()) > 30
...@@ -754,19 +1021,27 @@ def test_split_even(): ...@@ -754,19 +1021,27 @@ def test_split_even():
# to determine how to split the workloads. Here we simulate the multi-client # to determine how to split the workloads. Here we simulate the multi-client
# use case. # use case.
def set_roles(num_clients): def set_roles(num_clients):
dgl.distributed.role.CUR_ROLE = 'default' dgl.distributed.role.CUR_ROLE = "default"
dgl.distributed.role.GLOBAL_RANK = {i:i for i in range(num_clients)} dgl.distributed.role.GLOBAL_RANK = {i: i for i in range(num_clients)}
dgl.distributed.role.PER_ROLE_RANK['default'] = {i:i for i in range(num_clients)} dgl.distributed.role.PER_ROLE_RANK["default"] = {
i: i for i in range(num_clients)
}
for i in range(num_parts): for i in range(num_parts):
set_roles(num_parts) set_roles(num_parts)
part_g, node_feats, edge_feats, gpb, _, _, _ = load_partition('/tmp/dist_graph/dist_graph_test.json', i) part_g, node_feats, edge_feats, gpb, _, _, _ = load_partition(
local_nids = F.nonzero_1d(part_g.ndata['inner_node']) "/tmp/dist_graph/dist_graph_test.json", i
)
local_nids = F.nonzero_1d(part_g.ndata["inner_node"])
local_nids = F.gather_row(part_g.ndata[dgl.NID], local_nids) local_nids = F.gather_row(part_g.ndata[dgl.NID], local_nids)
nodes = node_split(node_mask, gpb, rank=i, force_even=True) nodes = node_split(node_mask, gpb, rank=i, force_even=True)
all_nodes1.append(nodes) all_nodes1.append(nodes)
subset = np.intersect1d(F.asnumpy(nodes), F.asnumpy(local_nids)) subset = np.intersect1d(F.asnumpy(nodes), F.asnumpy(local_nids))
print('part {} gets {} nodes and {} are in the partition'.format(i, len(nodes), len(subset))) print(
"part {} gets {} nodes and {} are in the partition".format(
i, len(nodes), len(subset)
)
)
set_roles(num_parts * 2) set_roles(num_parts * 2)
nodes1 = node_split(node_mask, gpb, rank=i * 2, force_even=True) nodes1 = node_split(node_mask, gpb, rank=i * 2, force_even=True)
...@@ -774,15 +1049,19 @@ def test_split_even(): ...@@ -774,15 +1049,19 @@ def test_split_even():
nodes3, _ = F.sort_1d(F.cat([nodes1, nodes2], 0)) nodes3, _ = F.sort_1d(F.cat([nodes1, nodes2], 0))
all_nodes2.append(nodes3) all_nodes2.append(nodes3)
subset = np.intersect1d(F.asnumpy(nodes), F.asnumpy(nodes3)) subset = np.intersect1d(F.asnumpy(nodes), F.asnumpy(nodes3))
print('intersection has', len(subset)) print("intersection has", len(subset))
set_roles(num_parts) set_roles(num_parts)
local_eids = F.nonzero_1d(part_g.edata['inner_edge']) local_eids = F.nonzero_1d(part_g.edata["inner_edge"])
local_eids = F.gather_row(part_g.edata[dgl.EID], local_eids) local_eids = F.gather_row(part_g.edata[dgl.EID], local_eids)
edges = edge_split(edge_mask, gpb, rank=i, force_even=True) edges = edge_split(edge_mask, gpb, rank=i, force_even=True)
all_edges1.append(edges) all_edges1.append(edges)
subset = np.intersect1d(F.asnumpy(edges), F.asnumpy(local_eids)) subset = np.intersect1d(F.asnumpy(edges), F.asnumpy(local_eids))
print('part {} gets {} edges and {} are in the partition'.format(i, len(edges), len(subset))) print(
"part {} gets {} edges and {} are in the partition".format(
i, len(edges), len(subset)
)
)
set_roles(num_parts * 2) set_roles(num_parts * 2)
edges1 = edge_split(edge_mask, gpb, rank=i * 2, force_even=True) edges1 = edge_split(edge_mask, gpb, rank=i * 2, force_even=True)
...@@ -790,7 +1069,7 @@ def test_split_even(): ...@@ -790,7 +1069,7 @@ def test_split_even():
edges3, _ = F.sort_1d(F.cat([edges1, edges2], 0)) edges3, _ = F.sort_1d(F.cat([edges1, edges2], 0))
all_edges2.append(edges3) all_edges2.append(edges3)
subset = np.intersect1d(F.asnumpy(edges), F.asnumpy(edges3)) subset = np.intersect1d(F.asnumpy(edges), F.asnumpy(edges3))
print('intersection has', len(subset)) print("intersection has", len(subset))
all_nodes1 = F.cat(all_nodes1, 0) all_nodes1 = F.cat(all_nodes1, 0)
all_edges1 = F.cat(all_edges1, 0) all_edges1 = F.cat(all_edges1, 0)
all_nodes2 = F.cat(all_nodes2, 0) all_nodes2 = F.cat(all_nodes2, 0)
...@@ -802,11 +1081,13 @@ def test_split_even(): ...@@ -802,11 +1081,13 @@ def test_split_even():
assert np.all(all_nodes == F.asnumpy(all_nodes2)) assert np.all(all_nodes == F.asnumpy(all_nodes2))
assert np.all(all_edges == F.asnumpy(all_edges2)) assert np.all(all_edges == F.asnumpy(all_edges2))
def prepare_dist(num_servers=1): def prepare_dist(num_servers=1):
generate_ip_config("kv_ip_config.txt", 1, num_servers=num_servers) generate_ip_config("kv_ip_config.txt", 1, num_servers=num_servers)
if __name__ == '__main__':
os.makedirs('/tmp/dist_graph', exist_ok=True) if __name__ == "__main__":
os.makedirs("/tmp/dist_graph", exist_ok=True)
test_dist_emb_server_client() test_dist_emb_server_client()
test_server_client() test_server_client()
test_split(True) test_split(True)
......
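# --- Editor's illustrative sketch (not part of this commit) ---
# A minimal, hedged summary of the DistTensor lifecycle that the tests above
# (check_dist_graph / check_dist_graph_hetero) verify: named, anonymous, and
# persistent tensors. It assumes dgl.distributed.initialize() has already been
# called and a DistGraph `g` is available, exactly as in those tests; the tensor
# names "demo" and "demo_persistent" are placeholders, not names used anywhere
# in the code base.
import dgl
import backend as F

def dist_tensor_lifecycle_sketch(g):
    shape = (g.number_of_nodes(), 2)

    def zero_init(shape, dtype):
        # An init_func receives (shape, dtype), matching init_zero_func below.
        return F.zeros(shape, dtype, F.cpu())

    # Named tensor: shared by all trainers under the same name.
    named = dgl.distributed.DistTensor(shape, F.float32, "demo", init_func=zero_init)
    # Anonymous tensor: DGL assigns a name; its storage may be recycled after
    # the Python object is deleted (the tests check the recycled data differs).
    anon = dgl.distributed.DistTensor(shape, F.float32, init_func=zero_init)
    # Persistent tensor: outlives the Python object, so re-creating the same
    # name after deletion is expected to raise (as exercised above).
    persistent = dgl.distributed.DistTensor(
        shape, F.float32, "demo_persistent", init_func=zero_init, persistent=True
    )
    # A non-persistent named tensor can be deleted and re-created under its name.
    del named
    named = dgl.distributed.DistTensor(shape, F.float32, "demo")
    return named[0:10], anon[0:10], persistent[0:10]
# --- end of sketch ---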
import multiprocessing as mp
import os import os
import socket
import time import time
import unittest
import backend as F
import numpy as np import numpy as np
import socket from numpy.testing import assert_array_equal
from scipy import sparse as spsp from scipy import sparse as spsp
from utils import generate_ip_config, reset_envs
import dgl import dgl
import backend as F
import unittest
from dgl.graph_index import create_graph_index from dgl.graph_index import create_graph_index
import multiprocessing as mp
from numpy.testing import assert_array_equal
from utils import generate_ip_config, reset_envs
if os.name != 'nt': if os.name != "nt":
import fcntl import fcntl
import struct import struct
# Create a one-part graph # Create a one-part graph
node_map = F.tensor([0,0,0,0,0,0], F.int64) node_map = F.tensor([0, 0, 0, 0, 0, 0], F.int64)
edge_map = F.tensor([0,0,0,0,0,0,0], F.int64) edge_map = F.tensor([0, 0, 0, 0, 0, 0, 0], F.int64)
global_nid = F.tensor([0,1,2,3,4,5], F.int64) global_nid = F.tensor([0, 1, 2, 3, 4, 5], F.int64)
global_eid = F.tensor([0,1,2,3,4,5,6], F.int64) global_eid = F.tensor([0, 1, 2, 3, 4, 5, 6], F.int64)
g = dgl.DGLGraph() g = dgl.DGLGraph()
g.add_nodes(6) g.add_nodes(6)
g.add_edges(0, 1) # 0 g.add_edges(0, 1) # 0
g.add_edges(0, 2) # 1 g.add_edges(0, 2) # 1
g.add_edges(0, 3) # 2 g.add_edges(0, 3) # 2
g.add_edges(2, 3) # 3 g.add_edges(2, 3) # 3
g.add_edges(1, 1) # 4 g.add_edges(1, 1) # 4
g.add_edges(0, 4) # 5 g.add_edges(0, 4) # 5
g.add_edges(2, 5) # 6 g.add_edges(2, 5) # 6
g.ndata[dgl.NID] = global_nid g.ndata[dgl.NID] = global_nid
g.edata[dgl.EID] = global_eid g.edata[dgl.EID] = global_eid
gpb = dgl.distributed.graph_partition_book.BasicPartitionBook(part_id=0, gpb = dgl.distributed.graph_partition_book.BasicPartitionBook(
num_parts=1, part_id=0, num_parts=1, node_map=node_map, edge_map=edge_map, part_graph=g
node_map=node_map, )
edge_map=edge_map,
part_graph=g) node_policy = dgl.distributed.PartitionPolicy(
policy_str="node:_N", partition_book=gpb
)
node_policy = dgl.distributed.PartitionPolicy(policy_str='node:_N', edge_policy = dgl.distributed.PartitionPolicy(
partition_book=gpb) policy_str="edge:_E", partition_book=gpb
)
edge_policy = dgl.distributed.PartitionPolicy(policy_str='edge:_E', data_0 = F.tensor(
partition_book=gpb) [[1.0, 1.0], [1.0, 1.0], [1.0, 1.0], [1.0, 1.0], [1.0, 1.0], [1.0, 1.0]],
F.float32,
)
data_0_1 = F.tensor([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], F.float32)
data_0_2 = F.tensor([1, 2, 3, 4, 5, 6], F.int32)
data_0_3 = F.tensor([1, 2, 3, 4, 5, 6], F.int64)
data_1 = F.tensor(
[
[2.0, 2.0],
[2.0, 2.0],
[2.0, 2.0],
[2.0, 2.0],
[2.0, 2.0],
[2.0, 2.0],
[2.0, 2.0],
],
F.float32,
)
data_2 = F.tensor(
[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0]],
F.float32,
)
data_0 = F.tensor([[1.,1.],[1.,1.],[1.,1.],[1.,1.],[1.,1.],[1.,1.]], F.float32)
data_0_1 = F.tensor([1.,2.,3.,4.,5.,6.], F.float32)
data_0_2 = F.tensor([1,2,3,4,5,6], F.int32)
data_0_3 = F.tensor([1,2,3,4,5,6], F.int64)
data_1 = F.tensor([[2.,2.],[2.,2.],[2.,2.],[2.,2.],[2.,2.],[2.,2.],[2.,2.]], F.float32)
data_2 = F.tensor([[0.,0.],[0.,0.],[0.,0.],[0.,0.],[0.,0.],[0.,0.]], F.float32)
def init_zero_func(shape, dtype): def init_zero_func(shape, dtype):
return F.zeros(shape, dtype, F.cpu()) return F.zeros(shape, dtype, F.cpu())
def udf_push(target, name, id_tensor, data_tensor): def udf_push(target, name, id_tensor, data_tensor):
target[name][id_tensor] = data_tensor * data_tensor target[name][id_tensor] = data_tensor * data_tensor
def add_push(target, name, id_tensor, data_tensor): def add_push(target, name, id_tensor, data_tensor):
target[name][id_tensor] += data_tensor target[name][id_tensor] += data_tensor
@unittest.skipIf(os.name == 'nt' or os.getenv('DGLBACKEND') == 'tensorflow', reason='Do not support windows and TF yet')
@unittest.skipIf(
os.name == "nt" or os.getenv("DGLBACKEND") == "tensorflow",
reason="Do not support windows and TF yet",
)
def test_partition_policy(): def test_partition_policy():
assert node_policy.part_id == 0 assert node_policy.part_id == 0
assert edge_policy.part_id == 0 assert edge_policy.part_id == 0
local_nid = node_policy.to_local(F.tensor([0,1,2,3,4,5])) local_nid = node_policy.to_local(F.tensor([0, 1, 2, 3, 4, 5]))
local_eid = edge_policy.to_local(F.tensor([0,1,2,3,4,5,6])) local_eid = edge_policy.to_local(F.tensor([0, 1, 2, 3, 4, 5, 6]))
assert_array_equal(F.asnumpy(local_nid), F.asnumpy(F.tensor([0,1,2,3,4,5], F.int64))) assert_array_equal(
assert_array_equal(F.asnumpy(local_eid), F.asnumpy(F.tensor([0,1,2,3,4,5,6], F.int64))) F.asnumpy(local_nid), F.asnumpy(F.tensor([0, 1, 2, 3, 4, 5], F.int64))
nid_partid = node_policy.to_partid(F.tensor([0,1,2,3,4,5], F.int64)) )
eid_partid = edge_policy.to_partid(F.tensor([0,1,2,3,4,5,6], F.int64)) assert_array_equal(
assert_array_equal(F.asnumpy(nid_partid), F.asnumpy(F.tensor([0,0,0,0,0,0], F.int64))) F.asnumpy(local_eid),
assert_array_equal(F.asnumpy(eid_partid), F.asnumpy(F.tensor([0,0,0,0,0,0,0], F.int64))) F.asnumpy(F.tensor([0, 1, 2, 3, 4, 5, 6], F.int64)),
)
nid_partid = node_policy.to_partid(F.tensor([0, 1, 2, 3, 4, 5], F.int64))
eid_partid = edge_policy.to_partid(F.tensor([0, 1, 2, 3, 4, 5, 6], F.int64))
assert_array_equal(
F.asnumpy(nid_partid), F.asnumpy(F.tensor([0, 0, 0, 0, 0, 0], F.int64))
)
assert_array_equal(
F.asnumpy(eid_partid),
F.asnumpy(F.tensor([0, 0, 0, 0, 0, 0, 0], F.int64)),
)
assert node_policy.get_part_size() == len(node_map) assert node_policy.get_part_size() == len(node_map)
assert edge_policy.get_part_size() == len(edge_map) assert edge_policy.get_part_size() == len(edge_map)
def start_server(server_id, num_clients, num_servers): def start_server(server_id, num_clients, num_servers):
# Init kvserver # Init kvserver
print("Sleep 5 seconds to test client re-connect.") print("Sleep 5 seconds to test client re-connect.")
time.sleep(5) time.sleep(5)
kvserver = dgl.distributed.KVServer(server_id=server_id, kvserver = dgl.distributed.KVServer(
ip_config='kv_ip_config.txt', server_id=server_id,
num_servers=num_servers, ip_config="kv_ip_config.txt",
num_clients=num_clients) num_servers=num_servers,
num_clients=num_clients,
)
kvserver.add_part_policy(node_policy) kvserver.add_part_policy(node_policy)
kvserver.add_part_policy(edge_policy) kvserver.add_part_policy(edge_policy)
if kvserver.is_backup_server(): if kvserver.is_backup_server():
kvserver.init_data('data_0', 'node:_N') kvserver.init_data("data_0", "node:_N")
kvserver.init_data('data_0_1', 'node:_N') kvserver.init_data("data_0_1", "node:_N")
kvserver.init_data('data_0_2', 'node:_N') kvserver.init_data("data_0_2", "node:_N")
kvserver.init_data('data_0_3', 'node:_N') kvserver.init_data("data_0_3", "node:_N")
else: else:
kvserver.init_data('data_0', 'node:_N', data_0) kvserver.init_data("data_0", "node:_N", data_0)
kvserver.init_data('data_0_1', 'node:_N', data_0_1) kvserver.init_data("data_0_1", "node:_N", data_0_1)
kvserver.init_data('data_0_2', 'node:_N', data_0_2) kvserver.init_data("data_0_2", "node:_N", data_0_2)
kvserver.init_data('data_0_3', 'node:_N', data_0_3) kvserver.init_data("data_0_3", "node:_N", data_0_3)
# start server # start server
server_state = dgl.distributed.ServerState(kv_store=kvserver, local_g=None, partition_book=None) server_state = dgl.distributed.ServerState(
dgl.distributed.start_server(server_id=server_id, kv_store=kvserver, local_g=None, partition_book=None
ip_config='kv_ip_config.txt', )
num_servers=num_servers, dgl.distributed.start_server(
num_clients=num_clients, server_id=server_id,
server_state=server_state) ip_config="kv_ip_config.txt",
num_servers=num_servers,
num_clients=num_clients,
server_state=server_state,
)
def start_server_mul_role(server_id, num_clients, num_servers): def start_server_mul_role(server_id, num_clients, num_servers):
# Init kvserver # Init kvserver
kvserver = dgl.distributed.KVServer(server_id=server_id, kvserver = dgl.distributed.KVServer(
ip_config='kv_ip_mul_config.txt', server_id=server_id,
num_servers=num_servers, ip_config="kv_ip_mul_config.txt",
num_clients=num_clients) num_servers=num_servers,
num_clients=num_clients,
)
kvserver.add_part_policy(node_policy) kvserver.add_part_policy(node_policy)
if kvserver.is_backup_server(): if kvserver.is_backup_server():
kvserver.init_data('data_0', 'node:_N') kvserver.init_data("data_0", "node:_N")
else: else:
kvserver.init_data('data_0', 'node:_N', data_0) kvserver.init_data("data_0", "node:_N", data_0)
# start server # start server
server_state = dgl.distributed.ServerState(kv_store=kvserver, local_g=None, partition_book=None) server_state = dgl.distributed.ServerState(
dgl.distributed.start_server(server_id=server_id, kv_store=kvserver, local_g=None, partition_book=None
ip_config='kv_ip_mul_config.txt', )
num_servers=num_servers, dgl.distributed.start_server(
num_clients=num_clients, server_id=server_id,
server_state=server_state) ip_config="kv_ip_mul_config.txt",
num_servers=num_servers,
num_clients=num_clients,
server_state=server_state,
)
def start_client(num_clients, num_servers): def start_client(num_clients, num_servers):
os.environ['DGL_DIST_MODE'] = 'distributed' os.environ["DGL_DIST_MODE"] = "distributed"
# Note: connect to the server first! # Note: connect to the server first!
dgl.distributed.initialize(ip_config='kv_ip_config.txt') dgl.distributed.initialize(ip_config="kv_ip_config.txt")
# Init kvclient # Init kvclient
kvclient = dgl.distributed.KVClient(ip_config='kv_ip_config.txt', num_servers=num_servers) kvclient = dgl.distributed.KVClient(
ip_config="kv_ip_config.txt", num_servers=num_servers
)
kvclient.map_shared_data(partition_book=gpb) kvclient.map_shared_data(partition_book=gpb)
assert dgl.distributed.get_num_client() == num_clients assert dgl.distributed.get_num_client() == num_clients
kvclient.init_data(name='data_1', kvclient.init_data(
shape=F.shape(data_1), name="data_1",
dtype=F.dtype(data_1), shape=F.shape(data_1),
part_policy=edge_policy, dtype=F.dtype(data_1),
init_func=init_zero_func) part_policy=edge_policy,
kvclient.init_data(name='data_2', init_func=init_zero_func,
shape=F.shape(data_2), )
dtype=F.dtype(data_2), kvclient.init_data(
part_policy=node_policy, name="data_2",
init_func=init_zero_func) shape=F.shape(data_2),
dtype=F.dtype(data_2),
part_policy=node_policy,
init_func=init_zero_func,
)
# Test data_name_list # Test data_name_list
name_list = kvclient.data_name_list() name_list = kvclient.data_name_list()
print(name_list) print(name_list)
assert 'data_0' in name_list assert "data_0" in name_list
assert 'data_0_1' in name_list assert "data_0_1" in name_list
assert 'data_0_2' in name_list assert "data_0_2" in name_list
assert 'data_0_3' in name_list assert "data_0_3" in name_list
assert 'data_1' in name_list assert "data_1" in name_list
assert 'data_2' in name_list assert "data_2" in name_list
# Test get_meta_data # Test get_meta_data
meta = kvclient.get_data_meta('data_0') meta = kvclient.get_data_meta("data_0")
dtype, shape, policy = meta dtype, shape, policy = meta
assert dtype == F.dtype(data_0) assert dtype == F.dtype(data_0)
assert shape == F.shape(data_0) assert shape == F.shape(data_0)
assert policy.policy_str == 'node:_N' assert policy.policy_str == "node:_N"
meta = kvclient.get_data_meta('data_0_1') meta = kvclient.get_data_meta("data_0_1")
dtype, shape, policy = meta dtype, shape, policy = meta
assert dtype == F.dtype(data_0_1) assert dtype == F.dtype(data_0_1)
assert shape == F.shape(data_0_1) assert shape == F.shape(data_0_1)
assert policy.policy_str == 'node:_N' assert policy.policy_str == "node:_N"
meta = kvclient.get_data_meta('data_0_2') meta = kvclient.get_data_meta("data_0_2")
dtype, shape, policy = meta dtype, shape, policy = meta
assert dtype == F.dtype(data_0_2) assert dtype == F.dtype(data_0_2)
assert shape == F.shape(data_0_2) assert shape == F.shape(data_0_2)
assert policy.policy_str == 'node:_N' assert policy.policy_str == "node:_N"
meta = kvclient.get_data_meta('data_0_3') meta = kvclient.get_data_meta("data_0_3")
dtype, shape, policy = meta dtype, shape, policy = meta
assert dtype == F.dtype(data_0_3) assert dtype == F.dtype(data_0_3)
assert shape == F.shape(data_0_3) assert shape == F.shape(data_0_3)
assert policy.policy_str == 'node:_N' assert policy.policy_str == "node:_N"
meta = kvclient.get_data_meta('data_1') meta = kvclient.get_data_meta("data_1")
dtype, shape, policy = meta dtype, shape, policy = meta
assert dtype == F.dtype(data_1) assert dtype == F.dtype(data_1)
assert shape == F.shape(data_1) assert shape == F.shape(data_1)
assert policy.policy_str == 'edge:_E' assert policy.policy_str == "edge:_E"
meta = kvclient.get_data_meta('data_2') meta = kvclient.get_data_meta("data_2")
dtype, shape, policy = meta dtype, shape, policy = meta
assert dtype == F.dtype(data_2) assert dtype == F.dtype(data_2)
assert shape == F.shape(data_2) assert shape == F.shape(data_2)
assert policy.policy_str == 'node:_N' assert policy.policy_str == "node:_N"
# Test push and pull # Test push and pull
id_tensor = F.tensor([0,2,4], F.int64) id_tensor = F.tensor([0, 2, 4], F.int64)
data_tensor = F.tensor([[6.,6.],[6.,6.],[6.,6.]], F.float32) data_tensor = F.tensor([[6.0, 6.0], [6.0, 6.0], [6.0, 6.0]], F.float32)
kvclient.push(name='data_0', kvclient.push(name="data_0", id_tensor=id_tensor, data_tensor=data_tensor)
id_tensor=id_tensor, kvclient.push(name="data_1", id_tensor=id_tensor, data_tensor=data_tensor)
data_tensor=data_tensor) kvclient.push(name="data_2", id_tensor=id_tensor, data_tensor=data_tensor)
kvclient.push(name='data_1', res = kvclient.pull(name="data_0", id_tensor=id_tensor)
id_tensor=id_tensor,
data_tensor=data_tensor)
kvclient.push(name='data_2',
id_tensor=id_tensor,
data_tensor=data_tensor)
res = kvclient.pull(name='data_0', id_tensor=id_tensor)
assert_array_equal(F.asnumpy(res), F.asnumpy(data_tensor)) assert_array_equal(F.asnumpy(res), F.asnumpy(data_tensor))
res = kvclient.pull(name='data_1', id_tensor=id_tensor) res = kvclient.pull(name="data_1", id_tensor=id_tensor)
assert_array_equal(F.asnumpy(res), F.asnumpy(data_tensor)) assert_array_equal(F.asnumpy(res), F.asnumpy(data_tensor))
res = kvclient.pull(name='data_2', id_tensor=id_tensor) res = kvclient.pull(name="data_2", id_tensor=id_tensor)
assert_array_equal(F.asnumpy(res), F.asnumpy(data_tensor)) assert_array_equal(F.asnumpy(res), F.asnumpy(data_tensor))
# Register new push handler # Register new push handler
kvclient.register_push_handler('data_0', udf_push) kvclient.register_push_handler("data_0", udf_push)
kvclient.register_push_handler('data_1', udf_push) kvclient.register_push_handler("data_1", udf_push)
kvclient.register_push_handler('data_2', udf_push) kvclient.register_push_handler("data_2", udf_push)
# Test push and pull # Test push and pull
kvclient.push(name='data_0', kvclient.push(name="data_0", id_tensor=id_tensor, data_tensor=data_tensor)
id_tensor=id_tensor, kvclient.push(name="data_1", id_tensor=id_tensor, data_tensor=data_tensor)
data_tensor=data_tensor) kvclient.push(name="data_2", id_tensor=id_tensor, data_tensor=data_tensor)
kvclient.push(name='data_1',
id_tensor=id_tensor,
data_tensor=data_tensor)
kvclient.push(name='data_2',
id_tensor=id_tensor,
data_tensor=data_tensor)
kvclient.barrier() kvclient.barrier()
data_tensor = data_tensor * data_tensor data_tensor = data_tensor * data_tensor
res = kvclient.pull(name='data_0', id_tensor=id_tensor) res = kvclient.pull(name="data_0", id_tensor=id_tensor)
assert_array_equal(F.asnumpy(res), F.asnumpy(data_tensor)) assert_array_equal(F.asnumpy(res), F.asnumpy(data_tensor))
res = kvclient.pull(name='data_1', id_tensor=id_tensor) res = kvclient.pull(name="data_1", id_tensor=id_tensor)
assert_array_equal(F.asnumpy(res), F.asnumpy(data_tensor)) assert_array_equal(F.asnumpy(res), F.asnumpy(data_tensor))
res = kvclient.pull(name='data_2', id_tensor=id_tensor) res = kvclient.pull(name="data_2", id_tensor=id_tensor)
assert_array_equal(F.asnumpy(res), F.asnumpy(data_tensor)) assert_array_equal(F.asnumpy(res), F.asnumpy(data_tensor))
# Test delete data # Test delete data
kvclient.delete_data('data_0') kvclient.delete_data("data_0")
kvclient.delete_data('data_1') kvclient.delete_data("data_1")
kvclient.delete_data('data_2') kvclient.delete_data("data_2")
# Register new push handler # Register new push handler
kvclient.init_data(name='data_3', kvclient.init_data(
shape=F.shape(data_2), name="data_3",
dtype=F.dtype(data_2), shape=F.shape(data_2),
part_policy=node_policy, dtype=F.dtype(data_2),
init_func=init_zero_func) part_policy=node_policy,
kvclient.register_push_handler('data_3', add_push) init_func=init_zero_func,
data_tensor = F.tensor([[6.,6.],[6.,6.],[6.,6.]], F.float32) )
kvclient.register_push_handler("data_3", add_push)
data_tensor = F.tensor([[6.0, 6.0], [6.0, 6.0], [6.0, 6.0]], F.float32)
kvclient.barrier() kvclient.barrier()
time.sleep(kvclient.client_id + 1) time.sleep(kvclient.client_id + 1)
print("add...") print("add...")
kvclient.push(name='data_3', kvclient.push(name="data_3", id_tensor=id_tensor, data_tensor=data_tensor)
id_tensor=id_tensor,
data_tensor=data_tensor)
kvclient.barrier() kvclient.barrier()
res = kvclient.pull(name='data_3', id_tensor=id_tensor) res = kvclient.pull(name="data_3", id_tensor=id_tensor)
data_tensor = data_tensor * num_clients data_tensor = data_tensor * num_clients
assert_array_equal(F.asnumpy(res), F.asnumpy(data_tensor)) assert_array_equal(F.asnumpy(res), F.asnumpy(data_tensor))
def start_client_mul_role(i): def start_client_mul_role(i):
os.environ['DGL_DIST_MODE'] = 'distributed' os.environ["DGL_DIST_MODE"] = "distributed"
# initialize() creates the kvstore! # initialize() creates the kvstore!
dgl.distributed.initialize(ip_config='kv_ip_mul_config.txt') dgl.distributed.initialize(ip_config="kv_ip_mul_config.txt")
if i == 0: # block one trainer if i == 0: # block one trainer
time.sleep(5) time.sleep(5)
kvclient = dgl.distributed.kvstore.get_kvstore() kvclient = dgl.distributed.kvstore.get_kvstore()
kvclient.barrier() kvclient.barrier()
...@@ -266,26 +312,39 @@ def start_client_mul_role(i): ...@@ -266,26 +312,39 @@ def start_client_mul_role(i):
assert dgl.distributed.role.get_num_trainers() == 2 assert dgl.distributed.role.get_num_trainers() == 2
assert dgl.distributed.role.get_trainer_rank() < 2 assert dgl.distributed.role.get_trainer_rank() < 2
print('trainer rank: %d, global rank: %d' % (dgl.distributed.role.get_trainer_rank(), print(
dgl.distributed.role.get_global_rank())) "trainer rank: %d, global rank: %d"
% (
dgl.distributed.role.get_trainer_rank(),
dgl.distributed.role.get_global_rank(),
)
)
dgl.distributed.exit_client() dgl.distributed.exit_client()
@unittest.skipIf(os.name == 'nt' or os.getenv('DGLBACKEND') == 'tensorflow', reason='Do not support windows and TF yet')
@unittest.skipIf(
os.name == "nt" or os.getenv("DGLBACKEND") == "tensorflow",
reason="Do not support windows and TF yet",
)
def test_kv_store(): def test_kv_store():
reset_envs() reset_envs()
num_servers = 2 num_servers = 2
num_clients = 2 num_clients = 2
generate_ip_config("kv_ip_config.txt", 1, num_servers) generate_ip_config("kv_ip_config.txt", 1, num_servers)
ctx = mp.get_context('spawn') ctx = mp.get_context("spawn")
pserver_list = [] pserver_list = []
pclient_list = [] pclient_list = []
os.environ['DGL_NUM_SERVER'] = str(num_servers) os.environ["DGL_NUM_SERVER"] = str(num_servers)
for i in range(num_servers): for i in range(num_servers):
pserver = ctx.Process(target=start_server, args=(i, num_clients, num_servers)) pserver = ctx.Process(
target=start_server, args=(i, num_clients, num_servers)
)
pserver.start() pserver.start()
pserver_list.append(pserver) pserver_list.append(pserver)
for i in range(num_clients): for i in range(num_clients):
pclient = ctx.Process(target=start_client, args=(num_clients, num_servers)) pclient = ctx.Process(
target=start_client, args=(num_clients, num_servers)
)
pclient.start() pclient.start()
pclient_list.append(pclient) pclient_list.append(pclient)
for i in range(num_clients): for i in range(num_clients):
...@@ -293,7 +352,11 @@ def test_kv_store(): ...@@ -293,7 +352,11 @@ def test_kv_store():
for i in range(num_servers): for i in range(num_servers):
pserver_list[i].join() pserver_list[i].join()
@unittest.skipIf(os.name == 'nt' or os.getenv('DGLBACKEND') == 'tensorflow', reason='Do not support windows and TF yet')
@unittest.skipIf(
os.name == "nt" or os.getenv("DGLBACKEND") == "tensorflow",
reason="Do not support windows and TF yet",
)
def test_kv_multi_role(): def test_kv_multi_role():
reset_envs() reset_envs()
num_servers = 2 num_servers = 2
...@@ -302,13 +365,15 @@ def test_kv_multi_role(): ...@@ -302,13 +365,15 @@ def test_kv_multi_role():
generate_ip_config("kv_ip_mul_config.txt", 1, num_servers) generate_ip_config("kv_ip_mul_config.txt", 1, num_servers)
# There are two trainer processes and each trainer process has two sampler processes. # There are two trainer processes and each trainer process has two sampler processes.
num_clients = num_trainers * (1 + num_samplers) num_clients = num_trainers * (1 + num_samplers)
ctx = mp.get_context('spawn') ctx = mp.get_context("spawn")
pserver_list = [] pserver_list = []
pclient_list = [] pclient_list = []
os.environ['DGL_NUM_SAMPLER'] = str(num_samplers) os.environ["DGL_NUM_SAMPLER"] = str(num_samplers)
os.environ['DGL_NUM_SERVER'] = str(num_servers) os.environ["DGL_NUM_SERVER"] = str(num_servers)
for i in range(num_servers): for i in range(num_servers):
pserver = ctx.Process(target=start_server_mul_role, args=(i, num_clients, num_servers)) pserver = ctx.Process(
target=start_server_mul_role, args=(i, num_clients, num_servers)
)
pserver.start() pserver.start()
pserver_list.append(pserver) pserver_list.append(pserver)
for i in range(num_trainers): for i in range(num_trainers):
...@@ -320,7 +385,8 @@ def test_kv_multi_role(): ...@@ -320,7 +385,8 @@ def test_kv_multi_role():
for i in range(num_servers): for i in range(num_servers):
pserver_list[i].join() pserver_list[i].join()
if __name__ == '__main__':
if __name__ == "__main__":
test_partition_policy() test_partition_policy()
test_kv_store() test_kv_store()
test_kv_multi_role() test_kv_multi_role()
import multiprocessing as mp
import os import os
import time
import socket import socket
import time
import unittest
import dgl
import backend as F import backend as F
import unittest, pytest import pytest
import multiprocessing as mp
from numpy.testing import assert_array_equal from numpy.testing import assert_array_equal
from utils import reset_envs, generate_ip_config from utils import generate_ip_config, reset_envs
if os.name != 'nt': import dgl
if os.name != "nt":
import fcntl import fcntl
import struct import struct
INTEGER = 2 INTEGER = 2
STR = 'hello world!' STR = "hello world!"
HELLO_SERVICE_ID = 901231 HELLO_SERVICE_ID = 901231
TENSOR = F.zeros((1000, 1000), F.int64, F.cpu()) TENSOR = F.zeros((1000, 1000), F.int64, F.cpu())
def foo(x, y): def foo(x, y):
assert x == 123 assert x == 123
assert y == "abc" assert y == "abc"
class MyRequest(dgl.distributed.Request): class MyRequest(dgl.distributed.Request):
def __init__(self): def __init__(self):
self.x = 123 self.x = 123
...@@ -38,6 +42,7 @@ class MyRequest(dgl.distributed.Request): ...@@ -38,6 +42,7 @@ class MyRequest(dgl.distributed.Request):
def process_request(self, server_state): def process_request(self, server_state):
pass pass
class MyResponse(dgl.distributed.Response): class MyResponse(dgl.distributed.Response):
def __init__(self): def __init__(self):
self.x = 432 self.x = 432
...@@ -47,10 +52,12 @@ class MyResponse(dgl.distributed.Response): ...@@ -47,10 +52,12 @@ class MyResponse(dgl.distributed.Response):
def __setstate__(self, state): def __setstate__(self, state):
self.x = state self.x = state
def simple_func(tensor): def simple_func(tensor):
return tensor return tensor
class HelloResponse(dgl.distributed.Response): class HelloResponse(dgl.distributed.Response):
def __init__(self, hello_str, integer, tensor): def __init__(self, hello_str, integer, tensor):
self.hello_str = hello_str self.hello_str = hello_str
...@@ -63,6 +70,7 @@ class HelloResponse(dgl.distributed.Response): ...@@ -63,6 +70,7 @@ class HelloResponse(dgl.distributed.Response):
def __setstate__(self, state): def __setstate__(self, state):
self.hello_str, self.integer, self.tensor = state self.hello_str, self.integer, self.tensor = state
class HelloRequest(dgl.distributed.Request): class HelloRequest(dgl.distributed.Request):
def __init__(self, hello_str, integer, tensor, func): def __init__(self, hello_str, integer, tensor, func):
self.hello_str = hello_str self.hello_str = hello_str
...@@ -85,7 +93,7 @@ class HelloRequest(dgl.distributed.Request): ...@@ -85,7 +93,7 @@ class HelloRequest(dgl.distributed.Request):
TIMEOUT_SERVICE_ID = 123456789 TIMEOUT_SERVICE_ID = 123456789
TIMEOUT_META = 'timeout_test' TIMEOUT_META = "timeout_test"
class TimeoutResponse(dgl.distributed.Response): class TimeoutResponse(dgl.distributed.Response):
...@@ -114,33 +122,53 @@ class TimeoutRequest(dgl.distributed.Request): ...@@ -114,33 +122,53 @@ class TimeoutRequest(dgl.distributed.Request):
def process_request(self, server_state): def process_request(self, server_state):
assert self.meta == TIMEOUT_META assert self.meta == TIMEOUT_META
# convert from milliseconds to seconds # convert from milliseconds to seconds
time.sleep(self.timeout/1000) time.sleep(self.timeout / 1000)
if not self.response: if not self.response:
return None return None
res = TimeoutResponse(self.meta) res = TimeoutResponse(self.meta)
return res return res
def start_server(num_clients, ip_config, server_id=0, keep_alive=False, num_servers=1, net_type='tensorpipe'):
def start_server(
num_clients,
ip_config,
server_id=0,
keep_alive=False,
num_servers=1,
net_type="tensorpipe",
):
print("Sleep 1 seconds to test client re-connect.") print("Sleep 1 seconds to test client re-connect.")
time.sleep(1) time.sleep(1)
server_state = dgl.distributed.ServerState( server_state = dgl.distributed.ServerState(
None, local_g=None, partition_book=None, keep_alive=keep_alive) None, local_g=None, partition_book=None, keep_alive=keep_alive
)
dgl.distributed.register_service( dgl.distributed.register_service(
HELLO_SERVICE_ID, HelloRequest, HelloResponse) HELLO_SERVICE_ID, HelloRequest, HelloResponse
)
dgl.distributed.register_service( dgl.distributed.register_service(
TIMEOUT_SERVICE_ID, TimeoutRequest, TimeoutResponse) TIMEOUT_SERVICE_ID, TimeoutRequest, TimeoutResponse
)
print("Start server {}".format(server_id)) print("Start server {}".format(server_id))
dgl.distributed.start_server(server_id=server_id, dgl.distributed.start_server(
ip_config=ip_config, server_id=server_id,
num_servers=num_servers, ip_config=ip_config,
num_clients=num_clients, num_servers=num_servers,
server_state=server_state, num_clients=num_clients,
net_type=net_type) server_state=server_state,
net_type=net_type,
def start_client(ip_config, group_id=0, num_servers=1, net_type='tensorpipe'): )
dgl.distributed.register_service(HELLO_SERVICE_ID, HelloRequest, HelloResponse)
def start_client(ip_config, group_id=0, num_servers=1, net_type="tensorpipe"):
dgl.distributed.register_service(
HELLO_SERVICE_ID, HelloRequest, HelloResponse
)
dgl.distributed.connect_to_server( dgl.distributed.connect_to_server(
ip_config=ip_config, num_servers=num_servers, group_id=group_id, net_type=net_type) ip_config=ip_config,
num_servers=num_servers,
group_id=group_id,
net_type=net_type,
)
req = HelloRequest(STR, INTEGER, TENSOR, simple_func) req = HelloRequest(STR, INTEGER, TENSOR, simple_func)
# test send and recv # test send and recv
dgl.distributed.send_request(0, req) dgl.distributed.send_request(0, req)
...@@ -174,16 +202,23 @@ def start_client(ip_config, group_id=0, num_servers=1, net_type='tensorpipe'): ...@@ -174,16 +202,23 @@ def start_client(ip_config, group_id=0, num_servers=1, net_type='tensorpipe'):
assert_array_equal(F.asnumpy(res.tensor), F.asnumpy(TENSOR)) assert_array_equal(F.asnumpy(res.tensor), F.asnumpy(TENSOR))
def start_client_timeout(ip_config, group_id=0, num_servers=1, net_type='tensorpipe'): def start_client_timeout(
ip_config, group_id=0, num_servers=1, net_type="tensorpipe"
):
dgl.distributed.register_service( dgl.distributed.register_service(
TIMEOUT_SERVICE_ID, TimeoutRequest, TimeoutResponse) TIMEOUT_SERVICE_ID, TimeoutRequest, TimeoutResponse
)
dgl.distributed.connect_to_server( dgl.distributed.connect_to_server(
ip_config=ip_config, num_servers=num_servers, group_id=group_id, net_type=net_type) ip_config=ip_config,
num_servers=num_servers,
group_id=group_id,
net_type=net_type,
)
timeout = 1 * 1000 # milliseconds timeout = 1 * 1000 # milliseconds
req = TimeoutRequest(TIMEOUT_META, timeout) req = TimeoutRequest(TIMEOUT_META, timeout)
# test send and recv # test send and recv
dgl.distributed.send_request(0, req) dgl.distributed.send_request(0, req)
res = dgl.distributed.recv_response(timeout=int(timeout/2)) res = dgl.distributed.recv_response(timeout=int(timeout / 2))
assert res is None assert res is None
res = dgl.distributed.recv_response() res = dgl.distributed.recv_response()
assert res.meta == TIMEOUT_META assert res.meta == TIMEOUT_META
...@@ -195,14 +230,15 @@ def start_client_timeout(ip_config, group_id=0, num_servers=1, net_type='tensorp ...@@ -195,14 +230,15 @@ def start_client_timeout(ip_config, group_id=0, num_servers=1, net_type='tensorp
expect_except = False expect_except = False
try: try:
res_list = dgl.distributed.remote_call( res_list = dgl.distributed.remote_call(
target_and_requests, timeout=int(timeout/2)) target_and_requests, timeout=int(timeout / 2)
)
except dgl.DGLError: except dgl.DGLError:
expect_except = True expect_except = True
assert expect_except assert expect_except
# test send_request_to_machine # test send_request_to_machine
req = TimeoutRequest(TIMEOUT_META, timeout) req = TimeoutRequest(TIMEOUT_META, timeout)
dgl.distributed.send_request_to_machine(0, req) dgl.distributed.send_request_to_machine(0, req)
res = dgl.distributed.recv_response(timeout=int(timeout/2)) res = dgl.distributed.recv_response(timeout=int(timeout / 2))
assert res is None assert res is None
res = dgl.distributed.recv_response() res = dgl.distributed.recv_response()
assert res.meta == TIMEOUT_META assert res.meta == TIMEOUT_META
...@@ -214,30 +250,41 @@ def start_client_timeout(ip_config, group_id=0, num_servers=1, net_type='tensorp ...@@ -214,30 +250,41 @@ def start_client_timeout(ip_config, group_id=0, num_servers=1, net_type='tensorp
expect_except = False expect_except = False
try: try:
res_list = dgl.distributed.remote_call_to_machine( res_list = dgl.distributed.remote_call_to_machine(
target_and_requests, timeout=int(timeout/2)) target_and_requests, timeout=int(timeout / 2)
)
except dgl.DGLError: except dgl.DGLError:
expect_except = True expect_except = True
assert expect_except assert expect_except
@unittest.skipIf(os.name == 'nt', reason='Do not support windows yet')
@pytest.mark.parametrize("net_type", ['socket', 'tensorpipe']) @unittest.skipIf(os.name == "nt", reason="Do not support windows yet")
@pytest.mark.parametrize("net_type", ["socket", "tensorpipe"])
def test_rpc_timeout(net_type): def test_rpc_timeout(net_type):
reset_envs() reset_envs()
os.environ['DGL_DIST_MODE'] = 'distributed' os.environ["DGL_DIST_MODE"] = "distributed"
ip_config = "rpc_ip_config.txt" ip_config = "rpc_ip_config.txt"
generate_ip_config(ip_config, 1, 1) generate_ip_config(ip_config, 1, 1)
ctx = mp.get_context('spawn') ctx = mp.get_context("spawn")
pserver = ctx.Process(target=start_server, args=(1, ip_config, 0, False, 1, net_type)) pserver = ctx.Process(
pclient = ctx.Process(target=start_client_timeout, args=(ip_config, 0, 1, net_type)) target=start_server, args=(1, ip_config, 0, False, 1, net_type)
)
pclient = ctx.Process(
target=start_client_timeout, args=(ip_config, 0, 1, net_type)
)
pserver.start() pserver.start()
pclient.start() pclient.start()
pserver.join() pserver.join()
pclient.join() pclient.join()
def test_serialize(): def test_serialize():
reset_envs() reset_envs()
os.environ['DGL_DIST_MODE'] = 'distributed' os.environ["DGL_DIST_MODE"] = "distributed"
from dgl.distributed.rpc import serialize_to_payload, deserialize_from_payload from dgl.distributed.rpc import (
deserialize_from_payload,
serialize_to_payload,
)
SERVICE_ID = 12345 SERVICE_ID = 12345
dgl.distributed.register_service(SERVICE_ID, MyRequest, MyResponse) dgl.distributed.register_service(SERVICE_ID, MyRequest, MyResponse)
req = MyRequest() req = MyRequest()
...@@ -253,10 +300,16 @@ def test_serialize(): ...@@ -253,10 +300,16 @@ def test_serialize():
res1 = deserialize_from_payload(MyResponse, data, tensors) res1 = deserialize_from_payload(MyResponse, data, tensors)
assert res.x == res1.x assert res.x == res1.x
def test_rpc_msg(): def test_rpc_msg():
reset_envs() reset_envs()
os.environ['DGL_DIST_MODE'] = 'distributed' os.environ["DGL_DIST_MODE"] = "distributed"
from dgl.distributed.rpc import serialize_to_payload, deserialize_from_payload, RPCMessage from dgl.distributed.rpc import (
RPCMessage,
deserialize_from_payload,
serialize_to_payload,
)
SERVICE_ID = 32452 SERVICE_ID = 32452
dgl.distributed.register_service(SERVICE_ID, MyRequest, MyResponse) dgl.distributed.register_service(SERVICE_ID, MyRequest, MyResponse)
req = MyRequest() req = MyRequest()
...@@ -270,33 +323,45 @@ def test_rpc_msg(): ...@@ -270,33 +323,45 @@ def test_rpc_msg():
assert len(rpcmsg.tensors) == 1 assert len(rpcmsg.tensors) == 1
assert F.array_equal(rpcmsg.tensors[0], req.z) assert F.array_equal(rpcmsg.tensors[0], req.z)
@unittest.skipIf(os.name == 'nt', reason='Do not support windows yet')
@pytest.mark.parametrize("net_type", ['tensorpipe']) @unittest.skipIf(os.name == "nt", reason="Do not support windows yet")
@pytest.mark.parametrize("net_type", ["tensorpipe"])
def test_rpc(net_type): def test_rpc(net_type):
reset_envs() reset_envs()
os.environ['DGL_DIST_MODE'] = 'distributed' os.environ["DGL_DIST_MODE"] = "distributed"
generate_ip_config("rpc_ip_config.txt", 1, 1) generate_ip_config("rpc_ip_config.txt", 1, 1)
ctx = mp.get_context('spawn') ctx = mp.get_context("spawn")
pserver = ctx.Process(target=start_server, args=(1, "rpc_ip_config.txt", 0, False, 1, net_type)) pserver = ctx.Process(
pclient = ctx.Process(target=start_client, args=("rpc_ip_config.txt", 0, 1, net_type)) target=start_server,
args=(1, "rpc_ip_config.txt", 0, False, 1, net_type),
)
pclient = ctx.Process(
target=start_client, args=("rpc_ip_config.txt", 0, 1, net_type)
)
pserver.start() pserver.start()
pclient.start() pclient.start()
pserver.join() pserver.join()
pclient.join() pclient.join()
@unittest.skipIf(os.name == 'nt', reason='Do not support windows yet')
@pytest.mark.parametrize("net_type", ['socket', 'tensorpipe']) @unittest.skipIf(os.name == "nt", reason="Do not support windows yet")
@pytest.mark.parametrize("net_type", ["socket", "tensorpipe"])
def test_multi_client(net_type): def test_multi_client(net_type):
reset_envs() reset_envs()
os.environ['DGL_DIST_MODE'] = 'distributed' os.environ["DGL_DIST_MODE"] = "distributed"
ip_config = "rpc_ip_config_mul_client.txt" ip_config = "rpc_ip_config_mul_client.txt"
generate_ip_config(ip_config, 1, 1) generate_ip_config(ip_config, 1, 1)
ctx = mp.get_context('spawn') ctx = mp.get_context("spawn")
num_clients = 20 num_clients = 20
pserver = ctx.Process(target=start_server, args=(num_clients, ip_config, 0, False, 1, net_type)) pserver = ctx.Process(
target=start_server,
args=(num_clients, ip_config, 0, False, 1, net_type),
)
pclient_list = [] pclient_list = []
for i in range(num_clients): for i in range(num_clients):
pclient = ctx.Process(target=start_client, args=(ip_config, 0, 1, net_type)) pclient = ctx.Process(
target=start_client, args=(ip_config, 0, 1, net_type)
)
pclient_list.append(pclient) pclient_list.append(pclient)
pserver.start() pserver.start()
for i in range(num_clients): for i in range(num_clients):
...@@ -306,37 +371,44 @@ def test_multi_client(net_type): ...@@ -306,37 +371,44 @@ def test_multi_client(net_type):
pserver.join() pserver.join()
@unittest.skipIf(os.name == 'nt', reason='Do not support windows yet') @unittest.skipIf(os.name == "nt", reason="Do not support windows yet")
@pytest.mark.parametrize("net_type", ['socket', 'tensorpipe']) @pytest.mark.parametrize("net_type", ["socket", "tensorpipe"])
def test_multi_thread_rpc(net_type): def test_multi_thread_rpc(net_type):
reset_envs() reset_envs()
os.environ['DGL_DIST_MODE'] = 'distributed' os.environ["DGL_DIST_MODE"] = "distributed"
num_servers = 2 num_servers = 2
ip_config = "rpc_ip_config_multithread.txt" ip_config = "rpc_ip_config_multithread.txt"
generate_ip_config(ip_config, num_servers, num_servers) generate_ip_config(ip_config, num_servers, num_servers)
ctx = mp.get_context('spawn') ctx = mp.get_context("spawn")
pserver_list = [] pserver_list = []
for i in range(num_servers): for i in range(num_servers):
pserver = ctx.Process(target=start_server, args=(1, ip_config, i, False, 1, net_type)) pserver = ctx.Process(
target=start_server, args=(1, ip_config, i, False, 1, net_type)
)
pserver.start() pserver.start()
pserver_list.append(pserver) pserver_list.append(pserver)
def start_client_multithread(ip_config): def start_client_multithread(ip_config):
import threading import threading
dgl.distributed.connect_to_server(ip_config=ip_config, num_servers=1, net_type=net_type)
dgl.distributed.register_service(HELLO_SERVICE_ID, HelloRequest, HelloResponse) dgl.distributed.connect_to_server(
ip_config=ip_config, num_servers=1, net_type=net_type
)
dgl.distributed.register_service(
HELLO_SERVICE_ID, HelloRequest, HelloResponse
)
req = HelloRequest(STR, INTEGER, TENSOR, simple_func) req = HelloRequest(STR, INTEGER, TENSOR, simple_func)
dgl.distributed.send_request(0, req) dgl.distributed.send_request(0, req)
def subthread_call(server_id): def subthread_call(server_id):
req = HelloRequest(STR, INTEGER, TENSOR, simple_func) req = HelloRequest(STR, INTEGER, TENSOR, simple_func)
dgl.distributed.send_request(server_id, req) dgl.distributed.send_request(server_id, req)
subthread = threading.Thread(target=subthread_call, args=(1,)) subthread = threading.Thread(target=subthread_call, args=(1,))
subthread.start() subthread.start()
subthread.join() subthread.join()
res0 = dgl.distributed.recv_response() res0 = dgl.distributed.recv_response()
res1 = dgl.distributed.recv_response() res1 = dgl.distributed.recv_response()
# Order is not guaranteed # Order is not guaranteed
...@@ -347,11 +419,15 @@ def test_multi_thread_rpc(net_type): ...@@ -347,11 +419,15 @@ def test_multi_thread_rpc(net_type):
start_client_multithread(ip_config) start_client_multithread(ip_config)
pserver.join() pserver.join()
@unittest.skipIf(True, reason="Tests of multiple groups may fail and let's disable them for now.")
@unittest.skipIf(os.name == 'nt', reason='Do not support windows yet') @unittest.skipIf(
True,
reason="Tests of multiple groups may fail and let's disable them for now.",
)
@unittest.skipIf(os.name == "nt", reason="Do not support windows yet")
def test_multi_client_groups(): def test_multi_client_groups():
reset_envs() reset_envs()
os.environ['DGL_DIST_MODE'] = 'distributed' os.environ["DGL_DIST_MODE"] = "distributed"
ip_config = "rpc_ip_config_mul_client_groups.txt" ip_config = "rpc_ip_config_mul_client_groups.txt"
num_machines = 5 num_machines = 5
    # should test with a larger number, but keep it small due to possible port-in-use issues.     # should test with a larger number, but keep it small due to possible port-in-use issues.
...@@ -360,16 +436,21 @@ def test_multi_client_groups(): ...@@ -360,16 +436,21 @@ def test_multi_client_groups():
    # pressure test     # pressure test
num_clients = 2 num_clients = 2
num_groups = 2 num_groups = 2
ctx = mp.get_context('spawn') ctx = mp.get_context("spawn")
pserver_list = [] pserver_list = []
for i in range(num_servers*num_machines): for i in range(num_servers * num_machines):
pserver = ctx.Process(target=start_server, args=(num_clients, ip_config, i, True, num_servers)) pserver = ctx.Process(
target=start_server,
args=(num_clients, ip_config, i, True, num_servers),
)
pserver.start() pserver.start()
pserver_list.append(pserver) pserver_list.append(pserver)
pclient_list = [] pclient_list = []
for i in range(num_clients): for i in range(num_clients):
for group_id in range(num_groups): for group_id in range(num_groups):
pclient = ctx.Process(target=start_client, args=(ip_config, group_id, num_servers)) pclient = ctx.Process(
target=start_client, args=(ip_config, group_id, num_servers)
)
pclient.start() pclient.start()
pclient_list.append(pclient) pclient_list.append(pclient)
for p in pclient_list: for p in pclient_list:
...@@ -381,19 +462,23 @@ def test_multi_client_groups(): ...@@ -381,19 +462,23 @@ def test_multi_client_groups():
for p in pserver_list: for p in pserver_list:
p.join() p.join()
@unittest.skipIf(os.name == 'nt', reason='Do not support windows yet')
@pytest.mark.parametrize("net_type", ['socket', 'tensorpipe']) @unittest.skipIf(os.name == "nt", reason="Do not support windows yet")
@pytest.mark.parametrize("net_type", ["socket", "tensorpipe"])
def test_multi_client_connect(net_type): def test_multi_client_connect(net_type):
reset_envs() reset_envs()
os.environ['DGL_DIST_MODE'] = 'distributed' os.environ["DGL_DIST_MODE"] = "distributed"
ip_config = "rpc_ip_config_mul_client.txt" ip_config = "rpc_ip_config_mul_client.txt"
generate_ip_config(ip_config, 1, 1) generate_ip_config(ip_config, 1, 1)
ctx = mp.get_context('spawn') ctx = mp.get_context("spawn")
num_clients = 1 num_clients = 1
pserver = ctx.Process(target=start_server, args=(num_clients, ip_config, 0, False, 1, net_type)) pserver = ctx.Process(
target=start_server,
args=(num_clients, ip_config, 0, False, 1, net_type),
)
# small max try times # small max try times
os.environ['DGL_DIST_MAX_TRY_TIMES'] = '1' os.environ["DGL_DIST_MAX_TRY_TIMES"] = "1"
expect_except = False expect_except = False
try: try:
start_client(ip_config, 0, 1, net_type) start_client(ip_config, 0, 1, net_type)
...@@ -403,7 +488,7 @@ def test_multi_client_connect(net_type): ...@@ -403,7 +488,7 @@ def test_multi_client_connect(net_type):
assert expect_except assert expect_except
# large max try times # large max try times
os.environ['DGL_DIST_MAX_TRY_TIMES'] = '1024' os.environ["DGL_DIST_MAX_TRY_TIMES"] = "1024"
pclient = ctx.Process(target=start_client, args=(ip_config, 0, 1, net_type)) pclient = ctx.Process(target=start_client, args=(ip_config, 0, 1, net_type))
pclient.start() pclient.start()
pserver.start() pserver.start()
...@@ -411,12 +496,13 @@ def test_multi_client_connect(net_type): ...@@ -411,12 +496,13 @@ def test_multi_client_connect(net_type):
pserver.join() pserver.join()
reset_envs() reset_envs()
if __name__ == '__main__':
if __name__ == "__main__":
test_serialize() test_serialize()
test_rpc_msg() test_rpc_msg()
test_rpc() test_rpc()
test_multi_client('socket') test_multi_client("socket")
    test_multi_client('tensorpipe')     test_multi_client("tensorpipe")
test_multi_thread_rpc() test_multi_thread_rpc()
test_multi_client_connect('socket') test_multi_client_connect("socket")
test_multi_client_connect('tensorpipe') test_multi_client_connect("tensorpipe")
import socket
import os import os
import random import random
import scipy.sparse as spsp import socket
import numpy as np import numpy as np
import scipy.sparse as spsp
import dgl import dgl
...@@ -13,10 +14,10 @@ def generate_ip_config(file_name, num_machines, num_servers): ...@@ -13,10 +14,10 @@ def generate_ip_config(file_name, num_machines, num_servers):
sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
try: try:
# doesn't even have to be reachable # doesn't even have to be reachable
sock.connect(('10.255.255.255', 1)) sock.connect(("10.255.255.255", 1))
ip = sock.getsockname()[0] ip = sock.getsockname()[0]
except ValueError: except ValueError:
ip = '127.0.0.1' ip = "127.0.0.1"
finally: finally:
sock.close() sock.close()
...@@ -35,16 +36,23 @@ def generate_ip_config(file_name, num_machines, num_servers): ...@@ -35,16 +36,23 @@ def generate_ip_config(file_name, num_machines, num_servers):
sock.close() sock.close()
if len(ports) < num_machines * num_servers: if len(ports) < num_machines * num_servers:
raise RuntimeError( raise RuntimeError(
"Failed to get available IP/PORT with required numbers.") "Failed to get available IP/PORT with required numbers."
with open(file_name, 'w') as f: )
with open(file_name, "w") as f:
for i in range(num_machines): for i in range(num_machines):
f.write('{} {}\n'.format(ip, ports[i*num_servers])) f.write("{} {}\n".format(ip, ports[i * num_servers]))
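As a worked illustration of what this helper writes (the IP comes from the socket trick above; the port values below are hypothetical, since generate_ip_config picks free ports at runtime): with num_machines=2 and num_servers=2 the loop emits one "ip port" line per machine, using the first of the num_servers ports gathered for that machine, e.g.

# Hypothetical ip_config.txt produced by generate_ip_config("ip_config.txt", 2, 2)
# 127.0.0.1 30050
# 127.0.0.1 30052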
def reset_envs(): def reset_envs():
"""Reset common environment variable which are set in tests. """ """Reset common environment variable which are set in tests."""
for key in ['DGL_ROLE', 'DGL_NUM_SAMPLER', 'DGL_NUM_SERVER', \ for key in [
'DGL_DIST_MODE', 'DGL_NUM_CLIENT', 'DGL_DIST_MAX_TRY_TIMES']: "DGL_ROLE",
"DGL_NUM_SAMPLER",
"DGL_NUM_SERVER",
"DGL_DIST_MODE",
"DGL_NUM_CLIENT",
"DGL_DIST_MAX_TRY_TIMES",
]:
if key in os.environ: if key in os.environ:
os.environ.pop(key) os.environ.pop(key)
......
import dgl
import pytest import pytest
import torch import torch
from dglgo.model import *
from test_utils.graph_cases import get_cases from test_utils.graph_cases import get_cases
@pytest.mark.parametrize('g', get_cases(['has_scalar_e_feature'])) import dgl
from dglgo.model import *
@pytest.mark.parametrize("g", get_cases(["has_scalar_e_feature"]))
def test_gcn(g): def test_gcn(g):
data_info = { data_info = {"num_nodes": g.num_nodes(), "out_size": 7}
'num_nodes': g.num_nodes(),
'out_size': 7
}
node_feat = None node_feat = None
edge_feat = g.edata['scalar_w'] edge_feat = g.edata["scalar_w"]
# node embedding + not use_edge_weight # node embedding + not use_edge_weight
model = GCN(data_info, embed_size=10, use_edge_weight=False) model = GCN(data_info, embed_size=10, use_edge_weight=False)
...@@ -21,8 +20,8 @@ def test_gcn(g): ...@@ -21,8 +20,8 @@ def test_gcn(g):
model = GCN(data_info, embed_size=10, use_edge_weight=True) model = GCN(data_info, embed_size=10, use_edge_weight=True)
model(g, node_feat, edge_feat) model(g, node_feat, edge_feat)
data_info['in_size'] = g.ndata['h'].shape[-1] data_info["in_size"] = g.ndata["h"].shape[-1]
node_feat = g.ndata['h'] node_feat = g.ndata["h"]
# node feat + not use_edge_weight # node feat + not use_edge_weight
model = GCN(data_info, embed_size=-1, use_edge_weight=False) model = GCN(data_info, embed_size=-1, use_edge_weight=False)
...@@ -32,15 +31,13 @@ def test_gcn(g): ...@@ -32,15 +31,13 @@ def test_gcn(g):
model = GCN(data_info, embed_size=-1, use_edge_weight=True) model = GCN(data_info, embed_size=-1, use_edge_weight=True)
model(g, node_feat, edge_feat) model(g, node_feat, edge_feat)
@pytest.mark.parametrize('g', get_cases(['block-bipartite']))
@pytest.mark.parametrize("g", get_cases(["block-bipartite"]))
def test_gcn_block(g): def test_gcn_block(g):
data_info = { data_info = {"in_size": 10, "out_size": 7}
'in_size': 10,
'out_size': 7
}
blocks = [g] blocks = [g]
node_feat = torch.randn(g.num_src_nodes(), data_info['in_size']) node_feat = torch.randn(g.num_src_nodes(), data_info["in_size"])
edge_feat = torch.abs(torch.randn(g.num_edges())) edge_feat = torch.abs(torch.randn(g.num_edges()))
# not use_edge_weight # not use_edge_weight
model = GCN(data_info, use_edge_weight=False) model = GCN(data_info, use_edge_weight=False)
...@@ -50,12 +47,10 @@ def test_gcn_block(g): ...@@ -50,12 +47,10 @@ def test_gcn_block(g):
model = GCN(data_info, use_edge_weight=True) model = GCN(data_info, use_edge_weight=True)
model.forward_block(blocks, node_feat, edge_feat) model.forward_block(blocks, node_feat, edge_feat)
@pytest.mark.parametrize('g', get_cases(['has_scalar_e_feature']))
@pytest.mark.parametrize("g", get_cases(["has_scalar_e_feature"]))
def test_gat(g): def test_gat(g):
data_info = { data_info = {"num_nodes": g.num_nodes(), "out_size": 7}
'num_nodes': g.num_nodes(),
'out_size': 7
}
node_feat = None node_feat = None
# node embedding # node embedding
...@@ -63,29 +58,25 @@ def test_gat(g): ...@@ -63,29 +58,25 @@ def test_gat(g):
model(g, node_feat) model(g, node_feat)
# node feat # node feat
data_info['in_size'] = g.ndata['h'].shape[-1] data_info["in_size"] = g.ndata["h"].shape[-1]
node_feat = g.ndata['h'] node_feat = g.ndata["h"]
model = GAT(data_info, embed_size=-1) model = GAT(data_info, embed_size=-1)
model(g, node_feat) model(g, node_feat)
@pytest.mark.parametrize('g', get_cases(['block-bipartite']))
@pytest.mark.parametrize("g", get_cases(["block-bipartite"]))
def test_gat_block(g): def test_gat_block(g):
data_info = { data_info = {"in_size": 10, "out_size": 7}
'in_size': 10,
'out_size': 7
}
blocks = [g] blocks = [g]
node_feat = torch.randn(g.num_src_nodes(), data_info['in_size']) node_feat = torch.randn(g.num_src_nodes(), data_info["in_size"])
model = GAT(data_info, num_layers=1, heads=[8]) model = GAT(data_info, num_layers=1, heads=[8])
model.forward_block(blocks, node_feat) model.forward_block(blocks, node_feat)
@pytest.mark.parametrize('g', get_cases(['has_scalar_e_feature']))
@pytest.mark.parametrize("g", get_cases(["has_scalar_e_feature"]))
def test_gin(g): def test_gin(g):
data_info = { data_info = {"num_nodes": g.num_nodes(), "out_size": 7}
'num_nodes': g.num_nodes(),
'out_size': 7
}
node_feat = None node_feat = None
# node embedding # node embedding
...@@ -93,19 +84,17 @@ def test_gin(g): ...@@ -93,19 +84,17 @@ def test_gin(g):
model(g, node_feat) model(g, node_feat)
# node feat # node feat
data_info['in_size'] = g.ndata['h'].shape[-1] data_info["in_size"] = g.ndata["h"].shape[-1]
node_feat = g.ndata['h'] node_feat = g.ndata["h"]
model = GIN(data_info, embed_size=-1) model = GIN(data_info, embed_size=-1)
model(g, node_feat) model(g, node_feat)
@pytest.mark.parametrize('g', get_cases(['has_scalar_e_feature']))
@pytest.mark.parametrize("g", get_cases(["has_scalar_e_feature"]))
def test_sage(g): def test_sage(g):
data_info = { data_info = {"num_nodes": g.num_nodes(), "out_size": 7}
'num_nodes': g.num_nodes(),
'out_size': 7
}
node_feat = None node_feat = None
edge_feat = g.edata['scalar_w'] edge_feat = g.edata["scalar_w"]
# node embedding # node embedding
model = GraphSAGE(data_info, embed_size=10) model = GraphSAGE(data_info, embed_size=10)
...@@ -113,32 +102,28 @@ def test_sage(g): ...@@ -113,32 +102,28 @@ def test_sage(g):
model(g, node_feat, edge_feat) model(g, node_feat, edge_feat)
# node feat # node feat
data_info['in_size'] = g.ndata['h'].shape[-1] data_info["in_size"] = g.ndata["h"].shape[-1]
node_feat = g.ndata['h'] node_feat = g.ndata["h"]
model = GraphSAGE(data_info, embed_size=-1) model = GraphSAGE(data_info, embed_size=-1)
model(g, node_feat) model(g, node_feat)
model(g, node_feat, edge_feat) model(g, node_feat, edge_feat)
@pytest.mark.parametrize('g', get_cases(['block-bipartite']))
@pytest.mark.parametrize("g", get_cases(["block-bipartite"]))
def test_sage_block(g): def test_sage_block(g):
data_info = { data_info = {"in_size": 10, "out_size": 7}
'in_size': 10,
'out_size': 7
}
blocks = [g] blocks = [g]
node_feat = torch.randn(g.num_src_nodes(), data_info['in_size']) node_feat = torch.randn(g.num_src_nodes(), data_info["in_size"])
edge_feat = torch.abs(torch.randn(g.num_edges())) edge_feat = torch.abs(torch.randn(g.num_edges()))
model = GraphSAGE(data_info, embed_size=-1) model = GraphSAGE(data_info, embed_size=-1)
model.forward_block(blocks, node_feat) model.forward_block(blocks, node_feat)
model.forward_block(blocks, node_feat, edge_feat) model.forward_block(blocks, node_feat, edge_feat)
@pytest.mark.parametrize('g', get_cases(['has_scalar_e_feature']))
@pytest.mark.parametrize("g", get_cases(["has_scalar_e_feature"]))
def test_sgc(g): def test_sgc(g):
data_info = { data_info = {"num_nodes": g.num_nodes(), "out_size": 7}
'num_nodes': g.num_nodes(),
'out_size': 7
}
node_feat = None node_feat = None
# node embedding # node embedding
...@@ -146,44 +131,37 @@ def test_sgc(g): ...@@ -146,44 +131,37 @@ def test_sgc(g):
model(g, node_feat) model(g, node_feat)
# node feat # node feat
data_info['in_size'] = g.ndata['h'].shape[-1] data_info["in_size"] = g.ndata["h"].shape[-1]
node_feat = g.ndata['h'] node_feat = g.ndata["h"]
model = SGC(data_info, embed_size=-1) model = SGC(data_info, embed_size=-1)
model(g, node_feat) model(g, node_feat)
def test_bilinear(): def test_bilinear():
data_info = { data_info = {"in_size": 10, "out_size": 1}
'in_size': 10,
'out_size': 1
}
model = BilinearPredictor(data_info) model = BilinearPredictor(data_info)
num_pairs = 10 num_pairs = 10
h_src = torch.randn(num_pairs, data_info['in_size']) h_src = torch.randn(num_pairs, data_info["in_size"])
h_dst = torch.randn(num_pairs, data_info['in_size']) h_dst = torch.randn(num_pairs, data_info["in_size"])
model(h_src, h_dst) model(h_src, h_dst)
def test_ele(): def test_ele():
data_info = { data_info = {"in_size": 10, "out_size": 1}
'in_size': 10,
'out_size': 1
}
model = ElementWiseProductPredictor(data_info) model = ElementWiseProductPredictor(data_info)
num_pairs = 10 num_pairs = 10
h_src = torch.randn(num_pairs, data_info['in_size']) h_src = torch.randn(num_pairs, data_info["in_size"])
h_dst = torch.randn(num_pairs, data_info['in_size']) h_dst = torch.randn(num_pairs, data_info["in_size"])
model(h_src, h_dst) model(h_src, h_dst)
@pytest.mark.parametrize('virtual_node', [True, False])
@pytest.mark.parametrize("virtual_node", [True, False])
def test_ogbg_gin(virtual_node): def test_ogbg_gin(virtual_node):
# Test for ogbg-mol datasets # Test for ogbg-mol datasets
data_info = { data_info = {"name": "ogbg-molhiv", "out_size": 1}
'name': 'ogbg-molhiv', model = OGBGGIN(
'out_size': 1 data_info, embed_size=10, num_layers=2, virtual_node=virtual_node
} )
model = OGBGGIN(data_info,
embed_size=10,
num_layers=2,
virtual_node=virtual_node)
num_nodes = 5 num_nodes = 5
num_edges = 15 num_edges = 15
g1 = dgl.rand_graph(num_nodes, num_edges) g1 = dgl.rand_graph(num_nodes, num_edges)
...@@ -197,29 +175,23 @@ def test_ogbg_gin(virtual_node): ...@@ -197,29 +175,23 @@ def test_ogbg_gin(virtual_node):
# Test for non-ogbg-mol datasets # Test for non-ogbg-mol datasets
data_info = { data_info = {
'name': 'a_dataset', "name": "a_dataset",
'out_size': 1, "out_size": 1,
'node_feat_size': 15, "node_feat_size": 15,
'edge_feat_size': 5 "edge_feat_size": 5,
} }
model = OGBGGIN(data_info, model = OGBGGIN(
embed_size=10, data_info, embed_size=10, num_layers=2, virtual_node=virtual_node
num_layers=2, )
virtual_node=virtual_node) nfeat = torch.randn(num_nodes, data_info["node_feat_size"])
nfeat = torch.randn(num_nodes, data_info['node_feat_size']) efeat = torch.randn(num_edges, data_info["edge_feat_size"])
efeat = torch.randn(num_edges, data_info['edge_feat_size'])
model(g, nfeat, efeat) model(g, nfeat, efeat)
def test_pna(): def test_pna():
# Test for ogbg-mol datasets # Test for ogbg-mol datasets
data_info = { data_info = {"name": "ogbg-molhiv", "delta": 1, "out_size": 1}
'name': 'ogbg-molhiv', model = PNA(data_info, embed_size=10, num_layers=2)
'delta': 1,
'out_size': 1
}
model = PNA(data_info,
embed_size=10,
num_layers=2)
num_nodes = 5 num_nodes = 5
num_edges = 15 num_edges = 15
g = dgl.rand_graph(num_nodes, num_edges) g = dgl.rand_graph(num_nodes, num_edges)
...@@ -228,13 +200,11 @@ def test_pna(): ...@@ -228,13 +200,11 @@ def test_pna():
# Test for non-ogbg-mol datasets # Test for non-ogbg-mol datasets
data_info = { data_info = {
'name': 'a_dataset', "name": "a_dataset",
'node_feat_size': 15, "node_feat_size": 15,
'delta': 1, "delta": 1,
'out_size': 1 "out_size": 1,
} }
model = PNA(data_info, model = PNA(data_info, embed_size=10, num_layers=2)
embed_size=10, nfeat = torch.randn(num_nodes, data_info["node_feat_size"])
num_layers=2)
nfeat = torch.randn(num_nodes, data_info['node_feat_size'])
model(g, nfeat) model(g, nfeat)
import os import os
import pytest import pytest
@pytest.mark.parametrize('data', ['cora', 'citeseer', 'pubmed', 'csv', 'reddit',
'co-buy-computer', 'ogbn-arxiv', 'ogbn-products']) @pytest.mark.parametrize(
"data",
[
"cora",
"citeseer",
"pubmed",
"csv",
"reddit",
"co-buy-computer",
"ogbn-arxiv",
"ogbn-products",
],
)
def test_nodepred_data(data): def test_nodepred_data(data):
os.system(f'dgl configure nodepred --data {data} --model gcn') os.system(f"dgl configure nodepred --data {data} --model gcn")
assert os.path.exists(f'nodepred_{data}_gcn.yaml') assert os.path.exists(f"nodepred_{data}_gcn.yaml")
custom_cfg = f'custom_{data}_gcn.yaml' custom_cfg = f"custom_{data}_gcn.yaml"
os.system(f'dgl configure nodepred --data {data} --model gcn --cfg {custom_cfg}') os.system(
f"dgl configure nodepred --data {data} --model gcn --cfg {custom_cfg}"
)
assert os.path.exists(custom_cfg) assert os.path.exists(custom_cfg)
custom_script = f'{data}_gcn.py' custom_script = f"{data}_gcn.py"
os.system(f'dgl export --cfg {custom_cfg} --output {custom_script}') os.system(f"dgl export --cfg {custom_cfg} --output {custom_script}")
assert os.path.exists(custom_script) assert os.path.exists(custom_script)
@pytest.mark.parametrize('model', ['gcn', 'gat', 'sage', 'sgc', 'gin'])
@pytest.mark.parametrize("model", ["gcn", "gat", "sage", "sgc", "gin"])
def test_nodepred_model(model): def test_nodepred_model(model):
os.system(f'dgl configure nodepred --data cora --model {model}') os.system(f"dgl configure nodepred --data cora --model {model}")
assert os.path.exists(f'nodepred_cora_{model}.yaml') assert os.path.exists(f"nodepred_cora_{model}.yaml")
custom_cfg = f'custom_cora_{model}.yaml' custom_cfg = f"custom_cora_{model}.yaml"
os.system(f'dgl configure nodepred --data cora --model {model} --cfg {custom_cfg}') os.system(
f"dgl configure nodepred --data cora --model {model} --cfg {custom_cfg}"
)
assert os.path.exists(custom_cfg) assert os.path.exists(custom_cfg)
custom_script = f'cora_{model}.py' custom_script = f"cora_{model}.py"
os.system(f'dgl export --cfg {custom_cfg} --output {custom_script}') os.system(f"dgl export --cfg {custom_cfg} --output {custom_script}")
assert os.path.exists(custom_script) assert os.path.exists(custom_script)
@pytest.mark.parametrize('data', ['cora', 'citeseer', 'pubmed', 'csv', 'reddit',
'co-buy-computer', 'ogbn-arxiv', 'ogbn-products']) @pytest.mark.parametrize(
"data",
[
"cora",
"citeseer",
"pubmed",
"csv",
"reddit",
"co-buy-computer",
"ogbn-arxiv",
"ogbn-products",
],
)
def test_nodepred_ns_data(data): def test_nodepred_ns_data(data):
os.system(f'dgl configure nodepred-ns --data {data} --model gcn') os.system(f"dgl configure nodepred-ns --data {data} --model gcn")
assert os.path.exists(f'nodepred-ns_{data}_gcn.yaml') assert os.path.exists(f"nodepred-ns_{data}_gcn.yaml")
custom_cfg = f'ns-custom_{data}_gcn.yaml' custom_cfg = f"ns-custom_{data}_gcn.yaml"
os.system(f'dgl configure nodepred-ns --data {data} --model gcn --cfg {custom_cfg}') os.system(
f"dgl configure nodepred-ns --data {data} --model gcn --cfg {custom_cfg}"
)
assert os.path.exists(custom_cfg) assert os.path.exists(custom_cfg)
custom_script = f'ns-{data}_gcn.py' custom_script = f"ns-{data}_gcn.py"
os.system(f'dgl export --cfg {custom_cfg} --output {custom_script}') os.system(f"dgl export --cfg {custom_cfg} --output {custom_script}")
assert os.path.exists(custom_script) assert os.path.exists(custom_script)
@pytest.mark.parametrize('model', ['gcn', 'gat', 'sage']) @pytest.mark.parametrize("model", ["gcn", "gat", "sage"])
def test_nodepred_ns_model(model): def test_nodepred_ns_model(model):
os.system(f'dgl configure nodepred-ns --data cora --model {model}') os.system(f"dgl configure nodepred-ns --data cora --model {model}")
assert os.path.exists(f'nodepred-ns_cora_{model}.yaml') assert os.path.exists(f"nodepred-ns_cora_{model}.yaml")
custom_cfg = f'ns-custom_cora_{model}.yaml' custom_cfg = f"ns-custom_cora_{model}.yaml"
os.system(f'dgl configure nodepred-ns --data cora --model {model} --cfg {custom_cfg}') os.system(
f"dgl configure nodepred-ns --data cora --model {model} --cfg {custom_cfg}"
)
assert os.path.exists(custom_cfg) assert os.path.exists(custom_cfg)
custom_script = f'ns-cora_{model}.py' custom_script = f"ns-cora_{model}.py"
os.system(f'dgl export --cfg {custom_cfg} --output {custom_script}') os.system(f"dgl export --cfg {custom_cfg} --output {custom_script}")
assert os.path.exists(custom_script) assert os.path.exists(custom_script)
@pytest.mark.parametrize('data', ['cora', 'citeseer', 'pubmed', 'csv', 'reddit',
'co-buy-computer', 'ogbn-arxiv', 'ogbn-products', 'ogbl-collab', @pytest.mark.parametrize(
'ogbl-citation2']) "data",
[
"cora",
"citeseer",
"pubmed",
"csv",
"reddit",
"co-buy-computer",
"ogbn-arxiv",
"ogbn-products",
"ogbl-collab",
"ogbl-citation2",
],
)
def test_linkpred_data(data): def test_linkpred_data(data):
node_model = 'gcn' node_model = "gcn"
edge_model = 'ele' edge_model = "ele"
neg_sampler = 'global' neg_sampler = "global"
custom_cfg = '_'.join([data, node_model, edge_model, neg_sampler]) + '.yaml' custom_cfg = "_".join([data, node_model, edge_model, neg_sampler]) + ".yaml"
os.system('dgl configure linkpred --data {} --node-model {} --edge-model {} --neg-sampler {} --cfg {}'.format( os.system(
data, node_model, edge_model, neg_sampler, custom_cfg)) "dgl configure linkpred --data {} --node-model {} --edge-model {} --neg-sampler {} --cfg {}".format(
data, node_model, edge_model, neg_sampler, custom_cfg
)
)
assert os.path.exists(custom_cfg) assert os.path.exists(custom_cfg)
custom_script = '_'.join([data, node_model, edge_model, neg_sampler]) + '.py' custom_script = (
os.system('dgl export --cfg {} --output {}'.format(custom_cfg, custom_script)) "_".join([data, node_model, edge_model, neg_sampler]) + ".py"
)
os.system(
"dgl export --cfg {} --output {}".format(custom_cfg, custom_script)
)
assert os.path.exists(custom_script) assert os.path.exists(custom_script)
@pytest.mark.parametrize('node_model', ['gcn' ,'gat', 'sage', 'sgc', 'gin'])
@pytest.mark.parametrize("node_model", ["gcn", "gat", "sage", "sgc", "gin"])
def test_linkpred_node_model(node_model): def test_linkpred_node_model(node_model):
data = 'cora' data = "cora"
edge_model = 'ele' edge_model = "ele"
neg_sampler = 'global' neg_sampler = "global"
custom_cfg = '_'.join([data, node_model, edge_model, neg_sampler]) + '.yaml' custom_cfg = "_".join([data, node_model, edge_model, neg_sampler]) + ".yaml"
os.system('dgl configure linkpred --data {} --node-model {} --edge-model {} --neg-sampler {} --cfg {}'.format( os.system(
data, node_model, edge_model, neg_sampler, custom_cfg)) "dgl configure linkpred --data {} --node-model {} --edge-model {} --neg-sampler {} --cfg {}".format(
data, node_model, edge_model, neg_sampler, custom_cfg
)
)
assert os.path.exists(custom_cfg) assert os.path.exists(custom_cfg)
custom_script = '_'.join([data, node_model, edge_model, neg_sampler]) + '.py' custom_script = (
os.system('dgl export --cfg {} --output {}'.format(custom_cfg, custom_script)) "_".join([data, node_model, edge_model, neg_sampler]) + ".py"
)
os.system(
"dgl export --cfg {} --output {}".format(custom_cfg, custom_script)
)
assert os.path.exists(custom_script) assert os.path.exists(custom_script)
@pytest.mark.parametrize('edge_model', ['ele', 'bilinear']) @pytest.mark.parametrize("edge_model", ["ele", "bilinear"])
def test_linkpred_edge_model(edge_model): def test_linkpred_edge_model(edge_model):
data = 'cora' data = "cora"
node_model = 'gcn' node_model = "gcn"
neg_sampler = 'global' neg_sampler = "global"
custom_cfg = '_'.join([data, node_model, edge_model, neg_sampler]) + '.yaml' custom_cfg = "_".join([data, node_model, edge_model, neg_sampler]) + ".yaml"
os.system('dgl configure linkpred --data {} --node-model {} --edge-model {} --neg-sampler {} --cfg {}'.format( os.system(
data, node_model, edge_model, neg_sampler, custom_cfg)) "dgl configure linkpred --data {} --node-model {} --edge-model {} --neg-sampler {} --cfg {}".format(
data, node_model, edge_model, neg_sampler, custom_cfg
)
)
assert os.path.exists(custom_cfg) assert os.path.exists(custom_cfg)
custom_script = '_'.join([data, node_model, edge_model, neg_sampler]) + '.py' custom_script = (
os.system('dgl export --cfg {} --output {}'.format(custom_cfg, custom_script)) "_".join([data, node_model, edge_model, neg_sampler]) + ".py"
)
os.system(
"dgl export --cfg {} --output {}".format(custom_cfg, custom_script)
)
assert os.path.exists(custom_script) assert os.path.exists(custom_script)
@pytest.mark.parametrize('neg_sampler', ['global', 'persource', '']) @pytest.mark.parametrize("neg_sampler", ["global", "persource", ""])
def test_linkpred_neg_sampler(neg_sampler): def test_linkpred_neg_sampler(neg_sampler):
data = 'cora' data = "cora"
node_model = 'gcn' node_model = "gcn"
edge_model = 'ele' edge_model = "ele"
custom_cfg = f'{data}_{node_model}_{edge_model}_{neg_sampler}.yaml' custom_cfg = f"{data}_{node_model}_{edge_model}_{neg_sampler}.yaml"
if neg_sampler == '': if neg_sampler == "":
os.system('dgl configure linkpred --data {} --node-model {} --edge-model {} --cfg {}'.format( os.system(
data, node_model, edge_model, custom_cfg)) "dgl configure linkpred --data {} --node-model {} --edge-model {} --cfg {}".format(
data, node_model, edge_model, custom_cfg
)
)
else: else:
os.system('dgl configure linkpred --data {} --node-model {} --edge-model {} --neg-sampler {} --cfg {}'.format( os.system(
data, node_model, edge_model, neg_sampler, custom_cfg)) "dgl configure linkpred --data {} --node-model {} --edge-model {} --neg-sampler {} --cfg {}".format(
data, node_model, edge_model, neg_sampler, custom_cfg
)
)
assert os.path.exists(custom_cfg) assert os.path.exists(custom_cfg)
custom_script = f'{data}_{node_model}_{edge_model}_{neg_sampler}.py' custom_script = f"{data}_{node_model}_{edge_model}_{neg_sampler}.py"
os.system('dgl export --cfg {} --output {}'.format(custom_cfg, custom_script)) os.system(
"dgl export --cfg {} --output {}".format(custom_cfg, custom_script)
)
assert os.path.exists(custom_script) assert os.path.exists(custom_script)
@pytest.mark.parametrize('data', ['csv', 'ogbg-molhiv', 'ogbg-molpcba'])
@pytest.mark.parametrize('model', ['gin', 'pna']) @pytest.mark.parametrize("data", ["csv", "ogbg-molhiv", "ogbg-molpcba"])
@pytest.mark.parametrize("model", ["gin", "pna"])
def test_graphpred(data, model): def test_graphpred(data, model):
os.system('dgl configure graphpred --data {} --model {}'.format(data, model)) os.system(
assert os.path.exists('graphpred_{}_{}.yaml'.format(data, model)) "dgl configure graphpred --data {} --model {}".format(data, model)
)
assert os.path.exists("graphpred_{}_{}.yaml".format(data, model))
custom_cfg = 'custom_{}_{}.yaml'.format(data, model) custom_cfg = "custom_{}_{}.yaml".format(data, model)
os.system('dgl configure graphpred --data {} --model {} --cfg {}'.format(data, model, os.system(
custom_cfg)) "dgl configure graphpred --data {} --model {} --cfg {}".format(
data, model, custom_cfg
)
)
assert os.path.exists(custom_cfg) assert os.path.exists(custom_cfg)
custom_script = '_'.join([data, model]) + '.py' custom_script = "_".join([data, model]) + ".py"
os.system('dgl export --cfg {} --output {}'.format(custom_cfg, custom_script)) os.system(
"dgl export --cfg {} --output {}".format(custom_cfg, custom_script)
)
assert os.path.exists(custom_script) assert os.path.exists(custom_script)
@pytest.mark.parametrize('recipe',
['graphpred_hiv_gin.yaml', @pytest.mark.parametrize(
'graphpred_hiv_pna.yaml', "recipe",
'graphpred_pcba_gin.yaml', [
'linkpred_cora_sage.yaml', "graphpred_hiv_gin.yaml",
'linkpred_citation2_sage.yaml', "graphpred_hiv_pna.yaml",
'linkpred_collab_sage.yaml', "graphpred_pcba_gin.yaml",
'nodepred_citeseer_gat.yaml', "linkpred_cora_sage.yaml",
'nodepred_citeseer_gcn.yaml', "linkpred_citation2_sage.yaml",
'nodepred_citeseer_sage.yaml', "linkpred_collab_sage.yaml",
'nodepred_cora_gat.yaml', "nodepred_citeseer_gat.yaml",
'nodepred_cora_gcn.yaml', "nodepred_citeseer_gcn.yaml",
'nodepred_cora_sage.yaml', "nodepred_citeseer_sage.yaml",
'nodepred_pubmed_gat.yaml', "nodepred_cora_gat.yaml",
'nodepred_pubmed_gcn.yaml', "nodepred_cora_gcn.yaml",
'nodepred_pubmed_sage.yaml', "nodepred_cora_sage.yaml",
'nodepred-ns_arxiv_gcn.yaml', "nodepred_pubmed_gat.yaml",
'nodepred-ns_product_sage.yaml']) "nodepred_pubmed_gcn.yaml",
"nodepred_pubmed_sage.yaml",
"nodepred-ns_arxiv_gcn.yaml",
"nodepred-ns_product_sage.yaml",
],
)
def test_recipe(recipe): def test_recipe(recipe):
# Remove all generated yaml files # Remove all generated yaml files
current_dir = os.listdir("./") current_dir = os.listdir("./")
...@@ -160,19 +250,22 @@ def test_recipe(recipe): ...@@ -160,19 +250,22 @@ def test_recipe(recipe):
if item.endswith(".yaml"): if item.endswith(".yaml"):
os.remove(item) os.remove(item)
os.system('dgl recipe get {}'.format(recipe)) os.system("dgl recipe get {}".format(recipe))
assert os.path.exists(recipe) assert os.path.exists(recipe)
def test_node_cora(): def test_node_cora():
os.system('dgl configure nodepred --data cora --model gcn') os.system("dgl configure nodepred --data cora --model gcn")
os.system('dgl train --cfg nodepred_cora_gcn.yaml') os.system("dgl train --cfg nodepred_cora_gcn.yaml")
assert os.path.exists('results') assert os.path.exists("results")
assert os.path.exists('results/run_0.pth') assert os.path.exists("results/run_0.pth")
os.system('dgl configure-apply nodepred --cpt results/run_0.pth') os.system("dgl configure-apply nodepred --cpt results/run_0.pth")
assert os.path.exists('apply_nodepred_cora_gcn.yaml') assert os.path.exists("apply_nodepred_cora_gcn.yaml")
os.system('dgl configure-apply nodepred --data cora --cpt results/run_0.pth --cfg apply.yaml') os.system(
assert os.path.exists('apply.yaml') "dgl configure-apply nodepred --data cora --cpt results/run_0.pth --cfg apply.yaml"
os.system('dgl apply --cfg apply.yaml') )
assert os.path.exists('apply_results/output.csv') assert os.path.exists("apply.yaml")
os.system('dgl export --cfg apply.yaml --output apply.py') os.system("dgl apply --cfg apply.yaml")
assert os.path.exists('apply.py') assert os.path.exists("apply_results/output.csv")
os.system("dgl export --cfg apply.yaml --output apply.py")
assert os.path.exists("apply.py")