Unverified Commit 74c9d27d authored by Hongzhi (Steve), Chen's avatar Hongzhi (Steve), Chen Committed by GitHub
Browse files

[Misc] Auto-format tests. (#5313)



* [Misc] Auto-format tests.

* more

---------
Co-authored-by: default avatarUbuntu <ubuntu@ip-172-31-28-63.ap-northeast-1.compute.internal>
parent 86193c26
import unittest
import backend as F
import numpy as np
import dgl
import dgl.ndarray as nd
import numpy as np
@unittest.skipIf(
......
......@@ -735,11 +735,7 @@ def _test_DefaultDataParser():
# string consists of non-numeric values
with tempfile.TemporaryDirectory() as test_dir:
csv_path = os.path.join(test_dir, "nodes.csv")
df = pd.DataFrame(
{
"label": ["a", "b", "c"],
}
)
df = pd.DataFrame({"label": ["a", "b", "c"]})
df.to_csv(csv_path, index=False)
dp = DefaultDataParser()
df = pd.read_csv(csv_path)
......@@ -752,11 +748,7 @@ def _test_DefaultDataParser():
# csv has index column which is ignored as it's unnamed
with tempfile.TemporaryDirectory() as test_dir:
csv_path = os.path.join(test_dir, "nodes.csv")
df = pd.DataFrame(
{
"label": [1, 2, 3],
}
)
df = pd.DataFrame({"label": [1, 2, 3]})
df.to_csv(csv_path)
dp = DefaultDataParser()
df = pd.read_csv(csv_path)
......@@ -1042,9 +1034,7 @@ def _test_load_edge_data_from_csv():
# required headers are missing
df = pd.DataFrame(
{
"src_id": np.random.randint(num_nodes, size=num_edges),
}
{"src_id": np.random.randint(num_nodes, size=num_edges)}
)
csv_path = os.path.join(test_dir, "edges.csv")
df.to_csv(csv_path, index=False)
......@@ -1056,9 +1046,7 @@ def _test_load_edge_data_from_csv():
expect_except = True
assert expect_except
df = pd.DataFrame(
{
"dst_id": np.random.randint(num_nodes, size=num_edges),
}
{"dst_id": np.random.randint(num_nodes, size=num_edges)}
)
csv_path = os.path.join(test_dir, "edges.csv")
df.to_csv(csv_path, index=False)
......
......@@ -4,12 +4,12 @@ import time
import unittest
import backend as F
import numpy as np
import pytest
import scipy as sp
import dgl
import dgl.ndarray as nd
import numpy as np
import pytest
import scipy as sp
from dgl import DGLGraph
from dgl.data.utils import load_labels, load_tensors, save_tensors
......
import unittest
import backend as F
from test_utils import parametrize_idtype
import dgl
from dgl.dataloading import (
NeighborSampler,
as_edge_prediction_sampler,
negative_sampler,
NeighborSampler,
)
from test_utils import parametrize_idtype
def create_test_graph(idtype):
......
import unittest
from collections import defaultdict as ddict
import backend as F
import dgl
import networkx as nx
import numpy as np
import scipy.sparse as ssp
import networkx as nx
from dgl import DGLGraph
from collections import defaultdict as ddict
import unittest
from test_utils import parametrize_idtype
D = 5
reduce_msg_shapes = set()
def message_func(edges):
assert F.ndim(edges.src['h']) == 2
assert F.shape(edges.src['h'])[1] == D
return {'m' : edges.src['h']}
assert F.ndim(edges.src["h"]) == 2
assert F.shape(edges.src["h"])[1] == D
return {"m": edges.src["h"]}
def reduce_func(nodes):
msgs = nodes.mailbox['m']
msgs = nodes.mailbox["m"]
reduce_msg_shapes.add(tuple(msgs.shape))
assert F.ndim(msgs) == 3
assert F.shape(msgs)[2] == D
return {'accum' : F.sum(msgs, 1)}
return {"accum": F.sum(msgs, 1)}
def apply_node_func(nodes):
return {'h' : nodes.data['h'] + nodes.data['accum']}
return {"h": nodes.data["h"] + nodes.data["accum"]}
def generate_graph_old(grad=False):
g = DGLGraph()
g.add_nodes(10) # 10 nodes
g.add_nodes(10) # 10 nodes
# create a graph where 0 is the source and 9 is the sink
# 17 edges
for i in range(1, 9):
......@@ -43,14 +49,15 @@ def generate_graph_old(grad=False):
ncol = F.attach_grad(ncol)
ecol = F.attach_grad(ecol)
g.ndata['h'] = ncol
g.edata['w'] = ecol
g.ndata["h"] = ncol
g.edata["w"] = ecol
g.set_n_initializer(dgl.init.zero_initializer)
g.set_e_initializer(dgl.init.zero_initializer)
return g
def generate_graph(idtype, grad=False):
'''
"""
s, d, eid
0, 1, 0
1, 9, 1
......@@ -69,7 +76,7 @@ def generate_graph(idtype, grad=False):
0, 8, 14
8, 9, 15
9, 0, 16
'''
"""
u = F.tensor([0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0, 8, 9])
v = F.tensor([1, 9, 2, 9, 3, 9, 4, 9, 5, 9, 6, 9, 7, 9, 8, 9, 0])
g = dgl.graph((u, v), idtype=idtype)
......@@ -80,37 +87,51 @@ def generate_graph(idtype, grad=False):
ncol = F.attach_grad(ncol)
ecol = F.attach_grad(ecol)
g.ndata['h'] = ncol
g.edata['w'] = ecol
g.ndata["h"] = ncol
g.edata["w"] = ecol
g.set_n_initializer(dgl.init.zero_initializer)
g.set_e_initializer(dgl.init.zero_initializer)
return g
def test_compatible():
g = generate_graph_old()
@parametrize_idtype
def test_batch_setter_getter(idtype):
def _pfc(x):
return list(F.zerocopy_to_numpy(x)[:,0])
return list(F.zerocopy_to_numpy(x)[:, 0])
g = generate_graph(idtype)
# set all nodes
g.ndata['h'] = F.zeros((10, D))
assert F.allclose(g.ndata['h'], F.zeros((10, D)))
g.ndata["h"] = F.zeros((10, D))
assert F.allclose(g.ndata["h"], F.zeros((10, D)))
# pop nodes
old_len = len(g.ndata)
g.ndata.pop('h')
g.ndata.pop("h")
assert len(g.ndata) == old_len - 1
g.ndata['h'] = F.zeros((10, D))
g.ndata["h"] = F.zeros((10, D))
# set partial nodes
u = F.tensor([1, 3, 5], g.idtype)
g.nodes[u].data['h'] = F.ones((3, D))
assert _pfc(g.ndata['h']) == [0., 1., 0., 1., 0., 1., 0., 0., 0., 0.]
g.nodes[u].data["h"] = F.ones((3, D))
assert _pfc(g.ndata["h"]) == [
0.0,
1.0,
0.0,
1.0,
0.0,
1.0,
0.0,
0.0,
0.0,
0.0,
]
# get partial nodes
u = F.tensor([1, 2, 3], g.idtype)
assert _pfc(g.nodes[u].data['h']) == [1., 0., 1.]
assert _pfc(g.nodes[u].data["h"]) == [1.0, 0.0, 1.0]
'''
"""
s, d, eid
0, 1, 0
1, 9, 1
......@@ -129,49 +150,54 @@ def test_batch_setter_getter(idtype):
0, 8, 14
8, 9, 15
9, 0, 16
'''
"""
# set all edges
g.edata['l'] = F.zeros((17, D))
assert _pfc(g.edata['l']) == [0.] * 17
g.edata["l"] = F.zeros((17, D))
assert _pfc(g.edata["l"]) == [0.0] * 17
# pop edges
old_len = len(g.edata)
g.edata.pop('l')
g.edata.pop("l")
assert len(g.edata) == old_len - 1
g.edata['l'] = F.zeros((17, D))
g.edata["l"] = F.zeros((17, D))
# set partial edges (many-many)
u = F.tensor([0, 0, 2, 5, 9], g.idtype)
v = F.tensor([1, 3, 9, 9, 0], g.idtype)
g.edges[u, v].data['l'] = F.ones((5, D))
truth = [0.] * 17
truth[0] = truth[4] = truth[3] = truth[9] = truth[16] = 1.
assert _pfc(g.edata['l']) == truth
g.edges[u, v].data["l"] = F.ones((5, D))
truth = [0.0] * 17
truth[0] = truth[4] = truth[3] = truth[9] = truth[16] = 1.0
assert _pfc(g.edata["l"]) == truth
u = F.tensor([3, 4, 6], g.idtype)
v = F.tensor([9, 9, 9], g.idtype)
g.edges[u, v].data['l'] = F.ones((3, D))
truth[5] = truth[7] = truth[11] = 1.
assert _pfc(g.edata['l']) == truth
g.edges[u, v].data["l"] = F.ones((3, D))
truth[5] = truth[7] = truth[11] = 1.0
assert _pfc(g.edata["l"]) == truth
u = F.tensor([0, 0, 0], g.idtype)
v = F.tensor([4, 5, 6], g.idtype)
g.edges[u, v].data['l'] = F.ones((3, D))
truth[6] = truth[8] = truth[10] = 1.
assert _pfc(g.edata['l']) == truth
g.edges[u, v].data["l"] = F.ones((3, D))
truth[6] = truth[8] = truth[10] = 1.0
assert _pfc(g.edata["l"]) == truth
u = F.tensor([0, 6, 0], g.idtype)
v = F.tensor([6, 9, 7], g.idtype)
assert _pfc(g.edges[u, v].data['l']) == [1.0, 1.0, 0.0]
assert _pfc(g.edges[u, v].data["l"]) == [1.0, 1.0, 0.0]
@parametrize_idtype
def test_batch_setter_autograd(idtype):
g = generate_graph(idtype, grad=True)
h1 = g.ndata['h']
h1 = g.ndata["h"]
# partial set
v = F.tensor([1, 2, 8], g.idtype)
hh = F.attach_grad(F.zeros((len(v), D)))
with F.record_grad():
g.nodes[v].data['h'] = hh
h2 = g.ndata['h']
g.nodes[v].data["h"] = hh
h2 = g.ndata["h"]
F.backward(h2, F.ones((10, D)) * 2)
assert F.array_equal(F.grad(h1)[:,0], F.tensor([2., 0., 0., 2., 2., 2., 2., 2., 0., 2.]))
assert F.array_equal(F.grad(hh)[:,0], F.tensor([2., 2., 2.]))
assert F.array_equal(
F.grad(h1)[:, 0],
F.tensor([2.0, 0.0, 0.0, 2.0, 2.0, 2.0, 2.0, 2.0, 0.0, 2.0]),
)
assert F.array_equal(F.grad(hh)[:, 0], F.tensor([2.0, 2.0, 2.0]))
def _test_nx_conversion():
# check conversion between networkx and DGLGraph
......@@ -195,8 +221,8 @@ def _test_nx_conversion():
if num_edges > 0:
edge_feat = ddict(lambda: [0] * num_edges)
for u, v, attr in nxg.edges(data=True):
assert len(attr) == len(ef) + 1 # extra id
eid = attr['id']
assert len(attr) == len(ef) + 1 # extra id
eid = attr["id"]
for k in ef:
edge_feat[k][eid] = F.unsqueeze(attr[k], 0)
for k in edge_feat:
......@@ -211,18 +237,18 @@ def _test_nx_conversion():
e1 = F.randn((4, 5))
e2 = F.randn((4, 7))
g = dgl.graph(([0, 1, 3, 4], [2, 4, 0, 3]))
g.ndata.update({'n1': n1, 'n2': n2, 'n3': n3})
g.edata.update({'e1': e1, 'e2': e2})
g.ndata.update({"n1": n1, "n2": n2, "n3": n3})
g.edata.update({"e1": e1, "e2": e2})
# convert to networkx
nxg = g.to_networkx(node_attrs=['n1', 'n3'], edge_attrs=['e1', 'e2'])
nxg = g.to_networkx(node_attrs=["n1", "n3"], edge_attrs=["e1", "e2"])
assert len(nxg) == 5
assert nxg.size() == 4
_check_nx_feature(nxg, {'n1': n1, 'n3': n3}, {'e1': e1, 'e2': e2})
_check_nx_feature(nxg, {"n1": n1, "n3": n3}, {"e1": e1, "e2": e2})
# convert to DGLGraph, nx graph has id in edge feature
# use id feature to test non-tensor copy
g = dgl.from_networkx(nxg, node_attrs=['n1'], edge_attrs=['e1', 'id'])
g = dgl.from_networkx(nxg, node_attrs=["n1"], edge_attrs=["e1", "id"])
# check graph size
assert g.number_of_nodes() == 5
assert g.number_of_edges() == 4
......@@ -231,32 +257,34 @@ def _test_nx_conversion():
assert len(g.ndata) == 1
assert len(g.edata) == 2
# check feature values
assert F.allclose(g.ndata['n1'], n1)
assert F.allclose(g.ndata["n1"], n1)
# with id in nx edge feature, e1 should follow original order
assert F.allclose(g.edata['e1'], e1)
assert F.array_equal(F.astype(g.edata['id'], F.int64), F.copy_to(F.arange(0, 4), F.cpu()))
assert F.allclose(g.edata["e1"], e1)
assert F.array_equal(
F.astype(g.edata["id"], F.int64), F.copy_to(F.arange(0, 4), F.cpu())
)
# test conversion after modifying DGLGraph
g.edata.pop('id') # pop id so we don't need to provide id when adding edges
g.edata.pop("id") # pop id so we don't need to provide id when adding edges
new_n = F.randn((2, 3))
new_e = F.randn((3, 5))
g.add_nodes(2, data={'n1': new_n})
g.add_nodes(2, data={"n1": new_n})
# add three edges, one is a multi-edge
g.add_edges([3, 6, 0], [4, 5, 2], data={'e1': new_e})
g.add_edges([3, 6, 0], [4, 5, 2], data={"e1": new_e})
n1 = F.cat((n1, new_n), 0)
e1 = F.cat((e1, new_e), 0)
# convert to networkx again
nxg = g.to_networkx(node_attrs=['n1'], edge_attrs=['e1'])
nxg = g.to_networkx(node_attrs=["n1"], edge_attrs=["e1"])
assert len(nxg) == 7
assert nxg.size() == 7
_check_nx_feature(nxg, {'n1': n1}, {'e1': e1})
_check_nx_feature(nxg, {"n1": n1}, {"e1": e1})
# now test convert from networkx without id in edge feature
# first pop id in edge feature
for _, _, attr in nxg.edges(data=True):
attr.pop('id')
attr.pop("id")
# test with a new graph
g = dgl.from_networkx(nxg, node_attrs=['n1'], edge_attrs=['e1'])
g = dgl.from_networkx(nxg, node_attrs=["n1"], edge_attrs=["e1"])
# check graph size
assert g.number_of_nodes() == 7
assert g.number_of_edges() == 7
......@@ -264,57 +292,63 @@ def _test_nx_conversion():
assert len(g.ndata) == 1
assert len(g.edata) == 1
# check feature values
assert F.allclose(g.ndata['n1'], n1)
assert F.allclose(g.ndata["n1"], n1)
# edge feature order follows nxg.edges()
edge_feat = []
for _, _, attr in nxg.edges(data=True):
edge_feat.append(F.unsqueeze(attr['e1'], 0))
edge_feat.append(F.unsqueeze(attr["e1"], 0))
edge_feat = F.cat(edge_feat, 0)
assert F.allclose(g.edata['e1'], edge_feat)
assert F.allclose(g.edata["e1"], edge_feat)
# Test converting from a networkx graph whose nodes are
# not labeled with consecutive-integers.
nxg = nx.cycle_graph(5)
nxg.remove_nodes_from([0, 4])
for u in nxg.nodes():
nxg.nodes[u]['h'] = F.tensor([u])
nxg.nodes[u]["h"] = F.tensor([u])
for u, v, d in nxg.edges(data=True):
d['h'] = F.tensor([u, v])
d["h"] = F.tensor([u, v])
g = dgl.from_networkx(nxg, node_attrs=['h'], edge_attrs=['h'])
g = dgl.from_networkx(nxg, node_attrs=["h"], edge_attrs=["h"])
assert g.number_of_nodes() == 3
assert g.number_of_edges() == 4
assert g.has_edge_between(0, 1)
assert g.has_edge_between(1, 2)
assert F.allclose(g.ndata['h'], F.tensor([[1.], [2.], [3.]]))
assert F.allclose(g.edata['h'], F.tensor([[1., 2.], [1., 2.],
[2., 3.], [2., 3.]]))
assert F.allclose(g.ndata["h"], F.tensor([[1.0], [2.0], [3.0]]))
assert F.allclose(
g.edata["h"], F.tensor([[1.0, 2.0], [1.0, 2.0], [2.0, 3.0], [2.0, 3.0]])
)
@parametrize_idtype
def test_apply_nodes(idtype):
def _upd(nodes):
return {'h' : nodes.data['h'] * 2}
return {"h": nodes.data["h"] * 2}
g = generate_graph(idtype)
old = g.ndata['h']
old = g.ndata["h"]
g.apply_nodes(_upd)
assert F.allclose(old * 2, g.ndata['h'])
assert F.allclose(old * 2, g.ndata["h"])
u = F.tensor([0, 3, 4, 6], g.idtype)
g.apply_nodes(lambda nodes : {'h' : nodes.data['h'] * 0.}, u)
assert F.allclose(F.gather_row(g.ndata['h'], u), F.zeros((4, D)))
g.apply_nodes(lambda nodes: {"h": nodes.data["h"] * 0.0}, u)
assert F.allclose(F.gather_row(g.ndata["h"], u), F.zeros((4, D)))
@parametrize_idtype
def test_apply_edges(idtype):
def _upd(edges):
return {'w' : edges.data['w'] * 2}
return {"w": edges.data["w"] * 2}
g = generate_graph(idtype)
old = g.edata['w']
old = g.edata["w"]
g.apply_edges(_upd)
assert F.allclose(old * 2, g.edata['w'])
assert F.allclose(old * 2, g.edata["w"])
u = F.tensor([0, 0, 0, 4, 5, 6], g.idtype)
v = F.tensor([1, 2, 3, 9, 9, 9], g.idtype)
g.apply_edges(lambda edges : {'w' : edges.data['w'] * 0.}, (u, v))
g.apply_edges(lambda edges: {"w": edges.data["w"] * 0.0}, (u, v))
eid = F.tensor(g.edge_ids(u, v))
assert F.allclose(F.gather_row(g.edata['w'], eid), F.zeros((6, D)))
assert F.allclose(F.gather_row(g.edata["w"], eid), F.zeros((6, D)))
@parametrize_idtype
def test_update_routines(idtype):
......@@ -325,7 +359,7 @@ def test_update_routines(idtype):
u = [0, 0, 0, 4, 5, 6]
v = [1, 2, 3, 9, 9, 9]
g.send_and_recv((u, v), message_func, reduce_func, apply_node_func)
assert(reduce_msg_shapes == {(1, 3, D), (3, 1, D)})
assert reduce_msg_shapes == {(1, 3, D), (3, 1, D)}
reduce_msg_shapes.clear()
try:
g.send_and_recv([u, v])
......@@ -337,70 +371,82 @@ def test_update_routines(idtype):
v = F.tensor([1, 2, 3, 9], g.idtype)
reduce_msg_shapes.clear()
g.pull(v, message_func, reduce_func, apply_node_func)
assert(reduce_msg_shapes == {(1, 8, D), (3, 1, D)})
assert reduce_msg_shapes == {(1, 8, D), (3, 1, D)}
reduce_msg_shapes.clear()
# push
v = F.tensor([0, 1, 2, 3], g.idtype)
reduce_msg_shapes.clear()
g.push(v, message_func, reduce_func, apply_node_func)
assert(reduce_msg_shapes == {(1, 3, D), (8, 1, D)})
assert reduce_msg_shapes == {(1, 3, D), (8, 1, D)}
reduce_msg_shapes.clear()
# update_all
reduce_msg_shapes.clear()
g.update_all(message_func, reduce_func, apply_node_func)
assert(reduce_msg_shapes == {(1, 8, D), (9, 1, D)})
assert reduce_msg_shapes == {(1, 8, D), (9, 1, D)}
reduce_msg_shapes.clear()
@parametrize_idtype
def test_update_all_0deg(idtype):
# test#1
g = dgl.graph(([1, 2, 3, 4], [0, 0, 0, 0]), idtype=idtype, device=F.ctx())
def _message(edges):
return {'m' : edges.src['h']}
return {"m": edges.src["h"]}
def _reduce(nodes):
return {'x' : nodes.data['h'] + F.sum(nodes.mailbox['m'], 1)}
return {"x": nodes.data["h"] + F.sum(nodes.mailbox["m"], 1)}
def _apply(nodes):
return {'x' : nodes.data['x'] * 2}
return {"x": nodes.data["x"] * 2}
def _init2(shape, dtype, ctx, ids):
return 2 + F.zeros(shape, dtype, ctx)
g.set_n_initializer(_init2, 'x')
g.set_n_initializer(_init2, "x")
old_repr = F.randn((5, 5))
g.ndata['h'] = old_repr
g.ndata["h"] = old_repr
g.update_all(_message, _reduce, _apply)
new_repr = g.ndata['x']
new_repr = g.ndata["x"]
# the first row of the new_repr should be the sum of all the node
# features; while the 0-deg nodes should be initialized by the
# initializer and applied with UDF.
assert F.allclose(new_repr[1:], 2*(2+F.zeros((4,5))))
assert F.allclose(new_repr[1:], 2 * (2 + F.zeros((4, 5))))
assert F.allclose(new_repr[0], 2 * F.sum(old_repr, 0))
# test#2: graph with no edge
g = dgl.graph(([], []), num_nodes=5, idtype=idtype, device=F.ctx())
g.ndata['h'] = old_repr
g.update_all(_message, _reduce, lambda nodes : {'h' : nodes.data['h'] * 2})
new_repr = g.ndata['h']
g.ndata["h"] = old_repr
g.update_all(_message, _reduce, lambda nodes: {"h": nodes.data["h"] * 2})
new_repr = g.ndata["h"]
# should fallback to apply
assert F.allclose(new_repr, 2*old_repr)
assert F.allclose(new_repr, 2 * old_repr)
@parametrize_idtype
def test_pull_0deg(idtype):
g = dgl.graph(([0], [1]), idtype=idtype, device=F.ctx())
def _message(edges):
return {'m' : edges.src['h']}
return {"m": edges.src["h"]}
def _reduce(nodes):
return {'x' : nodes.data['h'] + F.sum(nodes.mailbox['m'], 1)}
return {"x": nodes.data["h"] + F.sum(nodes.mailbox["m"], 1)}
def _apply(nodes):
return {'x' : nodes.data['x'] * 2}
return {"x": nodes.data["x"] * 2}
def _init2(shape, dtype, ctx, ids):
return 2 + F.zeros(shape, dtype, ctx)
g.set_n_initializer(_init2, 'x')
g.set_n_initializer(_init2, "x")
# test#1: pull both 0deg and non-0deg nodes
old = F.randn((2, 5))
g.ndata['h'] = old
g.ndata["h"] = old
g.pull([0, 1], _message, _reduce, _apply)
new = g.ndata['x']
new = g.ndata["x"]
# 0deg check: initialized with the func and got applied
assert F.allclose(new[0], F.full_1d(5, 4, dtype=F.float32))
# non-0deg check
......@@ -408,14 +454,15 @@ def test_pull_0deg(idtype):
# test#2: pull only 0deg node
old = F.randn((2, 5))
g.ndata['h'] = old
g.pull(0, _message, _reduce, lambda nodes : {'h' : nodes.data['h'] * 2})
new = g.ndata['h']
g.ndata["h"] = old
g.pull(0, _message, _reduce, lambda nodes: {"h": nodes.data["h"] * 2})
new = g.ndata["h"]
# 0deg check: fallback to apply
assert F.allclose(new[0], 2*old[0])
assert F.allclose(new[0], 2 * old[0])
# non-0deg check: not touched
assert F.allclose(new[1], old[1])
def test_dynamic_addition():
N = 3
D = 1
......@@ -425,201 +472,242 @@ def test_dynamic_addition():
# Test node addition
g.add_nodes(N)
g.ndata.update({'h1': F.randn((N, D)),
'h2': F.randn((N, D))})
g.ndata.update({"h1": F.randn((N, D)), "h2": F.randn((N, D))})
g.add_nodes(3)
assert g.ndata['h1'].shape[0] == g.ndata['h2'].shape[0] == N + 3
assert g.ndata["h1"].shape[0] == g.ndata["h2"].shape[0] == N + 3
# Test edge addition
g.add_edges(0, 1)
g.add_edges(1, 0)
g.edata.update({'h1': F.randn((2, D)),
'h2': F.randn((2, D))})
assert g.edata['h1'].shape[0] == g.edata['h2'].shape[0] == 2
g.edata.update({"h1": F.randn((2, D)), "h2": F.randn((2, D))})
assert g.edata["h1"].shape[0] == g.edata["h2"].shape[0] == 2
g.add_edges([0, 2], [2, 0])
g.edata['h1'] = F.randn((4, D))
assert g.edata['h1'].shape[0] == g.edata['h2'].shape[0] == 4
g.edata["h1"] = F.randn((4, D))
assert g.edata["h1"].shape[0] == g.edata["h2"].shape[0] == 4
g.add_edges(1, 2)
g.edges[4].data['h1'] = F.randn((1, D))
assert g.edata['h1'].shape[0] == g.edata['h2'].shape[0] == 5
g.edges[4].data["h1"] = F.randn((1, D))
assert g.edata["h1"].shape[0] == g.edata["h2"].shape[0] == 5
# test add edge with part of the features
g.add_edges(2, 1, {'h1': F.randn((1, D))})
assert len(g.edata['h1']) == len(g.edata['h2'])
g.add_edges(2, 1, {"h1": F.randn((1, D))})
assert len(g.edata["h1"]) == len(g.edata["h2"])
@parametrize_idtype
def test_repr(idtype):
g = dgl.graph(([0, 0, 1], [1, 2, 2]), num_nodes=10, idtype=idtype, device=F.ctx())
g = dgl.graph(
([0, 0, 1], [1, 2, 2]), num_nodes=10, idtype=idtype, device=F.ctx()
)
repr_string = g.__repr__()
print(repr_string)
g.ndata['x'] = F.zeros((10, 5))
g.edata['y'] = F.zeros((3, 4))
g.ndata["x"] = F.zeros((10, 5))
g.edata["y"] = F.zeros((3, 4))
repr_string = g.__repr__()
print(repr_string)
@parametrize_idtype
def test_local_var(idtype):
g = dgl.graph(([0, 1, 2, 3], [1, 2, 3, 4]), idtype=idtype, device=F.ctx())
g.ndata['h'] = F.zeros((g.number_of_nodes(), 3))
g.edata['w'] = F.zeros((g.number_of_edges(), 4))
g.ndata["h"] = F.zeros((g.number_of_nodes(), 3))
g.edata["w"] = F.zeros((g.number_of_edges(), 4))
# test override
def foo(g):
g = g.local_var()
g.ndata['h'] = F.ones((g.number_of_nodes(), 3))
g.edata['w'] = F.ones((g.number_of_edges(), 4))
g.ndata["h"] = F.ones((g.number_of_nodes(), 3))
g.edata["w"] = F.ones((g.number_of_edges(), 4))
foo(g)
assert F.allclose(g.ndata['h'], F.zeros((g.number_of_nodes(), 3)))
assert F.allclose(g.edata['w'], F.zeros((g.number_of_edges(), 4)))
assert F.allclose(g.ndata["h"], F.zeros((g.number_of_nodes(), 3)))
assert F.allclose(g.edata["w"], F.zeros((g.number_of_edges(), 4)))
# test out-place update
def foo(g):
g = g.local_var()
g.nodes[[2, 3]].data['h'] = F.ones((2, 3))
g.edges[[2, 3]].data['w'] = F.ones((2, 4))
g.nodes[[2, 3]].data["h"] = F.ones((2, 3))
g.edges[[2, 3]].data["w"] = F.ones((2, 4))
foo(g)
assert F.allclose(g.ndata['h'], F.zeros((g.number_of_nodes(), 3)))
assert F.allclose(g.edata['w'], F.zeros((g.number_of_edges(), 4)))
assert F.allclose(g.ndata["h"], F.zeros((g.number_of_nodes(), 3)))
assert F.allclose(g.edata["w"], F.zeros((g.number_of_edges(), 4)))
# test out-place update 2
def foo(g):
g = g.local_var()
g.apply_nodes(lambda nodes: {'h' : nodes.data['h'] + 10}, [2, 3])
g.apply_edges(lambda edges: {'w' : edges.data['w'] + 10}, [2, 3])
g.apply_nodes(lambda nodes: {"h": nodes.data["h"] + 10}, [2, 3])
g.apply_edges(lambda edges: {"w": edges.data["w"] + 10}, [2, 3])
foo(g)
assert F.allclose(g.ndata['h'], F.zeros((g.number_of_nodes(), 3)))
assert F.allclose(g.edata['w'], F.zeros((g.number_of_edges(), 4)))
assert F.allclose(g.ndata["h"], F.zeros((g.number_of_nodes(), 3)))
assert F.allclose(g.edata["w"], F.zeros((g.number_of_edges(), 4)))
# test auto-pop
def foo(g):
g = g.local_var()
g.ndata['hh'] = F.ones((g.number_of_nodes(), 3))
g.edata['ww'] = F.ones((g.number_of_edges(), 4))
g.ndata["hh"] = F.ones((g.number_of_nodes(), 3))
g.edata["ww"] = F.ones((g.number_of_edges(), 4))
foo(g)
assert 'hh' not in g.ndata
assert 'ww' not in g.edata
assert "hh" not in g.ndata
assert "ww" not in g.edata
# test initializer1
g = dgl.graph(([0, 1], [1, 1]), idtype=idtype, device=F.ctx())
g.set_n_initializer(dgl.init.zero_initializer)
def foo(g):
g = g.local_var()
g.nodes[0].data['h'] = F.ones((1, 1))
assert F.allclose(g.ndata['h'], F.tensor([[1.], [0.]]))
g.nodes[0].data["h"] = F.ones((1, 1))
assert F.allclose(g.ndata["h"], F.tensor([[1.0], [0.0]]))
foo(g)
# test initializer2
def foo_e_initializer(shape, dtype, ctx, id_range):
return F.ones(shape)
g.set_e_initializer(foo_e_initializer, field='h')
g.set_e_initializer(foo_e_initializer, field="h")
def foo(g):
g = g.local_var()
g.edges[0, 1].data['h'] = F.ones((1, 1))
assert F.allclose(g.edata['h'], F.ones((2, 1)))
g.edges[0, 1].data['w'] = F.ones((1, 1))
assert F.allclose(g.edata['w'], F.tensor([[1.], [0.]]))
g.edges[0, 1].data["h"] = F.ones((1, 1))
assert F.allclose(g.edata["h"], F.ones((2, 1)))
g.edges[0, 1].data["w"] = F.ones((1, 1))
assert F.allclose(g.edata["w"], F.tensor([[1.0], [0.0]]))
foo(g)
@parametrize_idtype
def test_local_scope(idtype):
g = dgl.graph(([0, 1, 2, 3], [1, 2, 3, 4]), idtype=idtype, device=F.ctx())
g.ndata['h'] = F.zeros((g.number_of_nodes(), 3))
g.edata['w'] = F.zeros((g.number_of_edges(), 4))
g.ndata["h"] = F.zeros((g.number_of_nodes(), 3))
g.edata["w"] = F.zeros((g.number_of_edges(), 4))
# test override
def foo(g):
with g.local_scope():
g.ndata['h'] = F.ones((g.number_of_nodes(), 3))
g.edata['w'] = F.ones((g.number_of_edges(), 4))
g.ndata["h"] = F.ones((g.number_of_nodes(), 3))
g.edata["w"] = F.ones((g.number_of_edges(), 4))
foo(g)
assert F.allclose(g.ndata['h'], F.zeros((g.number_of_nodes(), 3)))
assert F.allclose(g.edata['w'], F.zeros((g.number_of_edges(), 4)))
assert F.allclose(g.ndata["h"], F.zeros((g.number_of_nodes(), 3)))
assert F.allclose(g.edata["w"], F.zeros((g.number_of_edges(), 4)))
# test out-place update
def foo(g):
with g.local_scope():
g.nodes[[2, 3]].data['h'] = F.ones((2, 3))
g.edges[[2, 3]].data['w'] = F.ones((2, 4))
g.nodes[[2, 3]].data["h"] = F.ones((2, 3))
g.edges[[2, 3]].data["w"] = F.ones((2, 4))
foo(g)
assert F.allclose(g.ndata['h'], F.zeros((g.number_of_nodes(), 3)))
assert F.allclose(g.edata['w'], F.zeros((g.number_of_edges(), 4)))
assert F.allclose(g.ndata["h"], F.zeros((g.number_of_nodes(), 3)))
assert F.allclose(g.edata["w"], F.zeros((g.number_of_edges(), 4)))
# test out-place update 2
def foo(g):
with g.local_scope():
g.apply_nodes(lambda nodes: {'h' : nodes.data['h'] + 10}, [2, 3])
g.apply_edges(lambda edges: {'w' : edges.data['w'] + 10}, [2, 3])
g.apply_nodes(lambda nodes: {"h": nodes.data["h"] + 10}, [2, 3])
g.apply_edges(lambda edges: {"w": edges.data["w"] + 10}, [2, 3])
foo(g)
assert F.allclose(g.ndata['h'], F.zeros((g.number_of_nodes(), 3)))
assert F.allclose(g.edata['w'], F.zeros((g.number_of_edges(), 4)))
assert F.allclose(g.ndata["h"], F.zeros((g.number_of_nodes(), 3)))
assert F.allclose(g.edata["w"], F.zeros((g.number_of_edges(), 4)))
# test auto-pop
def foo(g):
with g.local_scope():
g.ndata['hh'] = F.ones((g.number_of_nodes(), 3))
g.edata['ww'] = F.ones((g.number_of_edges(), 4))
g.ndata["hh"] = F.ones((g.number_of_nodes(), 3))
g.edata["ww"] = F.ones((g.number_of_edges(), 4))
foo(g)
assert 'hh' not in g.ndata
assert 'ww' not in g.edata
assert "hh" not in g.ndata
assert "ww" not in g.edata
# test nested scope
def foo(g):
with g.local_scope():
g.ndata['hh'] = F.ones((g.number_of_nodes(), 3))
g.edata['ww'] = F.ones((g.number_of_edges(), 4))
g.ndata["hh"] = F.ones((g.number_of_nodes(), 3))
g.edata["ww"] = F.ones((g.number_of_edges(), 4))
with g.local_scope():
g.ndata['hhh'] = F.ones((g.number_of_nodes(), 3))
g.edata['www'] = F.ones((g.number_of_edges(), 4))
assert 'hhh' not in g.ndata
assert 'www' not in g.edata
g.ndata["hhh"] = F.ones((g.number_of_nodes(), 3))
g.edata["www"] = F.ones((g.number_of_edges(), 4))
assert "hhh" not in g.ndata
assert "www" not in g.edata
foo(g)
assert 'hh' not in g.ndata
assert 'ww' not in g.edata
assert "hh" not in g.ndata
assert "ww" not in g.edata
# test initializer1
g = dgl.graph(([0, 1], [1, 1]), idtype=idtype, device=F.ctx())
g.set_n_initializer(dgl.init.zero_initializer)
def foo(g):
with g.local_scope():
g.nodes[0].data['h'] = F.ones((1, 1))
assert F.allclose(g.ndata['h'], F.tensor([[1.], [0.]]))
g.nodes[0].data["h"] = F.ones((1, 1))
assert F.allclose(g.ndata["h"], F.tensor([[1.0], [0.0]]))
foo(g)
# test initializer2
def foo_e_initializer(shape, dtype, ctx, id_range):
return F.ones(shape)
g.set_e_initializer(foo_e_initializer, field='h')
g.set_e_initializer(foo_e_initializer, field="h")
def foo(g):
with g.local_scope():
g.edges[0, 1].data['h'] = F.ones((1, 1))
assert F.allclose(g.edata['h'], F.ones((2, 1)))
g.edges[0, 1].data['w'] = F.ones((1, 1))
assert F.allclose(g.edata['w'], F.tensor([[1.], [0.]]))
g.edges[0, 1].data["h"] = F.ones((1, 1))
assert F.allclose(g.edata["h"], F.ones((2, 1)))
g.edges[0, 1].data["w"] = F.ones((1, 1))
assert F.allclose(g.edata["w"], F.tensor([[1.0], [0.0]]))
foo(g)
@parametrize_idtype
def test_isolated_nodes(idtype):
g = dgl.graph(([0, 1], [1, 2]), num_nodes=5, idtype=idtype, device=F.ctx())
assert g.number_of_nodes() == 5
g = dgl.heterograph({
('user', 'plays', 'game'): ([0, 0, 1], [2, 3, 2])
}, {'user': 5, 'game': 7}, idtype=idtype, device=F.ctx())
g = dgl.heterograph(
{("user", "plays", "game"): ([0, 0, 1], [2, 3, 2])},
{"user": 5, "game": 7},
idtype=idtype,
device=F.ctx(),
)
assert g.idtype == idtype
assert g.number_of_nodes('user') == 5
assert g.number_of_nodes('game') == 7
assert g.number_of_nodes("user") == 5
assert g.number_of_nodes("game") == 7
# Test backward compatibility
g = dgl.heterograph({
('user', 'plays', 'game'): ([0, 0, 1], [2, 3, 2])
}, {'user': 5, 'game': 7}, idtype=idtype, device=F.ctx())
g = dgl.heterograph(
{("user", "plays", "game"): ([0, 0, 1], [2, 3, 2])},
{"user": 5, "game": 7},
idtype=idtype,
device=F.ctx(),
)
assert g.idtype == idtype
assert g.number_of_nodes('user') == 5
assert g.number_of_nodes('game') == 7
assert g.number_of_nodes("user") == 5
assert g.number_of_nodes("game") == 7
@parametrize_idtype
def test_send_multigraph(idtype):
g = dgl.graph(([0, 0, 0, 2], [1, 1, 1, 1]), idtype=idtype, device=F.ctx())
def _message_a(edges):
return {'a': edges.data['a']}
return {"a": edges.data["a"]}
def _message_b(edges):
return {'a': edges.data['a'] * 3}
return {"a": edges.data["a"] * 3}
def _reduce(nodes):
return {'a': F.max(nodes.mailbox['a'], 1)}
return {"a": F.max(nodes.mailbox["a"], 1)}
def answer(*args):
return F.max(F.stack(args, 0), 0)
......@@ -629,46 +717,60 @@ def test_send_multigraph(idtype):
# send by eid
old_repr = F.randn((4, 5))
# send_and_recv_on
g.ndata['a'] = F.zeros((3, 5))
g.edata['a'] = old_repr
g.ndata["a"] = F.zeros((3, 5))
g.edata["a"] = old_repr
g.send_and_recv([0, 2, 3], message_func=_message_a, reduce_func=_reduce)
new_repr = g.ndata['a']
assert F.allclose(new_repr[1], answer(old_repr[0], old_repr[2], old_repr[3]))
new_repr = g.ndata["a"]
assert F.allclose(
new_repr[1], answer(old_repr[0], old_repr[2], old_repr[3])
)
assert F.allclose(new_repr[[0, 2]], F.zeros((2, 5)))
@parametrize_idtype
def test_issue_1088(idtype):
# This test ensures that message passing on a heterograph with one edge type
# would not crash (GitHub issue #1088).
import dgl.function as fn
g = dgl.heterograph({('U', 'E', 'V'): ([0, 1, 2], [1, 2, 3])}, idtype=idtype, device=F.ctx())
g.nodes['U'].data['x'] = F.randn((3, 3))
g.update_all(fn.copy_u('x', 'm'), fn.sum('m', 'y'))
g = dgl.heterograph(
{("U", "E", "V"): ([0, 1, 2], [1, 2, 3])}, idtype=idtype, device=F.ctx()
)
g.nodes["U"].data["x"] = F.randn((3, 3))
g.update_all(fn.copy_u("x", "m"), fn.sum("m", "y"))
@parametrize_idtype
def test_degree_bucket_edge_ordering(idtype):
import dgl.function as fn
g = dgl.graph(
([1, 3, 5, 0, 4, 2, 3, 3, 4, 5], [1, 1, 0, 0, 1, 2, 2, 0, 3, 3]),
idtype=idtype, device=F.ctx())
g.edata['eid'] = F.copy_to(F.arange(0, 10), F.ctx())
idtype=idtype,
device=F.ctx(),
)
g.edata["eid"] = F.copy_to(F.arange(0, 10), F.ctx())
def reducer(nodes):
eid = F.asnumpy(F.copy_to(nodes.mailbox['eid'], F.cpu()))
eid = F.asnumpy(F.copy_to(nodes.mailbox["eid"], F.cpu()))
assert np.array_equal(eid, np.sort(eid, 1))
return {'n': F.sum(nodes.mailbox['eid'], 1)}
g.update_all(fn.copy_e('eid', 'eid'), reducer)
return {"n": F.sum(nodes.mailbox["eid"], 1)}
g.update_all(fn.copy_e("eid", "eid"), reducer)
@parametrize_idtype
def test_issue_2484(idtype):
import dgl.function as fn
g = dgl.graph(([0, 1, 2], [1, 2, 3]), idtype=idtype, device=F.ctx())
x = F.copy_to(F.randn((4,)), F.ctx())
g.ndata['x'] = x
g.pull([2, 1], fn.u_add_v('x', 'x', 'm'), fn.sum('m', 'x'))
y1 = g.ndata['x']
g.ndata["x"] = x
g.pull([2, 1], fn.u_add_v("x", "x", "m"), fn.sum("m", "x"))
y1 = g.ndata["x"]
g.ndata['x'] = x
g.pull([1, 2], fn.u_add_v('x', 'x', 'm'), fn.sum('m', 'x'))
y2 = g.ndata['x']
g.ndata["x"] = x
g.pull([1, 2], fn.u_add_v("x", "x", "m"), fn.sum("m", "x"))
y2 = g.ndata["x"]
assert F.allclose(y1, y2)
......@@ -4,18 +4,18 @@ import unittest
from collections import Counter
import backend as F
import dgl
import dgl.function as fn
import networkx as nx
import numpy as np
import pytest
import scipy.sparse as ssp
import test_utils
from scipy.sparse import rand
from test_utils import get_cases, parametrize_idtype
import dgl
import dgl.function as fn
from dgl import DGLError
from dgl.ops import edge_softmax
from scipy.sparse import rand
from test_utils import get_cases, parametrize_idtype
edge_softmax_shapes = [(1,), (1, 3), (3, 4, 5)]
rfuncs = {"sum": fn.sum, "max": fn.max, "min": fn.min, "mean": fn.mean}
......
import dgl
import backend as F
import numpy as np
import unittest
from collections import defaultdict
import backend as F
import dgl
import numpy as np
import pytest
def check_random_walk(g, metapath, traces, ntypes, prob=None, trace_eids=None):
    """Validate random-walk output against the graph structure.

    Checks that (1) the node types along each trace match the metapath
    endpoints, (2) every consecutive node pair of a trace is connected by
    an edge of the corresponding edge type, (3) when ``prob`` names an
    edge feature present on that edge type, no zero-probability edge was
    traversed, and (4) when ``trace_eids`` is given, each edge ID maps
    back to the traversed node pair.
    """
    traces = F.asnumpy(traces)
    ntypes = F.asnumpy(ntypes)
    for j in range(traces.shape[1] - 1):
        assert ntypes[j] == g.get_ntype_id(g.to_canonical_etype(metapath[j])[0])
        assert ntypes[j + 1] == g.get_ntype_id(
            g.to_canonical_etype(metapath[j])[2]
        )

    for i in range(traces.shape[0]):
        for j in range(traces.shape[1] - 1):
            assert g.has_edges_between(
                traces[i, j], traces[i, j + 1], etype=metapath[j]
            )
            if prob is not None and prob in g.edges[metapath[j]].data:
                # BUG FIX: read the feature named by ``prob`` instead of the
                # hard-coded key "p".  Identical for the current callers in
                # this file (all pass prob="p") but correct for any other
                # feature name, matching the membership test above.
                p = F.asnumpy(g.edges[metapath[j]].data[prob])
                eids = g.edge_ids(
                    traces[i, j], traces[i, j + 1], etype=metapath[j]
                )
                assert p[eids] != 0
            if trace_eids is not None:
                u, v = g.find_edges(trace_eids[i, j], etype=metapath[j])
                assert (u == traces[i, j]) and (v == traces[i, j + 1])
@pytest.mark.parametrize("use_uva", [True, False])
def test_non_uniform_random_walk(use_uva):
    """Biased (edge-weighted) random walks on a homogeneous follow-graph
    and on a three-node-type heterograph, with and without UVA."""
    if use_uva:
        if F.ctx() == F.cpu():
            pytest.skip("UVA biased random walk requires a GPU.")
        if dgl.backend.backend_name != "pytorch":
            pytest.skip(
                "UVA biased random walk is only supported with PyTorch."
            )

    g2 = dgl.heterograph(
        {("user", "follow", "user"): ([0, 1, 1, 2, 3], [1, 2, 3, 0, 0])}
    )
    g4 = dgl.heterograph(
        {
            ("user", "follow", "user"): ([0, 1, 1, 2, 3], [1, 2, 3, 0, 0]),
            ("user", "view", "item"): ([0, 0, 1, 2, 3, 3], [0, 1, 1, 2, 2, 1]),
            ("item", "viewed-by", "user"): (
                [0, 1, 1, 2, 2, 1],
                [0, 0, 1, 2, 3, 3],
            ),
        }
    )
    # Edge weights; "p2" is deliberately 2-D (an invalid prob feature).
    g2.edata["p"] = F.copy_to(
        F.tensor([3, 0, 3, 3, 3], dtype=F.float32), F.cpu()
    )
    g2.edata["p2"] = F.copy_to(
        F.tensor([[3], [0], [3], [3], [3]], dtype=F.float32), F.cpu()
    )
    g4.edges["follow"].data["p"] = F.copy_to(
        F.tensor([3, 0, 3, 3, 3], dtype=F.float32), F.cpu()
    )
    g4.edges["viewed-by"].data["p"] = F.copy_to(
        F.tensor([1, 1, 1, 1, 1, 1], dtype=F.float32), F.cpu()
    )

    if use_uva:
        for g in (g2, g4):
            g.create_formats_()
            g.pin_memory_()
    elif F._default_context_str == "gpu":
        g2 = g2.to(F.ctx())
        g4 = g4.to(F.ctx())

    try:
        start2 = F.tensor([0, 1, 2, 3, 0, 1, 2, 3], dtype=g2.idtype)
        # Walks must only traverse nonzero-probability edges.
        traces, eids, ntypes = dgl.sampling.random_walk(
            g2, start2, length=4, prob="p", return_eids=True
        )
        check_random_walk(
            g2, ["follow"] * 4, traces, ntypes, "p", trace_eids=eids
        )
        # A 2-D probability feature must be rejected.
        with pytest.raises(dgl.DGLError):
            traces, ntypes = dgl.sampling.random_walk(
                g2, start2, length=4, prob="p2"
            )

        metapath = ["follow", "view", "viewed-by"] * 2
        start4 = F.tensor([0, 1, 2, 3, 0, 1, 2, 3], dtype=g4.idtype)
        traces, eids, ntypes = dgl.sampling.random_walk(
            g4, start4, metapath=metapath, prob="p", return_eids=True
        )
        check_random_walk(g4, metapath, traces, ntypes, "p", trace_eids=eids)
        # A scalar restart probability of zero must not change the walk.
        traces, eids, ntypes = dgl.sampling.random_walk(
            g4,
            start4,
            metapath=metapath,
            prob="p",
            restart_prob=0.0,
            return_eids=True,
        )
        check_random_walk(g4, metapath, traces, ntypes, "p", trace_eids=eids)
        # Same for an all-zero per-step restart-probability tensor.
        traces, eids, ntypes = dgl.sampling.random_walk(
            g4,
            start4,
            metapath=metapath,
            prob="p",
            restart_prob=F.zeros((6,), F.float32, F.ctx()),
            return_eids=True,
        )
        check_random_walk(g4, metapath, traces, ntypes, "p", trace_eids=eids)
        # A certain restart on the appended last step truncates every trace
        # there, leaving -1 in the final column.
        traces, eids, ntypes = dgl.sampling.random_walk(
            g4,
            start4,
            metapath=metapath + ["follow"],
            prob="p",
            restart_prob=F.tensor([0, 0, 0, 0, 0, 0, 1], F.float32),
            return_eids=True,
        )
        check_random_walk(
            g4, metapath, traces[:, :7], ntypes[:7], "p", trace_eids=eids
        )
        assert (F.asnumpy(traces[:, 7]) == -1).all()
    finally:
        # Unpins unconditionally, exactly as in the original block.
        for g in (g2, g4):
            g.unpin_memory_()
@pytest.mark.parametrize("use_uva", [True, False])
def test_uniform_random_walk(use_uva):
    """Uniform random walks: fixed-length walks, metapath walks, and the
    scalar / per-step restart-probability variants."""
    if use_uva and F.ctx() == F.cpu():
        pytest.skip("UVA random walk requires a GPU.")

    g1 = dgl.heterograph({("user", "follow", "user"): ([0, 1, 2], [1, 2, 0])})
    g2 = dgl.heterograph(
        {("user", "follow", "user"): ([0, 1, 1, 2, 3], [1, 2, 3, 0, 0])}
    )
    g3 = dgl.heterograph(
        {
            ("user", "follow", "user"): ([0, 1, 2], [1, 2, 0]),
            ("user", "view", "item"): ([0, 1, 2], [0, 1, 2]),
            ("item", "viewed-by", "user"): ([0, 1, 2], [0, 1, 2]),
        }
    )
    g4 = dgl.heterograph(
        {
            ("user", "follow", "user"): ([0, 1, 1, 2, 3], [1, 2, 3, 0, 0]),
            ("user", "view", "item"): ([0, 0, 1, 2, 3, 3], [0, 1, 1, 2, 2, 1]),
            ("item", "viewed-by", "user"): (
                [0, 1, 1, 2, 2, 1],
                [0, 0, 1, 2, 3, 3],
            ),
        }
    )
    if use_uva:
        for g in (g1, g2, g3, g4):
            g.create_formats_()
            g.pin_memory_()
    elif F._default_context_str == "gpu":
        g1 = g1.to(F.ctx())
        g2 = g2.to(F.ctx())
        g3 = g3.to(F.ctx())
        # NOTE(review): this line falls in a hidden diff-hunk gap of the
        # scraped source; reconstructed by symmetry with g1-g3 above.
        g4 = g4.to(F.ctx())
    try:
        start1 = F.tensor([0, 1, 2, 0, 1, 2], dtype=g1.idtype)
        traces, eids, ntypes = dgl.sampling.random_walk(
            g1, start1, length=4, return_eids=True
        )
        check_random_walk(g1, ["follow"] * 4, traces, ntypes, trace_eids=eids)
        # Out-of-range seed node IDs are rejected on CPU.
        if F._default_context_str == "cpu":
            with pytest.raises(dgl.DGLError):
                dgl.sampling.random_walk(
                    g1,
                    F.tensor([0, 1, 2, 10], dtype=g1.idtype),
                    length=4,
                    return_eids=True,
                )
        # A scalar restart probability of zero leaves the walk intact.
        traces, eids, ntypes = dgl.sampling.random_walk(
            g1, start1, length=4, restart_prob=0.0, return_eids=True
        )
        check_random_walk(g1, ["follow"] * 4, traces, ntypes, trace_eids=eids)
        # Same for an all-zero per-step restart-probability tensor.
        traces, ntypes = dgl.sampling.random_walk(
            g1, start1, length=4, restart_prob=F.zeros((4,), F.float32)
        )
        check_random_walk(g1, ["follow"] * 4, traces, ntypes)
        # A certain restart at the final step truncates every trace there,
        # leaving -1 in the last column.
        traces, ntypes = dgl.sampling.random_walk(
            g1,
            start1,
            length=5,
            restart_prob=F.tensor([0, 0, 0, 0, 1], dtype=F.float32),
        )
        check_random_walk(
            g1,
            ["follow"] * 4,
            F.slice_axis(traces, 1, 0, 5),
            F.slice_axis(ntypes, 0, 0, 5),
        )
        assert (F.asnumpy(traces)[:, 5] == -1).all()
        traces, eids, ntypes = dgl.sampling.random_walk(
            g2,
            F.tensor([0, 1, 2, 3, 0, 1, 2, 3], dtype=g2.idtype),
            length=4,
            return_eids=True,
        )
        check_random_walk(g2, ["follow"] * 4, traces, ntypes, trace_eids=eids)
        # Metapath-driven walks over the heterographs.
        metapath = ["follow", "view", "viewed-by"] * 2
        traces, eids, ntypes = dgl.sampling.random_walk(
            g3,
            F.tensor([0, 1, 2, 0, 1, 2], dtype=g3.idtype),
            metapath=metapath,
            return_eids=True,
        )
        check_random_walk(g3, metapath, traces, ntypes, trace_eids=eids)
        metapath = ["follow", "view", "viewed-by"] * 2
        traces, eids, ntypes = dgl.sampling.random_walk(
            g4,
            F.tensor([0, 1, 2, 3, 0, 1, 2, 3], dtype=g4.idtype),
            metapath=metapath,
            return_eids=True,
        )
        check_random_walk(g4, metapath, traces, ntypes, trace_eids=eids)
        traces, eids, ntypes = dgl.sampling.random_walk(
            g4,
            F.tensor([0, 1, 2, 0, 1, 2], dtype=g4.idtype),
            metapath=metapath,
            return_eids=True,
        )
        check_random_walk(g4, metapath, traces, ntypes, trace_eids=eids)
    finally:  # make sure to unpin the graphs even if some test fails
        for g in (g1, g2, g3, g4):
            if g.is_pinned():
                g.unpin_memory_()
@unittest.skipIf(
    F._default_context_str == "gpu", reason="GPU random walk not implemented"
)
def test_node2vec():
    """node2vec random walks, unweighted and edge-weighted."""
    g1 = dgl.heterograph({("user", "follow", "user"): ([0, 1, 2], [1, 2, 0])})
    g2 = dgl.heterograph(
        {("user", "follow", "user"): ([0, 1, 1, 2, 3], [1, 2, 3, 0, 0])}
    )
    g2.edata["p"] = F.tensor([3, 0, 3, 3, 3], dtype=F.float32)
    ntypes = F.zeros((5,), dtype=F.int64)

    traces, eids = dgl.sampling.node2vec_random_walk(
        g1, [0, 1, 2, 0, 1, 2], 1, 1, 4, return_eids=True
    )
    check_random_walk(g1, ["follow"] * 4, traces, ntypes, trace_eids=eids)
    # Weighted variant: zero-weight edges must never be traversed.
    traces, eids = dgl.sampling.node2vec_random_walk(
        g2, [0, 1, 2, 3, 0, 1, 2, 3], 1, 1, 4, prob="p", return_eids=True
    )
    check_random_walk(g2, ["follow"] * 4, traces, ntypes, "p", trace_eids=eids)
@unittest.skipIf(
    F._default_context_str == "gpu", reason="GPU pack traces not implemented"
)
def test_pack_traces():
    """Packing of padded random-walk traces into a flat representation."""
    # Two traces: the first terminates early (padded with -1), the second
    # runs the full length.
    raw_traces = np.array(
        [[0, 1, -1, -1, -1, -1, -1], [0, 1, 1, 3, 0, 0, 0]], dtype="int64"
    )
    raw_types = np.array([0, 0, 1, 0, 0, 1, 0], dtype="int64")
    traces = F.zerocopy_from_numpy(raw_traces)
    types = F.zerocopy_from_numpy(raw_types)

    result = dgl.sampling.pack_traces(traces, types)
    # Concatenated node IDs with the -1 padding removed.
    assert F.array_equal(
        result[0], F.tensor([0, 1, 0, 1, 1, 3, 0, 0, 0], dtype=F.int64)
    )
    # Per-node type IDs of the packed traces.
    assert F.array_equal(
        result[1], F.tensor([0, 0, 0, 0, 1, 0, 0, 1, 0], dtype=F.int64)
    )
    # Presumably the per-trace lengths and start offsets — values match
    # that reading (2 and 7 nodes, starting at 0 and 2).
    assert F.array_equal(result[2], F.tensor([2, 7], dtype=F.int64))
    assert F.array_equal(result[3], F.tensor([0, 2], dtype=F.int64))
# NOTE(review): this block is a scrape of a commit diff — the pre- and
# post-autoformat versions of many statements appear back to back, and the
# "......@@" hunk marker below stands in for hidden unchanged lines (after
# the homogeneous-graph sampler is built, the _test_sampler call and the
# matching "finally:" are hidden).  Code is left byte-identical; only
# comments are added.
@pytest.mark.parametrize('use_uva', [True, False])
@pytest.mark.parametrize("use_uva", [True, False])
def test_pinsage_sampling(use_uva):
    # UVA (pinned-memory) sampling needs a GPU.
    if use_uva and F.ctx() == F.cpu():
        pytest.skip('UVA sampling requires a GPU.')
        pytest.skip("UVA sampling requires a GPU.")
    def _test_sampler(g, sampler, ntype):
        # Sample the neighborhoods of nodes 0 and 2 and check the induced
        # edge endpoints of the returned single-type graph.
        seeds = F.copy_to(F.tensor([0, 2], dtype=g.idtype), F.ctx())
        neighbor_g = sampler(seeds)
        assert neighbor_g.ntypes == [ntype]
        u, v = neighbor_g.all_edges(form='uv', order='eid')
        u, v = neighbor_g.all_edges(form="uv", order="eid")
        uv = list(zip(F.asnumpy(u).tolist(), F.asnumpy(v).tolist()))
        assert (1, 0) in uv or (0, 0) in uv
        assert (2, 2) in uv or (3, 2) in uv
    g = dgl.heterograph({
        ('item', 'bought-by', 'user'): ([0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 2, 3, 2, 3]),
        ('user', 'bought', 'item'): ([0, 1, 0, 1, 2, 3, 2, 3], [0, 0, 1, 1, 2, 2, 3, 3])})
    g = dgl.heterograph(
        {
            ("item", "bought-by", "user"): (
                [0, 0, 1, 1, 2, 2, 3, 3],
                [0, 1, 0, 1, 2, 3, 2, 3],
            ),
            ("user", "bought", "item"): (
                [0, 1, 0, 1, 2, 3, 2, 3],
                [0, 0, 1, 1, 2, 2, 3, 3],
            ),
        }
    )
    if use_uva:
        g.create_formats_()
        g.pin_memory_()
    elif F._default_context_str == 'gpu':
    elif F._default_context_str == "gpu":
        g = g.to(F.ctx())
    try:
        sampler = dgl.sampling.PinSAGESampler(g, 'item', 'user', 4, 0.5, 3, 2)
        _test_sampler(g, sampler, 'item')
        sampler = dgl.sampling.RandomWalkNeighborSampler(g, 4, 0.5, 3, 2, ['bought-by', 'bought'])
        _test_sampler(g, sampler, 'item')
        sampler = dgl.sampling.RandomWalkNeighborSampler(g, 4, 0.5, 3, 2,
            [('item', 'bought-by', 'user'), ('user', 'bought', 'item')])
        _test_sampler(g, sampler, 'item')
        sampler = dgl.sampling.PinSAGESampler(g, "item", "user", 4, 0.5, 3, 2)
        _test_sampler(g, sampler, "item")
        sampler = dgl.sampling.RandomWalkNeighborSampler(
            g, 4, 0.5, 3, 2, ["bought-by", "bought"]
        )
        _test_sampler(g, sampler, "item")
        sampler = dgl.sampling.RandomWalkNeighborSampler(
            g,
            4,
            0.5,
            3,
            2,
            [("item", "bought-by", "user"), ("user", "bought", "item")],
        )
        _test_sampler(g, sampler, "item")
    finally:
        # Unpin on exit so a failing assertion does not leak pinned memory.
        if g.is_pinned():
            g.unpin_memory_()
    g = dgl.graph(([0, 0, 1, 1, 2, 2, 3, 3],
                   [0, 1, 0, 1, 2, 3, 2, 3]))
    g = dgl.graph(([0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 2, 3, 2, 3]))
    if use_uva:
        g.create_formats_()
        g.pin_memory_()
    elif F._default_context_str == 'gpu':
    elif F._default_context_str == "gpu":
        g = g.to(F.ctx())
    try:
        sampler = dgl.sampling.RandomWalkNeighborSampler(g, 4, 0.5, 3, 2)
......@@ -241,69 +371,116 @@ def test_pinsage_sampling(use_uva):
        if g.is_pinned():
            g.unpin_memory_()
    g = dgl.heterograph({
        ('A', 'AB', 'B'): ([0, 2], [1, 3]),
        ('B', 'BC', 'C'): ([1, 3], [2, 1]),
        ('C', 'CA', 'A'): ([2, 1], [0, 2])})
    g = dgl.heterograph(
        {
            ("A", "AB", "B"): ([0, 2], [1, 3]),
            ("B", "BC", "C"): ([1, 3], [2, 1]),
            ("C", "CA", "A"): ([2, 1], [0, 2]),
        }
    )
    if use_uva:
        g.create_formats_()
        g.pin_memory_()
    elif F._default_context_str == 'gpu':
    elif F._default_context_str == "gpu":
        g = g.to(F.ctx())
    try:
        sampler = dgl.sampling.RandomWalkNeighborSampler(g, 4, 0.5, 3, 2, ['AB', 'BC', 'CA'])
        _test_sampler(g, sampler, 'A')
        sampler = dgl.sampling.RandomWalkNeighborSampler(
            g, 4, 0.5, 3, 2, ["AB", "BC", "CA"]
        )
        _test_sampler(g, sampler, "A")
    finally:
        if g.is_pinned():
            g.unpin_memory_()
def _gen_neighbor_sampling_test_graph(hypersparse, reverse):
    """Build the (homogeneous ``g``, heterogeneous ``hg``) graph pair used
    by the neighbor-sampling tests.

    ``hypersparse`` allocates an astronomically large node space (1 << 50
    nodes per type) so that materializing a CSR would crash;
    ``reverse`` flips the direction of every edge.
    """
    if hypersparse:
        # should crash if allocated a CSR
        card = 1 << 50
        num_nodes_dict = {"user": card, "game": card, "coin": card}
    else:
        card = None
        num_nodes_dict = None

    if reverse:
        g = dgl.heterograph(
            {
                ("user", "follow", "user"): (
                    [0, 0, 0, 1, 1, 1, 2],
                    [1, 2, 3, 0, 2, 3, 0],
                )
            },
            {"user": card if card is not None else 4},
        )
        g = g.to(F.ctx())
        g.edata["prob"] = F.tensor(
            [0.5, 0.5, 0.0, 0.5, 0.5, 0.0, 1.0], dtype=F.float32
        )
        g.edata["mask"] = F.tensor([True, True, False, True, True, False, True])
        hg = dgl.heterograph(
            {
                ("user", "follow", "user"): (
                    [0, 0, 0, 1, 1, 1, 2],
                    [1, 2, 3, 0, 2, 3, 0],
                ),
                ("game", "play", "user"): ([0, 1, 2, 2], [0, 0, 1, 3]),
                ("user", "liked-by", "game"): (
                    [0, 1, 2, 0, 3, 0],
                    [2, 2, 2, 1, 1, 0],
                ),
                ("coin", "flips", "user"): ([0, 0, 0, 0], [0, 1, 2, 3]),
            },
            num_nodes_dict,
        )
        hg = hg.to(F.ctx())
    else:
        g = dgl.heterograph(
            {
                ("user", "follow", "user"): (
                    [1, 2, 3, 0, 2, 3, 0],
                    [0, 0, 0, 1, 1, 1, 2],
                )
            },
            {"user": card if card is not None else 4},
        )
        g = g.to(F.ctx())
        g.edata["prob"] = F.tensor(
            [0.5, 0.5, 0.0, 0.5, 0.5, 0.0, 1.0], dtype=F.float32
        )
        g.edata["mask"] = F.tensor([True, True, False, True, True, False, True])
        hg = dgl.heterograph(
            {
                ("user", "follow", "user"): (
                    [1, 2, 3, 0, 2, 3, 0],
                    [0, 0, 0, 1, 1, 1, 2],
                ),
                ("user", "play", "game"): ([0, 0, 1, 3], [0, 1, 2, 2]),
                ("game", "liked-by", "user"): (
                    [2, 2, 2, 1, 1, 0],
                    [0, 1, 2, 0, 3, 0],
                ),
                ("user", "flips", "coin"): ([0, 1, 2, 3], [0, 0, 0, 0]),
            },
            num_nodes_dict,
        )
        hg = hg.to(F.ctx())
    hg.edges["follow"].data["prob"] = F.tensor(
        [0.5, 0.5, 0.0, 0.5, 0.5, 0.0, 1.0], dtype=F.float32
    )
    hg.edges["follow"].data["mask"] = F.tensor(
        [True, True, False, True, True, False, True]
    )
    hg.edges["play"].data["prob"] = F.tensor(
        [0.8, 0.5, 0.5, 0.5], dtype=F.float32
    )
    # Leave out the mask of play and liked-by since all of them are True anyway.
    hg.edges["liked-by"].data["prob"] = F.tensor(
        [0.3, 0.5, 0.2, 0.5, 0.1, 0.1], dtype=F.float32
    )
    return g, hg
def _gen_neighbor_topk_test_graph(hypersparse, reverse):
    """Build the (homogeneous ``g``, heterogeneous ``hg``) graph pair used
    by the top-k neighbor-selection tests, with per-edge "weight" features.

    Mirrors _gen_neighbor_sampling_test_graph; ``reverse`` flips every edge.
    """
    if hypersparse:
        # should crash if allocated a CSR
        # NOTE(review): the two lines below sit in a hidden diff-hunk gap of
        # the scraped source; reconstructed from the identically-shaped
        # branch in _gen_neighbor_sampling_test_graph.
        card = 1 << 50
    else:
        card = None
    if reverse:
        g = dgl.heterograph(
            {
                ("user", "follow", "user"): (
                    [0, 0, 0, 1, 1, 1, 2],
                    [1, 2, 3, 0, 2, 3, 0],
                )
            }
        )
        g.edata["weight"] = F.tensor(
            [0.5, 0.3, 0.0, -5.0, 22.0, 0.0, 1.0], dtype=F.float32
        )
        hg = dgl.heterograph(
            {
                ("user", "follow", "user"): (
                    [0, 0, 0, 1, 1, 1, 2],
                    [1, 2, 3, 0, 2, 3, 0],
                ),
                ("game", "play", "user"): ([0, 1, 2, 2], [0, 0, 1, 3]),
                ("user", "liked-by", "game"): (
                    [0, 1, 2, 0, 3, 0],
                    [2, 2, 2, 1, 1, 0],
                ),
                ("coin", "flips", "user"): ([0, 0, 0, 0], [0, 1, 2, 3]),
            }
        )
    else:
        g = dgl.heterograph(
            {
                ("user", "follow", "user"): (
                    [1, 2, 3, 0, 2, 3, 0],
                    [0, 0, 0, 1, 1, 1, 2],
                )
            }
        )
        g.edata["weight"] = F.tensor(
            [0.5, 0.3, 0.0, -5.0, 22.0, 0.0, 1.0], dtype=F.float32
        )
        hg = dgl.heterograph(
            {
                ("user", "follow", "user"): (
                    [1, 2, 3, 0, 2, 3, 0],
                    [0, 0, 0, 1, 1, 1, 2],
                ),
                ("user", "play", "game"): ([0, 0, 1, 3], [0, 1, 2, 2]),
                ("game", "liked-by", "user"): (
                    [2, 2, 2, 1, 1, 0],
                    [0, 1, 2, 0, 3, 0],
                ),
                ("user", "flips", "coin"): ([0, 1, 2, 3], [0, 0, 0, 0]),
            }
        )
    hg.edges["follow"].data["weight"] = F.tensor(
        [0.5, 0.3, 0.0, -5.0, 22.0, 0.0, 1.0], dtype=F.float32
    )
    hg.edges["play"].data["weight"] = F.tensor(
        [0.8, 0.5, 0.4, 0.5], dtype=F.float32
    )
    hg.edges["liked-by"].data["weight"] = F.tensor(
        [0.3, 0.5, 0.2, 0.5, 0.1, 0.1], dtype=F.float32
    )
    hg.edges["flips"].data["weight"] = F.tensor(
        [10, 2, 13, -1], dtype=F.float32
    )
    return g, hg
# NOTE(review): this block is a scrape of a commit diff — old and new
# (auto-formatted) versions of many statements appear back to back, and the
# "......@@" hunk markers stand in for hidden unchanged lines (the
# uv_ans construction and some length assertions are hidden).  Code is left
# byte-identical; only comments are added.
def _test_sample_neighbors(hypersparse, prob):
    # Exercises dgl.sampling.sample_neighbors with the default
    # edge_dir="in" on the graphs from _gen_neighbor_sampling_test_graph.
    g, hg = _gen_neighbor_sampling_test_graph(hypersparse, False)
    def _test1(p, replace):
        # fanout=-1 selects every (eligible) in-edge of nodes 0 and 1.
        subg = dgl.sampling.sample_neighbors(g, [0, 1], -1, prob=p, replace=replace)
        subg = dgl.sampling.sample_neighbors(
            g, [0, 1], -1, prob=p, replace=replace
        )
        assert subg.number_of_nodes() == g.number_of_nodes()
        u, v = subg.edges()
        u_ans, v_ans, e_ans = g.in_edges([0, 1], form='all')
        u_ans, v_ans, e_ans = g.in_edges([0, 1], form="all")
        if p is not None:
            # Drop edges whose probability/mask feature excludes them.
            emask = F.gather_row(g.edata[p], e_ans)
            if p == 'prob':
                emask = (emask != 0)
            if p == "prob":
                emask = emask != 0
            u_ans = F.boolean_mask(u_ans, emask)
            v_ans = F.boolean_mask(v_ans, emask)
        uv = set(zip(F.asnumpy(u), F.asnumpy(v)))
......@@ -360,12 +576,17 @@ def _test_sample_neighbors(hypersparse, prob):
        assert uv == uv_ans
        for i in range(10):
            # fanout=2 per seed: 4 sampled edges in total.
            subg = dgl.sampling.sample_neighbors(g, [0, 1], 2, prob=p, replace=replace)
            subg = dgl.sampling.sample_neighbors(
                g, [0, 1], 2, prob=p, replace=replace
            )
            assert subg.number_of_nodes() == g.number_of_nodes()
            assert subg.number_of_edges() == 4
            u, v = subg.edges()
            assert set(F.asnumpy(F.unique(v))) == {0, 1}
            assert F.array_equal(F.astype(g.has_edges_between(u, v), F.int64), F.ones((4,), dtype=F.int64))
            assert F.array_equal(
                F.astype(g.has_edges_between(u, v), F.int64),
                F.ones((4,), dtype=F.int64),
            )
            assert F.array_equal(g.edge_ids(u, v), subg.edata[dgl.EID])
            edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
            if not replace:
......@@ -374,18 +595,21 @@ def _test_sample_neighbors(hypersparse, prob):
        if p is not None:
            # Zero-probability edges must never be sampled.
            assert not (3, 0) in edge_set
            assert not (3, 1) in edge_set
    _test1(prob, True) # w/ replacement, uniform
    _test1(prob, True)  # w/ replacement, uniform
    _test1(prob, False)  # w/o replacement, uniform
    def _test2(p, replace):  # fanout > #neighbors
        subg = dgl.sampling.sample_neighbors(g, [0, 2], -1, prob=p, replace=replace)
        subg = dgl.sampling.sample_neighbors(
            g, [0, 2], -1, prob=p, replace=replace
        )
        assert subg.number_of_nodes() == g.number_of_nodes()
        u, v = subg.edges()
        u_ans, v_ans, e_ans = g.in_edges([0, 2], form='all')
        u_ans, v_ans, e_ans = g.in_edges([0, 2], form="all")
        if p is not None:
            emask = F.gather_row(g.edata[p], e_ans)
            if p == 'prob':
                emask = (emask != 0)
            if p == "prob":
                emask = emask != 0
            u_ans = F.boolean_mask(u_ans, emask)
            v_ans = F.boolean_mask(v_ans, emask)
        uv = set(zip(F.asnumpy(u), F.asnumpy(v)))
......@@ -393,13 +617,18 @@ def _test_sample_neighbors(hypersparse, prob):
        assert uv == uv_ans
        for i in range(10):
            subg = dgl.sampling.sample_neighbors(g, [0, 2], 2, prob=p, replace=replace)
            subg = dgl.sampling.sample_neighbors(
                g, [0, 2], 2, prob=p, replace=replace
            )
            assert subg.number_of_nodes() == g.number_of_nodes()
            # Node 2 has only one in-edge, so without replacement only 3
            # edges can be drawn in total.
            num_edges = 4 if replace else 3
            assert subg.number_of_edges() == num_edges
            u, v = subg.edges()
            assert set(F.asnumpy(F.unique(v))) == {0, 2}
            assert F.array_equal(F.astype(g.has_edges_between(u, v), F.int64), F.ones((num_edges,), dtype=F.int64))
            assert F.array_equal(
                F.astype(g.has_edges_between(u, v), F.int64),
                F.ones((num_edges,), dtype=F.int64),
            )
            assert F.array_equal(g.edge_ids(u, v), subg.edata[dgl.EID])
            edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
            if not replace:
......@@ -407,56 +636,65 @@ def _test_sample_neighbors(hypersparse, prob):
            assert len(edge_set) == num_edges
        if p is not None:
            assert not (3, 0) in edge_set
    _test2(prob, True) # w/ replacement, uniform
    _test2(prob, True)  # w/ replacement, uniform
    _test2(prob, False)  # w/o replacement, uniform
    def _test3(p, replace):
        # Heterograph variant with per-node-type seed dict.
        subg = dgl.sampling.sample_neighbors(hg, {'user': [0, 1], 'game': 0}, -1, prob=p, replace=replace)
        subg = dgl.sampling.sample_neighbors(
            hg, {"user": [0, 1], "game": 0}, -1, prob=p, replace=replace
        )
        assert len(subg.ntypes) == 3
        assert len(subg.etypes) == 4
        assert subg['follow'].number_of_edges() == 6 if p is None else 4
        assert subg['play'].number_of_edges() == 1
        assert subg['liked-by'].number_of_edges() == 4
        assert subg['flips'].number_of_edges() == 0
        assert subg["follow"].number_of_edges() == 6 if p is None else 4
        assert subg["play"].number_of_edges() == 1
        assert subg["liked-by"].number_of_edges() == 4
        assert subg["flips"].number_of_edges() == 0
        for i in range(10):
            subg = dgl.sampling.sample_neighbors(hg, {'user' : [0,1], 'game' : 0}, 2, prob=p, replace=replace)
            subg = dgl.sampling.sample_neighbors(
                hg, {"user": [0, 1], "game": 0}, 2, prob=p, replace=replace
            )
            assert len(subg.ntypes) == 3
            assert len(subg.etypes) == 4
            assert subg['follow'].number_of_edges() == 4
            assert subg['play'].number_of_edges() == 2 if replace else 1
            assert subg['liked-by'].number_of_edges() == 4 if replace else 3
            assert subg['flips'].number_of_edges() == 0
            assert subg["follow"].number_of_edges() == 4
            assert subg["play"].number_of_edges() == 2 if replace else 1
            assert subg["liked-by"].number_of_edges() == 4 if replace else 3
            assert subg["flips"].number_of_edges() == 0
    _test3(prob, True) # w/ replacement, uniform
    _test3(prob, True)  # w/ replacement, uniform
    _test3(prob, False)  # w/o replacement, uniform
    # test different fanouts for different relations
    for i in range(10):
        subg = dgl.sampling.sample_neighbors(
            hg,
            {'user' : [0,1], 'game' : 0, 'coin': 0},
            {'follow': 1, 'play': 2, 'liked-by': 0, 'flips': -1},
            replace=True)
            {"user": [0, 1], "game": 0, "coin": 0},
            {"follow": 1, "play": 2, "liked-by": 0, "flips": -1},
            replace=True,
        )
        assert len(subg.ntypes) == 3
        assert len(subg.etypes) == 4
        assert subg['follow'].number_of_edges() == 2
        assert subg['play'].number_of_edges() == 2
        assert subg['liked-by'].number_of_edges() == 0
        assert subg['flips'].number_of_edges() == 4
        assert subg["follow"].number_of_edges() == 2
        assert subg["play"].number_of_edges() == 2
        assert subg["liked-by"].number_of_edges() == 0
        assert subg["flips"].number_of_edges() == 4
# NOTE(review): this block is a scrape of a commit diff — old and new
# (auto-formatted) versions of many statements appear back to back, and the
# "......@@" hunk markers stand in for hidden unchanged lines.  Code is
# left byte-identical; only comments are added.
def _test_sample_neighbors_outedge(hypersparse):
    # Same checks as _test_sample_neighbors but with edge_dir="out", on
    # the reversed graphs so the expected edge sets mirror the in-edge case.
    g, hg = _gen_neighbor_sampling_test_graph(hypersparse, True)
    def _test1(p, replace):
        # fanout=-1 selects every (eligible) out-edge of nodes 0 and 1.
        subg = dgl.sampling.sample_neighbors(g, [0, 1], -1, prob=p, replace=replace, edge_dir='out')
        subg = dgl.sampling.sample_neighbors(
            g, [0, 1], -1, prob=p, replace=replace, edge_dir="out"
        )
        assert subg.number_of_nodes() == g.number_of_nodes()
        u, v = subg.edges()
        u_ans, v_ans, e_ans = g.out_edges([0, 1], form='all')
        u_ans, v_ans, e_ans = g.out_edges([0, 1], form="all")
        if p is not None:
            # Drop edges whose probability/mask feature excludes them.
            emask = F.gather_row(g.edata[p], e_ans)
            if p == 'prob':
                emask = (emask != 0)
            if p == "prob":
                emask = emask != 0
            u_ans = F.boolean_mask(u_ans, emask)
            v_ans = F.boolean_mask(v_ans, emask)
        uv = set(zip(F.asnumpy(u), F.asnumpy(v)))
......@@ -464,12 +702,17 @@ def _test_sample_neighbors_outedge(hypersparse):
        assert uv == uv_ans
        for i in range(10):
            subg = dgl.sampling.sample_neighbors(g, [0, 1], 2, prob=p, replace=replace, edge_dir='out')
            subg = dgl.sampling.sample_neighbors(
                g, [0, 1], 2, prob=p, replace=replace, edge_dir="out"
            )
            assert subg.number_of_nodes() == g.number_of_nodes()
            assert subg.number_of_edges() == 4
            u, v = subg.edges()
            assert set(F.asnumpy(F.unique(u))) == {0, 1}
            assert F.array_equal(F.astype(g.has_edges_between(u, v), F.int64), F.ones((4,), dtype=F.int64))
            assert F.array_equal(
                F.astype(g.has_edges_between(u, v), F.int64),
                F.ones((4,), dtype=F.int64),
            )
            assert F.array_equal(g.edge_ids(u, v), subg.edata[dgl.EID])
            edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
            if not replace:
......@@ -478,20 +721,23 @@ def _test_sample_neighbors_outedge(hypersparse):
        if p is not None:
            # Zero-probability edges must never be sampled.
            assert not (0, 3) in edge_set
            assert not (1, 3) in edge_set
    _test1(None, True) # w/ replacement, uniform
    _test1(None, True)  # w/ replacement, uniform
    _test1(None, False)  # w/o replacement, uniform
    _test1('prob', True)  # w/ replacement
    _test1('prob', False)  # w/o replacement
    _test1("prob", True)  # w/ replacement
    _test1("prob", False)  # w/o replacement
    def _test2(p, replace):  # fanout > #neighbors
        subg = dgl.sampling.sample_neighbors(g, [0, 2], -1, prob=p, replace=replace, edge_dir='out')
        subg = dgl.sampling.sample_neighbors(
            g, [0, 2], -1, prob=p, replace=replace, edge_dir="out"
        )
        assert subg.number_of_nodes() == g.number_of_nodes()
        u, v = subg.edges()
        u_ans, v_ans, e_ans = g.out_edges([0, 2], form='all')
        u_ans, v_ans, e_ans = g.out_edges([0, 2], form="all")
        if p is not None:
            emask = F.gather_row(g.edata[p], e_ans)
            if p == 'prob':
                emask = (emask != 0)
            if p == "prob":
                emask = emask != 0
            u_ans = F.boolean_mask(u_ans, emask)
            v_ans = F.boolean_mask(v_ans, emask)
        uv = set(zip(F.asnumpy(u), F.asnumpy(v)))
......@@ -499,13 +745,18 @@ def _test_sample_neighbors_outedge(hypersparse):
        assert uv == uv_ans
        for i in range(10):
            subg = dgl.sampling.sample_neighbors(g, [0, 2], 2, prob=p, replace=replace, edge_dir='out')
            subg = dgl.sampling.sample_neighbors(
                g, [0, 2], 2, prob=p, replace=replace, edge_dir="out"
            )
            assert subg.number_of_nodes() == g.number_of_nodes()
            # Node 2 has a single out-edge, so without replacement only 3
            # edges can be drawn in total.
            num_edges = 4 if replace else 3
            assert subg.number_of_edges() == num_edges
            u, v = subg.edges()
            assert set(F.asnumpy(F.unique(u))) == {0, 2}
            assert F.array_equal(F.astype(g.has_edges_between(u, v), F.int64), F.ones((num_edges,), dtype=F.int64))
            assert F.array_equal(
                F.astype(g.has_edges_between(u, v), F.int64),
                F.ones((num_edges,), dtype=F.int64),
            )
            assert F.array_equal(g.edge_ids(u, v), subg.edata[dgl.EID])
            edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
            if not replace:
......@@ -513,39 +764,55 @@ def _test_sample_neighbors_outedge(hypersparse):
            assert len(edge_set) == num_edges
        if p is not None:
            assert not (0, 3) in edge_set
    _test2(None, True) # w/ replacement, uniform
    _test2(None, True)  # w/ replacement, uniform
    _test2(None, False)  # w/o replacement, uniform
    _test2('prob', True)  # w/ replacement
    _test2('prob', False)  # w/o replacement
    _test2("prob", True)  # w/ replacement
    _test2("prob", False)  # w/o replacement
    def _test3(p, replace):
        # Heterograph variant with per-node-type seed dict.
        subg = dgl.sampling.sample_neighbors(hg, {'user': [0, 1], 'game': 0}, -1, prob=p, replace=replace, edge_dir='out')
        subg = dgl.sampling.sample_neighbors(
            hg,
            {"user": [0, 1], "game": 0},
            -1,
            prob=p,
            replace=replace,
            edge_dir="out",
        )
        assert len(subg.ntypes) == 3
        assert len(subg.etypes) == 4
        assert subg['follow'].number_of_edges() == 6 if p is None else 4
        assert subg['play'].number_of_edges() == 1
        assert subg['liked-by'].number_of_edges() == 4
        assert subg['flips'].number_of_edges() == 0
        assert subg["follow"].number_of_edges() == 6 if p is None else 4
        assert subg["play"].number_of_edges() == 1
        assert subg["liked-by"].number_of_edges() == 4
        assert subg["flips"].number_of_edges() == 0
        for i in range(10):
            subg = dgl.sampling.sample_neighbors(hg, {'user' : [0,1], 'game' : 0}, 2, prob=p, replace=replace, edge_dir='out')
            subg = dgl.sampling.sample_neighbors(
                hg,
                {"user": [0, 1], "game": 0},
                2,
                prob=p,
                replace=replace,
                edge_dir="out",
            )
            assert len(subg.ntypes) == 3
            assert len(subg.etypes) == 4
            assert subg['follow'].number_of_edges() == 4
            assert subg['play'].number_of_edges() == 2 if replace else 1
            assert subg['liked-by'].number_of_edges() == 4 if replace else 3
            assert subg['flips'].number_of_edges() == 0
            assert subg["follow"].number_of_edges() == 4
            assert subg["play"].number_of_edges() == 2 if replace else 1
            assert subg["liked-by"].number_of_edges() == 4 if replace else 3
            assert subg["flips"].number_of_edges() == 0
    _test3(None, True) # w/ replacement, uniform
    _test3(None, True)  # w/ replacement, uniform
    _test3(None, False)  # w/o replacement, uniform
    _test3('prob', True)  # w/ replacement
    _test3('prob', False)  # w/o replacement
    _test3("prob", True)  # w/ replacement
    _test3("prob", False)  # w/o replacement
def _test_sample_neighbors_topk(hypersparse):
g, hg = _gen_neighbor_topk_test_graph(hypersparse, False)
def _test1():
subg = dgl.sampling.select_topk(g, -1, 'weight', [0, 1])
subg = dgl.sampling.select_topk(g, -1, "weight", [0, 1])
assert subg.number_of_nodes() == g.number_of_nodes()
u, v = subg.edges()
u_ans, v_ans = subg.in_edges([0, 1])
......@@ -553,17 +820,18 @@ def _test_sample_neighbors_topk(hypersparse):
uv_ans = set(zip(F.asnumpy(u_ans), F.asnumpy(v_ans)))
assert uv == uv_ans
subg = dgl.sampling.select_topk(g, 2, 'weight', [0, 1])
subg = dgl.sampling.select_topk(g, 2, "weight", [0, 1])
assert subg.number_of_nodes() == g.number_of_nodes()
assert subg.number_of_edges() == 4
u, v = subg.edges()
edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
assert F.array_equal(g.edge_ids(u, v), subg.edata[dgl.EID])
assert edge_set == {(2,0),(1,0),(2,1),(3,1)}
assert edge_set == {(2, 0), (1, 0), (2, 1), (3, 1)}
_test1()
def _test2(): # k > #neighbors
subg = dgl.sampling.select_topk(g, -1, 'weight', [0, 2])
subg = dgl.sampling.select_topk(g, -1, "weight", [0, 2])
assert subg.number_of_nodes() == g.number_of_nodes()
u, v = subg.edges()
u_ans, v_ans = subg.in_edges([0, 2])
......@@ -571,49 +839,64 @@ def _test_sample_neighbors_topk(hypersparse):
uv_ans = set(zip(F.asnumpy(u_ans), F.asnumpy(v_ans)))
assert uv == uv_ans
subg = dgl.sampling.select_topk(g, 2, 'weight', [0, 2])
subg = dgl.sampling.select_topk(g, 2, "weight", [0, 2])
assert subg.number_of_nodes() == g.number_of_nodes()
assert subg.number_of_edges() == 3
u, v = subg.edges()
assert F.array_equal(g.edge_ids(u, v), subg.edata[dgl.EID])
edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
assert edge_set == {(2,0),(1,0),(0,2)}
assert edge_set == {(2, 0), (1, 0), (0, 2)}
_test2()
def _test3():
subg = dgl.sampling.select_topk(hg, 2, 'weight', {'user' : [0,1], 'game' : 0})
subg = dgl.sampling.select_topk(
hg, 2, "weight", {"user": [0, 1], "game": 0}
)
assert len(subg.ntypes) == 3
assert len(subg.etypes) == 4
u, v = subg['follow'].edges()
u, v = subg["follow"].edges()
edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
assert F.array_equal(hg['follow'].edge_ids(u, v), subg['follow'].edata[dgl.EID])
assert edge_set == {(2,0),(1,0),(2,1),(3,1)}
u, v = subg['play'].edges()
assert F.array_equal(
hg["follow"].edge_ids(u, v), subg["follow"].edata[dgl.EID]
)
assert edge_set == {(2, 0), (1, 0), (2, 1), (3, 1)}
u, v = subg["play"].edges()
edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
assert F.array_equal(hg['play'].edge_ids(u, v), subg['play'].edata[dgl.EID])
assert edge_set == {(0,0)}
u, v = subg['liked-by'].edges()
assert F.array_equal(
hg["play"].edge_ids(u, v), subg["play"].edata[dgl.EID]
)
assert edge_set == {(0, 0)}
u, v = subg["liked-by"].edges()
edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
assert F.array_equal(hg['liked-by'].edge_ids(u, v), subg['liked-by'].edata[dgl.EID])
assert edge_set == {(2,0),(2,1),(1,0)}
assert subg['flips'].number_of_edges() == 0
assert F.array_equal(
hg["liked-by"].edge_ids(u, v), subg["liked-by"].edata[dgl.EID]
)
assert edge_set == {(2, 0), (2, 1), (1, 0)}
assert subg["flips"].number_of_edges() == 0
_test3()
# test different k for different relations
subg = dgl.sampling.select_topk(
hg, {'follow': 1, 'play': 2, 'liked-by': 0, 'flips': -1}, 'weight', {'user' : [0,1], 'game' : 0, 'coin': 0})
hg,
{"follow": 1, "play": 2, "liked-by": 0, "flips": -1},
"weight",
{"user": [0, 1], "game": 0, "coin": 0},
)
assert len(subg.ntypes) == 3
assert len(subg.etypes) == 4
assert subg['follow'].number_of_edges() == 2
assert subg['play'].number_of_edges() == 1
assert subg['liked-by'].number_of_edges() == 0
assert subg['flips'].number_of_edges() == 4
assert subg["follow"].number_of_edges() == 2
assert subg["play"].number_of_edges() == 1
assert subg["liked-by"].number_of_edges() == 0
assert subg["flips"].number_of_edges() == 4
def _test_sample_neighbors_topk_outedge(hypersparse):
g, hg = _gen_neighbor_topk_test_graph(hypersparse, True)
def _test1():
subg = dgl.sampling.select_topk(g, -1, 'weight', [0, 1], edge_dir='out')
subg = dgl.sampling.select_topk(g, -1, "weight", [0, 1], edge_dir="out")
assert subg.number_of_nodes() == g.number_of_nodes()
u, v = subg.edges()
u_ans, v_ans = subg.out_edges([0, 1])
......@@ -621,17 +904,18 @@ def _test_sample_neighbors_topk_outedge(hypersparse):
uv_ans = set(zip(F.asnumpy(u_ans), F.asnumpy(v_ans)))
assert uv == uv_ans
subg = dgl.sampling.select_topk(g, 2, 'weight', [0, 1], edge_dir='out')
subg = dgl.sampling.select_topk(g, 2, "weight", [0, 1], edge_dir="out")
assert subg.number_of_nodes() == g.number_of_nodes()
assert subg.number_of_edges() == 4
u, v = subg.edges()
edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
assert F.array_equal(g.edge_ids(u, v), subg.edata[dgl.EID])
assert edge_set == {(0,2),(0,1),(1,2),(1,3)}
assert edge_set == {(0, 2), (0, 1), (1, 2), (1, 3)}
_test1()
def _test2(): # k > #neighbors
subg = dgl.sampling.select_topk(g, -1, 'weight', [0, 2], edge_dir='out')
subg = dgl.sampling.select_topk(g, -1, "weight", [0, 2], edge_dir="out")
assert subg.number_of_nodes() == g.number_of_nodes()
u, v = subg.edges()
u_ans, v_ans = subg.out_edges([0, 2])
......@@ -639,118 +923,177 @@ def _test_sample_neighbors_topk_outedge(hypersparse):
uv_ans = set(zip(F.asnumpy(u_ans), F.asnumpy(v_ans)))
assert uv == uv_ans
subg = dgl.sampling.select_topk(g, 2, 'weight', [0, 2], edge_dir='out')
subg = dgl.sampling.select_topk(g, 2, "weight", [0, 2], edge_dir="out")
assert subg.number_of_nodes() == g.number_of_nodes()
assert subg.number_of_edges() == 3
u, v = subg.edges()
edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
assert F.array_equal(g.edge_ids(u, v), subg.edata[dgl.EID])
assert edge_set == {(0,2),(0,1),(2,0)}
assert edge_set == {(0, 2), (0, 1), (2, 0)}
_test2()
def _test3():
subg = dgl.sampling.select_topk(hg, 2, 'weight', {'user' : [0,1], 'game' : 0}, edge_dir='out')
subg = dgl.sampling.select_topk(
hg, 2, "weight", {"user": [0, 1], "game": 0}, edge_dir="out"
)
assert len(subg.ntypes) == 3
assert len(subg.etypes) == 4
u, v = subg['follow'].edges()
u, v = subg["follow"].edges()
edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
assert F.array_equal(hg['follow'].edge_ids(u, v), subg['follow'].edata[dgl.EID])
assert edge_set == {(0,2),(0,1),(1,2),(1,3)}
u, v = subg['play'].edges()
assert F.array_equal(
hg["follow"].edge_ids(u, v), subg["follow"].edata[dgl.EID]
)
assert edge_set == {(0, 2), (0, 1), (1, 2), (1, 3)}
u, v = subg["play"].edges()
edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
assert F.array_equal(hg['play'].edge_ids(u, v), subg['play'].edata[dgl.EID])
assert edge_set == {(0,0)}
u, v = subg['liked-by'].edges()
assert F.array_equal(
hg["play"].edge_ids(u, v), subg["play"].edata[dgl.EID]
)
assert edge_set == {(0, 0)}
u, v = subg["liked-by"].edges()
edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
assert F.array_equal(hg['liked-by'].edge_ids(u, v), subg['liked-by'].edata[dgl.EID])
assert edge_set == {(0,2),(1,2),(0,1)}
assert subg['flips'].number_of_edges() == 0
assert F.array_equal(
hg["liked-by"].edge_ids(u, v), subg["liked-by"].edata[dgl.EID]
)
assert edge_set == {(0, 2), (1, 2), (0, 1)}
assert subg["flips"].number_of_edges() == 0
_test3()
def test_sample_neighbors_noprob():
_test_sample_neighbors(False, None)
#_test_sample_neighbors(True)
# _test_sample_neighbors(True)
def test_sample_neighbors_prob():
_test_sample_neighbors(False, 'prob')
#_test_sample_neighbors(True)
_test_sample_neighbors(False, "prob")
# _test_sample_neighbors(True)
def test_sample_neighbors_outedge():
_test_sample_neighbors_outedge(False)
#_test_sample_neighbors_outedge(True)
# _test_sample_neighbors_outedge(True)
@unittest.skipIf(F.backend_name == 'mxnet', reason='MXNet has problem converting bool arrays')
@unittest.skipIf(F._default_context_str == 'gpu', reason="GPU sample neighbors with mask not implemented")
@unittest.skipIf(
F.backend_name == "mxnet", reason="MXNet has problem converting bool arrays"
)
@unittest.skipIf(
F._default_context_str == "gpu",
reason="GPU sample neighbors with mask not implemented",
)
def test_sample_neighbors_mask():
_test_sample_neighbors(False, 'mask')
_test_sample_neighbors(False, "mask")
@unittest.skipIf(F._default_context_str == 'gpu', reason="GPU sample neighbors not implemented")
@unittest.skipIf(
F._default_context_str == "gpu",
reason="GPU sample neighbors not implemented",
)
def test_sample_neighbors_topk():
_test_sample_neighbors_topk(False)
#_test_sample_neighbors_topk(True)
# _test_sample_neighbors_topk(True)
@unittest.skipIf(F._default_context_str == 'gpu', reason="GPU sample neighbors not implemented")
@unittest.skipIf(
F._default_context_str == "gpu",
reason="GPU sample neighbors not implemented",
)
def test_sample_neighbors_topk_outedge():
_test_sample_neighbors_topk_outedge(False)
#_test_sample_neighbors_topk_outedge(True)
# _test_sample_neighbors_topk_outedge(True)
def test_sample_neighbors_with_0deg():
g = dgl.graph(([], []), num_nodes=5).to(F.ctx())
sg = dgl.sampling.sample_neighbors(g, F.tensor([1, 2], dtype=F.int64), 2, edge_dir='in', replace=False)
sg = dgl.sampling.sample_neighbors(
g, F.tensor([1, 2], dtype=F.int64), 2, edge_dir="in", replace=False
)
assert sg.number_of_edges() == 0
sg = dgl.sampling.sample_neighbors(g, F.tensor([1, 2], dtype=F.int64), 2, edge_dir='in', replace=True)
sg = dgl.sampling.sample_neighbors(
g, F.tensor([1, 2], dtype=F.int64), 2, edge_dir="in", replace=True
)
assert sg.number_of_edges() == 0
sg = dgl.sampling.sample_neighbors(g, F.tensor([1, 2], dtype=F.int64), 2, edge_dir='out', replace=False)
sg = dgl.sampling.sample_neighbors(
g, F.tensor([1, 2], dtype=F.int64), 2, edge_dir="out", replace=False
)
assert sg.number_of_edges() == 0
sg = dgl.sampling.sample_neighbors(g, F.tensor([1, 2], dtype=F.int64), 2, edge_dir='out', replace=True)
sg = dgl.sampling.sample_neighbors(
g, F.tensor([1, 2], dtype=F.int64), 2, edge_dir="out", replace=True
)
assert sg.number_of_edges() == 0
def create_test_graph(num_nodes, num_edges_per_node, bipartite=False):
src = np.concatenate(
[np.array([i] * num_edges_per_node) for i in range(num_nodes)])
[np.array([i] * num_edges_per_node) for i in range(num_nodes)]
)
dst = np.concatenate(
[np.random.choice(num_nodes, num_edges_per_node, replace=False) for i in range(num_nodes)]
[
np.random.choice(num_nodes, num_edges_per_node, replace=False)
for i in range(num_nodes)
]
)
if bipartite:
g = dgl.heterograph({("u", "e", "v") : (src, dst)})
g = dgl.heterograph({("u", "e", "v"): (src, dst)})
else:
g = dgl.graph((src, dst))
return g
def create_etype_test_graph(num_nodes, num_edges_per_node, rare_cnt):
src = np.concatenate(
[np.random.choice(num_nodes, num_edges_per_node, replace=False) for i in range(num_nodes)]
[
np.random.choice(num_nodes, num_edges_per_node, replace=False)
for i in range(num_nodes)
]
)
dst = np.concatenate(
[np.array([i] * num_edges_per_node) for i in range(num_nodes)])
[np.array([i] * num_edges_per_node) for i in range(num_nodes)]
)
minor_src = np.concatenate(
[np.random.choice(num_nodes, 2, replace=False) for i in range(num_nodes)]
[
np.random.choice(num_nodes, 2, replace=False)
for i in range(num_nodes)
]
)
minor_dst = np.concatenate(
[np.array([i] * 2) for i in range(num_nodes)])
minor_dst = np.concatenate([np.array([i] * 2) for i in range(num_nodes)])
most_zero_src = np.concatenate(
[np.random.choice(num_nodes, num_edges_per_node, replace=False) for i in range(rare_cnt)]
[
np.random.choice(num_nodes, num_edges_per_node, replace=False)
for i in range(rare_cnt)
]
)
most_zero_dst = np.concatenate(
[np.array([i] * num_edges_per_node) for i in range(rare_cnt)])
[np.array([i] * num_edges_per_node) for i in range(rare_cnt)]
)
g = dgl.heterograph({("v", "e_major", "u") : (src, dst),
("u", "e_major_rev", "v") : (dst, src),
("v2", "e_minor", "u") : (minor_src, minor_dst),
("v2", "most_zero", "u") : (most_zero_src, most_zero_dst),
("u", "e_minor_rev", "v2") : (minor_dst, minor_src)})
g = dgl.heterograph(
{
("v", "e_major", "u"): (src, dst),
("u", "e_major_rev", "v"): (dst, src),
("v2", "e_minor", "u"): (minor_src, minor_dst),
("v2", "most_zero", "u"): (most_zero_src, most_zero_dst),
("u", "e_minor_rev", "v2"): (minor_dst, minor_src),
}
)
for etype in g.etypes:
prob = np.random.rand(g.num_edges(etype))
prob[prob > 0.2] = 0
g.edges[etype].data['p'] = F.zerocopy_from_numpy(prob)
g.edges[etype].data['mask'] = F.zerocopy_from_numpy(prob != 0)
g.edges[etype].data["p"] = F.zerocopy_from_numpy(prob)
g.edges[etype].data["mask"] = F.zerocopy_from_numpy(prob != 0)
return g
@unittest.skipIf(F._default_context_str == 'gpu', reason="GPU sample neighbors not implemented")
@unittest.skipIf(
F._default_context_str == "gpu",
reason="GPU sample neighbors not implemented",
)
def test_sample_neighbors_biased_homogeneous():
g = create_test_graph(100, 30)
......@@ -769,7 +1112,9 @@ def test_sample_neighbors_biased_homogeneous():
# inedge / without replacement
g_sorted = dgl.sort_csc_by_tag(g, tag)
for _ in range(5):
subg = dgl.sampling.sample_neighbors_biased(g_sorted, g.nodes(), 5, bias, replace=False)
subg = dgl.sampling.sample_neighbors_biased(
g_sorted, g.nodes(), 5, bias, replace=False
)
check_num(subg.edges()[0], tag)
u, v = subg.edges()
edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
......@@ -777,13 +1122,17 @@ def test_sample_neighbors_biased_homogeneous():
# inedge / with replacement
for _ in range(5):
subg = dgl.sampling.sample_neighbors_biased(g_sorted, g.nodes(), 5, bias, replace=True)
subg = dgl.sampling.sample_neighbors_biased(
g_sorted, g.nodes(), 5, bias, replace=True
)
check_num(subg.edges()[0], tag)
# outedge / without replacement
g_sorted = dgl.sort_csr_by_tag(g, tag)
for _ in range(5):
subg = dgl.sampling.sample_neighbors_biased(g_sorted, g.nodes(), 5, bias, edge_dir='out', replace=False)
subg = dgl.sampling.sample_neighbors_biased(
g_sorted, g.nodes(), 5, bias, edge_dir="out", replace=False
)
check_num(subg.edges()[1], tag)
u, v = subg.edges()
edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
......@@ -791,14 +1140,21 @@ def test_sample_neighbors_biased_homogeneous():
# outedge / with replacement
for _ in range(5):
subg = dgl.sampling.sample_neighbors_biased(g_sorted, g.nodes(), 5, bias, edge_dir='out', replace=True)
subg = dgl.sampling.sample_neighbors_biased(
g_sorted, g.nodes(), 5, bias, edge_dir="out", replace=True
)
check_num(subg.edges()[1], tag)
@unittest.skipIf(F._default_context_str == 'gpu', reason="GPU sample neighbors not implemented")
@unittest.skipIf(
F._default_context_str == "gpu",
reason="GPU sample neighbors not implemented",
)
def test_sample_neighbors_biased_bipartite():
g = create_test_graph(100, 30, True)
num_dst = g.number_of_dst_nodes()
bias = F.tensor([0, 0.01, 10, 10], dtype=F.float32)
def check_num(nodes, tag):
nodes, tag = F.asnumpy(nodes), F.asnumpy(tag)
cnt = [sum(tag[nodes] == i) for i in range(4)]
......@@ -813,7 +1169,9 @@ def test_sample_neighbors_biased_bipartite():
tag = F.tensor(np.random.choice(4, 100))
g_sorted = dgl.sort_csc_by_tag(g, tag)
for _ in range(5):
subg = dgl.sampling.sample_neighbors_biased(g_sorted, g.dstnodes(), 5, bias, replace=False)
subg = dgl.sampling.sample_neighbors_biased(
g_sorted, g.dstnodes(), 5, bias, replace=False
)
check_num(subg.edges()[0], tag)
u, v = subg.edges()
edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
......@@ -821,14 +1179,18 @@ def test_sample_neighbors_biased_bipartite():
# inedge / with replacement
for _ in range(5):
subg = dgl.sampling.sample_neighbors_biased(g_sorted, g.dstnodes(), 5, bias, replace=True)
subg = dgl.sampling.sample_neighbors_biased(
g_sorted, g.dstnodes(), 5, bias, replace=True
)
check_num(subg.edges()[0], tag)
# outedge / without replacement
tag = F.tensor(np.random.choice(4, num_dst))
g_sorted = dgl.sort_csr_by_tag(g, tag)
for _ in range(5):
subg = dgl.sampling.sample_neighbors_biased(g_sorted, g.srcnodes(), 5, bias, edge_dir='out', replace=False)
subg = dgl.sampling.sample_neighbors_biased(
g_sorted, g.srcnodes(), 5, bias, edge_dir="out", replace=False
)
check_num(subg.edges()[1], tag)
u, v = subg.edges()
edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
......@@ -836,23 +1198,31 @@ def test_sample_neighbors_biased_bipartite():
# outedge / with replacement
for _ in range(5):
subg = dgl.sampling.sample_neighbors_biased(g_sorted, g.srcnodes(), 5, bias, edge_dir='out', replace=True)
subg = dgl.sampling.sample_neighbors_biased(
g_sorted, g.srcnodes(), 5, bias, edge_dir="out", replace=True
)
check_num(subg.edges()[1], tag)
@unittest.skipIf(F._default_context_str == 'gpu', reason="GPU sample neighbors not implemented")
@unittest.skipIf(F.backend_name == 'mxnet', reason='MXNet has problem converting bool arrays')
@pytest.mark.parametrize('format_', ['coo', 'csr', 'csc'])
@pytest.mark.parametrize('direction', ['in', 'out'])
@pytest.mark.parametrize('replace', [False, True])
@unittest.skipIf(
F._default_context_str == "gpu",
reason="GPU sample neighbors not implemented",
)
@unittest.skipIf(
F.backend_name == "mxnet", reason="MXNet has problem converting bool arrays"
)
@pytest.mark.parametrize("format_", ["coo", "csr", "csc"])
@pytest.mark.parametrize("direction", ["in", "out"])
@pytest.mark.parametrize("replace", [False, True])
def test_sample_neighbors_etype_homogeneous(format_, direction, replace):
num_nodes = 100
rare_cnt = 4
g = create_etype_test_graph(100, 30, rare_cnt)
h_g = dgl.to_homogeneous(g, edata=['p', 'mask'])
h_g = dgl.to_homogeneous(g, edata=["p", "mask"])
h_g_etype = F.asnumpy(h_g.edata[dgl.ETYPE])
h_g_offset = np.cumsum(np.insert(np.bincount(h_g_etype), 0, 0)).tolist()
sg = g.edge_subgraph(g.edata['mask'], relabel_nodes=False)
h_sg = h_g.edge_subgraph(h_g.edata['mask'], relabel_nodes=False)
sg = g.edge_subgraph(g.edata["mask"], relabel_nodes=False)
h_sg = h_g.edge_subgraph(h_g.edata["mask"], relabel_nodes=False)
h_sg_etype = F.asnumpy(h_sg.edata[dgl.ETYPE])
h_sg_offset = np.cumsum(np.insert(np.bincount(h_sg_etype), 0, 0)).tolist()
......@@ -883,7 +1253,7 @@ def test_sample_neighbors_etype_homogeneous(format_, direction, replace):
all_dst_per_etype.append(all_dst[all_etype_array == etype])
if replace:
if direction == 'in':
if direction == "in":
in_degree_per_etype = [np.bincount(d) for d in dst_per_etype]
for etype in range(len(fanouts)):
in_degree = in_degree_per_etype[etype]
......@@ -902,7 +1272,7 @@ def test_sample_neighbors_etype_homogeneous(format_, direction, replace):
ans[all_src_per_etype[etype]] = fanout
assert np.all(out_degree == ans)
else:
if direction == 'in':
if direction == "in":
for v in set(dst):
u = src[dst == v]
et = etype_array[dst == v]
......@@ -911,7 +1281,9 @@ def test_sample_neighbors_etype_homogeneous(format_, direction, replace):
for etype in set(et):
u_etype = set(u[et == etype])
all_u_etype = set(all_u[all_et == etype])
assert (len(u_etype) == fanouts[etype]) or (u_etype == all_u_etype)
assert (len(u_etype) == fanouts[etype]) or (
u_etype == all_u_etype
)
else:
for u in set(src):
v = dst[src == u]
......@@ -921,36 +1293,59 @@ def test_sample_neighbors_etype_homogeneous(format_, direction, replace):
for etype in set(et):
v_etype = set(v[et == etype])
all_v_etype = set(all_v[all_et == etype])
assert (len(v_etype) == fanouts[etype]) or (v_etype == all_v_etype)
assert (len(v_etype) == fanouts[etype]) or (
v_etype == all_v_etype
)
all_src, all_dst = h_g.edges()
all_sub_src, all_sub_dst = h_sg.edges()
h_g = h_g.formats(format_)
if (direction, format_) in [('in', 'csr'), ('out', 'csc')]:
h_g = h_g.formats(['csc', 'csr', 'coo'])
if (direction, format_) in [("in", "csr"), ("out", "csc")]:
h_g = h_g.formats(["csc", "csr", "coo"])
for _ in range(5):
subg = dgl.sampling.sample_etype_neighbors(
h_g, seeds, h_g_offset, fanouts, replace=replace,
edge_dir=direction)
h_g, seeds, h_g_offset, fanouts, replace=replace, edge_dir=direction
)
check_num(h_g, all_src, all_dst, subg, replace, fanouts, direction)
p = [g.edges[etype].data['p'] for etype in g.etypes]
p = [g.edges[etype].data["p"] for etype in g.etypes]
subg = dgl.sampling.sample_etype_neighbors(
h_g, seeds, h_g_offset, fanouts, replace=replace,
edge_dir=direction, prob=p)
check_num(h_sg, all_sub_src, all_sub_dst, subg, replace, fanouts, direction)
p = [g.edges[etype].data['mask'] for etype in g.etypes]
h_g,
seeds,
h_g_offset,
fanouts,
replace=replace,
edge_dir=direction,
prob=p,
)
check_num(
h_sg, all_sub_src, all_sub_dst, subg, replace, fanouts, direction
)
p = [g.edges[etype].data["mask"] for etype in g.etypes]
subg = dgl.sampling.sample_etype_neighbors(
h_g, seeds, h_g_offset, fanouts, replace=replace,
edge_dir=direction, prob=p)
check_num(h_sg, all_sub_src, all_sub_dst, subg, replace, fanouts, direction)
@unittest.skipIf(F._default_context_str == 'gpu', reason="GPU sample neighbors not implemented")
@unittest.skipIf(F.backend_name == 'mxnet', reason='MXNet has problem converting bool arrays')
@pytest.mark.parametrize('format_', ['csr', 'csc'])
@pytest.mark.parametrize('direction', ['in', 'out'])
h_g,
seeds,
h_g_offset,
fanouts,
replace=replace,
edge_dir=direction,
prob=p,
)
check_num(
h_sg, all_sub_src, all_sub_dst, subg, replace, fanouts, direction
)
@unittest.skipIf(
F._default_context_str == "gpu",
reason="GPU sample neighbors not implemented",
)
@unittest.skipIf(
F.backend_name == "mxnet", reason="MXNet has problem converting bool arrays"
)
@pytest.mark.parametrize("format_", ["csr", "csc"])
@pytest.mark.parametrize("direction", ["in", "out"])
def test_sample_neighbors_etype_sorted_homogeneous(format_, direction):
rare_cnt = 4
g = create_etype_test_graph(100, 30, rare_cnt)
......@@ -959,33 +1354,49 @@ def test_sample_neighbors_etype_sorted_homogeneous(format_, direction):
seeds = F.nonzero_1d(h_g.ndata[dgl.NTYPE] == seed_ntype)
fanouts = F.tensor([6, 5, -1, 3, 2], dtype=F.int64)
h_g = h_g.formats(format_)
if (direction, format_) in [('in', 'csr'), ('out', 'csc')]:
h_g = h_g.formats(['csc', 'csr', 'coo'])
if (direction, format_) in [("in", "csr"), ("out", "csc")]:
h_g = h_g.formats(["csc", "csr", "coo"])
if direction == 'in':
h_g = dgl.sort_csc_by_tag(h_g, h_g.edata[dgl.ETYPE], tag_type='edge')
if direction == "in":
h_g = dgl.sort_csc_by_tag(h_g, h_g.edata[dgl.ETYPE], tag_type="edge")
else:
h_g = dgl.sort_csr_by_tag(h_g, h_g.edata[dgl.ETYPE], tag_type='edge')
h_g = dgl.sort_csr_by_tag(h_g, h_g.edata[dgl.ETYPE], tag_type="edge")
# shuffle
h_g_etype = F.asnumpy(h_g.edata[dgl.ETYPE])
h_g_offset = np.cumsum(np.insert(np.bincount(h_g_etype), 0, 0)).tolist()
sg = dgl.sampling.sample_etype_neighbors(
h_g, seeds, h_g_offset, fanouts, edge_dir=direction, etype_sorted=True)
h_g, seeds, h_g_offset, fanouts, edge_dir=direction, etype_sorted=True
)
@pytest.mark.parametrize('dtype', ['int32', 'int64'])
@pytest.mark.parametrize("dtype", ["int32", "int64"])
def test_sample_neighbors_exclude_edges_heteroG(dtype):
d_i_d_u_nodes = F.zerocopy_from_numpy(np.unique(np.random.randint(300, size=100, dtype=dtype)))
d_i_d_v_nodes = F.zerocopy_from_numpy(np.random.randint(25, size=d_i_d_u_nodes.shape, dtype=dtype))
d_i_g_u_nodes = F.zerocopy_from_numpy(np.unique(np.random.randint(300, size=100, dtype=dtype)))
d_i_g_v_nodes = F.zerocopy_from_numpy(np.random.randint(25, size=d_i_g_u_nodes.shape, dtype=dtype))
d_t_d_u_nodes = F.zerocopy_from_numpy(np.unique(np.random.randint(300, size=100, dtype=dtype)))
d_t_d_v_nodes = F.zerocopy_from_numpy(np.random.randint(25, size=d_t_d_u_nodes.shape, dtype=dtype))
g = dgl.heterograph({
('drug', 'interacts', 'drug'): (d_i_d_u_nodes, d_i_d_v_nodes),
('drug', 'interacts', 'gene'): (d_i_g_u_nodes, d_i_g_v_nodes),
('drug', 'treats', 'disease'): (d_t_d_u_nodes, d_t_d_v_nodes)
}).to(F.ctx())
d_i_d_u_nodes = F.zerocopy_from_numpy(
np.unique(np.random.randint(300, size=100, dtype=dtype))
)
d_i_d_v_nodes = F.zerocopy_from_numpy(
np.random.randint(25, size=d_i_d_u_nodes.shape, dtype=dtype)
)
d_i_g_u_nodes = F.zerocopy_from_numpy(
np.unique(np.random.randint(300, size=100, dtype=dtype))
)
d_i_g_v_nodes = F.zerocopy_from_numpy(
np.random.randint(25, size=d_i_g_u_nodes.shape, dtype=dtype)
)
d_t_d_u_nodes = F.zerocopy_from_numpy(
np.unique(np.random.randint(300, size=100, dtype=dtype))
)
d_t_d_v_nodes = F.zerocopy_from_numpy(
np.random.randint(25, size=d_t_d_u_nodes.shape, dtype=dtype)
)
g = dgl.heterograph(
{
("drug", "interacts", "drug"): (d_i_d_u_nodes, d_i_d_v_nodes),
("drug", "interacts", "gene"): (d_i_g_u_nodes, d_i_g_v_nodes),
("drug", "treats", "disease"): (d_t_d_u_nodes, d_t_d_v_nodes),
}
).to(F.ctx())
(U, V, EID) = (0, 1, 2)
......@@ -995,7 +1406,9 @@ def test_sample_neighbors_exclude_edges_heteroG(dtype):
did_e_idx = np.random.randint(low=25, high=49, dtype=dtype)
sampled_amount = np.random.randint(low=1, high=10, dtype=dtype)
drug_i_drug_edges = g.all_edges(form='all', etype=('drug','interacts','drug'))
drug_i_drug_edges = g.all_edges(
form="all", etype=("drug", "interacts", "drug")
)
excluded_d_i_d_edges = drug_i_drug_edges[EID][did_b_idx:did_e_idx]
sampled_drug_node = drug_i_drug_edges[V][nd_b_idx:nd_e_idx]
did_excluded_nodes_U = drug_i_drug_edges[U][did_b_idx:did_e_idx]
......@@ -1005,7 +1418,9 @@ def test_sample_neighbors_exclude_edges_heteroG(dtype):
nd_e_idx = np.random.randint(low=25, high=49, dtype=dtype)
dig_b_idx = np.random.randint(low=1, high=24, dtype=dtype)
dig_e_idx = np.random.randint(low=25, high=49, dtype=dtype)
drug_i_gene_edges = g.all_edges(form='all', etype=('drug','interacts','gene'))
drug_i_gene_edges = g.all_edges(
form="all", etype=("drug", "interacts", "gene")
)
excluded_d_i_g_edges = drug_i_gene_edges[EID][dig_b_idx:dig_e_idx]
dig_excluded_nodes_U = drug_i_gene_edges[U][dig_b_idx:dig_e_idx]
dig_excluded_nodes_V = drug_i_gene_edges[V][dig_b_idx:dig_e_idx]
......@@ -1015,65 +1430,110 @@ def test_sample_neighbors_exclude_edges_heteroG(dtype):
nd_e_idx = np.random.randint(low=25, high=49, dtype=dtype)
dtd_b_idx = np.random.randint(low=1, high=24, dtype=dtype)
dtd_e_idx = np.random.randint(low=25, high=49, dtype=dtype)
drug_t_dis_edges = g.all_edges(form='all', etype=('drug','treats','disease'))
drug_t_dis_edges = g.all_edges(
form="all", etype=("drug", "treats", "disease")
)
excluded_d_t_d_edges = drug_t_dis_edges[EID][dtd_b_idx:dtd_e_idx]
dtd_excluded_nodes_U = drug_t_dis_edges[U][dtd_b_idx:dtd_e_idx]
dtd_excluded_nodes_V = drug_t_dis_edges[V][dtd_b_idx:dtd_e_idx]
sampled_disease_node = drug_t_dis_edges[V][nd_b_idx:nd_e_idx]
excluded_edges = {('drug', 'interacts', 'drug'): excluded_d_i_d_edges,
('drug', 'interacts', 'gene'): excluded_d_i_g_edges,
('drug', 'treats', 'disease'): excluded_d_t_d_edges
}
sg = dgl.sampling.sample_neighbors(g, {'drug': sampled_drug_node,
'gene': sampled_gene_node,
'disease': sampled_disease_node},
sampled_amount, exclude_edges=excluded_edges)
assert not np.any(F.asnumpy(sg.has_edges_between(did_excluded_nodes_U,did_excluded_nodes_V,
etype=('drug','interacts','drug'))))
assert not np.any(F.asnumpy(sg.has_edges_between(dig_excluded_nodes_U,dig_excluded_nodes_V,
etype=('drug','interacts','gene'))))
assert not np.any(F.asnumpy(sg.has_edges_between(dtd_excluded_nodes_U,dtd_excluded_nodes_V,
etype=('drug','treats','disease'))))
@pytest.mark.parametrize('dtype', ['int32', 'int64'])
excluded_edges = {
("drug", "interacts", "drug"): excluded_d_i_d_edges,
("drug", "interacts", "gene"): excluded_d_i_g_edges,
("drug", "treats", "disease"): excluded_d_t_d_edges,
}
sg = dgl.sampling.sample_neighbors(
g,
{
"drug": sampled_drug_node,
"gene": sampled_gene_node,
"disease": sampled_disease_node,
},
sampled_amount,
exclude_edges=excluded_edges,
)
assert not np.any(
F.asnumpy(
sg.has_edges_between(
did_excluded_nodes_U,
did_excluded_nodes_V,
etype=("drug", "interacts", "drug"),
)
)
)
assert not np.any(
F.asnumpy(
sg.has_edges_between(
dig_excluded_nodes_U,
dig_excluded_nodes_V,
etype=("drug", "interacts", "gene"),
)
)
)
assert not np.any(
F.asnumpy(
sg.has_edges_between(
dtd_excluded_nodes_U,
dtd_excluded_nodes_V,
etype=("drug", "treats", "disease"),
)
)
)
@pytest.mark.parametrize("dtype", ["int32", "int64"])
def test_sample_neighbors_exclude_edges_homoG(dtype):
u_nodes = F.zerocopy_from_numpy(np.unique(np.random.randint(300,size=100, dtype=dtype)))
v_nodes = F.zerocopy_from_numpy(np.random.randint(25, size=u_nodes.shape, dtype=dtype))
u_nodes = F.zerocopy_from_numpy(
np.unique(np.random.randint(300, size=100, dtype=dtype))
)
v_nodes = F.zerocopy_from_numpy(
np.random.randint(25, size=u_nodes.shape, dtype=dtype)
)
g = dgl.graph((u_nodes, v_nodes)).to(F.ctx())
(U, V, EID) = (0, 1, 2)
nd_b_idx = np.random.randint(low=1,high=24, dtype=dtype)
nd_e_idx = np.random.randint(low=25,high=49, dtype=dtype)
b_idx = np.random.randint(low=1,high=24, dtype=dtype)
e_idx = np.random.randint(low=25,high=49, dtype=dtype)
sampled_amount = np.random.randint(low=1,high=10, dtype=dtype)
nd_b_idx = np.random.randint(low=1, high=24, dtype=dtype)
nd_e_idx = np.random.randint(low=25, high=49, dtype=dtype)
b_idx = np.random.randint(low=1, high=24, dtype=dtype)
e_idx = np.random.randint(low=25, high=49, dtype=dtype)
sampled_amount = np.random.randint(low=1, high=10, dtype=dtype)
g_edges = g.all_edges(form='all')
g_edges = g.all_edges(form="all")
excluded_edges = g_edges[EID][b_idx:e_idx]
sampled_node = g_edges[V][nd_b_idx:nd_e_idx]
excluded_nodes_U = g_edges[U][b_idx:e_idx]
excluded_nodes_V = g_edges[V][b_idx:e_idx]
sg = dgl.sampling.sample_neighbors(g, sampled_node,
sampled_amount, exclude_edges=excluded_edges)
sg = dgl.sampling.sample_neighbors(
g, sampled_node, sampled_amount, exclude_edges=excluded_edges
)
assert not np.any(
F.asnumpy(sg.has_edges_between(excluded_nodes_U, excluded_nodes_V))
)
assert not np.any(F.asnumpy(sg.has_edges_between(excluded_nodes_U,excluded_nodes_V)))
@pytest.mark.parametrize('dtype', ['int32', 'int64'])
@pytest.mark.parametrize("dtype", ["int32", "int64"])
def test_global_uniform_negative_sampling(dtype):
g = dgl.graph(([], []), num_nodes=1000).to(F.ctx())
src, dst = dgl.sampling.global_uniform_negative_sampling(g, 2000, False, True)
src, dst = dgl.sampling.global_uniform_negative_sampling(
g, 2000, False, True
)
assert len(src) == 2000
assert len(dst) == 2000
g = dgl.graph((np.random.randint(0, 20, (300,)), np.random.randint(0, 20, (300,)))).to(F.ctx())
g = dgl.graph(
(np.random.randint(0, 20, (300,)), np.random.randint(0, 20, (300,)))
).to(F.ctx())
src, dst = dgl.sampling.global_uniform_negative_sampling(g, 20, False, True)
assert not F.asnumpy(g.has_edges_between(src, dst)).any()
src, dst = dgl.sampling.global_uniform_negative_sampling(g, 20, False, False)
src, dst = dgl.sampling.global_uniform_negative_sampling(
g, 20, False, False
)
assert not F.asnumpy(g.has_edges_between(src, dst)).any()
src = F.asnumpy(src)
dst = F.asnumpy(dst)
......@@ -1081,7 +1541,9 @@ def test_global_uniform_negative_sampling(dtype):
assert len(s) == len(src)
g = dgl.graph(([0], [1])).to(F.ctx())
src, dst = dgl.sampling.global_uniform_negative_sampling(g, 20, True, False, redundancy=10)
src, dst = dgl.sampling.global_uniform_negative_sampling(
g, 20, True, False, redundancy=10
)
src = F.asnumpy(src)
dst = F.asnumpy(dst)
# should have either no element or (1, 0)
......@@ -1091,21 +1553,33 @@ def test_global_uniform_negative_sampling(dtype):
assert src[0] == 1
assert dst[0] == 0
g = dgl.heterograph({
('A', 'AB', 'B'): (np.random.randint(0, 20, (300,)), np.random.randint(0, 40, (300,))),
('B', 'BA', 'A'): (np.random.randint(0, 40, (200,)), np.random.randint(0, 20, (200,)))}).to(F.ctx())
src, dst = dgl.sampling.global_uniform_negative_sampling(g, 20, False, etype='AB')
assert not F.asnumpy(g.has_edges_between(src, dst, etype='AB')).any()
g = dgl.heterograph(
{
("A", "AB", "B"): (
np.random.randint(0, 20, (300,)),
np.random.randint(0, 40, (300,)),
),
("B", "BA", "A"): (
np.random.randint(0, 40, (200,)),
np.random.randint(0, 20, (200,)),
),
}
).to(F.ctx())
src, dst = dgl.sampling.global_uniform_negative_sampling(
g, 20, False, etype="AB"
)
assert not F.asnumpy(g.has_edges_between(src, dst, etype="AB")).any()
if __name__ == '__main__':
if __name__ == "__main__":
from itertools import product
test_sample_neighbors_noprob()
test_sample_neighbors_prob()
test_sample_neighbors_mask()
for args in product(['coo', 'csr', 'csc'], ['in', 'out'], [False, True]):
for args in product(["coo", "csr", "csc"], ["in", "out"], [False, True]):
test_sample_neighbors_etype_homogeneous(*args)
for args in product(['csr', 'csc'], ['in', 'out']):
for args in product(["csr", "csc"], ["in", "out"]):
test_sample_neighbors_etype_sorted_homogeneous(*args)
test_non_uniform_random_walk(False)
test_uniform_random_walk(False)
......@@ -1117,7 +1591,7 @@ if __name__ == '__main__':
test_sample_neighbors_with_0deg()
test_sample_neighbors_biased_homogeneous()
test_sample_neighbors_biased_bipartite()
test_sample_neighbors_exclude_edges_heteroG('int32')
test_sample_neighbors_exclude_edges_homoG('int32')
test_global_uniform_negative_sampling('int32')
test_global_uniform_negative_sampling('int64')
test_sample_neighbors_exclude_edges_heteroG("int32")
test_sample_neighbors_exclude_edges_homoG("int32")
test_global_uniform_negative_sampling("int32")
test_global_uniform_negative_sampling("int64")
import unittest
import backend as F
import dgl
import numpy as np
import backend as F
import unittest
from test_utils import parametrize_idtype
def tree1(idtype):
"""Generate a tree
0
......@@ -19,10 +22,11 @@ def tree1(idtype):
g.add_edges(4, 1)
g.add_edges(1, 0)
g.add_edges(2, 0)
g.ndata['h'] = F.tensor([0, 1, 2, 3, 4])
g.edata['h'] = F.randn((4, 10))
g.ndata["h"] = F.tensor([0, 1, 2, 3, 4])
g.edata["h"] = F.randn((4, 10))
return g
def tree2(idtype):
"""Generate a tree
1
......@@ -38,10 +42,11 @@ def tree2(idtype):
g.add_edges(0, 4)
g.add_edges(4, 1)
g.add_edges(3, 1)
g.ndata['h'] = F.tensor([0, 1, 2, 3, 4])
g.edata['h'] = F.randn((4, 10))
g.ndata["h"] = F.tensor([0, 1, 2, 3, 4])
g.edata["h"] = F.randn((4, 10))
return g
@parametrize_idtype
def test_batch_unbatch(idtype):
t1 = tree1(idtype)
......@@ -55,10 +60,11 @@ def test_batch_unbatch(idtype):
assert F.allclose(bg.batch_num_edges(), F.tensor([4, 4]))
tt1, tt2 = dgl.unbatch(bg)
assert F.allclose(t1.ndata['h'], tt1.ndata['h'])
assert F.allclose(t1.edata['h'], tt1.edata['h'])
assert F.allclose(t2.ndata['h'], tt2.ndata['h'])
assert F.allclose(t2.edata['h'], tt2.edata['h'])
assert F.allclose(t1.ndata["h"], tt1.ndata["h"])
assert F.allclose(t1.edata["h"], tt1.edata["h"])
assert F.allclose(t2.ndata["h"], tt2.ndata["h"])
assert F.allclose(t2.edata["h"], tt2.edata["h"])
@parametrize_idtype
def test_batch_unbatch1(idtype):
......@@ -73,14 +79,18 @@ def test_batch_unbatch1(idtype):
assert F.allclose(b2.batch_num_edges(), F.tensor([4, 4, 4]))
s1, s2, s3 = dgl.unbatch(b2)
assert F.allclose(t2.ndata['h'], s1.ndata['h'])
assert F.allclose(t2.edata['h'], s1.edata['h'])
assert F.allclose(t1.ndata['h'], s2.ndata['h'])
assert F.allclose(t1.edata['h'], s2.edata['h'])
assert F.allclose(t2.ndata['h'], s3.ndata['h'])
assert F.allclose(t2.edata['h'], s3.edata['h'])
@unittest.skipIf(dgl.backend.backend_name == "tensorflow", reason="TF doesn't support inplace update")
assert F.allclose(t2.ndata["h"], s1.ndata["h"])
assert F.allclose(t2.edata["h"], s1.edata["h"])
assert F.allclose(t1.ndata["h"], s2.ndata["h"])
assert F.allclose(t1.edata["h"], s2.edata["h"])
assert F.allclose(t2.ndata["h"], s3.ndata["h"])
assert F.allclose(t2.edata["h"], s3.edata["h"])
@unittest.skipIf(
dgl.backend.backend_name == "tensorflow",
reason="TF doesn't support inplace update",
)
@parametrize_idtype
def test_batch_unbatch_frame(idtype):
"""Test module of node/edge frames of batched/unbatched DGLGraphs.
......@@ -93,30 +103,31 @@ def test_batch_unbatch_frame(idtype):
N2 = t2.number_of_nodes()
E2 = t2.number_of_edges()
D = 10
t1.ndata['h'] = F.randn((N1, D))
t1.edata['h'] = F.randn((E1, D))
t2.ndata['h'] = F.randn((N2, D))
t2.edata['h'] = F.randn((E2, D))
t1.ndata["h"] = F.randn((N1, D))
t1.edata["h"] = F.randn((E1, D))
t2.ndata["h"] = F.randn((N2, D))
t2.edata["h"] = F.randn((E2, D))
b1 = dgl.batch([t1, t2])
b2 = dgl.batch([t2])
b1.ndata['h'][:N1] = F.zeros((N1, D))
b1.edata['h'][:E1] = F.zeros((E1, D))
b2.ndata['h'][:N2] = F.zeros((N2, D))
b2.edata['h'][:E2] = F.zeros((E2, D))
assert not F.allclose(t1.ndata['h'], F.zeros((N1, D)))
assert not F.allclose(t1.edata['h'], F.zeros((E1, D)))
assert not F.allclose(t2.ndata['h'], F.zeros((N2, D)))
assert not F.allclose(t2.edata['h'], F.zeros((E2, D)))
b1.ndata["h"][:N1] = F.zeros((N1, D))
b1.edata["h"][:E1] = F.zeros((E1, D))
b2.ndata["h"][:N2] = F.zeros((N2, D))
b2.edata["h"][:E2] = F.zeros((E2, D))
assert not F.allclose(t1.ndata["h"], F.zeros((N1, D)))
assert not F.allclose(t1.edata["h"], F.zeros((E1, D)))
assert not F.allclose(t2.ndata["h"], F.zeros((N2, D)))
assert not F.allclose(t2.edata["h"], F.zeros((E2, D)))
g1, g2 = dgl.unbatch(b1)
_g2, = dgl.unbatch(b2)
assert F.allclose(g1.ndata['h'], F.zeros((N1, D)))
assert F.allclose(g1.edata['h'], F.zeros((E1, D)))
assert F.allclose(g2.ndata['h'], t2.ndata['h'])
assert F.allclose(g2.edata['h'], t2.edata['h'])
assert F.allclose(_g2.ndata['h'], F.zeros((N2, D)))
assert F.allclose(_g2.edata['h'], F.zeros((E2, D)))
(_g2,) = dgl.unbatch(b2)
assert F.allclose(g1.ndata["h"], F.zeros((N1, D)))
assert F.allclose(g1.edata["h"], F.zeros((E1, D)))
assert F.allclose(g2.ndata["h"], t2.ndata["h"])
assert F.allclose(g2.edata["h"], t2.edata["h"])
assert F.allclose(_g2.ndata["h"], F.zeros((N2, D)))
assert F.allclose(_g2.edata["h"], F.zeros((E2, D)))
@parametrize_idtype
def test_batch_unbatch2(idtype):
......@@ -128,10 +139,11 @@ def test_batch_unbatch2(idtype):
b.add_nodes(3)
b.add_edges(0, [1, 2])
c = dgl.batch([a, b])
c.ndata['h'] = F.ones((7, 1))
c.edata['w'] = F.ones((5, 1))
assert F.allclose(c.ndata['h'], F.ones((7, 1)))
assert F.allclose(c.edata['w'], F.ones((5, 1)))
c.ndata["h"] = F.ones((7, 1))
c.edata["w"] = F.ones((5, 1))
assert F.allclose(c.ndata["h"], F.ones((7, 1)))
assert F.allclose(c.edata["w"], F.ones((5, 1)))
@parametrize_idtype
def test_batch_send_and_recv(idtype):
......@@ -139,16 +151,17 @@ def test_batch_send_and_recv(idtype):
t2 = tree2(idtype)
bg = dgl.batch([t1, t2])
_mfunc = lambda edges: {'m' : edges.src['h']}
_rfunc = lambda nodes: {'h' : F.sum(nodes.mailbox['m'], 1)}
_mfunc = lambda edges: {"m": edges.src["h"]}
_rfunc = lambda nodes: {"h": F.sum(nodes.mailbox["m"], 1)}
u = [3, 4, 2 + 5, 0 + 5]
v = [1, 1, 4 + 5, 4 + 5]
bg.send_and_recv((u, v), _mfunc, _rfunc)
t1, t2 = dgl.unbatch(bg)
assert F.asnumpy(t1.ndata['h'][1]) == 7
assert F.asnumpy(t2.ndata['h'][4]) == 2
assert F.asnumpy(t1.ndata["h"][1]) == 7
assert F.asnumpy(t2.ndata["h"][4]) == 2
@parametrize_idtype
def test_batch_propagate(idtype):
......@@ -156,8 +169,8 @@ def test_batch_propagate(idtype):
t2 = tree2(idtype)
bg = dgl.batch([t1, t2])
_mfunc = lambda edges: {'m' : edges.src['h']}
_rfunc = lambda nodes: {'h' : F.sum(nodes.mailbox['m'], 1)}
_mfunc = lambda edges: {"m": edges.src["h"]}
_rfunc = lambda nodes: {"h": F.sum(nodes.mailbox["m"], 1)}
# get leaves.
order = []
......@@ -175,8 +188,9 @@ def test_batch_propagate(idtype):
bg.prop_edges(order, _mfunc, _rfunc)
t1, t2 = dgl.unbatch(bg)
assert F.asnumpy(t1.ndata['h'][0]) == 9
assert F.asnumpy(t2.ndata['h'][1]) == 5
assert F.asnumpy(t1.ndata["h"][0]) == 9
assert F.asnumpy(t2.ndata["h"][1]) == 5
@parametrize_idtype
def test_batched_edge_ordering(idtype):
......@@ -184,17 +198,18 @@ def test_batched_edge_ordering(idtype):
g1.add_nodes(6)
g1.add_edges([4, 4, 2, 2, 0], [5, 3, 3, 1, 1])
e1 = F.randn((5, 10))
g1.edata['h'] = e1
g1.edata["h"] = e1
g2 = dgl.graph(([], [])).astype(idtype).to(F.ctx())
g2.add_nodes(6)
g2.add_edges([0, 1 ,2 ,5, 4 ,5], [1, 2, 3, 4, 3, 0])
g2.add_edges([0, 1, 2, 5, 4, 5], [1, 2, 3, 4, 3, 0])
e2 = F.randn((6, 10))
g2.edata['h'] = e2
g2.edata["h"] = e2
g = dgl.batch([g1, g2])
r1 = g.edata['h'][g.edge_ids(4, 5)]
r2 = g1.edata['h'][g1.edge_ids(4, 5)]
r1 = g.edata["h"][g.edge_ids(4, 5)]
r2 = g1.edata["h"][g1.edge_ids(4, 5)]
assert F.array_equal(r1, r2)
@parametrize_idtype
def test_batch_no_edge(idtype):
g1 = dgl.graph(([], [])).astype(idtype).to(F.ctx())
......@@ -202,22 +217,24 @@ def test_batch_no_edge(idtype):
g1.add_edges([4, 4, 2, 2, 0], [5, 3, 3, 1, 1])
g2 = dgl.graph(([], [])).astype(idtype).to(F.ctx())
g2.add_nodes(6)
g2.add_edges([0, 1, 2, 5, 4, 5], [1 ,2 ,3, 4, 3, 0])
g2.add_edges([0, 1, 2, 5, 4, 5], [1, 2, 3, 4, 3, 0])
g3 = dgl.graph(([], [])).astype(idtype).to(F.ctx())
g3.add_nodes(1) # no edges
g = dgl.batch([g1, g3, g2]) # should not throw an error
g = dgl.batch([g1, g3, g2]) # should not throw an error
@parametrize_idtype
def test_batch_keeps_empty_data(idtype):
g1 = dgl.graph(([], [])).astype(idtype).to(F.ctx())
g1.ndata["nh"] = F.tensor([])
g1.edata["eh"] = F.tensor([])
g1.edata["eh"] = F.tensor([])
g2 = dgl.graph(([], [])).astype(idtype).to(F.ctx())
g2.ndata["nh"] = F.tensor([])
g2.edata["eh"] = F.tensor([])
g2.edata["eh"] = F.tensor([])
g = dgl.batch([g1, g2])
assert "nh" in g.ndata
assert "eh" in g.edata
assert "eh" in g.edata
def _get_subgraph_batch_info(keys, induced_indices_arr, batch_num_objs):
"""Internal function to compute batch information for subgraphs.
......@@ -235,12 +252,16 @@ def _get_subgraph_batch_info(keys, induced_indices_arr, batch_num_objs):
A dictionary mapping all node/edge type keys to the ``batch_num_objs``
array of corresponding graph.
"""
bucket_offset = np.expand_dims(np.cumsum(F.asnumpy(batch_num_objs), 0), -1) # (num_bkts, 1)
bucket_offset = np.expand_dims(
np.cumsum(F.asnumpy(batch_num_objs), 0), -1
) # (num_bkts, 1)
ret = {}
for key, induced_indices in zip(keys, induced_indices_arr):
# NOTE(Zihao): this implementation is not efficient and we can replace it with
# binary search in the future.
induced_indices = np.expand_dims(F.asnumpy(induced_indices), 0) # (1, num_nodes)
induced_indices = np.expand_dims(
F.asnumpy(induced_indices), 0
) # (1, num_nodes)
new_offset = np.sum((induced_indices < bucket_offset), 1) # (num_bkts,)
# start_offset = [0] + [new_offset[i-1] for i in range(1, n_bkts)]
start_offset = np.concatenate([np.zeros((1,)), new_offset[:-1]], 0)
......@@ -248,6 +269,7 @@ def _get_subgraph_batch_info(keys, induced_indices_arr, batch_num_objs):
ret[key] = F.tensor(new_batch_num_objs, dtype=F.dtype(batch_num_objs))
return ret
@parametrize_idtype
def test_set_batch_info(idtype):
ctx = F.ctx()
......@@ -257,13 +279,17 @@ def test_set_batch_info(idtype):
bg = dgl.batch([g1, g2])
batch_num_nodes = F.astype(bg.batch_num_nodes(), idtype)
batch_num_edges = F.astype(bg.batch_num_edges(), idtype)
# test homogeneous node subgraph
sg_n = dgl.node_subgraph(bg, list(range(10, 20)) + list(range(50, 60)))
induced_nodes = sg_n.ndata['_ID']
induced_edges = sg_n.edata['_ID']
new_batch_num_nodes = _get_subgraph_batch_info(bg.ntypes, [induced_nodes], batch_num_nodes)
new_batch_num_edges = _get_subgraph_batch_info(bg.canonical_etypes, [induced_edges], batch_num_edges)
induced_nodes = sg_n.ndata["_ID"]
induced_edges = sg_n.edata["_ID"]
new_batch_num_nodes = _get_subgraph_batch_info(
bg.ntypes, [induced_nodes], batch_num_nodes
)
new_batch_num_edges = _get_subgraph_batch_info(
bg.canonical_etypes, [induced_edges], batch_num_edges
)
sg_n.set_batch_num_nodes(new_batch_num_nodes)
sg_n.set_batch_num_edges(new_batch_num_edges)
subg_n1, subg_n2 = dgl.unbatch(sg_n)
......@@ -273,11 +299,17 @@ def test_set_batch_info(idtype):
assert subg_n2.num_edges() == subg2.num_edges()
# test homogeneous edge subgraph
sg_e = dgl.edge_subgraph(bg, list(range(40, 70)) + list(range(150, 200)), relabel_nodes=False)
sg_e = dgl.edge_subgraph(
bg, list(range(40, 70)) + list(range(150, 200)), relabel_nodes=False
)
induced_nodes = F.arange(0, bg.num_nodes(), idtype)
induced_edges = sg_e.edata['_ID']
new_batch_num_nodes = _get_subgraph_batch_info(bg.ntypes, [induced_nodes], batch_num_nodes)
new_batch_num_edges = _get_subgraph_batch_info(bg.canonical_etypes, [induced_edges], batch_num_edges)
induced_edges = sg_e.edata["_ID"]
new_batch_num_nodes = _get_subgraph_batch_info(
bg.ntypes, [induced_nodes], batch_num_nodes
)
new_batch_num_edges = _get_subgraph_batch_info(
bg.canonical_etypes, [induced_edges], batch_num_edges
)
sg_e.set_batch_num_nodes(new_batch_num_nodes)
sg_e.set_batch_num_edges(new_batch_num_edges)
subg_e1, subg_e2 = dgl.unbatch(sg_e)
......@@ -287,15 +319,14 @@ def test_set_batch_info(idtype):
assert subg_e2.num_nodes() == subg2.num_nodes()
if __name__ == '__main__':
#test_batch_unbatch()
#test_batch_unbatch1()
#test_batch_unbatch_frame()
#test_batch_unbatch2()
#test_batched_edge_ordering()
#test_batch_send_then_recv()
#test_batch_send_and_recv()
#test_batch_propagate()
#test_batch_no_edge()
if __name__ == "__main__":
# test_batch_unbatch()
# test_batch_unbatch1()
# test_batch_unbatch_frame()
# test_batch_unbatch2()
# test_batched_edge_ordering()
# test_batch_send_then_recv()
# test_batch_send_and_recv()
# test_batch_propagate()
# test_batch_no_edge()
test_set_batch_info(F.int32)
import dgl
import backend as F
import unittest
import pytest
import backend as F
import dgl
import pytest
from dgl.base import ALL
from test_utils import parametrize_idtype
from test_utils import check_graph_equal, get_cases
from test_utils import check_graph_equal, get_cases, parametrize_idtype
def check_equivalence_between_heterographs(g1, g2, node_attrs=None, edge_attrs=None):
def check_equivalence_between_heterographs(
g1, g2, node_attrs=None, edge_attrs=None
):
assert g1.ntypes == g2.ntypes
assert g1.etypes == g2.etypes
assert g1.canonical_etypes == g2.canonical_etypes
......@@ -22,8 +24,8 @@ def check_equivalence_between_heterographs(g1, g2, node_attrs=None, edge_attrs=N
for ety in g1.canonical_etypes:
assert g1.number_of_edges(ety) == g2.number_of_edges(ety)
src1, dst1, eid1 = g1.edges(etype=ety, form='all')
src2, dst2, eid2 = g2.edges(etype=ety, form='all')
src1, dst1, eid1 = g1.edges(etype=ety, form="all")
src2, dst2, eid2 = g2.edges(etype=ety, form="all")
assert F.allclose(src1, src2)
assert F.allclose(dst1, dst2)
assert F.allclose(eid1, eid2)
......@@ -34,7 +36,8 @@ def check_equivalence_between_heterographs(g1, g2, node_attrs=None, edge_attrs=N
continue
for feat_name in node_attrs[nty]:
assert F.allclose(
g1.nodes[nty].data[feat_name], g2.nodes[nty].data[feat_name])
g1.nodes[nty].data[feat_name], g2.nodes[nty].data[feat_name]
)
if edge_attrs is not None:
for ety in edge_attrs.keys():
......@@ -42,10 +45,11 @@ def check_equivalence_between_heterographs(g1, g2, node_attrs=None, edge_attrs=N
continue
for feat_name in edge_attrs[ety]:
assert F.allclose(
g1.edges[ety].data[feat_name], g2.edges[ety].data[feat_name])
g1.edges[ety].data[feat_name], g2.edges[ety].data[feat_name]
)
@pytest.mark.parametrize('gs', get_cases(['two_hetero_batch']))
@pytest.mark.parametrize("gs", get_cases(["two_hetero_batch"]))
@parametrize_idtype
def test_topology(gs, idtype):
"""Test batching two DGLGraphs where some nodes are isolated in some relations"""
......@@ -65,30 +69,37 @@ def test_topology(gs, idtype):
for ntype in bg.ntypes:
print(ntype)
assert F.asnumpy(bg.batch_num_nodes(ntype)).tolist() == [
g1.number_of_nodes(ntype), g2.number_of_nodes(ntype)]
g1.number_of_nodes(ntype),
g2.number_of_nodes(ntype),
]
assert bg.number_of_nodes(ntype) == (
g1.number_of_nodes(ntype) + g2.number_of_nodes(ntype))
g1.number_of_nodes(ntype) + g2.number_of_nodes(ntype)
)
# Test number of edges
for etype in bg.canonical_etypes:
assert F.asnumpy(bg.batch_num_edges(etype)).tolist() == [
g1.number_of_edges(etype), g2.number_of_edges(etype)]
g1.number_of_edges(etype),
g2.number_of_edges(etype),
]
assert bg.number_of_edges(etype) == (
g1.number_of_edges(etype) + g2.number_of_edges(etype))
g1.number_of_edges(etype) + g2.number_of_edges(etype)
)
# Test relabeled nodes
for ntype in bg.ntypes:
assert list(F.asnumpy(bg.nodes(ntype))) == list(
range(bg.number_of_nodes(ntype)))
range(bg.number_of_nodes(ntype))
)
# Test relabeled edges
src, dst = bg.edges(etype=('user', 'follows', 'user'))
src, dst = bg.edges(etype=("user", "follows", "user"))
assert list(F.asnumpy(src)) == [0, 1, 4, 5]
assert list(F.asnumpy(dst)) == [1, 2, 5, 6]
src, dst = bg.edges(etype=('user', 'follows', 'developer'))
src, dst = bg.edges(etype=("user", "follows", "developer"))
assert list(F.asnumpy(src)) == [0, 1, 4, 5]
assert list(F.asnumpy(dst)) == [1, 2, 4, 5]
src, dst, eid = bg.edges(etype='plays', form='all')
src, dst, eid = bg.edges(etype="plays", form="all")
assert list(F.asnumpy(src)) == [0, 1, 2, 3, 4, 5, 6]
assert list(F.asnumpy(dst)) == [0, 0, 1, 1, 2, 2, 3]
assert list(F.asnumpy(eid)) == [0, 1, 2, 3, 4, 5, 6]
......@@ -113,19 +124,31 @@ def test_topology(gs, idtype):
@parametrize_idtype
def test_batching_batched(idtype):
"""Test batching a DGLGraph and a batched DGLGraph."""
g1 = dgl.heterograph({
('user', 'follows', 'user'): ([0, 1], [1, 2]),
('user', 'plays', 'game'): ([0, 1], [0, 0])
}, idtype=idtype, device=F.ctx())
g2 = dgl.heterograph({
('user', 'follows', 'user'): ([0, 1], [1, 2]),
('user', 'plays', 'game'): ([0, 1], [0, 0])
}, idtype=idtype, device=F.ctx())
g1 = dgl.heterograph(
{
("user", "follows", "user"): ([0, 1], [1, 2]),
("user", "plays", "game"): ([0, 1], [0, 0]),
},
idtype=idtype,
device=F.ctx(),
)
g2 = dgl.heterograph(
{
("user", "follows", "user"): ([0, 1], [1, 2]),
("user", "plays", "game"): ([0, 1], [0, 0]),
},
idtype=idtype,
device=F.ctx(),
)
bg1 = dgl.batch([g1, g2])
g3 = dgl.heterograph({
('user', 'follows', 'user'): ([0], [1]),
('user', 'plays', 'game'): ([1], [0])
}, idtype=idtype, device=F.ctx())
g3 = dgl.heterograph(
{
("user", "follows", "user"): ([0], [1]),
("user", "plays", "game"): ([1], [0]),
},
idtype=idtype,
device=F.ctx(),
)
bg2 = dgl.batch([bg1, g3])
assert bg2.idtype == idtype
assert bg2.device == F.ctx()
......@@ -137,27 +160,40 @@ def test_batching_batched(idtype):
# Test number of nodes
for ntype in bg2.ntypes:
assert F.asnumpy(bg2.batch_num_nodes(ntype)).tolist() == [
g1.number_of_nodes(ntype), g2.number_of_nodes(ntype), g3.number_of_nodes(ntype)]
g1.number_of_nodes(ntype),
g2.number_of_nodes(ntype),
g3.number_of_nodes(ntype),
]
assert bg2.number_of_nodes(ntype) == (
g1.number_of_nodes(ntype) + g2.number_of_nodes(ntype) + g3.number_of_nodes(ntype))
g1.number_of_nodes(ntype)
+ g2.number_of_nodes(ntype)
+ g3.number_of_nodes(ntype)
)
# Test number of edges
for etype in bg2.canonical_etypes:
assert F.asnumpy(bg2.batch_num_edges(etype)).tolist() == [
g1.number_of_edges(etype), g2.number_of_edges(etype), g3.number_of_edges(etype)]
g1.number_of_edges(etype),
g2.number_of_edges(etype),
g3.number_of_edges(etype),
]
assert bg2.number_of_edges(etype) == (
g1.number_of_edges(etype) + g2.number_of_edges(etype) + g3.number_of_edges(etype))
g1.number_of_edges(etype)
+ g2.number_of_edges(etype)
+ g3.number_of_edges(etype)
)
# Test relabeled nodes
for ntype in bg2.ntypes:
assert list(F.asnumpy(bg2.nodes(ntype))) == list(
range(bg2.number_of_nodes(ntype)))
range(bg2.number_of_nodes(ntype))
)
# Test relabeled edges
src, dst = bg2.edges(etype='follows')
src, dst = bg2.edges(etype="follows")
assert list(F.asnumpy(src)) == [0, 1, 3, 4, 6]
assert list(F.asnumpy(dst)) == [1, 2, 4, 5, 7]
src, dst = bg2.edges(etype='plays')
src, dst = bg2.edges(etype="plays")
assert list(F.asnumpy(src)) == [0, 1, 3, 4, 7]
assert list(F.asnumpy(dst)) == [0, 0, 1, 1, 2]
......@@ -171,136 +207,228 @@ def test_batching_batched(idtype):
@parametrize_idtype
def test_features(idtype):
"""Test the features of batched DGLGraphs"""
g1 = dgl.heterograph({
('user', 'follows', 'user'): ([0, 1], [1, 2]),
('user', 'plays', 'game'): ([0, 1], [0, 0])
}, idtype=idtype, device=F.ctx())
g1.nodes['user'].data['h1'] = F.tensor([[0.], [1.], [2.]])
g1.nodes['user'].data['h2'] = F.tensor([[3.], [4.], [5.]])
g1.nodes['game'].data['h1'] = F.tensor([[0.]])
g1.nodes['game'].data['h2'] = F.tensor([[1.]])
g1.edges['follows'].data['h1'] = F.tensor([[0.], [1.]])
g1.edges['follows'].data['h2'] = F.tensor([[2.], [3.]])
g1.edges['plays'].data['h1'] = F.tensor([[0.], [1.]])
g2 = dgl.heterograph({
('user', 'follows', 'user'): ([0, 1], [1, 2]),
('user', 'plays', 'game'): ([0, 1], [0, 0])
}, idtype=idtype, device=F.ctx())
g2.nodes['user'].data['h1'] = F.tensor([[0.], [1.], [2.]])
g2.nodes['user'].data['h2'] = F.tensor([[3.], [4.], [5.]])
g2.nodes['game'].data['h1'] = F.tensor([[0.]])
g2.nodes['game'].data['h2'] = F.tensor([[1.]])
g2.edges['follows'].data['h1'] = F.tensor([[0.], [1.]])
g2.edges['follows'].data['h2'] = F.tensor([[2.], [3.]])
g2.edges['plays'].data['h1'] = F.tensor([[0.], [1.]])
g1 = dgl.heterograph(
{
("user", "follows", "user"): ([0, 1], [1, 2]),
("user", "plays", "game"): ([0, 1], [0, 0]),
},
idtype=idtype,
device=F.ctx(),
)
g1.nodes["user"].data["h1"] = F.tensor([[0.0], [1.0], [2.0]])
g1.nodes["user"].data["h2"] = F.tensor([[3.0], [4.0], [5.0]])
g1.nodes["game"].data["h1"] = F.tensor([[0.0]])
g1.nodes["game"].data["h2"] = F.tensor([[1.0]])
g1.edges["follows"].data["h1"] = F.tensor([[0.0], [1.0]])
g1.edges["follows"].data["h2"] = F.tensor([[2.0], [3.0]])
g1.edges["plays"].data["h1"] = F.tensor([[0.0], [1.0]])
g2 = dgl.heterograph(
{
("user", "follows", "user"): ([0, 1], [1, 2]),
("user", "plays", "game"): ([0, 1], [0, 0]),
},
idtype=idtype,
device=F.ctx(),
)
g2.nodes["user"].data["h1"] = F.tensor([[0.0], [1.0], [2.0]])
g2.nodes["user"].data["h2"] = F.tensor([[3.0], [4.0], [5.0]])
g2.nodes["game"].data["h1"] = F.tensor([[0.0]])
g2.nodes["game"].data["h2"] = F.tensor([[1.0]])
g2.edges["follows"].data["h1"] = F.tensor([[0.0], [1.0]])
g2.edges["follows"].data["h2"] = F.tensor([[2.0], [3.0]])
g2.edges["plays"].data["h1"] = F.tensor([[0.0], [1.0]])
# test default setting
bg = dgl.batch([g1, g2])
assert F.allclose(bg.nodes['user'].data['h1'],
F.cat([g1.nodes['user'].data['h1'], g2.nodes['user'].data['h1']], dim=0))
assert F.allclose(bg.nodes['user'].data['h2'],
F.cat([g1.nodes['user'].data['h2'], g2.nodes['user'].data['h2']], dim=0))
assert F.allclose(bg.nodes['game'].data['h1'],
F.cat([g1.nodes['game'].data['h1'], g2.nodes['game'].data['h1']], dim=0))
assert F.allclose(bg.nodes['game'].data['h2'],
F.cat([g1.nodes['game'].data['h2'], g2.nodes['game'].data['h2']], dim=0))
assert F.allclose(bg.edges['follows'].data['h1'],
F.cat([g1.edges['follows'].data['h1'], g2.edges['follows'].data['h1']], dim=0))
assert F.allclose(bg.edges['follows'].data['h2'],
F.cat([g1.edges['follows'].data['h2'], g2.edges['follows'].data['h2']], dim=0))
assert F.allclose(bg.edges['plays'].data['h1'],
F.cat([g1.edges['plays'].data['h1'], g2.edges['plays'].data['h1']], dim=0))
assert F.allclose(
bg.nodes["user"].data["h1"],
F.cat(
[g1.nodes["user"].data["h1"], g2.nodes["user"].data["h1"]], dim=0
),
)
assert F.allclose(
bg.nodes["user"].data["h2"],
F.cat(
[g1.nodes["user"].data["h2"], g2.nodes["user"].data["h2"]], dim=0
),
)
assert F.allclose(
bg.nodes["game"].data["h1"],
F.cat(
[g1.nodes["game"].data["h1"], g2.nodes["game"].data["h1"]], dim=0
),
)
assert F.allclose(
bg.nodes["game"].data["h2"],
F.cat(
[g1.nodes["game"].data["h2"], g2.nodes["game"].data["h2"]], dim=0
),
)
assert F.allclose(
bg.edges["follows"].data["h1"],
F.cat(
[g1.edges["follows"].data["h1"], g2.edges["follows"].data["h1"]],
dim=0,
),
)
assert F.allclose(
bg.edges["follows"].data["h2"],
F.cat(
[g1.edges["follows"].data["h2"], g2.edges["follows"].data["h2"]],
dim=0,
),
)
assert F.allclose(
bg.edges["plays"].data["h1"],
F.cat(
[g1.edges["plays"].data["h1"], g2.edges["plays"].data["h1"]], dim=0
),
)
# test specifying ndata/edata
bg = dgl.batch([g1, g2], ndata=['h2'], edata=['h1'])
assert F.allclose(bg.nodes['user'].data['h2'],
F.cat([g1.nodes['user'].data['h2'], g2.nodes['user'].data['h2']], dim=0))
assert F.allclose(bg.nodes['game'].data['h2'],
F.cat([g1.nodes['game'].data['h2'], g2.nodes['game'].data['h2']], dim=0))
assert F.allclose(bg.edges['follows'].data['h1'],
F.cat([g1.edges['follows'].data['h1'], g2.edges['follows'].data['h1']], dim=0))
assert F.allclose(bg.edges['plays'].data['h1'],
F.cat([g1.edges['plays'].data['h1'], g2.edges['plays'].data['h1']], dim=0))
assert 'h1' not in bg.nodes['user'].data
assert 'h1' not in bg.nodes['game'].data
assert 'h2' not in bg.edges['follows'].data
bg = dgl.batch([g1, g2], ndata=["h2"], edata=["h1"])
assert F.allclose(
bg.nodes["user"].data["h2"],
F.cat(
[g1.nodes["user"].data["h2"], g2.nodes["user"].data["h2"]], dim=0
),
)
assert F.allclose(
bg.nodes["game"].data["h2"],
F.cat(
[g1.nodes["game"].data["h2"], g2.nodes["game"].data["h2"]], dim=0
),
)
assert F.allclose(
bg.edges["follows"].data["h1"],
F.cat(
[g1.edges["follows"].data["h1"], g2.edges["follows"].data["h1"]],
dim=0,
),
)
assert F.allclose(
bg.edges["plays"].data["h1"],
F.cat(
[g1.edges["plays"].data["h1"], g2.edges["plays"].data["h1"]], dim=0
),
)
assert "h1" not in bg.nodes["user"].data
assert "h1" not in bg.nodes["game"].data
assert "h2" not in bg.edges["follows"].data
# Test unbatching graphs
g3, g4 = dgl.unbatch(bg)
check_equivalence_between_heterographs(
g1, g3,
node_attrs={'user': ['h2'], 'game': ['h2']},
edge_attrs={('user', 'follows', 'user'): ['h1']})
g1,
g3,
node_attrs={"user": ["h2"], "game": ["h2"]},
edge_attrs={("user", "follows", "user"): ["h1"]},
)
check_equivalence_between_heterographs(
g2, g4,
node_attrs={'user': ['h2'], 'game': ['h2']},
edge_attrs={('user', 'follows', 'user'): ['h1']})
g2,
g4,
node_attrs={"user": ["h2"], "game": ["h2"]},
edge_attrs={("user", "follows", "user"): ["h1"]},
)
@unittest.skipIf(F.backend_name == 'mxnet', reason="MXNet does not support split array with zero-length segment.")
@unittest.skipIf(
F.backend_name == "mxnet",
reason="MXNet does not support split array with zero-length segment.",
)
@parametrize_idtype
def test_empty_relation(idtype):
"""Test the features of batched DGLGraphs"""
g1 = dgl.heterograph({
('user', 'follows', 'user'): ([0, 1], [1, 2]),
('user', 'plays', 'game'): ([], [])
}, idtype=idtype, device=F.ctx())
g1.nodes['user'].data['h1'] = F.tensor([[0.], [1.], [2.]])
g1.nodes['user'].data['h2'] = F.tensor([[3.], [4.], [5.]])
g1.edges['follows'].data['h1'] = F.tensor([[0.], [1.]])
g1.edges['follows'].data['h2'] = F.tensor([[2.], [3.]])
g2 = dgl.heterograph({
('user', 'follows', 'user'): ([0, 1], [1, 2]),
('user', 'plays', 'game'): ([0, 1], [0, 0])
}, idtype=idtype, device=F.ctx())
g2.nodes['user'].data['h1'] = F.tensor([[0.], [1.], [2.]])
g2.nodes['user'].data['h2'] = F.tensor([[3.], [4.], [5.]])
g2.nodes['game'].data['h1'] = F.tensor([[0.]])
g2.nodes['game'].data['h2'] = F.tensor([[1.]])
g2.edges['follows'].data['h1'] = F.tensor([[0.], [1.]])
g2.edges['follows'].data['h2'] = F.tensor([[2.], [3.]])
g2.edges['plays'].data['h1'] = F.tensor([[0.], [1.]])
g1 = dgl.heterograph(
{
("user", "follows", "user"): ([0, 1], [1, 2]),
("user", "plays", "game"): ([], []),
},
idtype=idtype,
device=F.ctx(),
)
g1.nodes["user"].data["h1"] = F.tensor([[0.0], [1.0], [2.0]])
g1.nodes["user"].data["h2"] = F.tensor([[3.0], [4.0], [5.0]])
g1.edges["follows"].data["h1"] = F.tensor([[0.0], [1.0]])
g1.edges["follows"].data["h2"] = F.tensor([[2.0], [3.0]])
g2 = dgl.heterograph(
{
("user", "follows", "user"): ([0, 1], [1, 2]),
("user", "plays", "game"): ([0, 1], [0, 0]),
},
idtype=idtype,
device=F.ctx(),
)
g2.nodes["user"].data["h1"] = F.tensor([[0.0], [1.0], [2.0]])
g2.nodes["user"].data["h2"] = F.tensor([[3.0], [4.0], [5.0]])
g2.nodes["game"].data["h1"] = F.tensor([[0.0]])
g2.nodes["game"].data["h2"] = F.tensor([[1.0]])
g2.edges["follows"].data["h1"] = F.tensor([[0.0], [1.0]])
g2.edges["follows"].data["h2"] = F.tensor([[2.0], [3.0]])
g2.edges["plays"].data["h1"] = F.tensor([[0.0], [1.0]])
bg = dgl.batch([g1, g2])
# Test number of nodes
for ntype in bg.ntypes:
assert F.asnumpy(bg.batch_num_nodes(ntype)).tolist() == [
g1.number_of_nodes(ntype), g2.number_of_nodes(ntype)]
g1.number_of_nodes(ntype),
g2.number_of_nodes(ntype),
]
# Test number of edges
for etype in bg.canonical_etypes:
assert F.asnumpy(bg.batch_num_edges(etype)).tolist() == [
g1.number_of_edges(etype), g2.number_of_edges(etype)]
g1.number_of_edges(etype),
g2.number_of_edges(etype),
]
# Test features
assert F.allclose(bg.nodes['user'].data['h1'],
F.cat([g1.nodes['user'].data['h1'], g2.nodes['user'].data['h1']], dim=0))
assert F.allclose(bg.nodes['user'].data['h2'],
F.cat([g1.nodes['user'].data['h2'], g2.nodes['user'].data['h2']], dim=0))
assert F.allclose(bg.nodes['game'].data['h1'], g2.nodes['game'].data['h1'])
assert F.allclose(bg.nodes['game'].data['h2'], g2.nodes['game'].data['h2'])
assert F.allclose(bg.edges['follows'].data['h1'],
F.cat([g1.edges['follows'].data['h1'], g2.edges['follows'].data['h1']], dim=0))
assert F.allclose(bg.edges['plays'].data['h1'],
g2.edges['plays'].data['h1'])
assert F.allclose(
bg.nodes["user"].data["h1"],
F.cat(
[g1.nodes["user"].data["h1"], g2.nodes["user"].data["h1"]], dim=0
),
)
assert F.allclose(
bg.nodes["user"].data["h2"],
F.cat(
[g1.nodes["user"].data["h2"], g2.nodes["user"].data["h2"]], dim=0
),
)
assert F.allclose(bg.nodes["game"].data["h1"], g2.nodes["game"].data["h1"])
assert F.allclose(bg.nodes["game"].data["h2"], g2.nodes["game"].data["h2"])
assert F.allclose(
bg.edges["follows"].data["h1"],
F.cat(
[g1.edges["follows"].data["h1"], g2.edges["follows"].data["h1"]],
dim=0,
),
)
assert F.allclose(
bg.edges["plays"].data["h1"], g2.edges["plays"].data["h1"]
)
# Test unbatching graphs
g3, g4 = dgl.unbatch(bg)
check_equivalence_between_heterographs(
g1, g3,
node_attrs={'user': ['h1', 'h2'], 'game': ['h1', 'h2']},
edge_attrs={('user', 'follows', 'user'): ['h1']})
g1,
g3,
node_attrs={"user": ["h1", "h2"], "game": ["h1", "h2"]},
edge_attrs={("user", "follows", "user"): ["h1"]},
)
check_equivalence_between_heterographs(
g2, g4,
node_attrs={'user': ['h1', 'h2'], 'game': ['h1', 'h2']},
edge_attrs={('user', 'follows', 'user'): ['h1']})
g2,
g4,
node_attrs={"user": ["h1", "h2"], "game": ["h1", "h2"]},
edge_attrs={("user", "follows", "user"): ["h1"]},
)
# Test graphs without edges
g1 = dgl.heterograph({('u', 'r', 'v'): ([], [])}, {'u': 0, 'v': 4})
g2 = dgl.heterograph({('u', 'r', 'v'): ([], [])}, {'u': 1, 'v': 5})
g1 = dgl.heterograph({("u", "r", "v"): ([], [])}, {"u": 0, "v": 4})
g2 = dgl.heterograph({("u", "r", "v"): ([], [])}, {"u": 1, "v": 5})
dgl.batch([g1, g2])
......@@ -314,10 +442,10 @@ def test_unbatch2(idtype):
bnn = F.tensor([8, 4])
bne = F.tensor([6, 3])
f1, f2 = dgl.unbatch(bg, node_split=bnn, edge_split=bne)
u, v = f1.edges(order='eid')
u, v = f1.edges(order="eid")
assert F.allclose(u, F.tensor([0, 1, 2, 4, 5, 6]))
assert F.allclose(v, F.tensor([1, 2, 3, 5, 6, 7]))
u, v = f2.edges(order='eid')
u, v = f2.edges(order="eid")
assert F.allclose(u, F.tensor([0, 1, 2]))
assert F.allclose(v, F.tensor([1, 2, 3]))
......@@ -331,28 +459,42 @@ def test_unbatch2(idtype):
@parametrize_idtype
def test_slice_batch(idtype):
g1 = dgl.heterograph({
('user', 'follows', 'user'): ([0, 1], [1, 2]),
('user', 'plays', 'game'): ([], []),
('user', 'follows', 'game'): ([0, 0], [1, 4])
}, idtype=idtype, device=F.ctx())
g2 = dgl.heterograph({
('user', 'follows', 'user'): ([0, 1], [1, 2]),
('user', 'plays', 'game'): ([0, 1], [0, 0]),
('user', 'follows', 'game'): ([0, 1], [1, 4])
}, num_nodes_dict={'user': 4, 'game': 6}, idtype=idtype, device=F.ctx())
g3 = dgl.heterograph({
('user', 'follows', 'user'): ([0], [2]),
('user', 'plays', 'game'): ([1, 2], [3, 4]),
('user', 'follows', 'game'): ([], [])
}, idtype=idtype, device=F.ctx())
g1 = dgl.heterograph(
{
("user", "follows", "user"): ([0, 1], [1, 2]),
("user", "plays", "game"): ([], []),
("user", "follows", "game"): ([0, 0], [1, 4]),
},
idtype=idtype,
device=F.ctx(),
)
g2 = dgl.heterograph(
{
("user", "follows", "user"): ([0, 1], [1, 2]),
("user", "plays", "game"): ([0, 1], [0, 0]),
("user", "follows", "game"): ([0, 1], [1, 4]),
},
num_nodes_dict={"user": 4, "game": 6},
idtype=idtype,
device=F.ctx(),
)
g3 = dgl.heterograph(
{
("user", "follows", "user"): ([0], [2]),
("user", "plays", "game"): ([1, 2], [3, 4]),
("user", "follows", "game"): ([], []),
},
idtype=idtype,
device=F.ctx(),
)
g_list = [g1, g2, g3]
bg = dgl.batch(g_list)
bg.nodes['user'].data['h1'] = F.randn((bg.num_nodes('user'), 2))
bg.nodes['user'].data['h2'] = F.randn((bg.num_nodes('user'), 5))
bg.edges[('user', 'follows', 'user')].data['h1'] = F.randn((
bg.num_edges(('user', 'follows', 'user')), 2))
for fmat in ['coo', 'csr', 'csc']:
bg.nodes["user"].data["h1"] = F.randn((bg.num_nodes("user"), 2))
bg.nodes["user"].data["h2"] = F.randn((bg.num_nodes("user"), 5))
bg.edges[("user", "follows", "user")].data["h1"] = F.randn(
(bg.num_edges(("user", "follows", "user")), 2)
)
for fmat in ["coo", "csr", "csc"]:
bg = bg.formats(fmat)
for i in range(len(g_list)):
g_i = g_list[i]
......@@ -364,22 +506,28 @@ def test_slice_batch(idtype):
for nty in g_i.ntypes:
assert g_i.num_nodes(nty) == g_slice.num_nodes(nty)
for feat in g_i.nodes[nty].data:
assert F.allclose(g_i.nodes[nty].data[feat], g_slice.nodes[nty].data[feat])
assert F.allclose(
g_i.nodes[nty].data[feat], g_slice.nodes[nty].data[feat]
)
for ety in g_i.canonical_etypes:
assert g_i.num_edges(ety) == g_slice.num_edges(ety)
for feat in g_i.edges[ety].data:
assert F.allclose(g_i.edges[ety].data[feat], g_slice.edges[ety].data[feat])
assert F.allclose(
g_i.edges[ety].data[feat], g_slice.edges[ety].data[feat]
)
@parametrize_idtype
def test_batch_keeps_empty_data(idtype):
g1 = dgl.heterograph({("a", "to", "a"): ([], [])}
).astype(idtype).to(F.ctx())
g1 = (
dgl.heterograph({("a", "to", "a"): ([], [])}).astype(idtype).to(F.ctx())
)
g1.nodes["a"].data["nh"] = F.tensor([])
g1.edges[("a", "to", "a")].data["eh"] = F.tensor([])
g2 = dgl.heterograph({("a", "to", "a"): ([], [])}
).astype(idtype).to(F.ctx())
g2 = (
dgl.heterograph({("a", "to", "a"): ([], [])}).astype(idtype).to(F.ctx())
)
g2.nodes["a"].data["nh"] = F.tensor([])
g2.edges[("a", "to", "a")].data["eh"] = F.tensor([])
g = dgl.batch([g1, g2])
......@@ -387,27 +535,35 @@ def test_batch_keeps_empty_data(idtype):
assert "eh" in g.edges[("a", "to", "a")].data
@unittest.skipIf(F._default_context_str == 'gpu', reason="Issue is not related with GPU")
@unittest.skipIf(
F._default_context_str == "gpu", reason="Issue is not related with GPU"
)
def test_batch_netypes():
# Test for https://github.com/dmlc/dgl/issues/2808
import networkx as nx
B = nx.DiGraph()
B.add_nodes_from([1, 2, 3, 4], bipartite=0,
some_attr=F.tensor([1, 2, 3, 4], dtype=F.float32))
B.add_nodes_from(
[1, 2, 3, 4],
bipartite=0,
some_attr=F.tensor([1, 2, 3, 4], dtype=F.float32),
)
B.add_nodes_from(["a", "b", "c"], bipartite=1)
B.add_edges_from([(1, "a"), (1, "b"), (2, "b"),
(2, "c"), (3, "c"), (4, "a")])
g_dict = {0: dgl.bipartite_from_networkx(B, 'A', 'e', 'B'),
1: dgl.bipartite_from_networkx(B, 'B', 'e', 'A'),
2: dgl.bipartite_from_networkx(B, 'A', 'e', 'B', u_attrs=['some_attr']),
3: dgl.bipartite_from_networkx(B, 'B', 'e', 'A', u_attrs=['some_attr'])
}
B.add_edges_from(
[(1, "a"), (1, "b"), (2, "b"), (2, "c"), (3, "c"), (4, "a")]
)
g_dict = {
0: dgl.bipartite_from_networkx(B, "A", "e", "B"),
1: dgl.bipartite_from_networkx(B, "B", "e", "A"),
2: dgl.bipartite_from_networkx(B, "A", "e", "B", u_attrs=["some_attr"]),
3: dgl.bipartite_from_networkx(B, "B", "e", "A", u_attrs=["some_attr"]),
}
for _, g in g_dict.items():
dgl.batch((g, g, g))
if __name__ == '__main__':
if __name__ == "__main__":
# test_topology('int32')
# test_batching_batched('int32')
# test_batched_features('int32')
......
......@@ -2,10 +2,10 @@ import os
import unittest
import backend as F
import numpy as np
import pytest
import dgl
import numpy as np
import pytest
@unittest.skipIf(os.name == "nt", reason="Cython only works on linux")
......
......@@ -2,12 +2,12 @@ import pickle
import unittest
import backend as F
import numpy as np
from test_utils import parametrize_idtype
import dgl
import dgl.ndarray as nd
import numpy as np
from dgl.frame import Column
from test_utils import parametrize_idtype
def test_column_subcolumn():
......
import unittest
import backend as F
import numpy as np
import dgl
import numpy as np
@unittest.skipIf(
......
......@@ -4,18 +4,18 @@ from collections import Counter
from itertools import product
import backend as F
import dgl
import dgl.function as fn
import networkx as nx
import numpy as np
import pytest
import scipy.sparse as ssp
import test_utils
from dgl import DGLError
from scipy.sparse import rand
from test_utils import get_cases, parametrize_idtype
import dgl
import dgl.function as fn
from dgl import DGLError
rfuncs = {"sum": fn.sum, "max": fn.max, "min": fn.min, "mean": fn.mean}
fill_value = {"sum": 0, "max": float("-inf")}
feat_size = 2
......@@ -51,7 +51,6 @@ def create_test_heterograph(idtype):
@parametrize_idtype
def test_unary_copy_u(idtype):
def _test(mfunc):
g = create_test_heterograph(idtype)
x1 = F.randn((g.num_nodes("user"), feat_size))
......@@ -108,7 +107,6 @@ def test_unary_copy_u(idtype):
@parametrize_idtype
def test_unary_copy_e(idtype):
def _test(mfunc):
g = create_test_heterograph(idtype)
feat_size = 2
......@@ -168,7 +166,6 @@ def test_unary_copy_e(idtype):
@parametrize_idtype
def test_binary_op(idtype):
def _test(lhs, rhs, binary_op):
g = create_test_heterograph(idtype)
n1 = F.randn((g.num_nodes("user"), feat_size))
......@@ -237,6 +234,7 @@ def test_binary_op(idtype):
loss = F.sum(r2.view(-1), 0)
F.backward(loss)
n_grad2 = F.grad(g.nodes["game"].data["h"])
# correctness check
def _print_error(a, b):
for i, (x, y) in enumerate(
......
from itertools import product
import backend as F
import dgl
import dgl.function as fn
import networkx as nx
import numpy as np
import backend as F
from itertools import product
from test_utils import parametrize_idtype, get_cases
import pytest
from test_utils import get_cases, parametrize_idtype
def udf_copy_src(edges):
return {'m': edges.src['u']}
return {"m": edges.src["u"]}
def udf_copy_edge(edges):
return {'m': edges.data['e']}
return {"m": edges.data["e"]}
def udf_mean(nodes):
return {'r2': F.mean(nodes.mailbox['m'], 1)}
return {"r2": F.mean(nodes.mailbox["m"], 1)}
def udf_sum(nodes):
return {'r2': F.sum(nodes.mailbox['m'], 1)}
return {"r2": F.sum(nodes.mailbox["m"], 1)}
def udf_max(nodes):
return {'r2': F.max(nodes.mailbox['m'], 1)}
return {"r2": F.max(nodes.mailbox["m"], 1)}
D1 = 5
D2 = 3
D3 = 4
D4 = 10 # NOTE(xiang): used to dot feature vector
builtin = {'sum': fn.sum, 'max': fn.max, 'mean': fn.mean}
udf_reduce = {'sum': udf_sum, 'max': udf_max, 'mean': udf_mean}
fill_value = {'sum': 0, 'max': float("-inf")}
D4 = 10 # NOTE(xiang): used to dot feature vector
builtin = {"sum": fn.sum, "max": fn.max, "mean": fn.mean}
udf_reduce = {"sum": udf_sum, "max": udf_max, "mean": udf_mean}
fill_value = {"sum": 0, "max": float("-inf")}
def generate_feature(g, broadcast='none', binary_op='none'):
def generate_feature(g, broadcast="none", binary_op="none"):
"""Create graph with src, edge, dst feature. broadcast can be 'u',
'e', 'v', 'none'
"""
np.random.seed(31)
nv = g.number_of_nodes()
ne = g.number_of_edges()
if binary_op == 'dot':
if broadcast == 'e':
if binary_op == "dot":
if broadcast == "e":
u = F.tensor(np.random.uniform(-1, 1, (nv, D1, D2, D3, D4)))
e = F.tensor(np.random.uniform(-1, 1, (ne, D2, 1, D4)))
v = F.tensor(np.random.uniform(-1, 1, (nv, D1, D2, D3, D4)))
elif broadcast == 'u':
elif broadcast == "u":
u = F.tensor(np.random.uniform(-1, 1, (nv, D2, 1, D4)))
e = F.tensor(np.random.uniform(-1, 1, (ne, D1, D2, D3, D4)))
v = F.tensor(np.random.uniform(-1, 1, (nv, D1, D2, D3, D4)))
elif broadcast == 'v':
elif broadcast == "v":
u = F.tensor(np.random.uniform(-1, 1, (nv, D1, D2, D3, D4)))
e = F.tensor(np.random.uniform(-1, 1, (ne, D1, D2, D3, D4)))
v = F.tensor(np.random.uniform(-1, 1, (nv, D2, 1, D4)))
......@@ -57,15 +64,15 @@ def generate_feature(g, broadcast='none', binary_op='none'):
e = F.tensor(np.random.uniform(-1, 1, (ne, D1, D2, D3, D4)))
v = F.tensor(np.random.uniform(-1, 1, (nv, D1, D2, D3, D4)))
else:
if broadcast == 'e':
if broadcast == "e":
u = F.tensor(np.random.uniform(-1, 1, (nv, D1, D2, D3)))
e = F.tensor(np.random.uniform(-1, 1, (ne, D2, 1)))
v = F.tensor(np.random.uniform(-1, 1, (nv, D1, D2, D3)))
elif broadcast == 'u':
elif broadcast == "u":
u = F.tensor(np.random.uniform(-1, 1, (nv, D2, 1)))
e = F.tensor(np.random.uniform(-1, 1, (ne, D1, D2, D3)))
v = F.tensor(np.random.uniform(-1, 1, (nv, D1, D2, D3)))
elif broadcast == 'v':
elif broadcast == "v":
u = F.tensor(np.random.uniform(-1, 1, (nv, D1, D2, D3)))
e = F.tensor(np.random.uniform(-1, 1, (ne, D1, D2, D3)))
v = F.tensor(np.random.uniform(-1, 1, (nv, D2, 1)))
......@@ -73,7 +80,11 @@ def generate_feature(g, broadcast='none', binary_op='none'):
u = F.tensor(np.random.uniform(-1, 1, (nv, D1, D2, D3)))
e = F.tensor(np.random.uniform(-1, 1, (ne, D1, D2, D3)))
v = F.tensor(np.random.uniform(-1, 1, (nv, D1, D2, D3)))
return F.astype(u, F.float32), F.astype(v, F.float32), F.astype(e, F.float32)
return (
F.astype(u, F.float32),
F.astype(v, F.float32),
F.astype(e, F.float32),
)
def test_copy_src_reduce():
......@@ -83,60 +94,65 @@ def test_copy_src_reduce():
# https://github.com/dmlc/dgl/issues/761
g.add_edges(g.nodes(), g.nodes())
g = g.to(F.ctx())
hu, hv, he = generate_feature(g, 'none', 'none')
hu, hv, he = generate_feature(g, "none", "none")
if partial:
nid = F.tensor(list(range(0, 100, 2)), g.idtype)
g.ndata['u'] = F.attach_grad(F.clone(hu))
g.ndata['v'] = F.attach_grad(F.clone(hv))
g.edata['e'] = F.attach_grad(F.clone(he))
g.ndata["u"] = F.attach_grad(F.clone(hu))
g.ndata["v"] = F.attach_grad(F.clone(hv))
g.edata["e"] = F.attach_grad(F.clone(he))
with F.record_grad():
if partial:
g.pull(nid, fn.copy_u(u='u', out='m'),
builtin[red](msg='m', out='r1'))
g.pull(
nid,
fn.copy_u(u="u", out="m"),
builtin[red](msg="m", out="r1"),
)
else:
g.update_all(fn.copy_u(u='u', out='m'),
builtin[red](msg='m', out='r1'))
r1 = g.ndata['r1']
g.update_all(
fn.copy_u(u="u", out="m"), builtin[red](msg="m", out="r1")
)
r1 = g.ndata["r1"]
F.backward(F.reduce_sum(r1))
n_grad1 = F.grad(g.ndata['u'])
n_grad1 = F.grad(g.ndata["u"])
# reset grad
g.ndata['u'] = F.attach_grad(F.clone(hu))
g.ndata['v'] = F.attach_grad(F.clone(hv))
g.edata['e'] = F.attach_grad(F.clone(he))
g.ndata["u"] = F.attach_grad(F.clone(hu))
g.ndata["v"] = F.attach_grad(F.clone(hv))
g.edata["e"] = F.attach_grad(F.clone(he))
with F.record_grad():
if partial:
g.pull(nid, udf_copy_src, udf_reduce[red])
else:
g.update_all(udf_copy_src, udf_reduce[red])
r2 = g.ndata['r2']
r2 = g.ndata["r2"]
F.backward(F.reduce_sum(r2))
n_grad2 = F.grad(g.ndata['u'])
n_grad2 = F.grad(g.ndata["u"])
def _print_error(a, b):
print("ERROR: Test copy_src_{} partial: {}".
format(red, partial))
for i, (x, y) in enumerate(zip(F.asnumpy(a).flatten(), F.asnumpy(b).flatten())):
print("ERROR: Test copy_src_{} partial: {}".format(red, partial))
for i, (x, y) in enumerate(
zip(F.asnumpy(a).flatten(), F.asnumpy(b).flatten())
):
if not np.allclose(x, y):
print('@{} {} v.s. {}'.format(i, x, y))
print("@{} {} v.s. {}".format(i, x, y))
if not F.allclose(r1, r2):
_print_error(r1, r2)
assert F.allclose(r1, r2)
if not F.allclose(n_grad1, n_grad2):
print('node grad')
print("node grad")
_print_error(n_grad1, n_grad2)
assert(F.allclose(n_grad1, n_grad2))
assert F.allclose(n_grad1, n_grad2)
_test('sum', False)
_test('max', False)
_test('mean', False)
_test('sum', True)
_test('max', True)
_test('mean', True)
_test("sum", False)
_test("max", False)
_test("mean", False)
_test("sum", True)
_test("max", True)
_test("mean", True)
def test_copy_edge_reduce():
......@@ -145,80 +161,85 @@ def test_copy_edge_reduce():
# NOTE(zihao): add self-loop to avoid zero-degree nodes.
g.add_edges(g.nodes(), g.nodes())
g = g.to(F.ctx())
hu, hv, he = generate_feature(g, 'none', 'none')
hu, hv, he = generate_feature(g, "none", "none")
if partial:
nid = F.tensor(list(range(0, 100, 2)), g.idtype)
g.ndata['u'] = F.attach_grad(F.clone(hu))
g.ndata['v'] = F.attach_grad(F.clone(hv))
g.edata['e'] = F.attach_grad(F.clone(he))
g.ndata["u"] = F.attach_grad(F.clone(hu))
g.ndata["v"] = F.attach_grad(F.clone(hv))
g.edata["e"] = F.attach_grad(F.clone(he))
with F.record_grad():
if partial:
g.pull(nid, fn.copy_e(e='e', out='m'),
builtin[red](msg='m', out='r1'))
g.pull(
nid,
fn.copy_e(e="e", out="m"),
builtin[red](msg="m", out="r1"),
)
else:
g.update_all(fn.copy_e(e='e', out='m'),
builtin[red](msg='m', out='r1'))
r1 = g.ndata['r1']
g.update_all(
fn.copy_e(e="e", out="m"), builtin[red](msg="m", out="r1")
)
r1 = g.ndata["r1"]
F.backward(F.reduce_sum(r1))
e_grad1 = F.grad(g.edata['e'])
e_grad1 = F.grad(g.edata["e"])
# reset grad
g.ndata['u'] = F.attach_grad(F.clone(hu))
g.ndata['v'] = F.attach_grad(F.clone(hv))
g.edata['e'] = F.attach_grad(F.clone(he))
g.ndata["u"] = F.attach_grad(F.clone(hu))
g.ndata["v"] = F.attach_grad(F.clone(hv))
g.edata["e"] = F.attach_grad(F.clone(he))
with F.record_grad():
if partial:
g.pull(nid, udf_copy_edge, udf_reduce[red])
else:
g.update_all(udf_copy_edge, udf_reduce[red])
r2 = g.ndata['r2']
r2 = g.ndata["r2"]
F.backward(F.reduce_sum(r2))
e_grad2 = F.grad(g.edata['e'])
e_grad2 = F.grad(g.edata["e"])
def _print_error(a, b):
print("ERROR: Test copy_edge_{} partial: {}".
format(red, partial))
print("ERROR: Test copy_edge_{} partial: {}".format(red, partial))
return
for i, (x, y) in enumerate(zip(F.asnumpy(a).flatten(), F.asnumpy(b).flatten())):
for i, (x, y) in enumerate(
zip(F.asnumpy(a).flatten(), F.asnumpy(b).flatten())
):
if not np.allclose(x, y):
print('@{} {} v.s. {}'.format(i, x, y))
print("@{} {} v.s. {}".format(i, x, y))
if not F.allclose(r1, r2):
_print_error(r1, r2)
assert F.allclose(r1, r2)
if not F.allclose(e_grad1, e_grad2):
print('edge gradient')
print("edge gradient")
_print_error(e_grad1, e_grad2)
assert(F.allclose(e_grad1, e_grad2))
assert F.allclose(e_grad1, e_grad2)
_test('sum', False)
_test('max', False)
_test('mean', False)
_test('sum', True)
_test('max', True)
_test('mean', True)
_test("sum", False)
_test("max", False)
_test("mean", False)
_test("sum", True)
_test("max", True)
_test("mean", True)
def test_all_binary_builtins():
def _test(g, lhs, rhs, binary_op, reducer, partial, nid, broadcast='none'):
def _test(g, lhs, rhs, binary_op, reducer, partial, nid, broadcast="none"):
# initialize node/edge features with uniform(-1, 1)
hu, hv, he = generate_feature(g, broadcast, binary_op)
if binary_op == 'div':
if binary_op == "div":
# op = div
# lhs range: [-1, 1]
# rhs range: [1, 2]
# result range: [-1, 1]
if rhs == 'u':
if rhs == "u":
hu = (hu + 3) / 2
elif rhs == 'v':
elif rhs == "v":
hv = (hv + 3) / 2
elif rhs == 'e':
elif rhs == "e":
he = (he + 3) / 2
if binary_op == 'add' or binary_op == 'sub':
if binary_op == "add" or binary_op == "sub":
# op = add, sub
# lhs range: [-1/2, 1/2]
# rhs range: [-1/2, 1/2]
......@@ -227,9 +248,9 @@ def test_all_binary_builtins():
hv = hv / 2
he = he / 2
g.ndata['u'] = F.attach_grad(F.clone(hu))
g.ndata['v'] = F.attach_grad(F.clone(hv))
g.edata['e'] = F.attach_grad(F.clone(he))
g.ndata["u"] = F.attach_grad(F.clone(hu))
g.ndata["v"] = F.attach_grad(F.clone(hv))
g.edata["e"] = F.attach_grad(F.clone(he))
builtin_msg_name = "{}_{}_{}".format(lhs, binary_op, rhs)
builtin_msg = getattr(fn, builtin_msg_name)
......@@ -245,18 +266,18 @@ def test_all_binary_builtins():
with F.record_grad():
if partial:
g.pull(nid, builtin_msg(lhs, rhs, 'm'), builtin_red('m', 'r1'))
g.pull(nid, builtin_msg(lhs, rhs, "m"), builtin_red("m", "r1"))
else:
g.update_all(builtin_msg(lhs, rhs, 'm'), builtin_red('m', 'r1'))
r1 = g.ndata.pop('r1')
g.update_all(builtin_msg(lhs, rhs, "m"), builtin_red("m", "r1"))
r1 = g.ndata.pop("r1")
F.backward(F.reduce_sum(r1))
lhs_grad_1 = F.grad(target_feature_switch(g, lhs))
rhs_grad_1 = F.grad(target_feature_switch(g, rhs))
# reset grad
g.ndata['u'] = F.attach_grad(F.clone(hu))
g.ndata['v'] = F.attach_grad(F.clone(hv))
g.edata['e'] = F.attach_grad(F.clone(he))
g.ndata["u"] = F.attach_grad(F.clone(hu))
g.ndata["v"] = F.attach_grad(F.clone(hv))
g.edata["e"] = F.attach_grad(F.clone(he))
def target_switch(edges, target):
if target == "u":
......@@ -266,7 +287,7 @@ def test_all_binary_builtins():
elif target == "e":
return edges.data
else:
assert(0), "Unknown target {}".format(target)
assert 0, "Unknown target {}".format(target)
def mfunc(edges):
op = getattr(F, binary_op)
......@@ -282,15 +303,15 @@ def test_all_binary_builtins():
def rfunc(nodes):
op = getattr(F, reducer)
return {"r2": op(nodes.mailbox['m'], 1)}
return {"r2": op(nodes.mailbox["m"], 1)}
with F.record_grad():
if partial:
g.pull(nid, mfunc, rfunc)
else:
g.update_all(mfunc, rfunc)
r2 = g.ndata.pop('r2')
F.backward(F.reduce_sum(r2), F.tensor([1.]))
r2 = g.ndata.pop("r2")
F.backward(F.reduce_sum(r2), F.tensor([1.0]))
lhs_grad_2 = F.grad(target_feature_switch(g, lhs))
rhs_grad_2 = F.grad(target_feature_switch(g, rhs))
......@@ -298,27 +319,32 @@ def test_all_binary_builtins():
atol = 1e-4
def _print_error(a, b):
print("ERROR: Test {}_{}_{}_{} broadcast: {} partial: {}".
format(lhs, binary_op, rhs, reducer, broadcast, partial))
print(
"ERROR: Test {}_{}_{}_{} broadcast: {} partial: {}".format(
lhs, binary_op, rhs, reducer, broadcast, partial
)
)
return
if lhs == 'u':
if lhs == "u":
lhs_data = hu
elif lhs == 'v':
elif lhs == "v":
lhs_data = hv
elif lhs == 'e':
elif lhs == "e":
lhs_data = he
if rhs == 'u':
if rhs == "u":
rhs_data = hu
elif rhs == 'v':
elif rhs == "v":
rhs_data = hv
elif rhs == 'e':
elif rhs == "e":
rhs_data = he
print("lhs", F.asnumpy(lhs_data).tolist())
print("rhs", F.asnumpy(rhs_data).tolist())
for i, (x, y) in enumerate(zip(F.asnumpy(a).flatten(), F.asnumpy(b).flatten())):
for i, (x, y) in enumerate(
zip(F.asnumpy(a).flatten(), F.asnumpy(b).flatten())
):
if not np.allclose(x, y, rtol, atol):
print('@{} {} v.s. {}'.format(i, x, y))
print("@{} {} v.s. {}".format(i, x, y))
if not F.allclose(r1, r2, rtol, atol):
_print_error(r1, r2)
......@@ -327,12 +353,12 @@ def test_all_binary_builtins():
if not F.allclose(lhs_grad_1, lhs_grad_2, rtol, atol):
print("left grad")
_print_error(lhs_grad_1, lhs_grad_2)
assert(F.allclose(lhs_grad_1, lhs_grad_2, rtol, atol))
assert F.allclose(lhs_grad_1, lhs_grad_2, rtol, atol)
if not F.allclose(rhs_grad_1, rhs_grad_2, rtol, atol):
print("right grad")
_print_error(rhs_grad_1, rhs_grad_2)
assert(F.allclose(rhs_grad_1, rhs_grad_2, rtol, atol))
assert F.allclose(rhs_grad_1, rhs_grad_2, rtol, atol)
g = dgl.DGLGraph()
g.add_nodes(20)
......@@ -359,20 +385,30 @@ def test_all_binary_builtins():
for broadcast in ["none", lhs, rhs]:
for partial in [False, True]:
print(lhs, rhs, binary_op, reducer, broadcast, partial)
_test(g, lhs, rhs, binary_op, reducer, partial, nid,
broadcast=broadcast)
_test(
g,
lhs,
rhs,
binary_op,
reducer,
partial,
nid,
broadcast=broadcast,
)
@parametrize_idtype
@pytest.mark.parametrize('g', get_cases(['homo-zero-degree']))
@pytest.mark.parametrize("g", get_cases(["homo-zero-degree"]))
def test_mean_zero_degree(g, idtype):
g = g.astype(idtype).to(F.ctx())
g.ndata['h'] = F.ones((g.number_of_nodes(), 3))
g.update_all(fn.copy_u('h', 'm'), fn.mean('m', 'x'))
g.ndata["h"] = F.ones((g.number_of_nodes(), 3))
g.update_all(fn.copy_u("h", "m"), fn.mean("m", "x"))
deg = F.asnumpy(g.in_degrees())
v = F.tensor(np.where(deg == 0)[0])
assert F.allclose(F.gather_row(g.ndata['x'], v), F.zeros((len(v), 3)))
assert F.allclose(F.gather_row(g.ndata["x"], v), F.zeros((len(v), 3)))
if __name__ == '__main__':
if __name__ == "__main__":
test_copy_src_reduce()
test_copy_edge_reduce()
test_all_binary_builtins()
import math
import numbers
import backend as F
import dgl
import networkx as nx
import numpy as np
import pytest
import scipy.sparse as sp
import networkx as nx
import dgl
import backend as F
from dgl import DGLError
import pytest
# graph generation: a random graph with 10 nodes
# and 20 edges.
......@@ -22,6 +25,7 @@ def edge_pair_input(sort=False):
dst = [9, 6, 3, 9, 4, 4, 9, 9, 1, 8, 3, 2, 8, 1, 5, 7, 3, 2, 6, 5]
return src, dst
def nx_input():
g = nx.DiGraph()
src, dst = edge_pair_input()
......@@ -29,22 +33,26 @@ def nx_input():
g.add_edge(*e, id=i)
return g
def elist_input():
src, dst = edge_pair_input()
return list(zip(src, dst))
def scipy_coo_input():
src, dst = edge_pair_input()
return sp.coo_matrix((np.ones((20,)), (src, dst)), shape=(10,10))
return sp.coo_matrix((np.ones((20,)), (src, dst)), shape=(10, 10))
def scipy_csr_input():
src, dst = edge_pair_input()
csr = sp.coo_matrix((np.ones((20,)), (src, dst)), shape=(10,10)).tocsr()
csr = sp.coo_matrix((np.ones((20,)), (src, dst)), shape=(10, 10)).tocsr()
csr.sort_indices()
# src = [0 0 0 1 1 2 2 3 3 4 4 4 4 5 5 6 7 7 7 9]
# dst = [4 6 9 3 5 3 7 5 8 1 3 4 9 1 9 6 2 8 9 2]
return csr
def gen_by_mutation():
g = dgl.DGLGraph()
src, dst = edge_pair_input()
......@@ -52,9 +60,11 @@ def gen_by_mutation():
g.add_edges(src, dst)
return g
def gen_from_data(data, readonly, sort):
return dgl.DGLGraph(data, readonly=readonly, sort_csr=True)
def test_query():
def _test_one(g):
assert g.number_of_nodes() == 10
......@@ -63,45 +73,63 @@ def test_query():
for i in range(10):
assert g.has_nodes(i)
assert not g.has_nodes(11)
assert F.allclose(g.has_nodes([0,2,10,11]), F.tensor([1,1,0,0]))
assert F.allclose(g.has_nodes([0, 2, 10, 11]), F.tensor([1, 1, 0, 0]))
src, dst = edge_pair_input()
for u, v in zip(src, dst):
assert g.has_edges_between(u, v)
assert not g.has_edges_between(0, 0)
assert F.allclose(g.has_edges_between([0, 0, 3], [0, 9, 8]), F.tensor([0,1,1]))
assert set(F.asnumpy(g.predecessors(9))) == set([0,5,7,4])
assert set(F.asnumpy(g.successors(2))) == set([7,3])
assert F.allclose(
g.has_edges_between([0, 0, 3], [0, 9, 8]), F.tensor([0, 1, 1])
)
assert set(F.asnumpy(g.predecessors(9))) == set([0, 5, 7, 4])
assert set(F.asnumpy(g.successors(2))) == set([7, 3])
assert g.edge_ids(4,4) == 5
assert F.allclose(g.edge_ids([4,0], [4,9]), F.tensor([5,0]))
assert g.edge_ids(4, 4) == 5
assert F.allclose(g.edge_ids([4, 0], [4, 9]), F.tensor([5, 0]))
src, dst = g.find_edges([3, 6, 5])
assert F.allclose(src, F.tensor([5, 7, 4]))
assert F.allclose(dst, F.tensor([9, 9, 4]))
src, dst, eid = g.in_edges(9, form='all')
src, dst, eid = g.in_edges(9, form="all")
tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid)))
assert set(tup) == set([(0,9,0),(5,9,3),(7,9,6),(4,9,7)])
src, dst, eid = g.in_edges([9,0,8], form='all') # test node#0 has no in edges
assert set(tup) == set([(0, 9, 0), (5, 9, 3), (7, 9, 6), (4, 9, 7)])
src, dst, eid = g.in_edges(
[9, 0, 8], form="all"
) # test node#0 has no in edges
tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid)))
assert set(tup) == set([(0,9,0),(5,9,3),(7,9,6),(4,9,7),(3,8,9),(7,8,12)])
assert set(tup) == set(
[(0, 9, 0), (5, 9, 3), (7, 9, 6), (4, 9, 7), (3, 8, 9), (7, 8, 12)]
)
src, dst, eid = g.out_edges(0, form='all')
src, dst, eid = g.out_edges(0, form="all")
tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid)))
assert set(tup) == set([(0,9,0),(0,6,1),(0,4,4)])
src, dst, eid = g.out_edges([0,4,8], form='all') # test node#8 has no out edges
assert set(tup) == set([(0, 9, 0), (0, 6, 1), (0, 4, 4)])
src, dst, eid = g.out_edges(
[0, 4, 8], form="all"
) # test node#8 has no out edges
tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid)))
assert set(tup) == set([(0,9,0),(0,6,1),(0,4,4),(4,3,2),(4,4,5),(4,9,7),(4,1,8)])
src, dst, eid = g.edges('all', 'eid')
assert set(tup) == set(
[
(0, 9, 0),
(0, 6, 1),
(0, 4, 4),
(4, 3, 2),
(4, 4, 5),
(4, 9, 7),
(4, 1, 8),
]
)
src, dst, eid = g.edges("all", "eid")
t_src, t_dst = edge_pair_input()
t_tup = list(zip(t_src, t_dst, list(range(20))))
tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid)))
assert set(tup) == set(t_tup)
assert list(F.asnumpy(eid)) == list(range(20))
src, dst, eid = g.edges('all', 'srcdst')
src, dst, eid = g.edges("all", "srcdst")
t_src, t_dst = edge_pair_input()
t_tup = list(zip(t_src, t_dst, list(range(20))))
tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid)))
......@@ -116,9 +144,13 @@ def test_query():
assert F.allclose(g.out_degrees([8, 9]), F.tensor([0, 1]))
assert np.array_equal(
F.sparse_to_numpy(g.adjacency_matrix(transpose=True)), scipy_coo_input().toarray().T)
F.sparse_to_numpy(g.adjacency_matrix(transpose=True)),
scipy_coo_input().toarray().T,
)
assert np.array_equal(
F.sparse_to_numpy(g.adjacency_matrix(transpose=False)), scipy_coo_input().toarray())
F.sparse_to_numpy(g.adjacency_matrix(transpose=False)),
scipy_coo_input().toarray(),
)
def _test(g):
# test twice to see whether the cached format works or not
......@@ -132,48 +164,73 @@ def test_query():
for i in range(10):
assert g.has_nodes(i)
assert not g.has_nodes(11)
assert F.allclose(g.has_nodes([0,2,10,11]), F.tensor([1,1,0,0]))
assert F.allclose(g.has_nodes([0, 2, 10, 11]), F.tensor([1, 1, 0, 0]))
src, dst = edge_pair_input(sort=True)
for u, v in zip(src, dst):
assert g.has_edges_between(u, v)
assert not g.has_edges_between(0, 0)
assert F.allclose(g.has_edges_between([0, 0, 3], [0, 9, 8]), F.tensor([0,1,1]))
assert set(F.asnumpy(g.predecessors(9))) == set([0,5,7,4])
assert set(F.asnumpy(g.successors(2))) == set([7,3])
assert F.allclose(
g.has_edges_between([0, 0, 3], [0, 9, 8]), F.tensor([0, 1, 1])
)
assert set(F.asnumpy(g.predecessors(9))) == set([0, 5, 7, 4])
assert set(F.asnumpy(g.successors(2))) == set([7, 3])
# src = [0 0 0 1 1 2 2 3 3 4 4 4 4 5 5 6 7 7 7 9]
# dst = [4 6 9 3 5 3 7 5 8 1 3 4 9 1 9 6 2 8 9 2]
# eid = [0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9]
assert g.edge_ids(4,4) == 11
assert F.allclose(g.edge_ids([4,0], [4,9]), F.tensor([11,2]))
assert g.edge_ids(4, 4) == 11
assert F.allclose(g.edge_ids([4, 0], [4, 9]), F.tensor([11, 2]))
src, dst = g.find_edges([3, 6, 5])
assert F.allclose(src, F.tensor([1, 2, 2]))
assert F.allclose(dst, F.tensor([3, 7, 3]))
src, dst, eid = g.in_edges(9, form='all')
src, dst, eid = g.in_edges(9, form="all")
tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid)))
assert set(tup) == set([(0,9,2),(5,9,14),(7,9,18),(4,9,12)])
src, dst, eid = g.in_edges([9,0,8], form='all') # test node#0 has no in edges
assert set(tup) == set([(0, 9, 2), (5, 9, 14), (7, 9, 18), (4, 9, 12)])
src, dst, eid = g.in_edges(
[9, 0, 8], form="all"
) # test node#0 has no in edges
tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid)))
assert set(tup) == set([(0,9,2),(5,9,14),(7,9,18),(4,9,12),(3,8,8),(7,8,17)])
src, dst, eid = g.out_edges(0, form='all')
assert set(tup) == set(
[
(0, 9, 2),
(5, 9, 14),
(7, 9, 18),
(4, 9, 12),
(3, 8, 8),
(7, 8, 17),
]
)
src, dst, eid = g.out_edges(0, form="all")
tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid)))
assert set(tup) == set([(0,9,2),(0,6,1),(0,4,0)])
src, dst, eid = g.out_edges([0,4,8], form='all') # test node#8 has no out edges
assert set(tup) == set([(0, 9, 2), (0, 6, 1), (0, 4, 0)])
src, dst, eid = g.out_edges(
[0, 4, 8], form="all"
) # test node#8 has no out edges
tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid)))
assert set(tup) == set([(0,9,2),(0,6,1),(0,4,0),(4,3,10),(4,4,11),(4,9,12),(4,1,9)])
src, dst, eid = g.edges('all', 'eid')
assert set(tup) == set(
[
(0, 9, 2),
(0, 6, 1),
(0, 4, 0),
(4, 3, 10),
(4, 4, 11),
(4, 9, 12),
(4, 1, 9),
]
)
src, dst, eid = g.edges("all", "eid")
t_src, t_dst = edge_pair_input(sort=True)
t_tup = list(zip(t_src, t_dst, list(range(20))))
tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid)))
assert set(tup) == set(t_tup)
assert list(F.asnumpy(eid)) == list(range(20))
src, dst, eid = g.edges('all', 'srcdst')
src, dst, eid = g.edges("all", "srcdst")
t_src, t_dst = edge_pair_input(sort=True)
t_tup = list(zip(t_src, t_dst, list(range(20))))
tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid)))
......@@ -188,9 +245,13 @@ def test_query():
assert F.allclose(g.out_degrees([8, 9]), F.tensor([0, 1]))
assert np.array_equal(
F.sparse_to_numpy(g.adjacency_matrix(transpose=True)), scipy_coo_input().toarray().T)
F.sparse_to_numpy(g.adjacency_matrix(transpose=True)),
scipy_coo_input().toarray().T,
)
assert np.array_equal(
F.sparse_to_numpy(g.adjacency_matrix(transpose=False)), scipy_coo_input().toarray())
F.sparse_to_numpy(g.adjacency_matrix(transpose=False)),
scipy_coo_input().toarray(),
)
def _test_csr(g):
# test twice to see whether the cached format works or not
......@@ -199,18 +260,18 @@ def test_query():
def _test_edge_ids():
g = gen_by_mutation()
eids = g.edge_ids([4,0], [4,9])
eids = g.edge_ids([4, 0], [4, 9])
assert eids.shape[0] == 2
eid = g.edge_ids(4, 4)
assert isinstance(eid, numbers.Number)
with pytest.raises(DGLError):
eids = g.edge_ids([9,0], [4,9])
eids = g.edge_ids([9, 0], [4, 9])
with pytest.raises(DGLError):
eid = g.edge_ids(4, 5)
g.add_edges(0, 4)
eids = g.edge_ids([0,0], [4,9])
eids = g.edge_ids([0, 0], [4, 9])
eid = g.edge_ids(0, 4)
_test(gen_by_mutation())
......@@ -224,75 +285,104 @@ def test_query():
_test_csr(gen_from_data(scipy_csr_input(), True, False))
_test_edge_ids()
def test_mutation():
g = dgl.DGLGraph()
g = g.to(F.ctx())
# test add nodes with data
g.add_nodes(5)
g.add_nodes(5, {'h' : F.ones((5, 2))})
g.add_nodes(5, {"h": F.ones((5, 2))})
ans = F.cat([F.zeros((5, 2)), F.ones((5, 2))], 0)
assert F.allclose(ans, g.ndata['h'])
g.ndata['w'] = 2 * F.ones((10, 2))
assert F.allclose(2 * F.ones((10, 2)), g.ndata['w'])
assert F.allclose(ans, g.ndata["h"])
g.ndata["w"] = 2 * F.ones((10, 2))
assert F.allclose(2 * F.ones((10, 2)), g.ndata["w"])
# test add edges with data
g.add_edges([2, 3], [3, 4])
g.add_edges([0, 1], [1, 2], {'m' : F.ones((2, 2))})
g.add_edges([0, 1], [1, 2], {"m": F.ones((2, 2))})
ans = F.cat([F.zeros((2, 2)), F.ones((2, 2))], 0)
assert F.allclose(ans, g.edata['m'])
assert F.allclose(ans, g.edata["m"])
def test_scipy_adjmat():
g = dgl.DGLGraph()
g.add_nodes(10)
g.add_edges(range(9), range(1, 10))
adj_0 = g.adj(scipy_fmt='csr')
adj_1 = g.adj(scipy_fmt='coo')
adj_0 = g.adj(scipy_fmt="csr")
adj_1 = g.adj(scipy_fmt="coo")
assert np.array_equal(adj_0.toarray(), adj_1.toarray())
adj_t0 = g.adj(transpose=False, scipy_fmt='csr')
adj_t_1 = g.adj(transpose=False, scipy_fmt='coo')
adj_t0 = g.adj(transpose=False, scipy_fmt="csr")
adj_t_1 = g.adj(transpose=False, scipy_fmt="coo")
assert np.array_equal(adj_0.toarray(), adj_1.toarray())
def test_incmat():
g = dgl.DGLGraph()
g.add_nodes(4)
g.add_edges(0, 1) # 0
g.add_edges(0, 2) # 1
g.add_edges(0, 3) # 2
g.add_edges(2, 3) # 3
g.add_edges(1, 1) # 4
inc_in = F.sparse_to_numpy(g.incidence_matrix('in'))
inc_out = F.sparse_to_numpy(g.incidence_matrix('out'))
inc_both = F.sparse_to_numpy(g.incidence_matrix('both'))
g.add_edges(0, 1) # 0
g.add_edges(0, 2) # 1
g.add_edges(0, 3) # 2
g.add_edges(2, 3) # 3
g.add_edges(1, 1) # 4
inc_in = F.sparse_to_numpy(g.incidence_matrix("in"))
inc_out = F.sparse_to_numpy(g.incidence_matrix("out"))
inc_both = F.sparse_to_numpy(g.incidence_matrix("both"))
print(inc_in)
print(inc_out)
print(inc_both)
assert np.allclose(
inc_in,
np.array([[0., 0., 0., 0., 0.],
[1., 0., 0., 0., 1.],
[0., 1., 0., 0., 0.],
[0., 0., 1., 1., 0.]]))
inc_in,
np.array(
[
[0.0, 0.0, 0.0, 0.0, 0.0],
[1.0, 0.0, 0.0, 0.0, 1.0],
[0.0, 1.0, 0.0, 0.0, 0.0],
[0.0, 0.0, 1.0, 1.0, 0.0],
]
),
)
assert np.allclose(
inc_out,
np.array([[1., 1., 1., 0., 0.],
[0., 0., 0., 0., 1.],
[0., 0., 0., 1., 0.],
[0., 0., 0., 0., 0.]]))
inc_out,
np.array(
[
[1.0, 1.0, 1.0, 0.0, 0.0],
[0.0, 0.0, 0.0, 0.0, 1.0],
[0.0, 0.0, 0.0, 1.0, 0.0],
[0.0, 0.0, 0.0, 0.0, 0.0],
]
),
)
assert np.allclose(
inc_both,
np.array([[-1., -1., -1., 0., 0.],
[1., 0., 0., 0., 0.],
[0., 1., 0., -1., 0.],
[0., 0., 1., 1., 0.]]))
inc_both,
np.array(
[
[-1.0, -1.0, -1.0, 0.0, 0.0],
[1.0, 0.0, 0.0, 0.0, 0.0],
[0.0, 1.0, 0.0, -1.0, 0.0],
[0.0, 0.0, 1.0, 1.0, 0.0],
]
),
)
def test_find_edges():
g = dgl.DGLGraph()
g.add_nodes(10)
g.add_edges(range(9), range(1, 10))
e = g.find_edges([1, 3, 2, 4])
assert F.asnumpy(e[0][0]) == 1 and F.asnumpy(e[0][1]) == 3 and F.asnumpy(e[0][2]) == 2 and F.asnumpy(e[0][3]) == 4
assert F.asnumpy(e[1][0]) == 2 and F.asnumpy(e[1][1]) == 4 and F.asnumpy(e[1][2]) == 3 and F.asnumpy(e[1][3]) == 5
assert (
F.asnumpy(e[0][0]) == 1
and F.asnumpy(e[0][1]) == 3
and F.asnumpy(e[0][2]) == 2
and F.asnumpy(e[0][3]) == 4
)
assert (
F.asnumpy(e[1][0]) == 2
and F.asnumpy(e[1][1]) == 4
and F.asnumpy(e[1][2]) == 3
and F.asnumpy(e[1][3]) == 5
)
try:
g.find_edges([10])
......@@ -302,6 +392,7 @@ def test_find_edges():
finally:
assert fail
def test_ismultigraph():
g = dgl.DGLGraph()
g.add_nodes(10)
......@@ -313,6 +404,7 @@ def test_ismultigraph():
g.add_edges([0, 2], [0, 3])
assert g.is_multigraph == True
def test_hypersparse_query():
g = dgl.DGLGraph()
g = g.to(F.ctx())
......@@ -323,14 +415,15 @@ def test_hypersparse_query():
assert not g.has_nodes(1000002)
assert g.edge_ids(0, 1) == 0
src, dst = g.find_edges([0])
src, dst, eid = g.in_edges(1, form='all')
src, dst, eid = g.out_edges(0, form='all')
src, dst, eid = g.in_edges(1, form="all")
src, dst, eid = g.out_edges(0, form="all")
src, dst = g.edges()
assert g.in_degrees(0) == 0
assert g.in_degrees(1) == 1
assert g.out_degrees(0) == 1
assert g.out_degrees(1) == 0
def test_empty_data_initialized():
g = dgl.DGLGraph()
g = g.to(F.ctx())
......@@ -339,30 +432,31 @@ def test_empty_data_initialized():
assert "ha" in g.ndata
assert len(g.ndata["ha"]) == 1
def test_is_sorted():
u_src, u_dst = edge_pair_input(False)
s_src, s_dst = edge_pair_input(True)
u_src, u_dst = edge_pair_input(False)
s_src, s_dst = edge_pair_input(True)
u_src = F.tensor(u_src, dtype=F.int32)
u_dst = F.tensor(u_dst, dtype=F.int32)
s_src = F.tensor(s_src, dtype=F.int32)
s_dst = F.tensor(s_dst, dtype=F.int32)
u_src = F.tensor(u_src, dtype=F.int32)
u_dst = F.tensor(u_dst, dtype=F.int32)
s_src = F.tensor(s_src, dtype=F.int32)
s_dst = F.tensor(s_dst, dtype=F.int32)
src_sorted, dst_sorted = dgl.utils.is_sorted_srcdst(u_src, u_dst)
assert src_sorted == False
assert dst_sorted == False
src_sorted, dst_sorted = dgl.utils.is_sorted_srcdst(u_src, u_dst)
assert src_sorted == False
assert dst_sorted == False
src_sorted, dst_sorted = dgl.utils.is_sorted_srcdst(s_src, s_dst)
assert src_sorted == True
assert dst_sorted == True
src_sorted, dst_sorted = dgl.utils.is_sorted_srcdst(s_src, s_dst)
assert src_sorted == True
assert dst_sorted == True
src_sorted, dst_sorted = dgl.utils.is_sorted_srcdst(u_src, u_dst)
assert src_sorted == False
assert dst_sorted == False
src_sorted, dst_sorted = dgl.utils.is_sorted_srcdst(u_src, u_dst)
assert src_sorted == False
assert dst_sorted == False
src_sorted, dst_sorted = dgl.utils.is_sorted_srcdst(s_src, u_dst)
assert src_sorted == True
assert dst_sorted == False
src_sorted, dst_sorted = dgl.utils.is_sorted_srcdst(s_src, u_dst)
assert src_sorted == True
assert dst_sorted == False
def test_default_types():
......@@ -379,10 +473,10 @@ def test_formats():
try:
g.in_degrees()
g.out_degrees()
g.formats('coo').in_degrees()
g.formats('coo').out_degrees()
g.formats('csc').in_degrees()
g.formats('csr').out_degrees()
g.formats("coo").in_degrees()
g.formats("coo").out_degrees()
g.formats("csc").in_degrees()
g.formats("csr").out_degrees()
fail = False
except DGLError:
fail = True
......@@ -390,7 +484,7 @@ def test_formats():
assert not fail
# in_degrees NOT works if csc available only
try:
g.formats('csc').out_degrees()
g.formats("csc").out_degrees()
fail = True
except DGLError:
fail = False
......@@ -398,14 +492,15 @@ def test_formats():
assert not fail
# out_degrees NOT works if csr available only
try:
g.formats('csr').in_degrees()
g.formats("csr").in_degrees()
fail = True
except DGLError:
fail = False
finally:
assert not fail
if __name__ == '__main__':
if __name__ == "__main__":
test_query()
test_mutation()
test_scipy_adjmat()
......
import io
import pickle
import unittest
import backend as F
import dgl
import dgl.function as fn
import networkx as nx
import pytest
import scipy.sparse as ssp
import dgl
import test_utils
from dgl.graph_index import create_graph_index
from dgl.utils import toindex
import backend as F
import dgl.function as fn
import pickle
import io
import unittest, pytest
import test_utils
from test_utils import parametrize_idtype, get_cases
from test_utils import get_cases, parametrize_idtype
from utils import assert_is_identical, assert_is_identical_hetero
def _assert_is_identical_nodeflow(nf1, nf2):
assert nf1.number_of_nodes() == nf2.number_of_nodes()
src, dst = nf1.all_edges()
......@@ -32,23 +36,29 @@ def _assert_is_identical_nodeflow(nf1, nf2):
for k in nf1.blocks[i].data:
assert F.allclose(nf1.blocks[i].data[k], nf2.blocks[i].data[k])
def _assert_is_identical_batchedgraph(bg1, bg2):
assert_is_identical(bg1, bg2)
assert bg1.batch_size == bg2.batch_size
assert bg1.batch_num_nodes == bg2.batch_num_nodes
assert bg1.batch_num_edges == bg2.batch_num_edges
def _assert_is_identical_batchedhetero(bg1, bg2):
assert_is_identical_hetero(bg1, bg2)
for ntype in bg1.ntypes:
assert bg1.batch_num_nodes(ntype) == bg2.batch_num_nodes(ntype)
for canonical_etype in bg1.canonical_etypes:
assert bg1.batch_num_edges(canonical_etype) == bg2.batch_num_edges(canonical_etype)
assert bg1.batch_num_edges(canonical_etype) == bg2.batch_num_edges(
canonical_etype
)
def _assert_is_identical_index(i1, i2):
assert i1.slice_data() == i2.slice_data()
assert F.array_equal(i1.tousertensor(), i2.tousertensor())
def _reconstruct_pickle(obj):
f = io.BytesIO()
pickle.dump(obj, f)
......@@ -58,11 +68,12 @@ def _reconstruct_pickle(obj):
return obj
def test_pickling_index():
# normal index
i = toindex([1, 2, 3])
i.tousertensor()
i.todgltensor() # construct a dgl tensor which is unpicklable
i.todgltensor() # construct a dgl tensor which is unpicklable
i2 = _reconstruct_pickle(i)
_assert_is_identical_index(i, i2)
......@@ -71,6 +82,7 @@ def test_pickling_index():
i2 = _reconstruct_pickle(i)
_assert_is_identical_index(i, i2)
def test_pickling_graph_index():
gi = create_graph_index(None, False)
gi.add_nodes(3)
......@@ -87,53 +99,65 @@ def test_pickling_graph_index():
def _global_message_func(nodes):
return {'x': nodes.data['x']}
return {"x": nodes.data["x"]}
@unittest.skipIf(F._default_context_str == 'gpu', reason="GPU not implemented")
@unittest.skipIf(F._default_context_str == "gpu", reason="GPU not implemented")
@parametrize_idtype
@pytest.mark.parametrize('g', get_cases(exclude=['dglgraph', 'two_hetero_batch']))
@pytest.mark.parametrize(
"g", get_cases(exclude=["dglgraph", "two_hetero_batch"])
)
def test_pickling_graph(g, idtype):
g = g.astype(idtype)
new_g = _reconstruct_pickle(g)
test_utils.check_graph_equal(g, new_g, check_feature=True)
@unittest.skipIf(F._default_context_str == 'gpu', reason="GPU not implemented")
@unittest.skipIf(F._default_context_str == "gpu", reason="GPU not implemented")
def test_pickling_batched_heterograph():
# copied from test_heterograph.create_test_heterograph()
g = dgl.heterograph({
('user', 'follows', 'user'): ([0, 1], [1, 2]),
('user', 'plays', 'game'): ([0, 1, 2, 1], [0, 0, 1, 1]),
('user', 'wishes', 'game'): ([0, 2], [1, 0]),
('developer', 'develops', 'game'): ([0, 1], [0, 1])
})
g2 = dgl.heterograph({
('user', 'follows', 'user'): ([0, 1], [1, 2]),
('user', 'plays', 'game'): ([0, 1, 2, 1], [0, 0, 1, 1]),
('user', 'wishes', 'game'): ([0, 2], [1, 0]),
('developer', 'develops', 'game'): ([0, 1], [0, 1])
})
g.nodes['user'].data['u_h'] = F.randn((3, 4))
g.nodes['game'].data['g_h'] = F.randn((2, 5))
g.edges['plays'].data['p_h'] = F.randn((4, 6))
g2.nodes['user'].data['u_h'] = F.randn((3, 4))
g2.nodes['game'].data['g_h'] = F.randn((2, 5))
g2.edges['plays'].data['p_h'] = F.randn((4, 6))
g = dgl.heterograph(
{
("user", "follows", "user"): ([0, 1], [1, 2]),
("user", "plays", "game"): ([0, 1, 2, 1], [0, 0, 1, 1]),
("user", "wishes", "game"): ([0, 2], [1, 0]),
("developer", "develops", "game"): ([0, 1], [0, 1]),
}
)
g2 = dgl.heterograph(
{
("user", "follows", "user"): ([0, 1], [1, 2]),
("user", "plays", "game"): ([0, 1, 2, 1], [0, 0, 1, 1]),
("user", "wishes", "game"): ([0, 2], [1, 0]),
("developer", "develops", "game"): ([0, 1], [0, 1]),
}
)
g.nodes["user"].data["u_h"] = F.randn((3, 4))
g.nodes["game"].data["g_h"] = F.randn((2, 5))
g.edges["plays"].data["p_h"] = F.randn((4, 6))
g2.nodes["user"].data["u_h"] = F.randn((3, 4))
g2.nodes["game"].data["g_h"] = F.randn((2, 5))
g2.edges["plays"].data["p_h"] = F.randn((4, 6))
bg = dgl.batch([g, g2])
new_bg = _reconstruct_pickle(bg)
test_utils.check_graph_equal(bg, new_bg)
@unittest.skipIf(F._default_context_str == 'gpu', reason="GPU edge_subgraph w/ relabeling not implemented")
@unittest.skipIf(
F._default_context_str == "gpu",
reason="GPU edge_subgraph w/ relabeling not implemented",
)
def test_pickling_subgraph():
f1 = io.BytesIO()
f2 = io.BytesIO()
g = dgl.rand_graph(10000, 100000)
g.ndata['x'] = F.randn((10000, 4))
g.edata['x'] = F.randn((100000, 5))
g.ndata["x"] = F.randn((10000, 4))
g.edata["x"] = F.randn((100000, 5))
pickle.dump(g, f1)
sg = g.subgraph([0, 1])
sgx = sg.ndata['x'] # materialize
sgx = sg.ndata["x"] # materialize
pickle.dump(sg, f2)
# TODO(BarclayII): How should I test that the size of the subgraph pickle file should not
# be as large as the size of the original pickle file?
......@@ -141,38 +165,47 @@ def test_pickling_subgraph():
f2.seek(0)
f2.truncate()
sgx = sg.edata['x'] # materialize
sgx = sg.edata["x"] # materialize
pickle.dump(sg, f2)
assert f1.tell() > f2.tell() * 50
f2.seek(0)
f2.truncate()
sg = g.edge_subgraph([0])
sgx = sg.edata['x'] # materialize
sgx = sg.edata["x"] # materialize
pickle.dump(sg, f2)
assert f1.tell() > f2.tell() * 50
f2.seek(0)
f2.truncate()
sgx = sg.ndata['x'] # materialize
sgx = sg.ndata["x"] # materialize
pickle.dump(sg, f2)
assert f1.tell() > f2.tell() * 50
f1.close()
f2.close()
@unittest.skipIf(F._default_context_str != 'gpu', reason="Need GPU for pin")
@unittest.skipIf(dgl.backend.backend_name == "tensorflow", reason="TensorFlow create graph on gpu when unpickle")
@unittest.skipIf(F._default_context_str != "gpu", reason="Need GPU for pin")
@unittest.skipIf(
dgl.backend.backend_name == "tensorflow",
reason="TensorFlow create graph on gpu when unpickle",
)
@parametrize_idtype
def test_pickling_is_pinned(idtype):
from copy import deepcopy
g = dgl.rand_graph(10, 20, idtype=idtype, device=F.cpu())
hg = dgl.heterograph({
('user', 'follows', 'user'): ([0, 1], [1, 2]),
('user', 'plays', 'game'): ([0, 1, 2, 1], [0, 0, 1, 1]),
('user', 'wishes', 'game'): ([0, 2], [1, 0]),
('developer', 'develops', 'game'): ([0, 1], [0, 1])
}, idtype=idtype, device=F.cpu())
hg = dgl.heterograph(
{
("user", "follows", "user"): ([0, 1], [1, 2]),
("user", "plays", "game"): ([0, 1, 2, 1], [0, 0, 1, 1]),
("user", "wishes", "game"): ([0, 2], [1, 0]),
("developer", "develops", "game"): ([0, 1], [0, 1]),
},
idtype=idtype,
device=F.cpu(),
)
for graph in [g, hg]:
assert not graph.is_pinned()
graph.pin_memory_()
......@@ -186,7 +219,7 @@ def test_pickling_is_pinned(idtype):
graph.unpin_memory_()
if __name__ == '__main__':
if __name__ == "__main__":
test_pickling_index()
test_pickling_graph_index()
test_pickling_frame()
......
import backend as F
import numpy as np
from test_utils import parametrize_idtype
import dgl
import numpy as np
from test_utils import parametrize_idtype
@parametrize_idtype
......
import io
import multiprocessing as mp
import os
import pickle
import unittest
import backend as F
import dgl
import dgl.function as fn
import networkx as nx
import scipy.sparse as ssp
import dgl
from dgl.graph_index import create_graph_index
from dgl.utils import toindex
import backend as F
import dgl.function as fn
import pickle
import io
import unittest
from test_utils import parametrize_idtype
import multiprocessing as mp
import os
def create_test_graph(idtype):
g = dgl.heterograph(({
('user', 'follows', 'user'): ([0, 1], [1, 2]),
('user', 'plays', 'game'): ([0, 1, 2, 1], [0, 0, 1, 1]),
('user', 'wishes', 'game'): ([0, 2], [1, 0]),
('developer', 'develops', 'game'): ([0, 1], [0, 1])
}), idtype=idtype)
g = dgl.heterograph(
(
{
("user", "follows", "user"): ([0, 1], [1, 2]),
("user", "plays", "game"): ([0, 1, 2, 1], [0, 0, 1, 1]),
("user", "wishes", "game"): ([0, 2], [1, 0]),
("developer", "develops", "game"): ([0, 1], [0, 1]),
}
),
idtype=idtype,
)
return g
def _assert_is_identical_hetero(g, g2):
assert g.ntypes == g2.ntypes
assert g.canonical_etypes == g2.canonical_etypes
......@@ -35,29 +44,38 @@ def _assert_is_identical_hetero(g, g2):
# check if edge ID spaces and feature spaces are equal
for etype in g.canonical_etypes:
src, dst = g.all_edges(etype=etype, order='eid')
src2, dst2 = g2.all_edges(etype=etype, order='eid')
src, dst = g.all_edges(etype=etype, order="eid")
src2, dst2 = g2.all_edges(etype=etype, order="eid")
assert F.array_equal(src, src2)
assert F.array_equal(dst, dst2)
@unittest.skipIf(dgl.backend.backend_name == 'tensorflow', reason='Not support tensorflow for now')
@unittest.skipIf(
dgl.backend.backend_name == "tensorflow",
reason="Not support tensorflow for now",
)
@parametrize_idtype
def test_single_process(idtype):
hg = create_test_graph(idtype=idtype)
hg_share = hg.shared_memory("hg")
hg_rebuild = dgl.hetero_from_shared_memory('hg')
hg_rebuild = dgl.hetero_from_shared_memory("hg")
hg_save_again = hg_rebuild.shared_memory("hg")
_assert_is_identical_hetero(hg, hg_share)
_assert_is_identical_hetero(hg, hg_rebuild)
_assert_is_identical_hetero(hg, hg_save_again)
def sub_proc(hg_origin, name):
hg_rebuild = dgl.hetero_from_shared_memory(name)
hg_save_again = hg_rebuild.shared_memory(name)
_assert_is_identical_hetero(hg_origin, hg_rebuild)
_assert_is_identical_hetero(hg_origin, hg_save_again)
@unittest.skipIf(dgl.backend.backend_name == 'tensorflow', reason='Not support tensorflow for now')
@unittest.skipIf(
dgl.backend.backend_name == "tensorflow",
reason="Not support tensorflow for now",
)
@parametrize_idtype
def test_multi_process(idtype):
hg = create_test_graph(idtype=idtype)
......@@ -66,8 +84,14 @@ def test_multi_process(idtype):
p.start()
p.join()
@unittest.skipIf(F._default_context_str == 'cpu', reason="Need gpu for this test")
@unittest.skipIf(dgl.backend.backend_name == 'tensorflow', reason='Not support tensorflow for now')
@unittest.skipIf(
F._default_context_str == "cpu", reason="Need gpu for this test"
)
@unittest.skipIf(
dgl.backend.backend_name == "tensorflow",
reason="Not support tensorflow for now",
)
def test_copy_from_gpu():
hg = create_test_graph(idtype=F.int32)
hg_gpu = hg.to(F.cuda())
......@@ -76,6 +100,7 @@ def test_copy_from_gpu():
p.start()
p.join()
# TODO: Test calling shared_memory with Blocks (a subclass of HeteroGraph)
if __name__ == "__main__":
test_single_process(F.int64)
......
import numpy as np
import scipy.sparse as sp
import backend as F
import dgl
import dgl.function as fn
import backend as F
import numpy as np
import scipy.sparse as sp
from test_utils import parametrize_idtype
D = 5
def generate_graph(idtype):
g = dgl.DGLGraph()
g = g.astype(idtype).to(F.ctx())
......@@ -17,117 +19,142 @@ def generate_graph(idtype):
g.add_edges(i, 9)
# add a back flow from 9 to 0
g.add_edges(9, 0)
g.ndata.update({'f1' : F.randn((10,)), 'f2' : F.randn((10, D))})
g.ndata.update({"f1": F.randn((10,)), "f2": F.randn((10, D))})
weights = F.randn((17,))
g.edata.update({'e1': weights, 'e2': F.unsqueeze(weights, 1)})
g.edata.update({"e1": weights, "e2": F.unsqueeze(weights, 1)})
return g
@parametrize_idtype
def test_v2v_update_all(idtype):
def _test(fld):
def message_func(edges):
return {'m' : edges.src[fld]}
return {"m": edges.src[fld]}
def message_func_edge(edges):
if len(edges.src[fld].shape) == 1:
return {'m' : edges.src[fld] * edges.data['e1']}
return {"m": edges.src[fld] * edges.data["e1"]}
else:
return {'m' : edges.src[fld] * edges.data['e2']}
return {"m": edges.src[fld] * edges.data["e2"]}
def reduce_func(nodes):
return {fld : F.sum(nodes.mailbox['m'], 1)}
return {fld: F.sum(nodes.mailbox["m"], 1)}
def apply_func(nodes):
return {fld : 2 * nodes.data[fld]}
return {fld: 2 * nodes.data[fld]}
g = generate_graph(idtype)
# update all
v1 = g.ndata[fld]
g.update_all(fn.copy_u(u=fld, out='m'), fn.sum(msg='m', out=fld), apply_func)
g.update_all(
fn.copy_u(u=fld, out="m"), fn.sum(msg="m", out=fld), apply_func
)
v2 = g.ndata[fld]
g.ndata.update({fld : v1})
g.ndata.update({fld: v1})
g.update_all(message_func, reduce_func, apply_func)
v3 = g.ndata[fld]
assert F.allclose(v2, v3)
# update all with edge weights
v1 = g.ndata[fld]
g.update_all(fn.u_mul_e(fld, 'e1', 'm'),
fn.sum(msg='m', out=fld), apply_func)
g.update_all(
fn.u_mul_e(fld, "e1", "m"), fn.sum(msg="m", out=fld), apply_func
)
v2 = g.ndata[fld]
g.ndata.update({fld : v1})
g.ndata.update({fld: v1})
g.update_all(message_func_edge, reduce_func, apply_func)
v4 = g.ndata[fld]
assert F.allclose(v2, v4)
# test 1d node features
_test('f1')
_test("f1")
# test 2d node features
_test('f2')
_test("f2")
@parametrize_idtype
def test_v2v_snr(idtype):
u = F.tensor([0, 0, 0, 3, 4, 9], idtype)
v = F.tensor([1, 2, 3, 9, 9, 0], idtype)
def _test(fld):
def message_func(edges):
return {'m' : edges.src[fld]}
return {"m": edges.src[fld]}
def message_func_edge(edges):
if len(edges.src[fld].shape) == 1:
return {'m' : edges.src[fld] * edges.data['e1']}
return {"m": edges.src[fld] * edges.data["e1"]}
else:
return {'m' : edges.src[fld] * edges.data['e2']}
return {"m": edges.src[fld] * edges.data["e2"]}
def reduce_func(nodes):
return {fld : F.sum(nodes.mailbox['m'], 1)}
return {fld: F.sum(nodes.mailbox["m"], 1)}
def apply_func(nodes):
return {fld : 2 * nodes.data[fld]}
return {fld: 2 * nodes.data[fld]}
g = generate_graph(idtype)
# send and recv
v1 = g.ndata[fld]
g.send_and_recv((u, v), fn.copy_u(u=fld, out='m'),
fn.sum(msg='m', out=fld), apply_func)
g.send_and_recv(
(u, v),
fn.copy_u(u=fld, out="m"),
fn.sum(msg="m", out=fld),
apply_func,
)
v2 = g.ndata[fld]
g.ndata.update({fld : v1})
g.ndata.update({fld: v1})
g.send_and_recv((u, v), message_func, reduce_func, apply_func)
v3 = g.ndata[fld]
assert F.allclose(v2, v3)
# send and recv with edge weights
v1 = g.ndata[fld]
g.send_and_recv((u, v), fn.u_mul_e(fld, 'e1', 'm'),
fn.sum(msg='m', out=fld), apply_func)
g.send_and_recv(
(u, v),
fn.u_mul_e(fld, "e1", "m"),
fn.sum(msg="m", out=fld),
apply_func,
)
v2 = g.ndata[fld]
g.ndata.update({fld : v1})
g.ndata.update({fld: v1})
g.send_and_recv((u, v), message_func_edge, reduce_func, apply_func)
v4 = g.ndata[fld]
assert F.allclose(v2, v4)
# test 1d node features
_test('f1')
_test("f1")
# test 2d node features
_test('f2')
_test("f2")
@parametrize_idtype
def test_v2v_pull(idtype):
nodes = F.tensor([1, 2, 3, 9], idtype)
def _test(fld):
def message_func(edges):
return {'m' : edges.src[fld]}
return {"m": edges.src[fld]}
def message_func_edge(edges):
if len(edges.src[fld].shape) == 1:
return {'m' : edges.src[fld] * edges.data['e1']}
return {"m": edges.src[fld] * edges.data["e1"]}
else:
return {'m' : edges.src[fld] * edges.data['e2']}
return {"m": edges.src[fld] * edges.data["e2"]}
def reduce_func(nodes):
return {fld : F.sum(nodes.mailbox['m'], 1)}
return {fld: F.sum(nodes.mailbox["m"], 1)}
def apply_func(nodes):
return {fld : 2 * nodes.data[fld]}
return {fld: 2 * nodes.data[fld]}
g = generate_graph(idtype)
# send and recv
v1 = g.ndata[fld]
g.pull(nodes, fn.copy_u(u=fld, out='m'), fn.sum(msg='m', out=fld), apply_func)
g.pull(
nodes,
fn.copy_u(u=fld, out="m"),
fn.sum(msg="m", out=fld),
apply_func,
)
v2 = g.ndata[fld]
g.ndata[fld] = v1
g.pull(nodes, message_func, reduce_func, apply_func)
......@@ -135,17 +162,23 @@ def test_v2v_pull(idtype):
assert F.allclose(v2, v3)
# send and recv with edge weights
v1 = g.ndata[fld]
g.pull(nodes, fn.u_mul_e(fld, 'e1', 'm'),
fn.sum(msg='m', out=fld), apply_func)
g.pull(
nodes,
fn.u_mul_e(fld, "e1", "m"),
fn.sum(msg="m", out=fld),
apply_func,
)
v2 = g.ndata[fld]
g.ndata[fld] = v1
g.pull(nodes, message_func_edge, reduce_func, apply_func)
v4 = g.ndata[fld]
assert F.allclose(v2, v4)
# test 1d node features
_test('f1')
_test("f1")
# test 2d node features
_test('f2')
_test("f2")
@parametrize_idtype
def test_update_all_multi_fallback(idtype):
......@@ -156,42 +189,50 @@ def test_update_all_multi_fallback(idtype):
for i in range(1, 9):
g.add_edges(0, i)
g.add_edges(i, 9)
g.ndata['h'] = F.randn((10, D))
g.edata['w1'] = F.randn((16,))
g.edata['w2'] = F.randn((16, D))
g.ndata["h"] = F.randn((10, D))
g.edata["w1"] = F.randn((16,))
g.edata["w2"] = F.randn((16, D))
def _mfunc_hxw1(edges):
return {'m1' : edges.src['h'] * F.unsqueeze(edges.data['w1'], 1)}
return {"m1": edges.src["h"] * F.unsqueeze(edges.data["w1"], 1)}
def _mfunc_hxw2(edges):
return {'m2' : edges.src['h'] * edges.data['w2']}
return {"m2": edges.src["h"] * edges.data["w2"]}
def _rfunc_m1(nodes):
return {'o1' : F.sum(nodes.mailbox['m1'], 1)}
return {"o1": F.sum(nodes.mailbox["m1"], 1)}
def _rfunc_m2(nodes):
return {'o2' : F.sum(nodes.mailbox['m2'], 1)}
return {"o2": F.sum(nodes.mailbox["m2"], 1)}
def _rfunc_m1max(nodes):
return {'o3' : F.max(nodes.mailbox['m1'], 1)}
return {"o3": F.max(nodes.mailbox["m1"], 1)}
def _afunc(nodes):
ret = {}
for k, v in nodes.data.items():
if k.startswith('o'):
if k.startswith("o"):
ret[k] = 2 * v
return ret
# compute ground truth
g.update_all(_mfunc_hxw1, _rfunc_m1, _afunc)
o1 = g.ndata.pop('o1')
o1 = g.ndata.pop("o1")
g.update_all(_mfunc_hxw2, _rfunc_m2, _afunc)
o2 = g.ndata.pop('o2')
o2 = g.ndata.pop("o2")
g.update_all(_mfunc_hxw1, _rfunc_m1max, _afunc)
o3 = g.ndata.pop('o3')
o3 = g.ndata.pop("o3")
# v2v spmv
g.update_all(fn.u_mul_e('h', 'w1', 'm1'),
fn.sum(msg='m1', out='o1'),
_afunc)
assert F.allclose(o1, g.ndata.pop('o1'))
g.update_all(
fn.u_mul_e("h", "w1", "m1"), fn.sum(msg="m1", out="o1"), _afunc
)
assert F.allclose(o1, g.ndata.pop("o1"))
# v2v fallback to e2v
g.update_all(fn.u_mul_e('h', 'w2', 'm2'),
fn.sum(msg='m2', out='o2'),
_afunc)
assert F.allclose(o2, g.ndata.pop('o2'))
g.update_all(
fn.u_mul_e("h", "w2", "m2"), fn.sum(msg="m2", out="o2"), _afunc
)
assert F.allclose(o2, g.ndata.pop("o2"))
@parametrize_idtype
def test_pull_multi_fallback(idtype):
......@@ -202,44 +243,58 @@ def test_pull_multi_fallback(idtype):
for i in range(1, 9):
g.add_edges(0, i)
g.add_edges(i, 9)
g.ndata['h'] = F.randn((10, D))
g.edata['w1'] = F.randn((16,))
g.edata['w2'] = F.randn((16, D))
g.ndata["h"] = F.randn((10, D))
g.edata["w1"] = F.randn((16,))
g.edata["w2"] = F.randn((16, D))
def _mfunc_hxw1(edges):
return {'m1' : edges.src['h'] * F.unsqueeze(edges.data['w1'], 1)}
return {"m1": edges.src["h"] * F.unsqueeze(edges.data["w1"], 1)}
def _mfunc_hxw2(edges):
return {'m2' : edges.src['h'] * edges.data['w2']}
return {"m2": edges.src["h"] * edges.data["w2"]}
def _rfunc_m1(nodes):
return {'o1' : F.sum(nodes.mailbox['m1'], 1)}
return {"o1": F.sum(nodes.mailbox["m1"], 1)}
def _rfunc_m2(nodes):
return {'o2' : F.sum(nodes.mailbox['m2'], 1)}
return {"o2": F.sum(nodes.mailbox["m2"], 1)}
def _rfunc_m1max(nodes):
return {'o3' : F.max(nodes.mailbox['m1'], 1)}
return {"o3": F.max(nodes.mailbox["m1"], 1)}
def _afunc(nodes):
ret = {}
for k, v in nodes.data.items():
if k.startswith('o'):
if k.startswith("o"):
ret[k] = 2 * v
return ret
# nodes to pull
def _pull_nodes(nodes):
# compute ground truth
g.pull(nodes, _mfunc_hxw1, _rfunc_m1, _afunc)
o1 = g.ndata.pop('o1')
o1 = g.ndata.pop("o1")
g.pull(nodes, _mfunc_hxw2, _rfunc_m2, _afunc)
o2 = g.ndata.pop('o2')
o2 = g.ndata.pop("o2")
g.pull(nodes, _mfunc_hxw1, _rfunc_m1max, _afunc)
o3 = g.ndata.pop('o3')
o3 = g.ndata.pop("o3")
# v2v spmv
g.pull(nodes, fn.u_mul_e('h', 'w1', 'm1'),
fn.sum(msg='m1', out='o1'),
_afunc)
assert F.allclose(o1, g.ndata.pop('o1'))
g.pull(
nodes,
fn.u_mul_e("h", "w1", "m1"),
fn.sum(msg="m1", out="o1"),
_afunc,
)
assert F.allclose(o1, g.ndata.pop("o1"))
# v2v fallback to e2v
g.pull(nodes, fn.u_mul_e('h', 'w2', 'm2'),
fn.sum(msg='m2', out='o2'),
_afunc)
assert F.allclose(o2, g.ndata.pop('o2'))
g.pull(
nodes,
fn.u_mul_e("h", "w2", "m2"),
fn.sum(msg="m2", out="o2"),
_afunc,
)
assert F.allclose(o2, g.ndata.pop("o2"))
# test#1: non-0deg nodes
nodes = [1, 2, 9]
_pull_nodes(nodes)
......@@ -247,13 +302,17 @@ def test_pull_multi_fallback(idtype):
nodes = [0, 1, 2, 9]
_pull_nodes(nodes)
@parametrize_idtype
def test_spmv_3d_feat(idtype):
def src_mul_edge_udf(edges):
return {'sum': edges.src['h'] * F.unsqueeze(F.unsqueeze(edges.data['h'], 1), 1)}
return {
"sum": edges.src["h"]
* F.unsqueeze(F.unsqueeze(edges.data["h"], 1), 1)
}
def sum_udf(nodes):
return {'h': F.sum(nodes.mailbox['sum'], 1)}
return {"h": F.sum(nodes.mailbox["sum"], 1)}
n = 100
p = 0.1
......@@ -266,44 +325,53 @@ def test_spmv_3d_feat(idtype):
h = F.randn((n, 5, 5))
e = F.randn((m,))
g.ndata['h'] = h
g.edata['h'] = e
g.update_all(message_func=fn.u_mul_e('h', 'h', 'sum'), reduce_func=fn.sum('sum', 'h')) # 1
ans = g.ndata['h']
g.ndata["h"] = h
g.edata["h"] = e
g.update_all(
message_func=fn.u_mul_e("h", "h", "sum"), reduce_func=fn.sum("sum", "h")
) # 1
ans = g.ndata["h"]
g.ndata['h'] = h
g.edata['h'] = e
g.update_all(message_func=src_mul_edge_udf, reduce_func=fn.sum('sum', 'h')) # 2
assert F.allclose(g.ndata['h'], ans)
g.ndata["h"] = h
g.edata["h"] = e
g.update_all(
message_func=src_mul_edge_udf, reduce_func=fn.sum("sum", "h")
) # 2
assert F.allclose(g.ndata["h"], ans)
g.ndata['h'] = h
g.edata['h'] = e
g.update_all(message_func=src_mul_edge_udf, reduce_func=sum_udf) # 3
assert F.allclose(g.ndata['h'], ans)
g.ndata["h"] = h
g.edata["h"] = e
g.update_all(message_func=src_mul_edge_udf, reduce_func=sum_udf) # 3
assert F.allclose(g.ndata["h"], ans)
# test#2: e2v
def src_mul_edge_udf(edges):
return {'sum': edges.src['h'] * edges.data['h']}
return {"sum": edges.src["h"] * edges.data["h"]}
h = F.randn((n, 5, 5))
e = F.randn((m, 5, 5))
g.ndata['h'] = h
g.edata['h'] = e
g.update_all(message_func=fn.u_mul_e('h', 'h', 'sum'), reduce_func=fn.sum('sum', 'h')) # 1
ans = g.ndata['h']
g.ndata["h"] = h
g.edata["h"] = e
g.update_all(
message_func=fn.u_mul_e("h", "h", "sum"), reduce_func=fn.sum("sum", "h")
) # 1
ans = g.ndata["h"]
g.ndata["h"] = h
g.edata["h"] = e
g.update_all(
message_func=src_mul_edge_udf, reduce_func=fn.sum("sum", "h")
) # 2
assert F.allclose(g.ndata["h"], ans)
g.ndata['h'] = h
g.edata['h'] = e
g.update_all(message_func=src_mul_edge_udf, reduce_func=fn.sum('sum', 'h')) # 2
assert F.allclose(g.ndata['h'], ans)
g.ndata["h"] = h
g.edata["h"] = e
g.update_all(message_func=src_mul_edge_udf, reduce_func=sum_udf) # 3
assert F.allclose(g.ndata["h"], ans)
g.ndata['h'] = h
g.edata['h'] = e
g.update_all(message_func=src_mul_edge_udf, reduce_func=sum_udf) # 3
assert F.allclose(g.ndata['h'], ans)
if __name__ == '__main__':
if __name__ == "__main__":
test_v2v_update_all()
test_v2v_snr()
test_v2v_pull()
......
......@@ -4,18 +4,18 @@ from collections import Counter
from itertools import product
import backend as F
import dgl
import dgl.function as fn
import networkx as nx
import numpy as np
import pytest
import scipy.sparse as ssp
import test_utils
from dgl import DGLError
from scipy.sparse import rand
from test_utils import get_cases, parametrize_idtype
import dgl
import dgl.function as fn
from dgl import DGLError
rfuncs = {"sum": fn.sum, "max": fn.max, "min": fn.min, "mean": fn.mean}
feat_size = 2
......@@ -48,7 +48,6 @@ def create_test_heterograph(idtype):
def create_test_heterograph_2(idtype):
src = np.random.randint(0, 50, 25)
dst = np.random.randint(0, 50, 25)
src1 = np.random.randint(0, 25, 10)
......@@ -72,7 +71,6 @@ def create_test_heterograph_2(idtype):
def create_test_heterograph_large(idtype):
src = np.random.randint(0, 50, 2500)
dst = np.random.randint(0, 50, 2500)
g = dgl.heterograph(
......@@ -163,7 +161,6 @@ def test_unary_copy_u(idtype):
@parametrize_idtype
def test_unary_copy_e(idtype):
def _test(mfunc, rfunc):
g = create_test_heterograph_large(idtype)
g0 = create_test_heterograph_2(idtype)
g1 = create_test_heterograph(idtype)
......@@ -230,6 +227,7 @@ def test_unary_copy_e(idtype):
e_grad6 = F.grad(g["plays"].edata["eid"])
e_grad7 = F.grad(g["wishes"].edata["eid"])
e_grad8 = F.grad(g["follows"].edata["eid"])
# # correctness check
def _print_error(a, b):
for i, (x, y) in enumerate(
......@@ -254,7 +252,6 @@ def test_unary_copy_e(idtype):
@parametrize_idtype
def test_binary_op(idtype):
def _test(lhs, rhs, binary_op, reducer):
g = create_test_heterograph(idtype)
x1 = F.randn((g.num_nodes("user"), feat_size))
......@@ -309,6 +306,7 @@ def test_binary_op(idtype):
r2 = g.nodes["game"].data["y"]
F.backward(r2, F.ones(r2.shape))
n_grad2 = F.grad(r2)
# correctness check
def _print_error(a, b):
for i, (x, y) in enumerate(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment