Unverified Commit 68ec6247 authored by Minjie Wang, committed by GitHub

[API][Doc] API change & basic tutorials (#113)

* Add SH tutorials

* setup sphinx-gallery; work on graph tutorial

* draft dglgraph tutorial

* update readme to include document url

* rm obsolete file

* Draft the message passing tutorial

* Capsule code (#102)

* add capsule example

* clean code

* better naming

* better naming

* [GCN]tutorial scaffold

* fix capsule example code

* remove previous capsule example code

* graph struc edit

* modified:   2_graph.py

* update doc of capsule

* update capsule docs

* update capsule docs

* add msg passing prime

* GCN-GAT tutorial Section 1 and 2

* comment for API improvement

* section 3

* Tutorial API change (#115)

* change the API as discusses; toy example

* enable the new set/get syntax

* fixed pytorch utest

* fixed gcn example

* fixed gat example

* fixed mx utests

* fix mx utest

* delete apply edges; add utest for update_edges

* small change on toy example

* fix utest

* fix out in degrees bug

* update pagerank example and add it to CI

* add delitem for dataview

* make edges() return form that is compatible with send/update_edges etc

* fix index bug when the given data is one-int-tensor

* fix doc
parent 2ecd2b23
...@@ -8,24 +8,24 @@ D = 5 ...@@ -8,24 +8,24 @@ D = 5
def test_line_graph(): def test_line_graph():
N = 5 N = 5
G = dgl.DGLGraph(nx.star_graph(N)) G = dgl.DGLGraph(nx.star_graph(N))
G.set_e_repr({'h' : th.randn((2 * N, D))}) G.edata['h'] = th.randn((2 * N, D))
n_edges = G.number_of_edges() n_edges = G.number_of_edges()
L = G.line_graph(shared=True) L = G.line_graph(shared=True)
assert L.number_of_nodes() == 2 * N assert L.number_of_nodes() == 2 * N
L.set_n_repr({'h' : th.randn((2 * N, D))}) L.ndata['h'] = th.randn((2 * N, D))
# update node features on line graph should reflect to edge features on # update node features on line graph should reflect to edge features on
# original graph. # original graph.
u = [0, 0, 2, 3] u = [0, 0, 2, 3]
v = [1, 2, 0, 0] v = [1, 2, 0, 0]
eid = G.edge_ids(u, v) eid = G.edge_ids(u, v)
L.set_n_repr({'h' : th.zeros((4, D))}, eid) L.nodes[eid].data['h'] = th.zeros((4, D))
assert th.allclose(G.get_e_repr(u, v)['h'], th.zeros((4, D))) assert th.allclose(G.edges[u, v].data['h'], th.zeros((4, D)))
# adding a new node feature on line graph should also reflect to a new # adding a new node feature on line graph should also reflect to a new
# edge feature on original graph # edge feature on original graph
data = th.randn(n_edges, D) data = th.randn(n_edges, D)
L.set_n_repr({'w': data}) L.ndata['w'] = data
assert th.allclose(G.get_e_repr()['w'], data) assert th.allclose(G.edata['w'], data)
def test_no_backtracking(): def test_no_backtracking():
N = 5 N = 5
......
...@@ -21,41 +21,41 @@ def generate_graph(): ...@@ -21,41 +21,41 @@ def generate_graph():
def test_update_all(): def test_update_all():
def _test(fld): def _test(fld):
def message_func(hu, edge): def message_func(edges):
return {'m' : hu[fld]} return {'m' : edges.src[fld]}
def message_func_edge(hu, edge): def message_func_edge(edges):
if len(hu[fld].shape) == 1: if len(edges.src[fld].shape) == 1:
return {'m' : hu[fld] * edge['e1']} return {'m' : edges.src[fld] * edges.data['e1']}
else: else:
return {'m' : hu[fld] * edge['e2']} return {'m' : edges.src[fld] * edges.data['e2']}
def reduce_func(hv, msgs): def reduce_func(nodes):
return {fld : th.sum(msgs['m'], 1)} return {fld : th.sum(nodes.mailbox['m'], 1)}
def apply_func(hu): def apply_func(nodes):
return {fld : 2 * hu[fld]} return {fld : 2 * nodes.data[fld]}
g = generate_graph() g = generate_graph()
# update all # update all
v1 = g.get_n_repr()[fld] v1 = g.ndata[fld]
g.update_all(fn.copy_src(src=fld, out='m'), fn.sum(msg='m', out=fld), apply_func) g.update_all(fn.copy_src(src=fld, out='m'), fn.sum(msg='m', out=fld), apply_func)
v2 = g.get_n_repr()[fld] v2 = g.ndata[fld]
g.set_n_repr({fld : v1}) g.set_n_repr({fld : v1})
g.update_all(message_func, reduce_func, apply_func) g.update_all(message_func, reduce_func, apply_func)
v3 = g.get_n_repr()[fld] v3 = g.ndata[fld]
assert th.allclose(v2, v3) assert th.allclose(v2, v3)
# update all with edge weights # update all with edge weights
v1 = g.get_n_repr()[fld] v1 = g.ndata[fld]
g.update_all(fn.src_mul_edge(src=fld, edge='e1', out='m'), g.update_all(fn.src_mul_edge(src=fld, edge='e1', out='m'),
fn.sum(msg='m', out=fld), apply_func) fn.sum(msg='m', out=fld), apply_func)
v2 = g.get_n_repr()[fld] v2 = g.ndata[fld]
g.set_n_repr({fld : v1}) g.set_n_repr({fld : v1})
g.update_all(fn.src_mul_edge(src=fld, edge='e2', out='m'), g.update_all(fn.src_mul_edge(src=fld, edge='e2', out='m'),
fn.sum(msg='m', out=fld), apply_func) fn.sum(msg='m', out=fld), apply_func)
v3 = g.get_n_repr()[fld] v3 = g.ndata[fld]
g.set_n_repr({fld : v1}) g.set_n_repr({fld : v1})
g.update_all(message_func_edge, reduce_func, apply_func) g.update_all(message_func_edge, reduce_func, apply_func)
v4 = g.get_n_repr()[fld] v4 = g.ndata[fld]
assert th.allclose(v2, v3) assert th.allclose(v2, v3)
assert th.allclose(v3, v4) assert th.allclose(v3, v4)
# test 1d node features # test 1d node features
...@@ -67,42 +67,42 @@ def test_send_and_recv(): ...@@ -67,42 +67,42 @@ def test_send_and_recv():
u = th.tensor([0, 0, 0, 3, 4, 9]) u = th.tensor([0, 0, 0, 3, 4, 9])
v = th.tensor([1, 2, 3, 9, 9, 0]) v = th.tensor([1, 2, 3, 9, 9, 0])
def _test(fld): def _test(fld):
def message_func(hu, edge): def message_func(edges):
return {'m' : hu[fld]} return {'m' : edges.src[fld]}
def message_func_edge(hu, edge): def message_func_edge(edges):
if len(hu[fld].shape) == 1: if len(edges.src[fld].shape) == 1:
return {'m' : hu[fld] * edge['e1']} return {'m' : edges.src[fld] * edges.data['e1']}
else: else:
return {'m' : hu[fld] * edge['e2']} return {'m' : edges.src[fld] * edges.data['e2']}
def reduce_func(hv, msgs): def reduce_func(nodes):
return {fld : th.sum(msgs['m'], 1)} return {fld : th.sum(nodes.mailbox['m'], 1)}
def apply_func(hu): def apply_func(nodes):
return {fld : 2 * hu[fld]} return {fld : 2 * nodes.data[fld]}
g = generate_graph() g = generate_graph()
# send and recv # send and recv
v1 = g.get_n_repr()[fld] v1 = g.ndata[fld]
g.send_and_recv(u, v, fn.copy_src(src=fld, out='m'), g.send_and_recv((u, v), fn.copy_src(src=fld, out='m'),
fn.sum(msg='m', out=fld), apply_func) fn.sum(msg='m', out=fld), apply_func)
v2 = g.get_n_repr()[fld] v2 = g.ndata[fld]
g.set_n_repr({fld : v1}) g.set_n_repr({fld : v1})
g.send_and_recv(u, v, message_func, reduce_func, apply_func) g.send_and_recv((u, v), message_func, reduce_func, apply_func)
v3 = g.get_n_repr()[fld] v3 = g.ndata[fld]
assert th.allclose(v2, v3) assert th.allclose(v2, v3)
# send and recv with edge weights # send and recv with edge weights
v1 = g.get_n_repr()[fld] v1 = g.ndata[fld]
g.send_and_recv(u, v, fn.src_mul_edge(src=fld, edge='e1', out='m'), g.send_and_recv((u, v), fn.src_mul_edge(src=fld, edge='e1', out='m'),
fn.sum(msg='m', out=fld), apply_func) fn.sum(msg='m', out=fld), apply_func)
v2 = g.get_n_repr()[fld] v2 = g.ndata[fld]
g.set_n_repr({fld : v1}) g.set_n_repr({fld : v1})
g.send_and_recv(u, v, fn.src_mul_edge(src=fld, edge='e2', out='m'), g.send_and_recv((u, v), fn.src_mul_edge(src=fld, edge='e2', out='m'),
fn.sum(msg='m', out=fld), apply_func) fn.sum(msg='m', out=fld), apply_func)
v3 = g.get_n_repr()[fld] v3 = g.ndata[fld]
g.set_n_repr({fld : v1}) g.set_n_repr({fld : v1})
g.send_and_recv(u, v, message_func_edge, reduce_func, apply_func) g.send_and_recv((u, v), message_func_edge, reduce_func, apply_func)
v4 = g.get_n_repr()[fld] v4 = g.ndata[fld]
assert th.allclose(v2, v3) assert th.allclose(v2, v3)
assert th.allclose(v3, v4) assert th.allclose(v3, v4)
# test 1d node features # test 1d node features
...@@ -111,14 +111,14 @@ def test_send_and_recv(): ...@@ -111,14 +111,14 @@ def test_send_and_recv():
_test('f2') _test('f2')
def test_update_all_multi_fn(): def test_update_all_multi_fn():
def message_func(hu, edge): def message_func(edges):
return {'m2': hu['f2']} return {'m2': edges.src['f2']}
def message_func_edge(hu, edge): def message_func_edge(edges):
return {'m2': hu['f2'] * edge['e2']} return {'m2': edges.src['f2'] * edges.data['e2']}
def reduce_func(hv, msgs): def reduce_func(nodes):
return {'v2': th.sum(msgs['m2'], 1)} return {'v2': th.sum(nodes.mailbox['m2'], 1)}
g = generate_graph() g = generate_graph()
g.set_n_repr({'v1' : th.zeros((10,)), 'v2' : th.zeros((10,))}) g.set_n_repr({'v1' : th.zeros((10,)), 'v2' : th.zeros((10,))})
...@@ -127,19 +127,19 @@ def test_update_all_multi_fn(): ...@@ -127,19 +127,19 @@ def test_update_all_multi_fn():
g.update_all([fn.copy_src(src=fld, out='m1'), message_func], g.update_all([fn.copy_src(src=fld, out='m1'), message_func],
[fn.sum(msg='m1', out='v1'), reduce_func], [fn.sum(msg='m1', out='v1'), reduce_func],
None) None)
v1 = g.get_n_repr()['v1'] v1 = g.ndata['v1']
v2 = g.get_n_repr()['v2'] v2 = g.ndata['v2']
assert th.allclose(v1, v2) assert th.allclose(v1, v2)
# run builtin with single message and reduce # run builtin with single message and reduce
g.update_all(fn.copy_src(src=fld, out='m'), fn.sum(msg='m', out='v1'), None) g.update_all(fn.copy_src(src=fld, out='m'), fn.sum(msg='m', out='v1'), None)
v1 = g.get_n_repr()['v1'] v1 = g.ndata['v1']
assert th.allclose(v1, v2) assert th.allclose(v1, v2)
# 1 message, 2 reduces # 1 message, 2 reduces
g.update_all(fn.copy_src(src=fld, out='m'), [fn.sum(msg='m', out='v2'), fn.sum(msg='m', out='v3')], None) g.update_all(fn.copy_src(src=fld, out='m'), [fn.sum(msg='m', out='v2'), fn.sum(msg='m', out='v3')], None)
v2 = g.get_n_repr()['v2'] v2 = g.ndata['v2']
v3 = g.get_n_repr()['v3'] v3 = g.ndata['v3']
assert th.allclose(v1, v2) assert th.allclose(v1, v2)
assert th.allclose(v1, v3) assert th.allclose(v1, v3)
...@@ -147,29 +147,29 @@ def test_update_all_multi_fn(): ...@@ -147,29 +147,29 @@ def test_update_all_multi_fn():
g.update_all([fn.src_mul_edge(src=fld, edge='e1', out='m1'), fn.src_mul_edge(src=fld, edge='e2', out='m2')], g.update_all([fn.src_mul_edge(src=fld, edge='e1', out='m1'), fn.src_mul_edge(src=fld, edge='e2', out='m2')],
[fn.sum(msg='m1', out='v1'), fn.sum(msg='m2', out='v2'), fn.sum(msg='m1', out='v3')], [fn.sum(msg='m1', out='v1'), fn.sum(msg='m2', out='v2'), fn.sum(msg='m1', out='v3')],
None) None)
v1 = g.get_n_repr()['v1'] v1 = g.ndata['v1']
v2 = g.get_n_repr()['v2'] v2 = g.ndata['v2']
v3 = g.get_n_repr()['v3'] v3 = g.ndata['v3']
assert th.allclose(v1, v2) assert th.allclose(v1, v2)
assert th.allclose(v1, v3) assert th.allclose(v1, v3)
# run UDF with single message and reduce # run UDF with single message and reduce
g.update_all(message_func_edge, reduce_func, None) g.update_all(message_func_edge, reduce_func, None)
v2 = g.get_n_repr()['v2'] v2 = g.ndata['v2']
assert th.allclose(v1, v2) assert th.allclose(v1, v2)
def test_send_and_recv_multi_fn(): def test_send_and_recv_multi_fn():
u = th.tensor([0, 0, 0, 3, 4, 9]) u = th.tensor([0, 0, 0, 3, 4, 9])
v = th.tensor([1, 2, 3, 9, 9, 0]) v = th.tensor([1, 2, 3, 9, 9, 0])
def message_func(hu, edge): def message_func(edges):
return {'m2': hu['f2']} return {'m2': edges.src['f2']}
def message_func_edge(hu, edge): def message_func_edge(edges):
return {'m2': hu['f2'] * edge['e2']} return {'m2': edges.src['f2'] * edges.data['e2']}
def reduce_func(hv, msgs): def reduce_func(nodes):
return {'v2' : th.sum(msgs['m2'], 1)} return {'v2' : th.sum(nodes.mailbox['m2'], 1)}
g = generate_graph() g = generate_graph()
g.set_n_repr({'v1' : th.zeros((10, D)), 'v2' : th.zeros((10, D)), g.set_n_repr({'v1' : th.zeros((10, D)), 'v2' : th.zeros((10, D)),
...@@ -177,45 +177,45 @@ def test_send_and_recv_multi_fn(): ...@@ -177,45 +177,45 @@ def test_send_and_recv_multi_fn():
fld = 'f2' fld = 'f2'
# send and recv, mix of builtin and UDF # send and recv, mix of builtin and UDF
g.send_and_recv(u, v, g.send_and_recv((u, v),
[fn.copy_src(src=fld, out='m1'), message_func], [fn.copy_src(src=fld, out='m1'), message_func],
[fn.sum(msg='m1', out='v1'), reduce_func], [fn.sum(msg='m1', out='v1'), reduce_func],
None) None)
v1 = g.get_n_repr()['v1'] v1 = g.ndata['v1']
v2 = g.get_n_repr()['v2'] v2 = g.ndata['v2']
assert th.allclose(v1, v2) assert th.allclose(v1, v2)
# run builtin with single message and reduce # run builtin with single message and reduce
g.send_and_recv(u, v, fn.copy_src(src=fld, out='m'), fn.sum(msg='m', out='v1'), g.send_and_recv((u, v), fn.copy_src(src=fld, out='m'), fn.sum(msg='m', out='v1'),
None) None)
v1 = g.get_n_repr()['v1'] v1 = g.ndata['v1']
assert th.allclose(v1, v2) assert th.allclose(v1, v2)
# 1 message, 2 reduces # 1 message, 2 reduces
g.send_and_recv(u, v, g.send_and_recv((u, v),
fn.copy_src(src=fld, out='m'), fn.copy_src(src=fld, out='m'),
[fn.sum(msg='m', out='v2'), fn.sum(msg='m', out='v3')], [fn.sum(msg='m', out='v2'), fn.sum(msg='m', out='v3')],
None) None)
v2 = g.get_n_repr()['v2'] v2 = g.ndata['v2']
v3 = g.get_n_repr()['v3'] v3 = g.ndata['v3']
assert th.allclose(v1, v2) assert th.allclose(v1, v2)
assert th.allclose(v1, v3) assert th.allclose(v1, v3)
# send and recv with edge weights, 2 message, 3 reduces # send and recv with edge weights, 2 message, 3 reduces
g.send_and_recv(u, v, g.send_and_recv((u, v),
[fn.src_mul_edge(src=fld, edge='e1', out='m1'), fn.src_mul_edge(src=fld, edge='e2', out='m2')], [fn.src_mul_edge(src=fld, edge='e1', out='m1'), fn.src_mul_edge(src=fld, edge='e2', out='m2')],
[fn.sum(msg='m1', out='v1'), fn.sum(msg='m2', out='v2'), fn.sum(msg='m1', out='v3')], [fn.sum(msg='m1', out='v1'), fn.sum(msg='m2', out='v2'), fn.sum(msg='m1', out='v3')],
None) None)
v1 = g.get_n_repr()['v1'] v1 = g.ndata['v1']
v2 = g.get_n_repr()['v2'] v2 = g.ndata['v2']
v3 = g.get_n_repr()['v3'] v3 = g.ndata['v3']
assert th.allclose(v1, v2) assert th.allclose(v1, v2)
assert th.allclose(v1, v3) assert th.allclose(v1, v3)
# run UDF with single message and reduce # run UDF with single message and reduce
g.send_and_recv(u, v, message_func_edge, g.send_and_recv((u, v), message_func_edge,
reduce_func, None) reduce_func, None)
v2 = g.get_n_repr()['v2'] v2 = g.ndata['v2']
assert th.allclose(v1, v2) assert th.allclose(v1, v2)
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -16,27 +16,27 @@ def generate_graph(grad=False): ...@@ -16,27 +16,27 @@ def generate_graph(grad=False):
g.add_edge(9, 0) g.add_edge(9, 0)
ncol = Variable(th.randn(10, D), requires_grad=grad) ncol = Variable(th.randn(10, D), requires_grad=grad)
ecol = Variable(th.randn(17, D), requires_grad=grad) ecol = Variable(th.randn(17, D), requires_grad=grad)
g.set_n_repr({'h' : ncol}) g.ndata['h'] = ncol
g.set_e_repr({'l' : ecol}) g.edata['l'] = ecol
return g return g
def test_basics(): def test_basics():
g = generate_graph() g = generate_graph()
h = g.get_n_repr()['h'] h = g.ndata['h']
l = g.get_e_repr()['l'] l = g.edata['l']
nid = [0, 2, 3, 6, 7, 9] nid = [0, 2, 3, 6, 7, 9]
sg = g.subgraph(nid) sg = g.subgraph(nid)
eid = {2, 3, 4, 5, 10, 11, 12, 13, 16} eid = {2, 3, 4, 5, 10, 11, 12, 13, 16}
assert set(sg.parent_eid.numpy()) == eid assert set(sg.parent_eid.numpy()) == eid
eid = sg.parent_eid eid = sg.parent_eid
# the subgraph is empty initially # the subgraph is empty initially
assert len(sg.get_n_repr()) == 0 assert len(sg.ndata) == 0
assert len(sg.get_e_repr()) == 0 assert len(sg.edata) == 0
# the data is copied after explict copy from # the data is copied after explict copy from
sg.copy_from_parent() sg.copy_from_parent()
assert len(sg.get_n_repr()) == 1 assert len(sg.ndata) == 1
assert len(sg.get_e_repr()) == 1 assert len(sg.edata) == 1
sh = sg.get_n_repr()['h'] sh = sg.ndata['h']
assert th.allclose(h[nid], sh) assert th.allclose(h[nid], sh)
''' '''
s, d, eid s, d, eid
...@@ -58,11 +58,11 @@ def test_basics(): ...@@ -58,11 +58,11 @@ def test_basics():
8, 9, 15 3 8, 9, 15 3
9, 0, 16 1 9, 0, 16 1
''' '''
assert th.allclose(l[eid], sg.get_e_repr()['l']) assert th.allclose(l[eid], sg.edata['l'])
# update the node/edge features on the subgraph should NOT # update the node/edge features on the subgraph should NOT
# reflect to the parent graph. # reflect to the parent graph.
sg.set_n_repr({'h' : th.zeros((6, D))}) sg.ndata['h'] = th.zeros((6, D))
assert th.allclose(h, g.get_n_repr()['h']) assert th.allclose(h, g.ndata['h'])
def test_merge(): def test_merge():
# FIXME: current impl cannot handle this case!!! # FIXME: current impl cannot handle this case!!!
...@@ -85,8 +85,8 @@ def test_merge(): ...@@ -85,8 +85,8 @@ def test_merge():
g.merge([sg1, sg2, sg3]) g.merge([sg1, sg2, sg3])
h = g.get_n_repr()['h'][:,0] h = g.ndata['h'][:,0]
l = g.get_e_repr()['l'][:,0] l = g.edata['l'][:,0]
assert th.allclose(h, th.tensor([3., 0., 3., 3., 2., 0., 1., 1., 0., 1.])) assert th.allclose(h, th.tensor([3., 0., 3., 3., 2., 0., 1., 1., 0., 1.]))
assert th.allclose(l, assert th.allclose(l,
th.tensor([0., 0., 1., 1., 1., 1., 0., 0., 0., 3., 1., 4., 1., 4., 0., 3., 1.])) th.tensor([0., 0., 1., 1., 1., 1., 0., 0., 0., 3., 1., 4., 1., 4., 0., 3., 1.]))
......
#!/bin/bash #!/bin/bash
GCN_EXAMPLE_DIR="../../examples/pytorch/gcn" GCN_EXAMPLE_DIR="../../examples/pytorch/"
function fail { function fail {
echo FAIL: $@ echo FAIL: $@
...@@ -29,8 +29,9 @@ fi ...@@ -29,8 +29,9 @@ fi
pushd $GCN_EXAMPLE_DIR> /dev/null pushd $GCN_EXAMPLE_DIR> /dev/null
# test CPU # test
python3 gcn.py --dataset cora --gpu $dev || fail "run gcn.py on $1" python3 pagerank.py || fail "run pagerank.py on $1"
python3 gcn_spmv.py --dataset cora --gpu $dev || fail "run gcn_spmv.py on $1" python3 gcn/gcn.py --dataset cora --gpu $dev || fail "run gcn/gcn.py on $1"
python3 gcn/gcn_spmv.py --dataset cora --gpu $dev || fail "run gcn/gcn_spmv.py on $1"
popd > /dev/null popd > /dev/null
###############################################################################
# A toy example
# -------------
#
# Let’s begin with the simplest graph possible with two nodes, and set
# the node representations:
import torch as th
import dgl
g = dgl.DGLGraph()
g.add_nodes(2)
g.add_edge(1, 0)
x = th.tensor([[0.0, 0.0], [1.0, 2.0]])
g.nodes[:].data['x'] = x
###############################################################################
# ``ndata`` is syntactic sugar for accessing the feature data of all nodes
print(g.ndata['x'])
###############################################################################
# What we want to do is simply to copy the representation from node#1 to
# node#0, but through a message passing interface. We do this much like we
# would over a pair of sockets, with a send and a recv interface. The
# two user-defined functions (UDFs) specify the actions: deposit the
# value into an internal key-value store under the key msg, and retrieve
# it. Note that a node may have multiple incoming edges, and the
# receiving end aggregates them.
def send_source(edges): # type is dgl.EdgeBatch
return {'msg': edges.src['x']}
def simple_reduce(nodes): # type is dgl.NodeBatch
msgs = nodes.mailbox['msg']
return {'x' : th.sum(msgs, dim=1)}
g.send((1, 0), message_func=send_source)
g.recv(0, reduce_func=simple_reduce)
print(g.ndata)
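###############################################################################
# When a node has several incoming edges, all of their messages land in the
# node's mailbox and the reduce UDF aggregates them. Below is a minimal sketch
# on a separate toy graph (``g2`` is ours, for illustration only), so the
# two-node graph ``g`` above stays untouched:
g2 = dgl.DGLGraph()
g2.add_nodes(3)
g2.add_edges([1, 2], [0, 0])  # two edges pointing at node 0
g2.ndata['x'] = th.tensor([[0.0, 0.0], [1.0, 2.0], [3.0, 4.0]])
g2.send((1, 0), message_func=send_source)
g2.send((2, 0), message_func=send_source)
g2.recv(0, reduce_func=simple_reduce)  # node 0 should now hold [4.0, 6.0]
print(g2.ndata['x'])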
###############################################################################
# Sometimes the computation may involve representations on the edges.
# Let’s say we want to “amplify” the message:
w = th.tensor([2.0])
g.edata['w'] = w
def send_source_with_edge_weight(edges):
return {'msg': edges.src['x'] * edges.data['w']}
g.send((1, 0), message_func=send_source_with_edge_weight)
g.recv(0, reduce_func=simple_reduce)
print(g.ndata)
###############################################################################
# Or we may need to involve the destination’s representation, and here
# is one version:
def simple_reduce_addup(nodes):
msgs = nodes.mailbox['msg']
return {'x' : nodes.data['x'] + th.sum(msgs, dim=1)}
g.send((1, 0), message_func=send_source_with_edge_weight)
g.recv(0, reduce_func=simple_reduce_addup)
print(g.ndata)
del g.ndata['x']
del g.edata['w']
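###############################################################################
# The same copy-and-sum pattern can also be written with DGL's built-in
# message/reduce functions and a single ``update_all`` call, which triggers
# message passing on every edge at once. A minimal sketch on a fresh toy graph
# (``g3`` is ours, for illustration only):
import dgl.function as fn
g3 = dgl.DGLGraph()
g3.add_nodes(2)
g3.add_edge(1, 0)
g3.ndata['x'] = th.tensor([[0.0, 0.0], [1.0, 2.0]])
g3.update_all(fn.copy_src(src='x', out='msg'),
              fn.sum(msg='msg', out='x'),
              None)
print(g3.ndata['x'])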
...@@ -10,36 +10,87 @@ The ``DGLGraph`` is the very core data structure in our library. It provides the ...@@ -10,36 +10,87 @@ The ``DGLGraph`` is the very core data structure in our library. It provides the
interfaces to manipulate graph structure, set/get node/edge features and convert interfaces to manipulate graph structure, set/get node/edge features and convert
from/to many other graph formats. You can also perform computation on the graph from/to many other graph formats. You can also perform computation on the graph
using our message passing APIs (see :ref:`tutorial-mp`). using our message passing APIs (see :ref:`tutorial-mp`).
TODO: 1) explain `tensor`; 2) enable g.nodes/edges[:][key]; 3) networkx conversion in one place
""" """
############################################################################### ###############################################################################
# Construct a graph # Construct a graph
# ----------------- # -----------------
# #
# In ``DGLGraph``, all nodes are represented using consecutive integers starting from # The design of ``DGLGraph`` was influenced by other graph libraries. Indeed, you can
# zero. All edges are directed. Let us start by creating a star network of 10 nodes # create a graph from `networkx <https://networkx.github.io/>`__, and convert it into a ``DGLGraph``
# where all the edges point to the center node (node#0). # and vice versa:
# TODO(minjie): it's better to plot the graph here.
import networkx as nx
import dgl import dgl
g_nx = nx.petersen_graph()
g_dgl = dgl.DGLGraph(g_nx)
import matplotlib.pyplot as plt
plt.subplot(121)
nx.draw(g_nx, with_labels=True)
plt.subplot(122)
nx.draw(g_dgl.to_networkx(), with_labels=True)
plt.show()
###############################################################################
# They are the same graph, except that a ``DGLGraph`` is always `directed`.
#
# Creating a graph is a matter of specifying the total number of nodes and the edges among them.
# In ``DGLGraph``, all nodes are represented using consecutive integers starting from
# zero, and you can add more nodes repeatedly.
#
# .. note::
#
#    ``nx.add_node(100)`` adds a single node with id 100, whereas ``dgl.add_nodes(100)`` adds another 100 nodes to the graph.
g_dgl.clear()
g_nx.clear()
g_dgl.add_nodes(20)
print("We have %d nodes now" % g_dgl.number_of_nodes())
g_dgl.add_nodes(100)
print("Now we have %d nodes!" % g_dgl.number_of_nodes())
g_nx.add_node(100)
print("My nx buddy only has %d :( " % g_nx.number_of_nodes())
###############################################################################
# The most naive way to add edges is to add them one by one, as (*src, dst*) pairs.
# Let's generate a star graph where all the edges point to the center (node#0).
star = dgl.DGLGraph() star = dgl.DGLGraph()
star.add_nodes(10) # add 10 nodes star.add_nodes(10) # add 10 nodes
for i in range(1, 10): for i in range(1, 10):
star.add_edge(i, 0) star.add_edge(i, 0)
print('#Nodes:', star.number_of_nodes()) nx.draw(star.to_networkx(), with_labels=True)
print('#Edges:', star.number_of_edges())
###############################################################################
# It's more efficient to add many edges with a pair of lists, or better still, with a pair of tensors.
# TODO: needs to explain ``tensor``, since it's not a Python primitive data type.
# using lists
star.clear()
star.add_nodes(10)
src = [i for i in range(1, 10)]; dst = [0]*9
star.add_edges(src, dst)
# using tensor
star.clear()
star.add_nodes(10)
import torch as th
src = th.tensor(src); dst = th.tensor(dst)
star.add_edges(src, dst)
############################################################################### ###############################################################################
# ``DGLGraph`` also supports adding multiple edges at once by providing multiple # In addition to this, we also support
# source and destination nodes. Multiple nodes are represented using either a
# list or a 1D integer tensor(vector). In addition to this, we also support
# "edge broadcasting": # "edge broadcasting":
# #
# .. _note-edge-broadcast: # .. _note-edge-broadcast:
# #
# .. note:: # .. note::
# #
# Given two source and destination node list/tensor ``u`` and ``v``. # Given two source and destination node list/tensor ``u`` and ``v``.
# #
# - If ``len(u) == len(v)``, then this is a many-many edge set and # - If ``len(u) == len(v)``, then this is a many-many edge set and
...@@ -54,16 +105,13 @@ star.clear() # clear the previous graph ...@@ -54,16 +105,13 @@ star.clear() # clear the previous graph
star.add_nodes(10) star.add_nodes(10)
u = list(range(1, 10)) # can also use tensor type here (e.g. torch.Tensor) u = list(range(1, 10)) # can also use tensor type here (e.g. torch.Tensor)
star.add_edges(u, 0) # many-one edge set star.add_edges(u, 0) # many-one edge set
print('#Nodes:', star.number_of_nodes())
print('#Edges:', star.number_of_edges())
############################################################################### ###############################################################################
# In ``DGLGraph``, each edge is assigned an internal edge id (also a consecutive # In ``DGLGraph``, each edge is assigned an internal edge id (also a consecutive
# integer starting from zero). The ids follow the addition order of the edges # integer starting from zero). The ids follow the addition order of the edges
# and you can query the id using the ``edge_ids`` interface. # and you can query the id using the ``edge_ids`` interface, which returns a tensor.
print(star.edge_ids(1, 0)) # the first edge print(star.edge_ids(1, 0)) # query edge id of 1->0; it happens to be the first edge!
print(star.edge_ids([8, 9], 0)) # ask for ids of multiple edges print(star.edge_ids([8, 9], 0)) # ask for ids of multiple edges
...@@ -79,8 +127,8 @@ print(star.edge_ids([8, 9], 0)) # ask for ids of multiple edges ...@@ -79,8 +127,8 @@ print(star.edge_ids([8, 9], 0)) # ask for ids of multiple edges
# ---------------------- # ----------------------
# Nodes and edges can have feature data in tensor type. They can be accessed/updated # Nodes and edges can have feature data in tensor type. They can be accessed/updated
# through a key-value storage interface. The key must be hashable. The value should # through a key-value storage interface. The key must be hashable. The value should
# be features of each node and edge batched on the *first* dimension. For example, # be features of each node and edge, batched on the *first* dimension. For example,
# following codes create features for all nodes (``hv``) and features for all # the following codes create features for all nodes (``hv``) and features for all
# edges (``he``). Each feature is a vector of length 3. # edges (``he``). Each feature is a vector of length 3.
# #
# .. note:: # .. note::
...@@ -102,12 +150,20 @@ star.set_e_repr({'he' : efeat}) ...@@ -102,12 +150,20 @@ star.set_e_repr({'he' : efeat})
############################################################################### ###############################################################################
# .. note::
#    The first dimension of a node feature has length equal to the number of nodes,
#    whereas that of an edge feature equals the number of edges.
#
# We can then set some nodes' features to be zero. # We can then set some nodes' features to be zero.
# TODO(minjie): enable following syntax # TODO(minjie): enable following syntax
# print(star.nodes[:]['hv']) # print(star.nodes[:]['hv'])
print("node features:")
print(star.get_n_repr()['hv']) print(star.get_n_repr()['hv'])
print("\nedge features:")
print(star.get_e_repr()['he'])
# set node 0, 2, 4 feature to zero # set node 0, 2, 4 feature to zero
print("\nresetting features at node 0, 2 and 4...")
star.set_n_repr({'hv' : th.zeros((3, D))}, [0, 2, 4]) star.set_n_repr({'hv' : th.zeros((3, D))}, [0, 2, 4])
print(star.get_n_repr()['hv']) print(star.get_n_repr()['hv'])
...@@ -129,11 +185,12 @@ print(star.node_attr_schemes()) ...@@ -129,11 +185,12 @@ print(star.node_attr_schemes())
############################################################################### ###############################################################################
# If a new feature is added for some but not all of the nodes/edges, we will # If a new feature is added for some but not all of the nodes/edges, we will
# automatically create empty features for the others to make sure that features are # automatically create empty features for the others to make sure that features are
# always aligned. By default, we fill zero for the empty features. The behavior # always aligned. By default, we zero-fill the empty features. The behavior
# can be changed using ``set_n_initializer`` and ``set_e_initializer``. # can be changed using ``set_n_initializer`` and ``set_e_initializer``.
star.set_n_repr({'hv_1' : th.randn((3, D+1))}, [0, 2, 4]) star.set_n_repr({'hv_1' : th.randn((3, D+1))}, [0, 2, 4])
print(star.node_attr_schemes()) print(star.node_attr_schemes())
print(star.get_n_repr()['hv'])
print(star.get_n_repr()['hv_1']) print(star.get_n_repr()['hv_1'])
......
...@@ -10,6 +10,7 @@ The algorithm aims to provide a better alternative to current neural network str ...@@ -10,6 +10,7 @@ The algorithm aims to provide a better alternative to current neural network str
By using DGL library, users can implement the algorithm in a more intuitive way. By using DGL library, users can implement the algorithm in a more intuitive way.
""" """
############################################################################## ##############################################################################
# Model Overview # Model Overview
# --------------- # ---------------
...@@ -25,8 +26,9 @@ By using DGL library, users can implement the algorithm in a more intuitive way. ...@@ -25,8 +26,9 @@ By using DGL library, users can implement the algorithm in a more intuitive way.
# ``````````````````` # ```````````````````
# In papers, author states that "A capsule is a group of neurons whose activity vector # In papers, author states that "A capsule is a group of neurons whose activity vector
# represents the instantiation parameters of a specific type of entity such as an object # represents the instantiation parameters of a specific type of entity such as an object
# or an object part." # or an object part."
# Generally Speaking, the idea of capsule is to encode all the information about the #
# Generally speaking, the idea of capsule is to encode all the information about the
# features into a vector form, by substituting scalars in traditional neural network with vectors. # features into a vector form, by substituting scalars in traditional neural network with vectors.
# And use the norm of the vector to represents the meaning of original scalars. # And use the norm of the vector to represents the meaning of original scalars.
# #
...@@ -46,36 +48,24 @@ By using DGL library, users can implement the algorithm in a more intuitive way. ...@@ -46,36 +48,24 @@ By using DGL library, users can implement the algorithm in a more intuitive way.
# #
# Model Implementations # Model Implementations
# ------------------------- # -------------------------
# Setup
# ```````````````````````````
import dgl
import torch
import torch.nn.functional as F
from torch import nn
class DGLBatchCapsuleLayer(nn.Module): ##############################################################################
def __init__(self, input_capsule_dim, input_capsule_num, output_capsule_num, output_capsule_dim, num_routing, # Algorithm Overview
cuda_enabled): # ```````````````````````````
super(DGLBatchCapsuleLayer, self).__init__() #
self.device = "cuda" if cuda_enabled else "cpu" # .. image:: https://raw.githubusercontent.com/VoVAllen/DGL_Capsule/master/algorithm.png
self.input_capsule_dim = input_capsule_dim #
self.input_capsule_num = input_capsule_num # The main step of routing algorithm is line 4 - 7. In ``DGLGraph`` structure, we consider these steps as a message passing
self.output_capsule_dim = output_capsule_dim # procedure.
self.output_capsule_num = output_capsule_num
self.num_routing = num_routing
self.weight = nn.Parameter(
torch.randn(input_capsule_num, output_capsule_num, output_capsule_dim, input_capsule_dim))
self.g, self.input_nodes, self.output_nodes = self.construct_graph()
############################################################################## ##############################################################################
# Consider capsule routing as a graph structure # Consider capsule routing as a graph structure
# ```````````````````````````````````````````````````````````````````````````` # ````````````````````````````````````````````````````````````````````````````
# We can consider each capsule as a node in a graph, and connect all the nodes between layers. # We can consider each capsule as a node in a graph, and connect all the nodes between layers.
# #
# .. image:: https://raw.githubusercontent.com/dmlc/web-data/master/dgl/tutorials/capsule/capsule_f3.png # .. image:: https://raw.githubusercontent.com/dmlc/web-data/master/dgl/tutorials/capsule/capsule_f3.png
# :height: 200px # :height: 150px
# #
def construct_graph(self): def construct_graph(self):
g = dgl.DGLGraph() g = dgl.DGLGraph()
g.add_nodes(self.input_capsule_num + self.output_capsule_num) g.add_nodes(self.input_capsule_num + self.output_capsule_num)
...@@ -88,116 +78,165 @@ def construct_graph(self): ...@@ -88,116 +78,165 @@ def construct_graph(self):
v.append(j) v.append(j)
g.add_edges(u, v) g.add_edges(u, v)
return g, input_nodes, output_nodes return g, input_nodes, output_nodes
DGLBatchCapsuleLayer.construct_graph = construct_graph # This line is for defining class in multiple cells.
############################################################################## ##############################################################################
# Initialization & Affine Transformation # Write Message Passing Functions
# ``````````````````````````````````
# Reduce Functions (line 4 - 5)
# .............................................
#
# .. image:: https://raw.githubusercontent.com/dmlc/web-data/master/dgl/tutorials/capsule/capsule_f5.png
#
# At this stage, we need to define a reduce function that aggregates the node features
# from layer :math:`l` and computes their weighted sum to form layer :math:`(l+1)`'s node features.
#
# .. note::
# The softmax operation is over dimension :math:`j` instead of :math:`i`.
def capsule_reduce(node, msg):
b_ij_c, u_hat = msg['b_ij'], msg['u_hat']
# line 4
c_i = F.softmax(b_ij_c, dim=0)
# line 5
s_j = (c_i.unsqueeze(2).unsqueeze(3) * u_hat).sum(dim=1)
return {'h': s_j}
##############################################################################
# Node Update Functions (line 6)
# ......................................................
# Squash the intermediate representations into node features :math:`v_j`
#
# .. image:: https://raw.githubusercontent.com/dmlc/web-data/master/dgl/tutorials/capsule/step6.png
#
def capsule_update(msg):
v_j = squash(msg['h'])
return {'h': v_j}
##############################################################################
# Edge Update Functions (line 7)
# ...........................................................................
# Update the routing parameters by updating the edges of the graph
#
# .. image:: https://raw.githubusercontent.com/dmlc/web-data/master/dgl/tutorials/capsule/step7.png
#
def update_edge(u, v, edge):
return {'b_ij': edge['b_ij'] + (v['h'] * edge['u_hat']).mean(dim=1).sum(dim=1)}
##############################################################################
# Call DGL functions to execute the algorithm
# ````````````````````````````````````````````````````````````````````````````
# Call the ``update_all`` and ``update_edge`` functions to execute the whole algorithm.
# The message function defines which attributes are needed in the subsequent computation.
#
def routing(self):
def capsule_msg(src, edge):
return {'b_ij': edge['b_ij'], 'h': src['h'], 'u_hat': edge['u_hat']}
self.g.update_all(capsule_msg, capsule_reduce, capsule_update)
self.g.update_edge(edge_func=update_edge)
##############################################################################
# Forward Function
# ```````````````````````````````````````````````````````````````````````````` # ````````````````````````````````````````````````````````````````````````````
# This section shows the whole forward pass of the capsule routing algorithm.
def forward(self, x):
self.batch_size = x.size(0)
u_hat = self.compute_uhat(x)
self.initialize_nodes_and_edges_features(u_hat)
for i in range(self.num_routing):
self.routing()
this_layer_nodes_feature = self.g.get_n_repr()['h'][
self.input_capsule_num:self.input_capsule_num + self.output_capsule_num]
return this_layer_nodes_feature.transpose(0, 1).unsqueeze(1).unsqueeze(4).squeeze(1)
##############################################################################
# Other Workaround
# ````````````````````````````````````````````````````````````````
# Initialization & Affine Transformation
# ..................................................
# This section implements the transformation operation in capsule networks,
# which transforms capsules into different dimensions.
# - Pre-compute :math:`\hat{u}_{j|i}`, initialize :math:`b_{ij}` and store them as edge attribute # - Pre-compute :math:`\hat{u}_{j|i}`, initialize :math:`b_{ij}` and store them as edge attribute
# - Initialize node features as zero # - Initialize node features as zero
# #
# .. image:: https://raw.githubusercontent.com/dmlc/web-data/master/dgl/tutorials/capsule/capsule_f4.png # .. image:: https://raw.githubusercontent.com/dmlc/web-data/master/dgl/tutorials/capsule/capsule_f4.png
# #
def forward(self, x):
self.batch_size = x.size(0) def compute_uhat(self, x):
# x is the input vector with shape [batch_size, input_capsule_dim, input_num] # x is the input vector with shape [batch_size, input_capsule_dim, input_num]
# Transpose x to [batch_size, input_num, input_capsule_dim] # Transpose x to [batch_size, input_num, input_capsule_dim]
x = x.transpose(1, 2) x = x.transpose(1, 2)
# Expand x to [batch_size, input_num, output_num, input_capsule_dim, 1] # Expand x to [batch_size, input_num, output_num, input_capsule_dim, 1]
x = torch.stack([x] * self.output_capsule_num, dim=2).unsqueeze(4) x = torch.stack([x] * self.output_capsule_num, dim=2).unsqueeze(4)
# Expand W from [input_num, output_num, input_capsule_dim, output_capsule_dim] # Expand W from [input_num, output_num, input_capsule_dim, output_capsule_dim]
# to [batch_size, input_num, output_num, output_capsule_dim, input_capsule_dim] # to [batch_size, input_num, output_num, output_capsule_dim, input_capsule_dim]
W = self.weight.expand(self.batch_size, *self.weight.size()) W = self.weight.expand(self.batch_size, *self.weight.size())
# u_hat's shape is [input_num, output_num, batch_size, output_capsule_dim] # u_hat's shape is [input_num, output_num, batch_size, output_capsule_dim]
u_hat = torch.matmul(W, x).permute(1, 2, 0, 3, 4).squeeze().contiguous() u_hat = torch.matmul(W, x).permute(1, 2, 0, 3, 4).squeeze().contiguous()
return u_hat
b_ij = torch.zeros(self.input_capsule_num, self.output_capsule_num).to(self.device)
def initialize_nodes_and_edges_features(self, u_hat):
b_ij = torch.zeros(self.input_capsule_num, self.output_capsule_num).to(self.device)
self.g.set_e_repr({'b_ij': b_ij.view(-1)}) self.g.set_e_repr({'b_ij': b_ij.view(-1)})
self.g.set_e_repr({'u_hat': u_hat.view(-1, self.batch_size, self.output_capsule_dim)}) self.g.set_e_repr({'u_hat': u_hat.view(-1, self.batch_size, self.output_capsule_dim)})
self.routing()
# Initialize all node features as zero # Initialize all node features as zero
node_features = torch.zeros(self.input_capsule_num + self.output_capsule_num, self.batch_size, node_features = torch.zeros(self.input_capsule_num + self.output_capsule_num, self.batch_size,
self.output_capsule_dim).to(self.device) self.output_capsule_dim).to(self.device)
self.g.set_n_repr({'h': node_features}) self.g.set_n_repr({'h': node_features})
DGLBatchCapsuleLayer.forward = forward
############################################################################## ##############################################################################
# Write Message Passing functions and Squash function
# ````````````````````````````````````````````````````````````````````````````
# Squash function # Squash function
# .................. # ..................
# Squashing function is to ensure that short vectors get shrunk to almost zero length and # Squashing function is to ensure that short vectors get shrunk to almost zero length and
# long vectors get shrunk to a length slightly below 1. # long vectors get shrunk to a length slightly below 1. Its norm is expected to represent probabilities
# # at some levels.
#
# .. image:: https://raw.githubusercontent.com/dmlc/web-data/master/dgl/tutorials/capsule/squash.png # .. image:: https://raw.githubusercontent.com/dmlc/web-data/master/dgl/tutorials/capsule/squash.png
# :height: 100px # :height: 100px
# #
def squash(s): def squash(s, dim=2):
mag_sq = torch.sum(s ** 2, dim=2, keepdim=True) sq = torch.sum(s ** 2, dim=dim, keepdim=True)
mag = torch.sqrt(mag_sq) s_std = torch.sqrt(sq)
s = (mag_sq / (1.0 + mag_sq)) * (s / mag) s = (sq / (1.0 + sq)) * (s / s_std)
return s return s
############################################################################## ##############################################################################
# Message Functions # General Setup
# .................. # .................
# At first stage, we need to define a message function to get all the attributes we need
# in the further computations.
def capsule_msg(src, edge):
return {'b_ij': edge['b_ij'], 'h': src['h'], 'u_hat': edge['u_hat']}
############################################################################## import dgl
# Reduce Functions import torch
# .................. import torch.nn.functional as F
# At this stage, we need to define a reduce function to aggregate all the information we from torch import nn
# get from message function into node features.
# This step implements the line 4 and line 5 in routing algorithms, which softmax over
# :math:`b_{ij}` and calculate weighted sum of input features.
#
# .. note::
# The softmax operation is over dimension :math:`j` instead of :math:`i`.
#
# .. image:: https://raw.githubusercontent.com/dmlc/web-data/master/dgl/tutorials/capsule/capsule_f5.png
#
def capsule_reduce(node, msg):
b_ij_c, u_hat = msg['b_ij'], msg['u_hat']
# line 4
c_i = F.softmax(b_ij_c, dim=0)
# line 5
s_j = (c_i.unsqueeze(2).unsqueeze(3) * u_hat).sum(dim=1)
return {'h': s_j}
##############################################################################
# Node Update Functions
# ...........................
# Squash the intermidiate representations into node features :math:`v_j`
#
# .. image:: https://raw.githubusercontent.com/dmlc/web-data/master/dgl/tutorials/capsule/step6.png
#
def capsule_update(msg):
v_j = squash(msg['h'])
return {'h': v_j}
############################################################################## class DGLDigitCapsuleLayer(nn.Module):
# Edge Update Functions def __init__(self, input_capsule_dim=8, input_capsule_num=1152, output_capsule_num=10, output_capsule_dim=16,
# .......................... num_routing=3, device='cpu'):
# Update the routing parameters super(DGLDigitCapsuleLayer, self).__init__()
# self.device = device
# .. image:: https://raw.githubusercontent.com/dmlc/web-data/master/dgl/tutorials/capsule/step7.png self.input_capsule_dim = input_capsule_dim
# self.input_capsule_num = input_capsule_num
def update_edge(u, v, edge): self.output_capsule_dim = output_capsule_dim
return {'b_ij': edge['b_ij'] + (v['h'] * edge['u_hat']).mean(dim=1).sum(dim=1)} self.output_capsule_num = output_capsule_num
self.num_routing = num_routing
self.weight = nn.Parameter(
torch.randn(input_capsule_num, output_capsule_num, output_capsule_dim, input_capsule_dim))
self.g, self.input_nodes, self.output_nodes = self.construct_graph()
##############################################################################
# Executing algorithm # This section is for defining class in multiple cells.
# ..................... DGLDigitCapsuleLayer.construct_graph = construct_graph
# Call `update_all` and `update_edge` functions to execute the algorithms DGLDigitCapsuleLayer.forward = forward
def routing(self): DGLDigitCapsuleLayer.routing = routing
for i in range(self.num_routing): DGLDigitCapsuleLayer.compute_uhat = compute_uhat
self.g.update_all(capsule_msg, capsule_reduce, capsule_update) DGLDigitCapsuleLayer.initialize_nodes_and_edges_features = initialize_nodes_and_edges_features
self.g.update_edge(edge_func=update_edge)
DGLBatchCapsuleLayer.routing = routing
"""
Graph Convolutional Network New
====================================
**Author**: `Qi Huang`
This is a brief introduction to DGL and its message passing API through GCN (graph convolutional network).
"""
##############################################################################
# Message Passing: Warming up
# ---------------------------
#
# Let's begin with the simplest graph possible with two nodes, and set the node representations:
import torch as th
import dgl
g = dgl.DGLGraph()
g.add_nodes(2)
g.add_edge(1, 0)
x = th.tensor([[0.0, 0.0], [1.0, 2.0]])
g.set_n_repr({'x': x})
##############################################################################
# What we want to do is simply to copy the representation from node#1 to node#0, but through
# a message passing interface. We do this much like we would over a pair of sockets,
# with a ``send`` and a ``recv`` interface.
# The two `user-defined functions (UDFs)` specify the actions: deposit the value into an internal
# key-value store under the key `msg`, and retrieve it. Note that a node may have multiple incoming edges,
# and the receiving end aggregates them.
#
# .. note::
# * ``send(src, dst)`` defines an edge explicitly, so ``message_func`` taking ``edge`` as an
# argument is confusing.
# * following graph construction semantics, it'll be nice to allow ``src`` and ``dst`` as a pair
#   of lists, or a pair of tensors, though this example doesn't demonstrate it.
# * likewise, since we allow edge broadcasting, we should allow it in ``send`` as well.
# * what's the side-effect of doing a send action? we are left with the impression that the second argument
# in the ``reduce_func`` (i.e. ``msgs``) magically gets the stuff with the same key.
# * my preference is to say that expected side-effect is simply that the result of a ``send`` action is available
# at ``dst['key']``, where ``key`` is whatever the user specified in ``message_func``. this allows
# for cases where we use ``apply_node_func``.
# * in other words,
# ``message_func`` returns ``{'hey': [1.0]}``, we expect to see ``dst['hey']``. if that happens
# to be the representation key, then a replacement is done. user can define a new key, e.g. ``accum``,
# then the ``reduce_func`` and ``apply_node_func`` can do whatever they want. typically,
# they should return with the representation key to perform update.
#
def send_source(src, edge):
return {'msg': src['x']}
def simple_reduce(node, msgs):
return {'x' : th.sum(msgs['msg'], dim=1)}
g.send(1, 0, message_func=send_source)
g.recv([0], reduce_func=simple_reduce)
print(g.get_n_repr())
##############################################################################
# Sometimes the computation may involve representations on the edges. Let's say we want to "amplify"
# the message:
w = th.tensor([2.0])
g.set_e_repr({'w': w})
def send_source_with_edge_weight(src, edge):
return {'msg': src['x'] * edge['w']}
g.send(1, 0, message_func=send_source_with_edge_weight)
g.recv([0], reduce_func=simple_reduce)
print(g.get_n_repr())
##############################################################################
# Or we may need to involve the destination's representation, and here is one version:
def simple_reduce_addup(node, msgs):
return {'x' : node['x'] + th.sum(msgs['msg'], dim=1)}
g.send(1, 0, message_func=send_source_with_edge_weight)
g.recv([0], reduce_func=simple_reduce_addup)
print(g.get_n_repr())
##############################################################################
# A slightly more complex but more flexible approach is to store the reduced sum at the node under
# a different key, and then call the ``apply_node_func``:
#
# .. note::
#    That the result magically appears in the node's key-value store is non-intuitive.
def simple_reduce_to_accum(node, msgs):
return {'accum' : th.sum(msgs['msg'], dim=1)}
def simple_apply(node):
return {'x': node['x'] + node['accum']}
g.send(1, 0, message_func=send_source_with_edge_weight)
g.recv([0], reduce_func=simple_reduce_to_accum, apply_node_func=simple_apply)
print(g.get_n_repr())
##############################################################################
# ``send`` and ``recv`` are the **level-1** calls in DGL; they give the finest control over routing
# the messages.
#
# TODO: build a star graph (reuse the one in 2_graph.py), and use pull (or push)
#
# TODO: build a much bigger graph, explain with spMV and the use of ``update_all``
#
##############################################################################
# Model Overview
# ---------------
# Introduction
# ```````````````````
# This is a simple implementation of Kipf & Welling's Semi-Supervised Classification with Graph Convolutional Networks (ICLR 2017), which proposes a simple yet efficient model that extends convolutional neural networks from the grid-structured data we are all familiar with to graphs, such as social networks and knowledge graphs. It starts from the framework of spectral graph convolutions and makes reasonable simplifications to achieve both faster training and higher prediction accuracy. It also achieves state-of-the-art classification results on a number of graph datasets such as CORA. /TODO: elaborate.
# Note that this is not intended to be an end-to-end lecture on Kipf & Welling's GCN paper. In this tutorial, we aim to provide a friendly entry point that shows how to code up a contemporary NN model operating on graph-structured data, and to deepen the user's understanding of DGL's message passing API in action. For a more thorough understanding of the derivation and all the details of GCN, please read the original paper. /TODO(hq): add link.
#
# GCN in one formula
# `````````````````````
# Essentially, the GCN model boils down to the following formula:
# :math:`H^{(l+1)} = \sigma(\tilde{D}^{-\frac{1}{2}}\tilde{A}\tilde{D}^{-\frac{1}{2}}H^{(l)}W^{(l)})`
#
# The equation above describes a "graph convolution layer" in GCN.
# Essentially, :math:`H^{(l)}` denotes the node representations at the l-th layer of the network, :math:`\sigma` is the non-linearity, and :math:`W` is the weight matrix for this layer. :math:`D` and :math:`A`, as commonly seen, represent the degree matrix and the adjacency matrix, respectively. The tilde denotes a renormalization trick in which we add a self-connection to each node of the graph (:math:`\tilde{A} = A + I_N`) and build the corresponding degree and adjacency matrices.
#
# The shape of the input :math:`H^{(0)}` is :math:`N \times D`, where :math:`N` is the number of nodes and :math:`D` is the number of input features. We can chain up multiple layers as such to produce a node-level representation output with shape :math:`N \times F`, where :math:`F` is the dimension of the output node feature vector.
#
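# To make the formula concrete, here is a small dense-tensor sketch of one
# graph convolution layer (illustrative only, plain PyTorch, not the DGL API;
# the variable names below are ours):
import torch as th
A = th.tensor([[0., 1., 0.],
               [1., 0., 1.],
               [0., 1., 0.]])                        # adjacency matrix of a 3-node path graph
A_tilde = A + th.eye(3)                              # renormalization trick: add self-connections
D_inv_sqrt = th.diag(A_tilde.sum(dim=1).pow(-0.5))   # \tilde{D}^{-1/2}
H = th.randn(3, 4)                                   # H^{(0)}: N x D input features
W = th.randn(4, 2)                                   # W^{(0)}: D x F layer weights
H_next = th.relu(D_inv_sqrt @ A_tilde @ D_inv_sqrt @ H @ W)   # N x F output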
# Derivation of GCN
# ``````````````````
# \TODO(hq) do we need a short description of how we depart from spectral-based methods and end up with GCN?
# According to others, this amounts to Laplacian smoothing.
#
# Understanding GCN from Message Passing
# ````````````````````````````````````````
# Think of :math:`W^{(l)}` simply as a matrix of
# filter parameters that projects :math:`H^{(l)}`, and of
# :math:`\tilde{D}^{-\frac{1}{2}}\tilde{A}\tilde{D}^{-\frac{1}{2}}` as a symmetric normalization of the
# adjacency matrix.
#
# Combining these two, we arrive at a more succinct form of GCN:
# :math:`\sigma(\hat{A}\hat{H}^{(l)})`
# where :math:`\hat{A}` denotes the normalized version of the
# adjacency matrix, and :math:`\hat{H}` denotes the
# projection of the last layer's node-level representation :math:`H`.
#
# We can further view multiplication with the adjacency matrix as message passing between nodes along the paths encoded in the adjacency matrix.
# To keep it simple, let's denote the input signal on a graph :math:`G = (V, E)` as :math:`x \in \mathbb{R}^{|\mathcal{V}| \times 1}`, i.e. each node's feature is a single scalar.
# Then, calculating :math:`x_{t+1} = Ax_{t}` amounts to one round of message passing along the existing edges:
# the i-th node's new feature :math:`x_{t+1}^{i}` sums up the entries of the old feature vector :math:`x_{t}` whose
# node indices have a non-zero entry in the i-th row of the adjacency matrix :math:`A`, i.e. the nodes that have an
# edge to node i. If we multiply the resulting vector with :math:`A` again, the result, :math:`A^{2}x_{t}`, is the
# feature vector after two rounds of message passing. In this sense, :math:`A^2` encodes the 2-hop neighborhood of
# each node. By k-hop neighborhood, we mean any node reachable in exactly k steps from the current node (if
# self-connections are not included in the original adjacency matrix), or any node reachable within k steps if
# self-connections are included. Equivalently, for a binary adjacency matrix,
# :math:`A^2_{ij} = \bigvee_k \left(A_{ik} \wedge A_{kj}\right)`.
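# A tiny numerical illustration of the claim above (toy numbers, plain tensors,
# not the DGL API):
import torch as th
A = th.tensor([[0., 1., 1.],
               [1., 0., 0.],
               [1., 0., 0.]])        # node 0 is connected to nodes 1 and 2
x_t = th.tensor([[1.], [2.], [3.]])  # one scalar feature per node
x_t1 = A @ x_t                       # one round of message passing: node 0 receives 2 + 3
x_t2 = A @ x_t1                      # two rounds: 2-hop neighborhood information
print(x_t1.squeeze(), x_t2.squeeze())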
#
# Nonetheless, in GCN we only use :math:`\sigma(\hat{A}\hat{H}^{(l)})` in each layer, meaning we only propagate information among each node's 1-hop neighborhood for each layer.
#
#
# Model Implementation
# ------------------------
# Warming up of message passing API
# ````````````````````````````````````
# DGL provides 3 levels of message passing APIs, giving users different levels of control. Below we demonstrate the three levels on a simple star graph of size 10, where nodes 1-9 all send information to node 0.
#
# Level 1 -- send, recv, and apply_node
# ..........................................
# The most basic level is ``send(src,dst,message_function)``, ``recv(node,reduce_function)``, and ``apply_nodes(nodes)``.
# ``send()`` and ``recv()`` allow users to designate specific (source, destination) pairs for passing information. ``apply_nodes()`` allows users to perform per-node computation.
#
# Three functions need to be pre-specified when using the message passing API: 1) the message function, 2) the reduce function, and 3) the apply function. The message function determines what message is passed along the edges; the reduce function determines how messages are aggregated at the destination node; the apply function determines the per-node computation performed afterwards. Note that all three functions can either be defined by the user or taken from the built-in functions available in ``dgl.function``. For a more detailed description of the built-in function syntax, please see \TODO(hq) add hyperref.
#
# Users don't have to pass message_function and reduce_function as parameters every time if they have registered them on the graph beforehand, as shown in the following code.
import argparse
import time
import torch as th
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import dgl
import networkx as nx
from dgl import DGLGraph
from dgl.data import register_data_args, load_data
star = dgl.DGLGraph()
star.add_nodes(10)
u = list(range(1,10))
star.add_edges(u,0) # create the graph
D = 1 # the feature dimension
N = star.number_of_nodes()
M = star.number_of_edges()
nfeat = th.ones((N, D)) # each node's feature is just 1
efeat = th.ones((M, D))*2 # each edge's feature is 2.
star.set_n_repr({'hv' : nfeat})
star.set_e_repr({'he' : efeat})
u = th.tensor([0])
v = th.tensor([1,2,3,4,5]) # send nodes 1-5's features to node 0
def _message_test(src,edge):
return {'hv':src['hv']}
def _reduce(node,msgs):
return{'hv':node['hv']+msgs['hv'].sum(1)}
# aggregate along the second dimension, as
# the first dimension is reserved for batching in DGL.
star.register_message_func(_message_test)
star.register_reduce_func(_reduce)
star.send(v,u)
# DGL supports batching send/recv and broadcasting.
star.recv(u)
#We expect to get 6 on node 0.
print(star.get_n_repr()['hv'])
##########################################################################
# Level 2 -- pull, push, and send_and_recv
# ............................................
# It can be both tedious and inefficient for users to call ``send()`` and ``recv()`` separately. DGL comes to the rescue with a series of higher-level APIs, which also increase performance through operator fusion in the backend ``/TODO(gaiyu) verify this statement please``.
# ``send_and_recv(src,dst,message_func,reduce_func,apply_func)`` is essentially a wrapper around send and receive.
# ``pull(node,message_func,reduce_func,apply_func)`` takes the input nodes as destination nodes, all their predecessor nodes as source nodes, and performs ``send_and_recv()``.
# ``push(node,message_func,reduce_func,apply_func)`` takes the input nodes as source nodes, all their successor nodes as destination nodes, and performs ``send_and_recv()``.
#
# Notice that the apply function is usually optional in the message passing APIs.
star.set_n_repr({'hv' : nfeat}) #reset node repr
star.set_e_repr({'he' : efeat}) #reset edge repr
star.send_and_recv(v, u)  # note that the apply function is left out here
print(star.get_n_repr()['hv']) # we expect to get 6 on node 0
#####################################################################
#
# Then we register the apply function.
#
def _apply_test(node):
return {'hv':500*node['hv']}
star.register_apply_node_func(_apply_test)
star.apply_nodes(u)
print(star.get_n_repr()['hv']) #we expect to get 3000 on node 0
#########################################################################
star.set_n_repr({'hv' : nfeat}) #reset node repr
star.set_e_repr({'he' : efeat}) #reset edge repr
star.pull(u)
print(star.get_n_repr()['hv']) # we expect to get 5000 on node 0: pull gathers from all 9 predecessors (1 + 9 = 10), then the registered apply function multiplies by 500
###################################################################
star.set_n_repr({'hv' : nfeat}) #reset node repr
star.set_e_repr({'he' : efeat}) #reset edge repr
star.push(v)
print(star.get_n_repr()['hv']) # we expect to get 3000 on node 0
#######################################################################
# Level 3 -- update_all
# ..........................
# In many cases, users would like to perform message passing on all the edges simultaneously, as in the adjacency matrix multiplication view of GCN above. DGL provides the ``update_all()`` method for this, and also optimizes performance under the hood.
star.set_n_repr({'hv' : nfeat}) #reset node repr
star.set_e_repr({'he' : efeat}) #reset edge repr
star.update_all(apply_node_func = None)
print(star.get_n_repr()['hv']) # we expect to get 10 on node 0, as we choose not to perform any apply_node functions
#
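##########################################################################
# The same all-edges update can also be written with the built-in functions
# from ``dgl.function``. The snippet below is a minimal sketch that assumes
# ``copy_src`` (copy the source feature into the message) and ``sum``
# (sum up the mailbox) behave as their names suggest; refer to the built-in
# function documentation for the authoritative syntax. Note that, unlike our
# registered reduce function, the built-in ``sum`` does not add the node's
# own feature, so node 0 ends up with 9 instead of 10.
import dgl.function as fn
star.set_n_repr({'hv' : nfeat}) #reset node repr
star.set_e_repr({'he' : efeat}) #reset edge repr
star.update_all(fn.copy_src(src='hv', out='m'),
                fn.sum(msg='m', out='hv'),
                apply_node_func=None)
print(star.get_n_repr()['hv']) # we expect to get 9 on node 0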
##########################################################
# Model Implementation
# ``````````````````````````````
# Model definition
# ....................
# Similar to above, we first define the message function, reduce function and apply function for GCN.
def gcn_msg(src, edge):
return {'m' : src['h']} #return node feature
def gcn_reduce(node, msgs):
return {'h' : th.sum(msgs['m'], 1)} # aggregate incoming node features
class NodeApplyModule(nn.Module):
def __init__(self, in_feats, out_feats, activation=None):
super(NodeApplyModule, self).__init__()
self.linear = nn.Linear(in_feats, out_feats)
self.activation = activation #apply a filter and non-linearity.
def forward(self, node):
h = self.linear(node['h'])
if self.activation:
h = self.activation(h)
return {'h' : h}
class GCN(nn.Module):
def __init__(self,
g,
in_feats,
n_hidden,
n_classes,
n_layers,
activation,
dropout,
mode=1):
super(GCN, self).__init__()
self.g = g #graph is passed as a parameter to the model
self.dropout = dropout
# input layer
self.layers = nn.ModuleList([NodeApplyModule(in_feats, n_hidden, activation)])
# hidden layers
for i in range(n_layers - 1):
self.layers.append(NodeApplyModule(n_hidden, n_hidden, activation))
# output layer
self.layers.append(NodeApplyModule(n_hidden, n_classes))
self.mode = mode # indicate DGL message passing level for subsequent use
# Message passing in 3 levels --- level 1
    def lv1_mp(self, layer):
        # send along every existing edge, then let the nodes reduce their mailboxes
        u, v = self.g.edges()
        self.g.send(u, v, gcn_msg)
        node_ids = list(range(self.g.number_of_nodes()))
        self.g.recv(node_ids, gcn_reduce, layer)
# Message passing in 3 levels --- level 2
def lv2_mp(self, layer):
        dst = list(range(self.g.number_of_nodes()))
self.g.pull(dst, gcn_msg, gcn_reduce, layer)
# Message passing in 3 levels -- level 3
def lv3_mp(self, layer):
self.g.update_all(gcn_msg, gcn_reduce, layer)
# Below is the forward function
def forward(self, features):
self.g.set_n_repr({'h' : features})
for layer in self.layers:
# apply dropout
            if self.dropout:
                self.g.apply_nodes(apply_node_func=
                    lambda node: {'h': F.dropout(node['h'], p=self.dropout)})
assert self.mode in [1,2,3]
if self.mode == 1 :
self.lv1_mp(layer)
elif self.mode == 2 :
self.lv2_mp(layer)
else :
self.lv3_mp(layer)
return self.g.pop_n_repr('h')
######################################################################
# Training & Inference
# ``````````````````````````````````
# Below we train the model and perform inference.
from dgl.data import citation_graph as citegrh
data = citegrh.load_cora()
features = th.FloatTensor(data.features)
labels = th.LongTensor(data.labels)
mask = th.ByteTensor(data.train_mask)
in_feats = features.shape[1]
n_classes = data.num_labels
n_edges = data.graph.number_of_edges()
# Some training hyperparameters for illustration
cuda = th.cuda.is_available()
if cuda:
    features = features.cuda()
    labels = labels.cuda()
    mask = mask.cuda()
n_hidden = 16
n_layers = 1
dropout = 0
n_epochs = 200
lr = 1e-3
g = DGLGraph(data.graph)
model = GCN(g,
in_feats,
n_hidden,
n_classes,
n_layers,
F.relu,
dropout,
mode = 3) #level 3 message passing
model2 = GCN(g,
in_feats,
n_hidden,
n_classes,
n_layers,
F.relu,
dropout,
            mode = 3) # level 3 message passing; switch mode to 1 or 2 to try the other API levels
if cuda:
    model.cuda()
    model2.cuda()
# use optimizer
optimizer = th.optim.Adam(model2.parameters(), lr=lr)
# training loop
dur = []
for epoch in range(n_epochs):
if epoch >=3:
t0 = time.time()
#forward
logits = model2(features)
logp = F.log_softmax(logits, 1)
loss = F.nll_loss(logp[mask], labels[mask])
optimizer.zero_grad()
loss.backward()
optimizer.step()
if epoch >= 3:
dur.append(time.time() - t0)
print("Epoch {:05d} | Loss {:.4f} | Time(s) {:.4f} | ETputs(KTEPS) {:.2f}".format(
epoch, loss.item(), np.mean(dur), n_edges / np.mean(dur) /1000))
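######################################################################
# After training, we can run inference and check accuracy on the held-out
# nodes. This is a minimal sketch that assumes the Cora loader also exposes
# a ``test_mask`` attribute, as the other DGL examples do; adjust accordingly
# if your dataset object differs.
test_mask = th.ByteTensor(data.test_mask)
if cuda:
    test_mask = test_mask.cuda()
with th.no_grad():
    logits = model2(features)
    _, pred = th.max(logits, dim=1)
    correct = (pred[test_mask] == labels[test_mask]).long().sum().item()
    total = test_mask.long().sum().item()
print("Test accuracy {:.4f}".format(correct / total))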
"""
Graph Convolutional Network New
====================================
**Author**: `Quan Gan`
In this tutorial, we will go through the basics of DGL, in the following order:
1. Creating a graph
2. Setting/getting node/edge states
3. Updating node/edge states using user-defined functions
4. Passing information to edges from endpoint nodes
5. Passing information to nodes from adjacent nodes and edges
6. Implementing a Graph Convolutional Network (GCN) and a Graph Attention
Network (GAT)
7. Using built-in functions to simplify your implementation
"""
##############################################################################
# Section 1. Creating a Graph
# ---------------------------
#
# Let's say we want to create the following graph:
#
# .. digraph:: foo
#
# digraph foo {
# layout=circo;
# "A" -> "B" -> "C" -> "A";
# }
#
# First, we need to create a ``DGLGraph`` object.
from dgl import DGLGraph
g = DGLGraph()
##############################################################################
# And then we add 3 vertices (or *nodes*) into ``g``:
g.add_nodes(3)
##############################################################################
# In DGL, all vertices are uniquely identified by integers, starting from 0.
# Assuming that we map the node ``A``, ``B``, and ``C`` to ID 0, 1, and 2, we
# can add the edges of the desired graph above as follows:
g.add_edge(0, 1)
g.add_edge(1, 2)
g.add_edge(2, 0)
# Or, equivalently
# g.add_edges([0, 1, 2], [1, 2, 0])
##############################################################################
# All the edges are also uniquely identified by integers, again starting from
# 0. The edges are labeled in the order of addition. In the example above,
# the edge ``0 -> 1`` is labeled as edge #0, ``1 -> 2`` as edge #1, and
# ``2 -> 0`` as edge #2.
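# We can quickly verify this numbering; the small sketch below assumes the
# graph exposes an ``edge_ids`` query that maps (source, destination) pairs
# to edge IDs, as used in DGL's own test suite.
print(g.edge_ids([0, 1, 2], [1, 2, 0]))  # should correspond to edge IDs 0, 1, 2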
##############################################################################
# Section 2. Setting/getting node/edge states
# --------------------------------------------
# Now, we wish to assign the nodes some states, or features.
#
# In DGL, the node/edge states are represented as dictionaries, with strings
# as keys (or *fields*), and tensors as values. DGL aims to be
# framework-agnostic, and currently it supports PyTorch and MXNet. From now
# on, we use PyTorch as an example.
#
# You can set up states for some or all nodes at the same time in DGL.
# All you need to do is stack the tensors along the first dimension for each
# key, and feed the dictionary of stacked tensors into ``set_n_repr``
# as a whole.
import torch
# We are going to assign each node two states X and Y. For each node,
# X is a 2-D vector and Y is a 2x4 matrix. You only need to make sure that
# tensors with the same key have the same shape and data type across all
# the nodes being set.
X = torch.randn(3, 2)
Y = torch.randn(3, 2, 4)
# You can set the states for all of them...
g.set_n_repr({'X': X, 'Y': Y})
# ... or set partial states, but only after you have set all nodes on
# at least one key.
# TODO: do we want to fix this behavior to allow initial partial setting?
g.set_n_repr({'X': X[0:2], 'Y': Y[0:2]}, [0, 1])
# You can also overwrite part of the fields. The following overwrites field
# X while keeping Y intact.
X = torch.randn(3, 2)
g.set_n_repr({'X': X})
##############################################################################
# You can also efficiently get the node states as a dictionary of tensors.
# The dictionary will also have strings as keys and stacked tensors as values.
# Getting all node states. The tensors will be stacked along the first
# dimension, in the same order as node ID.
n_repr = g.get_n_repr()
X_ = n_repr['X']
Y_ = n_repr['Y']
assert torch.allclose(X_, X)
assert torch.allclose(Y_, Y)
# You can also get the states from a subset of nodes. The tensors will be
# stacked along the first dimension, in the same order as what you feed in.
n_repr_subset = g.get_n_repr([0, 2])
X_ = n_repr_subset['X']
Y_ = n_repr_subset['Y']
assert torch.allclose(X_, X[[0, 2]])
assert torch.allclose(Y_, Y[[0, 2]])
##############################################################################
# Setting/getting edge states is very similar. We provide two ways of reading
# and writing edge states: by source-destination pairs, and by edge ID.
# We are going to assign each edge a state A and a state B, both of which are
# 3-D vectors for each edge.
A = torch.randn(3, 3)
B = torch.randn(3, 3)
# You can either set the states of all edges...
g.set_e_repr({'A': A, 'B': B})
# ... or by source-destination pair (in this case, assigning A[0] to (0 -> 1)
# and A[2] to (2 -> 0) ...
g.set_e_repr({'A': A[[0, 2]], 'B': B[[0, 2]]}, [0, 2], [1, 0])
# ... or by edge ID (#0 and #2)
g.set_e_repr_by_id({'A': A[[0, 2]], 'B': B[[0, 2]]}, [0, 2])
# Note that the latter two options are available only if you have set at least
# one field on all edges.
# TODO: do we want to fix this behavior to allow initial partial setting?
# Getting edge states is also easy...
e_repr = g.get_e_repr()
A_ = e_repr['A']
assert torch.allclose(A_, A)
# ... and you can also do it either by specifying source-destination pair...
e_repr_subset = g.get_e_repr([0], [1])
assert torch.allclose(e_repr_subset['A'], A[[0]])
# ... or by edge ID
e_repr_subset = g.get_e_repr_by_id([0])
assert torch.allclose(e_repr_subset['A'], A[[0]])
##############################################################################
# One can also remove node/edge states from the graph. This is particularly
# useful to save memory during inference.
B_ = g.pop_e_repr('B')
assert torch.allclose(B_, B)
##############################################################################
# Section 3. Updating node/edge states
# ------------------------------------
# The most direct way to update node/edge states is by getting/setting the
# states directly. Of course, you can update the states on a subset of
# nodes and/or edges this way.
X_new = g.get_n_repr()['X'] + 2
g.set_n_repr({'X': X_new})
##############################################################################
# A better structured implementation would wrap the update procedure as a
# function/module, to decouple the update logic from the rest of the system.
def updateX(node_state_dict):
return {'X': node_state_dict['X'] + 2}
g.set_n_repr(updateX(g.get_n_repr()))
##############################################################################
# If your node state update function is a **node-wise map** operation (i.e.
# the update of a single node only depends on the current state of that
# particular node), you can also call the ``apply_nodes`` method.
#
# .. note::
# In distributed computation,
g.apply_nodes(apply_node_func=updateX)
# You can also update node states partially
g.apply_nodes(v=[0, 1], apply_node_func=updateX)
##############################################################################
# For edges, DGL also has an ``apply_edges`` method for **edge-wise map**
# operations.
def updateA(edge_state_dict):
return {'A': edge_state_dict['A'] + 2}
g.apply_edges(apply_edge_func=updateA)
# You can also update edge states by specifying endpoints or edge IDs
g.apply_edges(u=[0, 2], v=[1, 0], apply_edge_func=updateA)
g.apply_edges(eid=[0, 2], apply_edge_func=updateA)