Unverified commit 18bfec24, authored by Zihao Ye and committed by GitHub

[hotfix] Refactor edge softmax module (#1967)

* upd

* upd

* upd

* upd

* upd

* upd

* upd

* upd
parent 3611a66e
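
For context, the tests removed below exercise DGL's functional edge softmax in two modes: a full softmax that normalizes edge scores over each destination node's incoming edges, and a partial softmax restricted to a given set of edge IDs. A minimal sketch of the call pattern with the PyTorch backend (the import path of `edge_softmax` is an assumption, not taken from this diff; the tests themselves call it as `nn.edge_softmax`):

```python
import torch
import dgl
from dgl.nn.functional import edge_softmax  # assumed import location

g = dgl.rand_graph(30, 900)                  # 30 nodes, 900 random edges
score = torch.randn(g.number_of_edges(), 1, requires_grad=True)

# Full edge softmax: scores are normalized over each node's incoming edges.
attn = edge_softmax(g, score)

# Partial edge softmax: only the listed edge IDs participate; the score tensor
# has one row per selected edge, matching the removed tests below.
eids = torch.randperm(g.number_of_edges())[:300]
partial_score = torch.randn(len(eids), 1, requires_grad=True)
partial_attn = edge_softmax(g, partial_score, eids)
```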
@@ -572,58 +572,6 @@ def test_simple_pool():
    h1 = sort_pool(bg, h0)
    assert h1.shape[0] == 5 and h1.shape[1] == 10 * 5 and h1.ndim == 2

def uniform_attention(g, shape):
    a = mx.nd.ones(shape).as_in_context(g.device)
    target_shape = (g.number_of_edges(),) + (1,) * (len(shape) - 1)
    return a / g.in_degrees(g.edges()[1]).reshape(target_shape).astype('float32')

def test_edge_softmax():
    # Basic
    g = dgl.DGLGraph(nx.path_graph(3)).to(F.ctx())
    edata = F.ones((g.number_of_edges(), 1))
    a = nn.edge_softmax(g, edata)
    assert len(g.ndata) == 0
    assert len(g.edata) == 0
    assert np.allclose(a.asnumpy(), uniform_attention(g, a.shape).asnumpy(),
                       1e-4, 1e-4)

    # Test higher dimension case
    edata = F.ones((g.number_of_edges(), 3, 1))
    a = nn.edge_softmax(g, edata)
    assert len(g.ndata) == 0
    assert len(g.edata) == 0
    assert np.allclose(a.asnumpy(), uniform_attention(g, a.shape).asnumpy(),
                       1e-4, 1e-4)

def test_partial_edge_softmax():
    g = dgl.DGLGraph().to(F.ctx())
    g.add_nodes(30)
    # build a complete graph
    for i in range(30):
        for j in range(30):
            g.add_edge(i, j)

    score = F.randn((300, 1))
    score.attach_grad()
    grad = F.randn((300, 1))
    import numpy as np
    eids = F.tensor(np.random.choice(900, 300, replace=False), g.idtype)

    # compute partial edge softmax
    with mx.autograd.record():
        y_1 = nn.edge_softmax(g, score, eids)
    y_1.backward(grad)
    grad_1 = score.grad

    # compute edge softmax on edge subgraph
    subg = g.edge_subgraph(eids, preserve_nodes=True)
    with mx.autograd.record():
        y_2 = nn.edge_softmax(subg, score)
    y_2.backward(grad)
    grad_2 = score.grad

    assert F.allclose(y_1, y_2)
    assert F.allclose(grad_1, grad_2)
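
The `uniform_attention` helpers in these tests encode the expected output: when every edge carries the same logit, edge softmax reduces to 1 / in-degree of each edge's destination node. A small NumPy sketch of that identity, using the four directed edges that the undirected 3-node path graph is assumed to expand into:

```python
import numpy as np

# Directed edge list assumed for nx.path_graph(3) loaded into DGL:
# both directions of each undirected edge.
src = np.array([0, 1, 1, 2])
dst = np.array([1, 0, 2, 1])
logits = np.ones(len(src))                 # identical score on every edge

# Softmax grouped by destination node.
expected = np.empty_like(logits)
for v in np.unique(dst):
    mask = dst == v
    e = np.exp(logits[mask] - logits[mask].max())
    expected[mask] = e / e.sum()

# Matches the 1 / in-degree form used by uniform_attention.
in_deg = np.bincount(dst, minlength=3)
assert np.allclose(expected, 1.0 / in_deg[dst])
print(expected)                            # [0.5, 1.0, 1.0, 0.5]
```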

def test_rgcn():
    ctx = F.ctx()
    etype = []
@@ -848,8 +796,6 @@ if __name__ == '__main__':
    test_gmm_conv()
    test_nn_conv()
    test_sg_conv()
    test_edge_softmax()
    test_partial_edge_softmax()
    test_set2set()
    test_glob_att_pool()
    test_simple_pool()
@@ -289,76 +289,6 @@ def test_set_trans():
    h2 = st_dec(bg, h1)
    assert h2.shape[0] == 3 and h2.shape[1] == 200 and h2.dim() == 2

def uniform_attention(g, shape):
    a = F.ones(shape)
    target_shape = (g.number_of_edges(),) + (1,) * (len(shape) - 1)
    return a / g.in_degrees(g.edges(order='eid')[1]).view(target_shape).float()

@parametrize_dtype
def test_edge_softmax(idtype):
    # Basic
    g = dgl.graph(nx.path_graph(3))
    g = g.astype(idtype).to(F.ctx())
    edata = F.ones((g.number_of_edges(), 1))
    a = nn.edge_softmax(g, edata)
    assert len(g.ndata) == 0
    assert len(g.edata) == 0
    assert F.allclose(a, uniform_attention(g, a.shape))

    # Test higher dimension case
    edata = F.ones((g.number_of_edges(), 3, 1))
    a = nn.edge_softmax(g, edata)
    assert len(g.ndata) == 0
    assert len(g.edata) == 0
    assert F.allclose(a, uniform_attention(g, a.shape))

    # Test both forward and backward with PyTorch built-in softmax.
    g = dgl.rand_graph(30, 900)
    g = g.astype(idtype).to(F.ctx())

    score = F.randn((900, 1))
    score.requires_grad_()
    grad = F.randn((900, 1))
    y = F.softmax(score.view(30, 30), dim=0).view(-1, 1)
    y.backward(grad)
    grad_score = score.grad
    score.grad.zero_()

    y_dgl = nn.edge_softmax(g, score)
    assert len(g.ndata) == 0
    assert len(g.edata) == 0
    # check forward
    assert F.allclose(y_dgl, y)
    y_dgl.backward(grad)
    # check gradient
    assert F.allclose(score.grad, grad_score)
    print(score.grad[:10], grad_score[:10])

@parametrize_dtype
def test_partial_edge_softmax(idtype):
    g = dgl.rand_graph(30, 900)
    g = g.astype(idtype).to(F.ctx())

    score = F.randn((300, 1))
    score.requires_grad_()
    grad = F.randn((300, 1))
    import numpy as np
    eids = np.random.choice(900, 300, replace=False)
    eids = F.tensor(eids, dtype=g.idtype)

    # compute partial edge softmax
    y_1 = nn.edge_softmax(g, score, eids)
    y_1.backward(grad)
    grad_1 = score.grad
    score.grad.zero_()

    # compute edge softmax on edge subgraph
    subg = g.edge_subgraph(eids, preserve_nodes=True)
    y_2 = nn.edge_softmax(subg, score)
    y_2.backward(grad)
    grad_2 = score.grad
    score.grad.zero_()

    assert F.allclose(y_1, y_2)
    assert F.allclose(grad_1, grad_2)

def test_rgcn():
    ctx = F.ctx()
    etype = []
@@ -936,8 +866,6 @@ def test_hetero_conv(agg, idtype):
if __name__ == '__main__':
    test_graph_conv()
    test_edge_softmax()
    test_partial_edge_softmax()
    test_set2set()
    test_glob_att_pool()
    test_simple_pool()
@@ -161,85 +161,6 @@ def test_simple_pool():
    h1 = sort_pool(bg, h0)
    assert h1.shape[0] == 5 and h1.shape[1] == 10 * 5 and h1.ndim == 2

def uniform_attention(g, shape):
    a = F.ones(shape)
    target_shape = (g.number_of_edges(),) + (1,) * (len(shape) - 1)
    return a / tf.cast(tf.reshape(g.in_degrees(g.edges()[1]), target_shape), tf.float32)

def test_edge_softmax():
    # Basic
    g = dgl.DGLGraph(nx.path_graph(3)).to(F.ctx())
    edata = F.ones((g.number_of_edges(), 1))
    a = nn.edge_softmax(g, edata)
    assert len(g.ndata) == 0
    assert len(g.edata) == 0
    assert F.allclose(a, uniform_attention(g, a.shape))

    # Test higher dimension case
    edata = F.ones((g.number_of_edges(), 3, 1))
    a = nn.edge_softmax(g, edata)
    assert len(g.ndata) == 0
    assert len(g.edata) == 0
    assert F.allclose(a, uniform_attention(g, a.shape))

    # Test both forward and backward with Tensorflow built-in softmax.
    g = dgl.DGLGraph().to(F.ctx())
    g.add_nodes(30)
    # build a complete graph
    for i in range(30):
        for j in range(30):
            g.add_edge(i, j)

    score = F.randn((900, 1))
    with tf.GradientTape() as tape:
        tape.watch(score)
        grad = F.randn((900, 1))
        y = tf.reshape(F.softmax(tf.reshape(score, (30, 30)), dim=0), (-1, 1))
    grads = tape.gradient(y, [score])
    grad_score = grads[0]

    with tf.GradientTape() as tape:
        tape.watch(score)
        y_dgl = nn.edge_softmax(g, score)
    assert len(g.ndata) == 0
    assert len(g.edata) == 0
    # check forward
    assert F.allclose(y_dgl, y)
    grads = tape.gradient(y_dgl, [score])
    # check gradient
    assert F.allclose(grads[0], grad_score)
    print(grads[0][:10], grad_score[:10])

def test_partial_edge_softmax():
    g = dgl.DGLGraph().to(F.ctx())
    g.add_nodes(30)
    # build a complete graph
    for i in range(30):
        for j in range(30):
            g.add_edge(i, j)

    score = F.randn((300, 1))
    grad = F.randn((300, 1))
    import numpy as np
    eids = np.random.choice(900, 300, replace=False).astype('int64')
    eids = F.tensor(eids)

    # compute partial edge softmax
    with tf.GradientTape() as tape:
        tape.watch(score)
        y_1 = nn.edge_softmax(g, score, eids)
    grads = tape.gradient(y_1, [score])
    grad_1 = grads[0]

    # compute edge softmax on edge subgraph
    subg = g.edge_subgraph(eids, preserve_nodes=True)
    with tf.GradientTape() as tape:
        tape.watch(score)
        y_2 = nn.edge_softmax(subg, score)
    grads = tape.gradient(y_2, [score])
    grad_2 = grads[0]

    assert F.allclose(y_1, y_2)
    assert F.allclose(grad_1, grad_2)

def test_glob_att_pool():
    g = dgl.DGLGraph(nx.path_graph(10)).to(F.ctx())
@@ -552,8 +473,6 @@ def test_hetero_conv(agg, idtype):
if __name__ == '__main__':
    test_graph_conv()
    test_edge_softmax()
    test_partial_edge_softmax()
    # test_set2set()
    test_glob_att_pool()
    test_simple_pool()
@@ -88,6 +88,11 @@ def block_graph1():
    })
    return dgl.to_block(g)

@register_case(['clique'])
def clique():
    g = dgl.graph(([0, 0, 0, 1, 1, 1, 2, 2, 2], [0, 1, 2, 0, 1, 2, 0, 1, 2]))
    return g
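
The `clique` case above hooks into the test suite's graph-case registry through `@register_case`. The registry itself is outside this diff; the sketch below is a hypothetical illustration of how such a decorator-based registry can work (`register_case` and `get_cases` here are stand-ins, not DGL's actual test utilities):

```python
import dgl

_CASES = {}  # tag -> list of graph factory functions

def register_case(tags):
    # Hypothetical decorator: file the factory under each tag.
    def deco(fn):
        for tag in tags:
            _CASES.setdefault(tag, []).append(fn)
        return fn
    return deco

def get_cases(tags):
    # Hypothetical lookup: build one fresh graph per registered factory.
    return [factory() for tag in tags for factory in _CASES.get(tag, [])]

@register_case(['clique'])
def clique():
    return dgl.graph(([0, 0, 0, 1, 1, 1, 2, 2, 2],
                      [0, 1, 2, 0, 1, 2, 0, 1, 2]))

for g in get_cases(['clique']):
    assert g.number_of_edges() == 9
```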

def random_dglgraph(size):
    return dgl.DGLGraph(nx.erdos_renyi_graph(size, 0.3))