Unverified Commit 8ecedfb3 authored by Zihao Ye, committed by GitHub

[Fix] Bugfix in docs and check gradient of edge_softmax in test (#686)

* upd

* Add stronger test

* Update test_nn.py

* Update test_nn.py

* trigger

* trigger

* trigger
parent c3d74e0f
@@ -214,7 +214,7 @@ _register_builtin_message_func()
 def src_mul_edge(src, edge, out):
     """Builtin message function that computes message by performing
-    binary operation mul between src feature and dst feature.
+    binary operation mul between src feature and edge feature.

     Notes
     -----
......
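For context: with the corrected wording, src_mul_edge forms each message as the elementwise product of the source node feature and the edge feature, and a reduce function then aggregates the messages at the destination node. A minimal usage sketch, assuming the 0.x-era DGLGraph/dgl.function API used elsewhere in this commit; the field names 'h', 'w', 'm', and 'h_new' are illustrative, not taken from the repository:

import torch as th
import dgl
import dgl.function as fn

g = dgl.DGLGraph()
g.add_nodes(3)
g.add_edges([0, 1, 2], [1, 2, 0])                # small directed cycle
g.ndata['h'] = th.ones(3, 4)                     # source node features
g.edata['w'] = th.rand(g.number_of_edges(), 4)   # edge features

# message on each edge = src['h'] * edge['w']; messages are summed at each destination
g.update_all(fn.src_mul_edge(src='h', edge='w', out='m'),
             fn.sum('m', 'h_new'))
print(g.ndata['h_new'].shape)                    # torch.Size([3, 4])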
@@ -58,7 +58,7 @@ class EdgeSoftmax(th.autograd.Function):
         out = dgl.EData(g, out)
         sds = out * grad_out  # type dgl.EData
         sds_sum = sds.dst_sum()  # type dgl.NData
-        grad_score = sds - sds * sds_sum  # multiple expressions
+        grad_score = sds - out * sds_sum  # multiple expressions
         return grad_score.data
         """
         g, out = ctx.backward_cache
......
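For context: with y = softmax(x) over the edges of one destination node and upstream gradient g, the chain rule gives dL/dx = y * g - y * sum(y * g), which is exactly the corrected expression sds - out * sds_sum; the old sds - sds * sds_sum is not the softmax gradient. A minimal sketch of that check in plain PyTorch, independent of DGL and restricted to a single destination node for illustration:

import torch as th

th.manual_seed(0)
x = th.randn(5, requires_grad=True)   # edge scores that share one destination node
grad_out = th.randn(5)                # upstream gradient w.r.t. the softmax output

out = th.softmax(x, dim=0)
out.backward(grad_out)                # autograd reference for dL/dx

sds = out * grad_out                  # "sds" in the docstring above
sds_sum = sds.sum()                   # "sds.dst_sum()" restricted to a single destination
grad_score = sds - out * sds_sum      # the corrected expression
assert th.allclose(x.grad, grad_score)
# the previous expression, sds - sds * sds_sum, generally disagrees with autograd here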
@@ -4,6 +4,9 @@ import dgl
 import dgl.nn.pytorch as nn
 from copy import deepcopy
 import numpy as np
+import scipy as sp
 def _AXWb(A, X, W, b):
     X = th.matmul(X, W)
     Y = th.matmul(A, X.view(X.shape[0], -1)).view_as(X)
@@ -63,6 +66,46 @@ def test_edge_softmax():
     a = nn.edge_softmax(g, edata)
     assert th.allclose(a, uniform_attention(g, a.shape))
+
+    # Test both forward and backward with PyTorch built-in softmax.
+    g = dgl.DGLGraph()
+    g.add_nodes(30)
+    # build a complete graph
+    for i in range(30):
+        for j in range(30):
+            g.add_edge(i, j)
+
+    score = th.rand(900, 1)
+    score.requires_grad_()
+    grad = th.rand(900, 1)
+    y = th.softmax(score.view(30, 30), dim=0).view(-1, 1)
+    y.backward(grad)
+    grad_score = score.grad.clone()  # snapshot the reference gradient before reusing score.grad
+    score.grad.zero_()
+
+    y_dgl = nn.edge_softmax(g, score)
+    # check forward
+    assert th.allclose(y_dgl, y)
+    y_dgl.backward(grad)
+    # check gradient
+    assert th.allclose(score.grad, grad_score)
+    print(score.grad[:10], grad_score[:10])
+
+    # Test 2
+    def generate_rand_graph(n):
+        arr = (sp.sparse.random(n, n, density=0.1, format='coo') != 0).astype(np.int64)
+        return dgl.DGLGraph(arr, readonly=True)
+
+    g = generate_rand_graph(50)
+    a1 = th.randn(g.number_of_edges(), 1).requires_grad_()
+    a2 = a1.clone().detach().requires_grad_()
+    g.edata['s'] = a1
+    g.group_apply_edges('dst', lambda edges: {'ss': th.softmax(edges.data['s'], 1)})
+    g.edata['ss'].sum().backward()
+
+    builtin_sm = nn.edge_softmax(g, a2)
+    builtin_sm.sum().backward()
+    print(a1.grad - a2.grad)
+    assert th.allclose(a1.grad, a2.grad, rtol=1e-4, atol=1e-4)  # follow tolerance in unittest backend

 if __name__ == '__main__':
     test_graph_conv()
......