Unverified Commit 742d79a7 authored by Zihao Ye, committed by GitHub

upd (#741)

parent 5d3f470b
@@ -17,12 +17,10 @@ dgl.nn.mxnet.glob
 .. automodule:: dgl.nn.mxnet.glob
     :members:
     :show-inheritance:

 dgl.nn.mxnet.softmax
 --------------------
 .. automodule:: dgl.nn.mxnet.softmax
     :members: edge_softmax
-
-.. autoclass:: dgl.nn.mxnet.softmax.EdgeSoftmax
-    :members: forward
-    :show-inheritance:
@@ -14,7 +14,6 @@ dgl.nn.pytorch.conv
 dgl.nn.pytorch.glob
 -------------------
 .. automodule:: dgl.nn.pytorch.glob
 .. autoclass:: dgl.nn.pytorch.glob.SumPooling
@@ -53,7 +52,4 @@ dgl.nn.pytorch.softmax
 ----------------------
 .. automodule:: dgl.nn.pytorch.softmax
     :members: edge_softmax
-
-.. autoclass:: dgl.nn.pytorch.softmax.EdgeSoftmax
-    :members: forward
-    :show-inheritance:
@@ -32,12 +32,15 @@ class EdgeSoftmax(mx.autograd.Function):
         """Forward function.

         Pseudo-code:
-        score = dgl.EData(g, score)
-        score_max = score.dst_max()  # of type dgl.NData
-        score = score - score_max    # edge_sub_dst, ret dgl.EData
-        score_sum = score.dst_sum()  # of type dgl.NData
-        out = score / score_sum      # edge_div_dst, ret dgl.EData
-        return out.data
+
+        .. code:: python
+
+            score = dgl.EData(g, score)
+            score_max = score.dst_max()  # of type dgl.NData
+            score = score - score_max    # edge_sub_dst, ret dgl.EData
+            score_sum = score.dst_sum()  # of type dgl.NData
+            out = score / score_sum      # edge_div_dst, ret dgl.EData
+            return out.data
         """
         g = self.g.local_var()
         g.edata['s'] = score
@@ -54,12 +57,15 @@ class EdgeSoftmax(mx.autograd.Function):
         """Backward function.

         Pseudo-code:
-        g, out = ctx.backward_cache
-        grad_out = dgl.EData(g, grad_out)
-        out = dgl.EData(g, out)
-        sds = out * grad_out  # type dgl.EData
-        sds_sum = sds.dst_sum()  # type dgl.NData
-        grad_score = sds - sds * sds_sum  # multiple expressions
+
+        .. code:: python
+
+            g, out = ctx.backward_cache
+            grad_out = dgl.EData(g, grad_out)
+            out = dgl.EData(g, out)
+            sds = out * grad_out  # type dgl.EData
+            sds_sum = sds.dst_sum()  # type dgl.NData
+            grad_score = sds - sds * sds_sum  # multiple expressions
         """
         g = self.g.local_var()
         out, = self.saved_tensors  # pylint: disable=access-member-before-definition, unpacking-non-sequence
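Note: the last pseudo-code line is marked "# multiple expressions"; the rule it abbreviates appears to be the standard softmax Jacobian applied independently to each destination node's incoming edges. A short derivation, using the :math:`a_{ij}` / :math:`z_{ij}` / :math:`\mathcal{N}(i)` notation of the `edge_softmax` docstring below and writing :math:`g_{ij}` for the gradient flowing into the output (this is the textbook result, not text from this commit):

.. math::
    \frac{\partial a_{ij}}{\partial z_{ij'}} = a_{ij}\left(\delta_{jj'} - a_{ij'}\right)
    \qquad\Longrightarrow\qquad
    \frac{\partial L}{\partial z_{ij}}
      = a_{ij}\left(g_{ij} - \sum_{j'\in\mathcal{N}(i)} a_{ij'}\, g_{ij'}\right)

In the pseudo-code's vocabulary this is `sds` minus `out` times `sds_sum`, computed per destination node.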
@@ -75,6 +81,19 @@ class EdgeSoftmax(mx.autograd.Function):
 def edge_softmax(graph, logits):
     r"""Compute edge softmax.

+    For a node :math:`i`, edge softmax is an operation of computing
+
+    .. math::
+      a_{ij} = \frac{\exp(z_{ij})}{\sum_{j\in\mathcal{N}(i)}\exp(z_{ij})}
+
+    where :math:`z_{ij}` is a signal of edge :math:`j\rightarrow i`, also
+    called logits in the context of softmax. :math:`\mathcal{N}(i)` is
+    the set of nodes that have an edge to :math:`i`.
+
+    An example of using edge softmax is in
+    `Graph Attention Network <https://arxiv.org/pdf/1710.10903.pdf>`__ where
+    the attention weights are computed with such an edge softmax operation.
+
     Parameters
     ----------
     graph : DGLGraph
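Note: a quick worked instance of the new formula (added here for illustration, not part of the docstring): when every incoming edge of a node carries the same logit, the exponentials cancel and each weight is simply one over the node's in-degree,

.. math::
    z_{ij} = c \;\; \forall j \in \mathcal{N}(i)
    \quad\Longrightarrow\quad
    a_{ij} = \frac{\exp(c)}{|\mathcal{N}(i)|\,\exp(c)} = \frac{1}{|\mathcal{N}(i)|}

which is why the all-ones example further down yields 1, 0.5 and 0.33333334 for nodes with one, two and three incoming edges.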
@@ -90,13 +109,40 @@ def edge_softmax(graph, logits):
     Notes
     -----
     * Input shape: :math:`(N, *, 1)` where * means any number of
       additional dimensions, :math:`N` is the number of edges.
     * Return shape: :math:`(N, *, 1)`

     Examples
     --------
-    >>> import dgl.function as fn
-    >>> attention = EdgeSoftmax(logits, graph)
+    >>> from dgl.nn.mxnet.softmax import edge_softmax
+    >>> import dgl
+    >>> from mxnet import nd
+
+    Create a :code:`DGLGraph` object and initialize its edge features.
+
+    >>> g = dgl.DGLGraph()
+    >>> g.add_nodes(3)
+    >>> g.add_edges([0, 0, 0, 1, 1, 2], [0, 1, 2, 1, 2, 2])
+    >>> edata = nd.ones((6, 1))
+    >>> edata
+    [[1.]
+     [1.]
+     [1.]
+     [1.]
+     [1.]
+     [1.]]
+    <NDArray 6x1 @cpu(0)>
+
+    Apply edge softmax on g:
+
+    >>> edge_softmax(g, edata)
+    [[1.        ]
+     [0.5       ]
+     [0.33333334]
+     [0.5       ]
+     [0.33333334]
+     [0.33333334]]
+    <NDArray 6x1 @cpu(0)>
     """
     softmax_op = EdgeSoftmax(graph)
     return softmax_op(logits)
@@ -29,12 +29,15 @@ class EdgeSoftmax(th.autograd.Function):
         """Forward function.

         Pseudo-code:
-        score = dgl.EData(g, score)
-        score_max = score.dst_max()  # of type dgl.NData
-        score = score - score_max    # edge_sub_dst, ret dgl.EData
-        score_sum = score.dst_sum()  # of type dgl.NData
-        out = score / score_sum      # edge_div_dst, ret dgl.EData
-        return out.data
+
+        .. code:: python
+
+            score = dgl.EData(g, score)
+            score_max = score.dst_max()  # of type dgl.NData
+            score = score - score_max    # edge_sub_dst, ret dgl.EData
+            score_sum = score.dst_sum()  # of type dgl.NData
+            out = score / score_sum      # edge_div_dst, ret dgl.EData
+            return out.data
         """
         # remember to save the graph to backward cache before making it
         # a local variable
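Note: the two context comments above refer to the usual caching pattern in a custom `torch.autograd.Function`: tensors needed by backward go through `save_for_backward`, while non-tensor state such as the graph is stashed on the context object directly. A generic skeleton of that pattern follows; the names are hypothetical and the softmax/gradient here are deliberately simplified global placeholders, not the per-destination kernels this commit uses.

.. code:: python

    import torch as th

    class EdgeSoftmaxLike(th.autograd.Function):
        """Skeleton only: shows where the graph and tensors are cached."""

        @staticmethod
        def forward(ctx, g, score):
            # non-tensor state (the graph) is stored as a plain attribute ...
            ctx.backward_cache = g
            out = th.softmax(score, dim=0)  # placeholder for the per-destination softmax
            # ... while tensors needed by backward go through save_for_backward
            ctx.save_for_backward(out)
            return out

        @staticmethod
        def backward(ctx, grad_out):
            g = ctx.backward_cache
            out, = ctx.saved_tensors
            sds = out * grad_out
            # placeholder gradient matching the global softmax above;
            # the real implementation reduces per destination node instead
            grad_score = sds - out * sds.sum(dim=0, keepdim=True)
            # one gradient per forward input; the graph argument gets None
            return None, grad_score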
@@ -55,13 +58,16 @@ class EdgeSoftmax(th.autograd.Function):
         """Backward function.

         Pseudo-code:
-        g, out = ctx.backward_cache
-        grad_out = dgl.EData(g, grad_out)
-        out = dgl.EData(g, out)
-        sds = out * grad_out  # type dgl.EData
-        sds_sum = sds.dst_sum()  # type dgl.NData
-        grad_score = sds - sds * sds_sum  # multiple expressions
-        return grad_score.data
+
+        .. code:: python
+
+            g, out = ctx.backward_cache
+            grad_out = dgl.EData(g, grad_out)
+            out = dgl.EData(g, out)
+            sds = out * grad_out  # type dgl.EData
+            sds_sum = sds.dst_sum()  # type dgl.NData
+            grad_score = sds - sds * sds_sum  # multiple expressions
+            return grad_score.data
         """
         g = ctx.backward_cache
         g = g.local_var()
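Note: because both forward and backward are hand-written, a numerical check is cheap insurance. A hedged sketch using `torch.autograd.gradcheck` against the public `edge_softmax` wrapper on the same toy graph as the Examples, assuming the kernels accept the double-precision input that `gradcheck` expects:

.. code:: python

    import dgl
    import torch as th
    from dgl.nn.pytorch.softmax import edge_softmax

    g = dgl.DGLGraph()
    g.add_nodes(3)
    g.add_edges([0, 0, 0, 1, 1, 2], [0, 1, 2, 1, 2, 2])

    # gradcheck compares the analytical backward with finite differences
    logits = th.randn(6, 1, dtype=th.float64, requires_grad=True)
    assert th.autograd.gradcheck(lambda x: edge_softmax(g, x), (logits,))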
@@ -79,6 +85,19 @@ class EdgeSoftmax(th.autograd.Function):
 def edge_softmax(graph, logits):
     r"""Compute edge softmax.

+    For a node :math:`i`, edge softmax is an operation of computing
+
+    .. math::
+      a_{ij} = \frac{\exp(z_{ij})}{\sum_{j\in\mathcal{N}(i)}\exp(z_{ij})}
+
+    where :math:`z_{ij}` is a signal of edge :math:`j\rightarrow i`, also
+    called logits in the context of softmax. :math:`\mathcal{N}(i)` is
+    the set of nodes that have an edge to :math:`i`.
+
+    An example of using edge softmax is in
+    `Graph Attention Network <https://arxiv.org/pdf/1710.10903.pdf>`__ where
+    the attention weights are computed with such an edge softmax operation.
+
     Parameters
     ----------
     graph : DGLGraph
@@ -94,12 +113,37 @@ def edge_softmax(graph, logits):
     Notes
     -----
     * Input shape: :math:`(N, *, 1)` where * means any number of
       additional dimensions, :math:`N` is the number of edges.
     * Return shape: :math:`(N, *, 1)`

     Examples
     --------
-    >>> import dgl.function as fn
-    >>> attention = EdgeSoftmax(logits, graph)
+    >>> from dgl.nn.pytorch.softmax import edge_softmax
+    >>> import dgl
+    >>> import torch as th
+
+    Create a :code:`DGLGraph` object and initialize its edge features.
+
+    >>> g = dgl.DGLGraph()
+    >>> g.add_nodes(3)
+    >>> g.add_edges([0, 0, 0, 1, 1, 2], [0, 1, 2, 1, 2, 2])
+    >>> edata = th.ones(6, 1).float()
+    >>> edata
+    tensor([[1.],
+            [1.],
+            [1.],
+            [1.],
+            [1.],
+            [1.]])
+
+    Apply edge softmax on g:
+
+    >>> edge_softmax(g, edata)
+    tensor([[1.0000],
+            [0.5000],
+            [0.3333],
+            [0.5000],
+            [0.3333],
+            [0.3333]])
     """
     return EdgeSoftmax.apply(graph, logits)
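Note: both docstrings point at Graph Attention Networks as the canonical consumer of this operator. As a closing illustration, here is a rough single-head GAT-style layer built on `edge_softmax`; it assumes the builtin functions `fn.u_add_v` and `fn.u_mul_e` are available in the installed DGL version and is a sketch, not DGL's own GATConv.

.. code:: python

    import dgl
    import dgl.function as fn
    import torch as th
    import torch.nn as nn
    import torch.nn.functional as F
    from dgl.nn.pytorch.softmax import edge_softmax

    class TinyGATHead(nn.Module):
        """Single attention head: per-edge logits -> edge_softmax -> weighted sum."""

        def __init__(self, in_feats, out_feats):
            super().__init__()
            self.fc = nn.Linear(in_feats, out_feats, bias=False)
            self.attn_l = nn.Parameter(th.randn(out_feats))
            self.attn_r = nn.Parameter(th.randn(out_feats))

        def forward(self, g, h):
            g = g.local_var()
            z = self.fc(h)                                    # (N, out_feats)
            g.ndata['z'] = z
            g.ndata['el'] = (z * self.attn_l).sum(-1, keepdim=True)
            g.ndata['er'] = (z * self.attn_r).sum(-1, keepdim=True)
            # raw per-edge logits from the source and destination contributions
            g.apply_edges(fn.u_add_v('el', 'er', 'e'))
            e = F.leaky_relu(g.edata['e'])
            # normalize the logits over each node's incoming edges
            g.edata['a'] = edge_softmax(g, e)
            # attention-weighted aggregation of neighbour features
            g.update_all(fn.u_mul_e('z', 'a', 'm'), fn.sum('m', 'h_new'))
            return g.ndata['h_new']

On the three-node example graph above, `TinyGATHead(4, 8)(g, th.randn(3, 4))` should return an updated `(3, 8)` node-feature tensor.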