Unverified commit 65866989, authored by Tianjun Xiao, committed by GitHub

[NN] [Doc] Fix nn api doc (#2047)

* fix MXNet NN doc and drop the stale note on ChebConv

* change "Notes" docstring headings to "Note" so Sphinx renders them as highlighted note boxes
parent dacc7afe
@@ -12,7 +12,7 @@ please `create an issue <https://github.com/dmlc/dgl/issues>`_ started with "[Fe
 If you want to contribute a NN module, please `create a pull request <https://github.com/dmlc/dgl/pulls>`_ started
 with "[NN] XXXModel in MXNet NN Modules" and our team member will review this PR.

 Conv Layers
 ----------------------------------------

 .. automodule:: dgl.nn.mxnet.conv
@@ -93,7 +93,7 @@ GMMConv
 .. autoclass:: dgl.nn.mxnet.conv.GMMConv
     :members: forward
     :show-inheritance:

 ChebConv
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -113,7 +113,7 @@ NNConv
 .. autoclass:: dgl.nn.mxnet.conv.NNConv
     :members: forward
-    :show-inheritance
+    :show-inheritance:

 Dense Conv Layers
 ----------------------------------------
@@ -130,12 +130,12 @@ DenseSAGEConv
 .. autoclass:: dgl.nn.mxnet.conv.DenseSAGEConv
     :members: forward
-    :show-inheritance
+    :show-inheritance:

 DenseChebConv
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-.. autoclass:: dgl.nn.pytorch.conv.DenseChebConv
+.. autoclass:: dgl.nn.mxnet.conv.DenseChebConv
     :members: forward
     :show-inheritance:
...
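A note on the two fixes above, since both are easy to miss in review: Sphinx directive options form a reST field list, so ``:show-inheritance`` without the trailing colon is parsed as ordinary body text and silently ignored, whereas ``:show-inheritance:`` is recognized as an option. The ``DenseChebConv`` entry previously pointed at the PyTorch class, so the MXNet API page was documenting the wrong module.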
@@ -42,8 +42,8 @@ class AGNNConv(nn.Block):
         0-in-degree nodes in input graph. By setting ``True``, it will suppress the check
         and let the users handle it by themselves. Default: ``False``.
-    Notes
-    -----
+    Note
+    ----
     Zero in-degree nodes will lead to invalid output values. This is because no message
     will be passed to those nodes, so the aggregation function will be applied on empty input.
     A common practice to avoid this is to add a self-loop for each node in the graph if
...
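The zero-in-degree caveat recurs in nearly every hunk below. A minimal sketch of the self-loop workaround the docstrings recommend, assuming the DGL 0.5-era API (``dgl.graph``, ``dgl.add_self_loop``) and the MXNet ``AGNNConv`` from this file:

    import dgl
    import mxnet as mx
    from dgl.nn.mxnet.conv import AGNNConv

    # Node 5 never appears as a destination, so it has 0 in-degree
    # and would receive no messages.
    g = dgl.graph(([0, 1, 2, 3, 2, 5], [1, 2, 3, 4, 0, 3]))

    # A self-loop on every node guarantees at least one incoming message,
    # which avoids the invalid-output condition described above.
    g = dgl.add_self_loop(g)

    conv = AGNNConv()
    conv.initialize(ctx=mx.cpu(0))
    feat = mx.nd.ones((g.num_nodes(), 10))
    out = conv(g, feat)  # shape (6, 10); the zero-in-degree check stays quiet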
@@ -44,10 +44,6 @@ class ChebConv(nn.Block):
     bias : bool, optional
         If True, adds a learnable bias to the output. Default: ``True``.
-    Note
-    ----
-    ChebConv only support DGLGraph as input for now. Heterograph will report error. To be fixed.
-
     Example
     -------
     >>> import dgl
...
@@ -40,8 +40,8 @@ class EdgeConv(nn.Block):
         0-in-degree nodes in input graph. By setting ``True``, it will suppress the check
         and let the users handle it by themselves. Default: ``False``.
-    Notes
-    -----
+    Note
+    ----
     Zero in-degree nodes will lead to invalid output values. This is because no message
     will be passed to those nodes, so the aggregation function will be applied on empty input.
     A common practice to avoid this is to add a self-loop for each node in the graph if
...
@@ -62,8 +62,8 @@ class GATConv(nn.Block):
         0-in-degree nodes in input graph. By setting ``True``, it will suppress the check
         and let the users handle it by themselves. Default: ``False``.
-    Notes
-    -----
+    Note
+    ----
     Zero in-degree nodes will lead to invalid output values. This is because no message
     will be passed to those nodes, so the aggregation function will be applied on empty input.
     A common practice to avoid this is to add a self-loop for each node in the graph if
...
@@ -7,29 +7,61 @@ from mxnet.gluon import nn
 from .... import function as fn

 class GatedGraphConv(nn.Block):
-    r"""Gated Graph Convolution layer from paper `Gated Graph Sequence
+    r"""
+    Description
+    -----------
+    Gated Graph Convolution layer from paper `Gated Graph Sequence
     Neural Networks <https://arxiv.org/pdf/1511.05493.pdf>`__.

     .. math::
-       h_{i}^{0} & = [ x_i \| \mathbf{0} ]
-       a_{i}^{t} & = \sum_{j\in\mathcal{N}(i)} W_{e_{ij}} h_{j}^{t}
-       h_{i}^{t+1} & = \mathrm{GRU}(a_{i}^{t}, h_{i}^{t})
+       h_{i}^{0} &= [ x_i \| \mathbf{0} ]
+       a_{i}^{t} &= \sum_{j\in\mathcal{N}(i)} W_{e_{ij}} h_{j}^{t}
+       h_{i}^{t+1} &= \mathrm{GRU}(a_{i}^{t}, h_{i}^{t})

     Parameters
     ----------
     in_feats : int
-        Input feature size.
+        Input feature size; i.e., the number of dimensions of :math:`x_i`.
     out_feats : int
-        Output feature size.
+        Output feature size; i.e., the number of dimensions of :math:`h_i^{(t+1)}`.
     n_steps : int
-        Number of recurrent steps.
+        Number of recurrent steps; i.e., the :math:`t` in the above formula.
     n_etypes : int
         Number of edge types.
     bias : bool
         If True, adds a learnable bias to the output. Default: ``True``.
         Can only be set to True in MXNet.
+
+    Example
+    -------
+    >>> import dgl
+    >>> import numpy as np
+    >>> import mxnet as mx
+    >>> from dgl.nn import GatedGraphConv
+    >>>
+    >>> g = dgl.graph(([0,1,2,3,2,5], [1,2,3,4,0,3]))
+    >>> feat = mx.nd.ones((6, 10))
+    >>> conv = GatedGraphConv(10, 10, 2, 3)
+    >>> conv.initialize(ctx=mx.cpu(0))
+    >>> etype = mx.nd.array([0,1,2,0,1,2])
+    >>> res = conv(g, feat, etype)
+    >>> res
+    [[0.24378185 0.17402579 0.2644723  0.2740628  0.14041871 0.32523093
+      0.2703067  0.18234392 0.32777587 0.30957845]
+     [0.17872348 0.28878236 0.2509409  0.20139427 0.3355541  0.22643831
+      0.2690711  0.22341749 0.27995753 0.21575949]
+     [0.23911178 0.16696918 0.26120248 0.27397877 0.13745922 0.3223175
+      0.27561218 0.18071817 0.3251124  0.30608907]
+     [0.25242943 0.3098581  0.25249368 0.27968448 0.24624602 0.12270881
+      0.335147   0.31550157 0.19065917 0.21087633]
+     [0.17503153 0.29523152 0.2474858  0.20848347 0.3526433  0.23443702
+      0.24741334 0.21986549 0.28935105 0.21859099]
+     [0.2159364  0.26942077 0.23083271 0.28329757 0.24758333 0.24230732
+      0.23958017 0.23430146 0.26431587 0.27001363]]
+    <NDArray 6x10 @cpu(0)>
     """
     def __init__(self,
                  in_feats,
...
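Two things the new example leaves implicit: ``etype`` assigns each edge an edge-type ID in ``[0, n_etypes)``, listed in edge-ID order, so its length must equal the number of edges; and the printed values come from randomly initialized parameters, so they will differ from run to run.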
@@ -55,8 +55,8 @@ class GMMConv(nn.Block):
         0-in-degree nodes in input graph. By setting ``True``, it will suppress the check
         and let the users handle it by themselves. Default: ``False``.
-    Notes
-    -----
+    Note
+    ----
     Zero in-degree nodes will lead to invalid output values. This is because no message
     will be passed to those nodes, so the aggregation function will be applied on empty input.
     A common practice to avoid this is to add a self-loop for each node in the graph if
...
@@ -58,8 +58,8 @@ class GraphConv(gluon.Block):
     bias : torch.Tensor
         The learnable bias tensor.
-    Notes
-    -----
+    Note
+    ----
     Zero in-degree nodes will lead to invalid output values. This is because no message
     will be passed to those nodes, so the aggregation function will be applied on empty input.
     A common practice to avoid this is to add a self-loop for each node in the graph if
@@ -208,8 +208,8 @@ class GraphConv(gluon.Block):
     since no message will be passed to those nodes. This will cause invalid output.
     The error can be ignored by setting the ``allow_zero_in_degree`` parameter to ``True``.
-    Notes
-    -----
+    Note
+    ----
     * Input shape: :math:`(N, *, \text{in_feats})` where * means any number of additional
       dimensions, :math:`N` is the number of nodes.
     * Output shape: :math:`(N, *, \text{out_feats})` where all but the last dimension are
...
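The input/output shape contract in that last hunk is easy to check by hand. A minimal sketch, assuming ``GraphConv(in_feats, out_feats)`` from ``dgl.nn.mxnet.conv``:

    import dgl
    import mxnet as mx
    from dgl.nn.mxnet.conv import GraphConv

    g = dgl.graph(([0, 1, 2, 3, 2, 5], [1, 2, 3, 4, 0, 3]))
    g = dgl.add_self_loop(g)  # sidestep the 0-in-degree error described above

    conv = GraphConv(5, 2)  # in_feats=5, out_feats=2
    conv.initialize(ctx=mx.cpu(0))

    feat = mx.nd.ones((g.num_nodes(), 5))  # input shape (N, in_feats)
    out = conv(g, feat)                    # output shape (N, out_feats) == (6, 2)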
@@ -50,8 +50,8 @@ class SGConv(nn.Block):
         0-in-degree nodes in input graph. By setting ``True``, it will suppress the check
         and let the users handle it by themselves. Default: ``False``.
-    Notes
-    -----
+    Note
+    ----
     Zero in-degree nodes will lead to invalid output values. This is because no message
     will be passed to those nodes, so the aggregation function will be applied on empty input.
     A common practice to avoid this is to add a self-loop for each node in the graph if
@@ -148,8 +148,8 @@ class SGConv(nn.Block):
     since no message will be passed to those nodes. This will cause invalid output.
     The error can be ignored by setting the ``allow_zero_in_degree`` parameter to ``True``.
-    Notes
-    -----
+    Note
+    ----
     If ``cache`` is set to True, ``feat`` and ``graph`` should not change during
     training, or you will get wrong results.
     """
...
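The cache warning is worth a concrete illustration. A minimal sketch, assuming the flag is exposed as ``cached`` in the constructor, as in current DGL releases:

    import dgl
    import mxnet as mx
    from dgl.nn.mxnet.conv import SGConv

    g = dgl.add_self_loop(dgl.graph(([0, 1, 2], [1, 2, 0])))
    feat = mx.nd.ones((3, 4))

    # With cached=True the k-hop propagated features are computed once on the
    # first call and reused afterwards, so later calls must pass the SAME
    # graph and features -- anything else silently yields wrong results.
    conv = SGConv(4, 2, k=2, cached=True)
    conv.initialize(ctx=mx.cpu(0))
    out_first = conv(g, feat)   # computes and stores the cache
    out_again = conv(g, feat)   # reuses the cache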
@@ -42,8 +42,8 @@ class AGNNConv(nn.Module):
         0-in-degree nodes in input graph. By setting ``True``, it will suppress the check
         and let the users handle it by themselves. Default: ``False``.
-    Notes
-    -----
+    Note
+    ----
     Zero in-degree nodes will lead to invalid output values. This is because no message
     will be passed to those nodes, so the aggregation function will be applied on empty input.
     A common practice to avoid this is to add a self-loop for each node in the graph if
...
@@ -191,8 +191,8 @@ class AtomicConv(nn.Module):
     In the original paper, these are atomic numbers to consider, representing the types
     of atoms. T for the number of types of atomic numbers. Defaults to None.
-    Notes
-    -----
+    Note
+    ----
     * This convolution operation is designed for molecular graphs in Chemistry, but it might
       be possible to extend it to more general graphs.
...
@@ -44,10 +44,6 @@ class ChebConv(nn.Module):
     bias : bool, optional
         If True, adds a learnable bias to the output. Default: ``True``.
-    Note
-    ----
-    ChebConv only support DGLGraph as input for now. Heterograph will report error. To be fixed.
-
     Example
     -------
     >>> import dgl
...
@@ -48,8 +48,8 @@ class DotGatConv(nn.Module):
         0-in-degree nodes in input graph. By setting ``True``, it will suppress the check
         and let the users handle it by themselves. Default: ``False``.
-    Notes
-    -----
+    Note
+    ----
     Zero in-degree nodes will lead to invalid output values. This is because no message
     will be passed to those nodes, so the aggregation function will be applied on empty input.
     A common practice to avoid this is to add a self-loop for each node in the graph if
...
@@ -39,8 +39,8 @@ class EdgeConv(nn.Module):
         0-in-degree nodes in input graph. By setting ``True``, it will suppress the check
         and let the users handle it by themselves. Default: ``False``.
-    Notes
-    -----
+    Note
+    ----
     Zero in-degree nodes will lead to invalid output values. This is because no message
     will be passed to those nodes, so the aggregation function will be applied on empty input.
     A common practice to avoid this is to add a self-loop for each node in the graph if
...
@@ -61,8 +61,8 @@ class GATConv(nn.Module):
         0-in-degree nodes in input graph. By setting ``True``, it will suppress the check
         and let the users handle it by themselves. Default: ``False``.
-    Notes
-    -----
+    Note
+    ----
     Zero in-degree nodes will lead to invalid output values. This is because no message
     will be passed to those nodes, so the aggregation function will be applied on empty input.
     A common practice to avoid this is to add a self-loop for each node in the graph if
@@ -178,8 +178,8 @@ class GATConv(nn.Module):
     -----------
     Reinitialize learnable parameters.
-    Notes
-    -----
+    Note
+    ----
     The fc weights :math:`W^{(l)}` are initialized using Glorot uniform initialization.
     The attention weights use the Xavier initialization method.
     """
...
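The Glorot/Xavier wording in this and the following ``reset_parameters`` hunks can read as two different schemes; they are the same initialization (Glorot & Bengio, 2010). A hedged PyTorch sketch of that kind of reset, with illustrative stand-in shapes rather than DGL's actual ones:

    import torch
    import torch.nn as nn

    # "Glorot uniform" == "Xavier uniform": weights drawn from U(-a, a)
    # with a = gain * sqrt(6 / (fan_in + fan_out)).
    fc = nn.Linear(16, 8, bias=False)        # stand-in for the module's fc layer
    attn = nn.Parameter(torch.empty(1, 8))   # stand-in for an attention weight

    gain = nn.init.calculate_gain('relu')
    nn.init.xavier_uniform_(fc.weight, gain=gain)
    nn.init.xavier_normal_(attn, gain=gain)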
@@ -85,8 +85,8 @@ class GatedGraphConv(nn.Module):
     -----------
     Reinitialize learnable parameters.
-    Notes
-    -----
+    Note
+    ----
     The model parameters are initialized using Glorot uniform initialization
     and the bias is initialized to be zero.
     """
...
@@ -54,8 +54,8 @@ class GMMConv(nn.Module):
         0-in-degree nodes in input graph. By setting ``True``, it will suppress the check
         and let the users handle it by themselves. Default: ``False``.
-    Notes
-    -----
+    Note
+    ----
     Zero in-degree nodes will lead to invalid output values. This is because no message
     will be passed to those nodes, so the aggregation function will be applied on empty input.
     A common practice to avoid this is to add a self-loop for each node in the graph if
@@ -155,8 +155,8 @@ class GMMConv(nn.Module):
     -----------
     Reinitialize learnable parameters.
-    Notes
-    -----
+    Note
+    ----
     The fc parameters are initialized using Glorot uniform initialization
     and the bias is initialized to be zero.
     The mu weight is initialized using a normal distribution and
...
@@ -58,8 +58,8 @@ class GraphConv(nn.Module):
     bias : torch.Tensor
         The learnable bias tensor.
-    Notes
-    -----
+    Note
+    ----
     Zero in-degree nodes will lead to invalid output values. This is because no message
     will be passed to those nodes, so the aggregation function will be applied on empty input.
     A common practice to avoid this is to add a self-loop for each node in the graph if
@@ -158,8 +158,8 @@ class GraphConv(nn.Module):
     -----------
     Reinitialize learnable parameters.
-    Notes
-    -----
+    Note
+    ----
     The model parameters are initialized as in the
     `original implementation <https://github.com/tkipf/gcn/blob/master/gcn/layers.py>`__
     where the weight :math:`W^{(l)}` is initialized using Glorot uniform initialization
@@ -223,8 +223,8 @@ class GraphConv(nn.Module):
     External weight is provided while at the same time the module
     has defined its own weight parameter.
-    Notes
-    -----
+    Note
+    ----
     * Input shape: :math:`(N, *, \text{in_feats})` where * means any number of additional
       dimensions, :math:`N` is the number of nodes.
     * Output shape: :math:`(N, *, \text{out_feats})` where all but the last dimension are
...
@@ -128,8 +128,8 @@ class NNConv(nn.Module):
     -----------
     Reinitialize learnable parameters.
-    Notes
-    -----
+    Note
+    ----
     The model parameters are initialized using Glorot uniform initialization
     and the bias is initialized to be zero.
     """
...
@@ -121,8 +121,8 @@ class SAGEConv(nn.Module):
     -----------
     Reinitialize learnable parameters.
-    Notes
-    -----
+    Note
+    ----
     The linear weights :math:`W^{(l)}` are initialized using Glorot uniform initialization.
     The LSTM module uses the Xavier initialization method for its weights.
     """
...