Unverified Commit 9da1629c authored by Da Zheng, committed by GitHub

[Distributed] Some small fixes on doc (#2049)

* quick fix.

* update sparse optimizer.

* fix.

* fix
parent 4edde000
......@@ -17,7 +17,7 @@ Distributed Graph
-----------------
.. autoclass:: DistGraph
-  :members: ndata, edata, idtype, device, ntypes, etypes, number_of_nodes, number_of_edges, node_attr_schemes, edge_attr_schemes, rank, find_edges, get_partition_book, barrier, local_partition
+  :members: ndata, edata, idtype, device, ntypes, etypes, number_of_nodes, number_of_edges, node_attr_schemes, edge_attr_schemes, rank, find_edges, get_partition_book, barrier, local_partition, num_nodes, num_edges
Distributed Tensor
------------------
......
......@@ -570,7 +570,7 @@ class DistGraph:
Examples
--------
>>> g = dgl.distributed.DistGraph('ogb-product')
-  >>> print(g.number_of_nodes())
+  >>> print(g.num_nodes())
2449029
"""
return self._num_nodes
......@@ -586,7 +586,7 @@ class DistGraph:
Examples
--------
>>> g = dgl.distributed.DistGraph('ogb-product')
-  >>> print(g.number_of_nodes())
+  >>> print(g.num_edges())
123718280
"""
return self._num_edges
......
......@@ -252,11 +252,11 @@ def partition_graph(g, graph_name, num_parts, out_path, num_hops=1, part_method=
Examples
--------
>>> dgl.distributed.partition_graph(g, 'test', 4, num_hops=1, part_method='metis',
-  out_path='output/', reshuffle=True,
-  balance_ntypes=g.ndata['train_mask'],
-  balance_edges=True)
+  ... out_path='output/', reshuffle=True,
+  ... balance_ntypes=g.ndata['train_mask'],
+  ... balance_edges=True)
>>> g, node_feats, edge_feats, gpb, graph_name = dgl.distributed.load_partition(
-  'output/test.json', 0)
+  ... 'output/test.json', 0)
'''
if num_parts == 1:
parts = {0: g}
......
......@@ -119,13 +119,28 @@ def _init_state(shape, dtype):
return F.zeros(shape, dtype, F.cpu())
class SparseAdagrad:
-  ''' The sparse Adagrad optimizer.
+  r''' The sparse Adagrad optimizer.

-  This optimizer implements a sparse version of the Adagrad algorithm.
-  It works with DistEmbedding and only update the embeddings
-  involved in a mini-batch to support efficient training on a graph with many
+  This optimizer implements a lightweight version of the Adagrad algorithm for optimizing
+  :func:`dgl.distributed.DistEmbedding`. In each mini-batch, it only updates the embeddings
+  involved in the mini-batch to support efficient training on a graph with many
   nodes and edges.
+
+  Adagrad maintains :math:`G_{t,i,j}` for every parameter in the embeddings, where
+  :math:`G_{t,i,j} = G_{t-1,i,j} + g_{t,i,j}^2` and :math:`g_{t,i,j}` is the gradient of
+  dimension :math:`j` of embedding :math:`i` at step :math:`t`.
+
+  Instead of maintaining :math:`G_{t,i,j}`, this implementation maintains :math:`G_{t,i}`
+  for every embedding :math:`i`:
+
+  .. math::
+      G_{t,i} = G_{t-1,i} + \frac{1}{p} \sum_{0 \le j < p} g_{t,i,j}^2
+
+  where :math:`p` is the dimension size of an embedding.
+
+  The benefit of this implementation is that it consumes much less memory and runs
+  much faster when the model requires learnable embeddings for nodes or edges.
Parameters
----------
params : list of DistEmbeddings
......
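The docstring above describes the per-embedding accumulator only in math. Below is a minimal, self-contained sketch of that update in plain PyTorch; it is not the DGL code path. The names `emb`, `state`, `idx`, and `grad`, the learning rate, and the `eps` term are illustrative, and the parameter step assumes the conventional Adagrad rule `w -= lr * g / sqrt(G + eps)`, applied with the per-embedding scalar :math:`G_{t,i}` rather than a per-dimension accumulator.

```python
import torch

def sparse_adagrad_step(emb, state, idx, grad, lr=0.01, eps=1e-10):
    """One mini-batch step of the per-embedding Adagrad variant sketched above.

    emb   : (N, p) embedding table (hypothetical stand-in for DistEmbedding storage)
    state : (N,)   per-embedding accumulator G_i
    idx   : (B,)   unique indices of the embeddings touched by the mini-batch
    grad  : (B, p) gradients for those embeddings
    """
    p = emb.shape[1]
    # G_{t,i} = G_{t-1,i} + (1/p) * sum_j g_{t,i,j}^2  -- one scalar per embedding,
    # instead of one accumulator per (embedding, dimension) pair.
    state[idx] += grad.pow(2).sum(dim=1) / p
    # Assumed Adagrad-style parameter step; only the rows in `idx` are touched,
    # which is what keeps the optimizer cheap on graphs with many nodes/edges.
    std = (state[idx] + eps).sqrt().unsqueeze(1)
    emb[idx] -= lr * grad / std
    return emb, state

# Toy usage: 5 embeddings of size 4, a mini-batch touching rows 1 and 3.
emb = torch.zeros(5, 4)
state = torch.zeros(5)
idx = torch.tensor([1, 3])
grad = torch.randn(2, 4)
emb, state = sparse_adagrad_step(emb, state, idx, grad)
```

Keeping one scalar accumulator per embedding row, rather than a full (N, p) state tensor, is what the docstring means by consuming much less memory, and updating only the rows in `idx` keeps each step proportional to the mini-batch size.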