Unverified Commit 9da1629c authored by Da Zheng, committed by GitHub

[Distributed] Some small fixes on doc (#2049)

* quick fix.

* update sparse optimizer.

* fix.

* fix
parent 4edde000
......@@ -17,7 +17,7 @@ Distributed Graph
-----------------
.. autoclass:: DistGraph
-  :members: ndata, edata, idtype, device, ntypes, etypes, number_of_nodes, number_of_edges, node_attr_schemes, edge_attr_schemes, rank, find_edges, get_partition_book, barrier, local_partition
+  :members: ndata, edata, idtype, device, ntypes, etypes, number_of_nodes, number_of_edges, node_attr_schemes, edge_attr_schemes, rank, find_edges, get_partition_book, barrier, local_partition, num_nodes, num_edges
Distributed Tensor
------------------
......
......@@ -570,7 +570,7 @@ class DistGraph:
Examples
--------
>>> g = dgl.distributed.DistGraph('ogb-product')
-  >>> print(g.number_of_nodes())
+  >>> print(g.num_nodes())
2449029
"""
return self._num_nodes
......@@ -586,7 +586,7 @@ class DistGraph:
Examples
--------
>>> g = dgl.distributed.DistGraph('ogb-product')
-  >>> print(g.number_of_nodes())
+  >>> print(g.num_edges())
123718280
"""
return self._num_edges
......
......@@ -252,11 +252,11 @@ def partition_graph(g, graph_name, num_parts, out_path, num_hops=1, part_method=
Examples
--------
>>> dgl.distributed.partition_graph(g, 'test', 4, num_hops=1, part_method='metis',
-  out_path='output/', reshuffle=True,
-  balance_ntypes=g.ndata['train_mask'],
-  balance_edges=True)
+  ... out_path='output/', reshuffle=True,
+  ... balance_ntypes=g.ndata['train_mask'],
+  ... balance_edges=True)
>>> g, node_feats, edge_feats, gpb, graph_name = dgl.distributed.load_partition(
-  'output/test.json', 0)
+  ... 'output/test.json', 0)
'''
if num_parts == 1:
parts = {0: g}
......
......@@ -119,13 +119,28 @@ def _init_state(shape, dtype):
return F.zeros(shape, dtype, F.cpu())
class SparseAdagrad:
-  ''' The sparse Adagrad optimizer.
+  r''' The sparse Adagrad optimizer.

-  This optimizer implements a sparse version of the Adagrad algorithm.
-  It works with DistEmbedding and only update the embeddings
-  involved in a mini-batch to support efficient training on a graph with many
+  This optimizer implements a lightweight version of the Adagrad algorithm for optimizing
+  :func:`dgl.distributed.DistEmbedding`. In each mini-batch, it only updates the embeddings
+  involved in the mini-batch to support efficient training on a graph with many
   nodes and edges.
+
+  Adagrad maintains :math:`G_{t,i,j}` for every parameter in the embeddings, where
+  :math:`G_{t,i,j} = G_{t-1,i,j} + g_{t,i,j}^2` and :math:`g_{t,i,j}` is the gradient of
+  dimension :math:`j` of embedding :math:`i` at step :math:`t`.
+
+  Instead of maintaining :math:`G_{t,i,j}`, this implementation maintains :math:`G_{t,i}`
+  for every embedding :math:`i`:
+
+  .. math::
+      G_{t,i} = G_{t-1,i} + \frac{1}{p} \sum_{0 \le j < p} g_{t,i,j}^2
+
+  where :math:`p` is the dimension size of an embedding.
+
+  The benefit of this implementation is that it consumes much less memory and runs
+  much faster when the model requires learnable embeddings for nodes or edges.
Parameters
----------
params : list of DistEmbeddings
......
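The docstring above describes the per-embedding accumulator only in math. Below is a minimal, self-contained sketch of that update in plain PyTorch; it is not the DGL code path. The names `emb`, `state`, `idx`, and `grad`, the learning rate, and the `eps` term are illustrative, and the parameter step assumes the conventional Adagrad rule `w -= lr * g / sqrt(G + eps)`, applied with the per-embedding scalar :math:`G_{t,i}` rather than a per-dimension accumulator.

```python
import torch

def sparse_adagrad_step(emb, state, idx, grad, lr=0.01, eps=1e-10):
    """One mini-batch step of the per-embedding Adagrad variant sketched above.

    emb   : (N, p) embedding table (hypothetical stand-in for DistEmbedding storage)
    state : (N,)   per-embedding accumulator G_i
    idx   : (B,)   unique indices of the embeddings touched by the mini-batch
    grad  : (B, p) gradients for those embeddings
    """
    p = emb.shape[1]
    # G_{t,i} = G_{t-1,i} + (1/p) * sum_j g_{t,i,j}^2  -- one scalar per embedding,
    # instead of one accumulator per (embedding, dimension) pair.
    state[idx] += grad.pow(2).sum(dim=1) / p
    # Assumed Adagrad-style parameter step; only the rows in `idx` are touched,
    # which is what keeps the optimizer cheap on graphs with many nodes/edges.
    std = (state[idx] + eps).sqrt().unsqueeze(1)
    emb[idx] -= lr * grad / std
    return emb, state

# Toy usage: 5 embeddings of size 4, a mini-batch touching rows 1 and 3.
emb = torch.zeros(5, 4)
state = torch.zeros(5)
idx = torch.tensor([1, 3])
grad = torch.randn(2, 4)
emb, state = sparse_adagrad_step(emb, state, idx, grad)
```

Keeping one scalar accumulator per embedding row, rather than a full (N, p) state tensor, is what the docstring means by consuming much less memory, and updating only the rows in `idx` keeps each step proportional to the mini-batch size.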