[Doc] data (#198)

* Fix dataset * Track data APIs

[Doc] data (#198)
* Fix dataset * Track data APIs
3a0f86a6 · Mufei Li · Minjie Wang · 485f6d3a · 3a0f86a6 · 3a0f86a6
Commit 3a0f86a6 authored Dec 01, 2018 by Mufei Li Committed by Minjie Wang Nov 30, 2018
4 changed files
--- a/docs/source/api/python/data.rst
+++ b/docs/source/api/python/data.rst
+.. _apidata:
+Dataset
+=======
+.. currentmodule:: dgl.data
+Utils
+-----
+.. autosummary::
+    :toctree: ../../generated/
+    utils.get_download_dir
+    utils.download
+    utils.check_sha1
+    utils.extract_archive
+Dataset Classes
+---------------
+Stanford sentiment treebank dataset
+```````````````````````````````````
+For more information about the dataset, see `Sentiment Analysis <https://nlp.stanford.edu/sentiment/index.html>`__.
+.. autoclass:: SST
+    :members: __getitem__, __len__
\ No newline at end of file
--- a/docs/source/api/python/index.rst
+++ b/docs/source/api/python/index.rst
@@ -10,3 +10,4 @@ API Reference
   traversal
   propagate
   udf
+   data
--- a/python/dgl/data/tree.py
+++ b/python/dgl/data/tree.py
@@ -26,14 +26,14 @@ class SST(object):
    """Stanford Sentiment Treebank dataset.
    Each sample is the constituency tree of a sentence. The leaf nodes
-    represent words. The word is a int value stored in the "x" feature field.
+    represent words. The word is an int value stored in the ``x`` feature field.
-    The non-leaf node has a special value PAD_WORD.
+    The non-leaf node has a special value ``PAD_WORD`` in the ``x`` field.
    Each node also has a sentiment annotation: 5 classes (very negative,
    negative, neutral, positive and very positive). The sentiment label is an
-    int value stored in the "y" feature field.
+    int value stored in the ``y`` feature field.
    .. note::
-        This dataset class is compatible with pytorch's Dataset class.
+        This dataset class is compatible with PyTorch's :class:`torch.utils.data.Dataset` class.
    .. note::
        All the samples will be loaded and preprocessed in the memory first.
@@ -41,7 +41,7 @@ class SST(object):
    Parameters
    ----------
    mode : str, optional
-        Can be 'train', 'val', 'test'. Which data file to use.
+        Can be ``'train'``, ``'val'`` or ``'test'``. Specifies which data file to use.
    vocab_file : str, optional
        Optional vocabulary file.
    """
@@ -120,9 +120,28 @@ class SST(object):
        return ret
    def __getitem__(self, idx):
+        """Get the tree with index idx.
+        Parameters
+        ----------
+        idx : int
+            Tree index.
+        Returns
+        -------
+        dgl.DGLGraph
+            Tree.
+        """
        return self.trees[idx]
    def __len__(self):
+        """Get the number of trees in the dataset.
+        Returns
+        -------
+        int
+            Number of trees.
+        """
        return len(self.trees)
    @property

--- a/python/dgl/data/utils.py
+++ b/python/dgl/data/utils.py
@@ -14,24 +14,24 @@ except ImportError:
    requests = requests_failed_to_import
 def download(url, path=None, overwrite=False, sha1_hash=None, retries=5, verify_ssl=True):
-    """Download an given URL
+    """Download a given URL.
    Code borrowed from mxnet/gluon/utils.py
    Parameters
    ----------
    url : str
-        URL to download
+        URL to download.
    path : str, optional
        Destination path to store downloaded file. By default stores to the
-        current directory with same name as in url.
+        current directory with the same name as in url.
    overwrite : bool, optional
-        Whether to overwrite destination file if already exists.
+        Whether to overwrite the destination file if it already exists.
    sha1_hash : str, optional
        Expected sha1 hash in hexadecimal digits. Will ignore existing file when hash is specified
        but doesn't match.
    retries : integer, default 5
-        The number of times to attempt the download in case of failure or non 200 return codes
+        The number of times to attempt downloading in case of failure or non 200 return codes.
    verify_ssl : bool, default True
        Verify SSL certificates.
@@ -101,6 +101,7 @@ def check_sha1(filename, sha1_hash):
        Path to the file.
    sha1_hash : str
        Expected sha1 hash in hexadecimal digits.
    Returns
    -------
    bool
@@ -117,14 +118,14 @@ def check_sha1(filename, sha1_hash):
    return sha1.hexdigest() == sha1_hash
 def extract_archive(file, target_dir):
-    """Extract archive file
+    """Extract archive file.
    Parameters
    ----------
    file : str
        Absolute path of the archive file.
    target_dir : str
-        Target directory of the archive to be uncompressed
+        Target directory of the archive to be uncompressed.
    """
    if os.path.exists(target_dir):
        return
@@ -139,7 +140,13 @@ def extract_archive(file, target_dir):
    archive.close()
 def get_download_dir():
-    """Get the absolute path to the download directory."""
+    """Get the absolute path to the download directory.
+    Returns
+    -------
+    dirname : str
+        Path to the download directory.
+    """
    curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
    dirname = os.path.join(curr_path, '../../../_download')
    if not os.path.exists(dirname):