[GraphBolt][Doc] update examples and display methods on page (#6426)

a2a3a913 · Rhett Ying · GitHub · 3f958d7c · a2a3a913 · a2a3a913
Unverified Commit a2a3a913 authored Oct 11, 2023 by Rhett Ying Committed by GitHub Oct 11, 2023
12 changed files
--- a/docs/source/_templates/graphbolt_classtemplate.rst
+++ b/docs/source/_templates/graphbolt_classtemplate.rst
+.. role:: hidden
+    :class: hidden-section
+.. currentmodule:: {{ module }}
+
+
+{{ name | underline}}
+
+.. autoclass:: {{ name }}
+    :show-inheritance:
+    :members:
--- a/docs/source/api/python/dgl.graphbolt.rst
+++ b/docs/source/api/python/dgl.graphbolt.rst
@@ -13,7 +13,7 @@ APIs
 .. autosummary::
    :toctree: ../../generated/
    :nosignatures:
-    :template: classtemplate.rst
+    :template: graphbolt_classtemplate.rst

    Dataset
    Task
@@ -41,7 +41,7 @@ DataLoaders
 .. autosummary::
    :toctree: ../../generated/
    :nosignatures:
-    :template: classtemplate.rst
+    :template: graphbolt_classtemplate.rst

    SingleProcessDataLoader
    MultiProcessDataLoader
@@ -52,7 +52,7 @@ Standard Implementations
 .. autosummary::
    :toctree: ../../generated/
    :nosignatures:
-    :template: classtemplate.rst
+    :template: graphbolt_classtemplate.rst

    OnDiskDataset
    BuiltinDataset

--- a/python/dgl/graphbolt/dataset.py
+++ b/python/dgl/graphbolt/dataset.py
@@ -12,17 +12,17 @@ __all__ = [

 class Task:
    """An abstract task which consists of meta information and
-    *Train-Validation-Test Set*.
+    Train/Validation/Test Set.

-    *meta information*:
-    The meta information of a task includes any kinds of data that are defined
-    by the user in YAML when instantiating the task.
+    * meta information
+        The meta information of a task includes any kinds of data that are
+        defined by the user in YAML when instantiating the task.

-    *Train-Validation-Test Set*:
-    The training-validation-testing (TVT) set which is used to train the neural
-    networks. We calculate the embeddings based on their respective features
-    and the graph structure, and then utilize the embeddings to optimize the
-    neural network parameters.
+    * Train/Validation/Test Set
+        The train/validation/test (TVT) set which is used to train the neural
+        networks. We calculate the embeddings based on their respective features
+        and the graph structure, and then utilize the embeddings to optimize the
+        neural network parameters.
    """

    @property
@@ -53,18 +53,18 @@ class Dataset:
    The data abstraction could be a native CPU memory block, a shared memory
    block, a file handle of an opened file on disk, a service that provides
    the API to access the data e.t.c. There are 3 primary components in the
-    dataset: *Task*, *Feature Storage*, *Graph Topology*.
+    dataset:

-    *Task*:
-    A task consists of several meta information and the
-    *Train-Validation-Test Set*. A dataset could have multiple tasks.
+    * Task
+        A task consists of several meta information and the
+        Train/Validation/Test Set. A dataset could have multiple tasks.

-    *Feature Storage*:
-    A key-value store which stores node/edge/graph features.
+    * Feature Storage
+        A key-value store which stores node/edge/graph features.

-    *Graph Topology*:
-    Graph topology is used by the subgraph sampling algorithm to
-    generate a subgraph.
+    * Graph Topology
+        Graph topology is used by the subgraph sampling algorithm to generate
+        a subgraph.
    """

    @property

--- a/python/dgl/graphbolt/impl/basic_feature_store.py
+++ b/python/dgl/graphbolt/impl/basic_feature_store.py
@@ -64,6 +64,7 @@ class BasicFeatureStore(FeatureStore):
        feature_name: str,
    ):
        """Get the size of the specified feature in the feature store.
+
        Parameters
        ----------
        domain : str
@@ -72,6 +73,7 @@ class BasicFeatureStore(FeatureStore):
            The node or edge type name.
        feature_name : str
            The feature name.
+
        Returns
        -------
        torch.Size

--- a/python/dgl/graphbolt/impl/csc_sampling_graph.py
+++ b/python/dgl/graphbolt/impl/csc_sampling_graph.py
@@ -347,10 +347,10 @@ class CSCSamplingGraph(SamplingGraph):
        ----------
        nodes: torch.Tensor or Dict[str, torch.Tensor]
            IDs of the given seed nodes.
-            - If `nodes` is a tensor: It means the graph is homogeneous
-            graph, and ids inside are homogeneous ids.
-            - If `nodes` is a dictionary: The keys should be node type and
-            ids inside are heterogeneous ids.
+              - If `nodes` is a tensor: It means the graph is homogeneous
+                graph, and ids inside are homogeneous ids.
+              - If `nodes` is a dictionary: The keys should be node type and
+                ids inside are heterogeneous ids.
        fanouts: torch.Tensor
            The number of edges to be sampled for each node with or without
            considering edge types.
@@ -386,21 +386,22 @@ class CSCSamplingGraph(SamplingGraph):
        Examples
        --------
        >>> import dgl.graphbolt as gb
-        >>> ntypes = {'n1': 0, 'n2': 1, 'n3': 2}
-        >>> etypes = {('n1', 'e1', 'n2'): 0, ('n1', 'e2', 'n3'): 1}
+        >>> import torch
+        >>> ntypes = {"n1": 0, "n2": 1}
+        >>> etypes = {"n1:e1:n2": 0, "n2:e2:n1": 1}
        >>> metadata = gb.GraphMetadata(ntypes, etypes)
-        >>> indptr = torch.LongTensor([0, 3, 4, 5, 7])
-        >>> indices = torch.LongTensor([0, 1, 3, 2, 3, 0, 1])
-        >>> node_type_offset = torch.LongTensor([0, 2, 3, 4])
-        >>> type_per_edge = torch.LongTensor([0, 0, 1, 0, 1, 0, 1])
+        >>> indptr = torch.LongTensor([0, 2, 4, 6, 7, 9])
+        >>> indices = torch.LongTensor([2, 4, 2, 3, 0, 1, 1, 0, 1])
+        >>> node_type_offset = torch.LongTensor([0, 2, 5])
+        >>> type_per_edge = torch.LongTensor([1, 1, 1, 1, 0, 0, 0, 0, 0])
        >>> graph = gb.from_csc(indptr, indices, type_per_edge=type_per_edge,
-        ... node_type_offset=node_type_offset, metadata=metadata)
-        >>> nodes = {'n1': torch.LongTensor([1]), 'n2': torch.LongTensor([0])}
+        ...     node_type_offset=node_type_offset, metadata=metadata)
+        >>> nodes = {'n1': torch.LongTensor([0]), 'n2': torch.LongTensor([0])}
        >>> fanouts = torch.tensor([1, 1])
        >>> subgraph = graph.sample_neighbors(nodes, fanouts)
        >>> print(subgraph.node_pairs)
-        defaultdict(<class 'list'>, {('n1', 'e1', 'n2'): (tensor([2]), \
-        tensor([1])), ('n1', 'e2', 'n3'): (tensor([3]), tensor([2]))})
+        defaultdict(<class 'list'>, {'n1:e1:n2': (tensor([0]),
+          tensor([0])), 'n2:e2:n1': (tensor([2]), tensor([0]))})
        """
        if isinstance(nodes, dict):
            nodes = self._convert_to_homogeneous_nodes(nodes)
@@ -521,10 +522,10 @@ class CSCSamplingGraph(SamplingGraph):
        ----------
        nodes: torch.Tensor or Dict[str, torch.Tensor]
            IDs of the given seed nodes.
-            - If `nodes` is a tensor: It means the graph is homogeneous
-            graph, and ids inside are homogeneous ids.
-            - If `nodes` is a dictionary: The keys should be node type and
-            ids inside are heterogeneous ids.
+              - If `nodes` is a tensor: It means the graph is homogeneous
+                graph, and ids inside are homogeneous ids.
+              - If `nodes` is a dictionary: The keys should be node type and
+                ids inside are heterogeneous ids.
        fanouts: torch.Tensor
            The number of edges to be sampled for each node with or without
            considering edge types.
@@ -559,7 +560,23 @@ class CSCSamplingGraph(SamplingGraph):

        Examples
        --------
-        TODO: Provide typical examples.
+        >>> import dgl.graphbolt as gb
+        >>> import torch
+        >>> ntypes = {"n1": 0, "n2": 1}
+        >>> etypes = {"n1:e1:n2": 0, "n2:e2:n1": 1}
+        >>> metadata = gb.GraphMetadata(ntypes, etypes)
+        >>> indptr = torch.LongTensor([0, 2, 4, 6, 7, 9])
+        >>> indices = torch.LongTensor([2, 4, 2, 3, 0, 1, 1, 0, 1])
+        >>> node_type_offset = torch.LongTensor([0, 2, 5])
+        >>> type_per_edge = torch.LongTensor([1, 1, 1, 1, 0, 0, 0, 0, 0])
+        >>> graph = gb.from_csc(indptr, indices, type_per_edge=type_per_edge,
+        ...     node_type_offset=node_type_offset, metadata=metadata)
+        >>> nodes = {'n1': torch.LongTensor([0]), 'n2': torch.LongTensor([0])}
+        >>> fanouts = torch.tensor([1, 1])
+        >>> subgraph = graph.sample_layer_neighbors(nodes, fanouts)
+        >>> print(subgraph.node_pairs)
+        defaultdict(<class 'list'>, {'n1:e1:n2': (tensor([1]),
+          tensor([0])), 'n2:e2:n1': (tensor([2]), tensor([0]))})
        """
        if isinstance(nodes, dict):
            nodes = self._convert_to_homogeneous_nodes(nodes)

--- a/python/dgl/graphbolt/impl/gpu_cached_feature.py
+++ b/python/dgl/graphbolt/impl/gpu_cached_feature.py
@@ -9,40 +9,40 @@ __all__ = ["GPUCachedFeature"]


 class GPUCachedFeature(Feature):
-    r"""GPU cached feature wrapping a fallback feature."""
+    r"""GPU cached feature wrapping a fallback feature.
+
+    Places the GPU cache on torch.cuda.current_device().
+
+    Parameters
+    ----------
+    fallback_feature : Feature
+        The fallback feature.
+    cache_size : int
+        The capacity of the GPU cache, the number of features to store.
+
+    Examples
+    --------
+    >>> import torch
+    >>> from dgl import graphbolt as gb
+    >>> torch_feat = torch.arange(10).reshape(2, -1).to("cuda")
+    >>> cache_size = 5
+    >>> fallback_feature = gb.TorchBasedFeature(torch_feat)
+    >>> feature = gb.GPUCachedFeature(fallback_feature, cache_size)
+    >>> feature.read()
+    tensor([[0, 1, 2, 3, 4],
+            [5, 6, 7, 8, 9]], device='cuda:0')
+    >>> feature.read(torch.tensor([0]).to("cuda"))
+    tensor([[0, 1, 2, 3, 4]], device='cuda:0')
+    >>> feature.update(torch.tensor([[1 for _ in range(5)]]).to("cuda"),
+    ...                torch.tensor([1]).to("cuda"))
+    >>> feature.read(torch.tensor([0, 1]).to("cuda"))
+    tensor([[0, 1, 2, 3, 4],
+            [1, 1, 1, 1, 1]], device='cuda:0')
+    >>> feature.size()
+    torch.Size([5])
+    """

    def __init__(self, fallback_feature: Feature, cache_size: int):
-        """Initialize GPU cached feature with a given fallback.
-        Places the GPU cache to torch.cuda.current_device().
-
-        Parameters
-        ----------
-        fallback_feature : Feature
-            The fallback feature.
-        cache_size : int
-            The capacity of the GPU cache, the number of features to store.
-
-        Examples
-        --------
-        >>> import torch
-        >>> from dgl import graphbolt as gb
-        >>> torch_feat = torch.arange(10).reshape(2, -1).to("cuda")
-        >>> cache_size = 5
-        >>> fallback_feature = gb.TorchBasedFeature(torch_feat)
-        >>> feature = gb.GPUCachedFeature(fallback_feature, cache_size)
-        >>> feature.read()
-        tensor([[0, 1, 2, 3, 4],
-                [5, 6, 7, 8, 9]], device='cuda:0')
-        >>> feature.read(torch.tensor([0]).to("cuda"))
-        tensor([[0, 1, 2, 3, 4]], device='cuda:0')
-        >>> feature.update(torch.tensor([[1 for _ in range(5)]]).to("cuda"),
-        ...                torch.tensor([1]).to("cuda"))
-        >>> feature.read(torch.tensor([0, 1]).to("cuda"))
-        tensor([[0, 1, 2, 3, 4],
-                [1, 1, 1, 1, 1]], device='cuda:0')
-        >>> feature.size()
-        torch.Size([5])
-        """
        super(GPUCachedFeature, self).__init__()
        assert isinstance(fallback_feature, Feature), (
            f"The fallback_feature must be an instance of Feature, but got "

--- a/python/dgl/graphbolt/impl/neighbor_sampler.py
+++ b/python/dgl/graphbolt/impl/neighbor_sampler.py
@@ -23,6 +23,51 @@ class NeighborSampler(SubgraphSampler):
    gathering unique nodes from the given node pairs, encompassing both
    positive and negative node pairs, and employs these nodes as the seed nodes
    for subsequent steps.
+
+    Parameters
+    ----------
+    datapipe : DataPipe
+        The datapipe.
+    graph : CSCSamplingGraph
+        The graph on which to perform subgraph sampling.
+    fanouts: list[torch.Tensor] or list[int]
+        The number of edges to be sampled for each node with or without
+        considering edge types. The length of this parameter implicitly
+        signifies the layer of sampling being conducted.
+    replace: bool
+        Boolean indicating whether the sample is performed with or
+        without replacement. If True, a value can be selected multiple
+        times. Otherwise, each value can be selected only once.
+    prob_name: str, optional
+        The name of an edge attribute used as the weights of sampling for
+        each node. This attribute tensor should contain (unnormalized)
+        probabilities corresponding to each neighboring edge of a node.
+        It must be a 1D floating-point or boolean tensor, with the number
+        of elements equalling the total number of edges.
+
+    Examples
+    --------
+    >>> import torch
+    >>> from dgl import graphbolt as gb
+    >>> indptr = torch.LongTensor([0, 2, 4, 5, 6, 7, 8])
+    >>> indices = torch.LongTensor([1, 2, 0, 3, 5, 4, 3, 5])
+    >>> graph = gb.from_csc(indptr, indices)
+    >>> node_pairs = torch.LongTensor([[0, 1], [1, 2]])
+    >>> item_set = gb.ItemSet(node_pairs, names="node_pairs")
+    >>> item_sampler = gb.ItemSampler(
+    ...     item_set, batch_size=1,
+    ... )
+    >>> neg_sampler = gb.UniformNegativeSampler(
+    ...     item_sampler, graph, 2)
+    >>> subgraph_sampler = gb.NeighborSampler(
+    ...     neg_sampler, graph, [5, 10, 15])
+    >>> for data in subgraph_sampler:
+    ...     print(data.compacted_node_pairs)
+    ...     print(len(data.sampled_subgraphs))
+    (tensor([0, 0, 0]), tensor([1, 0, 2]))
+    3
+    (tensor([0, 0, 0]), tensor([1, 1, 1]))
+    3
    """

    def __init__(
@@ -33,54 +78,6 @@ class NeighborSampler(SubgraphSampler):
        replace=False,
        prob_name=None,
    ):
-        """
-        Initlization for a link neighbor subgraph sampler.
-
-        Parameters
-        ----------
-        datapipe : DataPipe
-            The datapipe.
-        graph : CSCSamplingGraph
-            The graph on which to perform subgraph sampling.
-        fanouts: list[torch.Tensor] or list[int]
-            The number of edges to be sampled for each node with or without
-            considering edge types. The length of this parameter implicitly
-            signifies the layer of sampling being conducted.
-        replace: bool
-            Boolean indicating whether the sample is preformed with or
-            without replacement. If True, a value can be selected multiple
-            times. Otherwise, each value can be selected only once.
-        prob_name: str, optional
-            The name of an edge attribute used as the weights of sampling for
-            each node. This attribute tensor should contain (unnormalized)
-            probabilities corresponding to each neighboring edge of a node.
-            It must be a 1D floating-point or boolean tensor, with the number
-            of elements equalling the total number of edges.
-
-        Examples
-        -------
-        >>> import dgl.graphbolt as gb
-        >>> from dgl import graphbolt as gb
-        >>> indptr = torch.LongTensor([0, 2, 4, 5, 6, 7 ,8])
-        >>> indices = torch.LongTensor([1, 2, 0, 3, 5, 4, 3, 5])
-        >>> graph = gb.from_csc(indptr, indices)
-        >>> node_pairs = torch.LongTensor([[0, 1], [1, 2]])
-        >>> item_set = gb.ItemSet(node_pairs, names="node_pairs")
-        >>> item_sampler = gb.ItemSampler(
-            ...item_set, batch_size=1,
-            ...)
-        >>> neg_sampler = gb.UniformNegativeSampler(
-            ...item_sampler, graph, 2)
-        >>> subgraph_sampler = gb.NeighborSampler(
-            ...neg_sampler, graph, [5, 10, 15])
-        >>> for data in subgraph_sampler:
-            ... print(data.compacted_node_pairs)
-            ... print(len(data.sampled_subgraphs))
-        (tensor([0, 0, 0]), tensor([1, 0, 2]))
-        3
-        (tensor([0, 0, 0]), tensor([1, 1, 1]))
-        3
-        """
        super().__init__(datapipe)
        self.graph = graph
        # Convert fanouts to a list of tensors.
@@ -148,6 +145,54 @@ class LayerNeighborSampler(NeighborSampler):
    NeighborSampler. However, unlike NeighborSampler, it samples fewer vertices
    and edges for multilayer GNN scenario without harming convergence speed with
    respect to training iterations.
+
+    Parameters
+    ----------
+    datapipe : DataPipe
+        The datapipe.
+    graph : CSCSamplingGraph
+        The graph on which to perform subgraph sampling.
+    fanouts: list[torch.Tensor]
+        The number of edges to be sampled for each node with or without
+        considering edge types. The length of this parameter implicitly
+        signifies the layer of sampling being conducted.
+    replace: bool
+        Boolean indicating whether the sample is performed with or
+        without replacement. If True, a value can be selected multiple
+        times. Otherwise, each value can be selected only once.
+    prob_name: str, optional
+        The name of an edge attribute used as the weights of sampling for
+        each node. This attribute tensor should contain (unnormalized)
+        probabilities corresponding to each neighboring edge of a node.
+        It must be a 1D floating-point or boolean tensor, with the number
+        of elements equalling the total number of edges.
+
+    Examples
+    --------
+    >>> import torch
+    >>> from dgl import graphbolt as gb
+    >>> indptr = torch.LongTensor([0, 2, 4, 5, 6, 7, 8])
+    >>> indices = torch.LongTensor([1, 2, 0, 3, 5, 4, 3, 5])
+    >>> graph = gb.from_csc(indptr, indices)
+    >>> data_format = gb.LinkPredictionEdgeFormat.INDEPENDENT
+    >>> node_pairs = torch.LongTensor([[0, 1], [1, 2]])
+    >>> item_set = gb.ItemSet(node_pairs, names="node_pairs")
+    >>> item_sampler = gb.ItemSampler(
+    ...     item_set, batch_size=1,
+    ... )
+    >>> neg_sampler = gb.UniformNegativeSampler(
+    ...     item_sampler, 2, data_format, graph)
+    >>> fanouts = [torch.LongTensor([5]), torch.LongTensor([10]),
+    ...     torch.LongTensor([15])]
+    >>> subgraph_sampler = gb.LayerNeighborSampler(
+    ...     neg_sampler, graph, fanouts)
+    >>> for data in subgraph_sampler:
+    ...     print(data.compacted_node_pairs)
+    ...     print(len(data.sampled_subgraphs))
+    (tensor([0, 0, 0]), tensor([1, 0, 2]))
+    3
+    (tensor([0, 0, 0]), tensor([1, 1, 1]))
+    3
    """

    def __init__(
@@ -158,56 +203,5 @@ class LayerNeighborSampler(NeighborSampler):
        replace=False,
        prob_name=None,
    ):
-        """
-        Initlization for a link neighbor subgraph sampler.
-
-        Parameters
-        ----------
-        datapipe : DataPipe
-            The datapipe.
-        graph : CSCSamplingGraph
-            The graph on which to perform subgraph sampling.
-        fanouts: list[torch.Tensor]
-            The number of edges to be sampled for each node with or without
-            considering edge types. The length of this parameter implicitly
-            signifies the layer of sampling being conducted.
-        replace: bool
-            Boolean indicating whether the sample is preformed with or
-            without replacement. If True, a value can be selected multiple
-            times. Otherwise, each value can be selected only once.
-        prob_name: str, optional
-            The name of an edge attribute used as the weights of sampling for
-            each node. This attribute tensor should contain (unnormalized)
-            probabilities corresponding to each neighboring edge of a node.
-            It must be a 1D floating-point or boolean tensor, with the number
-            of elements equalling the total number of edges.
-
-        Examples
-        -------
-        >>> import dgl.graphbolt as gb
-        >>> from dgl import graphbolt as gb
-        >>> indptr = torch.LongTensor([0, 2, 4, 5, 6, 7 ,8])
-        >>> indices = torch.LongTensor([1, 2, 0, 3, 5, 4, 3, 5])
-        >>> graph = gb.from_csc(indptr, indices)
-        >>> data_format = gb.LinkPredictionEdgeFormat.INDEPENDENT
-        >>> node_pairs = torch.LongTensor([[0, 1], [1, 2]])
-        >>> item_set = gb.ItemSet(node_pairs, names="node_pairs")
-        >>> item_sampler = gb.ItemSampler(
-            ...item_set, batch_size=1,
-            ...)
-        >>> neg_sampler = gb.UniformNegativeSampler(
-            ...item_sampler, 2, data_format, graph)
-        >>> fanouts = [torch.LongTensor([5]), torch.LongTensor([10]),
-            ...torch.LongTensor([15])]
-        >>> subgraph_sampler = gb.LayerNeighborSampler(
-            ...neg_sampler, graph, fanouts)
-        >>> for data in subgraph_sampler:
-            ... print(data.compacted_node_pairs)
-            ... print(len(data.sampled_subgraphs))
-        (tensor([0, 0, 0]), tensor([1, 0, 2]))
-        3
-        (tensor([0, 0, 0]), tensor([1, 1, 1]))
-        3
-        """
        super().__init__(datapipe, graph, fanouts, replace, prob_name)
        self.sampler = graph.sample_layer_neighbors
--- a/python/dgl/graphbolt/impl/ondisk_dataset.py
+++ b/python/dgl/graphbolt/impl/ondisk_dataset.py
@@ -478,7 +478,7 @@ class OnDiskDataset(Dataset):

 class BuiltinDataset(OnDiskDataset):
    """A utility class to download built-in dataset from AWS S3 and load it as
-    ``OnDiskDataset``.
+    :class:`OnDiskDataset`.

    Available built-in datasets include:


--- a/python/dgl/graphbolt/impl/torch_based_feature_store.py
+++ b/python/dgl/graphbolt/impl/torch_based_feature_store.py
@@ -12,49 +12,54 @@ __all__ = ["TorchBasedFeature", "TorchBasedFeatureStore"]


 class TorchBasedFeature(Feature):
-    r"""A wrapper of pytorch based feature."""
+    r"""A wrapper of pytorch based feature.
+
+    Initialize a torch based feature store by a torch feature.
+    Note that the feature can be either in memory or on disk.
+
+    Parameters
+    ----------
+    torch_feature : torch.Tensor
+        The torch feature.
+        Note that the dimension of the tensor should be greater than 1.
+
+    Examples
+    --------
+    >>> import torch
+    >>> from dgl import graphbolt as gb
+
+    1. The feature is in memory.
+
+    >>> torch_feat = torch.arange(10).reshape(2, -1)
+    >>> feature = gb.TorchBasedFeature(torch_feat)
+    >>> feature.read()
+    tensor([[0, 1, 2, 3, 4],
+            [5, 6, 7, 8, 9]])
+    >>> feature.read(torch.tensor([0]))
+    tensor([[0, 1, 2, 3, 4]])
+    >>> feature.update(torch.tensor([[1 for _ in range(5)]]),
+    ...                      torch.tensor([1]))
+    >>> feature.read(torch.tensor([0, 1]))
+    tensor([[0, 1, 2, 3, 4],
+            [1, 1, 1, 1, 1]])
+    >>> feature.size()
+    torch.Size([5])
+
+    2. The feature is on disk.
+
+    >>> import numpy as np
+    >>> arr = np.array([[1, 2], [3, 4]])
+    >>> np.save("/tmp/arr.npy", arr)
+    >>> torch_feat = torch.from_numpy(np.load("/tmp/arr.npy", mmap_mode="r+"))
+    >>> feature = gb.TorchBasedFeature(torch_feat)
+    >>> feature.read()
+    tensor([[1, 2],
+            [3, 4]])
+    >>> feature.read(torch.tensor([0]))
+    tensor([[1, 2]])
+    """

    def __init__(self, torch_feature: torch.Tensor):
-        """Initialize a torch based feature store by a torch feature.
-
-        Note that the feature can be either in memory or on disk.
-
-        Parameters
-        ----------
-        torch_feature : torch.Tensor
-            The torch feature.
-            Note that the dimension of the tensor should be greater than 1.
-
-        Examples
-        --------
-        >>> import torch
-        >>> from dgl import graphbolt as gb
-        >>> torch_feat = torch.arange(10).reshape(2, -1)
-        >>> feature = gb.TorchBasedFeature(torch_feat)
-        >>> feature.read()
-        tensor([[0, 1, 2, 3, 4],
-                [5, 6, 7, 8, 9]])
-        >>> feature.read(torch.tensor([0]))
-        tensor([[0, 1, 2, 3, 4]])
-        >>> feature.update(torch.tensor([[1 for _ in range(5)]]),
-        ...                      torch.tensor([1]))
-        >>> feature.read(torch.tensor([0, 1]))
-        tensor([[0, 1, 2, 3, 4],
-                [1, 1, 1, 1, 1]])
-        >>> feature.size()
-        torch.Size([5])
-
-        >>> import numpy as np
-        >>> arr = np.array([[1, 2], [3, 4]])
-        >>> np.save("/tmp/arr.npy", arr)
-        >>> torch_feat = torch.from_numpy(np.load("/tmp/arr.npy", mmap_mode="r+"))
-        >>> feature = gb.TorchBasedFeature(torch_feat)
-        >>> feature.read()
-        tensor([[1, 2],
-                [3, 4]])
-        >>> feature.read(torch.tensor([0]))
-        tensor([[1, 2]])
-        """
        super().__init__()
        assert isinstance(torch_feature, torch.Tensor), (
            f"torch_feature in TorchBasedFeature must be torch.Tensor, "
@@ -90,6 +95,7 @@ class TorchBasedFeature(Feature):
    def size(self):
        """Get the size of the feature.

+        Returns
        -------
        torch.Size
            The size of the feature.
@@ -130,48 +136,41 @@ class TorchBasedFeature(Feature):


 class TorchBasedFeatureStore(BasicFeatureStore):
-    r"""A store to manage multiple pytorch based feature for access."""
+    r"""A store to manage multiple pytorch based features for access.
+
+    The feature stores are described by the `feat_data`. The `feat_data` is a
+    list of `OnDiskFeatureData`.
+
+    For a feature store, its format must be either "torch" or "numpy" for
+    Pytorch or Numpy formats. If the format is "torch", the feature store must
+    be loaded in memory. If the format is "numpy", the feature store can be
+    loaded in memory or on disk.
+
+    Parameters
+    ----------
+    feat_data : List[OnDiskFeatureData]
+        The description of the feature stores.
+
+    Examples
+    --------
+    >>> import torch
+    >>> import numpy as np
+    >>> from dgl import graphbolt as gb
+    >>> edge_label = torch.tensor([[1], [2], [3]])
+    >>> node_feat = torch.tensor([[1, 2, 3], [4, 5, 6]])
+    >>> torch.save(edge_label, "/tmp/edge_label.pt")
+    >>> np.save("/tmp/node_feat.npy", node_feat.numpy())
+    >>> feat_data = [
+    ...     gb.OnDiskFeatureData(domain="edge", type="author:writes:paper",
+    ...         name="label", format="torch", path="/tmp/edge_label.pt",
+    ...         in_memory=True),
+    ...     gb.OnDiskFeatureData(domain="node", type="paper", name="feat",
+    ...         format="numpy", path="/tmp/node_feat.npy", in_memory=False),
+    ... ]
+    >>> feature_store = gb.TorchBasedFeatureStore(feat_data)
+    """

    def __init__(self, feat_data: List[OnDiskFeatureData]):
-        r"""Load feature stores from disk.
-
-        The feature stores are described by the `feat_data`. The `feat_data` is a
-        list of `OnDiskFeatureData`.
-
-        For a feature store, its format must be either "pt" or "npy" for Pytorch or
-        Numpy formats. If the format is "pt", the feature store must be loaded in
-        memory. If the format is "npy", the feature store can be loaded in memory or
-        on disk.
-
-        Parameters
-        ----------
-        feat_data : List[OnDiskFeatureData]
-            The description of the feature stores.
-
-        Returns
-        -------
-        dict
-            The loaded feature stores. The keys are the names of the feature stores,
-            and the values are the feature stores.
-
-        Examples
-        --------
-        >>> import torch
-        >>> import numpy as np
-        >>> from dgl import graphbolt as gb
-        >>> edge_label = torch.tensor([[1], [2], [3]])
-        >>> node_feat = torch.tensor([[1, 2, 3], [4, 5, 6]])
-        >>> torch.save(edge_label, "/tmp/edge_label.pt")
-        >>> np.save("/tmp/node_feat.npy", node_feat.numpy())
-        >>> feat_data = [
-        ...     gb.OnDiskFeatureData(domain="edge", type="author:writes:paper",
-        ...         name="label", format="torch", path="/tmp/edge_label.pt",
-        ...         in_memory=True),
-        ...     gb.OnDiskFeatureData(domain="node", type="paper", name="feat",
-        ...         format="numpy", path="/tmp/node_feat.npy", in_memory=False),
-        ... ]
-        >>> feature_sotre = gb.TorchBasedFeatureStore(feat_data)
-        """
        features = {}
        for spec in feat_data:
            key = (spec.domain, spec.type, spec.name)

--- a/python/dgl/graphbolt/impl/uniform_negative_sampler.py
+++ b/python/dgl/graphbolt/impl/uniform_negative_sampler.py
@@ -17,6 +17,35 @@ class UniformNegativeSampler(NegativeSampler):
    For each edge ``(u, v)``, it is supposed to generate `negative_ratio` pairs
    of negative edges ``(u, v')``, where ``v'`` is chosen uniformly from all
    the nodes in the graph.
+
+    Parameters
+    ----------
+    datapipe : DataPipe
+        The datapipe.
+    graph : CSCSamplingGraph
+        The graph on which to perform negative sampling.
+    negative_ratio : int
+        The proportion of negative samples to positive samples.
+
+    Examples
+    --------
+    >>> from dgl import graphbolt as gb
+    >>> indptr = torch.LongTensor([0, 2, 4, 5])
+    >>> indices = torch.LongTensor([1, 2, 0, 2, 0])
+    >>> graph = gb.from_csc(indptr, indices)
+    >>> node_pairs = (torch.tensor([0, 1]), torch.tensor([1, 2]))
+    >>> item_set = gb.ItemSet(node_pairs, names="node_pairs")
+    >>> item_sampler = gb.ItemSampler(
+    ...     item_set, batch_size=1,
+    ... )
+    >>> neg_sampler = gb.UniformNegativeSampler(
+    ...     item_sampler, graph, 2)
+    >>> for minibatch in neg_sampler:
+    ...     print(minibatch.negative_srcs)
+    ...     print(minibatch.negative_dsts)
+    ...
+    (tensor([0, 0, 0]), tensor([1, 1, 2]), tensor([1, 0, 0]))
+    (tensor([1, 1, 1]), tensor([2, 1, 2]), tensor([1, 0, 0]))
    """

    def __init__(
@@ -25,38 +54,6 @@ class UniformNegativeSampler(NegativeSampler):
        graph,
        negative_ratio,
    ):
-        """
-        Initlization for a uniform negative sampler.
-
-        Parameters
-        ----------
-        datapipe : DataPipe
-            The datapipe.
-        graph : CSCSamplingGraph
-            The graph on which to perform negative sampling.
-        negative_ratio : int
-            The proportion of negative samples to positive samples.
-
-        Examples
-        --------
-        >>> from dgl import graphbolt as gb
-        >>> indptr = torch.LongTensor([0, 2, 4, 5])
-        >>> indices = torch.LongTensor([1, 2, 0, 2, 0])
-        >>> graph = gb.from_csc(indptr, indices)
-        >>> node_pairs = (torch.tensor([0, 1]), torch.tensor([1, 2]))
-        >>> item_set = gb.ItemSet(node_pairs, names="node_pairs")
-        >>> item_sampler = gb.ItemSampler(
-            ...item_set, batch_size=1,
-            ...)
-        >>> neg_sampler = gb.UniformNegativeSampler(
-            ...item_sampler, graph, 2)
-        >>> for minibatch in neg_sampler:
-            ...  print(minibatch.negative_srcs)
-            ...  print(minibatch.negative_dsts)
-            ...
-        (tensor([0, 0, 0]), tensor([1, 1, 2]), tensor([1, 0, 0]))
-        (tensor([1, 1, 1]), tensor([2, 1, 2]), tensor([1, 0, 0]))
-        """
        super().__init__(datapipe, negative_ratio)
        self.graph = graph


--- a/python/dgl/graphbolt/item_sampler.py
+++ b/python/dgl/graphbolt/item_sampler.py
@@ -105,6 +105,7 @@ class ItemSampler(IterDataPipe):
    Examples
    --------
    1. Node IDs.
+
    >>> import torch
    >>> from dgl import graphbolt as gb
    >>> item_set = gb.ItemSet(torch.arange(0, 10), names="seed_nodes")
@@ -119,6 +120,7 @@ class ItemSampler(IterDataPipe):
        compacted_negative_dsts=None)

    2. Node pairs.
+
    >>> item_set = gb.ItemSet(torch.arange(0, 20).reshape(-1, 2),
    ...     names="node_pairs")
    >>> item_sampler = gb.ItemSampler(
@@ -133,6 +135,7 @@ class ItemSampler(IterDataPipe):
        compacted_negative_srcs=None, compacted_negative_dsts=None)

    3. Node pairs and labels.
+
    >>> item_set = gb.ItemSet(
    ...     (torch.arange(0, 20).reshape(-1, 2), torch.arange(10, 20)),
    ...     names=("node_pairs", "labels")
@@ -149,6 +152,7 @@ class ItemSampler(IterDataPipe):
        compacted_negative_srcs=None, compacted_negative_dsts=None)

    4. Node pairs and negative destinations.
+
    >>> node_pairs = torch.arange(0, 20).reshape(-1, 2)
    >>> negative_dsts = torch.arange(10, 30).reshape(-1, 2)
    >>> item_set = gb.ItemSet((node_pairs, negative_dsts), names=("node_pairs",
@@ -168,6 +172,7 @@ class ItemSampler(IterDataPipe):
        compacted_negative_srcs=None, compacted_negative_dsts=None)

    5. DGLGraphs.
+
    >>> import dgl
    >>> graphs = [ dgl.rand_graph(10, 20) for _ in range(5) ]
    >>> item_set = gb.ItemSet(graphs)
@@ -181,7 +186,8 @@ class ItemSampler(IterDataPipe):
      edata_schemes={})]

    6. Further process batches with other datapipes such as
-    `torchdata.datapipes.iter.Mapper`.
+    :class:`torchdata.datapipes.iter.Mapper`.
+
    >>> item_set = gb.ItemSet(torch.arange(0, 10))
    >>> data_pipe = gb.ItemSampler(item_set, 4)
    >>> def add_one(batch):
@@ -191,6 +197,7 @@ class ItemSampler(IterDataPipe):
    [tensor([1, 2, 3, 4]), tensor([5, 6, 7, 8]), tensor([ 9, 10])]

    7. Heterogeneous node IDs.
+
    >>> ids = {
    ...     "user": gb.ItemSet(torch.arange(0, 5), names="seed_nodes"),
    ...     "item": gb.ItemSet(torch.arange(0, 6), names="seed_nodes"),
@@ -205,6 +212,7 @@ class ItemSampler(IterDataPipe):
    compacted_negative_dsts=None)

    8. Heterogeneous node pairs.
+
    >>> node_pairs_like = torch.arange(0, 10).reshape(-1, 2)
    >>> node_pairs_follow = torch.arange(10, 20).reshape(-1, 2)
    >>> item_set = gb.ItemSetDict({
@@ -224,6 +232,7 @@ class ItemSampler(IterDataPipe):
        compacted_negative_srcs=None, compacted_negative_dsts=None)

    9. Heterogeneous node pairs and labels.
+
    >>> node_pairs_like = torch.arange(0, 10).reshape(-1, 2)
    >>> labels_like = torch.arange(0, 10)
    >>> node_pairs_follow = torch.arange(10, 20).reshape(-1, 2)
@@ -246,6 +255,7 @@ class ItemSampler(IterDataPipe):
        compacted_negative_dsts=None)

    10. Heterogeneous node pairs and negative destinations.
+
    >>> node_pairs_like = torch.arange(0, 10).reshape(-1, 2)
    >>> negative_dsts_like = torch.arange(10, 20).reshape(-1, 2)
    >>> node_pairs_follow = torch.arange(20, 30).reshape(-1, 2)

--- a/python/dgl/graphbolt/itemset.py
+++ b/python/dgl/graphbolt/itemset.py
@@ -28,6 +28,7 @@ class ItemSet:
    >>> from dgl import graphbolt as gb

    1. Single iterable: seed nodes.
+
    >>> node_ids = torch.arange(0, 5)
    >>> item_set = gb.ItemSet(node_ids, names="seed_nodes")
    >>> list(item_set)
@@ -36,6 +37,7 @@ class ItemSet:
    ('seed_nodes',)

    2. Tuple of iterables with same shape: seed nodes and labels.
+
    >>> node_ids = torch.arange(0, 5)
    >>> labels = torch.arange(5, 10)
    >>> item_set = gb.ItemSet(
@@ -47,6 +49,7 @@ class ItemSet:
    ('seed_nodes', 'labels')

    3. Tuple of iterables with different shape: node pairs and negative dsts.
+
    >>> node_pairs = torch.arange(0, 10).reshape(-1, 2)
    >>> neg_dsts = torch.arange(10, 25).reshape(-1, 3)
    >>> item_set = gb.ItemSet(
@@ -133,6 +136,7 @@ class ItemSetDict:
    >>> from dgl import graphbolt as gb

    1. Single iterable: seed nodes.
+
    >>> node_ids_user = torch.arange(0, 5)
    >>> node_ids_item = torch.arange(5, 10)
    >>> item_set = gb.ItemSetDict({
@@ -147,6 +151,7 @@ class ItemSetDict:
    ('seed_nodes',)

    2. Tuple of iterables with same shape: seed nodes and labels.
+
    >>> node_ids_user = torch.arange(0, 2)
    >>> labels_user = torch.arange(0, 2)
    >>> node_ids_item = torch.arange(2, 5)
@@ -166,6 +171,7 @@ class ItemSetDict:
    ('seed_nodes', 'labels')

    3. Tuple of iterables with different shape: node pairs and negative dsts.
+
    >>> node_pairs_like = torch.arange(0, 4).reshape(-1, 2)
    >>> neg_dsts_like = torch.arange(4, 10).reshape(-1, 3)
    >>> node_pairs_follow = torch.arange(0, 6).reshape(-1, 2)