[GraphBolt][Doc] update examples and display methods on page (#6426)

a2a3a913 · Rhett Ying · GitHub · 3f958d7c · a2a3a913 · a2a3a913
Unverified Commit a2a3a913 authored Oct 11, 2023 by Rhett Ying Committed by GitHub Oct 11, 2023
12 changed files
--- a/docs/source/_templates/graphbolt_classtemplate.rst
+++ b/docs/source/_templates/graphbolt_classtemplate.rst
+.. role:: hidden
+    :class: hidden-section
+.. currentmodule:: {{ module }}
+
+
+{{ name | underline}}
+
+.. autoclass:: {{ name }}
+    :show-inheritance:
+    :members:
--- a/docs/source/api/python/dgl.graphbolt.rst
+++ b/docs/source/api/python/dgl.graphbolt.rst
@@ -13,7 +13,7 @@ APIs
 .. autosummary::
    :toctree: ../../generated/
    :nosignatures:
-    :template: classtemplate.rst
+    :template: graphbolt_classtemplate.rst

    Dataset
    Task
@@ -41,7 +41,7 @@ DataLoaders
 .. autosummary::
    :toctree: ../../generated/
    :nosignatures:
-    :template: classtemplate.rst
+    :template: graphbolt_classtemplate.rst

    SingleProcessDataLoader
    MultiProcessDataLoader
@@ -52,7 +52,7 @@ Standard Implementations
 .. autosummary::
    :toctree: ../../generated/
    :nosignatures:
-    :template: classtemplate.rst
+    :template: graphbolt_classtemplate.rst

    OnDiskDataset
    BuiltinDataset

--- a/python/dgl/graphbolt/dataset.py
+++ b/python/dgl/graphbolt/dataset.py
@@ -12,14 +12,14 @@ __all__ = [

 class Task:
    """An abstract task which consists of meta information and
-    *Train-Validation-Test Set*.
+    Train/Validation/Test Set.

-    *meta information*:
-    The meta information of a task includes any kinds of data that are defined
-    by the user in YAML when instantiating the task.
+    * meta information
+        The meta information of a task includes any kinds of data that are
+        defined by the user in YAML when instantiating the task.

-    *Train-Validation-Test Set*:
-    The training-validation-testing (TVT) set which is used to train the neural
+    * Train/Validation/Test Set
+        The train/validation/test (TVT) set which is used to train the neural
        networks. We calculate the embeddings based on their respective features
        and the graph structure, and then utilize the embeddings to optimize the
        neural network parameters.
@@ -53,18 +53,18 @@ class Dataset:
    The data abstraction could be a native CPU memory block, a shared memory
    block, a file handle of an opened file on disk, a service that provides
    the API to access the data, etc. There are 3 primary components in the
-    dataset: *Task*, *Feature Storage*, *Graph Topology*.
+    dataset:

-    *Task*:
+    * Task
        A task consists of meta information and the
-    *Train-Validation-Test Set*. A dataset could have multiple tasks.
+        Train/Validation/Test Set. A dataset could have multiple tasks.

-    *Feature Storage*:
+    * Feature Storage
        A key-value store which stores node/edge/graph features.

-    *Graph Topology*:
-    Graph topology is used by the subgraph sampling algorithm to
-    generate a subgraph.
+    * Graph Topology
+        Graph topology is used by the subgraph sampling algorithm to generate
+        a subgraph.
    """

    @property

--- a/python/dgl/graphbolt/impl/basic_feature_store.py
+++ b/python/dgl/graphbolt/impl/basic_feature_store.py
@@ -64,6 +64,7 @@ class BasicFeatureStore(FeatureStore):
        feature_name: str,
    ):
        """Get the size of the specified feature in the feature store.
+
        Parameters
        ----------
        domain : str
@@ -72,6 +73,7 @@ class BasicFeatureStore(FeatureStore):
            The node or edge type name.
        feature_name : str
            The feature name.
+
        Returns
        -------
        torch.Size

--- a/python/dgl/graphbolt/impl/csc_sampling_graph.py
+++ b/python/dgl/graphbolt/impl/csc_sampling_graph.py
@@ -386,21 +386,22 @@ class CSCSamplingGraph(SamplingGraph):
        Examples
        --------
        >>> import dgl.graphbolt as gb
-        >>> ntypes = {'n1': 0, 'n2': 1, 'n3': 2}
-        >>> etypes = {('n1', 'e1', 'n2'): 0, ('n1', 'e2', 'n3'): 1}
+        >>> import torch
+        >>> ntypes = {"n1": 0, "n2": 1}
+        >>> etypes = {"n1:e1:n2": 0, "n2:e2:n1": 1}
        >>> metadata = gb.GraphMetadata(ntypes, etypes)
-        >>> indptr = torch.LongTensor([0, 3, 4, 5, 7])
-        >>> indices = torch.LongTensor([0, 1, 3, 2, 3, 0, 1])
-        >>> node_type_offset = torch.LongTensor([0, 2, 3, 4])
-        >>> type_per_edge = torch.LongTensor([0, 0, 1, 0, 1, 0, 1])
+        >>> indptr = torch.LongTensor([0, 2, 4, 6, 7, 9])
+        >>> indices = torch.LongTensor([2, 4, 2, 3, 0, 1, 1, 0, 1])
+        >>> node_type_offset = torch.LongTensor([0, 2, 5])
+        >>> type_per_edge = torch.LongTensor([1, 1, 1, 1, 0, 0, 0, 0, 0])
        >>> graph = gb.from_csc(indptr, indices, type_per_edge=type_per_edge,
        ...     node_type_offset=node_type_offset, metadata=metadata)
-        >>> nodes = {'n1': torch.LongTensor([1]), 'n2': torch.LongTensor([0])}
+        >>> nodes = {'n1': torch.LongTensor([0]), 'n2': torch.LongTensor([0])}
        >>> fanouts = torch.tensor([1, 1])
        >>> subgraph = graph.sample_neighbors(nodes, fanouts)
        >>> print(subgraph.node_pairs)
-        defaultdict(<class 'list'>, {('n1', 'e1', 'n2'): (tensor([2]), \
-        tensor([1])), ('n1', 'e2', 'n3'): (tensor([3]), tensor([2]))})
+        defaultdict(<class 'list'>, {'n1:e1:n2': (tensor([0]),
+          tensor([0])), 'n2:e2:n1': (tensor([2]), tensor([0]))})
        """
        if isinstance(nodes, dict):
            nodes = self._convert_to_homogeneous_nodes(nodes)
@@ -559,7 +560,23 @@ class CSCSamplingGraph(SamplingGraph):

        Examples
        --------
-        TODO: Provide typical examples.
+        >>> import dgl.graphbolt as gb
+        >>> import torch
+        >>> ntypes = {"n1": 0, "n2": 1}
+        >>> etypes = {"n1:e1:n2": 0, "n2:e2:n1": 1}
+        >>> metadata = gb.GraphMetadata(ntypes, etypes)
+        >>> indptr = torch.LongTensor([0, 2, 4, 6, 7, 9])
+        >>> indices = torch.LongTensor([2, 4, 2, 3, 0, 1, 1, 0, 1])
+        >>> node_type_offset = torch.LongTensor([0, 2, 5])
+        >>> type_per_edge = torch.LongTensor([1, 1, 1, 1, 0, 0, 0, 0, 0])
+        >>> graph = gb.from_csc(indptr, indices, type_per_edge=type_per_edge,
+        ...     node_type_offset=node_type_offset, metadata=metadata)
+        >>> nodes = {'n1': torch.LongTensor([0]), 'n2': torch.LongTensor([0])}
+        >>> fanouts = torch.tensor([1, 1])
+        >>> subgraph = graph.sample_layer_neighbors(nodes, fanouts)
+        >>> print(subgraph.node_pairs)
+        defaultdict(<class 'list'>, {'n1:e1:n2': (tensor([1]),
+          tensor([0])), 'n2:e2:n1': (tensor([2]), tensor([0]))})
        """
        if isinstance(nodes, dict):
            nodes = self._convert_to_homogeneous_nodes(nodes)

--- a/python/dgl/graphbolt/impl/gpu_cached_feature.py
+++ b/python/dgl/graphbolt/impl/gpu_cached_feature.py
@@ -9,10 +9,8 @@ __all__ = ["GPUCachedFeature"]


 class GPUCachedFeature(Feature):
-    r"""GPU cached feature wrapping a fallback feature."""
+    r"""GPU cached feature wrapping a fallback feature.

-    def __init__(self, fallback_feature: Feature, cache_size: int):
-        """Initialize GPU cached feature with a given fallback.
    Places the GPU cache to torch.cuda.current_device().

    Parameters
@@ -43,6 +41,8 @@ class GPUCachedFeature(Feature):
    >>> feature.size()
    torch.Size([5])
    """
+
+    def __init__(self, fallback_feature: Feature, cache_size: int):
        super(GPUCachedFeature, self).__init__()
        assert isinstance(fallback_feature, Feature), (
            f"The fallback_feature must be an instance of Feature, but got "

--- a/python/dgl/graphbolt/impl/neighbor_sampler.py
+++ b/python/dgl/graphbolt/impl/neighbor_sampler.py
@@ -23,18 +23,6 @@ class NeighborSampler(SubgraphSampler):
    gathering unique nodes from the given node pairs, encompassing both
    positive and negative node pairs, and employs these nodes as the seed nodes
    for subsequent steps.
-    """
-
-    def __init__(
-        self,
-        datapipe,
-        graph,
-        fanouts,
-        replace=False,
-        prob_name=None,
-    ):
-        """
-        Initlization for a link neighbor subgraph sampler.

    Parameters
    ----------
@@ -81,6 +69,15 @@ class NeighborSampler(SubgraphSampler):
    (tensor([0, 0, 0]), tensor([1, 1, 1]))
    3
    """
+
+    def __init__(
+        self,
+        datapipe,
+        graph,
+        fanouts,
+        replace=False,
+        prob_name=None,
+    ):
        super().__init__(datapipe)
        self.graph = graph
        # Convert fanouts to a list of tensors.
@@ -148,18 +145,6 @@ class LayerNeighborSampler(NeighborSampler):
    NeighborSampler. However, unlike NeighborSampler, it samples fewer vertices
    and edges for multilayer GNN scenario without harming convergence speed with
    respect to training iterations.
-    """
-
-    def __init__(
-        self,
-        datapipe,
-        graph,
-        fanouts,
-        replace=False,
-        prob_name=None,
-    ):
-        """
-        Initlization for a link neighbor subgraph sampler.

    Parameters
    ----------
@@ -209,5 +194,14 @@ class LayerNeighborSampler(NeighborSampler):
    (tensor([0, 0, 0]), tensor([1, 1, 1]))
    3
    """
+
+    def __init__(
+        self,
+        datapipe,
+        graph,
+        fanouts,
+        replace=False,
+        prob_name=None,
+    ):
        super().__init__(datapipe, graph, fanouts, replace, prob_name)
        self.sampler = graph.sample_layer_neighbors
--- a/python/dgl/graphbolt/impl/ondisk_dataset.py
+++ b/python/dgl/graphbolt/impl/ondisk_dataset.py
@@ -478,7 +478,7 @@ class OnDiskDataset(Dataset):

 class BuiltinDataset(OnDiskDataset):
    """A utility class to download built-in dataset from AWS S3 and load it as
-    ``OnDiskDataset``.
+    :class:`OnDiskDataset`.

    Available built-in datasets include:


--- a/python/dgl/graphbolt/impl/torch_based_feature_store.py
+++ b/python/dgl/graphbolt/impl/torch_based_feature_store.py
@@ -12,11 +12,9 @@ __all__ = ["TorchBasedFeature", "TorchBasedFeatureStore"]


 class TorchBasedFeature(Feature):
-    r"""A wrapper of pytorch based feature."""
-
-    def __init__(self, torch_feature: torch.Tensor):
-        """Initialize a torch based feature store by a torch feature.
+    r"""A wrapper of pytorch based feature.

+    Initialize a torch based feature store by a torch feature.
    Note that the feature can be either in memory or on disk.

    Parameters
@@ -29,6 +27,9 @@ class TorchBasedFeature(Feature):
    --------
    >>> import torch
    >>> from dgl import graphbolt as gb
+
+    1. The feature is in memory.
+
    >>> torch_feat = torch.arange(10).reshape(2, -1)
    >>> feature = gb.TorchBasedFeature(torch_feat)
    >>> feature.read()
@@ -44,6 +45,8 @@ class TorchBasedFeature(Feature):
    >>> feature.size()
    torch.Size([5])

+    2. The feature is on disk.
+
    >>> import numpy as np
    >>> arr = np.array([[1, 2], [3, 4]])
    >>> np.save("/tmp/arr.npy", arr)
@@ -55,6 +58,8 @@ class TorchBasedFeature(Feature):
    >>> feature.read(torch.tensor([0]))
    tensor([[1, 2]])
    """
+
+    def __init__(self, torch_feature: torch.Tensor):
        super().__init__()
        assert isinstance(torch_feature, torch.Tensor), (
            f"torch_feature in TorchBasedFeature must be torch.Tensor, "
@@ -90,6 +95,7 @@ class TorchBasedFeature(Feature):
    def size(self):
        """Get the size of the feature.

+        Returns
        -------
        torch.Size
            The size of the feature.
@@ -130,10 +136,7 @@ class TorchBasedFeature(Feature):


 class TorchBasedFeatureStore(BasicFeatureStore):
-    r"""A store to manage multiple pytorch based feature for access."""
-
-    def __init__(self, feat_data: List[OnDiskFeatureData]):
-        r"""Load feature stores from disk.
+    r"""A store to manage multiple PyTorch-based features for access.

    The feature stores are described by the `feat_data`. The `feat_data` is a
    list of `OnDiskFeatureData`.
@@ -148,12 +151,6 @@ class TorchBasedFeatureStore(BasicFeatureStore):
    feat_data : List[OnDiskFeatureData]
        The description of the feature stores.

-        Returns
-        -------
-        dict
-            The loaded feature stores. The keys are the names of the feature stores,
-            and the values are the feature stores.
-
    Examples
    --------
    >>> import torch
@@ -172,6 +169,8 @@ class TorchBasedFeatureStore(BasicFeatureStore):
    ... ]
    >>> feature_store = gb.TorchBasedFeatureStore(feat_data)
    """
+
+    def __init__(self, feat_data: List[OnDiskFeatureData]):
        features = {}
        for spec in feat_data:
            key = (spec.domain, spec.type, spec.name)

--- a/python/dgl/graphbolt/impl/uniform_negative_sampler.py
+++ b/python/dgl/graphbolt/impl/uniform_negative_sampler.py
@@ -17,16 +17,6 @@ class UniformNegativeSampler(NegativeSampler):
    For each edge ``(u, v)``, it is supposed to generate `negative_ratio` pairs
    of negative edges ``(u, v')``, where ``v'`` is chosen uniformly from all
    the nodes in the graph.
-    """
-
-    def __init__(
-        self,
-        datapipe,
-        graph,
-        negative_ratio,
-    ):
-        """
-        Initlization for a uniform negative sampler.

    Parameters
    ----------
@@ -57,6 +47,13 @@ class UniformNegativeSampler(NegativeSampler):
    (tensor([0, 0, 0]), tensor([1, 1, 2]), tensor([1, 0, 0]))
    (tensor([1, 1, 1]), tensor([2, 1, 2]), tensor([1, 0, 0]))
    """
+
+    def __init__(
+        self,
+        datapipe,
+        graph,
+        negative_ratio,
+    ):
        super().__init__(datapipe, negative_ratio)
        self.graph = graph


--- a/python/dgl/graphbolt/item_sampler.py
+++ b/python/dgl/graphbolt/item_sampler.py
@@ -105,6 +105,7 @@ class ItemSampler(IterDataPipe):
    Examples
    --------
    1. Node IDs.
+
    >>> import torch
    >>> from dgl import graphbolt as gb
    >>> item_set = gb.ItemSet(torch.arange(0, 10), names="seed_nodes")
@@ -119,6 +120,7 @@ class ItemSampler(IterDataPipe):
        compacted_negative_dsts=None)

    2. Node pairs.
+
    >>> item_set = gb.ItemSet(torch.arange(0, 20).reshape(-1, 2),
    ...     names="node_pairs")
    >>> item_sampler = gb.ItemSampler(
@@ -133,6 +135,7 @@ class ItemSampler(IterDataPipe):
        compacted_negative_srcs=None, compacted_negative_dsts=None)

    3. Node pairs and labels.
+
    >>> item_set = gb.ItemSet(
    ...     (torch.arange(0, 20).reshape(-1, 2), torch.arange(10, 20)),
    ...     names=("node_pairs", "labels")
@@ -149,6 +152,7 @@ class ItemSampler(IterDataPipe):
        compacted_negative_srcs=None, compacted_negative_dsts=None)

    4. Node pairs and negative destinations.
+
    >>> node_pairs = torch.arange(0, 20).reshape(-1, 2)
    >>> negative_dsts = torch.arange(10, 30).reshape(-1, 2)
    >>> item_set = gb.ItemSet((node_pairs, negative_dsts), names=("node_pairs",
@@ -168,6 +172,7 @@ class ItemSampler(IterDataPipe):
        compacted_negative_srcs=None, compacted_negative_dsts=None)

    5. DGLGraphs.
+
    >>> import dgl
    >>> graphs = [ dgl.rand_graph(10, 20) for _ in range(5) ]
    >>> item_set = gb.ItemSet(graphs)
@@ -181,7 +186,8 @@ class ItemSampler(IterDataPipe):
      edata_schemes={})]

    6. Further process batches with other datapipes such as
-    `torchdata.datapipes.iter.Mapper`.
+    :class:`torchdata.datapipes.iter.Mapper`.
+
    >>> item_set = gb.ItemSet(torch.arange(0, 10))
    >>> data_pipe = gb.ItemSampler(item_set, 4)
    >>> def add_one(batch):
@@ -191,6 +197,7 @@ class ItemSampler(IterDataPipe):
    [tensor([1, 2, 3, 4]), tensor([5, 6, 7, 8]), tensor([ 9, 10])]

    7. Heterogeneous node IDs.
+
    >>> ids = {
    ...     "user": gb.ItemSet(torch.arange(0, 5), names="seed_nodes"),
    ...     "item": gb.ItemSet(torch.arange(0, 6), names="seed_nodes"),
@@ -205,6 +212,7 @@ class ItemSampler(IterDataPipe):
    compacted_negative_dsts=None)

    8. Heterogeneous node pairs.
+
    >>> node_pairs_like = torch.arange(0, 10).reshape(-1, 2)
    >>> node_pairs_follow = torch.arange(10, 20).reshape(-1, 2)
    >>> item_set = gb.ItemSetDict({
@@ -224,6 +232,7 @@ class ItemSampler(IterDataPipe):
        compacted_negative_srcs=None, compacted_negative_dsts=None)

    9. Heterogeneous node pairs and labels.
+
    >>> node_pairs_like = torch.arange(0, 10).reshape(-1, 2)
    >>> labels_like = torch.arange(0, 10)
    >>> node_pairs_follow = torch.arange(10, 20).reshape(-1, 2)
@@ -246,6 +255,7 @@ class ItemSampler(IterDataPipe):
        compacted_negative_dsts=None)

    10. Heterogeneous node pairs and negative destinations.
+
    >>> node_pairs_like = torch.arange(0, 10).reshape(-1, 2)
    >>> negative_dsts_like = torch.arange(10, 20).reshape(-1, 2)
    >>> node_pairs_follow = torch.arange(20, 30).reshape(-1, 2)

--- a/python/dgl/graphbolt/itemset.py
+++ b/python/dgl/graphbolt/itemset.py
@@ -28,6 +28,7 @@ class ItemSet:
    >>> from dgl import graphbolt as gb

    1. Single iterable: seed nodes.
+
    >>> node_ids = torch.arange(0, 5)
    >>> item_set = gb.ItemSet(node_ids, names="seed_nodes")
    >>> list(item_set)
@@ -36,6 +37,7 @@ class ItemSet:
    ('seed_nodes',)

    2. Tuple of iterables with same shape: seed nodes and labels.
+
    >>> node_ids = torch.arange(0, 5)
    >>> labels = torch.arange(5, 10)
    >>> item_set = gb.ItemSet(
@@ -47,6 +49,7 @@ class ItemSet:
    ('seed_nodes', 'labels')

    3. Tuple of iterables with different shape: node pairs and negative dsts.
+
    >>> node_pairs = torch.arange(0, 10).reshape(-1, 2)
    >>> neg_dsts = torch.arange(10, 25).reshape(-1, 3)
    >>> item_set = gb.ItemSet(
@@ -133,6 +136,7 @@ class ItemSetDict:
    >>> from dgl import graphbolt as gb

    1. Single iterable: seed nodes.
+
    >>> node_ids_user = torch.arange(0, 5)
    >>> node_ids_item = torch.arange(5, 10)
    >>> item_set = gb.ItemSetDict({
@@ -147,6 +151,7 @@ class ItemSetDict:
    ('seed_nodes',)

    2. Tuple of iterables with same shape: seed nodes and labels.
+
    >>> node_ids_user = torch.arange(0, 2)
    >>> labels_user = torch.arange(0, 2)
    >>> node_ids_item = torch.arange(2, 5)
@@ -166,6 +171,7 @@ class ItemSetDict:
    ('seed_nodes', 'labels')

    3. Tuple of iterables with different shape: node pairs and negative dsts.
+
    >>> node_pairs_like = torch.arange(0, 4).reshape(-1, 2)
    >>> neg_dsts_like = torch.arange(4, 10).reshape(-1, 3)
    >>> node_pairs_follow = torch.arange(0, 6).reshape(-1, 2)