Unverified Commit 3349d341 authored by Rhett Ying, committed by GitHub
Browse files

[GraphBolt][Doc] update top level APIs (#6415)

parent 821c1e42
......@@ -5,7 +5,7 @@
.. currentmodule:: dgl.graphbolt
`dgl.graphbolt` is a dataloading framework for GNN that provides well-defined APIs for each stage of the data pipeline and multiple standard implementations.
**dgl.graphbolt** is a dataloading framework for GNN that provides well-defined APIs for each stage of the data pipeline and multiple standard implementations.
APIs
-------------------------
......@@ -20,19 +20,21 @@ APIs
ItemSet
ItemSetDict
ItemSampler
DistributedItemSampler
NegativeSampler
SubgraphSampler
SampledSubgraph
exclude_edges
SamplingGraph
MiniBatch
MiniBatchTransformer
DGLMiniBatch
DGLMiniBatchConverter
Feature
Feature.read
Feature.update
FeatureStore
FeatureStore.read
FeatureStore.update
FeatureFetcher
CopyTo
DataLoaders
-----------
......
......@@ -10,6 +10,12 @@ from .item_sampler import ItemSampler
from .utils import datapipe_graph_to_adjlist
__all__ = [
"SingleProcessDataLoader",
"MultiProcessDataLoader",
]
class SingleProcessDataLoader(torch.utils.data.DataLoader):
"""Single process DataLoader.
......
......@@ -11,9 +11,8 @@ __all__ = [
class Task:
"""An abstract task.
Task consists of several meta information and *Train-Validation-Test Set*.
"""An abstract task which consists of meta information and
*Train-Validation-Test Set*.
*meta information*:
The meta information of a task includes any kinds of data that are defined
......@@ -48,9 +47,9 @@ class Task:
class Dataset:
"""An abstract dataset.
"""An abstract dataset which provides abstraction for accessing the data
required for training.
Dataset provides abstraction for accessing the data required for training.
The data abstraction could be a native CPU memory block, a shared memory
block, a file handle of an opened file on disk, a service that provides
the API to access the data e.t.c. There are 3 primary components in the
......
......@@ -7,6 +7,11 @@ from torch.utils.data import functional_datapipe
from .minibatch_transformer import MiniBatchTransformer
__all__ = [
"FeatureFetcher",
]
@functional_datapipe("fetch_feature")
class FeatureFetcher(MiniBatchTransformer):
"""A feature fetcher used to fetch features for node/edge in graphbolt."""
......
......@@ -6,7 +6,7 @@ __all__ = ["Feature", "FeatureStore"]
class Feature:
r"""Base class for feature."""
r"""A wrapper of feature data for access."""
def __init__(self):
pass
......@@ -54,7 +54,7 @@ class Feature:
class FeatureStore:
r"""Base class for feature store."""
r"""A store to manage multiple features for access."""
def __init__(self):
pass
......
......@@ -19,9 +19,7 @@ __all__ = ["ItemSampler", "DistributedItemSampler", "minibatcher_default"]
def minibatcher_default(batch, names):
"""Default minibatcher.
The default minibatcher maps a list of items to a `MiniBatch` with the
"""Default minibatcher which maps a list of items to a `MiniBatch` with the
same names as the items. The names of items are supposed to be provided
and align with the data attributes of `MiniBatch`. If any unknown item name
is provided, exception will be raised. If the names of items are not
......@@ -80,11 +78,11 @@ def minibatcher_default(batch, names):
class ItemSampler(IterDataPipe):
"""Item Sampler.
"""A sampler to iterate over input items and create subsets.
Creates item subset of data which could be node IDs, node pairs with or
without labels, node pairs with negative sources/destinations, DGLGraphs
and heterogeneous counterparts.
Input items could be node IDs, node pairs with or without labels, node
pairs with negative sources/destinations, DGLGraphs and heterogeneous
counterparts.
Note: This class `ItemSampler` is not decorated with
`torchdata.datapipes.functional_datapipe` on purpose. This indicates it
......@@ -338,7 +336,7 @@ class ItemSampler(IterDataPipe):
class DistributedItemSampler(ItemSampler):
"""Distributed Item Sampler.
"""A sampler to iterate over input items and create subsets distributedly.
This sampler creates a distributed subset of items from the given data set,
which can be used for training with PyTorch's Distributed Data Parallel
......
......@@ -6,7 +6,7 @@ __all__ = ["ItemSet", "ItemSetDict"]
class ItemSet:
r"""An iterable itemset.
r"""A wrapper of iterable data or tuple of iterable data.
All itemsets that represent an iterable of items should subclass it. Such
form of itemset is particularly useful when items come from a stream. This
......@@ -118,7 +118,7 @@ class ItemSet:
class ItemSetDict:
r"""An iterable ItemsetDict.
r"""Dictionary wrapper of **ItemSet**.
Each item is retrieved by iterating over each itemset and returned with
corresponding key as a dict.
......
......@@ -17,7 +17,7 @@ __all__ = ["DGLMiniBatch", "MiniBatch"]
@dataclass
class DGLMiniBatch:
r"""A data class designed for the DGL library, encompassing all the
necessary fields for computation using the DGL library.."""
necessary fields for computation using the DGL library."""
blocks: List[DGLBlock] = None
"""A list of 'DGLBlock's, each one corresponding to one layer, representing
......@@ -98,8 +98,9 @@ class DGLMiniBatch:
@dataclass
class MiniBatch:
r"""A composite data class for data structure in the graphbolt. It is
designed to facilitate the exchange of data among different components
r"""A composite data class for data structure in the graphbolt.
It is designed to facilitate the exchange of data among different components
involved in processing data. The purpose of this class is to unify the
representation of input and output data across different stages, ensuring
consistency and ease of use throughout the loading process."""
......
......@@ -6,6 +6,11 @@ from torchdata.datapipes.iter import Mapper
from .minibatch import MiniBatch
__all__ = [
"MiniBatchTransformer",
"DGLMiniBatchConverter",
]
@functional_datapipe("transform")
class MiniBatchTransformer(Mapper):
......
......@@ -6,6 +6,10 @@ from torch.utils.data import functional_datapipe
from .minibatch_transformer import MiniBatchTransformer
__all__ = [
"NegativeSampler",
]
@functional_datapipe("sample_negative")
class NegativeSampler(MiniBatchTransformer):
......
......@@ -7,6 +7,9 @@ import torch
from .base import etype_str_to_tuple
__all__ = ["SampledSubgraph"]
class SampledSubgraph:
r"""An abstract class for sampled subgraph. In the context of a
heterogeneous graph, each field should be of `Dict` type. Otherwise,
......
......@@ -3,6 +3,9 @@
from typing import Dict, Union
__all__ = ["SamplingGraph"]
class SamplingGraph:
r"""Class for sampling graph."""
......
......@@ -9,6 +9,10 @@ from .base import etype_str_to_tuple
from .minibatch_transformer import MiniBatchTransformer
from .utils import unique_and_compact
__all__ = [
"SubgraphSampler",
]
@functional_datapipe("sample_subgraph")
class SubgraphSampler(MiniBatchTransformer):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment