Unverified Commit f13b9b62 authored by Minjie Wang's avatar Minjie Wang Committed by GitHub
Browse files

[Doc] Scan the API docs and make many changes (#2080)



* WIP: api

* dgl.sampling, dgl.data

* dgl.sampling; dgl.dataloading

* sampling packages

* convert

* subgraph

* deprecate

* subgraph APIs

* All docstrings for convert/subgraph/transform

* almost all funcs under dgl namespace

* WIP: DGLGraph

* done graph query

* message passing functions

* lint

* fix merge error

* fix test

* lint

* fix
Co-authored-by: default avatarQuan Gan <coin2028@hotmail.com>
parent 35e25914
"""For Graph Serialization"""
from __future__ import absolute_import
import os
from ..base import dgl_warning
from ..base import dgl_warning, DGLError
from ..heterograph import DGLHeteroGraph
from .._ffi.object import ObjectBase, register_object
from .._ffi.function import _init_api
......@@ -66,16 +66,23 @@ class GraphData(ObjectBase):
def save_graphs(filename, g_list, labels=None):
r"""
Save DGLGraphs and graph labels to file
r"""Save graphs and optionally their labels to file.
Besides saving to local files, DGL supports writing the graphs directly
to S3 (by providing a ``"s3://..."`` path) or to HDFS (by providing
an ``"hdfs://..."`` path).
The function saves both the graph structure and node/edge features to file
in DGL's own binary format. For graph-level features, pass them via
the :attr:`labels` argument.
Parameters
----------
filename : str
File name to store graphs.
The file name to store the graphs and labels.
g_list: list
DGLGraph or list of DGLGraph/DGLHeteroGraph
labels: dict[str, tensor]
The graphs to be saved.
labels: dict[str, Tensor]
labels should be dict of tensors, with str as keys
Examples
......@@ -83,7 +90,7 @@ def save_graphs(filename, g_list, labels=None):
>>> import dgl
>>> import torch as th
Create :code:`DGLGraph`/:code:`DGLHeteroGraph` objects and initialize node
Create :class:`DGLGraph` objects and initialize node
and edge features.
>>> g1 = dgl.graph(([0, 1, 2], [1, 2, 3]))
......@@ -96,55 +103,66 @@ def save_graphs(filename, g_list, labels=None):
>>> graph_labels = {"glabel": th.tensor([0, 1])}
>>> save_graphs("./data.bin", [g1, g2], graph_labels)
See Also
--------
load_graphs
"""
# if it is local file, do some sanity check
if filename.startswith('s3://') is False:
assert not os.path.isdir(filename), "filename {} is an existing directory.".format(filename)
if os.path.isdir(filename):
raise DGLError("Filename {} is an existing directory.".format(filename))
f_path, _ = os.path.split(filename)
if not os.path.exists(f_path):
os.makedirs(f_path)
g_sample = g_list[0] if isinstance(g_list, list) else g_list
if type(g_sample) == DGLHeteroGraph: # Doesn't support DGLHeteroGraph's derived class
if type(g_sample) == DGLHeteroGraph: # Doesn't support DGLHeteroGraph's derived class
save_heterographs(filename, g_list, labels)
else:
raise Exception(
"Invalid argument g_list. Must be a DGLGraph or a list of DGLGraphs/DGLHeteroGraphs")
raise DGLError(
"Invalid argument g_list. Must be a DGLGraph or a list of DGLGraphs.")
def load_graphs(filename, idx_list=None):
"""
Load DGLGraphs from file
"""Load graphs and optionally their labels from file saved by :func:`save_graphs`.
Besides loading from local files, DGL supports loading the graphs directly
from S3 (by providing a ``"s3://..."`` path) or from HDFS (by providing
an ``"hdfs://..."`` path).
Parameters
----------
filename: str
filename to load graphs
idx_list: list of int
list of index of graph to be loaded. If not specified, will
load all graphs from file
The file name to load graphs from.
idx_list: list[int], optional
The indices of the graphs to be loaded if the file contains multiple graphs.
Default is loading all the graphs stored in the file.
Returns
--------
graph_list: list of DGLGraphs / DGLHeteroGraph
graph_list: list[DGLGraph]
The loaded graphs.
labels: dict[str, Tensor]
The graph labels stored in file. If no label is stored, the dictionary is empty.
Regardless of whether the ``idx_list`` argument is given or not, the returned dictionary
always contains labels of all the graphs.
Regardless of whether the ``idx_list`` argument is given or not,
the returned dictionary always contains the labels of all the graphs.
Examples
----------
Following the example in save_graphs.
Following the example in :func:`save_graphs`.
>>> from dgl.data.utils import load_graphs
>>> glist, label_dict = load_graphs("./data.bin") # glist will be [g1, g2]
>>> glist, label_dict = load_graphs("./data.bin", [0]) # glist will be [g1]
See Also
--------
save_graphs
"""
# if it is local file, do some sanity check
assert filename.startswith('s3://') or os.path.exists(filename), "file {} does not exist.".format(filename)
if not (filename.startswith('s3://') or os.path.exists(filename)):
raise DGLError("File {} does not exist.".format(filename))
version = _CAPI_GetFileVersion(filename)
if version == 1:
......@@ -155,7 +173,7 @@ def load_graphs(filename, idx_list=None):
elif version == 2:
return load_graph_v2(filename, idx_list)
else:
raise Exception("Invalid DGL Version Number")
raise DGLError("Invalid DGL Version Number.")
def load_graph_v2(filename, idx_list=None):
......
"""Classes that involves iterating over nodes or edges in a graph and generates
computation dependency of necessary nodes with neighborhood sampling methods.
"""The ``dgl.dataloading`` package contains:
This includes
* :py:class:`~dgl.dataloading.pytorch.NodeDataLoader` for iterating over the nodes in
a graph in minibatches.
* :py:class:`~dgl.dataloading.pytorch.EdgeDataLoader` for iterating over the edges in
a graph in minibatches.
* Data loader classes for iterating over a set of nodes or edges in a graph
  and generating the computation dependencies via neighborhood sampling methods.
* Various sampler classes that perform neighborhood sampling for multi-layer GNNs.
* Negative samplers for link prediction.
NOTE: this module is experimental and the interfaces may be subject to changes in
future releases.
For a holistic explanation of how the different components work together,
read the user guide :ref:`guide-minibatch`.
.. note::
This package is experimental and the interfaces may be subject
to changes in future releases. It currently only has implementations in PyTorch.
"""
from .neighbor import *
from .dataloader import *
......
"""Module for various graph generator functions."""
# pylint: disable= dangerous-default-value
from . import backend as F
from . import convert
......@@ -7,13 +6,14 @@ from . import random
__all__ = ['rand_graph', 'rand_bipartite']
def rand_graph(num_nodes, num_edges, idtype=F.int64, device=F.cpu(),
formats=['coo', 'csr', 'csc']):
"""Generate a random graph of the given number of nodes/edges.
def rand_graph(num_nodes, num_edges, idtype=F.int64, device=F.cpu()):
"""Generate a random graph of the given number of nodes/edges and return.
It uniformly chooses ``num_edges`` from all pairs and form a graph.
It uniformly chooses ``num_edges`` from all possible node pairs and form a graph.
The random choice is without replacement, which means there will be no multi-edge
in the resulting graph.
TODO(minjie): support RNG as one of the arguments.
To control the randomness, set the random seed via :func:`dgl.seed`.
Parameters
----------
......@@ -22,34 +22,51 @@ def rand_graph(num_nodes, num_edges, idtype=F.int64, device=F.cpu(),
num_edges : int
The number of edges
idtype : int32, int64, optional
Integer ID type. Must be int32 or int64. Default: int64.
The data type for storing the structure-related graph information
such as node and edge IDs. It should be a framework-specific data type object
(e.g., torch.int32). By default, DGL uses int64.
device : Device context, optional
Device on which the graph is created. Default: CPU.
formats : str or list of str
It can be ``'coo'``/``'csr'``/``'csc'`` or a sublist of them,
Force the storage formats. Default: ``['coo', 'csr', 'csc']``.
The device of the resulting graph. It should be a framework-specific device
object (e.g., torch.device). By default, DGL stores the graph on CPU.
Returns
-------
DGLHeteroGraph
Generated random graph.
DGLGraph
The generated random graph.
See Also
--------
rand_bipartite
Examples
--------
>>> import dgl
>>> dgl.rand_graph(100, 10)
Graph(num_nodes=100, num_edges=10,
ndata_schemes={}
edata_schemes={})
"""
#TODO(minjie): support RNG as one of the arguments.
eids = random.choice(num_nodes * num_nodes, num_edges, replace=False)
rows = F.copy_to(F.astype(eids / num_nodes, idtype), device)
cols = F.copy_to(F.astype(eids % num_nodes, idtype), device)
g = convert.graph((rows, cols),
num_nodes=num_nodes,
idtype=idtype, device=device)
return g.formats(formats)
eids = F.zerocopy_to_numpy(eids)
rows = F.zerocopy_from_numpy(eids // num_nodes)
cols = F.zerocopy_from_numpy(eids % num_nodes)
rows = F.copy_to(F.astype(rows, idtype), device)
cols = F.copy_to(F.astype(cols, idtype), device)
return convert.graph((rows, cols),
num_nodes=num_nodes,
idtype=idtype, device=device)
def rand_bipartite(utype, etype, vtype,
num_src_nodes, num_dst_nodes, num_edges,
idtype=F.int64, device=F.cpu(),
formats=['csr', 'coo', 'csc']):
"""Generate a random bipartite graph of the given number of src/dst nodes and
number of edges.
idtype=F.int64, device=F.cpu()):
"""Generate a random uni-directional bipartite graph and return.
It uniformly chooses ``num_edges`` from all possible node pairs and form a graph.
The random choice is without replacement, which means there will be no multi-edge
in the resulting graph.
It uniformly chooses ``num_edges`` from all pairs and form a graph.
To control the randomness, set the random seed via :func:`dgl.seed`.
Parameters
----------
......@@ -60,28 +77,43 @@ def rand_bipartite(utype, etype, vtype,
vtype : str, optional
The name of the destination node type.
num_src_nodes : int
The number of source nodes, the :math:`|U|` in :math:`G=(U,V,E)`.
The number of source nodes.
num_dst_nodes : int
The number of destination nodes, the :math:`|V|` in :math:`G=(U,V,E)`.
The number of destination nodes.
num_edges : int
The number of edges
idtype : int32, int64, optional
Integer ID type. Must be int32 or int64. Default: int64.
The data type for storing the structure-related graph information
such as node and edge IDs. It should be a framework-specific data type object
(e.g., torch.int32). By default, DGL uses int64.
device : Device context, optional
Device on which the graph is created. Default: CPU.
formats : str or list of str
It can be ``'coo'``/``'csr'``/``'csc'`` or a sublist of them,
Force the storage formats. Default: ``['coo', 'csr', 'csc']``.
The device of the resulting graph. It should be a framework-specific device
object (e.g., torch.device). By default, DGL stores the graph on CPU.
Returns
-------
DGLHeteroGraph
Generated random bipartite graph.
DGLGraph
The generated random bipartite graph.
See Also
--------
rand_graph
Examples
--------
>>> import dgl
>>> dgl.rand_bipartite('user', 'buys', 'game', 50, 100, 10)
Graph(num_nodes={'game': 100, 'user': 50},
num_edges={('user', 'buys', 'game'): 10},
metagraph=[('user', 'game', 'buys')])
"""
#TODO(minjie): support RNG as one of the arguments.
eids = random.choice(num_src_nodes * num_dst_nodes, num_edges, replace=False)
rows = F.copy_to(F.astype(eids / num_dst_nodes, idtype), device)
cols = F.copy_to(F.astype(eids % num_dst_nodes, idtype), device)
g = convert.heterograph({(utype, etype, vtype): (rows, cols)},
{utype: num_src_nodes, vtype: num_dst_nodes},
idtype=idtype, device=device)
return g.formats(formats)
eids = F.zerocopy_to_numpy(eids)
rows = F.zerocopy_from_numpy(eids // num_dst_nodes)
cols = F.zerocopy_from_numpy(eids % num_dst_nodes)
rows = F.copy_to(F.astype(rows, idtype), device)
cols = F.copy_to(F.astype(cols, idtype), device)
return convert.heterograph({(utype, etype, vtype): (rows, cols)},
{utype: num_src_nodes, vtype: num_dst_nodes},
idtype=idtype, device=device)
This diff is collapsed.
"""Package for neural network common components."""
"""The ``dgl.nn`` package contains framework-specific implementations for
common Graph Neural Network layers (or module in PyTorch, Block in MXNet).
Users can directly import ``dgl.nn.<layer_name>`` (e.g., ``dgl.nn.GraphConv``),
and the package will dispatch the layer name to the actual implementation
according to the backend framework currently in use.
Note that there are coverage differences among frameworks. If you encounter
an ``ImportError: cannot import name 'XXX'`` error, that means the layer is
not available for the current backend.
please `create an issue <https://github.com/dmlc/dgl/issues>`_ started with
"[Feature Request] NN Module XXXModel". If you want to contribute a NN module,
please `create a pull request <https://github.com/dmlc/dgl/pulls>`_ started
with "[NN] XXX module".
"""
import importlib
import sys
import os
......
......@@ -8,14 +8,12 @@ from . import ndarray as nd
__all__ = ['seed']
def seed(val):
    """Set the random seed of DGL.

    The seed affects DGL's own randomized routines, such as the various
    samplers and random walk methods.

    Parameters
    ----------
    val : int
        The seed.
    """
    _CAPI_SetSeed(val)
......@@ -41,8 +39,6 @@ def choice(a, size, replace=True, prob=None): # pylint: disable=invalid-name
It out-performs numpy for non-uniform sampling in general cases.
TODO(minjie): support RNG as one of the arguments.
Parameters
----------
a : 1-D tensor or int
......@@ -61,6 +57,7 @@ def choice(a, size, replace=True, prob=None): # pylint: disable=invalid-name
samples : 1-D tensor
The generated random samples
"""
#TODO(minjie): support RNG as one of the arguments.
if isinstance(size, tuple):
num = np.prod(size)
else:
......
......@@ -28,9 +28,9 @@ def readout_nodes(graph, feat, weight=None, *, op='sum', ntype=None):
feat : str
Node feature name.
weight : str, optional
Node weight name. If None, no weighting will be performed,
otherwise, weight each node feature with field :attr:`feat`.
for aggregation. The weight feature shape must be compatible with
Node weight name. None means aggregating without weights.
Otherwise, multiply each node feature by node feature :attr:`weight`
before aggregation. The weight feature shape must be compatible with
an element-wise multiplication with the feature tensor.
op : str, optional
Readout operator. Can be 'sum', 'max', 'min', 'mean'.
......@@ -39,7 +39,7 @@ def readout_nodes(graph, feat, weight=None, *, op='sum', ntype=None):
Returns
-------
tensor
Tensor
Result tensor.
Examples
......@@ -101,22 +101,28 @@ def readout_edges(graph, feat, weight=None, *, op='sum', etype=None):
Parameters
----------
graph : DGLGraph.
Input graph.
The input graph.
feat : str
Edge feature name.
The edge feature name.
weight : str, optional
Edge weight name. If None, no weighting will be performed,
The edge weight feature name. If None, no weighting will be performed,
otherwise, weight each edge feature with field :attr:`feat`.
for summation. The weight feature shape must be compatible with
an element-wise multiplication with the feature tensor.
op : str, optional
Readout operator. Can be 'sum', 'max', 'min', 'mean'.
etype : str, tuple of str, optional
Edge type. Can be omitted if there is only one edge type in the graph.
etype : str or (str, str, str), optional
The type names of the edges. The allowed type name formats are:
* ``(str, str, str)`` for source node type, edge type and destination node type.
* or one ``str`` edge type name if the name can uniquely identify a
triplet format in the graph.
Can be omitted if the graph has only one type of edges.
Returns
-------
tensor
Tensor
Result tensor.
Examples
......@@ -166,31 +172,55 @@ def readout_edges(graph, feat, weight=None, *, op='sum', etype=None):
def sum_nodes(graph, feat, weight=None, *, ntype=None):
    """Shorthand for :func:`dgl.readout_nodes` with ``op='sum'``.

    Equivalent to ``dgl.readout_nodes(graph, feat, weight, ntype=ntype, op='sum')``.

    See Also
    --------
    readout_nodes
    """
    return readout_nodes(graph, feat, weight, op='sum', ntype=ntype)
def sum_edges(graph, feat, weight=None, *, etype=None):
    """Shorthand for :func:`dgl.readout_edges` with ``op='sum'``.

    Equivalent to ``dgl.readout_edges(graph, feat, weight, etype=etype, op='sum')``.

    See Also
    --------
    readout_edges
    """
    return readout_edges(graph, feat, weight, op='sum', etype=etype)
def mean_nodes(graph, feat, weight=None, *, ntype=None):
    """Shorthand for :func:`dgl.readout_nodes` with ``op='mean'``.

    Equivalent to ``dgl.readout_nodes(graph, feat, weight, ntype=ntype, op='mean')``.

    See Also
    --------
    readout_nodes
    """
    return readout_nodes(graph, feat, weight, op='mean', ntype=ntype)
def mean_edges(graph, feat, weight=None, *, etype=None):
    """Shorthand for :func:`dgl.readout_edges` with ``op='mean'``.

    Equivalent to ``dgl.readout_edges(graph, feat, weight, etype=etype, op='mean')``.

    See Also
    --------
    readout_edges
    """
    return readout_edges(graph, feat, weight, op='mean', etype=etype)
def max_nodes(graph, feat, weight=None, *, ntype=None):
    """Shorthand for :func:`dgl.readout_nodes` with ``op='max'``.

    Equivalent to ``dgl.readout_nodes(graph, feat, weight, ntype=ntype, op='max')``.

    See Also
    --------
    readout_nodes
    """
    return readout_nodes(graph, feat, weight, op='max', ntype=ntype)
def max_edges(graph, feat, weight=None, *, etype=None):
    """Shorthand for :func:`dgl.readout_edges` with ``op='max'``.

    Equivalent to ``dgl.readout_edges(graph, feat, weight, etype=etype, op='max')``.

    See Also
    --------
    readout_edges
    """
    return readout_edges(graph, feat, weight, op='max', etype=etype)
......@@ -210,15 +240,15 @@ def softmax_nodes(graph, feat, *, ntype=None):
Parameters
----------
graph : DGLGraph.
Input graph.
The input graph.
feat : str
Node feature name.
The node feature name.
ntype : str, optional
Node type. Can be omitted if there is only one node type in the graph.
The node type name. Can be omitted if there is only one node type in the graph.
Returns
-------
tensor
Tensor
Result tensor.
Examples
......@@ -269,15 +299,21 @@ def softmax_edges(graph, feat, *, etype=None):
Parameters
----------
graph : DGLGraph.
Input graph.
The input graph.
feat : str
Edge feature name.
etype : str, typle of str, optional
Edge type. Can be omitted if there is only one edge type in the graph.
The edge feature name.
etype : str or (str, str, str), optional
The type names of the edges. The allowed type name formats are:
* ``(str, str, str)`` for source node type, edge type and destination node type.
* or one ``str`` edge type name if the name can uniquely identify a
triplet format in the graph.
Can be omitted if the graph has only one type of edges.
Returns
-------
tensor
Tensor
Result tensor.
Examples
......@@ -535,9 +571,10 @@ def _topk_on(graph, typestr, feat, k, descending, sortby, ntype_or_etype):
topk_indices
def topk_nodes(graph, feat, k, *, descending=True, sortby=None, ntype=None):
"""Perform a graph-wise top-k on node features :attr:`feat` in
:attr:`graph` by feature at index :attr:`sortby`. If :attr:
`descending` is set to False, return the k smallest elements instead.
"""Return a graph-level representation by a graph-wise top-k on
node features :attr:`feat` in :attr:`graph` by feature at index :attr:`sortby`.
If :attr:`descending` is set to False, return the k smallest elements instead.
If :attr:`sortby` is set to None, the function would perform top-k on
all dimensions independently, equivalent to calling
......@@ -569,6 +606,11 @@ def topk_nodes(graph, feat, k, *, descending=True, sortby=None, ntype=None):
:math:`B` is the batch size of the input graph, :math:`D`
is the feature size.
Notes
-----
If an example has :math:`n` nodes and :math:`n<k`, the ``sorted_feat``
tensor pads the :math:`n+1`-th to :math:`k`-th rows with zeros.
Examples
--------
......@@ -631,20 +673,16 @@ def topk_nodes(graph, feat, k, *, descending=True, sortby=None, ntype=None):
[0.0880, 0.6379, 0.4451, 0.6893, 0.5197]]]), tensor([[[1, 0, 1, 3, 1],
[3, 2, 0, 2, 2],
[2, 3, 2, 1, 3]]]))
Notes
-----
If an example has :math:`n` nodes and :math:`n<k`, the ``sorted_feat``
tensor will pad the :math:`n+1` to :math:`k`th rows with zero;
"""
return _topk_on(graph, 'nodes', feat, k,
descending=descending, sortby=sortby,
ntype_or_etype=ntype)
def topk_edges(graph, feat, k, *, descending=True, sortby=None, etype=None):
"""Perform a graph-wise top-k on node features :attr:`feat` in
:attr:`graph` by feature at index :attr:`sortby`. If :attr:
`descending` is set to False, return the k smallest elements instead.
"""Return a graph-level representation by a graph-wise top-k
on edge features :attr:`feat` in :attr:`graph` by feature at index :attr:`sortby`.
If :attr:`descending` is set to False, return the k smallest elements instead.
If :attr:`sortby` is set to None, the function would perform top-k on
all dimensions independently, equivalent to calling
......@@ -676,6 +714,11 @@ def topk_edges(graph, feat, k, *, descending=True, sortby=None, etype=None):
:math:`B` is the batch size of the input graph, :math:`D`
is the feature size.
Notes
-----
If an example has :math:`n` edges and :math:`n<k`, the ``sorted_feat``
tensor pads the :math:`n+1`-th to :math:`k`-th rows with zeros.
Examples
--------
......@@ -738,11 +781,6 @@ def topk_edges(graph, feat, k, *, descending=True, sortby=None, etype=None):
[0.0880, 0.6379, 0.4451, 0.6893, 0.5197]]]), tensor([[[1, 0, 1, 3, 1],
[3, 2, 0, 2, 2],
[2, 3, 2, 1, 3]]]))
Notes
-----
If an example has :math:`n` nodes and :math:`n<k`, the ``sorted_feat``
tensor will pad the :math:`n+1` to :math:`k`th rows with zero;
"""
return _topk_on(graph, 'edges', feat, k,
descending=descending, sortby=sortby,
......
"""This module contains the implementations of various sampling operators.
"""The ``dgl.sampling`` package contains operators and utilities for
sampling from a graph via random walks, neighbor sampling, etc. They
are typically used together with the ``DataLoader`` s in the
``dgl.dataloading`` package. The user guide :ref:`guide-minibatch`
gives a holistic explanation on how different components work together.
"""
from .randomwalks import *
from .pinsage import *
from .neighbor import *
This diff is collapsed.
This diff is collapsed.
......@@ -891,4 +891,12 @@ def set_num_threads(num_threads):
"""
_CAPI_DGLSetOMPThreads(num_threads)
def alias_func(func):
    """Build and return an alias of ``func`` with a proper docstring.

    The returned callable forwards all positional and keyword arguments to
    ``func`` unchanged; only its ``__doc__`` is replaced with a Sphinx
    cross-reference pointing at the original function.
    """
    @wraps(func)
    def forwarder(*args, **kwargs):
        return func(*args, **kwargs)
    # Overwrite the docstring copied by ``wraps`` with a cross-reference.
    forwarder.__doc__ = """Alias of :func:`dgl.{}`.""".format(func.__name__)
    return forwarder
_init_api("dgl.utils.internal")
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment