Unverified Commit f13b9b62 authored by Minjie Wang's avatar Minjie Wang Committed by GitHub
Browse files

[Doc] Scan the API docs and make many changes (#2080)



* WIP: api

* dgl.sampling, dgl.data

* dgl.sampling; dgl.dataloading

* sampling packages

* convert

* subgraph

* deprecate

* subgraph APIs

* All docstrings for convert/subgraph/transform

* almost all funcs under dgl namespace

* WIP: DGLGraph

* done graph query

* message passing functions

* lint

* fix merge error

* fix test

* lint

* fix
Co-authored-by: Quan Gan <coin2028@hotmail.com>
parent 35e25914
"""For Graph Serialization""" """For Graph Serialization"""
from __future__ import absolute_import from __future__ import absolute_import
import os import os
from ..base import dgl_warning from ..base import dgl_warning, DGLError
from ..heterograph import DGLHeteroGraph from ..heterograph import DGLHeteroGraph
from .._ffi.object import ObjectBase, register_object from .._ffi.object import ObjectBase, register_object
from .._ffi.function import _init_api from .._ffi.function import _init_api
...@@ -66,16 +66,23 @@ class GraphData(ObjectBase): ...@@ -66,16 +66,23 @@ class GraphData(ObjectBase):
def save_graphs(filename, g_list, labels=None): def save_graphs(filename, g_list, labels=None):
r""" r"""Save graphs and optionally their labels to file.
Save DGLGraphs and graph labels to file
Besides saving to local files, DGL supports writing the graphs directly
to S3 (by providing a ``"s3://..."`` path) or to HDFS (by providing
a ``"hdfs://..."`` path).
The function saves both the graph structure and node/edge features to file
in DGL's own binary format. For graph-level features, pass them via
the :attr:`labels` argument.
Parameters Parameters
---------- ----------
filename : str filename : str
File name to store graphs. The file name to store the graphs and labels.
g_list: list g_list: list
DGLGraph or list of DGLGraph/DGLHeteroGraph The graphs to be saved.
labels: dict[str, tensor] labels: dict[str, Tensor]
labels should be dict of tensors, with str as keys labels should be dict of tensors, with str as keys
Examples Examples
...@@ -83,7 +90,7 @@ def save_graphs(filename, g_list, labels=None): ...@@ -83,7 +90,7 @@ def save_graphs(filename, g_list, labels=None):
>>> import dgl >>> import dgl
>>> import torch as th >>> import torch as th
Create :code:`DGLGraph`/:code:`DGLHeteroGraph` objects and initialize node Create :class:`DGLGraph` objects and initialize node
and edge features. and edge features.
>>> g1 = dgl.graph(([0, 1, 2], [1, 2, 3])) >>> g1 = dgl.graph(([0, 1, 2], [1, 2, 3]))
...@@ -96,55 +103,66 @@ def save_graphs(filename, g_list, labels=None): ...@@ -96,55 +103,66 @@ def save_graphs(filename, g_list, labels=None):
>>> graph_labels = {"glabel": th.tensor([0, 1])} >>> graph_labels = {"glabel": th.tensor([0, 1])}
>>> save_graphs("./data.bin", [g1, g2], graph_labels) >>> save_graphs("./data.bin", [g1, g2], graph_labels)
See Also
--------
load_graphs
""" """
# if it is local file, do some sanity check # if it is local file, do some sanity check
if filename.startswith('s3://') is False: if filename.startswith('s3://') is False:
assert not os.path.isdir(filename), "filename {} is an existing directory.".format(filename) if os.path.isdir(filename):
raise DGLError("Filename {} is an existing directory.".format(filename))
f_path, _ = os.path.split(filename) f_path, _ = os.path.split(filename)
if not os.path.exists(f_path): if not os.path.exists(f_path):
os.makedirs(f_path) os.makedirs(f_path)
g_sample = g_list[0] if isinstance(g_list, list) else g_list g_sample = g_list[0] if isinstance(g_list, list) else g_list
if type(g_sample) == DGLHeteroGraph: # Doesn't support DGLHeteroGraph's derived class if type(g_sample) == DGLHeteroGraph: # Doesn't support DGLHeteroGraph's derived class
save_heterographs(filename, g_list, labels) save_heterographs(filename, g_list, labels)
else: else:
raise Exception( raise DGLError(
"Invalid argument g_list. Must be a DGLGraph or a list of DGLGraphs/DGLHeteroGraphs") "Invalid argument g_list. Must be a DGLGraph or a list of DGLGraphs.")
def load_graphs(filename, idx_list=None): def load_graphs(filename, idx_list=None):
""" """Load graphs and optionally their labels from file saved by :func:`save_graphs`.
Load DGLGraphs from file
Besides loading from local files, DGL supports loading the graphs directly
from S3 (by providing a ``"s3://..."`` path) or from HDFS (by providing
a ``"hdfs://..."`` path).
Parameters Parameters
---------- ----------
filename: str filename: str
filename to load graphs The file name to load graphs from.
idx_list: list of int idx_list: list[int], optional
list of index of graph to be loaded. If not specified, will The indices of the graphs to be loaded if the file contains multiple graphs.
load all graphs from file Default is loading all the graphs stored in the file.
Returns Returns
-------- --------
graph_list: list of DGLGraphs / DGLHeteroGraph graph_list: list[DGLGraph]
The loaded graphs. The loaded graphs.
labels: dict[str, Tensor] labels: dict[str, Tensor]
The graph labels stored in file. If no label is stored, the dictionary is empty. The graph labels stored in file. If no label is stored, the dictionary is empty.
Regardless of whether the ``idx_list`` argument is given or not, the returned dictionary Regardless of whether the ``idx_list`` argument is given or not,
always contains labels of all the graphs. the returned dictionary always contains the labels of all the graphs.
Examples Examples
---------- ----------
Following the example in save_graphs. Following the example in :func:`save_graphs`.
>>> from dgl.data.utils import load_graphs >>> from dgl.data.utils import load_graphs
>>> glist, label_dict = load_graphs("./data.bin") # glist will be [g1, g2] >>> glist, label_dict = load_graphs("./data.bin") # glist will be [g1, g2]
>>> glist, label_dict = load_graphs("./data.bin", [0]) # glist will be [g1] >>> glist, label_dict = load_graphs("./data.bin", [0]) # glist will be [g1]
See Also
--------
save_graphs
""" """
# if it is local file, do some sanity check # if it is local file, do some sanity check
assert filename.startswith('s3://') or os.path.exists(filename), "file {} does not exist.".format(filename) if not (filename.startswith('s3://') or os.path.exists(filename)):
raise DGLError("File {} does not exist.".format(filename))
version = _CAPI_GetFileVersion(filename) version = _CAPI_GetFileVersion(filename)
if version == 1: if version == 1:
...@@ -155,7 +173,7 @@ def load_graphs(filename, idx_list=None): ...@@ -155,7 +173,7 @@ def load_graphs(filename, idx_list=None):
elif version == 2: elif version == 2:
return load_graph_v2(filename, idx_list) return load_graph_v2(filename, idx_list)
else: else:
raise Exception("Invalid DGL Version Number") raise DGLError("Invalid DGL Version Number.")
def load_graph_v2(filename, idx_list=None): def load_graph_v2(filename, idx_list=None):
......
"""Classes that involves iterating over nodes or edges in a graph and generates """The ``dgl.dataloading`` package contains:
computation dependency of necessary nodes with neighborhood sampling methods.
This includes * Data loader classes for iterating over a set of nodes or edges in a graph and generating
computation dependency via neighborhood sampling methods.
* :py:class:`~dgl.dataloading.pytorch.NodeDataLoader` for iterating over the nodes in
a graph in minibatches.
* :py:class:`~dgl.dataloading.pytorch.EdgeDataLoader` for iterating over the edges in
a graph in minibatches.
* Various sampler classes that perform neighborhood sampling for multi-layer GNNs. * Various sampler classes that perform neighborhood sampling for multi-layer GNNs.
* Negative samplers for link prediction. * Negative samplers for link prediction.
NOTE: this module is experimental and the interfaces may be subject to changes in For a holistic explanation of how different components work together,
future releases. read the user guide :ref:`guide-minibatch`.
.. note::
This package is experimental and the interfaces may be subject
to changes in future releases. It currently only has implementations in PyTorch.
""" """
from .neighbor import * from .neighbor import *
from .dataloader import * from .dataloader import *
......
"""Module for various graph generator functions.""" """Module for various graph generator functions."""
# pylint: disable= dangerous-default-value
from . import backend as F from . import backend as F
from . import convert from . import convert
...@@ -7,13 +6,14 @@ from . import random ...@@ -7,13 +6,14 @@ from . import random
__all__ = ['rand_graph', 'rand_bipartite'] __all__ = ['rand_graph', 'rand_bipartite']
def rand_graph(num_nodes, num_edges, idtype=F.int64, device=F.cpu(), def rand_graph(num_nodes, num_edges, idtype=F.int64, device=F.cpu()):
formats=['coo', 'csr', 'csc']): """Generate a random graph of the given number of nodes/edges and return.
"""Generate a random graph of the given number of nodes/edges.
It uniformly chooses ``num_edges`` from all pairs and form a graph. It uniformly chooses ``num_edges`` from all possible node pairs and forms a graph.
The random choice is without replacement, which means there will be no multi-edge
in the resulting graph.
TODO(minjie): support RNG as one of the arguments. To control the randomness, set the random seed via :func:`dgl.seed`.
Parameters Parameters
---------- ----------
...@@ -22,34 +22,51 @@ def rand_graph(num_nodes, num_edges, idtype=F.int64, device=F.cpu(), ...@@ -22,34 +22,51 @@ def rand_graph(num_nodes, num_edges, idtype=F.int64, device=F.cpu(),
num_edges : int num_edges : int
The number of edges The number of edges
idtype : int32, int64, optional idtype : int32, int64, optional
Integer ID type. Must be int32 or int64. Default: int64. The data type for storing the structure-related graph information
such as node and edge IDs. It should be a framework-specific data type object
(e.g., torch.int32). By default, DGL uses int64.
device : Device context, optional device : Device context, optional
Device on which the graph is created. Default: CPU. The device of the resulting graph. It should be a framework-specific device
formats : str or list of str object (e.g., torch.device). By default, DGL stores the graph on CPU.
It can be ``'coo'``/``'csr'``/``'csc'`` or a sublist of them,
Force the storage formats. Default: ``['coo', 'csr', 'csc']``.
Returns Returns
------- -------
DGLHeteroGraph DGLGraph
Generated random graph. The generated random graph.
See Also
--------
rand_bipartite
Examples
--------
>>> import dgl
>>> dgl.rand_graph(100, 10)
Graph(num_nodes=100, num_edges=10,
ndata_schemes={}
edata_schemes={})
""" """
#TODO(minjie): support RNG as one of the arguments.
eids = random.choice(num_nodes * num_nodes, num_edges, replace=False) eids = random.choice(num_nodes * num_nodes, num_edges, replace=False)
rows = F.copy_to(F.astype(eids / num_nodes, idtype), device) eids = F.zerocopy_to_numpy(eids)
cols = F.copy_to(F.astype(eids % num_nodes, idtype), device) rows = F.zerocopy_from_numpy(eids // num_nodes)
g = convert.graph((rows, cols), cols = F.zerocopy_from_numpy(eids % num_nodes)
num_nodes=num_nodes, rows = F.copy_to(F.astype(rows, idtype), device)
idtype=idtype, device=device) cols = F.copy_to(F.astype(cols, idtype), device)
return g.formats(formats) return convert.graph((rows, cols),
num_nodes=num_nodes,
idtype=idtype, device=device)
def rand_bipartite(utype, etype, vtype, def rand_bipartite(utype, etype, vtype,
num_src_nodes, num_dst_nodes, num_edges, num_src_nodes, num_dst_nodes, num_edges,
idtype=F.int64, device=F.cpu(), idtype=F.int64, device=F.cpu()):
formats=['csr', 'coo', 'csc']): """Generate a random uni-directional bipartite graph and return.
"""Generate a random bipartite graph of the given number of src/dst nodes and
number of edges. It uniformly chooses ``num_edges`` from all possible node pairs and forms a graph.
The random choice is without replacement, which means there will be no multi-edge
in the resulting graph.
It uniformly chooses ``num_edges`` from all pairs and form a graph. To control the randomness, set the random seed via :func:`dgl.seed`.
Parameters Parameters
---------- ----------
...@@ -60,28 +77,43 @@ def rand_bipartite(utype, etype, vtype, ...@@ -60,28 +77,43 @@ def rand_bipartite(utype, etype, vtype,
vtype : str, optional vtype : str, optional
The name of the destination node type. The name of the destination node type.
num_src_nodes : int num_src_nodes : int
The number of source nodes, the :math:`|U|` in :math:`G=(U,V,E)`. The number of source nodes.
num_dst_nodes : int num_dst_nodes : int
The number of destination nodes, the :math:`|V|` in :math:`G=(U,V,E)`. The number of destination nodes.
num_edges : int num_edges : int
The number of edges The number of edges
idtype : int32, int64, optional idtype : int32, int64, optional
Integer ID type. Must be int32 or int64. Default: int64. The data type for storing the structure-related graph information
such as node and edge IDs. It should be a framework-specific data type object
(e.g., torch.int32). By default, DGL uses int64.
device : Device context, optional device : Device context, optional
Device on which the graph is created. Default: CPU. The device of the resulting graph. It should be a framework-specific device
formats : str or list of str object (e.g., torch.device). By default, DGL stores the graph on CPU.
It can be ``'coo'``/``'csr'``/``'csc'`` or a sublist of them,
Force the storage formats. Default: ``['coo', 'csr', 'csc']``.
Returns Returns
------- -------
DGLHeteroGraph DGLGraph
Generated random bipartite graph. The generated random bipartite graph.
See Also
--------
rand_graph
Examples
--------
>>> import dgl
>>> dgl.rand_bipartite('user', 'buys', 'game', 50, 100, 10)
Graph(num_nodes={'game': 100, 'user': 50},
num_edges={('user', 'buys', 'game'): 10},
metagraph=[('user', 'game', 'buys')])
""" """
#TODO(minjie): support RNG as one of the arguments.
eids = random.choice(num_src_nodes * num_dst_nodes, num_edges, replace=False) eids = random.choice(num_src_nodes * num_dst_nodes, num_edges, replace=False)
rows = F.copy_to(F.astype(eids / num_dst_nodes, idtype), device) eids = F.zerocopy_to_numpy(eids)
cols = F.copy_to(F.astype(eids % num_dst_nodes, idtype), device) rows = F.zerocopy_from_numpy(eids // num_dst_nodes)
g = convert.heterograph({(utype, etype, vtype): (rows, cols)}, cols = F.zerocopy_from_numpy(eids % num_dst_nodes)
{utype: num_src_nodes, vtype: num_dst_nodes}, rows = F.copy_to(F.astype(rows, idtype), device)
idtype=idtype, device=device) cols = F.copy_to(F.astype(cols, idtype), device)
return g.formats(formats) return convert.heterograph({(utype, etype, vtype): (rows, cols)},
{utype: num_src_nodes, vtype: num_dst_nodes},
idtype=idtype, device=device)
"""Classes for heterogeneous graphs.""" """Classes for heterogeneous graphs."""
#pylint: disable= too-many-lines #pylint: disable= too-many-lines
from collections import defaultdict, Iterable from collections import defaultdict
from collections.abc import Mapping from collections.abc import Mapping, Iterable
from contextlib import contextmanager from contextlib import contextmanager
import copy import copy
import numbers import numbers
...@@ -21,158 +21,17 @@ from .view import HeteroNodeView, HeteroNodeDataView, HeteroEdgeView, HeteroEdge ...@@ -21,158 +21,17 @@ from .view import HeteroNodeView, HeteroNodeDataView, HeteroEdgeView, HeteroEdge
__all__ = ['DGLHeteroGraph', 'combine_names'] __all__ = ['DGLHeteroGraph', 'combine_names']
class DGLHeteroGraph(object): class DGLHeteroGraph(object):
"""Base heterogeneous graph class. """Class for storing graph structure and node/edge feature data.
**Do NOT instantiate from this class directly; use** :mod:`conversion methods
<dgl.convert>` **instead.**
A Heterogeneous graph is defined as a graph with node types and edge
types.
If two edges share the same edge type, then their source nodes, as well
as their destination nodes, also have the same type (the source node
types don't have to be the same as the destination node types).
Examples
--------
Suppose that we want to construct the following heterogeneous graph:
.. graphviz::
digraph G {
Alice -> Bob [label=follows]
Bob -> Carol [label=follows]
Alice -> Tetris [label=plays]
Bob -> Tetris [label=plays]
Bob -> Minecraft [label=plays]
Carol -> Minecraft [label=plays]
Nintendo -> Tetris [label=develops]
Mojang -> Minecraft [label=develops]
{rank=source; Alice; Bob; Carol}
{rank=sink; Nintendo; Mojang}
}
And suppose that one maps the users, games and developers to the following
IDs:
========= ===== === =====
User name Alice Bob Carol
========= ===== === =====
User ID 0 1 2
========= ===== === =====
========= ====== =========
Game name Tetris Minecraft
========= ====== =========
Game ID 0 1
========= ====== =========
============== ======== ======
Developer name Nintendo Mojang
============== ======== ======
Developer ID 0 1
============== ======== ======
One can construct the graph as follows:
>>> g = dgl.heterograph({
... ('user', 'follows', 'user'): ([0, 1], [1, 2]),
... ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 1, 1]),
... ('developer', 'develops', 'game'): ([0, 1], [0, 1]),
... })
Then one can query the graph structure by specifying the ``ntype`` or ``etype`` arguments:
>>> g.number_of_nodes('user')
3
>>> g.number_of_edges('plays')
4
>>> g.out_degrees(etype='develops') # out-degrees of source nodes of 'develops' edge type
tensor([1, 1])
>>> g.in_edges(0, etype='develops') # in-edges of destination node 0 of 'develops' edge type
(tensor([0]), tensor([0]))
Or on the sliced graph for an edge type:
>>> g['plays'].number_of_edges()
4
>>> g['develops'].out_degrees()
tensor([1, 1])
>>> g['develops'].in_edges(0)
(tensor([0]), tensor([0]))
Node type names must be distinct (no two types have the same name). Edge types could
have the same name but they must be distinguishable by the ``(src_type, edge_type, dst_type)``
triplet (called *canonical edge type*).
For example, suppose a graph that has two types of relation "user-watches-movie"
and "user-watches-TV" as follows:
>>> GG = dgl.heterograph({
... ('user', 'watches', 'movie'): ([0, 1, 1], [1, 0, 1]),
... ('user', 'watches', 'TV'): ([0, 1], [0, 1])
... })
To distinguish between the two "watches" edge type, one must specify a full triplet:
>>> GG.number_of_edges(('user', 'watches', 'movie'))
3
>>> GG.number_of_edges(('user', 'watches', 'TV'))
2
>>> GG['user', 'watches', 'movie'].out_degrees()
tensor([1, 2])
Using only one single edge type string "watches" is ambiguous and will cause error:
>>> GG.number_of_edges('watches') # AMBIGUOUS!!
In many cases, there is only one type of nodes or one type of edges, and the ``ntype``
and ``etype`` argument could be omitted. This is very common when using the sliced
graph, which usually contains only one edge type, and sometimes only one node type:
>>> g['follows'].number_of_nodes() # OK!! because g['follows'] only has one node type 'user'
3
>>> g['plays'].number_of_nodes() # ERROR!! There are two types 'user' and 'game'.
>>> g['plays'].number_of_edges() # OK!! because there is only one edge type 'plays'
TODO(minjie): docstring about uni-directional bipartite graph
Metagraph
---------
For each heterogeneous graph, one can often infer the *metagraph*, the template of
edge connections showing how many types of nodes and edges exist in the graph, and
how each edge type could connect between node types.
One can analyze the example gameplay graph above and figure out the metagraph as
follows:
.. graphviz::
digraph G {
User -> User [label=follows]
User -> Game [label=plays]
Developer -> Game [label=develops]
}
There are a few ways to create a DGLGraph:
Parameters * To create a homogeneous graph from Tensor data, use :func:`dgl.graph`.
---------- * To create a heterogeneous graph from Tensor data, use :func:`dgl.heterograph`.
gidx : HeteroGraphIndex * To create a graph from other data sources, use ``dgl.*`` create ops. See
Graph index object. :ref:`api-graph-create-ops`.
ntypes : list of str, pair of list of str
Node type list. ``ntypes[i]`` stores the name of node type i. Read the user guide chapter :ref:`guide-graph` for an in-depth explanation about its
If a pair is given, the graph created is a uni-directional bipartite graph, usage.
and its SRC node types and DST node types are given as in the pair.
etypes : list of str
Edge type list. ``etypes[i]`` stores the name of edge type i.
node_frames : list[Frame], optional
Node feature storage. If None, empty frame is created.
Otherwise, ``node_frames[i]`` stores the node features
of node type i. (default: None)
edge_frames : list[Frame], optional
Edge feature storage. If None, empty frame is created.
Otherwise, ``edge_frames[i]`` stores the edge features
of edge type i. (default: None)
""" """
is_block = False is_block = False
...@@ -184,6 +43,27 @@ class DGLHeteroGraph(object): ...@@ -184,6 +43,27 @@ class DGLHeteroGraph(object):
node_frames=None, node_frames=None,
edge_frames=None, edge_frames=None,
**deprecate_kwargs): **deprecate_kwargs):
"""Internal constructor for creating a DGLGraph.
Parameters
----------
gidx : HeteroGraphIndex
Graph index object.
ntypes : list of str, pair of list of str
Node type list. ``ntypes[i]`` stores the name of node type i.
If a pair is given, the graph created is a uni-directional bipartite graph,
and its SRC node types and DST node types are given as in the pair.
etypes : list of str
Edge type list. ``etypes[i]`` stores the name of edge type i.
node_frames : list[Frame], optional
Node feature storage. If None, empty frame is created.
Otherwise, ``node_frames[i]`` stores the node features
of node type i. (default: None)
edge_frames : list[Frame], optional
Edge feature storage. If None, empty frame is created.
Otherwise, ``edge_frames[i]`` stores the edge features
of edge type i. (default: None)
"""
if isinstance(gidx, DGLHeteroGraph): if isinstance(gidx, DGLHeteroGraph):
raise DGLError('The input is already a DGLGraph. No need to create it again.') raise DGLError('The input is already a DGLGraph. No need to create it again.')
if not isinstance(gidx, heterograph_index.HeteroGraphIndex): if not isinstance(gidx, heterograph_index.HeteroGraphIndex):
...@@ -851,12 +731,17 @@ class DGLHeteroGraph(object): ...@@ -851,12 +731,17 @@ class DGLHeteroGraph(object):
@property @property
def ntypes(self): def ntypes(self):
"""Return the node types of the graph. """Return all the node type names in the graph.
Returns Returns
------- -------
list of str list[str]
Each ``str`` is a node type. All the node type names in a list.
Notes
-----
DGL internally assigns an integer ID for each node type. The returned
node type names are sorted according to their IDs.
Examples Examples
-------- --------
...@@ -877,19 +762,27 @@ class DGLHeteroGraph(object): ...@@ -877,19 +762,27 @@ class DGLHeteroGraph(object):
@property @property
def etypes(self): def etypes(self):
"""Return the edge types of the graph. """Return all the edge type names in the graph.
Returns Returns
------- -------
list of str list[str]
Each ``str`` is an edge type. All the edge type names in a list.
Notes Notes
----- -----
An edge type can appear in multiple canonical edge types. For example, ``'interacts'`` DGL internally assigns an integer ID for each edge type. The returned
can appear in two canonical edge types ``('drug', 'interacts', 'drug')`` and edge type names are sorted according to their IDs.
``('protein', 'interacts', 'protein')``. It is recommended to use
:func:`~dgl.DGLGraph.canonical_etypes` in this case. The complete format to specify a relation is a string triplet ``(str, str, str)``
for source node type, edge type and destination node type. DGL calls this
format *canonical edge type*. An edge type can appear in multiple canonical edge types.
For example, ``'interacts'`` can appear in two canonical edge types
``('drug', 'interacts', 'drug')`` and ``('protein', 'interacts', 'protein')``.
See Also
--------
canonical_etypes
Examples Examples
-------- --------
...@@ -910,16 +803,24 @@ class DGLHeteroGraph(object): ...@@ -910,16 +803,24 @@ class DGLHeteroGraph(object):
@property @property
def canonical_etypes(self): def canonical_etypes(self):
"""Return the canonical edge types of the graph. """Return all the canonical edge types in the graph.
A canonical edge type is a 3-tuple of str ``src_type, edge_type, dst_type``, where A canonical edge type is a string triplet ``(str, str, str)``
``src_type``, ``edge_type``, ``dst_type`` are the type of the source nodes, edges for source node type, edge type and destination node type.
and destination nodes respectively.
Returns Returns
------- -------
list of 3-tuple of str list[(str, str, str)]
Each 3-tuple of str is a canonical edge type. All the canonical edge type triplets in a list.
Notes
-----
DGL internally assigns an integer ID for each edge type. The returned
edge type names are sorted according to their IDs.
See Also
--------
etypes
Examples Examples
-------- --------
...@@ -942,15 +843,24 @@ class DGLHeteroGraph(object): ...@@ -942,15 +843,24 @@ class DGLHeteroGraph(object):
@property @property
def srctypes(self): def srctypes(self):
"""Return the source node types. """Return all the source node type names in this graph.
If the graph can further divide its node types into two subsets A and B where
all the edges are from nodes of types in A to nodes of types in B, we call
this graph a *uni-bipartite* graph and the nodes in A being the *source*
nodes and the ones in B being the *destination* nodes. If the graph is not
uni-bipartite, the source and destination nodes are just the entire set of
nodes in the graph.
Returns Returns
------- -------
list of str list[str]
All the source node type names in a list.
* If the graph is a uni-bipartite graph, it returns the source node types. See Also
For a definition of uni-bipartite, see :func:`is_unibipartite`. --------
* Otherwise, it returns all node types in the graph. dsttypes
is_unibipartite
Examples Examples
-------- --------
...@@ -984,16 +894,24 @@ class DGLHeteroGraph(object): ...@@ -984,16 +894,24 @@ class DGLHeteroGraph(object):
@property @property
def dsttypes(self): def dsttypes(self):
"""Return the destination node types. """Return all the destination node type names in this graph.
If the graph can further divide its node types into two subsets A and B where
all the edges are from nodes of types in A to nodes of types in B, we call
this graph a *uni-bipartite* graph and the nodes in A being the *source*
nodes and the ones in B being the *destination* nodes. If the graph is not
uni-bipartite, the source and destination nodes are just the entire set of
nodes in the graph.
Returns Returns
------- -------
list of str list[str]
Each str is a node type. All the destination node type names in a list.
* If the graph is a uni-bipartite graph, it returns the destination node types. See Also
For a definition of uni-bipartite, see :func:`is_unibipartite`. --------
* Otherwise, it returns all node types in the graph. srctypes
is_unibipartite
Examples Examples
-------- --------
...@@ -1065,29 +983,24 @@ class DGLHeteroGraph(object): ...@@ -1065,29 +983,24 @@ class DGLHeteroGraph(object):
def to_canonical_etype(self, etype): def to_canonical_etype(self, etype):
"""Convert an edge type to the corresponding canonical edge type in the graph. """Convert an edge type to the corresponding canonical edge type in the graph.
A canonical edge type is a 3-tuple of strings ``src_type, edge_type, dst_type``, where A canonical edge type is a string triplet ``(str, str, str)``
``src_type``, ``edge_type``, ``dst_type`` are separately the type of source for source node type, edge type and destination node type.
nodes, edges and destination nodes.
The function expects the given edge type name can uniquely identify a canonical edge
type. DGL will raise error if this is not the case.
Parameters Parameters
---------- ----------
etype : str or 3-tuple of str etype : str or (str, str, str)
If :attr:`etype` is an edge type (str), it returns the corresponding canonical edge If :attr:`etype` is an edge type (str), it returns the corresponding canonical edge
type in the graph. If :attr:`etype` is already a canonical edge type type in the graph. If :attr:`etype` is already a canonical edge type,
(3-tuple of str), it simply returns :attr:`etype`. it directly returns the input unchanged.
Returns Returns
------- -------
3-tuple of str (str, str, str)
The canonical edge type corresponding to the edge type. The canonical edge type corresponding to the edge type.
Notes
-----
If :attr:`etype` is an edge type, the API expects it to appear only once in the graph. For
example, in a graph with canonical edge types ``('A', 'follows', 'B')``,
``('A', 'follows', 'C')`` and ``('B', 'watches', 'D')``, ``'follows'`` is an invalid value
for :attr:`etype` while ``'watches'`` is a valid one.
Examples Examples
-------- --------
The following example uses PyTorch backend. The following example uses PyTorch backend.
...@@ -1131,7 +1044,7 @@ class DGLHeteroGraph(object): ...@@ -1131,7 +1044,7 @@ class DGLHeteroGraph(object):
return ret return ret
def get_ntype_id(self, ntype): def get_ntype_id(self, ntype):
"""Return the id of the given node type. """Return the ID of the given node type.
ntype can also be None. If so, there should be only one node type in the ntype can also be None. If so, there should be only one node type in the
graph. graph.
...@@ -1165,7 +1078,7 @@ class DGLHeteroGraph(object): ...@@ -1165,7 +1078,7 @@ class DGLHeteroGraph(object):
return ntid return ntid
def get_ntype_id_from_src(self, ntype): def get_ntype_id_from_src(self, ntype):
"""Return the id of the given SRC node type. """Internal function to return the ID of the given SRC node type.
ntype can also be None. If so, there should be only one node type in the ntype can also be None. If so, there should be only one node type in the
SRC category. Callable even when the self graph is not uni-bipartite. SRC category. Callable even when the self graph is not uni-bipartite.
...@@ -1190,7 +1103,7 @@ class DGLHeteroGraph(object): ...@@ -1190,7 +1103,7 @@ class DGLHeteroGraph(object):
return ntid return ntid
def get_ntype_id_from_dst(self, ntype): def get_ntype_id_from_dst(self, ntype):
"""Return the id of the given DST node type. """Internal function to return the ID of the given DST node type.
ntype can also be None. If so, there should be only one node type in the ntype can also be None. If so, there should be only one node type in the
DST category. Callable even when the self graph is not uni-bipartite. DST category. Callable even when the self graph is not uni-bipartite.
...@@ -2057,16 +1970,16 @@ class DGLHeteroGraph(object): ...@@ -2057,16 +1970,16 @@ class DGLHeteroGraph(object):
################################################################# #################################################################
def number_of_nodes(self, ntype=None): def number_of_nodes(self, ntype=None):
"""Alias of :func:`num_nodes`""" """Alias of :meth:`num_nodes`"""
return self.num_nodes(ntype) return self.num_nodes(ntype)
def num_nodes(self, ntype=None): def num_nodes(self, ntype=None):
"""Return the number of nodes. """Return the number of nodes in the graph.
Parameters Parameters
---------- ----------
ntype : str, optional ntype : str, optional
The node type for query. If given, it returns the number of nodes for a particular The node type name. If given, it returns the number of nodes of the
type. If not given (default), it returns the total number of nodes of all types. type. If not given (default), it returns the total number of nodes of all types.
Returns Returns
...@@ -2104,17 +2017,24 @@ class DGLHeteroGraph(object): ...@@ -2104,17 +2017,24 @@ class DGLHeteroGraph(object):
return self._graph.number_of_nodes(self.get_ntype_id(ntype)) return self._graph.number_of_nodes(self.get_ntype_id(ntype))
def number_of_src_nodes(self, ntype=None): def number_of_src_nodes(self, ntype=None):
"""Alias of :func:`num_src_nodes`""" """Alias of :meth:`num_src_nodes`"""
return self.num_src_nodes(ntype) return self.num_src_nodes(ntype)
def num_src_nodes(self, ntype=None): def num_src_nodes(self, ntype=None):
"""Return the number of nodes of the given source node type. """Return the number of source nodes in the graph.
If the graph can further divide its node types into two subsets A and B where
all the edges are from nodes of types in A to nodes of types in B, we call
this graph a *uni-bipartite* graph and the nodes in A being the *source*
nodes and the ones in B being the *destination* nodes. If the graph is not
uni-bipartite, the source and destination nodes are just the entire set of
nodes in the graph.
Parameters Parameters
---------- ----------
ntype : str, optional ntype : str, optional
The source node type for query. If given, it returns the number of nodes for a The source node type name. If given, it returns the number of nodes for
particular source node type. If not given (default), it returns the number of the source node type. If not given (default), it returns the number of
nodes summed over all source node types. nodes summed over all source node types.
Returns Returns
...@@ -2122,6 +2042,11 @@ class DGLHeteroGraph(object): ...@@ -2122,6 +2042,11 @@ class DGLHeteroGraph(object):
int int
The number of nodes The number of nodes
See Also
--------
num_dst_nodes
is_unibipartite
Examples Examples
-------- --------
The following example uses PyTorch backend. The following example uses PyTorch backend.
...@@ -2162,20 +2087,32 @@ class DGLHeteroGraph(object): ...@@ -2162,20 +2087,32 @@ class DGLHeteroGraph(object):
return self.num_dst_nodes(ntype) return self.num_dst_nodes(ntype)
def num_dst_nodes(self, ntype=None): def num_dst_nodes(self, ntype=None):
"""Return the number of nodes of the given destination node type. """Return the number of destination nodes in the graph.
If the graph can further divide its node types into two subsets A and B where
all the edges are from nodes of types in A to nodes of types in B, we call
this graph a *uni-bipartite* graph and the nodes in A being the *source*
nodes and the ones in B being the *destination* nodes. If the graph is not
uni-bipartite, the source and destination nodes are just the entire set of
nodes in the graph.
Parameters Parameters
---------- ----------
ntype : str, optional ntype : str, optional
The destination node type for query. If given, it returns the number of nodes for a The destination node type name. If given, it returns the number of nodes of
particular destination node type. If not given (default), it returns the number of the destination node type. If not given (default), it returns the number of
nodes summed over all destination node types. nodes summed over all the destination node types.
Returns Returns
------- -------
int int
The number of nodes The number of nodes
See Also
--------
num_src_nodes
is_unibipartite
Examples Examples
-------- --------
The following example uses PyTorch backend. The following example uses PyTorch backend.
...@@ -2216,16 +2153,19 @@ class DGLHeteroGraph(object): ...@@ -2216,16 +2153,19 @@ class DGLHeteroGraph(object):
return self.num_edges(etype) return self.num_edges(etype)
def num_edges(self, etype=None): def num_edges(self, etype=None):
"""Return the number of edges. """Return the number of edges in the graph.
Parameters Parameters
---------- ----------
etype : str or tuple of str, optional etype : str or (str, str, str), optional
The edge type for query, which can be an edge type (str) or a canonical edge type The type name of the edges. The allowed type name formats are:
(3-tuple of str). When an edge type appears in multiple canonical edge types, one
must use a canonical edge type. If given, it returns the number of edges for a * ``(str, str, str)`` for source node type, edge type and destination node type.
particular edge type. If not given (default), it returns the total number of edges * or one ``str`` edge type name if the name can uniquely identify a
of all types. triplet format in the graph.
If not provided, return the total number of edges regardless of the types
in the graph.
Returns Returns
------- -------
...@@ -2277,22 +2217,21 @@ class DGLHeteroGraph(object): ...@@ -2277,22 +2217,21 @@ class DGLHeteroGraph(object):
@property @property
def is_multigraph(self): def is_multigraph(self):
"""Whether the graph is a multigraph """Return whether the graph is a multigraph with parallel edges.
In a multigraph, there can be multiple edges from a node ``u`` to a node ``v``.
For a heterogeneous graph of multiple canonical edge types, we consider it as a A multigraph has more than one edge between the same pair of nodes, called
multigraph if there are multiple edges from a node ``u`` to a node ``v`` for any *parallel edges*. For heterogeneous graphs, parallel edges further require
canonical edge type. the canonical edge type to be the same (see :meth:`canonical_etypes` for the
definition).
Returns Returns
------- -------
bool bool
Whether the graph is a multigraph. True if the graph is a multigraph.
Notes Notes
----- -----
Checking whether the graph is a multigraph can be expensive for a large one. Checking whether the graph is a multigraph could be expensive for a large one.
Examples Examples
-------- --------
...@@ -2330,14 +2269,14 @@ class DGLHeteroGraph(object): ...@@ -2330,14 +2269,14 @@ class DGLHeteroGraph(object):
@property @property
def is_homogeneous(self): def is_homogeneous(self):
"""Whether the graph is a homogeneous graph. """Return whether the graph is a homogeneous graph.
A homogeneous graph only has one node type and one edge type. A homogeneous graph only has one node type and one edge type.
Returns Returns
------- -------
bool bool
Whether the graph is a homogeneous graph. True if the graph is a homogeneous graph.
Examples Examples
-------- --------
...@@ -2366,7 +2305,7 @@ class DGLHeteroGraph(object): ...@@ -2366,7 +2305,7 @@ class DGLHeteroGraph(object):
@property @property
def is_readonly(self): def is_readonly(self):
"""Deprecated: DGLGraph will always be mutable. """**DEPRECATED**: DGLGraph will always be mutable.
Returns Returns
------- -------
...@@ -2424,38 +2363,33 @@ class DGLHeteroGraph(object): ...@@ -2424,38 +2363,33 @@ class DGLHeteroGraph(object):
return self._graph.dtype return self._graph.dtype
def __contains__(self, vid): def __contains__(self, vid):
"""Deprecated: please directly call :func:`has_nodes`. """**DEPRECATED**: please directly call :func:`has_nodes`."""
"""
dgl_warning('DGLGraph.__contains__ is deprecated.' dgl_warning('DGLGraph.__contains__ is deprecated.'
' Please directly call has_nodes.') ' Please directly call has_nodes.')
return self.has_nodes(vid) return self.has_nodes(vid)
def has_nodes(self, vid, ntype=None): def has_nodes(self, vid, ntype=None):
"""Whether the graph has some particular node(s) of a given type. """Return whether the graph contains the given nodes.
Parameters Parameters
---------- ----------
vid : node ID(s) vid : node ID(s)
The node ID(s) for query. The allowed formats are: The node IDs. The allowed node ID formats are:
* ``int``: The ID of a single node.
* Int Tensor: Each element is a node ID. The tensor must have the same device type
and ID data type as the graph's.
* iterable[int]: Each element is a node ID.
- ``int``: The ID of a single node.
- ``Tensor``: A 1D tensor that contains the IDs of multiple nodes, whose data type and
device should be the same as the :py:attr:`idtype` and device of the graph.
- ``iterable[int]``: A sequence (e.g. list, tuple, numpy.ndarray)
of integers that contains the IDs of multiple nodes.
ntype : str, optional ntype : str, optional
The node type for query. It is required if the graph has The node type name. Can be omitted if there is
multiple node types. only one type of nodes in the graph.
Returns Returns
------- -------
bool or bool Tensor bool or bool Tensor
A tensor of bool flags where each element is True if the node is in the graph.
- If :attr:`vid` is an ``int``, the result will be a ``bool`` indicating If the input is a single node, return one bool value.
whether the graph has the particular node.
- If :attr:`vid` is a 1D ``Tensor`` or ``iterable[int]`` of node IDs,
the result will be a bool Tensor whose i-th element indicates whether
the graph has node :attr:`vid[i]` of the given type.
Examples Examples
-------- --------
...@@ -2494,50 +2428,47 @@ class DGLHeteroGraph(object): ...@@ -2494,50 +2428,47 @@ class DGLHeteroGraph(object):
def has_node(self, vid, ntype=None): def has_node(self, vid, ntype=None):
"""Whether the graph has a particular node of a given type. """Whether the graph has a particular node of a given type.
DEPRECATED: see :func:`~DGLGraph.has_nodes` **DEPRECATED**: see :func:`~DGLGraph.has_nodes`
""" """
dgl_warning("DGLGraph.has_node is deprecated. Please use DGLGraph.has_nodes") dgl_warning("DGLGraph.has_node is deprecated. Please use DGLGraph.has_nodes")
return self.has_nodes(vid, ntype) return self.has_nodes(vid, ntype)
def has_edges_between(self, u, v, etype=None): def has_edges_between(self, u, v, etype=None):
"""Whether the graph has some particular edge(s) of a given type. """Return whether the graph contains the given edges.
Parameters Parameters
---------- ----------
u : source node ID(s) u : node IDs
The source node(s) of the edges for query. The allowed formats are: The source node IDs of the edges. The allowed formats are:
- ``int``: The source node of an edge for query. * ``int``: A single node.
- ``Tensor``: A 1D tensor that contains the source node(s) of edge(s) for query. * Int Tensor: Each element is a node ID. The tensor must have the same device type
The data type and device of the tensor must be the same as the :py:attr:`idtype` and and ID data type as the graph's.
device of the graph. Its i-th element represents the source node ID of the * iterable[int]: Each element is a node ID.
i-th edge for query.
- ``iterable[int]`` : Similar to the tensor, but stores node IDs in a sequence v : node IDs
(e.g. list, tuple, numpy.ndarray). The destination node IDs of the edges. The allowed formats are:
v : destination node ID(s)
The destination node(s) of the edges for query. It's a counterpart of :attr:`u` * ``int``: A single node.
for destination nodes and should have the same format as :attr:`u`. If :attr:`u` * Int Tensor: Each element is a node ID. The tensor must have the same device type
and :attr:`v` are not int, they should have the same length. and ID data type as the graph's.
etype : str or tuple of str, optional * iterable[int]: Each element is a node ID.
The edge type for query, which can be an edge type (str) or a canonical edge type
(3-tuple of str). When an edge type appears in multiple canonical edge types, one etype : str or (str, str, str), optional
must use a canonical edge type. If the graph has multiple edge types, one must The type names of the edges. The allowed type name formats are:
specify the argument. Otherwise, it can be omitted.
* ``(str, str, str)`` for source node type, edge type and destination node type.
* or one ``str`` edge type name if the name can uniquely identify a
triplet format in the graph.
Can be omitted if the graph has only one type of edges.
Returns Returns
------- -------
bool or bool Tensor bool or bool Tensor
A tensor of bool flags where each element is True if the edge is in the graph.
- If :attr:`u` and :attr:`v` are ``int`` objects, the result will be a ``bool`` If the input is a single edge, return one bool value.
indicating whether there is an edge from ``u`` to ``v`` of the given edge type.
- If :attr:`u` and :attr:`v` are ``Tensor`` or ``iterable[int]`` objects, the
result will be a bool Tensor whose i-th element indicates whether there is an
edge from ``u[i]`` to ``v[i]`` of the given edge type.
Notes
-----
The value(s) of :attr:`u` and :attr:`v` need to be separately smaller than the
number of nodes of the source and destination type.
Examples Examples
-------- --------
...@@ -2595,7 +2526,7 @@ class DGLHeteroGraph(object): ...@@ -2595,7 +2526,7 @@ class DGLHeteroGraph(object):
def has_edge_between(self, u, v, etype=None): def has_edge_between(self, u, v, etype=None):
"""Whether the graph has edges of type ``etype``. """Whether the graph has edges of type ``etype``.
DEPRECATED: please use :func:`~DGLGraph.has_edges_between`. **DEPRECATED**: please use :func:`~DGLGraph.has_edges_between`.
""" """
dgl_warning("DGLGraph.has_edge_between is deprecated. " dgl_warning("DGLGraph.has_edge_between is deprecated. "
"Please use DGLGraph.has_edges_between") "Please use DGLGraph.has_edges_between")
...@@ -2610,12 +2541,16 @@ class DGLHeteroGraph(object): ...@@ -2610,12 +2541,16 @@ class DGLHeteroGraph(object):
Parameters Parameters
---------- ----------
v : int v : int
The destination node for query. The node ID.
etype : str or tuple of str, optional etype : str or (str, str, str), optional
The edge type for query, which can be an edge type (str) or a canonical edge type The type names of the edges. The allowed type name formats are:
(3-tuple of str). When an edge type appears in multiple canonical edge types, one
must use a canonical edge type. If the graph has multiple edge types, one must * ``(str, str, str)`` for source node type, edge type and destination node type.
specify the argument. Otherwise, it can be omitted. * or one ``str`` edge type name if the name can uniquely identify a
triplet format in the graph.
Can be omitted if the graph has only one type of edges.
Returns Returns
------- -------
...@@ -2664,12 +2599,15 @@ class DGLHeteroGraph(object): ...@@ -2664,12 +2599,15 @@ class DGLHeteroGraph(object):
Parameters Parameters
---------- ----------
v : int v : int
The source node for query. The node ID.
etype : str or tuple of str, optional etype : str or (str, str, str), optional
The edge type for query, which can be an edge type (str) or a canonical edge type The type names of the edges. The allowed type name formats are:
(3-tuple of str). When an edge type appears in multiple canonical edge types, one
must use a canonical edge type. If the graph has multiple edge types, one must * ``(str, str, str)`` for source node type, edge type and destination node type.
specify the argument. Otherwise, it can be omitted. * or one ``str`` edge type name if the name can uniquely identify a
triplet format in the graph.
Can be omitted if the graph has only one type of edges.
Returns Returns
------- -------
...@@ -2720,48 +2658,51 @@ class DGLHeteroGraph(object): ...@@ -2720,48 +2658,51 @@ class DGLHeteroGraph(object):
return_uv=return_uv, etype=etype) return_uv=return_uv, etype=etype)
def edge_ids(self, u, v, force_multi=None, return_uv=False, etype=None): def edge_ids(self, u, v, force_multi=None, return_uv=False, etype=None):
"""Return the ID(s) of edge(s) from the given source node(s) to the given destination """Return the edge ID(s) given the two endpoints of the edge(s).
node(s) with the specified edge type.
Parameters Parameters
---------- ----------
u : source node ID(s) u : node IDs
The source node(s) of the edges for query. The allowed formats are: The source node IDs of the edges. The allowed formats are:
- ``int``: The source node of an edge for query. * ``int``: A single node.
- ``Tensor``: A 1D tensor that contains the source node(s) of edge(s) for query, whose * Int Tensor: Each element is a node ID. The tensor must have the same device type
data type and device should be the same as the :py:attr:`idtype` and device of and ID data type as the graph's.
the graph. Its i-th element is the source node of the i-th edge for query. * iterable[int]: Each element is a node ID.
- ``iterable[int]``: Similar to the tensor, but stores node IDs in a sequence
(e.g. list, tuple, numpy.ndarray). v : node IDs
v : destination node ID(s) The destination node IDs of the edges. The allowed formats are:
The destination node(s) of the edges for query. It's a counterpart of :attr:`u`
for destination nodes and should have the same format as :attr:`u`. If :attr:`u` * ``int``: A single node.
and :attr:`v` are not int, they should have the same length. * Int Tensor: Each element is a node ID. The tensor must have the same device type
and ID data type as the graph's.
* iterable[int]: Each element is a node ID.
force_multi : bool, optional force_multi : bool, optional
Deprecated, use :attr:`return_uv` instead. Whether to allow the graph to be a **DEPRECATED**, use :attr:`return_uv` instead. Whether to allow the graph to be a
multigraph, i.e. there can be multiple edges from one node to another. multigraph, i.e. there can be multiple edges from one node to another.
return_uv : bool, optional return_uv : bool, optional
Whether to return the source and destination node IDs along with the edges. If Whether to return the source and destination node IDs along with the edges. If
False (default), it assumes that the graph is a simple graph and there is only False (default), it assumes that the graph is a simple graph and there is only
one edge from one node to another. If True, there can be multiple edges found one edge from one node to another. If True, there can be multiple edges found
from one node to another. from one node to another.
etype : str or tuple of str, optional etype : str or (str, str, str), optional
The edge type for query, which can be an edge type (str) or a canonical edge type The type names of the edges. The allowed type name formats are:
(3-tuple of str). When an edge type appears in multiple canonical edge types, one
must use a canonical edge type. If the graph has multiple edge types, one must * ``(str, str, str)`` for source node type, edge type and destination node type.
specify the argument. Otherwise, it can be omitted. * or one ``str`` edge type name if the name can uniquely identify a
triplet format in the graph.
Can be omitted if the graph has only one type of edges.
Returns Returns
------- -------
tensor, or (tensor, tensor, tensor) Tensor, or (Tensor, Tensor, Tensor)
* If ``return_uv=False``, it returns a 1D tensor that contains the IDs of the edges. * If ``return_uv=False``, it returns the edge IDs in a tensor, where the i-th
If :attr:`u` and :attr:`v` are int, the tensor has length 1. Otherwise, the i-th element is the ID of the edge ``(u[i], v[i])``.
element of the tensor is the ID of the edge ``(u[i], v[i])``.
* If ``return_uv=True``, it returns a tuple of three 1D tensors ``(eu, ev, e)``. * If ``return_uv=True``, it returns a tuple of three 1D tensors ``(eu, ev, e)``.
``e[i]`` is the ID of an edge from ``eu[i]`` to ``ev[i]``. It returns all edges ``e[i]`` is the ID of an edge from ``eu[i]`` to ``ev[i]``. It returns all edges
from ``eu[i]`` to ``ev[i]`` in this case. (including parallel edges) from ``eu[i]`` to ``ev[i]`` in this case.
Notes Notes
----- -----
...@@ -2840,34 +2781,35 @@ class DGLHeteroGraph(object): ...@@ -2840,34 +2781,35 @@ class DGLHeteroGraph(object):
return F.as_scalar(eid) if is_int else eid return F.as_scalar(eid) if is_int else eid
def find_edges(self, eid, etype=None): def find_edges(self, eid, etype=None):
"""Return the source and destination node(s) of some particular edge(s) """Return the source and destination node ID(s) given the edge ID(s).
with the specified edge type.
Parameters Parameters
---------- ----------
eid : edge ID(s) eid : edge ID(s)
The IDs of the edges for query. The function expects that :attr:`eid` contains The edge IDs. The allowed formats are:
valid edge IDs only, i.e. among consecutive integers :math:`0, 1, ... E - 1`, where
:math:`E` is the number of edges with the specified edge type. * ``int``: A single ID.
* Int Tensor: Each element is an ID. The tensor must have the same device type
- ``int``: An edge ID for query. and ID data type as the graph's.
- ``Tensor``: A 1D tensor that contains the edge IDs for query, whose data * iterable[int]: Each element is an ID.
type and device should be the same as the :py:attr:`idtype` and device of the graph.
- ``iterable[int]``: Similar to the tensor, but stores edge IDs in a sequence etype : str or (str, str, str), optional
(e.g. list, tuple, numpy.ndarray). The type names of the edges. The allowed type name formats are:
etype : str or tuple of str, optional
The edge type for query, which can be an edge type (str) or a canonical edge type * ``(str, str, str)`` for source node type, edge type and destination node type.
(3-tuple of str). When an edge type appears in multiple canonical edge types, one * or one ``str`` edge type name if the name can uniquely identify a
must use a canonical edge type. triplet format in the graph.
Can be omitted if the graph has only one type of edges.
Returns Returns
------- -------
Tensor Tensor
The source node IDs of the edges, whose i-th element is the source node of the edge The source node IDs of the edges. The i-th element is the source node ID of
with ID ``eid[i]``. the i-th edge.
Tensor Tensor
The destination node IDs of the edges, whose i-th element is the destination node of The destination node IDs of the edges. The i-th element is the destination node
the edge with ID ``eid[i]``. ID of the i-th edge.
Examples Examples
-------- --------
...@@ -2910,20 +2852,19 @@ class DGLHeteroGraph(object): ...@@ -2910,20 +2852,19 @@ class DGLHeteroGraph(object):
return src, dst return src, dst
def in_edges(self, v, form='uv', etype=None): def in_edges(self, v, form='uv', etype=None):
"""Return the incoming edges of some particular node(s) with the specified edge type. """Return the incoming edges of the given nodes.
Parameters Parameters
---------- ----------
v : destination node ID(s) v : node ID(s)
The destination node(s) for query. The allowed formats are: The node IDs. The allowed formats are:
- ``int``: The destination node for query. * ``int``: A single node.
- ``Tensor``: A 1D tensor that contains the destination node(s) for query, whose data * Int Tensor: Each element is a node ID. The tensor must have the same device type
type and device should be the same as the :py:attr:`idtype` and device of the graph. and ID data type as the graph's.
- ``iterable[int]``: Similar to the tensor, but stores node IDs in a sequence * iterable[int]: Each element is a node ID.
(e.g. list, tuple, numpy.ndarray).
form : str, optional form : str, optional
The return form, which can be one of the following: The result format, which can be one of the following:
- ``'eid'``: The returned result is a 1D tensor :math:`EID`, representing - ``'eid'``: The returned result is a 1D tensor :math:`EID`, representing
the IDs of all edges. the IDs of all edges.
...@@ -2933,11 +2874,14 @@ class DGLHeteroGraph(object): ...@@ -2933,11 +2874,14 @@ class DGLHeteroGraph(object):
- ``'all'``: The returned result is a 3-tuple of 1D tensors :math:`(U, V, EID)`, - ``'all'``: The returned result is a 3-tuple of 1D tensors :math:`(U, V, EID)`,
representing the source nodes, destination nodes and IDs of all edges. representing the source nodes, destination nodes and IDs of all edges.
For each :math:`i`, :math:`(U[i], V[i])` forms an edge with ID :math:`EID[i]`. For each :math:`i`, :math:`(U[i], V[i])` forms an edge with ID :math:`EID[i]`.
etype : str or tuple of str, optional etype : str or (str, str, str), optional
The edge type for query, which can be an edge type (str) or a canonical edge type The type names of the edges. The allowed type name formats are:
(3-tuple of str). When an edge type appears in multiple canonical edge types, one
must use a canonical edge type. If the graph has multiple edge types, one must * ``(str, str, str)`` for source node type, edge type and destination node type.
specify the argument. Otherwise, it can be omitted. * or one ``str`` edge type name if the name can uniquely identify a
triplet format in the graph.
Can be omitted if the graph has only one type of edges.
Returns Returns
------- -------
...@@ -2992,18 +2936,17 @@ class DGLHeteroGraph(object): ...@@ -2992,18 +2936,17 @@ class DGLHeteroGraph(object):
raise DGLError('Invalid form: {}. Must be "all", "uv" or "eid".'.format(form)) raise DGLError('Invalid form: {}. Must be "all", "uv" or "eid".'.format(form))
def out_edges(self, u, form='uv', etype=None): def out_edges(self, u, form='uv', etype=None):
"""Return the outgoing edges of some particular node(s) with the specified edge type. """Return the outgoing edges of the given nodes.
Parameters Parameters
---------- ----------
u : source node ID(s) u : node ID(s)
The source node(s) for query. The allowed formats are: The node IDs. The allowed formats are:
- ``int``: The source node for query. * ``int``: A single node.
- ``Tensor``: A 1D tensor that contains the source node(s) for query, whose data * Int Tensor: Each element is a node ID. The tensor must have the same device type
type and device should be the same as the :py:attr:`idtype` and device of the graph. and ID data type as the graph's.
- ``iterable[int]``: Similar to the tensor, but stores node IDs in a sequence * iterable[int]: Each element is a node ID.
(e.g. list, tuple, numpy.ndarray).
form : str, optional form : str, optional
The return form, which can be one of the following: The return form, which can be one of the following:
...@@ -3015,11 +2958,14 @@ class DGLHeteroGraph(object): ...@@ -3015,11 +2958,14 @@ class DGLHeteroGraph(object):
- ``'all'``: The returned result is a 3-tuple of 1D tensors :math:`(U, V, EID)`, - ``'all'``: The returned result is a 3-tuple of 1D tensors :math:`(U, V, EID)`,
representing the source nodes, destination nodes and IDs of all edges. representing the source nodes, destination nodes and IDs of all edges.
For each :math:`i`, :math:`(U[i], V[i])` forms an edge with ID :math:`EID[i]`. For each :math:`i`, :math:`(U[i], V[i])` forms an edge with ID :math:`EID[i]`.
etype : str or tuple of str, optional etype : str or (str, str, str), optional
The edge type for query, which can be an edge type (str) or a canonical edge type The type names of the edges. The allowed type name formats are:
(3-tuple of str). When an edge type appears in multiple canonical edge types, one
must use a canonical edge type. If the graph has multiple edge types, one must * ``(str, str, str)`` for source node type, edge type and destination node type.
specify the argument. Otherwise, it can be omitted. * or one ``str`` edge type name if the name can uniquely identify a
triplet format in the graph.
Can be omitted if the graph has only one type of edges.
Returns Returns
------- -------
...@@ -3159,43 +3105,41 @@ class DGLHeteroGraph(object): ...@@ -3159,43 +3105,41 @@ class DGLHeteroGraph(object):
def in_degree(self, v, etype=None): def in_degree(self, v, etype=None):
"""Return the in-degree of node ``v`` with edges of type ``etype``. """Return the in-degree of node ``v`` with edges of type ``etype``.
DEPRECATED: Please use in_degrees **DEPRECATED**: Please use in_degrees
""" """
dgl_warning("DGLGraph.in_degree is deprecated. Please use DGLGraph.in_degrees") dgl_warning("DGLGraph.in_degree is deprecated. Please use DGLGraph.in_degrees")
return self.in_degrees(v, etype) return self.in_degrees(v, etype)
def in_degrees(self, v=ALL, etype=None): def in_degrees(self, v=ALL, etype=None):
"""Return the in-degree(s) of some particular node(s) with the specified edge type. """Return the in-degree(s) of the given nodes.
It computes the in-degree(s) w.r.t. the edges of the given edge type.
Parameters Parameters
---------- ----------
v : destination node ID(s), optional v : node IDs
The destination node(s) for query. The allowed formats are: The node IDs. The allowed formats are:
- ``int``: The destination node for query. * ``int``: A single node.
- ``Tensor``: A 1D tensor that contains the destination node(s) for query, whose data * Int Tensor: Each element is a node ID. The tensor must have the same device type
type and device should be the same as the :py:attr:`idtype` and device of the graph. and ID data type as the graph's.
- ``iterable[int]``: Similar to the tensor, but stores node IDs in a sequence * iterable[int]: Each element is a node ID.
(e.g. list, tuple, numpy.ndarray).
By default, it considers all nodes. If not given, return the in-degrees of all the nodes.
etype : str or tuple of str, optional etype : str or (str, str, str), optional
The edge type for query, which can be an edge type (str) or a canonical edge type The type name of the edges. The allowed type name formats are:
(3-tuple of str). When an edge type appears in multiple canonical edge types, one
must use a canonical edge type. If the graph has multiple edge types, one must * ``(str, str, str)`` for source node type, edge type and destination node type.
specify the argument. Otherwise, it can be omitted. * or one ``str`` edge type name if the name can uniquely identify a
triplet format in the graph.
Can be omitted if the graph has only one type of edges.
Returns Returns
------- -------
tensor or int int or Tensor
The in-degree(s) of the node(s). The in-degree(s) of the node(s) in a Tensor. The i-th element is the in-degree
of the i-th input node. If :attr:`v` is an ``int``, return an ``int`` too.
- If :attr:`v` is an ``int`` object, the return result will be an ``int``
object as well.
- If :attr:`v` is a ``Tensor`` or ``iterable[int]`` object, the return result
will be a 1D ``Tensor``. The data type of the result will be the same as the
idtype of the graph. The i-th element of the tensor is the in-degree of the
node ``v[i]``.
Examples Examples
-------- --------
...@@ -3251,36 +3195,35 @@ class DGLHeteroGraph(object): ...@@ -3251,36 +3195,35 @@ class DGLHeteroGraph(object):
return self.out_degrees(u, etype) return self.out_degrees(u, etype)
def out_degrees(self, u=ALL, etype=None): def out_degrees(self, u=ALL, etype=None):
"""Return the out-degree(s) of some particular node(s) with the specified edge type. """Return the out-degree(s) of the given nodes.
It computes the out-degree(s) w.r.t. the edges of the given edge type.
Parameters Parameters
---------- ----------
u : source node ID(s), optional u : node IDs
The node IDs. The allowed formats are:
- ``int``: The source node for query. * ``int``: A single node.
- ``Tensor``: A 1D tensor that contains the source node(s) for query, whose data * Int Tensor: Each element is a node ID. The tensor must have the same device type
type and device should be the same as the :py:attr:`idtype` and device of the graph. and ID data type as the graph's.
- ``iterable[int]``: Similar to the tensor, but stores node IDs in a sequence * iterable[int]: Each element is a node ID.
(e.g. list, tuple, numpy.ndarray).
By default, it considers all nodes. If not given, return the out-degrees of all the nodes.
etype : str or tuple of str, optional etype : str or (str, str, str), optional
The edge type for query, which can be an edge type (str) or a canonical edge type The type names of the edges. The allowed type name formats are:
(3-tuple of str). When an edge type appears in multiple canonical edge types, one
must use a canonical edge type. If the graph has multiple edge types, one must * ``(str, str, str)`` for source node type, edge type and destination node type.
specify the argument. Otherwise, it can be omitted. * or one ``str`` edge type name if the name can uniquely identify a
triplet format in the graph.
Can be omitted if the graph has only one type of edges.
Returns Returns
------- -------
tensor or int int or Tensor
The out-degree(s) of the node(s). The out-degree(s) of the node(s) in a Tensor. The i-th element is the out-degree
of the i-th input node. If :attr:`u` is an ``int``, return an ``int`` too.
- If :attr:`u` is an ``int`` object, the return result will be an ``int``
object as well.
- If :attr:`u` is a ``Tensor`` or ``iterable[int]`` object, the return result
will be a 1D ``Tensor``. The data type of the result will be the same as the
idtype of the graph. The i-th element of the tensor is the out-degree of the
node ``v[i]``.
Examples Examples
-------- --------
...@@ -3330,7 +3273,7 @@ class DGLHeteroGraph(object): ...@@ -3330,7 +3273,7 @@ class DGLHeteroGraph(object):
return deg return deg
def adjacency_matrix(self, transpose=True, ctx=F.cpu(), scipy_fmt=None, etype=None): def adjacency_matrix(self, transpose=True, ctx=F.cpu(), scipy_fmt=None, etype=None):
"""Alias of :func:`adj`""" """Alias of :meth:`adj`"""
return self.adj(transpose, ctx, scipy_fmt, etype) return self.adj(transpose, ctx, scipy_fmt, etype)
def adj(self, transpose=True, ctx=F.cpu(), scipy_fmt=None, etype=None): def adj(self, transpose=True, ctx=F.cpu(), scipy_fmt=None, etype=None):
...@@ -3351,12 +3294,15 @@ class DGLHeteroGraph(object): ...@@ -3351,12 +3294,15 @@ class DGLHeteroGraph(object):
scipy_fmt : str, optional scipy_fmt : str, optional
If specified, return a scipy sparse matrix in the given format. If specified, return a scipy sparse matrix in the given format.
Otherwise, return a backend dependent sparse tensor. (Default: None) Otherwise, return a backend dependent sparse tensor. (Default: None)
etype : str or tuple of str, optional etype : str or (str, str, str), optional
The edge type for query, which can be an edge type (str) or a canonical edge type The type names of the edges. The allowed type name formats are:
(3-tuple of str). When an edge type appears in multiple canonical edge types, one
must use a canonical edge type. If given, it returns the number of edges for a * ``(str, str, str)`` for source node type, edge type and destination node type.
particular edge type. If not given (default), it returns the total number of edges * or one ``str`` edge type name if the name can uniquely identify a
of all types. triplet format in the graph.
Can be omitted if the graph has only one type of edges.
Returns Returns
------- -------
...@@ -3398,6 +3344,7 @@ class DGLHeteroGraph(object): ...@@ -3398,6 +3344,7 @@ class DGLHeteroGraph(object):
else: else:
return self._graph.adjacency_matrix_scipy(etid, transpose, scipy_fmt, False) return self._graph.adjacency_matrix_scipy(etid, transpose, scipy_fmt, False)
def adjacency_matrix_scipy(self, transpose=True, fmt='csr', return_edge_ids=None): def adjacency_matrix_scipy(self, transpose=True, fmt='csr', return_edge_ids=None):
"""DEPRECATED: please use ``dgl.adjacency_matrix(transpose, scipy_fmt=fmt)``. """DEPRECATED: please use ``dgl.adjacency_matrix(transpose, scipy_fmt=fmt)``.
""" """
...@@ -3407,10 +3354,6 @@ class DGLHeteroGraph(object): ...@@ -3407,10 +3354,6 @@ class DGLHeteroGraph(object):
return self.adjacency_matrix(transpose=transpose, scipy_fmt=fmt) return self.adjacency_matrix(transpose=transpose, scipy_fmt=fmt)
def incidence_matrix(self, typestr, ctx=F.cpu(), etype=None):
"""Alias of :func:`inc`"""
return self.inc(typestr, ctx, etype)
def inc(self, typestr, ctx=F.cpu(), etype=None): def inc(self, typestr, ctx=F.cpu(), etype=None):
"""Return the incidence matrix representation of edges with the given """Return the incidence matrix representation of edges with the given
edge type. edge type.
...@@ -3446,12 +3389,14 @@ class DGLHeteroGraph(object): ...@@ -3446,12 +3389,14 @@ class DGLHeteroGraph(object):
Can be either ``in``, ``out`` or ``both`` Can be either ``in``, ``out`` or ``both``
ctx : context, optional ctx : context, optional
The context of returned incidence matrix. (Default: cpu) The context of returned incidence matrix. (Default: cpu)
etype : str or tuple of str, optional etype : str or (str, str, str), optional
The edge type for query, which can be an edge type (str) or a canonical edge type The type names of the edges. The allowed type name formats are:
(3-tuple of str). When an edge type appears in multiple canonical edge types, one
must use a canonical edge type. If given, it returns the number of edges for a * ``(str, str, str)`` for source node type, edge type and destination node type.
particular edge type. If not given (default), it returns the total number of edges * or one ``str`` edge type name if the name can uniquely identify a
of all types. triplet format in the graph.
Can be omitted if the graph has only one type of edges.
Returns Returns
------- -------
...@@ -3485,6 +3430,8 @@ class DGLHeteroGraph(object): ...@@ -3485,6 +3430,8 @@ class DGLHeteroGraph(object):
etid = self.get_etype_id(etype) etid = self.get_etype_id(etype)
return self._graph.incidence_matrix(etid, typestr, ctx)[0] return self._graph.incidence_matrix(etid, typestr, ctx)[0]
incidence_matrix = inc
################################################################# #################################################################
# Features # Features
################################################################# #################################################################
...@@ -3497,8 +3444,8 @@ class DGLHeteroGraph(object): ...@@ -3497,8 +3444,8 @@ class DGLHeteroGraph(object):
Parameters Parameters
---------- ----------
ntype : str, optional ntype : str, optional
The node type for query. If the graph has multiple node types, one must The node type name. Can be omitted if there is only one type of nodes
specify the argument. Otherwise, it can be omitted. in the graph.
Returns Returns
------- -------
...@@ -3544,11 +3491,15 @@ class DGLHeteroGraph(object): ...@@ -3544,11 +3491,15 @@ class DGLHeteroGraph(object):
Parameters Parameters
---------- ----------
etype : str or tuple of str, optional etype : str or (str, str, str), optional
The edge type for query, which can be an edge type (str) or a canonical edge type The type names of the edges. The allowed type name formats are:
(3-tuple of str). When an edge type appears in multiple canonical edge types, one
must use a canonical edge type. If the graph has multiple edge types, one must * ``(str, str, str)`` for source node type, edge type and destination node type.
specify the argument. Otherwise, it can be omitted. * or one ``str`` edge type name if the name can uniquely identify a
triplet format in the graph.
Can be omitted if the graph has only one type of edges.
Returns Returns
------- -------
...@@ -3614,9 +3565,7 @@ class DGLHeteroGraph(object): ...@@ -3614,9 +3565,7 @@ class DGLHeteroGraph(object):
The name of the feature that the initializer applies. If not given, the The name of the feature that the initializer applies. If not given, the
initializer applies to all features. initializer applies to all features.
ntype : str, optional ntype : str, optional
The type of the nodes that the initializer applies. If the graph has The type name of the nodes. Can be omitted if the graph has only one type of nodes.
multiple node types, one must specify the argument. Otherwise, it can
be omitted.
Notes Notes
----- -----
...@@ -3703,11 +3652,15 @@ class DGLHeteroGraph(object): ...@@ -3703,11 +3652,15 @@ class DGLHeteroGraph(object):
field : str, optional field : str, optional
The name of the feature that the initializer applies. If not given, the The name of the feature that the initializer applies. If not given, the
initializer applies to all features. initializer applies to all features.
etype : str or tuple of str, optional etype : str or (str, str, str), optional
The edge type for query, which can be an edge type (str) or a canonical edge type The type names of the edges. The allowed type name formats are:
(3-tuple of str). When an edge type appears in multiple canonical edge types, one
must use a canonical edge type. If the graph has multiple edge types, one must * ``(str, str, str)`` for source node type, edge type and destination node type.
specify the argument. Otherwise, it can be omitted. * or one ``str`` edge type name if the name can uniquely identify a
triplet format in the graph.
Can be omitted if the graph has only one type of edges.
Notes Notes
----- -----
...@@ -3945,27 +3898,50 @@ class DGLHeteroGraph(object): ...@@ -3945,27 +3898,50 @@ class DGLHeteroGraph(object):
################################################################# #################################################################
def apply_nodes(self, func, v=ALL, ntype=None, inplace=False): def apply_nodes(self, func, v=ALL, ntype=None, inplace=False):
"""Apply the function on the nodes with the same type to update their """Update the features of the specified nodes by the provided function.
features.
If None is provided for ``func``, nothing will happen.
Parameters Parameters
---------- ----------
func : callable or None func : callable
Apply function on the nodes. The function should be The function to update node features. It must be
a :mod:`Node UDF <dgl.udf>`. a :ref:`apiudf`.
v : int or iterable of int or tensor, optional v : node IDs
The (type-specific) node (ids) on which to apply ``func``. (Default: ALL) The node IDs. The allowed formats are:
* ``int``: A single node.
* Int Tensor: Each element is a node ID. The tensor must have the same device type
and ID data type as the graph's.
* iterable[int]: Each element is a node ID.
If not given (default), use all the nodes in the graph.
ntype : str, optional ntype : str, optional
The node type. Can be omitted if there is only one node type The node type name. Can be omitted if there is
in the graph. (Default: None) only one type of nodes in the graph.
inplace : bool, optional inplace : bool, optional
**DEPRECATED**. If True, update will be done in place, but autograd will break. **DEPRECATED**.
(Default: False)
Examples Examples
-------- --------
The following example uses PyTorch backend.
>>> import dgl
>>> import torch
**Homogeneous graph**
>>> g = dgl.graph(([0, 1, 2, 3], [1, 2, 3, 4]))
>>> g.ndata['h'] = torch.ones(5, 2)
>>> g.apply_nodes(lambda nodes: {'x' : nodes.data['h'] * 2})
>>> g.ndata['x']
tensor([[2., 2.],
[2., 2.],
[2., 2.],
[2., 2.],
[2., 2.]])
**Heterogeneous graph**
>>> g = dgl.heterograph({('user', 'follows', 'user'): ([0, 1], [1, 2])}) >>> g = dgl.heterograph({('user', 'follows', 'user'): ([0, 1], [1, 2])})
>>> g.nodes['user'].data['h'] = torch.ones(3, 5) >>> g.nodes['user'].data['h'] = torch.ones(3, 5)
>>> g.apply_nodes(lambda nodes: {'h': nodes.data['h'] * 2}, ntype='user') >>> g.apply_nodes(lambda nodes: {'h': nodes.data['h'] * 2}, ntype='user')
...@@ -3990,27 +3966,76 @@ class DGLHeteroGraph(object): ...@@ -3990,27 +3966,76 @@ class DGLHeteroGraph(object):
self._set_n_repr(ntid, v, ndata) self._set_n_repr(ntid, v, ndata)
def apply_edges(self, func, edges=ALL, etype=None, inplace=False): def apply_edges(self, func, edges=ALL, etype=None, inplace=False):
"""Apply the function on the edges with the same type to update their """Update the features of the specified edges by the provided function.
features.
If None is provided for ``func``, nothing will happen.
Parameters Parameters
---------- ----------
func : callable func : dgl.function.BuiltinFunction or callable
Apply function on the edge. The function should be The function to generate new edge features. It must be either
an :mod:`Edge UDF <dgl.udf>`. a :ref:`api-built-in` or a :ref:`apiudf`.
edges : optional edges : edges
Edges on which to apply ``func``. See :func:`send` for valid The edges to update features on. The allowed input formats are:
edge specification. (Default: ALL)
etype : str or tuple of str, optional * ``int``: A single edge ID.
The edge type. Can be omitted if there is only one edge type * Int Tensor: Each element is an edge ID. The tensor must have the same device type
in the graph. (Default: None) and ID data type as the graph's.
* iterable[int]: Each element is an edge ID.
* (Tensor, Tensor): The node-tensors format where the i-th elements
of the two tensors specify an edge.
* (iterable[int], iterable[int]): Similar to the node-tensors format but
stores edge endpoints in python iterables.
Default value specifies all the edges in the graph.
etype : str or (str, str, str), optional
The type name of the edges. The allowed type name formats are:
* ``(str, str, str)`` for source node type, edge type and destination node type.
* or one ``str`` edge type name if the name can uniquely identify a
triplet format in the graph.
Can be omitted if the graph has only one type of edges.
inplace: bool, optional inplace: bool, optional
**DEPRECATED**. Must be False. **DEPRECATED**.
Notes
-----
DGL recommends using DGL's built-in function for the :attr:`func` argument,
because DGL will invoke efficient kernels that avoid copying node features to
edge features in this case.
Examples Examples
-------- --------
The following example uses PyTorch backend.
>>> import dgl
>>> import torch
**Homogeneous graph**
>>> g = dgl.graph(([0, 1, 2, 3], [1, 2, 3, 4]))
>>> g.ndata['h'] = torch.ones(5, 2)
>>> g.apply_edges(lambda edges: {'x' : edges.src['h'] + edges.dst['h']})
>>> g.edata['x']
tensor([[2., 2.],
[2., 2.],
[2., 2.],
[2., 2.]])
Use built-in function
>>> import dgl.function as fn
>>> g.apply_edges(fn.u_add_v('h', 'h', 'x'))
>>> g.edata['x']
tensor([[2., 2.],
[2., 2.],
[2., 2.],
[2., 2.]])
**Heterogeneous graph**
>>> g = dgl.heterograph({('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 2, 1])}) >>> g = dgl.heterograph({('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 2, 1])})
>>> g.edges[('user', 'plays', 'game')].data['h'] = torch.ones(4, 5) >>> g.edges[('user', 'plays', 'game')].data['h'] = torch.ones(4, 5)
>>> g.apply_edges(lambda edges: {'h': edges.data['h'] * 2}) >>> g.apply_edges(lambda edges: {'h': edges.data['h'] * 2})
...@@ -4048,40 +4073,50 @@ class DGLHeteroGraph(object): ...@@ -4048,40 +4073,50 @@ class DGLHeteroGraph(object):
apply_node_func=None, apply_node_func=None,
etype=None, etype=None,
inplace=False): inplace=False):
"""Send messages along edges of the specified type, and let destinations """Send messages along the specified edges and reduce them on
receive them. the destination nodes to update their features.
Optionally, apply a function to update the node features after "receive".
This is a convenient combination for performing
:mod:`send <dgl.DGLHeteroGraph.send>` along the ``edges`` and
:mod:`recv <dgl.DGLHeteroGraph.recv>` for the destinations of the ``edges``.
**Only works if the graph has one edge type.** For multiple types, use
.. code::
g['edgetype'].send_and_recv(edges, message_func, reduce_func,
apply_node_func, inplace=inplace)
Parameters Parameters
---------- ----------
edges : See :func:`send` for valid edge specification. edges : edges
Edges on which to apply ``func``. The edges to send and receive messages on. The allowed input formats are:
message_func : callable
Message function on the edges. The function should be * ``int``: A single edge ID.
an :mod:`Edge UDF <dgl.udf>`. * Int Tensor: Each element is an edge ID. The tensor must have the same device type
reduce_func : callable and ID data type as the graph's.
Reduce function on the node. The function should be * iterable[int]: Each element is an edge ID.
a :mod:`Node UDF <dgl.udf>`. * (Tensor, Tensor): The node-tensors format where the i-th elements
of the two tensors specify an edge.
* (iterable[int], iterable[int]): Similar to the node-tensors format but
stores edge endpoints in python iterables.
message_func : dgl.function.BuiltinFunction or callable
The message function to generate messages along the edges.
It must be either a :ref:`api-built-in` or a :ref:`apiudf`.
reduce_func : dgl.function.BuiltinFunction or callable
The reduce function to aggregate the messages.
It must be either a :ref:`api-built-in` or a :ref:`apiudf`.
apply_node_func : callable, optional apply_node_func : callable, optional
Apply function on the nodes. The function should be An optional apply function to further update the node features
a :mod:`Node UDF <dgl.udf>`. (Default: None) after the message reduction. It must be a :ref:`apiudf`.
etype : str or tuple of str, optional etype : str or (str, str, str), optional
The edge type. Can be omitted if there is only one edge type The type name of the edges. The allowed type name formats are:
in the graph. (Default: None)
* ``(str, str, str)`` for source node type, edge type and destination node type.
* or one ``str`` edge type name if the name can uniquely identify a
triplet format in the graph.
Can be omitted if the graph has only one type of edges.
inplace: bool, optional inplace: bool, optional
**DEPRECATED**. Must be False. **DEPRECATED**.
Notes
-----
DGL recommends using DGL's built-in function for the :attr:`message_func`
and the :attr:`reduce_func` arguments,
because DGL will invoke efficient kernels that avoid copying node features to
edge features in this case.
Examples Examples
-------- --------
...@@ -4090,6 +4125,29 @@ class DGLHeteroGraph(object): ...@@ -4090,6 +4125,29 @@ class DGLHeteroGraph(object):
>>> import dgl.function as fn >>> import dgl.function as fn
>>> import torch >>> import torch
**Homogeneous graph**
>>> g = dgl.graph(([0, 1, 2, 3], [1, 2, 3, 4]))
>>> g.ndata['x'] = torch.ones(5, 2)
>>> # Specify edges using (Tensor, Tensor).
>>> g.send_and_recv(([1, 2], [2, 3]), fn.copy_u('x', 'm'), fn.sum('m', 'h'))
>>> g.ndata['h']
tensor([[0., 0.],
[0., 0.],
[1., 1.],
[1., 1.],
[0., 0.]])
>>> # Specify edges using IDs.
>>> g.send_and_recv([0, 2, 3], fn.copy_u('x', 'm'), fn.sum('m', 'h'))
>>> g.ndata['h']
tensor([[0., 0.],
[1., 1.],
[0., 0.],
[1., 1.],
[1., 1.]])
**Heterogeneous graph**
>>> g = dgl.heterograph({ >>> g = dgl.heterograph({
... ('user', 'follows', 'user'): ([0, 1], [1, 2]), ... ('user', 'follows', 'user'): ([0, 1], [1, 2]),
... ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 1, 1]) ... ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 1, 1])
...@@ -4128,45 +4186,50 @@ class DGLHeteroGraph(object): ...@@ -4128,45 +4186,50 @@ class DGLHeteroGraph(object):
apply_node_func=None, apply_node_func=None,
etype=None, etype=None,
inplace=False): inplace=False):
"""Pull messages from the node(s)' predecessors and then update their features. """Pull messages from the specified node(s)' predecessors along the
specified edge type, aggregate them to update the node features.
Optionally, apply a function to update the node features after receive.
This is equivalent to :mod:`send_and_recv <dgl.DGLHeteroGraph.send_and_recv>`
on the incoming edges of ``v`` with the specified type.
Other notes:
* `reduce_func` will be skipped for nodes with no incoming messages.
* If all ``v`` have no incoming message, this will downgrade to an :func:`apply_nodes`.
* If some ``v`` have no incoming message, their new feature value will be calculated
by the column initializer (see :func:`set_n_initializer`). The feature shapes and
dtypes will be inferred.
**Only works if the graph has one edge type.** For multiple types, use
.. code::
g['edgetype'].pull(v, message_func, reduce_func, apply_node_func, inplace=inplace)
Parameters Parameters
---------- ----------
v : int, container or tensor, optional v : node IDs
The node(s) to be updated. The node IDs. The allowed formats are:
message_func : callable
Message function on the edges. The function should be * ``int``: A single node.
an :mod:`Edge UDF <dgl.udf>`. * Int Tensor: Each element is a node ID. The tensor must have the same device type
reduce_func : callable and ID data type as the graph's.
Reduce function on the node. The function should be * iterable[int]: Each element is a node ID.
a :mod:`Node UDF <dgl.udf>`.
message_func : dgl.function.BuiltinFunction or callable
The message function to generate messages along the edges.
It must be either a :ref:`api-built-in` or a :ref:`apiudf`.
reduce_func : dgl.function.BuiltinFunction or callable
The reduce function to aggregate the messages.
It must be either a :ref:`api-built-in` or a :ref:`apiudf`.
apply_node_func : callable, optional apply_node_func : callable, optional
Apply function on the nodes. The function should be An optional apply function to further update the node features
a :mod:`Node UDF <dgl.udf>`. (Default: None) after the message reduction. It must be a :ref:`apiudf`.
etype : str or tuple of str, optional etype : str or (str, str, str), optional
The edge type. Can be omitted if there is only one edge type The type name of the edges. The allowed type name formats are:
in the graph. (Default: None)
* ``(str, str, str)`` for source node type, edge type and destination node type.
* or one ``str`` edge type name if the name can uniquely identify a
triplet format in the graph.
Can be omitted if the graph has only one type of edges.
inplace: bool, optional inplace: bool, optional
**DEPRECATED**. Must be False. **DEPRECATED**.
Notes
-----
* If some of the given nodes :attr:`v` have no in-edges, DGL does not invoke
message and reduce functions for these nodes and fills their aggregated messages
with zero. Users can control the filled values via :meth:`set_n_initializer`.
DGL still invokes :attr:`apply_node_func` if provided.
* DGL recommends using DGL's built-in function for the :attr:`message_func`
and the :attr:`reduce_func` arguments,
because DGL will invoke efficient kernels that avoid copying node features to
edge features in this case.
Examples Examples
-------- --------
...@@ -4175,7 +4238,19 @@ class DGLHeteroGraph(object): ...@@ -4175,7 +4238,19 @@ class DGLHeteroGraph(object):
>>> import dgl.function as fn >>> import dgl.function as fn
>>> import torch >>> import torch
Instantiate a heterograph. **Homogeneous graph**
>>> g = dgl.graph(([0, 1, 2, 3], [1, 2, 3, 4]))
>>> g.ndata['x'] = torch.ones(5, 2)
>>> g.pull([0, 3, 4], fn.copy_u('x', 'm'), fn.sum('m', 'h'))
>>> g.ndata['h']
tensor([[0., 0.],
[0., 0.],
[0., 0.],
[1., 1.],
[1., 1.]])
**Heterogeneous graph**
>>> g = dgl.heterograph({ >>> g = dgl.heterograph({
... ('user', 'follows', 'user'): ([0, 1], [1, 2]), ... ('user', 'follows', 'user'): ([0, 1], [1, 2]),
...@@ -4214,36 +4289,46 @@ class DGLHeteroGraph(object): ...@@ -4214,36 +4289,46 @@ class DGLHeteroGraph(object):
apply_node_func=None, apply_node_func=None,
etype=None, etype=None,
inplace=False): inplace=False):
"""Send message from the node(s) to their successors and update them. """Send message from the specified node(s) to their successors
along the specified edge type and update their node features.
This is equivalent to performing
:mod:`send_and_recv <DGLHeteroGraph.send_and_recv>` along the outbound
edges from ``u``.
**Only works if the graph has one edge type.** For multiple types, use
.. code::
g['edgetype'].push(u, message_func, reduce_func, apply_node_func, inplace=inplace)
Parameters Parameters
---------- ----------
u : int, container or tensor u : node IDs
The node(s) to push out messages. The node IDs. The allowed formats are:
message_func : callable
Message function on the edges. The function should be * ``int``: A single node.
an :mod:`Edge UDF <dgl.udf>`. * Int Tensor: Each element is a node ID. The tensor must have the same device type
reduce_func : callable and ID data type as the graph's.
Reduce function on the node. The function should be * iterable[int]: Each element is a node ID.
a :mod:`Node UDF <dgl.udf>`.
message_func : dgl.function.BuiltinFunction or callable
The message function to generate messages along the edges.
It must be either a :ref:`api-built-in` or a :ref:`apiudf`.
reduce_func : dgl.function.BuiltinFunction or callable
The reduce function to aggregate the messages.
It must be either a :ref:`api-built-in` or a :ref:`apiudf`.
apply_node_func : callable, optional apply_node_func : callable, optional
Apply function on the nodes. The function should be An optional apply function to further update the node features
a :mod:`Node UDF <dgl.udf>`. (Default: None) after the message reduction. It must be a :ref:`apiudf`.
etype : str, optional etype : str or (str, str, str), optional
The edge type. Can be omitted if there is only one edge type The type name of the edges. The allowed type name formats are:
in the graph. (Default: None)
* ``(str, str, str)`` for source node type, edge type and destination node type.
* or one ``str`` edge type name if the name can uniquely identify a
triplet format in the graph.
Can be omitted if the graph has only one type of edges.
inplace: bool, optional inplace: bool, optional
**DEPRECATED**. Must be False. **DEPRECATED**.
Notes
-----
DGL recommends using DGL's built-in function for the :attr:`message_func`
and the :attr:`reduce_func` arguments,
because DGL will invoke efficient kernels that avoid copying node features to
edge features in this case.
Examples Examples
-------- --------
...@@ -4252,7 +4337,19 @@ class DGLHeteroGraph(object): ...@@ -4252,7 +4337,19 @@ class DGLHeteroGraph(object):
>>> import dgl.function as fn >>> import dgl.function as fn
>>> import torch >>> import torch
Instantiate a heterograph. **Homogeneous graph**
>>> g = dgl.graph(([0, 1, 2, 3], [1, 2, 3, 4]))
>>> g.ndata['x'] = torch.ones(5, 2)
>>> g.push([0, 1], fn.copy_u('x', 'm'), fn.sum('m', 'h'))
>>> g.ndata['h']
tensor([[0., 0.],
[1., 1.],
[1., 1.],
[0., 0.],
[0., 0.]])
**Heterogeneous graph**
>>> g = dgl.heterograph({('user', 'follows', 'user'): ([0, 0], [1, 2])}) >>> g = dgl.heterograph({('user', 'follows', 'user'): ([0, 0], [1, 2])})
>>> g.nodes['user'].data['h'] = torch.tensor([[0.], [1.], [2.]]) >>> g.nodes['user'].data['h'] = torch.tensor([[0.], [1.], [2.]])
...@@ -4275,42 +4372,59 @@ class DGLHeteroGraph(object): ...@@ -4275,42 +4372,59 @@ class DGLHeteroGraph(object):
reduce_func, reduce_func,
apply_node_func=None, apply_node_func=None,
etype=None): etype=None):
"""Send messages through all edges and update all nodes. """Send messages along all the edges of the specified type
and update all the nodes of the corresponding destination type.
Optionally, apply a function to update the node features after receive.
This is equivalent to
:mod:`send_and_recv <dgl.DGLHeteroGraph.send_and_recv>` over all edges
of the specified type.
**Only works if the graph has one edge type.** For multiple types, use
.. code::
g['edgetype'].update_all(message_func, reduce_func, apply_node_func)
Parameters Parameters
---------- ----------
message_func : callable message_func : dgl.function.BuiltinFunction or callable
Message function on the edges. The function should be The message function to generate messages along the edges.
an :mod:`Edge UDF <dgl.udf>`. It must be either a :ref:`api-built-in` or a :ref:`apiudf`.
reduce_func : callable reduce_func : dgl.function.BuiltinFunction or callable
Reduce function on the node. The function should be The reduce function to aggregate the messages.
a :mod:`Node UDF <dgl.udf>`. It must be either a :ref:`api-built-in` or a :ref:`apiudf`.
apply_node_func : callable, optional apply_node_func : callable, optional
Apply function on the nodes. The function should be An optional apply function to further update the node features
a :mod:`Node UDF <dgl.udf>`. (Default: None) after the message reduction. It must be a :ref:`apiudf`.
etype : str, optional etype : str or (str, str, str), optional
The edge type. Can be omitted if there is only one edge type The type name of the edges. The allowed type name formats are:
in the graph. (Default: None)
* ``(str, str, str)`` for source node type, edge type and destination node type.
* or one ``str`` edge type name if the name can uniquely identify a
triplet format in the graph.
Can be omitted if the graph has only one type of edges.
Notes
-----
* If some of the nodes in the graph have no in-edges, DGL does not invoke
message and reduce functions for these nodes and fills their aggregated messages
with zero. Users can control the filled values via :meth:`set_n_initializer`.
DGL still invokes :attr:`apply_node_func` if provided.
* DGL recommends using DGL's built-in function for the :attr:`message_func`
and the :attr:`reduce_func` arguments,
because DGL will invoke efficient kernels that avoid copying node features to
edge features in this case.
Examples Examples
-------- --------
>>> import torch
>>> import dgl >>> import dgl
>>> import dgl.function as fn >>> import dgl.function as fn
>>> import torch
Instantiate a heterograph. **Homogeneous graph**
>>> g = dgl.graph(([0, 1, 2, 3], [1, 2, 3, 4]))
>>> g.ndata['x'] = torch.ones(5, 2)
>>> g.update_all(fn.copy_u('x', 'm'), fn.sum('m', 'h'))
>>> g.ndata['h']
tensor([[0., 0.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.]])
**Heterogeneous graph**
>>> g = dgl.heterograph({('user', 'follows', 'user'): ([0, 1, 2], [1, 2, 2])}) >>> g = dgl.heterograph({('user', 'follows', 'user'): ([0, 1, 2], [1, 2, 2])})
...@@ -4335,35 +4449,48 @@ class DGLHeteroGraph(object): ...@@ -4335,35 +4449,48 @@ class DGLHeteroGraph(object):
################################################################# #################################################################
def multi_update_all(self, etype_dict, cross_reducer, apply_node_func=None): def multi_update_all(self, etype_dict, cross_reducer, apply_node_func=None):
r"""Send and receive messages along all edges. r"""Send messages along all the edges, reduce them by first type-wisely
then across different types, and then update the node features of all
This is equivalent to the nodes.
:mod:`multi_send_and_recv <dgl.DGLHeteroGraph.multi_send_and_recv>`
over all edges.
Parameters Parameters
---------- ----------
etype_dict : dict etype_dict : dict
Mapping an edge type (str or tuple of str) to the type specific Arguments for edge-type-wise message passing. The keys are edge types
configuration (3-tuples). Each 3-tuple represents while the values are message passing arguments.
(msg_func, reduce_func, apply_node_func):
The allowed key formats are:
* msg_func: callable
Message function on the edges. The function should be * ``(str, str, str)`` for source node type, edge type and destination node type.
an :mod:`Edge UDF <dgl.udf>`. * or one ``str`` edge type name if the name can uniquely identify a
* reduce_func: callable triplet format in the graph.
Reduce function on the nodes. The function should be
a :mod:`Node UDF <dgl.udf>`. The value must be a tuple ``(message_func, reduce_func, [apply_node_func])``, where
* message_func : dgl.function.BuiltinFunction or callable
The message function to generate messages along the edges.
It must be either a :ref:`api-built-in` or a :ref:`apiudf`.
* reduce_func : dgl.function.BuiltinFunction or callable
The reduce function to aggregate the messages.
It must be either a :ref:`api-built-in` or a :ref:`apiudf`.
* apply_node_func : callable, optional * apply_node_func : callable, optional
Apply function on the nodes. The function should be An optional apply function to further update the node features
a :mod:`Node UDF <dgl.udf>`. (Default: None) after the message reduction. It must be a :ref:`apiudf`.
cross_reducer : str cross_reducer : str
Cross type reducer. One of ``"sum"``, ``"min"``, ``"max"``, ``"mean"``, ``"stack"``. Cross type reducer. One of ``"sum"``, ``"min"``, ``"max"``, ``"mean"``, ``"stack"``.
apply_node_func : callable apply_node_func : callable, optional
Apply function on the nodes. The function should be An optional apply function after the messages are reduced both
a :mod:`Node UDF <dgl.udf>`. (Default: None) type-wisely and across different types.
inplace: bool, optional It must be a :ref:`apiudf`.
**DEPRECATED**. Must be False.
Notes
-----
DGL recommends using DGL's built-in function for the message_func
and the reduce_func in the type-wise message passing arguments,
because DGL will invoke efficient kernels that avoid copying node features to
edge features in this case.
Examples Examples
-------- --------
...@@ -4431,21 +4558,27 @@ class DGLHeteroGraph(object): ...@@ -4431,21 +4558,27 @@ class DGLHeteroGraph(object):
Parameters Parameters
---------- ----------
nodes_generator : iterable, each element is a list or a tensor of node ids nodes_generator : iterable[node IDs]
The generator of node frontiers. It specifies which nodes perform The generator of node frontiers. Each frontier is a set of node IDs
:func:`pull` at each timestep. stored in Tensor or python iterables.
message_func : callable It specifies which nodes perform :func:`pull` at each step.
Message function on the edges. The function should be message_func : dgl.function.BuiltinFunction or callable
an :mod:`Edge UDF <dgl.udf>`. The message function to generate messages along the edges.
reduce_func : callable It must be either a :ref:`api-built-in` or a :ref:`apiudf`.
Reduce function on the node. The function should be reduce_func : dgl.function.BuiltinFunction or callable
a :mod:`Node UDF <dgl.udf>`. The reduce function to aggregate the messages.
It must be either a :ref:`api-built-in` or a :ref:`apiudf`.
apply_node_func : callable, optional apply_node_func : callable, optional
Apply function on the nodes. The function should be An optional apply function to further update the node features
a :mod:`Node UDF <dgl.udf>`. (Default: None) after the message reduction. It must be a :ref:`apiudf`.
etype : str, optional etype : str or (str, str, str), optional
The edge type. Can be omitted if there is only one edge type The type name of the edges. The allowed type name formats are:
in the graph. (Default: None)
* ``(str, str, str)`` for source node type, edge type and destination node type.
* or one ``str`` edge type name if the name can uniquely identify a
triplet format in the graph.
Can be omitted if the graph has only one type of edges.
Examples Examples
-------- --------
...@@ -4492,18 +4625,23 @@ class DGLHeteroGraph(object): ...@@ -4492,18 +4625,23 @@ class DGLHeteroGraph(object):
---------- ----------
edges_generator : generator edges_generator : generator
The generator of edge frontiers. The generator of edge frontiers.
message_func : callable message_func : dgl.function.BuiltinFunction or callable
Message function on the edges. The function should be The message function to generate messages along the edges.
an :mod:`Edge UDF <dgl.udf>`. It must be either a :ref:`api-built-in` or a :ref:`apiudf`.
reduce_func : callable reduce_func : dgl.function.BuiltinFunction or callable
Reduce function on the node. The function should be The reduce function to aggregate the messages.
a :mod:`Node UDF <dgl.udf>`. It must be either a :ref:`api-built-in` or a :ref:`apiudf`.
apply_node_func : callable, optional apply_node_func : callable, optional
Apply function on the nodes. The function should be An optional apply function to further update the node features
a :mod:`Node UDF <dgl.udf>`. (Default: None) after the message reduction. It must be a :ref:`apiudf`.
etype : str, optional etype : str or (str, str, str), optional
The edge type. Can be omitted if there is only one edge type The type name of the edges. The allowed type name formats are:
in the graph. (Default: None)
* ``(str, str, str)`` for source node type, edge type and destination node type.
* or one ``str`` edge type name if the name can uniquely identify a
triplet format in the graph.
Can be omitted if the graph has only one type of edges.
Examples Examples
-------- --------
...@@ -4563,7 +4701,7 @@ class DGLHeteroGraph(object): ...@@ -4563,7 +4701,7 @@ class DGLHeteroGraph(object):
Returns Returns
------- -------
tensor Tensor
A 1D tensor that contains the ID(s) of the node(s) that satisfy the predicate. A 1D tensor that contains the ID(s) of the node(s) that satisfy the predicate.
Examples Examples
...@@ -4630,30 +4768,31 @@ class DGLHeteroGraph(object): ...@@ -4630,30 +4768,31 @@ class DGLHeteroGraph(object):
Its output tensor should be a 1D boolean tensor with Its output tensor should be a 1D boolean tensor with
each element indicating whether the corresponding edge in each element indicating whether the corresponding edge in
the batch satisfies the predicate. the batch satisfies the predicate.
edges : edge ID(s) or edge end nodes, optional edges : edges
The edge(s) for query. The allowed formats are: The edges for query. The allowed input formats are:
- Tensor: A 1D tensor that contains the IDs of the edge(s) for query, whose data * ``int``: A single edge ID.
type and device should be the same as the :py:attr:`idtype` and device of the graph. * Int Tensor: Each element is an edge ID. The tensor must have the same device type
- iterable[int]: Similar to the tensor, but stores edge IDs in a sequence and ID data type as the graph's.
(e.g. list, tuple, numpy.ndarray). * iterable[int]: Each element is an edge ID.
- (Tensor, Tensor): A 2-tuple of the source and destination nodes of multiple * (Tensor, Tensor): The node-tensors format where the i-th elements
edges for query. Each tensor is a 1D tensor containing node IDs. DGL calls this of the two tensors specify an edge.
format "tuple of node-tensors". The data type and device of the tensors should * (iterable[int], iterable[int]): Similar to the node-tensors format but
be the same as the :py:attr:`idtype` and device of the graph. stores edge endpoints in python iterables.
- (iterable[int], iterable[int]): Similar to the tuple of node-tensors format,
but stores node IDs in two sequences (e.g. list, tuple, numpy.ndarray). By default, it considers all the edges.
etype : str or (str, str, str), optional
By default, it considers all edges. The type name of the edges. The allowed type name formats are:
etype : str or tuple of str, optional
The edge type for query, which can be an edge type (str) or a canonical edge type * ``(str, str, str)`` for source node type, edge type and destination node type.
(3-tuple of str). When an edge type appears in multiple canonical edge types, one * or one ``str`` edge type name if the name can uniquely identify a
must use a canonical edge type. If the graph has multiple edge types, one must triplet format in the graph.
specify the argument. Otherwise, it can be omitted.
Can be omitted if the graph has only one type of edges.
Returns Returns
------- -------
tensor Tensor
A 1D tensor that contains the ID(s) of the edge(s) that satisfy the predicate. A 1D tensor that contains the ID(s) of the edge(s) that satisfy the predicate.
Examples Examples
......
"""Package for neural network common components.""" """The ``dgl.nn`` package contains framework-specific implementations for
common Graph Neural Network layers (or module in PyTorch, Block in MXNet).
Users can directly import ``dgl.nn.<layer_name>`` (e.g., ``dgl.nn.GraphConv``),
and the package will dispatch the layer name to the actual implementation
according to the backend framework currently in use.
Note that there are coverage differences among frameworks. If you encounter
an ``ImportError: cannot import name 'XXX'`` error, that means the layer is
not available to the current backend. If you wish a module to appear in DGL,
please `create an issue <https://github.com/dmlc/dgl/issues>`_ whose title starts with
"[Feature Request] NN Module XXXModel". If you want to contribute a NN module,
please `create a pull request <https://github.com/dmlc/dgl/pulls>`_ whose title starts
with "[NN] XXX module".
"""
import importlib import importlib
import sys import sys
import os import os
......
...@@ -8,14 +8,12 @@ from . import ndarray as nd ...@@ -8,14 +8,12 @@ from . import ndarray as nd
__all__ = ['seed'] __all__ = ['seed']
def seed(val): def seed(val):
"""Set the seed of randomized methods in DGL. """Set the random seed of DGL.
The randomized methods include various samplers and random walk routines.
Parameters Parameters
---------- ----------
val : int val : int
The seed The seed.
""" """
_CAPI_SetSeed(val) _CAPI_SetSeed(val)
...@@ -41,8 +39,6 @@ def choice(a, size, replace=True, prob=None): # pylint: disable=invalid-name ...@@ -41,8 +39,6 @@ def choice(a, size, replace=True, prob=None): # pylint: disable=invalid-name
It out-performs numpy for non-uniform sampling in general cases. It out-performs numpy for non-uniform sampling in general cases.
TODO(minjie): support RNG as one of the arguments.
Parameters Parameters
---------- ----------
a : 1-D tensor or int a : 1-D tensor or int
...@@ -61,6 +57,7 @@ def choice(a, size, replace=True, prob=None): # pylint: disable=invalid-name ...@@ -61,6 +57,7 @@ def choice(a, size, replace=True, prob=None): # pylint: disable=invalid-name
samples : 1-D tensor samples : 1-D tensor
The generated random samples The generated random samples
""" """
#TODO(minjie): support RNG as one of the arguments.
if isinstance(size, tuple): if isinstance(size, tuple):
num = np.prod(size) num = np.prod(size)
else: else:
......
...@@ -28,9 +28,9 @@ def readout_nodes(graph, feat, weight=None, *, op='sum', ntype=None): ...@@ -28,9 +28,9 @@ def readout_nodes(graph, feat, weight=None, *, op='sum', ntype=None):
feat : str feat : str
Node feature name. Node feature name.
weight : str, optional weight : str, optional
Node weight name. If None, no weighting will be performed, Node weight name. None means aggregating without weights.
otherwise, weight each node feature with field :attr:`feat`. Otherwise, multiply each node feature by node feature :attr:`weight`
for aggregation. The weight feature shape must be compatible with before aggregation. The weight feature shape must be compatible with
an element-wise multiplication with the feature tensor. an element-wise multiplication with the feature tensor.
op : str, optional op : str, optional
Readout operator. Can be 'sum', 'max', 'min', 'mean'. Readout operator. Can be 'sum', 'max', 'min', 'mean'.
...@@ -39,7 +39,7 @@ def readout_nodes(graph, feat, weight=None, *, op='sum', ntype=None): ...@@ -39,7 +39,7 @@ def readout_nodes(graph, feat, weight=None, *, op='sum', ntype=None):
Returns Returns
------- -------
tensor Tensor
Result tensor. Result tensor.
Examples Examples
...@@ -101,22 +101,28 @@ def readout_edges(graph, feat, weight=None, *, op='sum', etype=None): ...@@ -101,22 +101,28 @@ def readout_edges(graph, feat, weight=None, *, op='sum', etype=None):
Parameters Parameters
---------- ----------
graph : DGLGraph graph : DGLGraph
Input graph. The input graph.
feat : str feat : str
Edge feature name. The edge feature name.
weight : str, optional weight : str, optional
Edge weight name. If None, no weighting will be performed, The edge weight feature name. If None, no weighting will be performed,
otherwise, weight each edge feature with field :attr:`weight` otherwise, weight each edge feature with field :attr:`weight`
for summation. The weight feature shape must be compatible with for summation. The weight feature shape must be compatible with
an element-wise multiplication with the feature tensor. an element-wise multiplication with the feature tensor.
op : str, optional op : str, optional
Readout operator. Can be 'sum', 'max', 'min', 'mean'. Readout operator. Can be 'sum', 'max', 'min', 'mean'.
etype : str, tuple of str, optional etype : str or (str, str, str), optional
Edge type. Can be omitted if there is only one edge type in the graph. The type names of the edges. The allowed type name formats are:
* ``(str, str, str)`` for source node type, edge type and destination node type.
* or one ``str`` edge type name if the name can uniquely identify a
triplet format in the graph.
Can be omitted if the graph has only one type of edges.
Returns Returns
------- -------
tensor Tensor
Result tensor. Result tensor.
Examples Examples
...@@ -166,31 +172,55 @@ def readout_edges(graph, feat, weight=None, *, op='sum', etype=None): ...@@ -166,31 +172,55 @@ def readout_edges(graph, feat, weight=None, *, op='sum', etype=None):
def sum_nodes(graph, feat, weight=None, *, ntype=None): def sum_nodes(graph, feat, weight=None, *, ntype=None):
"""Syntax sugar for ``dgl.readout_nodes(graph, feat, weight, ntype=ntype, op='sum')``. """Syntax sugar for ``dgl.readout_nodes(graph, feat, weight, ntype=ntype, op='sum')``.
See Also
--------
readout_nodes
""" """
return readout_nodes(graph, feat, weight, ntype=ntype, op='sum') return readout_nodes(graph, feat, weight, ntype=ntype, op='sum')
def sum_edges(graph, feat, weight=None, *, etype=None): def sum_edges(graph, feat, weight=None, *, etype=None):
"""Syntax sugar for ``dgl.readout_edges(graph, feat, weight, etype=etype, op='sum')``. """Syntax sugar for ``dgl.readout_edges(graph, feat, weight, etype=etype, op='sum')``.
See Also
--------
readout_edges
""" """
return readout_edges(graph, feat, weight, etype=etype, op='sum') return readout_edges(graph, feat, weight, etype=etype, op='sum')
def mean_nodes(graph, feat, weight=None, *, ntype=None): def mean_nodes(graph, feat, weight=None, *, ntype=None):
"""Syntax sugar for ``dgl.readout_nodes(graph, feat, weight, ntype=ntype, op='mean')``. """Syntax sugar for ``dgl.readout_nodes(graph, feat, weight, ntype=ntype, op='mean')``.
See Also
--------
readout_nodes
""" """
return readout_nodes(graph, feat, weight, ntype=ntype, op='mean') return readout_nodes(graph, feat, weight, ntype=ntype, op='mean')
def mean_edges(graph, feat, weight=None, *, etype=None): def mean_edges(graph, feat, weight=None, *, etype=None):
"""Syntax sugar for ``dgl.readout_edges(graph, feat, weight, etype=etype, op='mean')``. """Syntax sugar for ``dgl.readout_edges(graph, feat, weight, etype=etype, op='mean')``.
See Also
--------
readout_edges
""" """
return readout_edges(graph, feat, weight, etype=etype, op='mean') return readout_edges(graph, feat, weight, etype=etype, op='mean')
def max_nodes(graph, feat, weight=None, *, ntype=None): def max_nodes(graph, feat, weight=None, *, ntype=None):
"""Syntax sugar for ``dgl.readout_nodes(graph, feat, weight, ntype=ntype, op='max')``. """Syntax sugar for ``dgl.readout_nodes(graph, feat, weight, ntype=ntype, op='max')``.
See Also
--------
readout_nodes
""" """
return readout_nodes(graph, feat, weight, ntype=ntype, op='max') return readout_nodes(graph, feat, weight, ntype=ntype, op='max')
def max_edges(graph, feat, weight=None, *, etype=None): def max_edges(graph, feat, weight=None, *, etype=None):
"""Syntax sugar for ``dgl.readout_edges(graph, feat, weight, etype=etype, op='max')``. """Syntax sugar for ``dgl.readout_edges(graph, feat, weight, etype=etype, op='max')``.
See Also
--------
readout_edges
""" """
return readout_edges(graph, feat, weight, etype=etype, op='max') return readout_edges(graph, feat, weight, etype=etype, op='max')
...@@ -210,15 +240,15 @@ def softmax_nodes(graph, feat, *, ntype=None): ...@@ -210,15 +240,15 @@ def softmax_nodes(graph, feat, *, ntype=None):
Parameters Parameters
---------- ----------
graph : DGLGraph graph : DGLGraph
Input graph. The input graph.
feat : str feat : str
Node feature name. The node feature name.
ntype : str, optional ntype : str, optional
Node type. Can be omitted if there is only one node type in the graph. The node type name. Can be omitted if there is only one node type in the graph.
Returns Returns
------- -------
tensor Tensor
Result tensor. Result tensor.
Examples Examples
...@@ -269,15 +299,21 @@ def softmax_edges(graph, feat, *, etype=None): ...@@ -269,15 +299,21 @@ def softmax_edges(graph, feat, *, etype=None):
Parameters Parameters
---------- ----------
graph : DGLGraph graph : DGLGraph
Input graph. The input graph.
feat : str feat : str
Edge feature name. The edge feature name.
etype : str, typle of str, optional etype : str or (str, str, str), optional
Edge type. Can be omitted if there is only one edge type in the graph. The type names of the edges. The allowed type name formats are:
* ``(str, str, str)`` for source node type, edge type and destination node type.
* or one ``str`` edge type name if the name can uniquely identify a
triplet format in the graph.
Can be omitted if the graph has only one type of edges.
Returns Returns
------- -------
tensor Tensor
Result tensor. Result tensor.
Examples Examples
...@@ -535,9 +571,10 @@ def _topk_on(graph, typestr, feat, k, descending, sortby, ntype_or_etype): ...@@ -535,9 +571,10 @@ def _topk_on(graph, typestr, feat, k, descending, sortby, ntype_or_etype):
topk_indices topk_indices
def topk_nodes(graph, feat, k, *, descending=True, sortby=None, ntype=None): def topk_nodes(graph, feat, k, *, descending=True, sortby=None, ntype=None):
"""Perform a graph-wise top-k on node features :attr:`feat` in """Return a graph-level representation by a graph-wise top-k on
:attr:`graph` by feature at index :attr:`sortby`. If :attr: node features :attr:`feat` in :attr:`graph` by feature at index :attr:`sortby`.
`descending` is set to False, return the k smallest elements instead.
If :attr:`descending` is set to False, return the k smallest elements instead.
If :attr:`sortby` is set to None, the function would perform top-k on If :attr:`sortby` is set to None, the function would perform top-k on
all dimensions independently, equivalent to calling all dimensions independently, equivalent to calling
...@@ -569,6 +606,11 @@ def topk_nodes(graph, feat, k, *, descending=True, sortby=None, ntype=None): ...@@ -569,6 +606,11 @@ def topk_nodes(graph, feat, k, *, descending=True, sortby=None, ntype=None):
:math:`B` is the batch size of the input graph, :math:`D` :math:`B` is the batch size of the input graph, :math:`D`
is the feature size. is the feature size.
Notes
-----
If an example has :math:`n` nodes and :math:`n<k`, the ``sorted_feat``
tensor will pad the :math:`n+1` to :math:`k` th rows with zero.
Examples Examples
-------- --------
...@@ -631,20 +673,16 @@ def topk_nodes(graph, feat, k, *, descending=True, sortby=None, ntype=None): ...@@ -631,20 +673,16 @@ def topk_nodes(graph, feat, k, *, descending=True, sortby=None, ntype=None):
[0.0880, 0.6379, 0.4451, 0.6893, 0.5197]]]), tensor([[[1, 0, 1, 3, 1], [0.0880, 0.6379, 0.4451, 0.6893, 0.5197]]]), tensor([[[1, 0, 1, 3, 1],
[3, 2, 0, 2, 2], [3, 2, 0, 2, 2],
[2, 3, 2, 1, 3]]])) [2, 3, 2, 1, 3]]]))
Notes
-----
If an example has :math:`n` nodes and :math:`n<k`, the ``sorted_feat``
tensor will pad the :math:`n+1` to :math:`k`th rows with zero;
""" """
return _topk_on(graph, 'nodes', feat, k, return _topk_on(graph, 'nodes', feat, k,
descending=descending, sortby=sortby, descending=descending, sortby=sortby,
ntype_or_etype=ntype) ntype_or_etype=ntype)
def topk_edges(graph, feat, k, *, descending=True, sortby=None, etype=None): def topk_edges(graph, feat, k, *, descending=True, sortby=None, etype=None):
"""Perform a graph-wise top-k on node features :attr:`feat` in """Return a graph-level representation by a graph-wise top-k
:attr:`graph` by feature at index :attr:`sortby`. If :attr: on edge features :attr:`feat` in :attr:`graph` by feature at index :attr:`sortby`.
`descending` is set to False, return the k smallest elements instead.
If :attr:`descending` is set to False, return the k smallest elements instead.
If :attr:`sortby` is set to None, the function would perform top-k on If :attr:`sortby` is set to None, the function would perform top-k on
all dimensions independently, equivalent to calling all dimensions independently, equivalent to calling
...@@ -676,6 +714,11 @@ def topk_edges(graph, feat, k, *, descending=True, sortby=None, etype=None): ...@@ -676,6 +714,11 @@ def topk_edges(graph, feat, k, *, descending=True, sortby=None, etype=None):
:math:`B` is the batch size of the input graph, :math:`D` :math:`B` is the batch size of the input graph, :math:`D`
is the feature size. is the feature size.
Notes
-----
If an example has :math:`n` edges and :math:`n<k`, the ``sorted_feat``
tensor will pad the :math:`n+1` to :math:`k` th rows with zero.
Examples Examples
-------- --------
...@@ -738,11 +781,6 @@ def topk_edges(graph, feat, k, *, descending=True, sortby=None, etype=None): ...@@ -738,11 +781,6 @@ def topk_edges(graph, feat, k, *, descending=True, sortby=None, etype=None):
[0.0880, 0.6379, 0.4451, 0.6893, 0.5197]]]), tensor([[[1, 0, 1, 3, 1], [0.0880, 0.6379, 0.4451, 0.6893, 0.5197]]]), tensor([[[1, 0, 1, 3, 1],
[3, 2, 0, 2, 2], [3, 2, 0, 2, 2],
[2, 3, 2, 1, 3]]])) [2, 3, 2, 1, 3]]]))
Notes
-----
If an example has :math:`n` nodes and :math:`n<k`, the ``sorted_feat``
tensor will pad the :math:`n+1` to :math:`k`th rows with zero;
""" """
return _topk_on(graph, 'edges', feat, k, return _topk_on(graph, 'edges', feat, k,
descending=descending, sortby=sortby, descending=descending, sortby=sortby,
......
"""This module contains the implementations of various sampling operators. """The ``dgl.sampling`` package contains operators and utilities for
sampling from a graph via random walks, neighbor sampling, etc. They
are typically used together with the ``DataLoader`` s in the
``dgl.dataloading`` package. The user guide :ref:`guide-minibatch`
gives a holistic explanation on how different components work together.
""" """
from .randomwalks import * from .randomwalks import *
from .pinsage import * from .pinsage import *
from .neighbor import * from .neighbor import *
...@@ -18,92 +18,102 @@ __all__ = ['node_subgraph', 'edge_subgraph', 'node_type_subgraph', 'edge_type_su ...@@ -18,92 +18,102 @@ __all__ = ['node_subgraph', 'edge_subgraph', 'node_type_subgraph', 'edge_type_su
'in_subgraph', 'out_subgraph'] 'in_subgraph', 'out_subgraph']
def node_subgraph(graph, nodes): def node_subgraph(graph, nodes):
"""Return the subgraph induced on given nodes. """Return a subgraph induced on the given nodes.
The metagraph of the returned subgraph is the same as the parent graph. A node-induced subgraph is a subset of the nodes of a graph together with
Features are copied from the original graph. any edges whose endpoints are both in this subset. In addition to extracting
the subgraph, DGL conducts the following:
* Relabel the extracted nodes to IDs starting from zero.
* Copy the features of the extracted nodes and edges to the resulting graph.
The copy is *lazy* and incurs data movement only when needed.
* Store the IDs of the extracted nodes and edges in the ``ndata`` and ``edata``
of the resulting graph under name ``dgl.NID`` and ``dgl.EID``, respectively.
If the graph is heterogeneous, DGL extracts a subgraph per relation and composes
them as the resulting graph. Thus, the resulting graph has the same set of relations
as the input one.
Parameters Parameters
---------- ----------
graph : DGLGraph graph : DGLGraph
The graph to extract subgraphs from. The graph to extract subgraphs from.
nodes : list or dict[str->list or iterable] nodes : nodes or dict[str, nodes]
A dictionary mapping node types to node ID array for constructing The nodes to form the subgraph. The allowed nodes formats are:
subgraph. All nodes must exist in the graph.
If the graph only has one node type, one can just specify a list, * Int Tensor: Each element is a node ID. The tensor must have the same device type
tensor, or any iterable of node IDs intead. and ID data type as the graph's.
* iterable[int]: Each element is a node ID.
* Bool Tensor: Each :math:`i^{th}` element is a bool flag indicating whether
node :math:`i` is in the subgraph.
The node ID array can be either an interger tensor or a bool tensor. If the graph is homogeneous, one can directly pass the above formats.
When a bool tensor is used, it is automatically converted to Otherwise, the argument must be a dictionary with keys being node types
an interger tensor using the semantic of np.where(nodes_idx == True). and values being the nodes.
Note: When using bool tensor, only backend (torch, tensorflow, mxnet)
tensors are supported.
Returns Returns
------- -------
G : DGLGraph G : DGLGraph
The subgraph. The subgraph.
The nodes and edges in the subgraph are relabeled using consecutive
integers from 0.
One can retrieve the mapping from subgraph node/edge ID to parent
node/edge ID via ``dgl.NID`` and ``dgl.EID`` node/edge features of the
subgraph.
Examples Examples
-------- --------
The following example uses PyTorch backend. The following example uses PyTorch backend.
Instantiate a heterograph. >>> import dgl
>>> import torch
Extract a subgraph from a homogeneous graph.
>>> g = dgl.graph(([0, 1, 2, 3, 4], [1, 2, 3, 4, 0])) # 5-node cycle
>>> sg = dgl.node_subgraph(g, [0, 1, 4])
>>> sg
Graph(num_nodes=3, num_edges=2,
ndata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)}
edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)})
>>> sg.edges()
(tensor([0, 2]), tensor([1, 0]))
>>> sg.ndata[dgl.NID] # original node IDs
tensor([0, 1, 4])
>>> sg.edata[dgl.EID] # original edge IDs
tensor([0, 4])
Specify nodes using a boolean mask.
>>> nodes = torch.tensor([True, True, False, False, True]) # choose nodes [0, 1, 4]
>>> dgl.node_subgraph(g, nodes)
Graph(num_nodes=3, num_edges=2,
ndata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)}
edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)})
The resulting subgraph also copies features from the parent graph.
>>> g.ndata['x'] = torch.arange(10).view(5, 2)
>>> sg = dgl.node_subgraph(g, [0, 1, 4])
>>> sg
Graph(num_nodes=3, num_edges=2,
ndata_schemes={'x': Scheme(shape=(2,), dtype=torch.int64),
'_ID': Scheme(shape=(), dtype=torch.int64)}
edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)})
>>> sg.ndata['x']
tensor([[0, 1],
[2, 3],
[8, 9]])
Extract a subgraph from a heterogeneous graph.
>>> g = dgl.heterograph({ >>> g = dgl.heterograph({
>>> ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 2, 1]), >>> ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 2, 1]),
>>> ('user', 'follows', 'user'): ([0, 1, 1], [1, 2, 2]) >>> ('user', 'follows', 'user'): ([0, 1, 1], [1, 2, 2])
>>> }) >>> })
>>> # Set node features >>> sub_g = dgl.node_subgraph(g, {'user': [1, 2]})
>>> g.nodes['user'].data['h'] = torch.tensor([[0.], [1.], [2.]]) >>> sub_g
Get subgraphs.
>>> g.subgraph({'user': [4, 5]})
Traceback (most recent call last):
...
dgl._ffi.base.DGLError: ...
>>> sub_g = g.subgraph({'user': [1, 2]})
>>> print(sub_g)
Graph(num_nodes={'user': 2, 'game': 0},
num_edges={('user', 'plays', 'game'): 0, ('user', 'follows', 'user'): 2},
metagraph=[('user', 'game'), ('user', 'user')])
Get subgraphs using boolean mask tensor.
>>> sub_g = g.subgraph({'user': th.tensor([False, True, True])})
>>> print(sub_g)
Graph(num_nodes={'user': 2, 'game': 0}, Graph(num_nodes={'user': 2, 'game': 0},
num_edges={('user', 'plays', 'game'): 0, ('user', 'follows', 'user'): 2}, num_edges={('user', 'plays', 'game'): 0, ('user', 'follows', 'user'): 2},
metagraph=[('user', 'game'), ('user', 'user')]) metagraph=[('user', 'game'), ('user', 'user')])
Get the original node/edge indices.
>>> sub_g['follows'].ndata[dgl.NID] # Get the node indices in the raw graph
tensor([1, 2])
>>> sub_g['follows'].edata[dgl.EID] # Get the edge indices in the raw graph
tensor([1, 2])
Get the copied node features.
>>> sub_g.nodes['user'].data['h']
tensor([[1.],
[2.]])
>>> sub_g.nodes['user'].data['h'] += 1
>>> g.nodes['user'].data['h'] # Features are not shared.
tensor([[0.],
[1.],
[2.]])
See Also See Also
-------- --------
edge_subgraph edge_subgraph
...@@ -129,106 +139,124 @@ def node_subgraph(graph, nodes): ...@@ -129,106 +139,124 @@ def node_subgraph(graph, nodes):
induced_edges = sgi.induced_edges induced_edges = sgi.induced_edges
return _create_hetero_subgraph(graph, sgi, induced_nodes, induced_edges) return _create_hetero_subgraph(graph, sgi, induced_nodes, induced_edges)
DGLHeteroGraph.subgraph = node_subgraph DGLHeteroGraph.subgraph = utils.alias_func(node_subgraph)
def edge_subgraph(graph, edges, preserve_nodes=False): def edge_subgraph(graph, edges, preserve_nodes=False):
"""Return the subgraph induced on given edges. """Return a subgraph induced on the given edges.
The metagraph of the returned subgraph is the same as the parent graph.
Features are copied from the original graph. An edge-induced subgraph is equivalent to creating a new graph
with the same number of nodes using the given edges. In addition to extracting
Parameters the subgraph, DGL conducts the following:
----------
graph : DGLGraph
The graph to extract subgraphs from.
edges : dict[(str, str, str), Tensor]
A dictionary mapping edge types to edge ID array for constructing
subgraph. All edges must exist in the subgraph.
The edge types are characterized by triplets of * Relabel the incident nodes to IDs starting from zero. Isolated nodes are removed.
``(src type, etype, dst type)``.
If the graph only has one edge type, one can just specify a list, * Copy the features of the extracted nodes and edges to the resulting graph.
tensor, or any iterable of edge IDs intead. The copy is *lazy* and incurs data movement only when needed.
The edge ID array can be either an interger tensor or a bool tensor. * Store the IDs of the extracted nodes and edges in the ``ndata`` and ``edata``
When a bool tensor is used, it is automatically converted to of the resulting graph under name ``dgl.NID`` and ``dgl.EID``, respectively.
an interger tensor using the semantic of np.where(edges_idx == True).
Note: When using bool tensor, only backend (torch, tensorflow, mxnet) If the graph is heterogeneous, DGL extracts a subgraph per relation and composes
tensors are supported. them as the resulting graph. Thus, the resulting graph has the same set of relations
as the input one.
preserve_nodes : bool Parameters
Whether to preserve all nodes or not. If false, all nodes ----------
without edges will be removed. (Default: False) graph : DGLGraph
The graph to extract the subgraph from.
edges : edges or dict[(str, str, str), edges]
The edges to form the subgraph. The allowed edges formats are:
* Int Tensor: Each element is an edge ID. The tensor must have the same device type
and ID data type as the graph's.
* iterable[int]: Each element is an edge ID.
* Bool Tensor: Each :math:`i^{th}` element is a bool flag indicating whether
edge :math:`i` is in the subgraph.
If the graph is homogeneous, one can directly pass the above formats.
Otherwise, the argument must be a dictionary with keys being edge types
and values being the edges.
preserve_nodes : bool, optional
If true, keep all the nodes of the input graph: the incident nodes are not relabeled
and the isolated nodes are not removed from the extracted subgraph. (Default: False)
Returns Returns
------- -------
G : DGLGraph G : DGLGraph
The subgraph. The subgraph.
The nodes and edges are relabeled using consecutive integers from 0.
One can retrieve the mapping from subgraph node/edge ID to parent
node/edge ID via ``dgl.NID`` and ``dgl.EID`` node/edge features of the
subgraph.
Examples Examples
-------- --------
The following example uses PyTorch backend. The following example uses PyTorch backend.
Instantiate a heterograph. >>> import dgl
>>> import torch
Extract a subgraph from a homogeneous graph.
>>> g = dgl.graph(([0, 1, 2, 3, 4], [1, 2, 3, 4, 0])) # 5-node cycle
>>> sg = dgl.edge_subgraph(g, [0, 4])
>>> sg
Graph(num_nodes=3, num_edges=2,
ndata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)}
edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)})
>>> sg.edges()
(tensor([0, 1]), tensor([2, 0]))
>>> sg.ndata[dgl.NID] # original node IDs
tensor([0, 4, 1])
>>> sg.edata[dgl.EID] # original edge IDs
tensor([0, 4])
Extract a subgraph without node relabeling.
>>> sg = dgl.edge_subgraph(g, [0, 4], preserve_nodes=True)
>>> sg
Graph(num_nodes=5, num_edges=2,
ndata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)}
edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)})
>>> sg.edges()
(tensor([0, 4]), tensor([1, 0]))
Specify edges using a boolean mask.
>>> edges = torch.tensor([True, False, False, False, True]) # choose edges [0, 4]
>>> dgl.edge_subgraph(g, edges)
Graph(num_nodes=3, num_edges=2,
ndata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)}
edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)})
The resulting subgraph also copies features from the parent graph.
>>> g.ndata['x'] = torch.arange(10).view(5, 2)
>>> sg = dgl.edge_subgraph(g, [0, 4])
>>> sg
Graph(num_nodes=3, num_edges=2,
ndata_schemes={'x': Scheme(shape=(2,), dtype=torch.int64),
'_ID': Scheme(shape=(), dtype=torch.int64)}
edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)})
>>> sg.ndata[dgl.NID]
tensor([0, 4, 1])
>>> sg.ndata['x']
tensor([[0, 1],
[8, 9],
[2, 3]])
Extract a subgraph from a heterogeneous graph.
>>> g = dgl.heterograph({ >>> g = dgl.heterograph({
>>> ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 2, 1]), >>> ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 2, 1]),
>>> ('user', 'follows', 'user'): ([0, 1, 1], [1, 2, 2]) >>> ('user', 'follows', 'user'): ([0, 1, 1], [1, 2, 2])
>>> }) >>> })
>>> # Set edge features >>> sub_g = dgl.edge_subgraph(g, {('user', 'follows', 'user'): [1, 2],
>>> g.edges['follows'].data['h'] = torch.tensor([[0.], [1.], [2.]]) ... ('user', 'plays', 'game'): [2]})
Get subgraphs.
>>> g.edge_subgraph({('user', 'follows', 'user'): [5, 6]})
Traceback (most recent call last):
...
dgl._ffi.base.DGLError: ...
>>> sub_g = g.edge_subgraph({('user', 'follows', 'user'): [1, 2],
>>> ('user', 'plays', 'game'): [2]})
>>> print(sub_g) >>> print(sub_g)
Graph(num_nodes={'user': 2, 'game': 1}, Graph(num_nodes={'user': 2, 'game': 1},
num_edges={('user', 'plays', 'game'): 1, ('user', 'follows', 'user'): 2}, num_edges={('user', 'plays', 'game'): 1, ('user', 'follows', 'user'): 2},
metagraph=[('user', 'game'), ('user', 'user')]) metagraph=[('user', 'game'), ('user', 'user')])
Get subgraphs using boolean mask tensor.
>>> sub_g = g.edge_subgraph({('user', 'follows', 'user'): th.tensor([False, True, True]),
>>> ('user', 'plays', 'game'): th.tensor([False, False, True, False])})
>>> sub_g
Graph(num_nodes={'user': 2, 'game': 1},
num_edges={('user', 'plays', 'game'): 1, ('user', 'follows', 'user'): 2},
metagraph=[('user', 'game'), ('user', 'user')])
Get the original node/edge indices.
>>> sub_g['follows'].ndata[dgl.NID] # Get the node indices in the raw graph
tensor([1, 2])
>>> sub_g['plays'].edata[dgl.EID] # Get the edge indices in the raw graph
tensor([2])
Get the copied node features.
>>> sub_g.edges['follows'].data['h']
tensor([[1.],
[2.]])
>>> sub_g.edges['follows'].data['h'] += 1
>>> g.edges['follows'].data['h'] # Features are not shared.
tensor([[0.],
[1.],
[2.]])
See Also See Also
-------- --------
subgraph node_subgraph
""" """
if graph.is_block: if graph.is_block:
raise DGLError('Extracting subgraph from a block graph is not allowed.') raise DGLError('Extracting subgraph from a block graph is not allowed.')
...@@ -252,73 +280,82 @@ def edge_subgraph(graph, edges, preserve_nodes=False): ...@@ -252,73 +280,82 @@ def edge_subgraph(graph, edges, preserve_nodes=False):
induced_nodes = sgi.induced_nodes induced_nodes = sgi.induced_nodes
return _create_hetero_subgraph(graph, sgi, induced_nodes, induced_edges) return _create_hetero_subgraph(graph, sgi, induced_nodes, induced_edges)
DGLHeteroGraph.edge_subgraph = edge_subgraph DGLHeteroGraph.edge_subgraph = utils.alias_func(edge_subgraph)
def in_subgraph(g, nodes): def in_subgraph(g, nodes):
"""Return the subgraph induced on the inbound edges of all edge types of the """Return the subgraph induced on the inbound edges of all the edge types of the
given nodes. given nodes.
All the nodes are preserved regardless of whether they have an edge or not. An edge-induced subgraph is equivalent to creating a new graph
with the same number of nodes using the given edges. In addition to extracting
the subgraph, DGL conducts the following:
The metagraph of the returned subgraph is the same as the parent graph. * Copy the features of the extracted nodes and edges to the resulting graph.
The copy is *lazy* and incurs data movement only when needed.
Features are copied from the original graph. * Store the IDs of the extracted edges in the ``edata``
of the resulting graph under name ``dgl.EID``.
If the graph is heterogeneous, DGL extracts a subgraph per relation and composes
them as the resulting graph. Thus, the resulting graph has the same set of relations
as the input one.
Parameters Parameters
---------- ----------
g : DGLGraph g : DGLGraph
Full graph structure. The input graph.
nodes : tensor or dict nodes : nodes or dict[str, nodes]
Node ids to sample neighbors from. The allowed types The nodes to form the subgraph. The allowed nodes formats are:
are dictionary of node types to node id tensors, or simply node id tensor if
the given graph g has only one type of nodes. * Int Tensor: Each element is an ID. The tensor must have the same device type
and ID data type as the graph's.
* iterable[int]: Each element is an ID.
If the graph is homogeneous, one can directly pass the above formats.
Otherwise, the argument must be a dictionary with keys being node types
and values being the nodes.
Returns Returns
------- -------
DGLGraph DGLGraph
The subgraph. The subgraph.
One can retrieve the mapping from subgraph edge ID to parent
edge ID via ``dgl.EID`` edge features of the subgraph.
Examples Examples
-------- --------
The following example uses PyTorch backend. The following example uses PyTorch backend.
Instantiate a heterograph. >>> import dgl
>>> import torch
Extract a subgraph from a homogeneous graph.
>>> g = dgl.graph(([0, 1, 2, 3, 4], [1, 2, 3, 4, 0])) # 5-node cycle
>>> g.edata['w'] = torch.arange(10).view(5, 2)
>>> sg = dgl.in_subgraph(g, [2, 0])
>>> sg
Graph(num_nodes=5, num_edges=2,
ndata_schemes={}
edata_schemes={'w': Scheme(shape=(2,), dtype=torch.int64),
'_ID': Scheme(shape=(), dtype=torch.int64)})
>>> sg.edges()
(tensor([1, 4]), tensor([2, 0]))
>>> sg.edata[dgl.EID] # original edge IDs
tensor([1, 4])
>>> sg.edata['w'] # also extract the features
tensor([[2, 3],
[8, 9]])
Extract a subgraph from a heterogeneous graph.
>>> g = dgl.heterograph({ >>> g = dgl.heterograph({
... ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 2, 1]), ... ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 2, 1]),
... ('user', 'follows', 'user'): ([0, 1, 1], [1, 2, 2])}) ... ('user', 'follows', 'user'): ([0, 1, 1], [1, 2, 2])})
>>> # Set edge features
>>> g.edges['follows'].data['h'] = torch.tensor([[0.], [1.], [2.]])
Get subgraphs.
>>> sub_g = g.in_subgraph({'user': [2], 'game': [2]}) >>> sub_g = g.in_subgraph({'user': [2], 'game': [2]})
>>> print(sub_g) >>> sub_g
Graph(num_nodes={'game': 3, 'user': 3}, Graph(num_nodes={'game': 3, 'user': 3},
num_edges={('user', 'plays', 'game'): 1, ('user', 'follows', 'user'): 2}, num_edges={('user', 'plays', 'game'): 1, ('user', 'follows', 'user'): 2},
metagraph=[('user', 'game', 'plays'), ('user', 'user', 'follows')]) metagraph=[('user', 'game', 'plays'), ('user', 'user', 'follows')])
Get the original node/edge indices.
>>> sub_g.edges['plays'].data[dgl.EID]
tensor([2])
>>> sub_g.edges['follows'].data[dgl.EID]
tensor([1, 2])
Get the copied edge features.
>>> sub_g.edges['follows'].data['h']
tensor([[1.],
[2.]])
>>> sub_g.edges['follows'].data['h'] += 1
>>> g.edges['follows'].data['h'] # Features are not shared.
tensor([[0.],
[1.],
[2.]])
See also See also
-------- --------
out_subgraph out_subgraph
...@@ -341,73 +378,82 @@ def in_subgraph(g, nodes): ...@@ -341,73 +378,82 @@ def in_subgraph(g, nodes):
induced_edges = sgi.induced_edges induced_edges = sgi.induced_edges
return _create_hetero_subgraph(g, sgi, None, induced_edges) return _create_hetero_subgraph(g, sgi, None, induced_edges)
DGLHeteroGraph.in_subgraph = in_subgraph DGLHeteroGraph.in_subgraph = utils.alias_func(in_subgraph)
def out_subgraph(g, nodes): def out_subgraph(g, nodes):
"""Return the subgraph induced on the outbound edges of all edge types of the """Return the subgraph induced on the out-bound edges of all the edge types of the
given nodes. given nodes.
All the nodes are preserved regardless of whether they have an edge or not. An edge-induced subgraph is equivalent to creating a new graph
with the same number of nodes using the given edges. In addition to extracting
the subgraph, DGL conducts the following:
The metagraph of the returned subgraph is the same as the parent graph. * Copy the features of the extracted nodes and edges to the resulting graph.
The copy is *lazy* and incurs data movement only when needed.
Features are copied from the original graph. * Store the IDs of the extracted edges in the ``edata``
of the resulting graph under name ``dgl.EID``.
If the graph is heterogeneous, DGL extracts a subgraph per relation and composes
them as the resulting graph. Thus, the resulting graph has the same set of relations
as the input one.
Parameters Parameters
---------- ----------
g : DGLGraph g : DGLGraph
Full graph structure. The input graph.
nodes : tensor or dict nodes : nodes or dict[str, nodes]
Node ids to sample neighbors from. The allowed types The nodes to form the subgraph. The allowed nodes formats are:
are dictionary of node types to node id tensors, or simply node id tensor if
the given graph g has only one type of nodes. * Int Tensor: Each element is a node ID. The tensor must have the same device type
and ID data type as the graph's.
* iterable[int]: Each element is a node ID.
If the graph is homogeneous, one can directly pass the above formats.
Otherwise, the argument must be a dictionary with keys being node types
and values being the nodes.
Returns Returns
------- -------
DGLGraph DGLGraph
The subgraph. The subgraph.
One can retrieve the mapping from subgraph edge ID to parent
edge ID via ``dgl.EID`` edge features of the subgraph.
Examples Examples
-------- --------
The following example uses PyTorch backend. The following example uses PyTorch backend.
Instantiate a heterograph. >>> import dgl
>>> import torch
Extract a subgraph from a homogeneous graph.
>>> g = dgl.graph(([0, 1, 2, 3, 4], [1, 2, 3, 4, 0])) # 5-node cycle
>>> g.edata['w'] = torch.arange(10).view(5, 2)
>>> sg = dgl.out_subgraph(g, [2, 0])
>>> sg
Graph(num_nodes=5, num_edges=2,
ndata_schemes={}
edata_schemes={'w': Scheme(shape=(2,), dtype=torch.int64),
'_ID': Scheme(shape=(), dtype=torch.int64)})
>>> sg.edges()
(tensor([2, 0]), tensor([3, 1]))
>>> sg.edata[dgl.EID] # original edge IDs
tensor([2, 0])
>>> sg.edata['w'] # also extract the features
tensor([[4, 5],
[0, 1]])
Extract a subgraph from a heterogeneous graph.
>>> g = dgl.heterograph({ >>> g = dgl.heterograph({
... ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 2, 1]), ... ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 2, 1]),
... ('user', 'follows', 'user'): ([0, 1, 1], [1, 2, 2])}) ... ('user', 'follows', 'user'): ([0, 1, 1], [1, 2, 2])})
>>> # Set edge features
>>> g.edges['follows'].data['h'] = torch.tensor([[0.], [1.], [2.]])
Get subgraphs.
>>> sub_g = g.out_subgraph({'user': [1]}) >>> sub_g = g.out_subgraph({'user': [1]})
>>> print(sub_g) >>> sub_g
Graph(num_nodes={'game': 3, 'user': 3}, Graph(num_nodes={'game': 3, 'user': 3},
num_edges={('user', 'plays', 'game'): 2, ('user', 'follows', 'user'): 2}, num_edges={('user', 'plays', 'game'): 2, ('user', 'follows', 'user'): 2},
metagraph=[('user', 'game', 'plays'), ('user', 'user', 'follows')]) metagraph=[('user', 'game', 'plays'), ('user', 'user', 'follows')])
Get the original node/edge indices.
>>> sub_g.edges['plays'].data[dgl.EID]
tensor([1, 2])
>>> sub_g.edges['follows'].data[dgl.EID]
tensor([1, 2])
Get the copied edge features.
>>> sub_g.edges['follows'].data['h']
tensor([[1.],
[2.]])
>>> sub_g.edges['follows'].data['h'] += 1
>>> g.edges['follows'].data['h'] # Features are not shared.
tensor([[0.],
[1.],
[2.]])
See also See also
-------- --------
in_subgraph in_subgraph
...@@ -430,22 +476,23 @@ def out_subgraph(g, nodes): ...@@ -430,22 +476,23 @@ def out_subgraph(g, nodes):
induced_edges = sgi.induced_edges induced_edges = sgi.induced_edges
return _create_hetero_subgraph(g, sgi, None, induced_edges) return _create_hetero_subgraph(g, sgi, None, induced_edges)
DGLHeteroGraph.out_subgraph = out_subgraph DGLHeteroGraph.out_subgraph = utils.alias_func(out_subgraph)
def node_type_subgraph(graph, ntypes): def node_type_subgraph(graph, ntypes):
"""Return the subgraph induced on given node types. """Return the subgraph induced on given node types.
The metagraph of the returned subgraph is the subgraph of the original A node-type-induced subgraph contains all the nodes of the given subset of
metagraph induced from the node types. the node types of a graph and any edges whose endpoints are both in this subset.
In addition to extracting the subgraph, DGL also copies the features of the
Features are shared with the original graph. extracted nodes and edges to the resulting graph.
The copy is *lazy* and incurs data movement only when needed.
Parameters Parameters
---------- ----------
graph : DGLGraph graph : DGLGraph
The graph to extract subgraphs from. The graph to extract subgraphs from.
ntypes : list[str] ntypes : list[str]
The node types The type names of the nodes in the subgraph.
Returns Returns
------- -------
...@@ -456,6 +503,9 @@ def node_type_subgraph(graph, ntypes): ...@@ -456,6 +503,9 @@ def node_type_subgraph(graph, ntypes):
-------- --------
The following example uses PyTorch backend. The following example uses PyTorch backend.
>>> import dgl
>>> import torch
Instantiate a heterograph. Instantiate a heterograph.
>>> g = dgl.heterograph({ >>> g = dgl.heterograph({
...@@ -473,17 +523,12 @@ def node_type_subgraph(graph, ntypes): ...@@ -473,17 +523,12 @@ def node_type_subgraph(graph, ntypes):
ndata_schemes={'h': Scheme(shape=(1,), dtype=torch.float32)} ndata_schemes={'h': Scheme(shape=(1,), dtype=torch.float32)}
edata_schemes={}) edata_schemes={})
Get the shared node features. Get the extracted node features.
>>> sub_g.nodes['user'].data['h'] >>> sub_g.nodes['user'].data['h']
tensor([[0.], tensor([[0.],
[1.], [1.],
[2.]]) [2.]])
>>> sub_g.nodes['user'].data['h'] += 1
>>> g.nodes['user'].data['h'] # Features are shared.
tensor([[1.],
[2.],
[3.]])
See Also See Also
-------- --------
...@@ -498,22 +543,28 @@ def node_type_subgraph(graph, ntypes): ...@@ -498,22 +543,28 @@ def node_type_subgraph(graph, ntypes):
etypes.append(graph.canonical_etypes[etid]) etypes.append(graph.canonical_etypes[etid])
return edge_type_subgraph(graph, etypes) return edge_type_subgraph(graph, etypes)
DGLHeteroGraph.node_type_subgraph = node_type_subgraph DGLHeteroGraph.node_type_subgraph = utils.alias_func(node_type_subgraph)
def edge_type_subgraph(graph, etypes): def edge_type_subgraph(graph, etypes):
"""Return the subgraph induced on given edge types. """Return the subgraph induced on given edge types.
The metagraph of the returned subgraph is the subgraph of the original metagraph An edge-type-induced subgraph contains all the edges of the given subset of
induced from the edge types. the edge types of a graph and the nodes incident to those edges.
In addition to extracting the subgraph, DGL also copies the features of the
Features are shared with the original graph. extracted nodes and edges to the resulting graph.
The copy is *lazy* and incurs data movement only when needed.
Parameters Parameters
---------- ----------
graph : DGLGraph graph : DGLGraph
The graph to extract subgraphs from. The graph to extract subgraphs from.
etypes : list[str or tuple] etypes : list[str] or list[(str, str, str)]
The edge types The type names of the edges in the subgraph. The allowed type name
formats are:
* ``(str, str, str)`` for source node type, edge type and destination node type.
* or one ``str`` for the edge type name if the name can uniquely identify a
triplet format in the graph.
Returns Returns
------- -------
...@@ -524,6 +575,9 @@ def edge_type_subgraph(graph, etypes): ...@@ -524,6 +575,9 @@ def edge_type_subgraph(graph, etypes):
-------- --------
The following example uses PyTorch backend. The following example uses PyTorch backend.
>>> import dgl
>>> import torch
Instantiate a heterograph. Instantiate a heterograph.
>>> g = dgl.heterograph({ >>> g = dgl.heterograph({
...@@ -536,7 +590,7 @@ def edge_type_subgraph(graph, etypes): ...@@ -536,7 +590,7 @@ def edge_type_subgraph(graph, etypes):
Get subgraphs. Get subgraphs.
>>> sub_g = g.edge_type_subgraph(['follows']) >>> sub_g = g.edge_type_subgraph(['follows'])
>>> print(sub_g) >>> sub_g
Graph(num_nodes=3, num_edges=3, Graph(num_nodes=3, num_edges=3,
ndata_schemes={} ndata_schemes={}
edata_schemes={'h': Scheme(shape=(1,), dtype=torch.float32)}) edata_schemes={'h': Scheme(shape=(1,), dtype=torch.float32)})
...@@ -547,11 +601,6 @@ def edge_type_subgraph(graph, etypes): ...@@ -547,11 +601,6 @@ def edge_type_subgraph(graph, etypes):
tensor([[0.], tensor([[0.],
[1.], [1.],
[2.]]) [2.]])
>>> sub_g.edges['follows'].data['h'] += 1
>>> g.edges['follows'].data['h'] # Features are shared.
tensor([[1.],
[2.],
[3.]])
See Also See Also
-------- --------
...@@ -579,7 +628,7 @@ def edge_type_subgraph(graph, etypes): ...@@ -579,7 +628,7 @@ def edge_type_subgraph(graph, etypes):
hg = DGLHeteroGraph(hgidx, induced_ntypes, induced_etypes, node_frames, edge_frames) hg = DGLHeteroGraph(hgidx, induced_ntypes, induced_etypes, node_frames, edge_frames)
return hg return hg
DGLHeteroGraph.edge_type_subgraph = edge_type_subgraph DGLHeteroGraph.edge_type_subgraph = utils.alias_func(edge_type_subgraph)
#################### Internal functions #################### #################### Internal functions ####################
......
...@@ -59,30 +59,29 @@ def pairwise_squared_distance(x): ...@@ -59,30 +59,29 @@ def pairwise_squared_distance(x):
#pylint: disable=invalid-name #pylint: disable=invalid-name
def knn_graph(x, k): def knn_graph(x, k):
"""Convert a tensor into k-nearest-neighbor (KNN) graph(s) according """Construct a graph from a set of points according to k-nearest-neighbor (KNN)
to Euclidean distance. and return.
The function transforms the coordinates/features of a point set The function transforms the coordinates/features of a point set
into a directed homogeneous graph. The coordinates of the point into a directed homogeneous graph. The coordinates of the point
set is specified as a matrix whose rows correspond to points and set is specified as a matrix whose rows correspond to points and
columns correspond to coordinate/feature dimensions. columns correspond to coordinate/feature dimensions.
The nodes of the returned graph correspond to the points. An edge The nodes of the returned graph correspond to the points, where the predecessors
exists if the source node is one of the k-nearest neighbors of the of each point are its k-nearest neighbors measured by the Euclidean distance.
destination node.
If you give a 3D tensor, then each submatrix will be transformed If :attr:`x` is a 3D tensor, then each submatrix will be transformed
into a separate graph. DGL then composes the graphs into a large into a separate graph. DGL then composes the graphs into a large
graph of multiple connected components. graph of multiple connected components.
Parameters Parameters
---------- ----------
x : 2D or 3D Tensor x : Tensor
The input tensor. It can be either on CPU or GPU. The point coordinates. It can be either on CPU or GPU.
* If 2D, ``x[i]`` corresponds to the i-th node in the KNN graph. * If it is 2D, ``x[i]`` corresponds to the i-th node in the KNN graph.
* If 3D, ``x[i]`` corresponds to the i-th KNN graph and * If it is 3D, ``x[i]`` corresponds to the i-th KNN graph and
``x[i][j]`` corresponds to the j-th node in the i-th KNN graph. ``x[i][j]`` corresponds to the j-th node in the i-th KNN graph.
k : int k : int
The number of nearest neighbors per node. The number of nearest neighbors per node.
...@@ -90,7 +89,7 @@ def knn_graph(x, k): ...@@ -90,7 +89,7 @@ def knn_graph(x, k):
Returns Returns
------- -------
DGLGraph DGLGraph
The graph. The node IDs are in the same order as :attr:`x`. The constructed graph. The node IDs are in the same order as :attr:`x`.
The returned graph is on CPU, regardless of the context of input :attr:`x`. The returned graph is on CPU, regardless of the context of input :attr:`x`.
...@@ -152,22 +151,25 @@ def knn_graph(x, k): ...@@ -152,22 +151,25 @@ def knn_graph(x, k):
#pylint: disable=invalid-name #pylint: disable=invalid-name
def segmented_knn_graph(x, k, segs): def segmented_knn_graph(x, k, segs):
"""Convert a tensor into multiple k-nearest-neighbor (KNN) graph(s) """Construct multiple graphs from multiple sets of points according to
with different number of nodes. k-nearest-neighbor (KNN) and return.
Each chunk of :attr:`x` contains coordinates/features of a point set. Compared with :func:`dgl.knn_graph`, this allows multiple point sets with
different capacity. The points from different sets are stored contiguously
in the :attr:`x` tensor.
:attr:`segs` specifies the number of points in each point set. The :attr:`segs` specifies the number of points in each point set. The
function constructs a KNN graph for each point set, where the predecessors function constructs a KNN graph for each point set, where the predecessors
of each point are its k-nearest neighbors. DGL then composes all KNN graphs of each point are its k-nearest neighbors measured by the Euclidean distance.
DGL then composes all KNN graphs
into a graph with multiple connected components. into a graph with multiple connected components.
Parameters Parameters
---------- ----------
x : 2D Tensor x : Tensor
Coordinates/features of points. It can be either on CPU or GPU. Coordinates/features of points. Must be 2D. It can be either on CPU or GPU.
k : int k : int
The number of nearest neighbors per node. The number of nearest neighbors per node.
segs : list of int segs : list[int]
Number of points in each point set. The numbers in :attr:`segs` Number of points in each point set. The numbers in :attr:`segs`
must sum up to the number of rows in :attr:`x`. must sum up to the number of rows in :attr:`x`.
...@@ -222,37 +224,28 @@ def segmented_knn_graph(x, k, segs): ...@@ -222,37 +224,28 @@ def segmented_knn_graph(x, k, segs):
return convert.from_scipy(adj) return convert.from_scipy(adj)
def to_bidirected(g, readonly=None, copy_ndata=False): def to_bidirected(g, copy_ndata=False, readonly=None):
r"""Convert the graph to a bidirectional simple graph, adding reverse edges and r"""Convert the graph to a bi-directional simple graph and return.
removing parallel edges.
The function generates a new graph with no edge features. In the new graph,
a single edge ``(u, v)`` exists if and only if there exists an edge connecting ``u``
to ``v`` or an edge connecting ``v`` to ``u`` in the original graph.
For a heterogeneous graph with multiple edge types, DGL treats edges corresponding For an input graph :math:`G`, return a new graph :math:`G'` such that an edge
to each type as a separate graph and convert the graph to a bidirected one :math:`(u, v)\in G'` if and only if there exists an edge :math:`(u, v)\in G` or
for each of them. an edge :math:`(v, u)\in G`. The resulting graph :math:`G'` is a simple graph,
meaning there is no parallel edge.
Since :func:`to_bidirected` **is not well defined for unidirectional The operation only works for edges whose two endpoints belong to the same node type.
bipartite graphs**, DGL will raise an error if an edge type whose source node type is DGL will raise error if the input graph is heterogeneous and contains edges
different from the destination node type exists. with different types of endpoints.
Parameters Parameters
---------- ----------
g : DGLGraph g : DGLGraph
The input graph. The input graph.
readonly : bool
Deprecated. There will be no difference between readonly and non-readonly
(Default: True)
copy_ndata: bool, optional copy_ndata: bool, optional
If True, the node features of the bidirected graph are copied from the If True, the node features of the bidirected graph are copied from the
original graph. original graph. If False, the bidirected graph will not have any node features.
If False, the bidirected graph will not have any node features.
(Default: False) (Default: False)
readonly : bool
**DEPRECATED**.
Returns Returns
------- -------
...@@ -261,10 +254,9 @@ def to_bidirected(g, readonly=None, copy_ndata=False): ...@@ -261,10 +254,9 @@ def to_bidirected(g, readonly=None, copy_ndata=False):
Notes Notes
----- -----
If :attr:`copy_ndata` is True, same tensors will be used for If :attr:`copy_ndata` is True, the resulting graph will share the node feature
the features of the original graph and the returned graph to save memory cost. tensors with the input graph. Hence, users should try to avoid in-place operations
As a result, users should avoid performing in-place operations on the features of which will be visible to both graphs.
the returned graph, which will corrupt the features of the original graph as well.
Examples Examples
-------- --------
...@@ -314,24 +306,21 @@ def to_bidirected(g, readonly=None, copy_ndata=False): ...@@ -314,24 +306,21 @@ def to_bidirected(g, readonly=None, copy_ndata=False):
def add_reverse_edges(g, readonly=None, copy_ndata=True, def add_reverse_edges(g, readonly=None, copy_ndata=True,
copy_edata=False, ignore_bipartite=False): copy_edata=False, ignore_bipartite=False):
r"""Add reverse edges to a graph. r"""Add a reversed edge for each edge in the input graph and return a new graph.
For a graph with edges :math:`(i_1, j_1), \cdots, (i_n, j_n)`, this For a graph with edges :math:`(i_1, j_1), \cdots, (i_n, j_n)`, this
function creates a new graph with edges function creates a new graph with edges
:math:`(i_1, j_1), \cdots, (i_n, j_n), (j_1, i_1), \cdots, (j_n, i_n)`. :math:`(i_1, j_1), \cdots, (i_n, j_n), (j_1, i_1), \cdots, (j_n, i_n)`.
For a heterogeneous graph with multiple edge types, DGL treats the edges corresponding The operation only works for edges whose two endpoints belong to the same node type.
to each type as a separate graph and add reverse edges for each of them. DGL will raise error if the input graph is heterogeneous and contains edges
with different types of endpoints. If :attr:`ignore_bipartite` is true, DGL will
Since :func:`add_reverse_edges` **is not well defined for unidirectional bipartite graphs**, ignore those edges instead.
an error will be raised if an edge type of the input heterogeneous graph is for a
unidirectional bipartite graph. DGL simply skips the edge types corresponding
to unidirectional bipartite graphs by specifying ``ignore_bipartite=True``.
Parameters Parameters
---------- ----------
g : DGLGraph g : DGLGraph
The input graph. Can be on either CPU or GPU. The input graph.
readonly : bool, default to be True readonly : bool, default to be True
Deprecated. There will be no difference between readonly and non-readonly Deprecated. There will be no difference between readonly and non-readonly
copy_ndata: bool, optional copy_ndata: bool, optional
...@@ -360,13 +349,10 @@ def add_reverse_edges(g, readonly=None, copy_ndata=True, ...@@ -360,13 +349,10 @@ def add_reverse_edges(g, readonly=None, copy_ndata=True,
Notes Notes
----- -----
If :attr:`copy_ndata` is True, same tensors are used as If :attr:`copy_ndata` is True, the resulting graph will share the node feature
the node features of the original graph and the new graph. tensors with the input graph. Hence, users should try to avoid in-place operations
As a result, users should avoid performing in-place operations which will be visible to both graphs. On the contrary, the two graphs do not share
on the node features of the new graph to avoid feature corruption. the same edge feature storage.
On the contrary, edge features are concatenated,
and they are not shared due to concatenation.
Examples Examples
-------- --------
...@@ -377,7 +363,7 @@ def add_reverse_edges(g, readonly=None, copy_ndata=True, ...@@ -377,7 +363,7 @@ def add_reverse_edges(g, readonly=None, copy_ndata=True,
>>> bg1.edges() >>> bg1.edges()
(tensor([0, 0, 0, 1]), tensor([0, 1, 0, 0])) (tensor([0, 0, 0, 1]), tensor([0, 1, 0, 0]))
**Heterogeneous graphs with Multiple Edge Types** **Heterogeneous graphs**
>>> g = dgl.heterograph({ >>> g = dgl.heterograph({
>>> ('user', 'wins', 'user'): (th.tensor([0, 2, 0, 2, 2]), th.tensor([1, 1, 2, 1, 0])), >>> ('user', 'wins', 'user'): (th.tensor([0, 2, 0, 2, 2]), th.tensor([1, 1, 2, 1, 0])),
...@@ -489,12 +475,11 @@ def line_graph(g, backtracking=True, shared=False): ...@@ -489,12 +475,11 @@ def line_graph(g, backtracking=True, shared=False):
Notes Notes
----- -----
If :attr:`shared` is True, same tensors will be used for * If :attr:`shared` is True, the node features of the resulting graph share the same
the features of the original graph and the returned graph to save memory cost. storage with the edge features of the input graph. Hence, users should try to
As a result, users should avoid performing in-place operations on the features of avoid in-place operations which will be visible to both graphs.
the returned graph, which will corrupt the features of the original graph as well.
The implementation is done on CPU, even if the input and output graphs are on GPU. * The function supports input graph on GPU but copies it to CPU during computation.
Examples Examples
-------- --------
...@@ -532,15 +517,13 @@ def line_graph(g, backtracking=True, shared=False): ...@@ -532,15 +517,13 @@ def line_graph(g, backtracking=True, shared=False):
return lg return lg
DGLHeteroGraph.line_graph = line_graph DGLHeteroGraph.line_graph = utils.alias_func(line_graph)
def khop_adj(g, k): def khop_adj(g, k):
"""Return the matrix of :math:`A^k` where :math:`A` is the adjacency matrix of the graph """Return the matrix of :math:`A^k` where :math:`A` is the adjacency matrix of the graph
:math:`g`, where rows represent source nodes and columns represent destination nodes. :math:`g`.
The returned matrix is a 32-bit float dense matrix on CPU.
The graph must be homogeneous. The returned matrix is a 32-bit float dense matrix on CPU. The graph must be homogeneous.
Parameters Parameters
---------- ----------
...@@ -551,7 +534,7 @@ def khop_adj(g, k): ...@@ -551,7 +534,7 @@ def khop_adj(g, k):
Returns Returns
------- -------
tensor Tensor
The returned tensor. The returned tensor.
Examples Examples
...@@ -607,10 +590,9 @@ def khop_graph(g, k, copy_ndata=True): ...@@ -607,10 +590,9 @@ def khop_graph(g, k, copy_ndata=True):
Notes Notes
----- -----
If :attr:`copy_ndata` is True, same tensors will be used for If :attr:`copy_ndata` is True, the resulting graph will share the node feature
the features of the original graph and the returned graph to save memory cost. tensors with the input graph. Hence, users should try to avoid in-place operations
As a result, users should avoid performing in-place operations on the features of which will be visible to both graphs.
the returned graph, which will corrupt the features of the original graph as well.
Examples Examples
-------- --------
...@@ -656,19 +638,11 @@ def khop_graph(g, k, copy_ndata=True): ...@@ -656,19 +638,11 @@ def khop_graph(g, k, copy_ndata=True):
return new_g return new_g
def reverse(g, copy_ndata=True, copy_edata=False, *, share_ndata=None, share_edata=None): def reverse(g, copy_ndata=True, copy_edata=False, *, share_ndata=None, share_edata=None):
r"""Return the reverse of a graph. r"""Return a new graph with every edge being the reverse of the corresponding edge in the input graph.
The reverse (also called converse, transpose) of a graph with edges The reverse (also called converse, transpose) of a graph with edges
:math:`(i_1, j_1), (i_2, j_2), \cdots` is a new graph with edges :math:`(i_1, j_1), (i_2, j_2), \cdots` of type ``(U, E, V)`` is a new graph with edges
:math:`(j_1, i_1), (j_2, i_2), \cdots`. :math:`(j_1, i_1), (j_2, i_2), \cdots` of type ``(V, E, U)``.
For a heterogeneous graph with multiple edge types, DGL treats the edges corresponding
to each type as a separate graph and compute the reverse for each of them.
If the original edge type is ``(A, B, C)``, its reverse will have edge type
``(C, B, A)``.
Given a :class:`DGLGraph` object, DGL returns another :class:`DGLGraph`
object representing its reverse.
Parameters Parameters
---------- ----------
...@@ -676,17 +650,11 @@ def reverse(g, copy_ndata=True, copy_edata=False, *, share_ndata=None, share_eda ...@@ -676,17 +650,11 @@ def reverse(g, copy_ndata=True, copy_edata=False, *, share_ndata=None, share_eda
The input graph. The input graph.
copy_ndata: bool, optional copy_ndata: bool, optional
If True, the node features of the reversed graph are copied from the If True, the node features of the reversed graph are copied from the
original graph. original graph. If False, the reversed graph will not have any node features.
If False, the reversed graph will not have any node features.
(Default: True) (Default: True)
copy_edata: bool, optional copy_edata: bool, optional
If True, the edge features of the reversed graph are copied from the If True, the edge features of the reversed graph are copied from the
original graph. original graph. If False, the reversed graph will not have any edge features.
If False, the reversed graph will not have any edge features.
(Default: False) (Default: False)
Return Return
...@@ -696,14 +664,14 @@ def reverse(g, copy_ndata=True, copy_edata=False, *, share_ndata=None, share_eda ...@@ -696,14 +664,14 @@ def reverse(g, copy_ndata=True, copy_edata=False, *, share_ndata=None, share_eda
Notes Notes
----- -----
If :attr:`copy_ndata` or :attr:`copy_edata` is True, same tensors will be used for If :attr:`copy_ndata` or :attr:`copy_edata` is True,
the features of the original graph and the reversed graph to save memory cost. the resulting graph will share the node or edge feature
As a result, users should avoid performing in-place operations on the features of tensors with the input graph. Hence, users should try to avoid in-place operations
the reversed graph, which will corrupt the features of the original graph as well. which will be visible to both graphs.
Examples Examples
-------- --------
**Homogeneous graphs or Heterogeneous graphs with A Single Edge Type** **Homogeneous graphs**
Create a graph to reverse. Create a graph to reverse.
...@@ -731,24 +699,7 @@ def reverse(g, copy_ndata=True, copy_edata=False, *, share_ndata=None, share_eda ...@@ -731,24 +699,7 @@ def reverse(g, copy_ndata=True, copy_edata=False, *, share_ndata=None, share_eda
[4.], [4.],
[5.]]) [5.]])
**In-place operations on features of one graph will be reflected on features of **Heterogeneous graphs**
its reverse, which is dangerous. Out-place operations will not be reflected.**
>>> rg.ndata['h'] += 1
>>> g.ndata['h']
tensor([[1.],
[2.],
[3.]])
>>> g.ndata['h'] += 1
>>> rg.ndata['h']
tensor([[2.],
[3.],
[4.]])
>>> rg.ndata['h2'] = th.ones(3, 1)
>>> 'h2' in g.ndata
False
**Heterogenenous graphs with Multiple Edge Types**
>>> g = dgl.heterograph({ >>> g = dgl.heterograph({
... ('user', 'follows', 'user'): (th.tensor([0, 2]), th.tensor([1, 2])), ... ('user', 'follows', 'user'): (th.tensor([0, 2]), th.tensor([1, 2])),
...@@ -758,7 +709,7 @@ def reverse(g, copy_ndata=True, copy_edata=False, *, share_ndata=None, share_eda ...@@ -758,7 +709,7 @@ def reverse(g, copy_ndata=True, copy_edata=False, *, share_ndata=None, share_eda
>>> g.edges['plays'].data['he'] = th.zeros(3, 1) >>> g.edges['plays'].data['he'] = th.zeros(3, 1)
The resulting graph will have edge types The resulting graph will have edge types
``('user', 'follows', 'user)`` and ``('user', 'plays', 'game')``. ``('user', 'follows', 'user')`` and ``('game', 'plays', 'user')``.
>>> rg = dgl.reverse(g, copy_ndata=True) >>> rg = dgl.reverse(g, copy_ndata=True)
>>> rg >>> rg
...@@ -804,7 +755,7 @@ def reverse(g, copy_ndata=True, copy_edata=False, *, share_ndata=None, share_eda ...@@ -804,7 +755,7 @@ def reverse(g, copy_ndata=True, copy_edata=False, *, share_ndata=None, share_eda
return new_g return new_g
DGLHeteroGraph.reverse = reverse DGLHeteroGraph.reverse = utils.alias_func(reverse)
def to_simple_graph(g): def to_simple_graph(g):
"""Convert the graph to a simple graph with no multi-edge. """Convert the graph to a simple graph with no multi-edge.
...@@ -874,14 +825,15 @@ def to_bidirected_stale(g, readonly=True): ...@@ -874,14 +825,15 @@ def to_bidirected_stale(g, readonly=True):
def laplacian_lambda_max(g): def laplacian_lambda_max(g):
"""Return the largest eigenvalue of the normalized symmetric Laplacian of a graph. """Return the largest eigenvalue of the normalized symmetric Laplacian of a graph.
If the graph is batched from multiple graphs, return the list of the largest eigenvalue If the graph is batched from multiple graphs, return the list of the largest eigenvalue
for each graph instead. for each graph instead.
Parameters Parameters
---------- ----------
g : DGLGraph g : DGLGraph
The input graph, it should be an undirected graph. It must be homogeneous. The input graph, it must be a bi-directed homogeneous graph, i.e., every edge
should have an accompanied reverse edge in the graph.
The graph can be batched from multiple graphs. The graph can be batched from multiple graphs.
Returns Returns
...@@ -938,7 +890,7 @@ def metapath_reachable_graph(g, metapath): ...@@ -938,7 +890,7 @@ def metapath_reachable_graph(g, metapath):
Returns Returns
------- -------
DGLGraph DGLGraph
A homogeneous or unidirectional bipartite graph. It will be on CPU regardless of A homogeneous or unidirectional bipartite graph. It will be on CPU regardless of
whether the input graph is on CPU or GPU. whether the input graph is on CPU or GPU.
Examples Examples
...@@ -970,21 +922,20 @@ def metapath_reachable_graph(g, metapath): ...@@ -970,21 +922,20 @@ def metapath_reachable_graph(g, metapath):
return new_g return new_g
def add_nodes(g, num, data=None, ntype=None): def add_nodes(g, num, data=None, ntype=None):
r"""Append new nodes of the given node type. r"""Add the given number of nodes to the graph and return a new graph.
The new nodes will have IDs starting from ``g.number_of_nodes(ntype)``. The new nodes will have IDs starting from ``g.num_nodes(ntype)``.
A new graph with newly added nodes is returned.
Parameters Parameters
---------- ----------
num : int num : int
Number of nodes to add. The number of nodes to add.
data : dict, optional data : dict[str, Tensor], optional
Feature data of the added nodes. Feature data of the added nodes. The keys are feature names
while the values are feature data.
ntype : str, optional ntype : str, optional
The type of the new nodes. Can be omitted if there is The node type name. Can be omitted if there is
only one node type in the graph. only one type of nodes in the graph.
Return Return
------ ------
...@@ -993,11 +944,10 @@ def add_nodes(g, num, data=None, ntype=None): ...@@ -993,11 +944,10 @@ def add_nodes(g, num, data=None, ntype=None):
Notes Notes
----- -----
* If the key of :attr:`data` does not contain some existing feature fields, * For features in :attr:`g` but not in :attr:`data`,
those features for the new nodes will be filled with zeros). DGL assigns zero features for the newly added nodes.
* For features in :attr:`data` but not in :attr:`g`, DGL assigns zero features
* If the key of :attr:`data` contains new feature fields, those features for for the existing nodes in the graph.
the old nodes will be filled zeros).
Examples Examples
-------- --------
...@@ -1007,7 +957,7 @@ def add_nodes(g, num, data=None, ntype=None): ...@@ -1007,7 +957,7 @@ def add_nodes(g, num, data=None, ntype=None):
>>> import dgl >>> import dgl
>>> import torch >>> import torch
**Homogeneous Graphs or Heterogeneous Graphs with A Single Node Type** **Homogeneous Graphs**
>>> g = dgl.graph((torch.tensor([0, 1]), torch.tensor([1, 2]))) >>> g = dgl.graph((torch.tensor([0, 1]), torch.tensor([1, 2])))
>>> g.num_nodes() >>> g.num_nodes()
...@@ -1017,26 +967,26 @@ def add_nodes(g, num, data=None, ntype=None): ...@@ -1017,26 +967,26 @@ def add_nodes(g, num, data=None, ntype=None):
5 5
If the graph has some node features and new nodes are added without If the graph has some node features and new nodes are added without
features, their features will be created with zeros. features, their features will be filled with zeros.
>>> g.ndata['h'] = torch.ones(5, 1) >>> g.ndata['h'] = torch.ones(5, 1)
>>> g = dgl.add_nodes(g, 1) >>> g = dgl.add_nodes(g, 1)
>>> g.ndata['h'] >>> g.ndata['h']
tensor([[1.], [1.], [1.], [1.], [1.], [0.]]) tensor([[1.], [1.], [1.], [1.], [1.], [0.]])
You can also assign features for the new nodes in adding new nodes. Assign features for the new nodes.
>>> g = dgl.add_nodes(g, 1, {'h': torch.ones(1, 1), 'w': torch.ones(1, 1)}) >>> g = dgl.add_nodes(g, 1, {'h': torch.ones(1, 1), 'w': torch.ones(1, 1)})
>>> g.ndata['h'] >>> g.ndata['h']
tensor([[1.], [1.], [1.], [1.], [1.], [0.], [1.]]) tensor([[1.], [1.], [1.], [1.], [1.], [0.], [1.]])
Since :attr:`data` contains new feature fields, the features for old nodes Since :attr:`data` contains new feature fields, the features for existing nodes
will be created with zeros. will be filled with zeros.
>>> g.ndata['w'] >>> g.ndata['w']
tensor([[0.], [0.], [0.], [0.], [0.], [0.], [1.]]) tensor([[0.], [0.], [0.], [0.], [0.], [0.], [1.]])
**Heterogeneous Graphs with Multiple Node Types** **Heterogeneous Graphs**
>>> g = dgl.heterograph({ >>> g = dgl.heterograph({
... ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]), ... ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
...@@ -1061,25 +1011,28 @@ def add_nodes(g, num, data=None, ntype=None): ...@@ -1061,25 +1011,28 @@ def add_nodes(g, num, data=None, ntype=None):
return g return g
def add_edges(g, u, v, data=None, etype=None): def add_edges(g, u, v, data=None, etype=None):
r"""Append multiple new edges for the specified edge type. r"""Add the edges to the graph and return a new graph.
A new graph with newly added edges is returned.
The i-th new edge will be from ``u[i]`` to ``v[i]``. The IDs of the new The i-th new edge will be from ``u[i]`` to ``v[i]``. The IDs of the new
edges will start from ``g.number_of_edges(etype)``. edges will start from ``g.num_edges(etype)``.
Parameters Parameters
---------- ----------
u : int, tensor, numpy.ndarray, list u : int, Tensor or iterable[int]
Source node IDs, ``u[i]`` gives the source node for the i-th new edge. Source node IDs, ``u[i]`` gives the source node for the i-th new edge.
v : int, tensor, numpy.ndarray, list v : int, Tensor or iterable[int]
Destination node IDs, ``v[i]`` gives the destination node for the i-th new edge. Destination node IDs, ``v[i]`` gives the destination node for the i-th new edge.
data : dict, optional data : dict[str, Tensor], optional
Feature data of the added edges. The i-th row of the feature data Feature data of the added edges. The keys are feature names
corresponds to the i-th new edge. while the values are feature data.
etype : str or tuple of str, optional etype : str or (str, str, str), optional
The type of the new edges. Can be omitted if there is The type names of the edges. The allowed type name formats are:
only one edge type in the graph.
* ``(str, str, str)`` for source node type, edge type and destination node type.
* or one ``str`` edge type name if the name can uniquely identify a
triplet format in the graph.
Can be omitted if the graph has only one type of edges.
Return Return
------ ------
...@@ -1088,15 +1041,13 @@ def add_edges(g, u, v, data=None, etype=None): ...@@ -1088,15 +1041,13 @@ def add_edges(g, u, v, data=None, etype=None):
Notes Notes
----- -----
* If end nodes of adding edges does not exists, add_nodes is invoked * If the end nodes of the given edges do not exist in :attr:`g`,
to add new nodes. The node features of the new nodes will be created :func:`dgl.add_nodes` is invoked to add those nodes.
with zeros. The node features of the new nodes will be filled with zeros.
* For features in :attr:`g` but not in :attr:`data`,
* If the key of :attr:`data` does not contain some existing feature fields, DGL assigns zero features for the newly added edges.
those features for the new edges will be created with zeros. * For features in :attr:`data` but not in :attr:`g`, DGL assigns zero features
for the existing edges in the graph.
* If the key of :attr:`data` contains new feature fields, those features for
the old edges will be created with zeros.
Examples Examples
-------- --------
...@@ -1105,7 +1056,7 @@ def add_edges(g, u, v, data=None, etype=None): ...@@ -1105,7 +1056,7 @@ def add_edges(g, u, v, data=None, etype=None):
>>> import dgl >>> import dgl
>>> import torch >>> import torch
**Homogeneous Graphs or Heterogeneous Graphs with A Single Edge Type** **Homogeneous Graphs**
>>> g = dgl.graph((torch.tensor([0, 1]), torch.tensor([1, 2]))) >>> g = dgl.graph((torch.tensor([0, 1]), torch.tensor([1, 2])))
>>> g.num_edges() >>> g.num_edges()
...@@ -1121,7 +1072,7 @@ def add_edges(g, u, v, data=None, etype=None): ...@@ -1121,7 +1072,7 @@ def add_edges(g, u, v, data=None, etype=None):
4 4
If the graph has some edge features and new edges are added without If the graph has some edge features and new edges are added without
features, their features will be created with zeros. features, their features will be filled with zeros.
>>> g.edata['h'] = torch.ones(4, 1) >>> g.edata['h'] = torch.ones(4, 1)
>>> g = dgl.add_edges(g, torch.tensor([1]), torch.tensor([1])) >>> g = dgl.add_edges(g, torch.tensor([1]), torch.tensor([1]))
...@@ -1136,12 +1087,12 @@ def add_edges(g, u, v, data=None, etype=None): ...@@ -1136,12 +1087,12 @@ def add_edges(g, u, v, data=None, etype=None):
tensor([[1.], [1.], [1.], [1.], [0.], [1.], [2.]]) tensor([[1.], [1.], [1.], [1.], [0.], [1.], [2.]])
Since :attr:`data` contains new feature fields, the features for old edges Since :attr:`data` contains new feature fields, the features for old edges
will be created with zeros. will be filled with zeros.
>>> g.edata['w'] >>> g.edata['w']
tensor([[0.], [0.], [0.], [0.], [0.], [1.], [1.]]) tensor([[0.], [0.], [0.], [0.], [0.], [1.], [1.]])
**Heterogeneous Graphs with Multiple Edge Types** **Heterogeneous Graphs**
>>> g = dgl.heterograph({ >>> g = dgl.heterograph({
... ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]), ... ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
...@@ -1166,22 +1117,24 @@ def add_edges(g, u, v, data=None, etype=None): ...@@ -1166,22 +1117,24 @@ def add_edges(g, u, v, data=None, etype=None):
return g return g
def remove_edges(g, eids, etype=None): def remove_edges(g, eids, etype=None):
r"""Remove multiple edges with the specified edge type. r"""Remove the specified edges and return a new graph.
A new graph with certain edges deleted is returned.
Nodes will not be removed. After removing edges, the rest
edges will be re-indexed using consecutive integers from 0,
with their relative order preserved.
The features for the removed edges will be removed accordingly. Also delete the features of the edges. The edges must exist in the graph.
The resulting graph has the same number of the nodes as the input one,
even if some nodes become isolated after the edge removal.
Parameters Parameters
---------- ----------
eids : int, tensor, numpy.ndarray, list eids : int, Tensor, iterable[int]
IDs for the edges to remove. The IDs of the edges to remove.
etype : str or tuple of str, optional etype : str or (str, str, str), optional
The type of the edges to remove. Can be omitted if there is The type names of the edges. The allowed type name formats are:
only one edge type in the graph.
* ``(str, str, str)`` for source node type, edge type and destination node type.
* or one ``str`` edge type name if the name can uniquely identify a
triplet format in the graph.
Can be omitted if the graph has only one type of edges.
Return Return
------ ------
...@@ -1193,7 +1146,7 @@ def remove_edges(g, eids, etype=None): ...@@ -1193,7 +1146,7 @@ def remove_edges(g, eids, etype=None):
>>> import dgl >>> import dgl
>>> import torch >>> import torch
**Homogeneous Graphs or Heterogeneous Graphs with A Single Edge Type** **Homogeneous Graphs**
>>> g = dgl.graph((torch.tensor([0, 0, 2]), torch.tensor([0, 1, 2]))) >>> g = dgl.graph((torch.tensor([0, 0, 2]), torch.tensor([0, 1, 2])))
>>> g.edata['he'] = torch.arange(3).float().reshape(-1, 1) >>> g.edata['he'] = torch.arange(3).float().reshape(-1, 1)
...@@ -1207,7 +1160,7 @@ def remove_edges(g, eids, etype=None): ...@@ -1207,7 +1160,7 @@ def remove_edges(g, eids, etype=None):
>>> g.edata['he'] >>> g.edata['he']
tensor([[2.]]) tensor([[2.]])
**Heterogeneous Graphs with Multiple Edge Types** **Heterogeneous Graphs**
>>> g = dgl.heterograph({ >>> g = dgl.heterograph({
... ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]), ... ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
...@@ -1231,19 +1184,16 @@ def remove_edges(g, eids, etype=None): ...@@ -1231,19 +1184,16 @@ def remove_edges(g, eids, etype=None):
def remove_nodes(g, nids, ntype=None): def remove_nodes(g, nids, ntype=None):
r"""Remove multiple nodes with the specified node type. r"""Remove the specified nodes and return a new graph.
A new graph with certain nodes deleted is returned.
Edges that connect to the nodes will be removed as well. After removing
nodes and edges, the rest nodes and edges will be re-indexed using
consecutive integers from 0, with their relative order preserved.
The features for the removed nodes/edges will be removed accordingly. Also delete the features. Edges that connect from/to the nodes will be
removed as well. After the removal, DGL re-labels the remaining nodes and edges
with IDs from 0.
Parameters Parameters
---------- ----------
nids : int, tensor, numpy.ndarray, list nids : int, Tensor, iterable[int]
Nodes to remove. The nodes to be removed.
ntype : str, optional ntype : str, optional
The type of the nodes to remove. Can be omitted if there is The type of the nodes to remove. Can be omitted if there is
only one node type in the graph. only one node type in the graph.
...@@ -1259,7 +1209,7 @@ def remove_nodes(g, nids, ntype=None): ...@@ -1259,7 +1209,7 @@ def remove_nodes(g, nids, ntype=None):
>>> import dgl >>> import dgl
>>> import torch >>> import torch
**Homogeneous Graphs or Heterogeneous Graphs with A Single Node Type** **Homogeneous Graphs**
>>> g = dgl.graph((torch.tensor([0, 0, 2]), torch.tensor([0, 1, 2]))) >>> g = dgl.graph((torch.tensor([0, 0, 2]), torch.tensor([0, 1, 2])))
>>> g.ndata['hv'] = torch.arange(3).float().reshape(-1, 1) >>> g.ndata['hv'] = torch.arange(3).float().reshape(-1, 1)
...@@ -1274,7 +1224,7 @@ def remove_nodes(g, nids, ntype=None): ...@@ -1274,7 +1224,7 @@ def remove_nodes(g, nids, ntype=None):
>>> g.edata['he'] >>> g.edata['he']
tensor([[2.]]) tensor([[2.]])
**Heterogeneous Graphs with Multiple Node Types** **Heterogeneous Graphs**
>>> g = dgl.heterograph({ >>> g = dgl.heterograph({
... ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]), ... ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
...@@ -1301,42 +1251,41 @@ def remove_nodes(g, nids, ntype=None): ...@@ -1301,42 +1251,41 @@ def remove_nodes(g, nids, ntype=None):
return g return g
def add_self_loop(g, etype=None): def add_self_loop(g, etype=None):
r"""Add self-loop for each node in the graph for the given edge type. r"""Add self-loops for each node in the graph and return a new graph.
A new graph with self-loop is returned.
If the graph is heterogeneous, the given edge type must have its source
node type the same as its destination node type.
Parameters Parameters
---------- ----------
g : DGLGraph g : DGLGraph
The graph. The graph.
etype : str or tuple of str, optional etype : str or (str, str, str), optional
The type of the edges to remove. Can be omitted if there is The type names of the edges. The allowed type name formats are:
only one edge type in the graph.
* ``(str, str, str)`` for source node type, edge type and destination node type.
* or one ``str`` edge type name if the name can uniquely identify a
triplet format in the graph.
Its source node type must be the same as its destination node type. Can be omitted if the graph has only one type of edges.
Return Return
------ ------
DGLGraph DGLGraph
The graph with self-loop. The graph with self-loops.
Notes Notes
----- -----
* :func:`add_self_loop` adds self loops regardless of whether the self-loop already exists. * The function only supports homogeneous graphs or heterogeneous graphs but
the relation graph specified by the :attr:`etype` argument is homogeneous.
If you would like to have exactly one self-loop for every node, you would need to * The function adds self-loops regardless of whether they already exist or not.
If one wishes to have exactly one self-loop for every node,
call :func:`remove_self_loop` before invoking :func:`add_self_loop`. call :func:`remove_self_loop` before invoking :func:`add_self_loop`.
* Features of the new edges (self-loop edges) will be filled with zeros.
* Features for the new edges (self-loop edges) will be created with zeros.
Examples Examples
-------- --------
>>> import dgl >>> import dgl
>>> import torch >>> import torch
**Homogeneous Graphs or Heterogeneous Graphs with A Single Node Type** **Homogeneous Graphs**
>>> g = dgl.graph((torch.tensor([0, 0, 2]), torch.tensor([2, 1, 0]))) >>> g = dgl.graph((torch.tensor([0, 0, 2]), torch.tensor([2, 1, 0])))
>>> g.ndata['hv'] = torch.arange(3).float().reshape(-1, 1) >>> g.ndata['hv'] = torch.arange(3).float().reshape(-1, 1)
...@@ -1354,7 +1303,7 @@ def add_self_loop(g, etype=None): ...@@ -1354,7 +1303,7 @@ def add_self_loop(g, etype=None):
[0.], [0.],
[0.]]) [0.]])
**Heterogeneous Graphs with Multiple Node Types** **Heterogeneous Graphs**
>>> g = dgl.heterograph({ >>> g = dgl.heterograph({
... ('user', 'follows', 'user'): (torch.tensor([1, 2]), ... ('user', 'follows', 'user'): (torch.tensor([1, 2]),
...@@ -1377,20 +1326,28 @@ def add_self_loop(g, etype=None): ...@@ -1377,20 +1326,28 @@ def add_self_loop(g, etype=None):
new_g = add_edges(g, nodes, nodes, etype=etype) new_g = add_edges(g, nodes, nodes, etype=etype)
return new_g return new_g
DGLHeteroGraph.add_self_loop = add_self_loop DGLHeteroGraph.add_self_loop = utils.alias_func(add_self_loop)
def remove_self_loop(g, etype=None): def remove_self_loop(g, etype=None):
r""" Remove self loops for each node in the graph. r""" Remove self-loops for each node in the graph and return a new graph.
A new graph with self-loop removed is returned.
If there are multiple self loops for a certain node,
all of them will be removed.
Parameters Parameters
---------- ----------
etype : str or tuple of str, optional g : DGLGraph
The type of the edges to remove. Can be omitted if there is The graph.
only one edge type in the graph. etype : str or (str, str, str), optional
The type names of the edges. The allowed type name formats are:
* ``(str, str, str)`` for source node type, edge type and destination node type.
* or one ``str`` edge type name if the name can uniquely identify a
triplet format in the graph.
Can be omitted if the graph has only one type of edges.
Notes
-----
If a node has multiple self-loops, remove them all. Do nothing for nodes without
self-loops.
Examples Examples
--------- ---------
...@@ -1398,7 +1355,7 @@ def remove_self_loop(g, etype=None): ...@@ -1398,7 +1355,7 @@ def remove_self_loop(g, etype=None):
>>> import dgl >>> import dgl
>>> import torch >>> import torch
**Homogeneous Graphs or Heterogeneous Graphs with A Single Node Type** **Homogeneous Graphs**
>>> g = dgl.graph((torch.tensor([0, 0, 0, 1]), torch.tensor([1, 0, 0, 2]))) >>> g = dgl.graph((torch.tensor([0, 0, 0, 1]), torch.tensor([1, 0, 0, 2])))
>>> g.edata['he'] = torch.arange(4).float().reshape(-1, 1) >>> g.edata['he'] = torch.arange(4).float().reshape(-1, 1)
...@@ -1409,7 +1366,7 @@ def remove_self_loop(g, etype=None): ...@@ -1409,7 +1366,7 @@ def remove_self_loop(g, etype=None):
>>> g.edata['he'] >>> g.edata['he']
tensor([[0.],[3.]]) tensor([[0.],[3.]])
**Heterogeneous Graphs with Multiple Node Types** **Heterogeneous Graphs**
>>> g = dgl.heterograph({ >>> g = dgl.heterograph({
... ('user', 'follows', 'user'): (torch.tensor([0, 1, 1, 1, 2]), ... ('user', 'follows', 'user'): (torch.tensor([0, 1, 1, 1, 2]),
...@@ -1442,7 +1399,7 @@ def remove_self_loop(g, etype=None): ...@@ -1442,7 +1399,7 @@ def remove_self_loop(g, etype=None):
new_g = remove_edges(g, self_loop_eids, etype=etype) new_g = remove_edges(g, self_loop_eids, etype=etype)
return new_g return new_g
DGLHeteroGraph.remove_self_loop = remove_self_loop DGLHeteroGraph.remove_self_loop = utils.alias_func(remove_self_loop)
def compact_graphs(graphs, always_preserve=None, copy_ndata=True, copy_edata=True): def compact_graphs(graphs, always_preserve=None, copy_ndata=True, copy_edata=True):
"""Given a list of graphs with the same set of nodes, find and eliminate the common """Given a list of graphs with the same set of nodes, find and eliminate the common
...@@ -1502,10 +1459,9 @@ def compact_graphs(graphs, always_preserve=None, copy_ndata=True, copy_edata=Tru ...@@ -1502,10 +1459,9 @@ def compact_graphs(graphs, always_preserve=None, copy_ndata=True, copy_edata=Tru
This function currently requires that the same node type of all graphs should have This function currently requires that the same node type of all graphs should have
the same node type ID, i.e. the node types are *ordered* the same. the same node type ID, i.e. the node types are *ordered* the same.
If :attr:`copy_edata` is True, same tensors will be used for If :attr:`copy_edata` is True, the resulting graph will share the edge feature
the features of the original graphs and the returned graphs to save memory cost. tensors with the input graph. Hence, users should try to avoid in-place operations
As a result, users should avoid performing in-place operations on the edge features of which will be visible to both graphs.
the returned graph, which will corrupt the edge features of the original graph as well.
Examples Examples
-------- --------
...@@ -1666,6 +1622,13 @@ def to_block(g, dst_nodes=None, include_dst_in_src=True): ...@@ -1666,6 +1622,13 @@ def to_block(g, dst_nodes=None, include_dst_in_src=True):
If :attr:`dst_nodes` is specified but it is not a superset of all the nodes that If :attr:`dst_nodes` is specified but it is not a superset of all the nodes that
have at least one inbound edge. have at least one inbound edge.
Notes
-----
:func:`to_block` is most commonly used in customizing neighborhood sampling
for stochastic training on a large graph. Please refer to the user guide
:ref:`guide-minibatch` for a more thorough discussion about the methodology
of stochastic training.
Examples Examples
-------- --------
Converting a homogeneous graph to a block as described above: Converting a homogeneous graph to a block as described above:
...@@ -1727,13 +1690,6 @@ def to_block(g, dst_nodes=None, include_dst_in_src=True): ...@@ -1727,13 +1690,6 @@ def to_block(g, dst_nodes=None, include_dst_in_src=True):
>>> block.srcnodes['A'].data[dgl.NID] >>> block.srcnodes['A'].data[dgl.NID]
tensor([2, 1]) tensor([2, 1])
Notes
-----
:func:`to_block` is most commonly used in customizing neighborhood sampling
for stochastic training on a large graph. Please refer to User Guide Chapter 6
for a more thorough discussion driven by the methodology of stochastic training on a
large graph.
""" """
assert g.device == F.cpu(), 'the graph must be on CPU' assert g.device == F.cpu(), 'the graph must be on CPU'
...@@ -1781,14 +1737,19 @@ def to_block(g, dst_nodes=None, include_dst_in_src=True): ...@@ -1781,14 +1737,19 @@ def to_block(g, dst_nodes=None, include_dst_in_src=True):
return new_graph return new_graph
def to_simple(g, return_counts='count', writeback_mapping=False, copy_ndata=True, copy_edata=False): def to_simple(g,
r"""Convert a graph to a simple graph, removing the parallel edges. return_counts='count',
writeback_mapping=False,
copy_ndata=True,
copy_edata=False):
r"""Convert a graph to a simple graph without parallel edges and return.
For a heterogeneous graph with multiple edge types, DGL removes the parallel edges For a heterogeneous graph with multiple edge types, DGL treats edges with the same
with the same edge type. edge type and endpoints as parallel edges and removes them.
Optionally, one can get the number of parallel edges by specifying the
Optionally, the number of parallel edges and/or the mapping from the edges in the simple graph :attr:`return_counts` argument. To get a mapping from the edge IDs in the
to the edges in the original graph is returned. input graph to the edge IDs in the resulting graph, set :attr:`writeback_mapping`
to true.
Parameters Parameters
---------- ----------
...@@ -1801,10 +1762,10 @@ def to_simple(g, return_counts='count', writeback_mapping=False, copy_ndata=True ...@@ -1801,10 +1762,10 @@ def to_simple(g, return_counts='count', writeback_mapping=False, copy_ndata=True
(Default: "count") (Default: "count")
writeback_mapping: bool, optional writeback_mapping: bool, optional
If True, a write-back mapping is returned for each edge If True, return an extra write-back mapping for each edge
type subgraph. The write-back mapping is a tensor recording type. The write-back mapping is a tensor recording
the mapping from the IDs of the edges in the new graph to the mapping from the edge IDs in the input graph to
the IDs of the edges in the original graph. If the graph is the edge IDs in the result graph. If the graph is
heterogeneous, DGL returns a dictionary of edge types and such heterogeneous, DGL returns a dictionary of edge types and such
tensors. tensors.
...@@ -1833,21 +1794,17 @@ def to_simple(g, return_counts='count', writeback_mapping=False, copy_ndata=True ...@@ -1833,21 +1794,17 @@ def to_simple(g, return_counts='count', writeback_mapping=False, copy_ndata=True
DGLGraph DGLGraph
The graph. The graph.
tensor or dict of tensor tensor or dict of tensor
The writeback mapping. The writeback mapping. Only when ``writeback_mapping`` is True.
Only returned if ``writeback_mapping`` is True.
Notes Notes
----- -----
If ``copy_ndata`` is ``True``, same tensors will be used for If :attr:`copy_ndata` is True, the resulting graph will share the node feature
the features of the original graph and the to_simpled graph. As a result, users tensors with the input graph. Hence, users should try to avoid in-place operations
should avoid performing in-place operations on the features of the to_simpled which will be visible to both graphs.
graph, which will corrupt the features of the original graph as well. For
concrete examples, refer to the ``Examples`` section below.
Examples Examples
-------- --------
**Homogeneous Graphs or Heterogeneous Graphs with A Single Edge Type** **Homogeneous Graphs**
Create a graph for demonstrating to_simple API. Create a graph for demonstrating to_simple API.
In the original graph, there are multiple edges between 1 and 2. In the original graph, there are multiple edges between 1 and 2.
...@@ -1881,24 +1838,7 @@ def to_simple(g, return_counts='count', writeback_mapping=False, copy_ndata=True ...@@ -1881,24 +1838,7 @@ def to_simple(g, return_counts='count', writeback_mapping=False, copy_ndata=True
>>> 'h' in g.edata >>> 'h' in g.edata
False False
**In-place operations on features of one graph will be reflected on features of **Heterogeneous Graphs**
the simple graph, which is dangerous. Out-place operations will not be reflected.**
>>> sg.ndata['h'] += 1
>>> g.ndata['h']
tensor([[1.],
[2.],
[3.]])
>>> g.ndata['h'] += 1
>>> sg.ndata['h']
tensor([[2.],
[3.],
[4.]])
>>> sg.ndata['h2'] = th.ones(3, 1)
>>> 'h2' in g.ndata
False
**Heterogeneous Graphs with Multiple Edge Types**
>>> g = dgl.heterograph({ >>> g = dgl.heterograph({
... ('user', 'wins', 'user'): (th.tensor([0, 2, 0, 2, 2]), th.tensor([1, 1, 2, 1, 0])), ... ('user', 'wins', 'user'): (th.tensor([0, 2, 0, 2, 2]), th.tensor([1, 1, 2, 1, 0])),
...@@ -1968,7 +1908,7 @@ def to_simple(g, return_counts='count', writeback_mapping=False, copy_ndata=True ...@@ -1968,7 +1908,7 @@ def to_simple(g, return_counts='count', writeback_mapping=False, copy_ndata=True
return simple_graph return simple_graph
DGLHeteroGraph.to_simple = to_simple DGLHeteroGraph.to_simple = utils.alias_func(to_simple)
def as_heterograph(g, ntype='_U', etype='_E'): # pylint: disable=unused-argument def as_heterograph(g, ntype='_U', etype='_E'): # pylint: disable=unused-argument
"""Convert a DGLGraph to a DGLHeteroGraph with one node and edge type. """Convert a DGLGraph to a DGLHeteroGraph with one node and edge type.
......
...@@ -891,4 +891,12 @@ def set_num_threads(num_threads): ...@@ -891,4 +891,12 @@ def set_num_threads(num_threads):
""" """
_CAPI_DGLSetOMPThreads(num_threads) _CAPI_DGLSetOMPThreads(num_threads)
def alias_func(func):
    """Build a thin pass-through wrapper around ``func``.

    The wrapper forwards every positional and keyword argument to ``func``
    and returns whatever ``func`` returns. Metadata is copied from ``func``
    via ``wraps``; the docstring is then replaced by a one-line pointer of
    the form ``Alias of :func:`dgl.<name>`.`` where ``<name>`` is
    ``func.__name__``.

    Parameters
    ----------
    func : callable
        The function to alias. It must expose a ``__name__`` attribute.

    Returns
    -------
    callable
        The forwarding wrapper carrying the alias docstring.
    """
    alias_doc = """Alias of :func:`dgl.{}`.""".format(func.__name__)

    @wraps(func)
    def _alias(*call_args, **call_kwargs):
        return func(*call_args, **call_kwargs)

    # ``wraps`` copied the original docstring; overwrite it with the short
    # alias pointer.
    _alias.__doc__ = alias_doc
    return _alias
_init_api("dgl.utils.internal") _init_api("dgl.utils.internal")
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment