Unverified Commit f13b9b62 authored by Minjie Wang's avatar Minjie Wang Committed by GitHub
Browse files

[Doc] Scan the API docs and make many changes (#2080)



* WIP: api

* dgl.sampling, dgl.data

* dgl.sampling; dgl.dataloading

* sampling packages

* convert

* subgraph

* deprecate

* subgraph APIs

* All docstrings for convert/subgraph/transform

* almost all funcs under dgl namespace

* WIP: DGLGraph

* done graph query

* message passing functions

* lint

* fix merge error

* fix test

* lint

* fix
Co-authored-by: Quan Gan <coin2028@hotmail.com>
parent 35e25914
"""For Graph Serialization"""
from __future__ import absolute_import
import os
from ..base import dgl_warning
from ..base import dgl_warning, DGLError
from ..heterograph import DGLHeteroGraph
from .._ffi.object import ObjectBase, register_object
from .._ffi.function import _init_api
......@@ -66,16 +66,23 @@ class GraphData(ObjectBase):
def save_graphs(filename, g_list, labels=None):
r"""
Save DGLGraphs and graph labels to file
r"""Save graphs and optionally their labels to file.
Besides saving to local files, DGL supports writing the graphs directly
to S3 (by providing a ``"s3://..."`` path) or to HDFS (by providing a
``"hdfs://..."`` path).
The function saves both the graph structure and node/edge features to file
in DGL's own binary format. For graph-level features, pass them via
the :attr:`labels` argument.
Parameters
----------
filename : str
File name to store graphs.
The file name to store the graphs and labels.
g_list: list
DGLGraph or list of DGLGraph/DGLHeteroGraph
labels: dict[str, tensor]
The graphs to be saved.
labels: dict[str, Tensor]
labels should be dict of tensors, with str as keys
Examples
......@@ -83,7 +90,7 @@ def save_graphs(filename, g_list, labels=None):
>>> import dgl
>>> import torch as th
Create :code:`DGLGraph`/:code:`DGLHeteroGraph` objects and initialize node
Create :class:`DGLGraph` objects and initialize node
and edge features.
>>> g1 = dgl.graph(([0, 1, 2], [1, 2, 3]))
......@@ -96,55 +103,66 @@ def save_graphs(filename, g_list, labels=None):
>>> graph_labels = {"glabel": th.tensor([0, 1])}
>>> save_graphs("./data.bin", [g1, g2], graph_labels)
See Also
--------
load_graphs
"""
# if it is local file, do some sanity check
if filename.startswith('s3://') is False:
assert not os.path.isdir(filename), "filename {} is an existing directory.".format(filename)
if os.path.isdir(filename):
raise DGLError("Filename {} is an existing directory.".format(filename))
f_path, _ = os.path.split(filename)
if not os.path.exists(f_path):
os.makedirs(f_path)
g_sample = g_list[0] if isinstance(g_list, list) else g_list
if type(g_sample) == DGLHeteroGraph: # Doesn't support DGLHeteroGraph's derived class
if type(g_sample) == DGLHeteroGraph: # Doesn't support DGLHeteroGraph's derived class
save_heterographs(filename, g_list, labels)
else:
raise Exception(
"Invalid argument g_list. Must be a DGLGraph or a list of DGLGraphs/DGLHeteroGraphs")
raise DGLError(
"Invalid argument g_list. Must be a DGLGraph or a list of DGLGraphs.")
def load_graphs(filename, idx_list=None):
"""
Load DGLGraphs from file
"""Load graphs and optionally their labels from file saved by :func:`save_graphs`.
Besides loading from local files, DGL supports loading the graphs directly
from S3 (by providing a ``"s3://..."`` path) or from HDFS (by providing a
``"hdfs://..."`` path).
Parameters
----------
filename: str
filename to load graphs
idx_list: list of int
list of index of graph to be loaded. If not specified, will
load all graphs from file
The file name to load graphs from.
idx_list: list[int], optional
The indices of the graphs to be loaded if the file contains multiple graphs.
Default is loading all the graphs stored in the file.
Returns
--------
graph_list: list of DGLGraphs / DGLHeteroGraph
graph_list: list[DGLGraph]
The loaded graphs.
labels: dict[str, Tensor]
The graph labels stored in file. If no label is stored, the dictionary is empty.
Regardless of whether the ``idx_list`` argument is given or not, the returned dictionary
always contains labels of all the graphs.
Regardless of whether the ``idx_list`` argument is given or not,
the returned dictionary always contains the labels of all the graphs.
Examples
----------
Following the example in save_graphs.
Following the example in :func:`save_graphs`.
>>> from dgl.data.utils import load_graphs
>>> glist, label_dict = load_graphs("./data.bin") # glist will be [g1, g2]
>>> glist, label_dict = load_graphs("./data.bin", [0]) # glist will be [g1]
See Also
--------
save_graphs
"""
# if it is local file, do some sanity check
assert filename.startswith('s3://') or os.path.exists(filename), "file {} does not exist.".format(filename)
if not (filename.startswith('s3://') or os.path.exists(filename)):
raise DGLError("File {} does not exist.".format(filename))
version = _CAPI_GetFileVersion(filename)
if version == 1:
......@@ -155,7 +173,7 @@ def load_graphs(filename, idx_list=None):
elif version == 2:
return load_graph_v2(filename, idx_list)
else:
raise Exception("Invalid DGL Version Number")
raise DGLError("Invalid DGL Version Number.")
def load_graph_v2(filename, idx_list=None):
......
"""Classes that involves iterating over nodes or edges in a graph and generates
computation dependency of necessary nodes with neighborhood sampling methods.
"""The ``dgl.dataloading`` package contains:
This includes
* :py:class:`~dgl.dataloading.pytorch.NodeDataLoader` for iterating over the nodes in
a graph in minibatches.
* :py:class:`~dgl.dataloading.pytorch.EdgeDataLoader` for iterating over the edges in
a graph in minibatches.
* Data loader classes for iterating over a set of nodes or edges in a graph and generating
computation dependency via neighborhood sampling methods.
* Various sampler classes that perform neighborhood sampling for multi-layer GNNs.
* Negative samplers for link prediction.
NOTE: this module is experimental and the interfaces may be subject to changes in
future releases.
For a holistic explanation on how different components work together,
read the user guide :ref:`guide-minibatch`.
.. note::
This package is experimental and the interfaces may be subject
to changes in future releases. It currently only has implementations in PyTorch.
"""
from .neighbor import *
from .dataloader import *
......
"""Module for various graph generator functions."""
# pylint: disable= dangerous-default-value
from . import backend as F
from . import convert
......@@ -7,13 +6,14 @@ from . import random
__all__ = ['rand_graph', 'rand_bipartite']
def rand_graph(num_nodes, num_edges, idtype=F.int64, device=F.cpu(),
formats=['coo', 'csr', 'csc']):
"""Generate a random graph of the given number of nodes/edges.
def rand_graph(num_nodes, num_edges, idtype=F.int64, device=F.cpu()):
"""Generate a random graph of the given number of nodes/edges and return.
It uniformly chooses ``num_edges`` from all pairs and form a graph.
It uniformly chooses ``num_edges`` from all possible node pairs and forms a graph.
The random choice is without replacement, which means there will be no multi-edge
in the resulting graph.
TODO(minjie): support RNG as one of the arguments.
To control the randomness, set the random seed via :func:`dgl.seed`.
Parameters
----------
......@@ -22,34 +22,51 @@ def rand_graph(num_nodes, num_edges, idtype=F.int64, device=F.cpu(),
num_edges : int
The number of edges
idtype : int32, int64, optional
Integer ID type. Must be int32 or int64. Default: int64.
The data type for storing the structure-related graph information
such as node and edge IDs. It should be a framework-specific data type object
(e.g., torch.int32). By default, DGL uses int64.
device : Device context, optional
Device on which the graph is created. Default: CPU.
formats : str or list of str
It can be ``'coo'``/``'csr'``/``'csc'`` or a sublist of them,
Force the storage formats. Default: ``['coo', 'csr', 'csc']``.
The device of the resulting graph. It should be a framework-specific device
object (e.g., torch.device). By default, DGL stores the graph on CPU.
Returns
-------
DGLHeteroGraph
Generated random graph.
DGLGraph
The generated random graph.
See Also
--------
rand_bipartite
Examples
--------
>>> import dgl
>>> dgl.rand_graph(100, 10)
Graph(num_nodes=100, num_edges=10,
ndata_schemes={}
edata_schemes={})
"""
#TODO(minjie): support RNG as one of the arguments.
eids = random.choice(num_nodes * num_nodes, num_edges, replace=False)
rows = F.copy_to(F.astype(eids / num_nodes, idtype), device)
cols = F.copy_to(F.astype(eids % num_nodes, idtype), device)
g = convert.graph((rows, cols),
num_nodes=num_nodes,
idtype=idtype, device=device)
return g.formats(formats)
eids = F.zerocopy_to_numpy(eids)
rows = F.zerocopy_from_numpy(eids // num_nodes)
cols = F.zerocopy_from_numpy(eids % num_nodes)
rows = F.copy_to(F.astype(rows, idtype), device)
cols = F.copy_to(F.astype(cols, idtype), device)
return convert.graph((rows, cols),
num_nodes=num_nodes,
idtype=idtype, device=device)
def rand_bipartite(utype, etype, vtype,
num_src_nodes, num_dst_nodes, num_edges,
idtype=F.int64, device=F.cpu(),
formats=['csr', 'coo', 'csc']):
"""Generate a random bipartite graph of the given number of src/dst nodes and
number of edges.
idtype=F.int64, device=F.cpu()):
"""Generate a random uni-directional bipartite graph and return.
It uniformly chooses ``num_edges`` from all possible node pairs and forms a graph.
The random choice is without replacement, which means there will be no multi-edge
in the resulting graph.
It uniformly chooses ``num_edges`` from all pairs and form a graph.
To control the randomness, set the random seed via :func:`dgl.seed`.
Parameters
----------
......@@ -60,28 +77,43 @@ def rand_bipartite(utype, etype, vtype,
vtype : str, optional
The name of the destination node type.
num_src_nodes : int
The number of source nodes, the :math:`|U|` in :math:`G=(U,V,E)`.
The number of source nodes.
num_dst_nodes : int
The number of destination nodes, the :math:`|V|` in :math:`G=(U,V,E)`.
The number of destination nodes.
num_edges : int
The number of edges
idtype : int32, int64, optional
Integer ID type. Must be int32 or int64. Default: int64.
The data type for storing the structure-related graph information
such as node and edge IDs. It should be a framework-specific data type object
(e.g., torch.int32). By default, DGL uses int64.
device : Device context, optional
Device on which the graph is created. Default: CPU.
formats : str or list of str
It can be ``'coo'``/``'csr'``/``'csc'`` or a sublist of them,
Force the storage formats. Default: ``['coo', 'csr', 'csc']``.
The device of the resulting graph. It should be a framework-specific device
object (e.g., torch.device). By default, DGL stores the graph on CPU.
Returns
-------
DGLHeteroGraph
Generated random bipartite graph.
DGLGraph
The generated random bipartite graph.
See Also
--------
rand_graph
Examples
--------
>>> import dgl
>>> dgl.rand_bipartite('user', 'buys', 'game', 50, 100, 10)
Graph(num_nodes={'game': 100, 'user': 50},
num_edges={('user', 'buys', 'game'): 10},
metagraph=[('user', 'game', 'buys')])
"""
#TODO(minjie): support RNG as one of the arguments.
eids = random.choice(num_src_nodes * num_dst_nodes, num_edges, replace=False)
rows = F.copy_to(F.astype(eids / num_dst_nodes, idtype), device)
cols = F.copy_to(F.astype(eids % num_dst_nodes, idtype), device)
g = convert.heterograph({(utype, etype, vtype): (rows, cols)},
{utype: num_src_nodes, vtype: num_dst_nodes},
idtype=idtype, device=device)
return g.formats(formats)
eids = F.zerocopy_to_numpy(eids)
rows = F.zerocopy_from_numpy(eids // num_dst_nodes)
cols = F.zerocopy_from_numpy(eids % num_dst_nodes)
rows = F.copy_to(F.astype(rows, idtype), device)
cols = F.copy_to(F.astype(cols, idtype), device)
return convert.heterograph({(utype, etype, vtype): (rows, cols)},
{utype: num_src_nodes, vtype: num_dst_nodes},
idtype=idtype, device=device)
"""Classes for heterogeneous graphs."""
#pylint: disable= too-many-lines
from collections import defaultdict, Iterable
from collections.abc import Mapping
from collections import defaultdict
from collections.abc import Mapping, Iterable
from contextlib import contextmanager
import copy
import numbers
......@@ -21,158 +21,17 @@ from .view import HeteroNodeView, HeteroNodeDataView, HeteroEdgeView, HeteroEdge
__all__ = ['DGLHeteroGraph', 'combine_names']
class DGLHeteroGraph(object):
"""Base heterogeneous graph class.
**Do NOT instantiate from this class directly; use** :mod:`conversion methods
<dgl.convert>` **instead.**
A Heterogeneous graph is defined as a graph with node types and edge
types.
If two edges share the same edge type, then their source nodes, as well
as their destination nodes, also have the same type (the source node
types don't have to be the same as the destination node types).
Examples
--------
Suppose that we want to construct the following heterogeneous graph:
.. graphviz::
digraph G {
Alice -> Bob [label=follows]
Bob -> Carol [label=follows]
Alice -> Tetris [label=plays]
Bob -> Tetris [label=plays]
Bob -> Minecraft [label=plays]
Carol -> Minecraft [label=plays]
Nintendo -> Tetris [label=develops]
Mojang -> Minecraft [label=develops]
{rank=source; Alice; Bob; Carol}
{rank=sink; Nintendo; Mojang}
}
And suppose that one maps the users, games and developers to the following
IDs:
========= ===== === =====
User name Alice Bob Carol
========= ===== === =====
User ID 0 1 2
========= ===== === =====
========= ====== =========
Game name Tetris Minecraft
========= ====== =========
Game ID 0 1
========= ====== =========
============== ======== ======
Developer name Nintendo Mojang
============== ======== ======
Developer ID 0 1
============== ======== ======
One can construct the graph as follows:
>>> g = dgl.heterograph({
... ('user', 'follows', 'user'): ([0, 1], [1, 2]),
... ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 1, 1]),
... ('developer', 'develops', 'game'): ([0, 1], [0, 1]),
... })
Then one can query the graph structure by specifying the ``ntype`` or ``etype`` arguments:
>>> g.number_of_nodes('user')
3
>>> g.number_of_edges('plays')
4
>>> g.out_degrees(etype='develops') # out-degrees of source nodes of 'develops' edge type
tensor([1, 1])
>>> g.in_edges(0, etype='develops') # in-edges of destination node 0 of 'develops' edge type
(tensor([0]), tensor([0]))
Or on the sliced graph for an edge type:
>>> g['plays'].number_of_edges()
4
>>> g['develops'].out_degrees()
tensor([1, 1])
>>> g['develops'].in_edges(0)
(tensor([0]), tensor([0]))
Node type names must be distinct (no two types have the same name). Edge types could
have the same name but they must be distinguishable by the ``(src_type, edge_type, dst_type)``
triplet (called *canonical edge type*).
For example, suppose a graph that has two types of relation "user-watches-movie"
and "user-watches-TV" as follows:
>>> GG = dgl.heterograph({
... ('user', 'watches', 'movie'): ([0, 1, 1], [1, 0, 1]),
... ('user', 'watches', 'TV'): ([0, 1], [0, 1])
... })
To distinguish between the two "watches" edge type, one must specify a full triplet:
>>> GG.number_of_edges(('user', 'watches', 'movie'))
3
>>> GG.number_of_edges(('user', 'watches', 'TV'))
2
>>> GG['user', 'watches', 'movie'].out_degrees()
tensor([1, 2])
Using only one single edge type string "watches" is ambiguous and will cause error:
>>> GG.number_of_edges('watches') # AMBIGUOUS!!
In many cases, there is only one type of nodes or one type of edges, and the ``ntype``
and ``etype`` argument could be omitted. This is very common when using the sliced
graph, which usually contains only one edge type, and sometimes only one node type:
>>> g['follows'].number_of_nodes() # OK!! because g['follows'] only has one node type 'user'
3
>>> g['plays'].number_of_nodes() # ERROR!! There are two types 'user' and 'game'.
>>> g['plays'].number_of_edges() # OK!! because there is only one edge type 'plays'
TODO(minjie): docstring about uni-directional bipartite graph
Metagraph
---------
For each heterogeneous graph, one can often infer the *metagraph*, the template of
edge connections showing how many types of nodes and edges exist in the graph, and
how each edge type could connect between node types.
One can analyze the example gameplay graph above and figure out the metagraph as
follows:
.. graphviz::
digraph G {
User -> User [label=follows]
User -> Game [label=plays]
Developer -> Game [label=develops]
}
"""Class for storing graph structure and node/edge feature data.
There are a few ways to create a DGLGraph:
Parameters
----------
gidx : HeteroGraphIndex
Graph index object.
ntypes : list of str, pair of list of str
Node type list. ``ntypes[i]`` stores the name of node type i.
If a pair is given, the graph created is a uni-directional bipartite graph,
and its SRC node types and DST node types are given as in the pair.
etypes : list of str
Edge type list. ``etypes[i]`` stores the name of edge type i.
node_frames : list[Frame], optional
Node feature storage. If None, empty frame is created.
Otherwise, ``node_frames[i]`` stores the node features
of node type i. (default: None)
edge_frames : list[Frame], optional
Edge feature storage. If None, empty frame is created.
Otherwise, ``edge_frames[i]`` stores the edge features
of edge type i. (default: None)
* To create a homogeneous graph from Tensor data, use :func:`dgl.graph`.
* To create a heterogeneous graph from Tensor data, use :func:`dgl.heterograph`.
* To create a graph from other data sources, use ``dgl.*`` create ops. See
:ref:`api-graph-create-ops`.
Read the user guide chapter :ref:`guide-graph` for an in-depth explanation about its
usage.
"""
is_block = False
......@@ -184,6 +43,27 @@ class DGLHeteroGraph(object):
node_frames=None,
edge_frames=None,
**deprecate_kwargs):
"""Internal constructor for creating a DGLGraph.
Parameters
----------
gidx : HeteroGraphIndex
Graph index object.
ntypes : list of str, pair of list of str
Node type list. ``ntypes[i]`` stores the name of node type i.
If a pair is given, the graph created is a uni-directional bipartite graph,
and its SRC node types and DST node types are given as in the pair.
etypes : list of str
Edge type list. ``etypes[i]`` stores the name of edge type i.
node_frames : list[Frame], optional
Node feature storage. If None, empty frame is created.
Otherwise, ``node_frames[i]`` stores the node features
of node type i. (default: None)
edge_frames : list[Frame], optional
Edge feature storage. If None, empty frame is created.
Otherwise, ``edge_frames[i]`` stores the edge features
of edge type i. (default: None)
"""
if isinstance(gidx, DGLHeteroGraph):
raise DGLError('The input is already a DGLGraph. No need to create it again.')
if not isinstance(gidx, heterograph_index.HeteroGraphIndex):
......@@ -851,12 +731,17 @@ class DGLHeteroGraph(object):
@property
def ntypes(self):
"""Return the node types of the graph.
"""Return all the node type names in the graph.
Returns
-------
list of str
Each ``str`` is a node type.
list[str]
All the node type names in a list.
Notes
-----
DGL internally assigns an integer ID for each node type. The returned
node type names are sorted according to their IDs.
Examples
--------
......@@ -877,19 +762,27 @@ class DGLHeteroGraph(object):
@property
def etypes(self):
"""Return the edge types of the graph.
"""Return all the edge type names in the graph.
Returns
-------
list of str
Each ``str`` is an edge type.
list[str]
All the edge type names in a list.
Notes
-----
An edge type can appear in multiple canonical edge types. For example, ``'interacts'``
can appear in two canonical edge types ``('drug', 'interacts', 'drug')`` and
``('protein', 'interacts', 'protein')``. It is recommended to use
:func:`~dgl.DGLGraph.canonical_etypes` in this case.
DGL internally assigns an integer ID for each edge type. The returned
edge type names are sorted according to their IDs.
The complete format to specify a relation is a string triplet ``(str, str, str)``
for source node type, edge type and destination node type. DGL calls this
format *canonical edge type*. An edge type can appear in multiple canonical edge types.
For example, ``'interacts'`` can appear in two canonical edge types
``('drug', 'interacts', 'drug')`` and ``('protein', 'interacts', 'protein')``.
See Also
--------
canonical_etypes
Examples
--------
......@@ -910,16 +803,24 @@ class DGLHeteroGraph(object):
@property
def canonical_etypes(self):
"""Return the canonical edge types of the graph.
"""Return all the canonical edge types in the graph.
A canonical edge type is a 3-tuple of str ``src_type, edge_type, dst_type``, where
``src_type``, ``edge_type``, ``dst_type`` are the type of the source nodes, edges
and destination nodes respectively.
A canonical edge type is a string triplet ``(str, str, str)``
for source node type, edge type and destination node type.
Returns
-------
list of 3-tuple of str
Each 3-tuple of str is a canonical edge type.
list[(str, str, str)]
All the canonical edge type triplets in a list.
Notes
-----
DGL internally assigns an integer ID for each edge type. The returned
edge type names are sorted according to their IDs.
See Also
--------
etypes
Examples
--------
......@@ -942,15 +843,24 @@ class DGLHeteroGraph(object):
@property
def srctypes(self):
"""Return the source node types.
"""Return all the source node type names in this graph.
If the graph can further divide its node types into two subsets A and B where
all the edges are from nodes of types in A to nodes of types in B, we call
this graph a *uni-bipartite* graph and the nodes in A being the *source*
nodes and the ones in B being the *destination* nodes. If the graph is not
uni-bipartite, the source and destination nodes are just the entire set of
nodes in the graph.
Returns
-------
list of str
list[str]
All the source node type names in a list.
* If the graph is a uni-bipartite graph, it returns the source node types.
For a definition of uni-bipartite, see :func:`is_unibipartite`.
* Otherwise, it returns all node types in the graph.
See Also
--------
dsttypes
is_unibipartite
Examples
--------
......@@ -984,16 +894,24 @@ class DGLHeteroGraph(object):
@property
def dsttypes(self):
"""Return the destination node types.
"""Return all the destination node type names in this graph.
If the graph can further divide its node types into two subsets A and B where
all the edges are from nodes of types in A to nodes of types in B, we call
this graph a *uni-bipartite* graph and the nodes in A being the *source*
nodes and the ones in B being the *destination* nodes. If the graph is not
uni-bipartite, the source and destination nodes are just the entire set of
nodes in the graph.
Returns
-------
list of str
Each str is a node type.
list[str]
All the destination node type names in a list.
* If the graph is a uni-bipartite graph, it returns the destination node types.
For a definition of uni-bipartite, see :func:`is_unibipartite`.
* Otherwise, it returns all node types in the graph.
See Also
--------
srctypes
is_unibipartite
Examples
--------
......@@ -1065,29 +983,24 @@ class DGLHeteroGraph(object):
def to_canonical_etype(self, etype):
"""Convert an edge type to the corresponding canonical edge type in the graph.
A canonical edge type is a 3-tuple of strings ``src_type, edge_type, dst_type``, where
``src_type``, ``edge_type``, ``dst_type`` are separately the type of source
nodes, edges and destination nodes.
A canonical edge type is a string triplet ``(str, str, str)``
for source node type, edge type and destination node type.
The function expects the given edge type name can uniquely identify a canonical edge
type. DGL will raise error if this is not the case.
Parameters
----------
etype : str or 3-tuple of str
etype : str or (str, str, str)
If :attr:`etype` is an edge type (str), it returns the corresponding canonical edge
type in the graph. If :attr:`etype` is already a canonical edge type
(3-tuple of str), it simply returns :attr:`etype`.
type in the graph. If :attr:`etype` is already a canonical edge type,
it directly returns the input unchanged.
Returns
-------
3-tuple of str
(str, str, str)
The canonical edge type corresponding to the edge type.
Notes
-----
If :attr:`etype` is an edge type, the API expects it to appear only once in the graph. For
example, in a graph with canonical edge types ``('A', 'follows', 'B')``,
``('A', 'follows', 'C')`` and ``('B', 'watches', 'D')``, ``'follows'`` is an invalid value
for :attr:`etype` while ``'watches'`` is a valid one.
Examples
--------
The following example uses PyTorch backend.
......@@ -1131,7 +1044,7 @@ class DGLHeteroGraph(object):
return ret
def get_ntype_id(self, ntype):
"""Return the id of the given node type.
"""Return the ID of the given node type.
ntype can also be None. If so, there should be only one node type in the
graph.
......@@ -1165,7 +1078,7 @@ class DGLHeteroGraph(object):
return ntid
def get_ntype_id_from_src(self, ntype):
"""Return the id of the given SRC node type.
"""Internal function to return the ID of the given SRC node type.
ntype can also be None. If so, there should be only one node type in the
SRC category. Callable even when the self graph is not uni-bipartite.
......@@ -1190,7 +1103,7 @@ class DGLHeteroGraph(object):
return ntid
def get_ntype_id_from_dst(self, ntype):
"""Return the id of the given DST node type.
"""Internal function to return the ID of the given DST node type.
ntype can also be None. If so, there should be only one node type in the
DST category. Callable even when the self graph is not uni-bipartite.
......@@ -2057,16 +1970,16 @@ class DGLHeteroGraph(object):
#################################################################
def number_of_nodes(self, ntype=None):
"""Alias of :func:`num_nodes`"""
"""Alias of :meth:`num_nodes`"""
return self.num_nodes(ntype)
def num_nodes(self, ntype=None):
"""Return the number of nodes.
"""Return the number of nodes in the graph.
Parameters
----------
ntype : str, optional
The node type for query. If given, it returns the number of nodes for a particular
The node type name. If given, it returns the number of nodes of the
type. If not given (default), it returns the total number of nodes of all types.
Returns
......@@ -2104,17 +2017,24 @@ class DGLHeteroGraph(object):
return self._graph.number_of_nodes(self.get_ntype_id(ntype))
def number_of_src_nodes(self, ntype=None):
"""Alias of :func:`num_src_nodes`"""
"""Alias of :meth:`num_src_nodes`"""
return self.num_src_nodes(ntype)
def num_src_nodes(self, ntype=None):
"""Return the number of nodes of the given source node type.
"""Return the number of source nodes in the graph.
If the graph can further divide its node types into two subsets A and B where
all the edges are from nodes of types in A to nodes of types in B, we call
this graph a *uni-bipartite* graph and the nodes in A being the *source*
nodes and the ones in B being the *destination* nodes. If the graph is not
uni-bipartite, the source and destination nodes are just the entire set of
nodes in the graph.
Parameters
----------
ntype : str, optional
The source node type for query. If given, it returns the number of nodes for a
particular source node type. If not given (default), it returns the number of
The source node type name. If given, it returns the number of nodes for
the source node type. If not given (default), it returns the number of
nodes summed over all source node types.
Returns
......@@ -2122,6 +2042,11 @@ class DGLHeteroGraph(object):
int
The number of nodes
See Also
--------
num_dst_nodes
is_unibipartite
Examples
--------
The following example uses PyTorch backend.
......@@ -2162,20 +2087,32 @@ class DGLHeteroGraph(object):
return self.num_dst_nodes(ntype)
def num_dst_nodes(self, ntype=None):
"""Return the number of nodes of the given destination node type.
"""Return the number of destination nodes in the graph.
If the graph can further divide its node types into two subsets A and B where
all the edges are from nodes of types in A to nodes of types in B, we call
this graph a *uni-bipartite* graph and the nodes in A being the *source*
nodes and the ones in B being the *destination* nodes. If the graph is not
uni-bipartite, the source and destination nodes are just the entire set of
nodes in the graph.
Parameters
----------
ntype : str, optional
The destination node type for query. If given, it returns the number of nodes for a
particular destination node type. If not given (default), it returns the number of
nodes summed over all destination node types.
The destination node type name. If given, it returns the number of nodes of
the destination node type. If not given (default), it returns the number of
nodes summed over all the destination node types.
Returns
-------
int
The number of nodes
See Also
--------
num_src_nodes
is_unibipartite
Examples
--------
The following example uses PyTorch backend.
......@@ -2216,16 +2153,19 @@ class DGLHeteroGraph(object):
return self.num_edges(etype)
def num_edges(self, etype=None):
"""Return the number of edges.
"""Return the number of edges in the graph.
Parameters
----------
etype : str or tuple of str, optional
The edge type for query, which can be an edge type (str) or a canonical edge type
(3-tuple of str). When an edge type appears in multiple canonical edge types, one
must use a canonical edge type. If given, it returns the number of edges for a
particular edge type. If not given (default), it returns the total number of edges
of all types.
etype : str or (str, str, str), optional
The type name of the edges. The allowed type name formats are:
* ``(str, str, str)`` for source node type, edge type and destination node type.
* or one ``str`` edge type name if the name can uniquely identify a
triplet format in the graph.
If not provided, return the total number of edges regardless of the types
in the graph.
Returns
-------
......@@ -2277,22 +2217,21 @@ class DGLHeteroGraph(object):
@property
def is_multigraph(self):
"""Whether the graph is a multigraph
In a multigraph, there can be multiple edges from a node ``u`` to a node ``v``.
"""Return whether the graph is a multigraph with parallel edges.
For a heterogeneous graph of multiple canonical edge types, we consider it as a
multigraph if there are multiple edges from a node ``u`` to a node ``v`` for any
canonical edge type.
A multigraph has more than one edge between the same pair of nodes, called
*parallel edges*. For heterogeneous graphs, parallel edges further require
the canonical edge type to be the same (see :meth:`canonical_etypes` for the
definition).
Returns
-------
bool
Whether the graph is a multigraph.
True if the graph is a multigraph.
Notes
-----
Checking whether the graph is a multigraph can be expensive for a large one.
Checking whether the graph is a multigraph could be expensive for a large one.
Examples
--------
......@@ -2330,14 +2269,14 @@ class DGLHeteroGraph(object):
@property
def is_homogeneous(self):
"""Whether the graph is a homogeneous graph.
"""Return whether the graph is a homogeneous graph.
A homogeneous graph only has one node type and one edge type.
Returns
-------
bool
Whether the graph is a homogeneous graph.
True if the graph is a homogeneous graph.
Examples
--------
......@@ -2366,7 +2305,7 @@ class DGLHeteroGraph(object):
@property
def is_readonly(self):
"""Deprecated: DGLGraph will always be mutable.
"""**DEPRECATED**: DGLGraph will always be mutable.
Returns
-------
......@@ -2424,38 +2363,33 @@ class DGLHeteroGraph(object):
return self._graph.dtype
def __contains__(self, vid):
"""Deprecated: please directly call :func:`has_nodes`.
"""
"""**DEPRECATED**: please directly call :func:`has_nodes`."""
dgl_warning('DGLGraph.__contains__ is deprecated.'
' Please directly call has_nodes.')
return self.has_nodes(vid)
def has_nodes(self, vid, ntype=None):
"""Whether the graph has some particular node(s) of a given type.
"""Return whether the graph contains the given nodes.
Parameters
----------
vid : node ID(s)
The node ID(s) for query. The allowed formats are:
The node IDs. The allowed node ID formats are:
* ``int``: The ID of a single node.
* Int Tensor: Each element is a node ID. The tensor must have the same device type
and ID data type as the graph's.
* iterable[int]: Each element is a node ID.
- ``int``: The ID of a single node.
- ``Tensor``: A 1D tensor that contains the IDs of multiple nodes, whose data type and
device should be the same as the :py:attr:`idtype` and device of the graph.
- ``iterable[int]``: A sequence (e.g. list, tuple, numpy.ndarray)
of integers that contains the IDs of multiple nodes.
ntype : str, optional
The node type for query. It is required if the graph has
multiple node types.
The node type name. Can be omitted if there is
only one type of nodes in the graph.
Returns
-------
bool or bool Tensor
- If :attr:`vid` is an ``int``, the result will be a ``bool`` indicating
whether the graph has the particular node.
- If :attr:`vid` is a 1D ``Tensor`` or ``iterable[int]`` of node IDs,
the result will be a bool Tensor whose i-th element indicates whether
the graph has node :attr:`vid[i]` of the given type.
A tensor of bool flags where each element is True if the node is in the graph.
If the input is a single node, return one bool value.
Examples
--------
......@@ -2494,50 +2428,47 @@ class DGLHeteroGraph(object):
def has_node(self, vid, ntype=None):
"""Whether the graph has a particular node of a given type.
DEPRECATED: see :func:`~DGLGraph.has_nodes`
**DEPRECATED**: see :func:`~DGLGraph.has_nodes`
"""
dgl_warning("DGLGraph.has_node is deprecated. Please use DGLGraph.has_nodes")
return self.has_nodes(vid, ntype)
def has_edges_between(self, u, v, etype=None):
"""Whether the graph has some particular edge(s) of a given type.
"""Return whether the graph contains the given edges.
Parameters
----------
u : source node ID(s)
The source node(s) of the edges for query. The allowed formats are:
- ``int``: The source node of an edge for query.
- ``Tensor``: A 1D tensor that contains the source node(s) of edge(s) for query.
The data type and device of the tensor must be the same as the :py:attr:`idtype` and
device of the graph. Its i-th element represents the source node ID of the
i-th edge for query.
- ``iterable[int]`` : Similar to the tensor, but stores node IDs in a sequence
(e.g. list, tuple, numpy.ndarray).
v : destination node ID(s)
The destination node(s) of the edges for query. It's a counterpart of :attr:`u`
for destination nodes and should have the same format as :attr:`u`. If :attr:`u`
and :attr:`v` are not int, they should have the same length.
etype : str or tuple of str, optional
The edge type for query, which can be an edge type (str) or a canonical edge type
(3-tuple of str). When an edge type appears in multiple canonical edge types, one
must use a canonical edge type. If the graph has multiple edge types, one must
specify the argument. Otherwise, it can be omitted.
u : node IDs
The source node IDs of the edges. The allowed formats are:
* ``int``: A single node.
* Int Tensor: Each element is a node ID. The tensor must have the same device type
and ID data type as the graph's.
* iterable[int]: Each element is a node ID.
v : node IDs
The destination node IDs of the edges. The allowed formats are:
* ``int``: A single node.
* Int Tensor: Each element is a node ID. The tensor must have the same device type
and ID data type as the graph's.
* iterable[int]: Each element is a node ID.
etype : str or (str, str, str), optional
The type names of the edges. The allowed type name formats are:
* ``(str, str, str)`` for source node type, edge type and destination node type.
* or one ``str`` edge type name if the name can uniquely identify a
triplet format in the graph.
Can be omitted if the graph has only one type of edges.
Returns
-------
bool or bool Tensor
- If :attr:`u` and :attr:`v` are ``int`` objects, the result will be a ``bool``
indicating whether there is an edge from ``u`` to ``v`` of the given edge type.
- If :attr:`u` and :attr:`v` are ``Tensor`` or ``iterable[int]`` objects, the
result will be a bool Tensor whose i-th element indicates whether there is an
edge from ``u[i]`` to ``v[i]`` of the given edge type.
Notes
-----
The value(s) of :attr:`u` and :attr:`v` need to be separately smaller than the
number of nodes of the source and destination type.
A tensor of bool flags where each element is True if the edge is in the graph.
If the input is a single edge, return one bool value.
Examples
--------
......@@ -2595,7 +2526,7 @@ class DGLHeteroGraph(object):
def has_edge_between(self, u, v, etype=None):
"""Whether the graph has edges of type ``etype``.
DEPRECATED: please use :func:`~DGLGraph.has_edge_between`.
**DEPRECATED**: please use :func:`~DGLGraph.has_edges_between`.
"""
dgl_warning("DGLGraph.has_edge_between is deprecated. "
"Please use DGLGraph.has_edges_between")
......@@ -2610,12 +2541,16 @@ class DGLHeteroGraph(object):
Parameters
----------
v : int
The destination node for query.
etype : str or tuple of str, optional
The edge type for query, which can be an edge type (str) or a canonical edge type
(3-tuple of str). When an edge type appears in multiple canonical edge types, one
must use a canonical edge type. If the graph has multiple edge types, one must
specify the argument. Otherwise, it can be omitted.
The node ID.
etype : str or (str, str, str), optional
The type names of the edges. The allowed type name formats are:
* ``(str, str, str)`` for source node type, edge type and destination node type.
* or one ``str`` edge type name if the name can uniquely identify a
triplet format in the graph.
Can be omitted if the graph has only one type of edges.
Returns
-------
......@@ -2664,12 +2599,15 @@ class DGLHeteroGraph(object):
Parameters
----------
v : int
The source node for query.
etype : str or tuple of str, optional
The edge type for query, which can be an edge type (str) or a canonical edge type
(3-tuple of str). When an edge type appears in multiple canonical edge types, one
must use a canonical edge type. If the graph has multiple edge types, one must
specify the argument. Otherwise, it can be omitted.
The node ID.
etype : str or (str, str, str), optional
The type names of the edges. The allowed type name formats are:
* ``(str, str, str)`` for source node type, edge type and destination node type.
* or one ``str`` edge type name if the name can uniquely identify a
triplet format in the graph.
Can be omitted if the graph has only one type of edges.
Returns
-------
......@@ -2720,48 +2658,51 @@ class DGLHeteroGraph(object):
return_uv=return_uv, etype=etype)
def edge_ids(self, u, v, force_multi=None, return_uv=False, etype=None):
"""Return the ID(s) of edge(s) from the given source node(s) to the given destination
node(s) with the specified edge type.
"""Return the edge ID(s) given the two endpoints of the edge(s).
Parameters
----------
u : source node ID(s)
The source node(s) of the edges for query. The allowed formats are:
- ``int``: The source node of an edge for query.
- ``Tensor``: A 1D tensor that contains the source node(s) of edge(s) for query, whose
data type an device should be the same as the :py:attr:`idtype` and device of
the graph. Its i-th element is the source node of the i-th edge for query.
- ``iterable[int]``: Similar to the tensor, but stores node IDs in a sequence
(e.g. list, tuple, numpy.ndarray).
v : destination node ID(s)
The destination node(s) of the edges for query. It's a counterpart of :attr:`u`
for destination nodes and should have the same format as :attr:`u`. If :attr:`u`
and :attr:`v` are not int, they should have the same length.
u : node IDs
The source node IDs of the edges. The allowed formats are:
* ``int``: A single node.
* Int Tensor: Each element is a node ID. The tensor must have the same device type
and ID data type as the graph's.
* iterable[int]: Each element is a node ID.
v : node IDs
The destination node IDs of the edges. The allowed formats are:
* ``int``: A single node.
* Int Tensor: Each element is a node ID. The tensor must have the same device type
and ID data type as the graph's.
* iterable[int]: Each element is a node ID.
force_multi : bool, optional
Deprecated, use :attr:`return_uv` instead. Whether to allow the graph to be a
**DEPRECATED**, use :attr:`return_uv` instead. Whether to allow the graph to be a
multigraph, i.e. there can be multiple edges from one node to another.
return_uv : bool, optional
Whether to return the source and destination node IDs along with the edges. If
False (default), it assumes that the graph is a simple graph and there is only
one edge from one node to another. If True, there can be multiple edges found
from one node to another.
etype : str or tuple of str, optional
The edge type for query, which can be an edge type (str) or a canonical edge type
(3-tuple of str). When an edge type appears in multiple canonical edge types, one
must use a canonical edge type. If the graph has multiple edge types, one must
specify the argument. Otherwise, it can be omitted.
etype : str or (str, str, str), optional
The type names of the edges. The allowed type name formats are:
* ``(str, str, str)`` for source node type, edge type and destination node type.
* or one ``str`` edge type name if the name can uniquely identify a
triplet format in the graph.
Can be omitted if the graph has only one type of edges.
Returns
-------
tensor, or (tensor, tensor, tensor)
Tensor, or (Tensor, Tensor, Tensor)
* If ``return_uv=False``, it returns a 1D tensor that contains the IDs of the edges.
If :attr:`u` and :attr:`v` are int, the tensor has length 1. Otherwise, the i-th
element of the tensor is the ID of the edge ``(u[i], v[i])``.
* If ``return_uv=False``, it returns the edge IDs in a tensor, where the i-th
element is the ID of the edge ``(u[i], v[i])``.
* If ``return_uv=True``, it returns a tuple of three 1D tensors ``(eu, ev, e)``.
``e[i]`` is the ID of an edge from ``eu[i]`` to ``ev[i]``. It returns all edges
from ``eu[i]`` to ``ev[i]`` in this case.
(including parallel edges) from ``eu[i]`` to ``ev[i]`` in this case.
Notes
-----
......@@ -2840,34 +2781,35 @@ class DGLHeteroGraph(object):
return F.as_scalar(eid) if is_int else eid
def find_edges(self, eid, etype=None):
"""Return the source and destination node(s) of some particular edge(s)
with the specified edge type.
"""Return the source and destination node ID(s) given the edge ID(s).
Parameters
----------
eid : edge ID(s)
The IDs of the edges for query. The function expects that :attr:`eid` contains
valid edge IDs only, i.e. among consecutive integers :math:`0, 1, ... E - 1`, where
:math:`E` is the number of edges with the specified edge type.
- ``int``: An edge ID for query.
- ``Tensor``: A 1D tensor that contains the edge IDs for query, whose data
type and device should be the same as the :py:attr:`idtype` and device of the graph.
- ``iterable[int]``: Similar to the tensor, but stores edge IDs in a sequence
(e.g. list, tuple, numpy.ndarray).
etype : str or tuple of str, optional
The edge type for query, which can be an edge type (str) or a canonical edge type
(3-tuple of str). When an edge type appears in multiple canonical edge types, one
must use a canonical edge type.
The edge IDs. The allowed formats are:
* ``int``: A single ID.
* Int Tensor: Each element is an ID. The tensor must have the same device type
and ID data type as the graph's.
* iterable[int]: Each element is an ID.
etype : str or (str, str, str), optional
The type names of the edges. The allowed type name formats are:
* ``(str, str, str)`` for source node type, edge type and destination node type.
* or one ``str`` edge type name if the name can uniquely identify a
triplet format in the graph.
Can be omitted if the graph has only one type of edges.
Returns
-------
Tensor
The source node IDs of the edges, whose i-th element is the source node of the edge
with ID ``eid[i]``.
The source node IDs of the edges. The i-th element is the source node ID of
the i-th edge.
Tensor
The destination node IDs of the edges, whose i-th element is the destination node of
the edge with ID ``eid[i]``.
The destination node IDs of the edges. The i-th element is the destination node
ID of the i-th edge.
Examples
--------
......@@ -2910,20 +2852,19 @@ class DGLHeteroGraph(object):
return src, dst
def in_edges(self, v, form='uv', etype=None):
"""Return the incoming edges of some particular node(s) with the specified edge type.
"""Return the incoming edges of the given nodes.
Parameters
----------
v : destination node ID(s)
The destination node(s) for query. The allowed formats are:
v : node ID(s)
The node IDs. The allowed formats are:
- ``int``: The destination node for query.
- ``Tensor``: A 1D tensor that contains the destination node(s) for query, whose data
type and device should be the same as the :py:attr:`idtype` and device of the graph.
- ``iterable[int]``: Similar to the tensor, but stores node IDs in a sequence
(e.g. list, tuple, numpy.ndarray).
* ``int``: A single node.
* Int Tensor: Each element is a node ID. The tensor must have the same device type
and ID data type as the graph's.
* iterable[int]: Each element is a node ID.
form : str, optional
The return form, which can be one of the following:
The result format, which can be one of the following:
- ``'eid'``: The returned result is a 1D tensor :math:`EID`, representing
the IDs of all edges.
......@@ -2933,11 +2874,14 @@ class DGLHeteroGraph(object):
- ``'all'``: The returned result is a 3-tuple of 1D tensors :math:`(U, V, EID)`,
representing the source nodes, destination nodes and IDs of all edges.
For each :math:`i`, :math:`(U[i], V[i])` forms an edge with ID :math:`EID[i]`.
etype : str or tuple of str, optional
The edge type for query, which can be an edge type (str) or a canonical edge type
(3-tuple of str). When an edge type appears in multiple canonical edge types, one
must use a canonical edge type. If the graph has multiple edge types, one must
specify the argument. Otherwise, it can be omitted.
etype : str or (str, str, str), optional
The type names of the edges. The allowed type name formats are:
* ``(str, str, str)`` for source node type, edge type and destination node type.
* or one ``str`` edge type name if the name can uniquely identify a
triplet format in the graph.
Can be omitted if the graph has only one type of edges.
Returns
-------
......@@ -2992,18 +2936,17 @@ class DGLHeteroGraph(object):
raise DGLError('Invalid form: {}. Must be "all", "uv" or "eid".'.format(form))
def out_edges(self, u, form='uv', etype=None):
"""Return the outgoing edges of some particular node(s) with the specified edge type.
"""Return the outgoing edges of the given nodes.
Parameters
----------
u : source node ID(s)
The source node(s) for query. The allowed formats are:
u : node ID(s)
The node IDs. The allowed formats are:
- ``int``: The source node for query.
- ``Tensor``: A 1D tensor that contains the source node(s) for query, whose data
type and device should be the same as the :py:attr:`idtype` and device of the graph.
- ``iterable[int]``: Similar to the tensor, but stores node IDs in a sequence
(e.g. list, tuple, numpy.ndarray).
* ``int``: A single node.
* Int Tensor: Each element is a node ID. The tensor must have the same device type
and ID data type as the graph's.
* iterable[int]: Each element is a node ID.
form : str, optional
The return form, which can be one of the following:
......@@ -3015,11 +2958,14 @@ class DGLHeteroGraph(object):
- ``'all'``: The returned result is a 3-tuple of 1D tensors :math:`(U, V, EID)`,
representing the source nodes, destination nodes and IDs of all edges.
For each :math:`i`, :math:`(U[i], V[i])` forms an edge with ID :math:`EID[i]`.
etype : str or tuple of str, optional
The edge type for query, which can be an edge type (str) or a canonical edge type
(3-tuple of str). When an edge type appears in multiple canonical edge types, one
must use a canonical edge type. If the graph has multiple edge types, one must
specify the argument. Otherwise, it can be omitted.
etype : str or (str, str, str), optional
The type names of the edges. The allowed type name formats are:
* ``(str, str, str)`` for source node type, edge type and destination node type.
* or one ``str`` edge type name if the name can uniquely identify a
triplet format in the graph.
Can be omitted if the graph has only one type of edges.
Returns
-------
......@@ -3159,43 +3105,41 @@ class DGLHeteroGraph(object):
def in_degree(self, v, etype=None):
"""Return the in-degree of node ``v`` with edges of type ``etype``.
DEPRECATED: Please use in_degrees
**DEPRECATED**: Please use in_degrees
"""
dgl_warning("DGLGraph.in_degree is deprecated. Please use DGLGraph.in_degrees")
return self.in_degrees(v, etype)
def in_degrees(self, v=ALL, etype=None):
"""Return the in-degree(s) of some particular node(s) with the specified edge type.
"""Return the in-degree(s) of the given nodes.
It computes the in-degree(s) w.r.t. the edges of the given edge type.
Parameters
----------
v : destination node ID(s), optional
The destination node(s) for query. The allowed formats are:
v : node IDs
The node IDs. The allowed formats are:
- ``int``: The destination node for query.
- ``Tensor``: A 1D tensor that contains the destination node(s) for query, whose data
type and device should be the same as the :py:attr:`idtype` and device of the graph.
- ``iterable[int]``: Similar to the tensor, but stores node IDs in a sequence
(e.g. list, tuple, numpy.ndarray).
* ``int``: A single node.
* Int Tensor: Each element is a node ID. The tensor must have the same device type
and ID data type as the graph's.
* iterable[int]: Each element is a node ID.
By default, it considers all nodes.
etype : str or tuple of str, optional
The edge type for query, which can be an edge type (str) or a canonical edge type
(3-tuple of str). When an edge type appears in multiple canonical edge types, one
must use a canonical edge type. If the graph has multiple edge types, one must
specify the argument. Otherwise, it can be omitted.
If not given, return the in-degrees of all the nodes.
etype : str or (str, str, str), optional
The type name of the edges. The allowed type name formats are:
* ``(str, str, str)`` for source node type, edge type and destination node type.
* or one ``str`` edge type name if the name can uniquely identify a
triplet format in the graph.
Can be omitted if the graph has only one type of edges.
Returns
-------
tensor or int
The in-degree(s) of the node(s).
- If :attr:`v` is an ``int`` object, the return result will be an ``int``
object as well.
- If :attr:`v` is a ``Tensor`` or ``iterable[int]`` object, the return result
will be a 1D ``Tensor``. The data type of the result will be the same as the
idtype of the graph. The i-th element of the tensor is the in-degree of the
node ``v[i]``.
int or Tensor
The in-degree(s) of the node(s) in a Tensor. The i-th element is the in-degree
of the i-th input node. If :attr:`v` is an ``int``, return an ``int`` too.
Examples
--------
......@@ -3251,36 +3195,35 @@ class DGLHeteroGraph(object):
return self.out_degrees(u, etype)
def out_degrees(self, u=ALL, etype=None):
"""Return the out-degree(s) of some particular node(s) with the specified edge type.
"""Return the out-degree(s) of the given nodes.
It computes the out-degree(s) w.r.t. the edges of the given edge type.
Parameters
----------
u : source node ID(s), optional
u : node IDs
The node IDs. The allowed formats are:
- ``int``: The source node for query.
- ``Tensor``: A 1D tensor that contains the source node(s) for query, whose data
type and device should be the same as the :py:attr:`idtype` and device of the graph.
- ``iterable[int]``: Similar to the tensor, but stores node IDs in a sequence
(e.g. list, tuple, numpy.ndarray).
* ``int``: A single node.
* Int Tensor: Each element is a node ID. The tensor must have the same device type
and ID data type as the graph's.
* iterable[int]: Each element is a node ID.
By default, it considers all nodes.
etype : str or tuple of str, optional
The edge type for query, which can be an edge type (str) or a canonical edge type
(3-tuple of str). When an edge type appears in multiple canonical edge types, one
must use a canonical edge type. If the graph has multiple edge types, one must
specify the argument. Otherwise, it can be omitted.
If not given, return the out-degrees of all the nodes.
etype : str or (str, str, str), optional
The type names of the edges. The allowed type name formats are:
* ``(str, str, str)`` for source node type, edge type and destination node type.
* or one ``str`` edge type name if the name can uniquely identify a
triplet format in the graph.
Can be omitted if the graph has only one type of edges.
Returns
-------
tensor or int
The out-degree(s) of the node(s).
- If :attr:`u` is an ``int`` object, the return result will be an ``int``
object as well.
- If :attr:`u` is a ``Tensor`` or ``iterable[int]`` object, the return result
will be a 1D ``Tensor``. The data type of the result will be the same as the
idtype of the graph. The i-th element of the tensor is the out-degree of the
node ``v[i]``.
int or Tensor
The out-degree(s) of the node(s) in a Tensor. The i-th element is the out-degree
of the i-th input node. If :attr:`u` is an ``int``, return an ``int`` too.
Examples
--------
......@@ -3330,7 +3273,7 @@ class DGLHeteroGraph(object):
return deg
def adjacency_matrix(self, transpose=True, ctx=F.cpu(), scipy_fmt=None, etype=None):
"""Alias of :func:`adj`"""
"""Alias of :meth:`adj`"""
return self.adj(transpose, ctx, scipy_fmt, etype)
def adj(self, transpose=True, ctx=F.cpu(), scipy_fmt=None, etype=None):
......@@ -3351,12 +3294,15 @@ class DGLHeteroGraph(object):
scipy_fmt : str, optional
If specified, return a scipy sparse matrix in the given format.
Otherwise, return a backend dependent sparse tensor. (Default: None)
etype : str or tuple of str, optional
The edge type for query, which can be an edge type (str) or a canonical edge type
(3-tuple of str). When an edge type appears in multiple canonical edge types, one
must use a canonical edge type. If given, it returns the number of edges for a
particular edge type. If not given (default), it returns the total number of edges
of all types.
etype : str or (str, str, str), optional
The type names of the edges. The allowed type name formats are:
* ``(str, str, str)`` for source node type, edge type and destination node type.
* or one ``str`` edge type name if the name can uniquely identify a
triplet format in the graph.
Can be omitted if the graph has only one type of edges.
Returns
-------
......@@ -3398,6 +3344,7 @@ class DGLHeteroGraph(object):
else:
return self._graph.adjacency_matrix_scipy(etid, transpose, scipy_fmt, False)
def adjacency_matrix_scipy(self, transpose=True, fmt='csr', return_edge_ids=None):
"""DEPRECATED: please use ``dgl.adjacency_matrix(transpose, scipy_fmt=fmt)``.
"""
......@@ -3407,10 +3354,6 @@ class DGLHeteroGraph(object):
return self.adjacency_matrix(transpose=transpose, scipy_fmt=fmt)
def incidence_matrix(self, typestr, ctx=F.cpu(), etype=None):
"""Alias of :func:`inc`"""
return self.inc(typestr, ctx, etype)
def inc(self, typestr, ctx=F.cpu(), etype=None):
"""Return the incidence matrix representation of edges with the given
edge type.
......@@ -3446,12 +3389,14 @@ class DGLHeteroGraph(object):
Can be either ``in``, ``out`` or ``both``
ctx : context, optional
The context of returned incidence matrix. (Default: cpu)
etype : str or tuple of str, optional
The edge type for query, which can be an edge type (str) or a canonical edge type
(3-tuple of str). When an edge type appears in multiple canonical edge types, one
must use a canonical edge type. If given, it returns the number of edges for a
particular edge type. If not given (default), it returns the total number of edges
of all types.
etype : str or (str, str, str), optional
The type names of the edges. The allowed type name formats are:
* ``(str, str, str)`` for source node type, edge type and destination node type.
* or one ``str`` edge type name if the name can uniquely identify a
triplet format in the graph.
Can be omitted if the graph has only one type of edges.
Returns
-------
......@@ -3485,6 +3430,8 @@ class DGLHeteroGraph(object):
etid = self.get_etype_id(etype)
return self._graph.incidence_matrix(etid, typestr, ctx)[0]
incidence_matrix = inc
#################################################################
# Features
#################################################################
......@@ -3497,8 +3444,8 @@ class DGLHeteroGraph(object):
Parameters
----------
ntype : str, optional
The node type for query. If the graph has multiple node types, one must
specify the argument. Otherwise, it can be omitted.
The node type name. Can be omitted if there is only one type of nodes
in the graph.
Returns
-------
......@@ -3544,11 +3491,15 @@ class DGLHeteroGraph(object):
Parameters
----------
etype : str or tuple of str, optional
The edge type for query, which can be an edge type (str) or a canonical edge type
(3-tuple of str). When an edge type appears in multiple canonical edge types, one
must use a canonical edge type. If the graph has multiple edge types, one must
specify the argument. Otherwise, it can be omitted.
etype : str or (str, str, str), optional
The type names of the edges. The allowed type name formats are:
* ``(str, str, str)`` for source node type, edge type and destination node type.
* or one ``str`` edge type name if the name can uniquely identify a
triplet format in the graph.
Can be omitted if the graph has only one type of edges.
Returns
-------
......@@ -3614,9 +3565,7 @@ class DGLHeteroGraph(object):
The name of the feature that the initializer applies. If not given, the
initializer applies to all features.
ntype : str, optional
The type of the nodes that the initializer applies. If the graph has
multiple node types, one must specify the argument. Otherwise, it can
be omitted.
The type name of the nodes. Can be omitted if the graph has only one type of nodes.
Notes
-----
......@@ -3703,11 +3652,15 @@ class DGLHeteroGraph(object):
field : str, optional
The name of the feature that the initializer applies. If not given, the
initializer applies to all features.
etype : str or tuple of str, optional
The edge type for query, which can be an edge type (str) or a canonical edge type
(3-tuple of str). When an edge type appears in multiple canonical edge types, one
must use a canonical edge type. If the graph has multiple edge types, one must
specify the argument. Otherwise, it can be omitted.
etype : str or (str, str, str), optional
The type names of the edges. The allowed type name formats are:
* ``(str, str, str)`` for source node type, edge type and destination node type.
* or one ``str`` edge type name if the name can uniquely identify a
triplet format in the graph.
Can be omitted if the graph has only one type of edges.
Notes
-----
......@@ -3945,27 +3898,50 @@ class DGLHeteroGraph(object):
#################################################################
def apply_nodes(self, func, v=ALL, ntype=None, inplace=False):
"""Apply the function on the nodes with the same type to update their
features.
If None is provided for ``func``, nothing will happen.
"""Update the features of the specified nodes by the provided function.
Parameters
----------
func : callable or None
Apply function on the nodes. The function should be
a :mod:`Node UDF <dgl.udf>`.
v : int or iterable of int or tensor, optional
The (type-specific) node (ids) on which to apply ``func``. (Default: ALL)
func : callable
The function to update node features. It must be
a :ref:`apiudf`.
v : node IDs
The node IDs. The allowed formats are:
* ``int``: A single node.
* Int Tensor: Each element is a node ID. The tensor must have the same device type
and ID data type as the graph's.
* iterable[int]: Each element is a node ID.
If not given (default), use all the nodes in the graph.
ntype : str, optional
The node type. Can be omitted if there is only one node type
in the graph. (Default: None)
The node type name. Can be omitted if there is
only one type of nodes in the graph.
inplace : bool, optional
**DEPRECATED**. If True, update will be done in place, but autograd will break.
(Default: False)
**DEPRECATED**.
Examples
--------
The following example uses PyTorch backend.
>>> import dgl
>>> import torch
**Homogeneous graph**
>>> g = dgl.graph(([0, 1, 2, 3], [1, 2, 3, 4]))
>>> g.ndata['h'] = torch.ones(5, 2)
>>> g.apply_nodes(lambda nodes: {'x' : nodes.data['h'] * 2})
>>> g.ndata['x']
tensor([[2., 2.],
[2., 2.],
[2., 2.],
[2., 2.],
[2., 2.]])
**Heterogeneous graph**
>>> g = dgl.heterograph({('user', 'follows', 'user'): ([0, 1], [1, 2])})
>>> g.nodes['user'].data['h'] = torch.ones(3, 5)
>>> g.apply_nodes(lambda nodes: {'h': nodes.data['h'] * 2}, ntype='user')
......@@ -3990,27 +3966,76 @@ class DGLHeteroGraph(object):
self._set_n_repr(ntid, v, ndata)
def apply_edges(self, func, edges=ALL, etype=None, inplace=False):
"""Apply the function on the edges with the same type to update their
features.
If None is provided for ``func``, nothing will happen.
"""Update the features of the specified edges by the provided function.
Parameters
----------
func : callable
Apply function on the edge. The function should be
an :mod:`Edge UDF <dgl.udf>`.
edges : optional
Edges on which to apply ``func``. See :func:`send` for valid
edge specification. (Default: ALL)
etype : str or tuple of str, optional
The edge type. Can be omitted if there is only one edge type
in the graph. (Default: None)
func : dgl.function.BuiltinFunction or callable
The function to generate new edge features. It must be either
a :ref:`api-built-in` or a :ref:`apiudf`.
edges : edges
The edges to update features on. The allowed input formats are:
* ``int``: A single edge ID.
* Int Tensor: Each element is an edge ID. The tensor must have the same device type
and ID data type as the graph's.
* iterable[int]: Each element is an edge ID.
* (Tensor, Tensor): The node-tensors format where the i-th elements
of the two tensors specify an edge.
* (iterable[int], iterable[int]): Similar to the node-tensors format but
stores edge endpoints in python iterables.
Default value specifies all the edges in the graph.
etype : str or (str, str, str), optional
The type name of the edges. The allowed type name formats are:
* ``(str, str, str)`` for source node type, edge type and destination node type.
* or one ``str`` edge type name if the name can uniquely identify a
triplet format in the graph.
Can be omitted if the graph has only one type of edges.
inplace: bool, optional
**DEPRECATED**. Must be False.
**DEPRECATED**.
Notes
-----
DGL recommends using DGL's built-in functions for the :attr:`func` argument,
because DGL will invoke efficient kernels that avoid copying node features to
edge features in this case.
Examples
--------
The following example uses PyTorch backend.
>>> import dgl
>>> import torch
**Homogeneous graph**
>>> g = dgl.graph(([0, 1, 2, 3], [1, 2, 3, 4]))
>>> g.ndata['h'] = torch.ones(5, 2)
>>> g.apply_edges(lambda edges: {'x' : edges.src['h'] + edges.dst['h']})
>>> g.edata['x']
tensor([[2., 2.],
[2., 2.],
[2., 2.],
[2., 2.]])
Use built-in function
>>> import dgl.function as fn
>>> g.apply_edges(fn.u_add_v('h', 'h', 'x'))
>>> g.edata['x']
tensor([[2., 2.],
[2., 2.],
[2., 2.],
[2., 2.]])
**Heterogeneous graph**
>>> g = dgl.heterograph({('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 2, 1])})
>>> g.edges[('user', 'plays', 'game')].data['h'] = torch.ones(4, 5)
>>> g.apply_edges(lambda edges: {'h': edges.data['h'] * 2})
......@@ -4048,40 +4073,50 @@ class DGLHeteroGraph(object):
apply_node_func=None,
etype=None,
inplace=False):
"""Send messages along edges of the specified type, and let destinations
receive them.
Optionally, apply a function to update the node features after "receive".
This is a convenient combination for performing
:mod:`send <dgl.DGLHeteroGraph.send>` along the ``edges`` and
:mod:`recv <dgl.DGLHeteroGraph.recv>` for the destinations of the ``edges``.
**Only works if the graph has one edge type.** For multiple types, use
.. code::
g['edgetype'].send_and_recv(edges, message_func, reduce_func,
apply_node_func, inplace=inplace)
"""Send messages along the specified edges and reduce them on
the destination nodes to update their features.
Parameters
----------
edges : See :func:`send` for valid edge specification.
Edges on which to apply ``func``.
message_func : callable
Message function on the edges. The function should be
an :mod:`Edge UDF <dgl.udf>`.
reduce_func : callable
Reduce function on the node. The function should be
a :mod:`Node UDF <dgl.udf>`.
edges : edges
The edges to send and receive messages on. The allowed input formats are:
* ``int``: A single edge ID.
* Int Tensor: Each element is an edge ID. The tensor must have the same device type
and ID data type as the graph's.
* iterable[int]: Each element is an edge ID.
* (Tensor, Tensor): The node-tensors format where the i-th elements
of the two tensors specify an edge.
* (iterable[int], iterable[int]): Similar to the node-tensors format but
stores edge endpoints in python iterables.
message_func : dgl.function.BuiltinFunction or callable
The message function to generate messages along the edges.
It must be either a :ref:`api-built-in` or a :ref:`apiudf`.
reduce_func : dgl.function.BuiltinFunction or callable
The reduce function to aggregate the messages.
It must be either a :ref:`api-built-in` or a :ref:`apiudf`.
apply_node_func : callable, optional
Apply function on the nodes. The function should be
a :mod:`Node UDF <dgl.udf>`. (Default: None)
etype : str or tuple of str, optional
The edge type. Can be omitted if there is only one edge type
in the graph. (Default: None)
An optional apply function to further update the node features
after the message reduction. It must be a :ref:`apiudf`.
etype : str or (str, str, str), optional
The type name of the edges. The allowed type name formats are:
* ``(str, str, str)`` for source node type, edge type and destination node type.
* or one ``str`` edge type name if the name can uniquely identify a
triplet format in the graph.
Can be omitted if the graph has only one type of edges.
inplace: bool, optional
**DEPRECATED**. Must be False.
**DEPRECATED**.
Notes
-----
DGL recommends using DGL's built-in functions for the :attr:`message_func`
and the :attr:`reduce_func` arguments,
because DGL will invoke efficient kernels that avoid copying node features to
edge features in this case.
Examples
--------
......@@ -4090,6 +4125,29 @@ class DGLHeteroGraph(object):
>>> import dgl.function as fn
>>> import torch
**Homogeneous graph**
>>> g = dgl.graph(([0, 1, 2, 3], [1, 2, 3, 4]))
>>> g.ndata['x'] = torch.ones(5, 2)
>>> # Specify edges using (source node IDs, destination node IDs).
>>> g.send_and_recv(([1, 2], [2, 3]), fn.copy_u('x', 'm'), fn.sum('m', 'h'))
>>> g.ndata['h']
tensor([[0., 0.],
[0., 0.],
[1., 1.],
[1., 1.],
[0., 0.]])
>>> # Specify edges using IDs.
>>> g.send_and_recv([0, 2, 3], fn.copy_u('x', 'm'), fn.sum('m', 'h'))
>>> g.ndata['h']
tensor([[0., 0.],
[1., 1.],
[0., 0.],
[1., 1.],
[1., 1.]])
**Heterogeneous graph**
>>> g = dgl.heterograph({
... ('user', 'follows', 'user'): ([0, 1], [1, 2]),
... ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 1, 1])
......@@ -4128,45 +4186,50 @@ class DGLHeteroGraph(object):
apply_node_func=None,
etype=None,
inplace=False):
"""Pull messages from the node(s)' predecessors and then update their features.
Optionally, apply a function to update the node features after receive.
This is equivalent to :mod:`send_and_recv <dgl.DGLHeteroGraph.send_and_recv>`
on the incoming edges of ``v`` with the specified type.
Other notes:
* `reduce_func` will be skipped for nodes with no incoming messages.
* If all ``v`` have no incoming message, this will downgrade to an :func:`apply_nodes`.
* If some ``v`` have no incoming message, their new feature value will be calculated
by the column initializer (see :func:`set_n_initializer`). The feature shapes and
dtypes will be inferred.
**Only works if the graph has one edge type.** For multiple types, use
.. code::
g['edgetype'].pull(v, message_func, reduce_func, apply_node_func, inplace=inplace)
"""Pull messages from the specified node(s)' predecessors along the
specified edge type, and aggregate them to update the node features.
Parameters
----------
v : int, container or tensor, optional
The node(s) to be updated.
message_func : callable
Message function on the edges. The function should be
an :mod:`Edge UDF <dgl.udf>`.
reduce_func : callable
Reduce function on the node. The function should be
a :mod:`Node UDF <dgl.udf>`.
v : node IDs
The node IDs. The allowed formats are:
* ``int``: A single node.
* Int Tensor: Each element is a node ID. The tensor must have the same device type
and ID data type as the graph's.
* iterable[int]: Each element is a node ID.
message_func : dgl.function.BuiltinFunction or callable
The message function to generate messages along the edges.
It must be either a :ref:`api-built-in` or a :ref:`apiudf`.
reduce_func : dgl.function.BuiltinFunction or callable
The reduce function to aggregate the messages.
It must be either a :ref:`api-built-in` or a :ref:`apiudf`.
apply_node_func : callable, optional
Apply function on the nodes. The function should be
a :mod:`Node UDF <dgl.udf>`. (Default: None)
etype : str or tuple of str, optional
The edge type. Can be omitted if there is only one edge type
in the graph. (Default: None)
An optional apply function to further update the node features
after the message reduction. It must be a :ref:`apiudf`.
etype : str or (str, str, str), optional
The type name of the edges. The allowed type name formats are:
* ``(str, str, str)`` for source node type, edge type and destination node type.
* or one ``str`` edge type name if the name can uniquely identify a
triplet format in the graph.
Can be omitted if the graph has only one type of edges.
inplace: bool, optional
**DEPRECATED**. Must be False.
**DEPRECATED**.
Notes
-----
* If some of the given nodes :attr:`v` have no in-edges, DGL does not invoke
message and reduce functions for these nodes and fills their aggregated messages
with zero. Users can control the filled values via :meth:`set_n_initializer`.
DGL still invokes :attr:`apply_node_func` if provided.
* DGL recommends using DGL's built-in functions for the :attr:`message_func`
and the :attr:`reduce_func` arguments,
because DGL will invoke efficient kernels that avoid copying node features to
edge features in this case.
Examples
--------
......@@ -4175,7 +4238,19 @@ class DGLHeteroGraph(object):
>>> import dgl.function as fn
>>> import torch
Instantiate a heterograph.
**Homogeneous graph**
>>> g = dgl.graph(([0, 1, 2, 3], [1, 2, 3, 4]))
>>> g.ndata['x'] = torch.ones(5, 2)
>>> g.pull([0, 3, 4], fn.copy_u('x', 'm'), fn.sum('m', 'h'))
>>> g.ndata['h']
tensor([[0., 0.],
[0., 0.],
[0., 0.],
[1., 1.],
[1., 1.]])
**Heterogeneous graph**
>>> g = dgl.heterograph({
... ('user', 'follows', 'user'): ([0, 1], [1, 2]),
......@@ -4214,36 +4289,46 @@ class DGLHeteroGraph(object):
apply_node_func=None,
etype=None,
inplace=False):
"""Send message from the node(s) to their successors and update them.
This is equivalent to performing
:mod:`send_and_recv <DGLHeteroGraph.send_and_recv>` along the outbound
edges from ``u``.
**Only works if the graph has one edge type.** For multiple types, use
.. code::
g['edgetype'].push(u, message_func, reduce_func, apply_node_func, inplace=inplace)
"""Send message from the specified node(s) to their successors
along the specified edge type and update their node features.
Parameters
----------
u : int, container or tensor
The node(s) to push out messages.
message_func : callable
Message function on the edges. The function should be
an :mod:`Edge UDF <dgl.udf>`.
reduce_func : callable
Reduce function on the node. The function should be
a :mod:`Node UDF <dgl.udf>`.
u : node IDs
The node IDs. The allowed formats are:
* ``int``: A single node.
* Int Tensor: Each element is a node ID. The tensor must have the same device type
and ID data type as the graph's.
* iterable[int]: Each element is a node ID.
message_func : dgl.function.BuiltinFunction or callable
The message function to generate messages along the edges.
It must be either a :ref:`api-built-in` or a :ref:`apiudf`.
reduce_func : dgl.function.BuiltinFunction or callable
The reduce function to aggregate the messages.
It must be either a :ref:`api-built-in` or a :ref:`apiudf`.
apply_node_func : callable, optional
Apply function on the nodes. The function should be
a :mod:`Node UDF <dgl.udf>`. (Default: None)
etype : str, optional
The edge type. Can be omitted if there is only one edge type
in the graph. (Default: None)
An optional apply function to further update the node features
after the message reduction. It must be a :ref:`apiudf`.
etype : str or (str, str, str), optional
The type name of the edges. The allowed type name formats are:
* ``(str, str, str)`` for source node type, edge type and destination node type.
* or one ``str`` edge type name if the name can uniquely identify a
triplet format in the graph.
Can be omitted if the graph has only one type of edges.
inplace: bool, optional
**DEPRECATED**. Must be False.
**DEPRECATED**.
Notes
-----
DGL recommends using DGL's built-in functions for the :attr:`message_func`
and the :attr:`reduce_func` arguments,
because DGL will invoke efficient kernels that avoid copying node features to
edge features in this case.
Examples
--------
......@@ -4252,7 +4337,19 @@ class DGLHeteroGraph(object):
>>> import dgl.function as fn
>>> import torch
Instantiate a heterograph.
**Homogeneous graph**
>>> g = dgl.graph(([0, 1, 2, 3], [1, 2, 3, 4]))
>>> g.ndata['x'] = torch.ones(5, 2)
>>> g.push([0, 1], fn.copy_u('x', 'm'), fn.sum('m', 'h'))
>>> g.ndata['h']
tensor([[0., 0.],
[1., 1.],
[1., 1.],
[0., 0.],
[0., 0.]])
**Heterogeneous graph**
>>> g = dgl.heterograph({('user', 'follows', 'user'): ([0, 0], [1, 2])})
>>> g.nodes['user'].data['h'] = torch.tensor([[0.], [1.], [2.]])
......@@ -4275,42 +4372,59 @@ class DGLHeteroGraph(object):
reduce_func,
apply_node_func=None,
etype=None):
"""Send messages through all edges and update all nodes.
Optionally, apply a function to update the node features after receive.
This is equivalent to
:mod:`send_and_recv <dgl.DGLHeteroGraph.send_and_recv>` over all edges
of the specified type.
**Only works if the graph has one edge type.** For multiple types, use
.. code::
g['edgetype'].update_all(message_func, reduce_func, apply_node_func)
"""Send messages along all the edges of the specified type
and update all the nodes of the corresponding destination type.
Parameters
----------
message_func : callable
Message function on the edges. The function should be
an :mod:`Edge UDF <dgl.udf>`.
reduce_func : callable
Reduce function on the node. The function should be
a :mod:`Node UDF <dgl.udf>`.
message_func : dgl.function.BuiltinFunction or callable
The message function to generate messages along the edges.
It must be either a :ref:`api-built-in` or a :ref:`apiudf`.
reduce_func : dgl.function.BuiltinFunction or callable
The reduce function to aggregate the messages.
It must be either a :ref:`api-built-in` or a :ref:`apiudf`.
apply_node_func : callable, optional
Apply function on the nodes. The function should be
a :mod:`Node UDF <dgl.udf>`. (Default: None)
etype : str, optional
The edge type. Can be omitted if there is only one edge type
in the graph. (Default: None)
An optional apply function to further update the node features
after the message reduction. It must be a :ref:`apiudf`.
etype : str or (str, str, str), optional
The type name of the edges. The allowed type name formats are:
* ``(str, str, str)`` for source node type, edge type and destination node type.
* or one ``str`` edge type name if the name can uniquely identify a
triplet format in the graph.
Can be omitted if the graph has only one type of edges.
Notes
-----
* If some of the nodes in the graph have no in-edges, DGL does not invoke
message and reduce functions for these nodes and fills their aggregated messages
with zero. Users can control the filled values via :meth:`set_n_initializer`.
DGL still invokes :attr:`apply_node_func` if provided.
* DGL recommends using DGL's built-in functions for the :attr:`message_func`
and the :attr:`reduce_func` arguments,
because DGL will invoke efficient kernels that avoid copying node features to
edge features in this case.
Examples
--------
>>> import torch
>>> import dgl
>>> import dgl.function as fn
>>> import torch
Instantiate a heterograph.
**Homogeneous graph**
>>> g = dgl.graph(([0, 1, 2, 3], [1, 2, 3, 4]))
>>> g.ndata['x'] = torch.ones(5, 2)
>>> g.update_all(fn.copy_u('x', 'm'), fn.sum('m', 'h'))
>>> g.ndata['h']
tensor([[0., 0.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.]])
**Heterogeneous graph**
>>> g = dgl.heterograph({('user', 'follows', 'user'): ([0, 1, 2], [1, 2, 2])})
......@@ -4335,35 +4449,48 @@ class DGLHeteroGraph(object):
#################################################################
def multi_update_all(self, etype_dict, cross_reducer, apply_node_func=None):
r"""Send and receive messages along all edges.
This is equivalent to
:mod:`multi_send_and_recv <dgl.DGLHeteroGraph.multi_send_and_recv>`
over all edges.
r"""Send messages along all the edges, reduce them first within each edge
type and then across different types, and then update the node features of
all the nodes.
Parameters
----------
etype_dict : dict
Mapping an edge type (str or tuple of str) to the type specific
configuration (3-tuples). Each 3-tuple represents
(msg_func, reduce_func, apply_node_func):
* msg_func: callable
Message function on the edges. The function should be
an :mod:`Edge UDF <dgl.udf>`.
* reduce_func: callable
Reduce function on the nodes. The function should be
a :mod:`Node UDF <dgl.udf>`.
Arguments for edge-type-wise message passing. The keys are edge types
while the values are message passing arguments.
The allowed key formats are:
* ``(str, str, str)`` for source node type, edge type and destination node type.
* or one ``str`` edge type name if the name can uniquely identify a
triplet format in the graph.
The value must be a tuple ``(message_func, reduce_func, [apply_node_func])``, where
* message_func : dgl.function.BuiltinFunction or callable
The message function to generate messages along the edges.
It must be either a :ref:`api-built-in` or a :ref:`apiudf`.
* reduce_func : dgl.function.BuiltinFunction or callable
The reduce function to aggregate the messages.
It must be either a :ref:`api-built-in` or a :ref:`apiudf`.
* apply_node_func : callable, optional
Apply function on the nodes. The function should be
a :mod:`Node UDF <dgl.udf>`. (Default: None)
An optional apply function to further update the node features
after the message reduction. It must be a :ref:`apiudf`.
cross_reducer : str
Cross type reducer. One of ``"sum"``, ``"min"``, ``"max"``, ``"mean"``, ``"stack"``.
apply_node_func : callable
Apply function on the nodes. The function should be
a :mod:`Node UDF <dgl.udf>`. (Default: None)
inplace: bool, optional
**DEPRECATED**. Must be False.
apply_node_func : callable, optional
An optional apply function after the messages are reduced both
type-wisely and across different types.
It must be a :ref:`apiudf`.
Notes
-----
DGL recommends using DGL's built-in functions for the message_func
and the reduce_func in the type-wise message passing arguments,
because DGL will invoke efficient kernels that avoid copying node features to
edge features in this case.
Examples
--------
......@@ -4431,21 +4558,27 @@ class DGLHeteroGraph(object):
Parameters
----------
nodes_generator : iterable, each element is a list or a tensor of node ids
The generator of node frontiers. It specifies which nodes perform
:func:`pull` at each timestep.
message_func : callable
Message function on the edges. The function should be
an :mod:`Edge UDF <dgl.udf>`.
reduce_func : callable
Reduce function on the node. The function should be
a :mod:`Node UDF <dgl.udf>`.
nodes_generator : iterable[node IDs]
The generator of node frontiers. Each frontier is a set of node IDs
stored in Tensor or python iterables.
It specifies which nodes perform :func:`pull` at each step.
message_func : dgl.function.BuiltinFunction or callable
The message function to generate messages along the edges.
It must be either a :ref:`api-built-in` or a :ref:`apiudf`.
reduce_func : dgl.function.BuiltinFunction or callable
The reduce function to aggregate the messages.
It must be either a :ref:`api-built-in` or a :ref:`apiudf`.
apply_node_func : callable, optional
Apply function on the nodes. The function should be
a :mod:`Node UDF <dgl.udf>`. (Default: None)
etype : str, optional
The edge type. Can be omitted if there is only one edge type
in the graph. (Default: None)
An optional apply function to further update the node features
after the message reduction. It must be a :ref:`apiudf`.
etype : str or (str, str, str), optional
The type name of the edges. The allowed type name formats are:
* ``(str, str, str)`` for source node type, edge type and destination node type.
* or one ``str`` edge type name if the name can uniquely identify a
triplet format in the graph.
Can be omitted if the graph has only one type of edges.
Examples
--------
......@@ -4492,18 +4625,23 @@ class DGLHeteroGraph(object):
----------
edges_generator : generator
The generator of edge frontiers.
message_func : callable
Message function on the edges. The function should be
an :mod:`Edge UDF <dgl.udf>`.
reduce_func : callable
Reduce function on the node. The function should be
a :mod:`Node UDF <dgl.udf>`.
message_func : dgl.function.BuiltinFunction or callable
The message function to generate messages along the edges.
It must be either a :ref:`api-built-in` or a :ref:`apiudf`.
reduce_func : dgl.function.BuiltinFunction or callable
The reduce function to aggregate the messages.
It must be either a :ref:`api-built-in` or a :ref:`apiudf`.
apply_node_func : callable, optional
Apply function on the nodes. The function should be
a :mod:`Node UDF <dgl.udf>`. (Default: None)
etype : str, optional
The edge type. Can be omitted if there is only one edge type
in the graph. (Default: None)
An optional apply function to further update the node features
after the message reduction. It must be a :ref:`apiudf`.
etype : str or (str, str, str), optional
The type name of the edges. The allowed type name formats are:
* ``(str, str, str)`` for source node type, edge type and destination node type.
* or one ``str`` edge type name if the name can uniquely identify a
triplet format in the graph.
Can be omitted if the graph has only one type of edges.
Examples
--------
......@@ -4563,7 +4701,7 @@ class DGLHeteroGraph(object):
Returns
-------
tensor
Tensor
A 1D tensor that contains the ID(s) of the node(s) that satisfy the predicate.
Examples
......@@ -4630,30 +4768,31 @@ class DGLHeteroGraph(object):
Its output tensor should be a 1D boolean tensor with
each element indicating whether the corresponding edge in
the batch satisfies the predicate.
edges : edge ID(s) or edge end nodes, optional
The edge(s) for query. The allowed formats are:
edges : edges
The edges for query. The allowed input formats are:
- Tensor: A 1D tensor that contains the IDs of the edge(s) for query, whose data
type and device should be the same as the :py:attr:`idtype` and device of the graph.
- iterable[int]: Similar to the tensor, but stores edge IDs in a sequence
(e.g. list, tuple, numpy.ndarray).
- (Tensor, Tensor): A 2-tuple of the source and destination nodes of multiple
edges for query. Each tensor is a 1D tensor containing node IDs. DGL calls this
format "tuple of node-tensors". The data type and device of the tensors should
be the same as the :py:attr:`idtype` and device of the graph.
- (iterable[int], iterable[int]): Similar to the tuple of node-tensors format,
but stores node IDs in two sequences (e.g. list, tuple, numpy.ndarray).
By default, it considers all edges.
etype : str or tuple of str, optional
The edge type for query, which can be an edge type (str) or a canonical edge type
(3-tuple of str). When an edge type appears in multiple canonical edge types, one
must use a canonical edge type. If the graph has multiple edge types, one must
specify the argument. Otherwise, it can be omitted.
* ``int``: A single edge ID.
* Int Tensor: Each element is an edge ID. The tensor must have the same device type
and ID data type as the graph's.
* iterable[int]: Each element is an edge ID.
* (Tensor, Tensor): The node-tensors format where the i-th elements
of the two tensors specify an edge.
* (iterable[int], iterable[int]): Similar to the node-tensors format but
stores edge endpoints in python iterables.
By default, it considers all the edges.
etype : str or (str, str, str), optional
The type name of the edges. The allowed type name formats are:
* ``(str, str, str)`` for source node type, edge type and destination node type.
* or one ``str`` edge type name if the name can uniquely identify a
triplet format in the graph.
Can be omitted if the graph has only one type of edges.
Returns
-------
tensor
Tensor
A 1D tensor that contains the ID(s) of the edge(s) that satisfy the predicate.
Examples
......
"""Package for neural network common components."""
"""The ``dgl.nn`` package contains framework-specific implementations for
common Graph Neural Network layers (or module in PyTorch, Block in MXNet).
Users can directly import ``dgl.nn.<layer_name>`` (e.g., ``dgl.nn.GraphConv``),
and the package will dispatch the layer name to the actual implementation
according to the backend framework currently in use.
Note that there are coverage differences among frameworks. If you encounter
an ``ImportError: cannot import name 'XXX'`` error, that means the layer is
not available for the current backend. If you wish a module to appear in DGL,
please `create an issue <https://github.com/dmlc/dgl/issues>`_ starting with
"[Feature Request] NN Module XXXModel". If you want to contribute an NN module,
please `create a pull request <https://github.com/dmlc/dgl/pulls>`_ starting
with "[NN] XXX module".
"""
import importlib
import sys
import os
......
......@@ -8,14 +8,12 @@ from . import ndarray as nd
__all__ = ['seed']
def seed(val):
"""Set the seed of randomized methods in DGL.
The randomized methods include various samplers and random walk routines.
"""Set the random seed of DGL.
Parameters
----------
val : int
The seed
The seed.
"""
_CAPI_SetSeed(val)
......@@ -41,8 +39,6 @@ def choice(a, size, replace=True, prob=None): # pylint: disable=invalid-name
It out-performs numpy for non-uniform sampling in general cases.
TODO(minjie): support RNG as one of the arguments.
Parameters
----------
a : 1-D tensor or int
......@@ -61,6 +57,7 @@ def choice(a, size, replace=True, prob=None): # pylint: disable=invalid-name
samples : 1-D tensor
The generated random samples
"""
#TODO(minjie): support RNG as one of the arguments.
if isinstance(size, tuple):
num = np.prod(size)
else:
......
......@@ -28,9 +28,9 @@ def readout_nodes(graph, feat, weight=None, *, op='sum', ntype=None):
feat : str
Node feature name.
weight : str, optional
Node weight name. If None, no weighting will be performed,
otherwise, weight each node feature with field :attr:`feat`.
for aggregation. The weight feature shape must be compatible with
Node weight name. None means aggregating without weights.
Otherwise, multiply each node feature by node feature :attr:`weight`
before aggregation. The weight feature shape must be compatible with
an element-wise multiplication with the feature tensor.
op : str, optional
Readout operator. Can be 'sum', 'max', 'min', 'mean'.
......@@ -39,7 +39,7 @@ def readout_nodes(graph, feat, weight=None, *, op='sum', ntype=None):
Returns
-------
tensor
Tensor
Result tensor.
Examples
......@@ -101,22 +101,28 @@ def readout_edges(graph, feat, weight=None, *, op='sum', etype=None):
Parameters
----------
graph : DGLGraph.
Input graph.
The input graph.
feat : str
Edge feature name.
The edge feature name.
weight : str, optional
Edge weight name. If None, no weighting will be performed,
The edge weight feature name. If None, no weighting will be performed.
Otherwise, multiply each edge feature by edge feature :attr:`weight`
before aggregation. The weight feature shape must be compatible with
an element-wise multiplication with the feature tensor.
op : str, optional
Readout operator. Can be 'sum', 'max', 'min', 'mean'.
etype : str, tuple of str, optional
Edge type. Can be omitted if there is only one edge type in the graph.
etype : str or (str, str, str), optional
The type names of the edges. The allowed type name formats are:
* ``(str, str, str)`` for source node type, edge type and destination node type.
* or one ``str`` edge type name if the name can uniquely identify a
triplet format in the graph.
Can be omitted if the graph has only one type of edges.
Returns
-------
tensor
Tensor
Result tensor.
Examples
......@@ -166,31 +172,55 @@ def readout_edges(graph, feat, weight=None, *, op='sum', etype=None):
def sum_nodes(graph, feat, weight=None, *, ntype=None):
    """Sum readout over nodes.

    Shorthand for
    ``dgl.readout_nodes(graph, feat, weight, ntype=ntype, op='sum')``.

    See Also
    --------
    readout_nodes
    """
    summed = readout_nodes(graph, feat, weight, op='sum', ntype=ntype)
    return summed
def sum_edges(graph, feat, weight=None, *, etype=None):
    """Sum readout over edges.

    Shorthand for
    ``dgl.readout_edges(graph, feat, weight, etype=etype, op='sum')``.

    See Also
    --------
    readout_edges
    """
    summed = readout_edges(graph, feat, weight, op='sum', etype=etype)
    return summed
def mean_nodes(graph, feat, weight=None, *, ntype=None):
    """Mean readout over nodes.

    Shorthand for
    ``dgl.readout_nodes(graph, feat, weight, ntype=ntype, op='mean')``.

    See Also
    --------
    readout_nodes
    """
    averaged = readout_nodes(graph, feat, weight, op='mean', ntype=ntype)
    return averaged
def mean_edges(graph, feat, weight=None, *, etype=None):
    """Mean readout over edges.

    Shorthand for
    ``dgl.readout_edges(graph, feat, weight, etype=etype, op='mean')``.

    See Also
    --------
    readout_edges
    """
    averaged = readout_edges(graph, feat, weight, op='mean', etype=etype)
    return averaged
def max_nodes(graph, feat, weight=None, *, ntype=None):
    """Max readout over nodes.

    Shorthand for
    ``dgl.readout_nodes(graph, feat, weight, ntype=ntype, op='max')``.

    See Also
    --------
    readout_nodes
    """
    maximum = readout_nodes(graph, feat, weight, op='max', ntype=ntype)
    return maximum
def max_edges(graph, feat, weight=None, *, etype=None):
    """Max readout over edges.

    Shorthand for
    ``dgl.readout_edges(graph, feat, weight, etype=etype, op='max')``.

    See Also
    --------
    readout_edges
    """
    maximum = readout_edges(graph, feat, weight, op='max', etype=etype)
    return maximum
......@@ -210,15 +240,15 @@ def softmax_nodes(graph, feat, *, ntype=None):
Parameters
----------
graph : DGLGraph.
Input graph.
The input graph.
feat : str
Node feature name.
The node feature name.
ntype : str, optional
Node type. Can be omitted if there is only one node type in the graph.
The node type name. Can be omitted if there is only one node type in the graph.
Returns
-------
tensor
Tensor
Result tensor.
Examples
......@@ -269,15 +299,21 @@ def softmax_edges(graph, feat, *, etype=None):
Parameters
----------
graph : DGLGraph.
Input graph.
The input graph.
feat : str
Edge feature name.
etype : str, typle of str, optional
Edge type. Can be omitted if there is only one edge type in the graph.
The edge feature name.
etype : str or (str, str, str), optional
The type names of the edges. The allowed type name formats are:
* ``(str, str, str)`` for source node type, edge type and destination node type.
* or one ``str`` edge type name if the name can uniquely identify a
triplet format in the graph.
Can be omitted if the graph has only one type of edges.
Returns
-------
tensor
Tensor
Result tensor.
Examples
......@@ -535,9 +571,10 @@ def _topk_on(graph, typestr, feat, k, descending, sortby, ntype_or_etype):
topk_indices
def topk_nodes(graph, feat, k, *, descending=True, sortby=None, ntype=None):
"""Perform a graph-wise top-k on node features :attr:`feat` in
:attr:`graph` by feature at index :attr:`sortby`. If :attr:
`descending` is set to False, return the k smallest elements instead.
"""Return a graph-level representation by a graph-wise top-k on
node features :attr:`feat` in :attr:`graph` by feature at index :attr:`sortby`.
If :attr:`descending` is set to False, return the k smallest elements instead.
If :attr:`sortby` is set to None, the function would perform top-k on
all dimensions independently, equivalent to calling
......@@ -569,6 +606,11 @@ def topk_nodes(graph, feat, k, *, descending=True, sortby=None, ntype=None):
:math:`B` is the batch size of the input graph, :math:`D`
is the feature size.
Notes
-----
If an example has :math:`n` nodes and :math:`n<k`, the ``sorted_feat``
tensor will pad the :math:`n+1` to :math:`k` th rows with zero.
Examples
--------
......@@ -631,20 +673,16 @@ def topk_nodes(graph, feat, k, *, descending=True, sortby=None, ntype=None):
[0.0880, 0.6379, 0.4451, 0.6893, 0.5197]]]), tensor([[[1, 0, 1, 3, 1],
[3, 2, 0, 2, 2],
[2, 3, 2, 1, 3]]]))
Notes
-----
If an example has :math:`n` nodes and :math:`n<k`, the ``sorted_feat``
tensor will pad the :math:`n+1` to :math:`k`th rows with zero;
"""
return _topk_on(graph, 'nodes', feat, k,
descending=descending, sortby=sortby,
ntype_or_etype=ntype)
def topk_edges(graph, feat, k, *, descending=True, sortby=None, etype=None):
"""Perform a graph-wise top-k on node features :attr:`feat` in
:attr:`graph` by feature at index :attr:`sortby`. If :attr:
`descending` is set to False, return the k smallest elements instead.
"""Return a graph-level representation by a graph-wise top-k
on edge features :attr:`feat` in :attr:`graph` by feature at index :attr:`sortby`.
If :attr:`descending` is set to False, return the k smallest elements instead.
If :attr:`sortby` is set to None, the function would perform top-k on
all dimensions independently, equivalent to calling
......@@ -676,6 +714,11 @@ def topk_edges(graph, feat, k, *, descending=True, sortby=None, etype=None):
:math:`B` is the batch size of the input graph, :math:`D`
is the feature size.
Notes
-----
If an example has :math:`n` edges and :math:`n<k`, the ``sorted_feat``
tensor will pad rows :math:`n+1` through :math:`k` with zeros.
Examples
--------
......@@ -738,11 +781,6 @@ def topk_edges(graph, feat, k, *, descending=True, sortby=None, etype=None):
[0.0880, 0.6379, 0.4451, 0.6893, 0.5197]]]), tensor([[[1, 0, 1, 3, 1],
[3, 2, 0, 2, 2],
[2, 3, 2, 1, 3]]]))
Notes
-----
If an example has :math:`n` nodes and :math:`n<k`, the ``sorted_feat``
tensor will pad the :math:`n+1` to :math:`k`th rows with zero;
"""
return _topk_on(graph, 'edges', feat, k,
descending=descending, sortby=sortby,
......
"""This module contains the implementations of various sampling operators.
"""The ``dgl.sampling`` package contains operators and utilities for
sampling from a graph via random walks, neighbor sampling, etc. They
are typically used together with the ``DataLoader`` s in the
``dgl.dataloading`` package. The user guide :ref:`guide-minibatch`
gives a holistic explanation on how different components work together.
"""
from .randomwalks import *
from .pinsage import *
from .neighbor import *
......@@ -18,92 +18,102 @@ __all__ = ['node_subgraph', 'edge_subgraph', 'node_type_subgraph', 'edge_type_su
'in_subgraph', 'out_subgraph']
def node_subgraph(graph, nodes):
"""Return the subgraph induced on given nodes.
"""Return a subgraph induced on the given nodes.
The metagraph of the returned subgraph is the same as the parent graph.
Features are copied from the original graph.
A node-induced subgraph is a subset of the nodes of a graph together with
any edges whose endpoints are both in this subset. In addition to extracting
the subgraph, DGL conducts the following:
* Relabel the extracted nodes to IDs starting from zero.
* Copy the features of the extracted nodes and edges to the resulting graph.
The copy is *lazy* and incurs data movement only when needed.
* Store the IDs of the extracted nodes and edges in the ``ndata`` and ``edata``
of the resulting graph under name ``dgl.NID`` and ``dgl.EID``, respectively.
If the graph is heterogeneous, DGL extracts a subgraph per relation and composes
them as the resulting graph. Thus, the resulting graph has the same set of relations
as the input one.
Parameters
----------
graph : DGLGraph
The graph to extract subgraphs from.
nodes : list or dict[str->list or iterable]
A dictionary mapping node types to node ID array for constructing
subgraph. All nodes must exist in the graph.
nodes : nodes or dict[str, nodes]
The nodes to form the subgraph. The allowed nodes formats are:
If the graph only has one node type, one can just specify a list,
tensor, or any iterable of node IDs intead.
* Int Tensor: Each element is a node ID. The tensor must have the same device type
and ID data type as the graph's.
* iterable[int]: Each element is a node ID.
* Bool Tensor: Each :math:`i^{th}` element is a bool flag indicating whether
node :math:`i` is in the subgraph.
The node ID array can be either an interger tensor or a bool tensor.
When a bool tensor is used, it is automatically converted to
an interger tensor using the semantic of np.where(nodes_idx == True).
Note: When using bool tensor, only backend (torch, tensorflow, mxnet)
tensors are supported.
If the graph is homogeneous, one can directly pass the above formats.
Otherwise, the argument must be a dictionary with keys being node types
and values being the nodes.
Returns
-------
G : DGLGraph
The subgraph.
The nodes and edges in the subgraph are relabeled using consecutive
integers from 0.
One can retrieve the mapping from subgraph node/edge ID to parent
node/edge ID via ``dgl.NID`` and ``dgl.EID`` node/edge features of the
subgraph.
Examples
--------
The following example uses PyTorch backend.
Instantiate a heterograph.
>>> import dgl
>>> import torch
Extract a subgraph from a homogeneous graph.
>>> g = dgl.graph(([0, 1, 2, 3, 4], [1, 2, 3, 4, 0])) # 5-node cycle
>>> sg = dgl.node_subgraph(g, [0, 1, 4])
>>> sg
Graph(num_nodes=3, num_edges=2,
ndata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)}
edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)})
>>> sg.edges()
(tensor([0, 2]), tensor([1, 0]))
>>> sg.ndata[dgl.NID] # original node IDs
tensor([0, 1, 4])
>>> sg.edata[dgl.EID] # original edge IDs
tensor([0, 4])
Specify nodes using a boolean mask.
>>> nodes = torch.tensor([True, True, False, False, True]) # choose nodes [0, 1, 4]
>>> dgl.node_subgraph(g, nodes)
Graph(num_nodes=3, num_edges=2,
ndata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)}
edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)})
The resulting subgraph also copies features from the parent graph.
>>> g.ndata['x'] = torch.arange(10).view(5, 2)
>>> sg = dgl.node_subgraph(g, [0, 1, 4])
>>> sg
Graph(num_nodes=3, num_edges=2,
ndata_schemes={'x': Scheme(shape=(2,), dtype=torch.int64),
'_ID': Scheme(shape=(), dtype=torch.int64)}
edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)})
>>> sg.ndata['x']
tensor([[0, 1],
[2, 3],
[8, 9]])
Extract a subgraph from a heterogeneous graph.
>>> g = dgl.heterograph({
... ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 2, 1]),
... ('user', 'follows', 'user'): ([0, 1, 1], [1, 2, 2])
... })
>>> # Set node features
>>> g.nodes['user'].data['h'] = torch.tensor([[0.], [1.], [2.]])
Get subgraphs.
>>> g.subgraph({'user': [4, 5]})
Traceback (most recent call last):
...
dgl._ffi.base.DGLError: ...
>>> sub_g = g.subgraph({'user': [1, 2]})
>>> print(sub_g)
Graph(num_nodes={'user': 2, 'game': 0},
num_edges={('user', 'plays', 'game'): 0, ('user', 'follows', 'user'): 2},
metagraph=[('user', 'game'), ('user', 'user')])
Get subgraphs using boolean mask tensor.
>>> sub_g = g.subgraph({'user': th.tensor([False, True, True])})
>>> print(sub_g)
>>> sub_g = dgl.node_subgraph(g, {'user': [1, 2]})
>>> sub_g
Graph(num_nodes={'user': 2, 'game': 0},
num_edges={('user', 'plays', 'game'): 0, ('user', 'follows', 'user'): 2},
metagraph=[('user', 'game'), ('user', 'user')])
Get the original node/edge indices.
>>> sub_g['follows'].ndata[dgl.NID] # Get the node indices in the raw graph
tensor([1, 2])
>>> sub_g['follows'].edata[dgl.EID] # Get the edge indices in the raw graph
tensor([1, 2])
Get the copied node features.
>>> sub_g.nodes['user'].data['h']
tensor([[1.],
[2.]])
>>> sub_g.nodes['user'].data['h'] += 1
>>> g.nodes['user'].data['h'] # Features are not shared.
tensor([[0.],
[1.],
[2.]])
See Also
--------
edge_subgraph
......@@ -129,106 +139,124 @@ def node_subgraph(graph, nodes):
induced_edges = sgi.induced_edges
return _create_hetero_subgraph(graph, sgi, induced_nodes, induced_edges)
DGLHeteroGraph.subgraph = node_subgraph
DGLHeteroGraph.subgraph = utils.alias_func(node_subgraph)
def edge_subgraph(graph, edges, preserve_nodes=False):
"""Return the subgraph induced on given edges.
The metagraph of the returned subgraph is the same as the parent graph.
"""Return a subgraph induced on the given edges.
Features are copied from the original graph.
Parameters
----------
graph : DGLGraph
The graph to extract subgraphs from.
edges : dict[(str, str, str), Tensor]
A dictionary mapping edge types to edge ID array for constructing
subgraph. All edges must exist in the subgraph.
An edge-induced subgraph is equivalent to creating a new graph
with the same number of nodes using the given edges. In addition to extracting
the subgraph, DGL conducts the following:
The edge types are characterized by triplets of
``(src type, etype, dst type)``.
* Relabel the incident nodes to IDs starting from zero. Isolated nodes are removed.
If the graph only has one edge type, one can just specify a list,
tensor, or any iterable of edge IDs intead.
* Copy the features of the extracted nodes and edges to the resulting graph.
The copy is *lazy* and incurs data movement only when needed.
The edge ID array can be either an interger tensor or a bool tensor.
When a bool tensor is used, it is automatically converted to
an interger tensor using the semantic of np.where(edges_idx == True).
* Store the IDs of the extracted nodes and edges in the ``ndata`` and ``edata``
of the resulting graph under name ``dgl.NID`` and ``dgl.EID``, respectively.
Note: When using bool tensor, only backend (torch, tensorflow, mxnet)
tensors are supported.
If the graph is heterogeneous, DGL extracts a subgraph per relation and composes
them as the resulting graph. Thus, the resulting graph has the same set of relations
as the input one.
preserve_nodes : bool
Whether to preserve all nodes or not. If false, all nodes
without edges will be removed. (Default: False)
Parameters
----------
graph : DGLGraph
The graph to extract the subgraph from.
edges : edges or dict[(str, str, str), edges]
The edges to form the subgraph. The allowed edges formats are:
* Int Tensor: Each element is an edge ID. The tensor must have the same device type
and ID data type as the graph's.
* iterable[int]: Each element is an edge ID.
* Bool Tensor: Each :math:`i^{th}` element is a bool flag indicating whether
edge :math:`i` is in the subgraph.
If the graph is homogeneous, one can directly pass the above formats.
Otherwise, the argument must be a dictionary with keys being edge types
and values being the edges.
preserve_nodes : bool, optional
If True, keep all the nodes of the input graph: the incident nodes are not
relabeled and the isolated nodes are not removed. (Default: False)
Returns
-------
G : DGLGraph
The subgraph.
The nodes and edges are relabeled using consecutive integers from 0.
One can retrieve the mapping from subgraph node/edge ID to parent
node/edge ID via ``dgl.NID`` and ``dgl.EID`` node/edge features of the
subgraph.
Examples
--------
The following example uses PyTorch backend.
Instantiate a heterograph.
>>> import dgl
>>> import torch
Extract a subgraph from a homogeneous graph.
>>> g = dgl.graph(([0, 1, 2, 3, 4], [1, 2, 3, 4, 0])) # 5-node cycle
>>> sg = dgl.edge_subgraph(g, [0, 4])
>>> sg
Graph(num_nodes=3, num_edges=2,
ndata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)}
edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)})
>>> sg.edges()
(tensor([0, 1]), tensor([2, 0]))
>>> sg.ndata[dgl.NID] # original node IDs
tensor([0, 4, 1])
>>> sg.edata[dgl.EID] # original edge IDs
tensor([0, 4])
Extract a subgraph without node relabeling.
>>> sg = dgl.edge_subgraph(g, [0, 4], preserve_nodes=True)
>>> sg
Graph(num_nodes=5, num_edges=2,
ndata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)}
edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)})
>>> sg.edges()
(tensor([0, 4]), tensor([1, 0]))
Specify edges using a boolean mask.
>>> edges = torch.tensor([True, False, False, False, True]) # choose edges [0, 4]
>>> dgl.edge_subgraph(g, edges)
Graph(num_nodes=3, num_edges=2,
ndata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)}
edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)})
The resulting subgraph also copies features from the parent graph.
>>> g.ndata['x'] = torch.arange(10).view(5, 2)
>>> sg = dgl.edge_subgraph(g, [0, 4])
>>> sg
Graph(num_nodes=3, num_edges=2,
ndata_schemes={'x': Scheme(shape=(2,), dtype=torch.int64),
'_ID': Scheme(shape=(), dtype=torch.int64)}
edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)})
>>> sg.ndata[dgl.NID]
tensor([0, 4, 1])
>>> sg.ndata['x']
tensor([[0, 1],
[8, 9],
[2, 3]])
Extract a subgraph from a heterogeneous graph.
>>> g = dgl.heterograph({
... ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 2, 1]),
... ('user', 'follows', 'user'): ([0, 1, 1], [1, 2, 2])
... })
>>> # Set edge features
>>> g.edges['follows'].data['h'] = torch.tensor([[0.], [1.], [2.]])
Get subgraphs.
>>> g.edge_subgraph({('user', 'follows', 'user'): [5, 6]})
Traceback (most recent call last):
...
dgl._ffi.base.DGLError: ...
>>> sub_g = g.edge_subgraph({('user', 'follows', 'user'): [1, 2],
>>> ('user', 'plays', 'game'): [2]})
>>> sub_g = dgl.edge_subgraph(g, {('user', 'follows', 'user'): [1, 2],
... ('user', 'plays', 'game'): [2]})
>>> print(sub_g)
Graph(num_nodes={'user': 2, 'game': 1},
num_edges={('user', 'plays', 'game'): 1, ('user', 'follows', 'user'): 2},
metagraph=[('user', 'game'), ('user', 'user')])
Get subgraphs using boolean mask tensor.
>>> sub_g = g.edge_subgraph({('user', 'follows', 'user'): th.tensor([False, True, True]),
>>> ('user', 'plays', 'game'): th.tensor([False, False, True, False])})
>>> sub_g
Graph(num_nodes={'user': 2, 'game': 1},
num_edges={('user', 'plays', 'game'): 1, ('user', 'follows', 'user'): 2},
metagraph=[('user', 'game'), ('user', 'user')])
Get the original node/edge indices.
>>> sub_g['follows'].ndata[dgl.NID] # Get the node indices in the raw graph
tensor([1, 2])
>>> sub_g['plays'].edata[dgl.EID] # Get the edge indices in the raw graph
tensor([2])
Get the copied node features.
>>> sub_g.edges['follows'].data['h']
tensor([[1.],
[2.]])
>>> sub_g.edges['follows'].data['h'] += 1
>>> g.edges['follows'].data['h'] # Features are not shared.
tensor([[0.],
[1.],
[2.]])
See Also
--------
subgraph
node_subgraph
"""
if graph.is_block:
raise DGLError('Extracting subgraph from a block graph is not allowed.')
......@@ -252,73 +280,82 @@ def edge_subgraph(graph, edges, preserve_nodes=False):
induced_nodes = sgi.induced_nodes
return _create_hetero_subgraph(graph, sgi, induced_nodes, induced_edges)
DGLHeteroGraph.edge_subgraph = edge_subgraph
DGLHeteroGraph.edge_subgraph = utils.alias_func(edge_subgraph)
def in_subgraph(g, nodes):
"""Return the subgraph induced on the inbound edges of all edge types of the
"""Return the subgraph induced on the inbound edges of all the edge types of the
given nodes.
All the nodes are preserved regardless of whether they have an edge or not.
An edge-induced subgraph is equivalent to creating a new graph
with the same number of nodes using the given edges. In addition to extracting
the subgraph, DGL conducts the following:
The metagraph of the returned subgraph is the same as the parent graph.
* Copy the features of the extracted nodes and edges to the resulting graph.
The copy is *lazy* and incurs data movement only when needed.
Features are copied from the original graph.
* Store the IDs of the extracted edges in the ``edata``
of the resulting graph under name ``dgl.EID``.
If the graph is heterogeneous, DGL extracts a subgraph per relation and composes
them as the resulting graph. Thus, the resulting graph has the same set of relations
as the input one.
Parameters
----------
g : DGLGraph
Full graph structure.
nodes : tensor or dict
Node ids to sample neighbors from. The allowed types
are dictionary of node types to node id tensors, or simply node id tensor if
the given graph g has only one type of nodes.
The input graph.
nodes : nodes or dict[str, nodes]
The nodes to form the subgraph. The allowed nodes formats are:
* Int Tensor: Each element is a node ID. The tensor must have the same device type
and ID data type as the graph's.
* iterable[int]: Each element is a node ID.
If the graph is homogeneous, one can directly pass the above formats.
Otherwise, the argument must be a dictionary with keys being node types
and values being the nodes.
Returns
-------
DGLGraph
The subgraph.
One can retrieve the mapping from subgraph edge ID to parent
edge ID via ``dgl.EID`` edge features of the subgraph.
Examples
--------
The following example uses PyTorch backend.
Instantiate a heterograph.
>>> import dgl
>>> import torch
Extract a subgraph from a homogeneous graph.
>>> g = dgl.graph(([0, 1, 2, 3, 4], [1, 2, 3, 4, 0])) # 5-node cycle
>>> g.edata['w'] = torch.arange(10).view(5, 2)
>>> sg = dgl.in_subgraph(g, [2, 0])
>>> sg
Graph(num_nodes=5, num_edges=2,
ndata_schemes={}
edata_schemes={'w': Scheme(shape=(2,), dtype=torch.int64),
'_ID': Scheme(shape=(), dtype=torch.int64)})
>>> sg.edges()
(tensor([1, 4]), tensor([2, 0]))
>>> sg.edata[dgl.EID] # original edge IDs
tensor([1, 4])
>>> sg.edata['w'] # also extract the features
tensor([[2, 3],
[8, 9]])
Extract a subgraph from a heterogeneous graph.
>>> g = dgl.heterograph({
... ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 2, 1]),
... ('user', 'follows', 'user'): ([0, 1, 1], [1, 2, 2])})
>>> # Set edge features
>>> g.edges['follows'].data['h'] = torch.tensor([[0.], [1.], [2.]])
Get subgraphs.
>>> sub_g = g.in_subgraph({'user': [2], 'game': [2]})
>>> print(sub_g)
>>> sub_g
Graph(num_nodes={'game': 3, 'user': 3},
num_edges={('user', 'plays', 'game'): 1, ('user', 'follows', 'user'): 2},
metagraph=[('user', 'game', 'plays'), ('user', 'user', 'follows')])
Get the original node/edge indices.
>>> sub_g.edges['plays'].data[dgl.EID]
tensor([2])
>>> sub_g.edges['follows'].data[dgl.EID]
tensor([1, 2])
Get the copied edge features.
>>> sub_g.edges['follows'].data['h']
tensor([[1.],
[2.]])
>>> sub_g.edges['follows'].data['h'] += 1
>>> g.edges['follows'].data['h'] # Features are not shared.
tensor([[0.],
[1.],
[2.]])
See Also
--------
out_subgraph
......@@ -341,73 +378,82 @@ def in_subgraph(g, nodes):
induced_edges = sgi.induced_edges
return _create_hetero_subgraph(g, sgi, None, induced_edges)
DGLHeteroGraph.in_subgraph = in_subgraph
DGLHeteroGraph.in_subgraph = utils.alias_func(in_subgraph)
def out_subgraph(g, nodes):
"""Return the subgraph induced on the outbound edges of all edge types of the
"""Return the subgraph induced on the out-bound edges of all the edge types of the
given nodes.
All the nodes are preserved regardless of whether they have an edge or not.
An edge-induced subgraph is equivalent to creating a new graph
with the same number of nodes using the given edges. In addition to extracting
the subgraph, DGL conducts the following:
The metagraph of the returned subgraph is the same as the parent graph.
* Copy the features of the extracted nodes and edges to the resulting graph.
The copy is *lazy* and incurs data movement only when needed.
Features are copied from the original graph.
* Store the IDs of the extracted edges in the ``edata``
of the resulting graph under name ``dgl.EID``.
If the graph is heterogeneous, DGL extracts a subgraph per relation and composes
them as the resulting graph. Thus, the resulting graph has the same set of relations
as the input one.
Parameters
----------
g : DGLGraph
Full graph structure.
nodes : tensor or dict
Node ids to sample neighbors from. The allowed types
are dictionary of node types to node id tensors, or simply node id tensor if
the given graph g has only one type of nodes.
The input graph.
nodes : nodes or dict[str, nodes]
The nodes to form the subgraph. The allowed nodes formats are:
* Int Tensor: Each element is a node ID. The tensor must have the same device type
and ID data type as the graph's.
* iterable[int]: Each element is a node ID.
If the graph is homogeneous, one can directly pass the above formats.
Otherwise, the argument must be a dictionary with keys being node types
and values being the nodes.
Returns
-------
DGLGraph
The subgraph.
One can retrieve the mapping from subgraph edge ID to parent
edge ID via ``dgl.EID`` edge features of the subgraph.
Examples
--------
The following example uses PyTorch backend.
Instantiate a heterograph.
>>> import dgl
>>> import torch
Extract a subgraph from a homogeneous graph.
>>> g = dgl.graph(([0, 1, 2, 3, 4], [1, 2, 3, 4, 0])) # 5-node cycle
>>> g.edata['w'] = torch.arange(10).view(5, 2)
>>> sg = dgl.out_subgraph(g, [2, 0])
>>> sg
Graph(num_nodes=5, num_edges=2,
ndata_schemes={}
edata_schemes={'w': Scheme(shape=(2,), dtype=torch.int64),
'_ID': Scheme(shape=(), dtype=torch.int64)})
>>> sg.edges()
(tensor([2, 0]), tensor([3, 1]))
>>> sg.edata[dgl.EID] # original edge IDs
tensor([2, 0])
>>> sg.edata['w'] # also extract the features
tensor([[4, 5],
[0, 1]])
Extract a subgraph from a heterogeneous graph.
>>> g = dgl.heterograph({
... ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 2, 1]),
... ('user', 'follows', 'user'): ([0, 1, 1], [1, 2, 2])})
>>> # Set edge features
>>> g.edges['follows'].data['h'] = torch.tensor([[0.], [1.], [2.]])
Get subgraphs.
>>> sub_g = g.out_subgraph({'user': [1]})
>>> print(sub_g)
>>> sub_g
Graph(num_nodes={'game': 3, 'user': 3},
num_edges={('user', 'plays', 'game'): 2, ('user', 'follows', 'user'): 2},
metagraph=[('user', 'game', 'plays'), ('user', 'user', 'follows')])
Get the original node/edge indices.
>>> sub_g.edges['plays'].data[dgl.EID]
tensor([1, 2])
>>> sub_g.edges['follows'].data[dgl.EID]
tensor([1, 2])
Get the copied edge features.
>>> sub_g.edges['follows'].data['h']
tensor([[1.],
[2.]])
>>> sub_g.edges['follows'].data['h'] += 1
>>> g.edges['follows'].data['h'] # Features are not shared.
tensor([[0.],
[1.],
[2.]])
See Also
--------
in_subgraph
......@@ -430,22 +476,23 @@ def out_subgraph(g, nodes):
induced_edges = sgi.induced_edges
return _create_hetero_subgraph(g, sgi, None, induced_edges)
DGLHeteroGraph.out_subgraph = out_subgraph
DGLHeteroGraph.out_subgraph = utils.alias_func(out_subgraph)
def node_type_subgraph(graph, ntypes):
"""Return the subgraph induced on given node types.
The metagraph of the returned subgraph is the subgraph of the original
metagraph induced from the node types.
Features are shared with the original graph.
A node-type-induced subgraph contains all the nodes of the given subset of
the node types of a graph and any edges whose endpoints are both in this subset.
In addition to extracting the subgraph, DGL also copies the features of the
extracted nodes and edges to the resulting graph.
The copy is *lazy* and incurs data movement only when needed.
Parameters
----------
graph : DGLGraph
The graph to extract subgraphs from.
ntypes : list[str]
The node types
The type names of the nodes in the subgraph.
Returns
-------
......@@ -456,6 +503,9 @@ def node_type_subgraph(graph, ntypes):
--------
The following example uses PyTorch backend.
>>> import dgl
>>> import torch
Instantiate a heterograph.
>>> g = dgl.heterograph({
......@@ -473,17 +523,12 @@ def node_type_subgraph(graph, ntypes):
ndata_schemes={'h': Scheme(shape=(1,), dtype=torch.float32)}
edata_schemes={})
Get the shared node features.
Get the extracted node features.
>>> sub_g.nodes['user'].data['h']
tensor([[0.],
[1.],
[2.]])
>>> sub_g.nodes['user'].data['h'] += 1
>>> g.nodes['user'].data['h'] # Features are shared.
tensor([[1.],
[2.],
[3.]])
See Also
--------
......@@ -498,22 +543,28 @@ def node_type_subgraph(graph, ntypes):
etypes.append(graph.canonical_etypes[etid])
return edge_type_subgraph(graph, etypes)
DGLHeteroGraph.node_type_subgraph = node_type_subgraph
DGLHeteroGraph.node_type_subgraph = utils.alias_func(node_type_subgraph)
def edge_type_subgraph(graph, etypes):
"""Return the subgraph induced on given edge types.
The metagraph of the returned subgraph is the subgraph of the original metagraph
induced from the edge types.
Features are shared with the original graph.
An edge-type-induced subgraph contains all the edges of the given subset of
the edge types of a graph and the nodes incident to those edges.
In addition to extracting the subgraph, DGL also copies the features of the
extracted nodes and edges to the resulting graph.
The copy is *lazy* and incurs data movement only when needed.
Parameters
----------
graph : DGLGraph
The graph to extract subgraphs from.
etypes : list[str or tuple]
The edge types
etypes : list[str] or list[(str, str, str)]
The type names of the edges in the subgraph. The allowed type name
formats are:
* ``(str, str, str)`` for source node type, edge type and destination node type.
* or one ``str`` for the edge type name if the name can uniquely identify a
triplet format in the graph.
Returns
-------
......@@ -524,6 +575,9 @@ def edge_type_subgraph(graph, etypes):
--------
The following example uses PyTorch backend.
>>> import dgl
>>> import torch
Instantiate a heterograph.
>>> g = dgl.heterograph({
......@@ -536,7 +590,7 @@ def edge_type_subgraph(graph, etypes):
Get subgraphs.
>>> sub_g = g.edge_type_subgraph(['follows'])
>>> print(sub_g)
>>> sub_g
Graph(num_nodes=3, num_edges=3,
ndata_schemes={}
edata_schemes={'h': Scheme(shape=(1,), dtype=torch.float32)})
......@@ -547,11 +601,6 @@ def edge_type_subgraph(graph, etypes):
tensor([[0.],
[1.],
[2.]])
>>> sub_g.edges['follows'].data['h'] += 1
>>> g.edges['follows'].data['h'] # Features are shared.
tensor([[1.],
[2.],
[3.]])
See Also
--------
......@@ -579,7 +628,7 @@ def edge_type_subgraph(graph, etypes):
hg = DGLHeteroGraph(hgidx, induced_ntypes, induced_etypes, node_frames, edge_frames)
return hg
DGLHeteroGraph.edge_type_subgraph = edge_type_subgraph
DGLHeteroGraph.edge_type_subgraph = utils.alias_func(edge_type_subgraph)
#################### Internal functions ####################
......
......@@ -59,30 +59,29 @@ def pairwise_squared_distance(x):
#pylint: disable=invalid-name
def knn_graph(x, k):
"""Convert a tensor into k-nearest-neighbor (KNN) graph(s) according
to Euclidean distance.
"""Construct a graph from a set of points according to k-nearest-neighbor (KNN)
and return.
The function transforms the coordinates/features of a point set
into a directed homogeneous graph. The coordinates of the point
into a directed homogeneous graph. The coordinates of the point
set are specified as a matrix whose rows correspond to points and
columns correspond to coordinate/feature dimensions.
The nodes of the returned graph correspond to the points. An edge
exists if the source node is one of the k-nearest neighbors of the
destination node.
The nodes of the returned graph correspond to the points, where the predecessors
of each point are its k-nearest neighbors measured by the Euclidean distance.
If you give a 3D tensor, then each submatrix will be transformed
into a separate graph. DGL then composes the graphs into a large
If :attr:`x` is a 3D tensor, then each submatrix will be transformed
into a separate graph. DGL then composes the graphs into a large
graph of multiple connected components.
Parameters
----------
x : 2D or 3D Tensor
The input tensor. It can be either on CPU or GPU.
x : Tensor
The point coordinates. It can be either on CPU or GPU.
* If 2D, ``x[i]`` corresponds to the i-th node in the KNN graph.
* If it is 2D, ``x[i]`` corresponds to the i-th node in the KNN graph.
* If 3D, ``x[i]`` corresponds to the i-th KNN graph and
* If it is 3D, ``x[i]`` corresponds to the i-th KNN graph and
``x[i][j]`` corresponds to the j-th node in the i-th KNN graph.
k : int
The number of nearest neighbors per node.
......@@ -90,7 +89,7 @@ def knn_graph(x, k):
Returns
-------
DGLGraph
The graph. The node IDs are in the same order as :attr:`x`.
The constructed graph. The node IDs are in the same order as :attr:`x`.
The returned graph is on CPU, regardless of the context of input :attr:`x`.
......@@ -152,22 +151,25 @@ def knn_graph(x, k):
#pylint: disable=invalid-name
def segmented_knn_graph(x, k, segs):
"""Convert a tensor into multiple k-nearest-neighbor (KNN) graph(s)
with different number of nodes.
"""Construct multiple graphs from multiple sets of points according to
k-nearest-neighbor (KNN) and return.
Each chunk of :attr:`x` contains coordinates/features of a point set.
Compared with :func:`dgl.knn_graph`, this allows multiple point sets with
different capacity. The points from different sets are stored contiguously
in the :attr:`x` tensor.
:attr:`segs` specifies the number of points in each point set. The
function constructs a KNN graph for each point set, where the predecessors
of each point are its k-nearest neighbors. DGL then composes all KNN graphs
of each point are its k-nearest neighbors measured by the Euclidean distance.
DGL then composes all KNN graphs
into a graph with multiple connected components.
Parameters
----------
x : 2D Tensor
Coordinates/features of points. It can be either on CPU or GPU.
x : Tensor
Coordinates/features of points. Must be 2D. It can be either on CPU or GPU.
k : int
The number of nearest neighbors per node.
segs : list of int
segs : list[int]
Number of points in each point set. The numbers in :attr:`segs`
must sum up to the number of rows in :attr:`x`.
......@@ -222,37 +224,28 @@ def segmented_knn_graph(x, k, segs):
return convert.from_scipy(adj)
def to_bidirected(g, readonly=None, copy_ndata=False):
r"""Convert the graph to a bidirectional simple graph, adding reverse edges and
removing parallel edges.
The function generates a new graph with no edge features. In the new graph,
a single edge ``(u, v)`` exists if and only if there exists an edge connecting ``u``
to ``v`` or an edge connecting ``v`` to ``u`` in the original graph.
def to_bidirected(g, copy_ndata=False, readonly=None):
r"""Convert the graph to a bi-directional simple graph and return.
For a heterogeneous graph with multiple edge types, DGL treats edges corresponding
to each type as a separate graph and convert the graph to a bidirected one
for each of them.
For an input graph :math:`G`, return a new graph :math:`G'` such that an edge
:math:`(u, v)\in G'` if and only if there exists an edge :math:`(u, v)\in G` or
an edge :math:`(v, u)\in G`. The resulting graph :math:`G'` is a simple graph,
meaning there is no parallel edge.
Since :func:`to_bidirected` **is not well defined for unidirectional
bipartite graphs**, DGL will raise an error if an edge type whose source node type is
different from the destination node type exists.
The operation only works for edges whose two endpoints belong to the same node type.
DGL will raise an error if the input graph is heterogeneous and contains edges
with different types of endpoints.
Parameters
----------
g : DGLGraph
The input graph.
readonly : bool
Deprecated. There will be no difference between readonly and non-readonly
(Default: True)
copy_ndata: bool, optional
If True, the node features of the bidirected graph are copied from the
original graph.
If False, the bidirected graph will not have any node features.
original graph. If False, the bidirected graph will not have any node features.
(Default: False)
readonly : bool
**DEPRECATED**.
Returns
-------
......@@ -261,10 +254,9 @@ def to_bidirected(g, readonly=None, copy_ndata=False):
Notes
-----
If :attr:`copy_ndata` is True, same tensors will be used for
the features of the original graph and the returned graph to save memory cost.
As a result, users should avoid performing in-place operations on the features of
the returned graph, which will corrupt the features of the original graph as well.
If :attr:`copy_ndata` is True, the resulting graph will share the node feature
tensors with the input graph. Hence, users should try to avoid in-place operations
which will be visible to both graphs.
Examples
--------
......@@ -314,24 +306,21 @@ def to_bidirected(g, readonly=None, copy_ndata=False):
def add_reverse_edges(g, readonly=None, copy_ndata=True,
copy_edata=False, ignore_bipartite=False):
r"""Add reverse edges to a graph.
r"""Add an reversed edge for each edge in the input graph and return a new graph.
For a graph with edges :math:`(i_1, j_1), \cdots, (i_n, j_n)`, this
function creates a new graph with edges
:math:`(i_1, j_1), \cdots, (i_n, j_n), (j_1, i_1), \cdots, (j_n, i_n)`.
For a heterogeneous graph with multiple edge types, DGL treats the edges corresponding
to each type as a separate graph and add reverse edges for each of them.
Since :func:`add_reverse_edges` **is not well defined for unidirectional bipartite graphs**,
an error will be raised if an edge type of the input heterogeneous graph is for a
unidirectional bipartite graph. DGL simply skips the edge types corresponding
to unidirectional bipartite graphs by specifying ``ignore_bipartite=True``.
The operation only works for edges whose two endpoints belong to the same node type.
DGL will raise error if the input graph is heterogeneous and contains edges
with different types of endpoints. If :attr:`ignore_bipartite` is true, DGL will
ignore those edges instead.
Parameters
----------
g : DGLGraph
The input graph. Can be on either CPU or GPU.
The input graph.
readonly : bool, default to be True
Deprecated. There will be no difference between readonly and non-readonly
copy_ndata: bool, optional
......@@ -360,13 +349,10 @@ def add_reverse_edges(g, readonly=None, copy_ndata=True,
Notes
-----
If :attr:`copy_ndata` is True, same tensors are used as
the node features of the original graph and the new graph.
As a result, users should avoid performing in-place operations
on the node features of the new graph to avoid feature corruption.
On the contrary, edge features are concatenated,
and they are not shared due to concatenation.
If :attr:`copy_ndata` is True, the resulting graph will share the node feature
tensors with the input graph. Hence, users should try to avoid in-place operations
which will be visible to both graphs. On the contrary, the two graphs do not share
the same edge feature storage.
Examples
--------
......@@ -377,7 +363,7 @@ def add_reverse_edges(g, readonly=None, copy_ndata=True,
>>> bg1.edges()
(tensor([0, 0, 0, 1]), tensor([0, 1, 0, 0]))
**Heterogeneous graphs with Multiple Edge Types**
**Heterogeneous graphs**
>>> g = dgl.heterograph({
>>> ('user', 'wins', 'user'): (th.tensor([0, 2, 0, 2, 2]), th.tensor([1, 1, 2, 1, 0])),
......@@ -489,12 +475,11 @@ def line_graph(g, backtracking=True, shared=False):
Notes
-----
If :attr:`shared` is True, same tensors will be used for
the features of the original graph and the returned graph to save memory cost.
As a result, users should avoid performing in-place operations on the features of
the returned graph, which will corrupt the features of the original graph as well.
* If :attr:`shared` is True, the node features of the resulting graph share the same
storage with the edge features of the input graph. Hence, users should try to
avoid in-place operations which will be visible to both graphs.
The implementation is done on CPU, even if the input and output graphs are on GPU.
* The function supports input graph on GPU but copies it to CPU during computation.
Examples
--------
......@@ -532,15 +517,13 @@ def line_graph(g, backtracking=True, shared=False):
return lg
DGLHeteroGraph.line_graph = line_graph
DGLHeteroGraph.line_graph = utils.alias_func(line_graph)
def khop_adj(g, k):
"""Return the matrix of :math:`A^k` where :math:`A` is the adjacency matrix of the graph
:math:`g`, where rows represent source nodes and columns represent destination nodes.
The returned matrix is a 32-bit float dense matrix on CPU.
:math:`g`.
The graph must be homogeneous.
The returned matrix is a 32-bit float dense matrix on CPU. The graph must be homogeneous.
Parameters
----------
......@@ -551,7 +534,7 @@ def khop_adj(g, k):
Returns
-------
tensor
Tensor
The returned tensor.
Examples
......@@ -607,10 +590,9 @@ def khop_graph(g, k, copy_ndata=True):
Notes
-----
If :attr:`copy_ndata` is True, same tensors will be used for
the features of the original graph and the returned graph to save memory cost.
As a result, users should avoid performing in-place operations on the features of
the returned graph, which will corrupt the features of the original graph as well.
If :attr:`copy_ndata` is True, the resulting graph will share the node feature
tensors with the input graph. Hence, users should try to avoid in-place operations
which will be visible to both graphs.
Examples
--------
......@@ -656,19 +638,11 @@ def khop_graph(g, k, copy_ndata=True):
return new_g
def reverse(g, copy_ndata=True, copy_edata=False, *, share_ndata=None, share_edata=None):
r"""Return the reverse of a graph.
r"""Return a new graph in which every edge is the reverse of an edge in the input graph.
The reverse (also called converse, transpose) of a graph with edges
:math:`(i_1, j_1), (i_2, j_2), \cdots` is a new graph with edges
:math:`(j_1, i_1), (j_2, i_2), \cdots`.
For a heterogeneous graph with multiple edge types, DGL treats the edges corresponding
to each type as a separate graph and compute the reverse for each of them.
If the original edge type is ``(A, B, C)``, its reverse will have edge type
``(C, B, A)``.
Given a :class:`DGLGraph` object, DGL returns another :class:`DGLGraph`
object representing its reverse.
:math:`(i_1, j_1), (i_2, j_2), \cdots` of type ``(U, E, V)`` is a new graph with edges
:math:`(j_1, i_1), (j_2, i_2), \cdots` of type ``(V, E, U)``.
Parameters
----------
......@@ -676,17 +650,11 @@ def reverse(g, copy_ndata=True, copy_edata=False, *, share_ndata=None, share_eda
The input graph.
copy_ndata: bool, optional
If True, the node features of the reversed graph are copied from the
original graph.
If False, the reversed graph will not have any node features.
original graph. If False, the reversed graph will not have any node features.
(Default: True)
copy_edata: bool, optional
If True, the edge features of the reversed graph are copied from the
original graph.
If False, the reversed graph will not have any edge features.
original graph. If False, the reversed graph will not have any edge features.
(Default: False)
Return
......@@ -696,14 +664,14 @@ def reverse(g, copy_ndata=True, copy_edata=False, *, share_ndata=None, share_eda
Notes
-----
If :attr:`copy_ndata` or :attr:`copy_edata` is True, same tensors will be used for
the features of the original graph and the reversed graph to save memory cost.
As a result, users should avoid performing in-place operations on the features of
the reversed graph, which will corrupt the features of the original graph as well.
If :attr:`copy_ndata` or :attr:`copy_edata` is True,
the resulting graph will share the node or edge feature
tensors with the input graph. Hence, users should try to avoid in-place operations
which will be visible to both graphs.
Examples
--------
**Homogeneous graphs or Heterogeneous graphs with A Single Edge Type**
**Homogeneous graphs**
Create a graph to reverse.
......@@ -731,24 +699,7 @@ def reverse(g, copy_ndata=True, copy_edata=False, *, share_ndata=None, share_eda
[4.],
[5.]])
**In-place operations on features of one graph will be reflected on features of
its reverse, which is dangerous. Out-place operations will not be reflected.**
>>> rg.ndata['h'] += 1
>>> g.ndata['h']
tensor([[1.],
[2.],
[3.]])
>>> g.ndata['h'] += 1
>>> rg.ndata['h']
tensor([[2.],
[3.],
[4.]])
>>> rg.ndata['h2'] = th.ones(3, 1)
>>> 'h2' in g.ndata
False
**Heterogeneous graphs with Multiple Edge Types**
**Heterogeneous graphs**
>>> g = dgl.heterograph({
... ('user', 'follows', 'user'): (th.tensor([0, 2]), th.tensor([1, 2])),
......@@ -758,7 +709,7 @@ def reverse(g, copy_ndata=True, copy_edata=False, *, share_ndata=None, share_eda
>>> g.edges['plays'].data['he'] = th.zeros(3, 1)
The resulting graph will have edge types
``('user', 'follows', 'user')`` and ``('user', 'plays', 'game')``.
``('user', 'follows', 'user')`` and ``('game', 'plays', 'user')``.
>>> rg = dgl.reverse(g, copy_ndata=True)
>>> rg
......@@ -804,7 +755,7 @@ def reverse(g, copy_ndata=True, copy_edata=False, *, share_ndata=None, share_eda
return new_g
DGLHeteroGraph.reverse = reverse
DGLHeteroGraph.reverse = utils.alias_func(reverse)
def to_simple_graph(g):
"""Convert the graph to a simple graph with no multi-edge.
......@@ -874,14 +825,15 @@ def to_bidirected_stale(g, readonly=True):
def laplacian_lambda_max(g):
"""Return the largest eigenvalue of the normalized symmetric Laplacian of a graph.
If the graph is batched from multiple graphs, return the list of the largest eigenvalue
for each graph instead.
Parameters
----------
g : DGLGraph
The input graph, it should be an undirected graph. It must be homogeneous.
The input graph. It must be a bi-directed homogeneous graph, i.e., every edge
should have an accompanying reverse edge in the graph.
The graph can be batched from multiple graphs.
Returns
......@@ -938,7 +890,7 @@ def metapath_reachable_graph(g, metapath):
Returns
-------
DGLGraph
A homogeneous or unidirectional bipartite graph. It will be on CPU regardless of
A homogeneous or unidirectional bipartite graph. It will be on CPU regardless of
whether the input graph is on CPU or GPU.
Examples
......@@ -970,21 +922,20 @@ def metapath_reachable_graph(g, metapath):
return new_g
def add_nodes(g, num, data=None, ntype=None):
r"""Append new nodes of the given node type.
r"""Add the given number of nodes to the graph and return a new graph.
The new nodes will have IDs starting from ``g.number_of_nodes(ntype)``.
A new graph with newly added nodes is returned.
The new nodes will have IDs starting from ``g.num_nodes(ntype)``.
Parameters
----------
num : int
Number of nodes to add.
data : dict, optional
Feature data of the added nodes.
The number of nodes to add.
data : dict[str, Tensor], optional
Feature data of the added nodes. The keys are feature names
while the values are feature data.
ntype : str, optional
The type of the new nodes. Can be omitted if there is
only one node type in the graph.
The node type name. Can be omitted if there is
only one type of nodes in the graph.
Return
------
......@@ -993,11 +944,10 @@ def add_nodes(g, num, data=None, ntype=None):
Notes
-----
* If the key of :attr:`data` does not contain some existing feature fields,
those features for the new nodes will be filled with zeros).
* If the key of :attr:`data` contains new feature fields, those features for
the old nodes will be filled zeros).
* For features in :attr:`g` but not in :attr:`data`,
DGL assigns zero features for the newly added nodes.
* For features in :attr:`data` but not in :attr:`g`, DGL assigns zero features
for the existing nodes in the graph.
Examples
--------
......@@ -1007,7 +957,7 @@ def add_nodes(g, num, data=None, ntype=None):
>>> import dgl
>>> import torch
**Homogeneous Graphs or Heterogeneous Graphs with A Single Node Type**
**Homogeneous Graphs**
>>> g = dgl.graph((torch.tensor([0, 1]), torch.tensor([1, 2])))
>>> g.num_nodes()
......@@ -1017,26 +967,26 @@ def add_nodes(g, num, data=None, ntype=None):
5
If the graph has some node features and new nodes are added without
features, their features will be created with zeros.
features, their features will be filled with zeros.
>>> g.ndata['h'] = torch.ones(5, 1)
>>> g = dgl.add_nodes(g, 1)
>>> g.ndata['h']
tensor([[1.], [1.], [1.], [1.], [1.], [0.]])
You can also assign features for the new nodes in adding new nodes.
Assign features for the new nodes.
>>> g = dgl.add_nodes(g, 1, {'h': torch.ones(1, 1), 'w': torch.ones(1, 1)})
>>> g.ndata['h']
tensor([[1.], [1.], [1.], [1.], [1.], [0.], [1.]])
Since :attr:`data` contains new feature fields, the features for old nodes
will be created with zeros.
Since :attr:`data` contains new feature fields, the features for existing nodes
will be filled with zeros.
>>> g.ndata['w']
tensor([[0.], [0.], [0.], [0.], [0.], [0.], [1.]])
**Heterogeneous Graphs with Multiple Node Types**
**Heterogeneous Graphs**
>>> g = dgl.heterograph({
... ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
......@@ -1061,25 +1011,28 @@ def add_nodes(g, num, data=None, ntype=None):
return g
def add_edges(g, u, v, data=None, etype=None):
r"""Append multiple new edges for the specified edge type.
A new graph with newly added edges is returned.
r"""Add the edges to the graph and return a new graph.
The i-th new edge will be from ``u[i]`` to ``v[i]``. The IDs of the new
edges will start from ``g.number_of_edges(etype)``.
edges will start from ``g.num_edges(etype)``.
Parameters
----------
u : int, tensor, numpy.ndarray, list
u : int, Tensor or iterable[int]
Source node IDs, ``u[i]`` gives the source node for the i-th new edge.
v : int, tensor, numpy.ndarray, list
v : int, Tensor or iterable[int]
Destination node IDs, ``v[i]`` gives the destination node for the i-th new edge.
data : dict, optional
Feature data of the added edges. The i-th row of the feature data
corresponds to the i-th new edge.
etype : str or tuple of str, optional
The type of the new edges. Can be omitted if there is
only one edge type in the graph.
data : dict[str, Tensor], optional
Feature data of the added edges. The keys are feature names
while the values are feature data.
etype : str or (str, str, str), optional
The type names of the edges. The allowed type name formats are:
* ``(str, str, str)`` for source node type, edge type and destination node type.
* or one ``str`` edge type name if the name can uniquely identify a
triplet format in the graph.
Can be omitted if the graph has only one type of edges.
Return
------
......@@ -1088,15 +1041,13 @@ def add_edges(g, u, v, data=None, etype=None):
Notes
-----
* If end nodes of adding edges does not exists, add_nodes is invoked
to add new nodes. The node features of the new nodes will be created
with zeros.
* If the key of :attr:`data` does not contain some existing feature fields,
those features for the new edges will be created with zeros.
* If the key of :attr:`data` contains new feature fields, those features for
the old edges will be created with zeros.
* If the end nodes of the given edges do not exist in :attr:`g`,
:func:`dgl.add_nodes` is invoked to add those nodes.
The node features of the new nodes will be filled with zeros.
* For features in :attr:`g` but not in :attr:`data`,
DGL assigns zero features for the newly added edges.
* For features in :attr:`data` but not in :attr:`g`, DGL assigns zero features
for the existing edges in the graph.
Examples
--------
......@@ -1105,7 +1056,7 @@ def add_edges(g, u, v, data=None, etype=None):
>>> import dgl
>>> import torch
**Homogeneous Graphs or Heterogeneous Graphs with A Single Edge Type**
**Homogeneous Graphs**
>>> g = dgl.graph((torch.tensor([0, 1]), torch.tensor([1, 2])))
>>> g.num_edges()
......@@ -1121,7 +1072,7 @@ def add_edges(g, u, v, data=None, etype=None):
4
If the graph has some edge features and new edges are added without
features, their features will be created with zeros.
features, their features will be filled with zeros.
>>> g.edata['h'] = torch.ones(4, 1)
>>> g = dgl.add_edges(g, torch.tensor([1]), torch.tensor([1]))
......@@ -1136,12 +1087,12 @@ def add_edges(g, u, v, data=None, etype=None):
tensor([[1.], [1.], [1.], [1.], [0.], [1.], [2.]])
Since :attr:`data` contains new feature fields, the features for old edges
will be created with zeros.
will be filled with zeros.
>>> g.edata['w']
tensor([[0.], [0.], [0.], [0.], [0.], [1.], [1.]])
**Heterogeneous Graphs with Multiple Edge Types**
**Heterogeneous Graphs**
>>> g = dgl.heterograph({
... ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
......@@ -1166,22 +1117,24 @@ def add_edges(g, u, v, data=None, etype=None):
return g
def remove_edges(g, eids, etype=None):
r"""Remove multiple edges with the specified edge type.
A new graph with certain edges deleted is returned.
Nodes will not be removed. After removing edges, the rest
edges will be re-indexed using consecutive integers from 0,
with their relative order preserved.
r"""Remove the specified edges and return a new graph.
The features for the removed edges will be removed accordingly.
Also delete the features of the edges. The edges must exist in the graph.
The resulting graph has the same number of nodes as the input one,
even if some nodes become isolated after the edge removal.
Parameters
----------
eids : int, tensor, numpy.ndarray, list
IDs for the edges to remove.
etype : str or tuple of str, optional
The type of the edges to remove. Can be omitted if there is
only one edge type in the graph.
eids : int, Tensor, iterable[int]
The IDs of the edges to remove.
etype : str or (str, str, str), optional
The type names of the edges. The allowed type name formats are:
* ``(str, str, str)`` for source node type, edge type and destination node type.
* or one ``str`` edge type name if the name can uniquely identify a
triplet format in the graph.
Can be omitted if the graph has only one type of edges.
Return
------
......@@ -1193,7 +1146,7 @@ def remove_edges(g, eids, etype=None):
>>> import dgl
>>> import torch
**Homogeneous Graphs or Heterogeneous Graphs with A Single Edge Type**
**Homogeneous Graphs**
>>> g = dgl.graph((torch.tensor([0, 0, 2]), torch.tensor([0, 1, 2])))
>>> g.edata['he'] = torch.arange(3).float().reshape(-1, 1)
......@@ -1207,7 +1160,7 @@ def remove_edges(g, eids, etype=None):
>>> g.edata['he']
tensor([[2.]])
**Heterogeneous Graphs with Multiple Edge Types**
**Heterogeneous Graphs**
>>> g = dgl.heterograph({
... ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
......@@ -1231,19 +1184,16 @@ def remove_edges(g, eids, etype=None):
def remove_nodes(g, nids, ntype=None):
r"""Remove multiple nodes with the specified node type.
A new graph with certain nodes deleted is returned.
Edges that connect to the nodes will be removed as well. After removing
nodes and edges, the rest nodes and edges will be re-indexed using
consecutive integers from 0, with their relative order preserved.
r"""Remove the specified nodes and return a new graph.
The features for the removed nodes/edges will be removed accordingly.
Also delete the features. Edges that connect from/to the nodes will be
removed as well. After the removal, DGL re-labels the remaining nodes and edges
with IDs from 0.
Parameters
----------
nids : int, tensor, numpy.ndarray, list
Nodes to remove.
nids : int, Tensor, iterable[int]
The nodes to be removed.
ntype : str, optional
The type of the nodes to remove. Can be omitted if there is
only one node type in the graph.
......@@ -1259,7 +1209,7 @@ def remove_nodes(g, nids, ntype=None):
>>> import dgl
>>> import torch
**Homogeneous Graphs or Heterogeneous Graphs with A Single Node Type**
**Homogeneous Graphs**
>>> g = dgl.graph((torch.tensor([0, 0, 2]), torch.tensor([0, 1, 2])))
>>> g.ndata['hv'] = torch.arange(3).float().reshape(-1, 1)
......@@ -1274,7 +1224,7 @@ def remove_nodes(g, nids, ntype=None):
>>> g.edata['he']
tensor([[2.]])
**Heterogeneous Graphs with Multiple Node Types**
**Heterogeneous Graphs**
>>> g = dgl.heterograph({
... ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
......@@ -1301,42 +1251,41 @@ def remove_nodes(g, nids, ntype=None):
return g
def add_self_loop(g, etype=None):
r"""Add self-loop for each node in the graph for the given edge type.
A new graph with self-loop is returned.
If the graph is heterogeneous, the given edge type must have its source
node type the same as its destination node type.
r"""Add self-loops for each node in the graph and return a new graph.
Parameters
----------
g : DGLGraph
The graph.
etype : str or tuple of str, optional
The type of the edges to remove. Can be omitted if there is
only one edge type in the graph.
etype : str or (str, str, str), optional
The type names of the edges. The allowed type name formats are:
* ``(str, str, str)`` for source node type, edge type and destination node type.
* or one ``str`` edge type name if the name can uniquely identify a
triplet format in the graph.
Its source node type must be the same as its destination node type.
Can be omitted if the graph has only one type of edges.
Return
------
DGLGraph
The graph with self-loop.
The graph with self-loops.
Notes
-----
* :func:`add_self_loop` adds self loops regardless of whether the self-loop already exists.
If you would like to have exactly one self-loop for every node, you would need to
* The function only supports homogeneous graphs, or heterogeneous graphs where
the relation graph specified by the :attr:`etype` argument is homogeneous.
* The function adds self-loops regardless of whether they already exist or not.
If one wishes to have exactly one self-loop for every node,
call :func:`remove_self_loop` before invoking :func:`add_self_loop`.
* Features for the new edges (self-loop edges) will be created with zeros.
* Features of the new edges (self-loop edges) will be filled with zeros.
Examples
--------
>>> import dgl
>>> import torch
**Homogeneous Graphs or Heterogeneous Graphs with A Single Node Type**
**Homogeneous Graphs**
>>> g = dgl.graph((torch.tensor([0, 0, 2]), torch.tensor([2, 1, 0])))
>>> g.ndata['hv'] = torch.arange(3).float().reshape(-1, 1)
......@@ -1354,7 +1303,7 @@ def add_self_loop(g, etype=None):
[0.],
[0.]])
**Heterogeneous Graphs with Multiple Node Types**
**Heterogeneous Graphs**
>>> g = dgl.heterograph({
... ('user', 'follows', 'user'): (torch.tensor([1, 2]),
......@@ -1377,20 +1326,28 @@ def add_self_loop(g, etype=None):
new_g = add_edges(g, nodes, nodes, etype=etype)
return new_g
DGLHeteroGraph.add_self_loop = add_self_loop
DGLHeteroGraph.add_self_loop = utils.alias_func(add_self_loop)
def remove_self_loop(g, etype=None):
r""" Remove self loops for each node in the graph.
A new graph with self-loop removed is returned.
If there are multiple self loops for a certain node,
all of them will be removed.
r""" Remove self-loops for each node in the graph and return a new graph.
Parameters
----------
etype : str or tuple of str, optional
The type of the edges to remove. Can be omitted if there is
only one edge type in the graph.
g : DGLGraph
The graph.
etype : str or (str, str, str), optional
The type names of the edges. The allowed type name formats are:
* ``(str, str, str)`` for source node type, edge type and destination node type.
* or one ``str`` edge type name if the name can uniquely identify a
triplet format in the graph.
Can be omitted if the graph has only one type of edges.
Notes
-----
If a node has multiple self-loops, remove them all. Do nothing for nodes without
self-loops.
Examples
---------
......@@ -1398,7 +1355,7 @@ def remove_self_loop(g, etype=None):
>>> import dgl
>>> import torch
**Homogeneous Graphs or Heterogeneous Graphs with A Single Node Type**
**Homogeneous Graphs**
>>> g = dgl.graph((torch.tensor([0, 0, 0, 1]), torch.tensor([1, 0, 0, 2])))
>>> g.edata['he'] = torch.arange(4).float().reshape(-1, 1)
......@@ -1409,7 +1366,7 @@ def remove_self_loop(g, etype=None):
>>> g.edata['he']
tensor([[0.],[3.]])
**Heterogeneous Graphs with Multiple Node Types**
**Heterogeneous Graphs**
>>> g = dgl.heterograph({
... ('user', 'follows', 'user'): (torch.tensor([0, 1, 1, 1, 2]),
......@@ -1442,7 +1399,7 @@ def remove_self_loop(g, etype=None):
new_g = remove_edges(g, self_loop_eids, etype=etype)
return new_g
DGLHeteroGraph.remove_self_loop = remove_self_loop
DGLHeteroGraph.remove_self_loop = utils.alias_func(remove_self_loop)
def compact_graphs(graphs, always_preserve=None, copy_ndata=True, copy_edata=True):
"""Given a list of graphs with the same set of nodes, find and eliminate the common
......@@ -1502,10 +1459,9 @@ def compact_graphs(graphs, always_preserve=None, copy_ndata=True, copy_edata=Tru
This function currently requires that the same node type of all graphs should have
the same node type ID, i.e. the node types are *ordered* the same.
If :attr:`copy_edata` is True, same tensors will be used for
the features of the original graphs and the returned graphs to save memory cost.
As a result, users should avoid performing in-place operations on the edge features of
the returned graph, which will corrupt the edge features of the original graph as well.
If :attr:`copy_edata` is True, the resulting graph will share the edge feature
tensors with the input graph. Hence, users should try to avoid in-place operations
which will be visible to both graphs.
Examples
--------
......@@ -1666,6 +1622,13 @@ def to_block(g, dst_nodes=None, include_dst_in_src=True):
If :attr:`dst_nodes` is specified but it is not a superset of all the nodes that
have at least one inbound edge.
Notes
-----
:func:`to_block` is most commonly used in customizing neighborhood sampling
for stochastic training on a large graph. Please refer to the user guide
:ref:`guide-minibatch` for a more thorough discussion about the methodology
of stochastic training.
Examples
--------
Converting a homogeneous graph to a block as described above:
......@@ -1727,13 +1690,6 @@ def to_block(g, dst_nodes=None, include_dst_in_src=True):
>>> block.srcnodes['A'].data[dgl.NID]
tensor([2, 1])
Notes
-----
:func:`to_block` is most commonly used in customizing neighborhood sampling
for stochastic training on a large graph. Please refer to User Guide Chapter 6
for a more thorough discussion driven by the methodology of stochastic training on a
large graph.
"""
assert g.device == F.cpu(), 'the graph must be on CPU'
......@@ -1781,14 +1737,19 @@ def to_block(g, dst_nodes=None, include_dst_in_src=True):
return new_graph
def to_simple(g, return_counts='count', writeback_mapping=False, copy_ndata=True, copy_edata=False):
r"""Convert a graph to a simple graph, removing the parallel edges.
def to_simple(g,
return_counts='count',
writeback_mapping=False,
copy_ndata=True,
copy_edata=False):
r"""Convert a graph to a simple graph without parallel edges and return it.
For a heterogeneous graph with multiple edge types, DGL removes the parallel edges
with the same edge type.
Optionally, the number of parallel edges and/or the mapping from the edges in the simple graph
to the edges in the original graph is returned.
For a heterogeneous graph with multiple edge types, DGL treats edges with the same
edge type and endpoints as parallel edges and removes them.
Optionally, one can get the number of parallel edges by specifying the
:attr:`return_counts` argument. To get a mapping from the edge IDs in the
input graph to the edge IDs in the resulting graph, set :attr:`writeback_mapping`
to true.
Parameters
----------
......@@ -1801,10 +1762,10 @@ def to_simple(g, return_counts='count', writeback_mapping=False, copy_ndata=True
(Default: "count")
writeback_mapping: bool, optional
If True, a write-back mapping is returned for each edge
type subgraph. The write-back mapping is a tensor recording
the mapping from the IDs of the edges in the new graph to
the IDs of the edges in the original graph. If the graph is
If True, return an extra write-back mapping for each edge
type. The write-back mapping is a tensor recording
the mapping from the edge IDs in the input graph to
the edge IDs in the result graph. If the graph is
heterogeneous, DGL returns a dictionary of edge types and such
tensors.
......@@ -1833,21 +1794,17 @@ def to_simple(g, return_counts='count', writeback_mapping=False, copy_ndata=True
DGLGraph
The graph.
tensor or dict of tensor
The writeback mapping.
Only returned if ``writeback_mapping`` is True.
The writeback mapping. Only returned when ``writeback_mapping`` is True.
Notes
-----
If ``copy_ndata`` is ``True``, same tensors will be used for
the features of the original graph and the to_simpled graph. As a result, users
should avoid performing in-place operations on the features of the to_simpled
graph, which will corrupt the features of the original graph as well. For
concrete examples, refer to the ``Examples`` section below.
If :attr:`copy_ndata` is True, the resulting graph will share the node feature
tensors with the input graph. Hence, users should try to avoid in-place operations
which will be visible to both graphs.
Examples
--------
**Homogeneous Graphs or Heterogeneous Graphs with A Single Edge Type**
**Homogeneous Graphs**
Create a graph for demonstrating to_simple API.
In the original graph, there are multiple edges between 1 and 2.
......@@ -1881,24 +1838,7 @@ def to_simple(g, return_counts='count', writeback_mapping=False, copy_ndata=True
>>> 'h' in g.edata
False
**In-place operations on features of one graph will be reflected on features of
the simple graph, which is dangerous. Out-place operations will not be reflected.**
>>> sg.ndata['h'] += 1
>>> g.ndata['h']
tensor([[1.],
[2.],
[3.]])
>>> g.ndata['h'] += 1
>>> sg.ndata['h']
tensor([[2.],
[3.],
[4.]])
>>> sg.ndata['h2'] = th.ones(3, 1)
>>> 'h2' in g.ndata
False
**Heterogeneous Graphs with Multiple Edge Types**
**Heterogeneous Graphs**
>>> g = dgl.heterograph({
... ('user', 'wins', 'user'): (th.tensor([0, 2, 0, 2, 2]), th.tensor([1, 1, 2, 1, 0])),
......@@ -1968,7 +1908,7 @@ def to_simple(g, return_counts='count', writeback_mapping=False, copy_ndata=True
return simple_graph
DGLHeteroGraph.to_simple = to_simple
DGLHeteroGraph.to_simple = utils.alias_func(to_simple)
def as_heterograph(g, ntype='_U', etype='_E'): # pylint: disable=unused-argument
"""Convert a DGLGraph to a DGLHeteroGraph with one node and edge type.
......
......@@ -891,4 +891,12 @@ def set_num_threads(num_threads):
"""
_CAPI_DGLSetOMPThreads(num_threads)
def alias_func(func):
    """Build a thin forwarding wrapper around ``func`` with an alias docstring.

    The wrapper passes all positional and keyword arguments straight through
    to ``func`` and returns its result. ``functools.wraps`` copies the
    metadata of ``func`` onto the wrapper; the docstring is then replaced by a
    one-line pointer to the function's name under the ``dgl`` namespace.

    Parameters
    ----------
    func : callable
        The function to create an alias for.

    Returns
    -------
    callable
        A new function object that forwards to ``func``.
    """
    def _forward(*args, **kwargs):
        return func(*args, **kwargs)

    # Apply the decorator explicitly; it returns the same wrapper object with
    # the metadata of ``func`` copied onto it.
    alias = wraps(func)(_forward)
    # Override the copied docstring so the alias documents itself as an alias.
    alias.__doc__ = "Alias of :func:`dgl.{}`.".format(func.__name__)
    return alias
_init_api("dgl.utils.internal")
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment