Unverified Commit be444e52 authored by Mufei Li, committed by GitHub

[Doc/Feature] Refactor, doc update and behavior fix for graphs (#1983)



* Update graph

* Fix for dgl.graph

* from_scipy

* Replace canonical_etypes with relations

* from_networkx

* Update for hetero_from_relations

* Roll back the change of canonical_etypes to relations

* heterograph

* bipartite

* Update doc

* Fix lint

* Fix lint

* Fix test cases

* Fix

* Fix

* Fix

* Fix

* Fix

* Fix

* Update

* Fix test

* Fix

* Update

* Use DGLError

* Update

* Update

* Update

* Update

* Fix

* Fix

* Fix

* Fix

* Fix

* Fix

* Fix

* Fix

* Update

* Fix

* Update

* Fix

* Fix

* Fix

* Update

* Fix

* Update

* Fix

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Fix

* Fix

* Update

* Update

* Update

* Update

* Update

* Update

* rewrite sanity checks

* delete unnecessary checks

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Fix

* Update

* Update

* Update

* Fix

* Fix

* Fix

* Update

* Fix

* Update

* Fix

* Fix

* Update

* Fix

* Update

* Fix
Co-authored-by: xiang song (charlie.song) <classicxsong@gmail.com>
Co-authored-by: Minjie Wang <wmjlyjemaine@gmail.com>
Co-authored-by: Quan Gan <coin2028@hotmail.com>
parent 0afc3cf8
...@@ -66,7 +66,6 @@ def main(args):
hg = dataset[0]
num_rels = len(hg.canonical_etypes)
- num_of_ntype = len(hg.ntypes)
category = dataset.predict_category
num_classes = dataset.num_classes
train_mask = hg.nodes[category].data.pop('train_mask')
...@@ -98,7 +97,7 @@ def main(args):
category_id = i
# edge type and normalization factor
- g = dgl.to_homo(hg)
+ g = dgl.to_homogeneous(hg, edata=['norm'])
# check cuda
if args.gpu < 0:
......
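Context for the change above: the refactored ``dgl.to_homogeneous`` only copies the features it is explicitly asked for, so the RGCN example now requests the ``'norm'`` edge feature when flattening the heterograph. A minimal sketch of the difference (the toy graph and feature values are illustrative, not taken from the example script):

>>> import dgl
>>> import torch
>>> hg = dgl.heterograph({('user', 'follows', 'user'): ([0, 1], [1, 2])})
>>> hg.edges['follows'].data['norm'] = torch.ones(2, 1)
>>> g = dgl.to_homogeneous(hg)                   # structure only, 'norm' is dropped
>>> 'norm' in g.edata
False
>>> g = dgl.to_homogeneous(hg, edata=['norm'])   # explicitly carry 'norm' along
>>> g.edata['norm']
tensor([[1.],
        [1.]])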
...@@ -89,15 +89,15 @@ ToBlock(HeteroGraphPtr graph, const std::vector<IdArray> &rhs_nodes, bool includ
* * \c count : The array of edge occurrences per edge type.
* * \c edge_map : The mapping from original edge IDs to new edge IDs per edge type.
*
- * \note Example: consider the following graph:
+ * \note Example: consider a graph with the following edges
*
- * g = dgl.graph([(0, 1), (1, 3), (2, 2), (1, 3), (1, 4), (1, 4)])
+ * [(0, 1), (1, 3), (2, 2), (1, 3), (1, 4), (1, 4)]
*
* Then ToSimpleGraph(g) would yield the following elements:
*
- * * The first element would be the simple graph itself:
+ * * The first element would be the simple graph itself with the following edges
*
- * simple_g = dgl.graph([(0, 1), (1, 3), (1, 4), (2, 2)])
+ * [(0, 1), (1, 3), (1, 4), (2, 2)]
*
* * The second element is an array \c count. \c count[i] stands for the number of edges
* connecting simple_g.src[i] and simple_g.dst[i] in the original graph.
......
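For readers coming from the Python side, a hedged sketch of the behavior this note describes, assuming the ``dgl.to_simple`` wrapper and its ``return_counts``/``writeback_mapping`` options (the option names are an assumption, not part of this diff); the edge ordering follows the note above:

>>> import dgl
>>> import torch
>>> g = dgl.graph((torch.tensor([0, 1, 2, 1, 1, 1]),
...                torch.tensor([1, 3, 2, 3, 4, 4])))
>>> sg, edge_map = dgl.to_simple(g, return_counts='count', writeback_mapping=True)
>>> sg.edges()                 # parallel edges collapsed, ordering as in the note
(tensor([0, 1, 1, 2]), tensor([1, 3, 4, 2]))
>>> sg.edata['count']          # how many original edges each simple edge represents
tensor([1, 2, 2, 1])
>>> edge_map                   # original edge ID -> new edge ID
tensor([0, 1, 3, 1, 2, 2])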
...@@ -4038,6 +4038,7 @@ class DGLGraph(DGLBaseGraph):
self._node_frame = old_nframe
self._edge_frame = old_eframe
+ @property
def is_homogeneous(self):
"""Return if the graph is homogeneous."""
return True
......
...@@ -397,7 +397,7 @@ def unbatch(g, node_split=None, edge_split=None):
for i in range(num_split)]
# Create graphs
- gs = [convert.heterograph(edge_dict, num_nodes_dict, validate=True, idtype=g.idtype)
+ gs = [convert.heterograph(edge_dict, num_nodes_dict, idtype=g.idtype)
for edge_dict, num_nodes_dict in zip(edge_dict_per, num_nodes_dict_per)]
# Unbatch node features
......
"""Module for converting graph from/to other object.""" """Module for converting graph from/to other object."""
# pylint: disable=dangerous-default-value
from collections import defaultdict from collections import defaultdict
from scipy.sparse import spmatrix
import numpy as np import numpy as np
import networkx as nx import networkx as nx
...@@ -17,397 +17,165 @@ __all__ = [ ...@@ -17,397 +17,165 @@ __all__ = [
'hetero_from_relations', 'hetero_from_relations',
'hetero_from_shared_memory', 'hetero_from_shared_memory',
'heterograph', 'heterograph',
'to_heterogeneous',
'to_hetero', 'to_hetero',
'to_homogeneous',
'to_homo', 'to_homo',
'from_scipy', 'from_scipy',
'bipartite_from_scipy',
'from_networkx', 'from_networkx',
'bipartite_from_networkx',
'to_networkx', 'to_networkx',
] ]
def graph(data,
- ntype='_N', etype='_E',
+ ntype=None, etype=None,
+ *,
num_nodes=None,
- validate=True,
- formats=['coo', 'csr', 'csc'],
idtype=None,
device=None,
- card=None,
**deprecated_kwargs):
"""Create a graph with one type of nodes and edges. """Create a graph.
In the sparse matrix perspective, :func:`dgl.graph` creates a graph
whose adjacency matrix must be square while :func:`dgl.bipartite`
creates a graph that does not necessarily have square adjacency matrix.
Parameters Parameters
---------- ----------
data : graph data data : graph data
Data to initialize graph structure. Supported data formats are The data for constructing a graph, which takes the form of :math:`(U, V)`.
:math:`(U[i], V[i])` forms the edge with ID :math:`i` in the graph.
(1) list of edge pairs (e.g. [(0, 2), (3, 1), ...]) The allowed data formats are:
(2) pair of vertex IDs representing end nodes (e.g. ([0, 3, ...], [2, 1, ...]))
(3) scipy sparse matrix - ``(Tensor, Tensor)``: Each tensor must be a 1D tensor containing node IDs.
(4) networkx graph DGL calls this format "tuple of node-tensors". The tensors should have the same
data type of int32/int64 and device context (see below the descriptions of
:attr:`idtype` and :attr:`device`).
- ``(iterable[int], iterable[int])``: Similar to the tuple of node-tensors
format, but stores node IDs in two sequences (e.g. list, tuple, numpy.ndarray).
ntype : str, optional ntype : str, optional
Node type name. (Default: _N) Deprecated. To construct a graph with named node types, use :func:`dgl.heterograph`.
etype : str, optional etype : str, optional
Edge type name. (Default: _E) Deprecated. To construct a graph with named edge types, use :func:`dgl.heterograph`.
num_nodes : int, optional num_nodes : int, optional
Number of nodes in the graph. If None, infer from input data, i.e. The number of nodes in the graph. If not given, this will be the largest node ID
the largest node ID plus 1. (Default: None) plus 1 from the :attr:`data` argument. If given and the value is no greater than
validate : bool, optional the largest node ID from the :attr:`data` argument, DGL will raise an error.
If True, check if node ids are within cardinality, the check process may take idtype : int32 or int64, optional
some time. (Default: True) The data type for storing the structure-related graph information such as node and
If False and card is not None, user would receive a warning. edge IDs. It should be a framework-specific data type object (e.g., ``torch.int32``).
formats : str or list of str If ``None`` (default), DGL infers the ID type from the :attr:`data` argument.
It can be ``'coo'``/``'csr'``/``'csc'`` or a sublist of them, See "Notes" for more details.
Force the storage formats. Default: ``['coo', 'csr', 'csc']``. device : device context, optional
idtype : int32, int64, optional The device of the returned graph, which should be a framework-specific device object
Integer ID type. Valid options are int32 or int64. If None, try infer from (e.g., ``torch.device``). If ``None`` (default), DGL uses the device of the tensors of
the given data. the :attr:`data` argument. If :attr:`data` is not a tuple of node-tensors, the
device : Device context, optional returned graph is on CPU. If the specified :attr:`device` differs from that of the
Device on which the graph is created. Default: infer from data. provided tensors, it casts the given tensors to the specified device first.
card : int, optional
Deprecated (see :attr:`num_nodes`). Cardinality (number of nodes in the graph).
If None, infer from input data, i.e. the largest node ID plus 1. (Default: None)
Returns Returns
------- -------
DGLHeteroGraph DGLGraph
The created graph.
Notes
-----
1. If the :attr:`idtype` argument is not given then:
- in the case of the tuple of node-tensor format, DGL uses the
data type of the given ID tensors.
- in the case of the tuple of sequence format, DGL uses int64.
Once the graph has been created, you can change the data type by using
:func:`dgl.DGLGraph.long` or :func:`dgl.DGLGraph.int`.
If the specified :attr:`idtype` argument differs from the data type of the provided
tensors, it casts the given tensors to the specified data type first.
2. The most efficient construction approach is to provide a tuple of node tensors without
specifying :attr:`idtype` and :attr:`device`. This is because the returned graph shares
the storage with the input node-tensors in this case.
3. DGL internally maintains multiple copies of the graph structure in different
`sparse formats <https://en.wikipedia.org/wiki/Sparse_matrix>`_ and chooses the most
efficient one depending on the computation invoked. If memory usage becomes an issue
in the case of large graphs, use :func:`dgl.DGLGraph.formats` to restrict the allowed
formats.
Examples Examples
-------- --------
Create from pairs of edges with form (src, dst)
>>> g = dgl.graph([(0, 2), (0, 3), (1, 2)]) The following example uses PyTorch backend.
Create from source and destination vertex ID lists >>> import dgl
>>> import torch
>>> u = [0, 0, 1] Create a small three-edge graph.
>>> v = [2, 3, 2]
>>> g = dgl.graph((u, v))
The IDs can also be stored in framework-specific tensors >>> # Source nodes for edges (2, 1), (3, 2), (4, 3)
>>> src_ids = torch.tensor([2, 3, 4])
>>> # Destination nodes for edges (2, 1), (3, 2), (4, 3)
>>> dst_ids = torch.tensor([1, 2, 3])
>>> g = dgl.graph((src_ids, dst_ids))
>>> import torch Explicitly specify the number of nodes in the graph.
>>> u = torch.tensor([0, 0, 1])
>>> v = torch.tensor([2, 3, 2])
>>> g = dgl.graph((u, v))
Create from scipy sparse matrix >>> g = dgl.graph((src_ids, dst_ids), num_nodes=100)
>>> from scipy.sparse import coo_matrix Create a graph on the first GPU with data type int32.
>>> spmat = coo_matrix(([1,1,1], ([0, 0, 1], [2, 3, 2])), shape=(4, 4))
>>> g = dgl.graph(spmat)
Create from networkx graph >>> g = dgl.graph((src_ids, dst_ids), idtype=torch.int32, device='cuda:0')
>>> import networkx as nx See Also
>>> nxg = nx.path_graph(3) --------
>>> g = dgl.graph(nxg) from_scipy
from_networkx
Specify node and edge type names
>>> g = dgl.graph(..., 'user', 'follows')
>>> g.ntypes
['user']
>>> g.etypes
['follows']
>>> g.canonical_etypes
[('user', 'follows', 'user')]
Check if node ids are within num_nodes specified
>>> g = dgl.graph(([0, 1, 2], [1, 2, 0]), num_nodes=2, validate=True)
...
dgl._ffi.base.DGLError: Invalid node id 2 (should be less than cardinality 2).
>>> g = dgl.graph(([0, 1, 2], [1, 2, 0]), num_nodes=3, validate=True)
Graph(num_nodes=3, num_edges=3,
ndata_schemes={}
edata_schemes={})
""" """
# Deprecated arguments
if ntype is not None:
raise DGLError('The ntype argument is deprecated for dgl.graph. To construct ' \
'a graph with named node types, use dgl.heterograph.')
if etype is not None:
raise DGLError('The etype argument is deprecated for dgl.graph. To construct ' \
'a graph with named edge types, use dgl.heterograph.')
if isinstance(data, spmatrix):
raise DGLError("dgl.graph no longer supports graph construction from a SciPy "
"sparse matrix, use dgl.from_scipy instead.")
if isinstance(data, nx.Graph):
raise DGLError("dgl.graph no longer supports graph construction from a NetworkX "
"graph, use dgl.from_networkx instead.")
if len(deprecated_kwargs) != 0: if len(deprecated_kwargs) != 0:
raise DGLError("Key word arguments {} have been removed from dgl.graph()." raise DGLError("Key word arguments {} have been removed from dgl.graph()."
" They are moved to dgl.from_scipy() and dgl.from_networkx()." " They are moved to dgl.from_scipy() and dgl.from_networkx()."
" Please refer to their API documents for more details.".format( " Please refer to their API documents for more details.".format(
deprecated_kwargs.keys())) deprecated_kwargs.keys()))
if isinstance(data, DGLHeteroGraph):
return data.astype(idtype).to(device)
if card is not None:
dgl_warning("Argument 'card' will be deprecated. "
"Please use num_nodes={} instead.".format(card))
num_nodes = card
u, v, urange, vrange = utils.graphdata2tensors(data, idtype) u, v, urange, vrange = utils.graphdata2tensors(data, idtype)
if num_nodes is not None: # override the number of nodes if num_nodes is not None: # override the number of nodes
if num_nodes < max(urange, vrange):
raise DGLError('The num_nodes argument must be larger than the max ID in the data,'
' but got {} and {}.'.format(num_nodes, max(urange, vrange) - 1))
urange, vrange = num_nodes, num_nodes urange, vrange = num_nodes, num_nodes
g = create_from_edges(u, v, ntype, etype, ntype, urange, vrange, g = create_from_edges(u, v, '_N', '_E', '_N', urange, vrange)
validate, formats=formats)
return g.to(device) return g.to(device)
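A short sketch of the stricter num_nodes behavior implemented above; the toy data is illustrative and the error text mirrors the format string in the new check:

>>> import dgl
>>> import torch
>>> src, dst = torch.tensor([0, 1, 2]), torch.tensor([1, 2, 0])
>>> dgl.graph((src, dst), num_nodes=4)    # fine, 4 is larger than the max ID 2
Graph(num_nodes=4, num_edges=3,
      ndata_schemes={}
      edata_schemes={})
>>> dgl.graph((src, dst), num_nodes=2)    # 2 is not larger than the max ID 2
Traceback (most recent call last):
  ...
dgl._ffi.base.DGLError: The num_nodes argument must be larger than the max ID in the data, but got 2 and 2.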
def bipartite(data, def bipartite(data,
utype='_U', etype='_E', vtype='_V', utype='_U', etype='_E', vtype='_V',
num_nodes=None, num_nodes=None,
validate=True,
formats=['coo', 'csr', 'csc'],
idtype=None,
device=None,
card=None, card=None,
**deprecated_kwargs): validate=True,
"""Create a bipartite graph. restrict_format='any',
**kwargs):
The result graph is directed and edges must be from ``utype`` nodes """DEPRECATED: use dgl.heterograph instead."""
to ``vtype`` nodes. Nodes of each type have their own ID counts. raise DGLError(
'dgl.bipartite is deprecated. Use dgl.heterograph({' +
In the sparse matrix perspective, :func:`dgl.graph` creates a graph "('{}', '{}', '{}')".format(utype, etype, vtype) +
whose adjacency matrix must be square while :func:`dgl.bipartite` ' : data} to create a bipartite graph instead.')
creates a graph that does not necessarily have square adjacency matrix.
Parameters
----------
data : graph data
Data to initialize graph structure. Supported data formats are
(1) list of edge pairs (e.g. [(0, 2), (3, 1), ...])
(2) pair of vertex IDs representing end nodes (e.g. ([0, 3, ...], [2, 1, ...]))
(3) scipy sparse matrix
(4) networkx graph
utype : str, optional
Source node type name. (Default: _U)
etype : str, optional
Edge type name. (Default: _E)
vtype : str, optional
Destination node type name. (Default: _V)
num_nodes : 2-tuple of int, optional
Number of nodes in the source and destination group. If None, infer from input data,
i.e. the largest node ID plus 1 for each type. (Default: None)
validate : bool, optional
If True, check if node ids are within cardinality, the check process may take
some time. (Default: True)
If False and card is not None, user would receive a warning.
formats : str or list of str
It can be ``'coo'``/``'csr'``/``'csc'`` or a sublist of them,
Force the storage formats. Default: ``['coo', 'csr', 'csc']``.
idtype : int32, int64, optional
Integer ID type. Valid options are int32 or int64. If None, try infer from
the given data.
device : Device context, optional
Device on which the graph is created. Default: infer from data.
card : 2-tuple of int, optional
Deprecated (see :attr:`num_nodes`). Cardinality (number of nodes in the source and
destination group). If None, infer from input data, i.e. the largest node ID plus 1
for each type. (Default: None)
Returns
-------
DGLHeteroGraph
Examples
--------
Create from pairs of edges
>>> g = dgl.bipartite([(0, 2), (0, 3), (1, 2)], 'user', 'plays', 'game')
>>> g.ntypes
['user', 'game']
>>> g.etypes
['plays']
>>> g.canonical_etypes
[('user', 'plays', 'game')]
>>> g.number_of_nodes('user')
2
>>> g.number_of_nodes('game')
4
>>> g.number_of_edges('plays') # 'plays' could be omitted here
3
Create from source and destination vertex ID lists
>>> u = [0, 0, 1]
>>> v = [2, 3, 2]
>>> g = dgl.bipartite((u, v))
The IDs can also be stored in framework-specific tensors
>>> import torch
>>> u = torch.tensor([0, 0, 1])
>>> v = torch.tensor([2, 3, 2])
>>> g = dgl.bipartite((u, v))
Create from scipy sparse matrix. Since scipy sparse matrix has explicit
shape, the cardinality of the result graph is derived from that.
>>> from scipy.sparse import coo_matrix
>>> spmat = coo_matrix(([1,1,1], ([0, 0, 1], [2, 3, 2])), shape=(4, 4))
>>> g = dgl.bipartite(spmat, 'user', 'plays', 'game')
>>> g.number_of_nodes('user')
4
>>> g.number_of_nodes('game')
4
Create from networkx graph. The given graph must follow the bipartite
graph convention in networkx. Each node has a ``bipartite`` attribute
with values 0 or 1. The result graph has two types of nodes and only
edges from ``bipartite=0`` to ``bipartite=1`` will be included.
>>> import networkx as nx
>>> nxg = nx.complete_bipartite_graph(3, 4)
>>> g = dgl.bipartite(nxg, 'user', 'plays', 'game')
>>> g.number_of_nodes('user')
3
>>> g.number_of_nodes('game')
4
>>> g.edges()
(tensor([0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2]), tensor([0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]))
Check if node ids are within num_nodes specified
>>> g = dgl.bipartite(([0, 1, 2], [1, 2, 3]), num_nodes=(2, 4), validate=True)
...
dgl._ffi.base.DGLError: Invalid node id 2 (should be less than cardinality 2).
>>> g = dgl.bipartite(([0, 1, 2], [1, 2, 3]), num_nodes=(3, 4), validate=True)
>>> g
Graph(num_nodes={'_U': 3, '_V': 4},
num_edges={('_U', '_E', '_V'): 3},
metagraph=[('_U', '_V')])
"""
if len(deprecated_kwargs) != 0:
raise DGLError("Key word arguments {} have been removed from dgl.graph()."
" They are moved to dgl.from_scipy() and dgl.from_networkx()."
" Please refer to their API documents for more details.".format(
deprecated_kwargs.keys()))
if utype == vtype:
raise DGLError('utype should not be equal to vtype. Use ``dgl.graph`` instead.')
if card is not None:
dgl_warning("Argument 'card' will be deprecated. "
"Please use num_nodes={} instead.".format(card))
num_nodes = card
u, v, urange, vrange = utils.graphdata2tensors(data, idtype, bipartite=True)
if num_nodes is not None: # override the number of nodes
urange, vrange = num_nodes
g = create_from_edges(
u, v, utype, etype, vtype, urange, vrange, validate,
formats=formats)
return g.to(device)
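As the deprecation error above suggests, a single-relation dgl.heterograph reproduces the old dgl.bipartite behavior. A minimal sketch using the same edges as the removed example:

>>> import dgl
>>> g = dgl.heterograph({('user', 'plays', 'game'): ([0, 0, 1], [2, 3, 2])})
>>> g.number_of_nodes('user')
2
>>> g.number_of_nodes('game')
4
>>> g.canonical_etypes
[('user', 'plays', 'game')]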
def hetero_from_relations(rel_graphs, num_nodes_per_type=None): def hetero_from_relations(rel_graphs, num_nodes_per_type=None):
"""Create a heterograph from graphs representing connections of each relation. """DEPRECATED: use dgl.heterograph instead."""
raise DGLError('dgl.hetero_from_relations is deprecated.\n\n'
The input is a list of heterographs where the ``i``th graph contains edges of type 'Use dgl.heterograph instead.')
:math:`(s_i, e_i, d_i)`.
If two graphs share a same node type, the number of nodes for the corresponding type
should be the same. See **Examples** for details.
Parameters
----------
rel_graphs : list of DGLHeteroGraph
Each element corresponds to a heterograph for one (src, edge, dst) relation.
num_nodes_per_type : dict[str, Tensor], optional
Number of nodes per node type. If not given, DGL will infer the number of nodes
from the given relation graphs.
Returns
-------
DGLHeteroGraph
A heterograph consisting of all relations.
Examples
--------
>>> import dgl
>>> follows_g = dgl.graph([(0, 1), (1, 2)], 'user', 'follows')
>>> plays_g = dgl.bipartite([(0, 0), (3, 1)], 'user', 'plays', 'game')
>>> devs_g = dgl.bipartite([(0, 0), (1, 1)], 'developer', 'develops', 'game')
>>> g = dgl.hetero_from_relations([follows_g, plays_g, devs_g])
will raise an error as we have 3 nodes of type 'user' in follows_g and 4 nodes of type
'user' in plays_g.
We have two possible methods to avoid the construction.
**Method 1**: Manually specify the number of nodes for all types when constructing
the relation graphs.
>>> # A graph with 4 nodes of type 'user'
>>> follows_g = dgl.graph([(0, 1), (1, 2)], 'user', 'follows', num_nodes=4)
>>> # A bipartite graph with 4 nodes of src type ('user') and 2 nodes of dst type ('game')
>>> plays_g = dgl.bipartite([(0, 0), (3, 1)], 'user', 'plays', 'game', num_nodes=(4, 2))
>>> devs_g = dgl.bipartite([(0, 0), (1, 1)], 'developer', 'develops', 'game')
>>> g = dgl.hetero_from_relations([follows_g, plays_g, devs_g])
>>> print(g)
Graph(num_nodes={'user': 4, 'game': 2, 'developer': 2},
num_edges={('user', 'follows', 'user'): 2, ('user', 'plays', 'game'): 2,
('developer', 'develops', 'game'): 2},
metagraph=[('user', 'user'), ('user', 'game'), ('developer', 'game')])
``devs_g`` does not have nodes of type ``'user'`` so no error will be raised.
**Method 2**: Construct a heterograph at once without intermediate relation graphs,
in which case we will infer the number of nodes for each type.
>>> g = dgl.heterograph({
>>> ('user', 'follows', 'user'): [(0, 1), (1, 2)],
>>> ('user', 'plays', 'game'): [(0, 0), (3, 1)],
>>> ('developer', 'develops', 'game'): [(0, 0), (1, 1)]
>>> })
>>> print(g)
Graph(num_nodes={'user': 4, 'game': 2, 'developer': 2},
num_edges={('user', 'follows', 'user'): 2,
('user', 'plays', 'game'): 2,
('developer', 'develops', 'game'): 2},
metagraph=[('user', 'user'), ('user', 'game'), ('developer', 'game')])
"""
utils.check_all_same_idtype(rel_graphs, 'rel_graphs')
utils.check_all_same_device(rel_graphs, 'rel_graphs')
# TODO(minjie): this API can be generalized as a union operation of the input graphs
# TODO(minjie): handle node/edge data
# infer meta graph
meta_edges_src, meta_edges_dst = [], []
ntypes = []
etypes = []
# TODO(BarclayII): I'm keeping the node type names sorted because even if
# the metagraph is the same, the same node type name in different graphs may
# map to different node type IDs.
# In the future, we need to lower the type names into C++.
if num_nodes_per_type is None:
ntype_set = set()
for rgrh in rel_graphs:
assert len(rgrh.etypes) == 1
stype, etype, dtype = rgrh.canonical_etypes[0]
ntype_set.add(stype)
ntype_set.add(dtype)
ntypes = list(sorted(ntype_set))
else:
ntypes = list(sorted(num_nodes_per_type.keys()))
num_nodes_per_type = utils.toindex([num_nodes_per_type[ntype] for ntype in ntypes], "int64")
ntype_dict = {ntype: i for i, ntype in enumerate(ntypes)}
for rgrh in rel_graphs:
stype, etype, dtype = rgrh.canonical_etypes[0]
meta_edges_src.append(ntype_dict[stype])
meta_edges_dst.append(ntype_dict[dtype])
etypes.append(etype)
# metagraph is DGLGraph, currently still using int64 as index dtype
metagraph = graph_index.from_coo(len(ntypes), meta_edges_src, meta_edges_dst, True)
# create graph index
hgidx = heterograph_index.create_heterograph_from_relations(
metagraph, [rgrh._graph for rgrh in rel_graphs], num_nodes_per_type)
retg = DGLHeteroGraph(hgidx, ntypes, etypes)
for i, rgrh in enumerate(rel_graphs):
for ntype in rgrh.ntypes:
retg.nodes[ntype].data.update(rgrh.nodes[ntype].data)
retg._edge_frames[i].update(rgrh._edge_frames[0])
return retg
def hetero_from_shared_memory(name): def hetero_from_shared_memory(name):
"""Create a heterograph from shared memory with the given name. """Create a heterograph from shared memory with the given name.
...@@ -429,94 +197,163 @@ def hetero_from_shared_memory(name): ...@@ -429,94 +197,163 @@ def hetero_from_shared_memory(name):
def heterograph(data_dict, def heterograph(data_dict,
num_nodes_dict=None, num_nodes_dict=None,
validate=True,
formats=['coo', 'csr', 'csc'],
idtype=None, idtype=None,
device=None): device=None):
"""Create a heterogeneous graph from a dictionary between edge types and edge lists. """Create a heterogeneous graph.
Parameters Parameters
---------- ----------
data_dict : dict data_dict : graph data
The dictionary between edge types and edge list data. The dictionary data for constructing a heterogeneous graph. The keys are in the form of
string triplets (src_type, edge_type, dst_type), specifying the source node,
edge, and destination node types. The values are graph data in the form of
:math:`(U, V)`, where :math:`(U[i], V[i])` forms the edge with ID :math:`i`.
The allowed graph data formats are:
- ``(Tensor, Tensor)``: Each tensor must be a 1D tensor containing node IDs. DGL calls
this format "tuple of node-tensors". The tensors should have the same data type,
which must be either int32 or int64. They should also have the same device context
(see below the descriptions of :attr:`idtype` and :attr:`device`).
- ``(iterable[int], iterable[int])``: Similar to the tuple of node-tensors
format, but stores node IDs in two sequences (e.g. list, tuple, numpy.ndarray).
num_nodes_dict : dict[str, int], optional
The number of nodes for some node types, which is a dictionary mapping a node type
:math:`T` to the number of :math:`T`-typed nodes. If not given for a node type
:math:`T`, DGL finds the largest ID appearing in *every* graph data whose source
or destination node type is :math:`T`, and sets the number of nodes to be that ID
plus one. If given and the value is no greater than the largest ID for some node type,
DGL will raise an error. By default, DGL infers the number of nodes for all node types.
idtype : int32 or int64, optional
The data type for storing the structure-related graph information such as node and
edge IDs. It should be a framework-specific data type object (e.g., ``torch.int32``).
If ``None`` (default), DGL infers the ID type from the :attr:`data_dict` argument.
device : device context, optional
The device of the returned graph, which should be a framework-specific device object
(e.g., ``torch.device``). If ``None`` (default), DGL uses the device of the tensors of
the :attr:`data` argument. If :attr:`data` is not a tuple of node-tensors, the
returned graph is on CPU. If the specified :attr:`device` differs from that of the
provided tensors, it casts the given tensors to the specified device first.
The edge types are specified as a triplet of (source node type name, edge type Returns
name, destination node type name). -------
DGLGraph
The created graph.
The edge list data can be anything acceptable by :func:`dgl.graph` or Notes
:func:`dgl.bipartite`, or objects returned by the two functions themselves. -----
num_nodes_dict : dict[str, int] 1. If the :attr:`idtype` argument is not given then:
The number of nodes for each node type.
By default DGL infers the number of nodes for each node type from ``data_dict`` - in the case of the tuple of node-tensor format, DGL uses
by taking the maximum node ID plus one for each node type. the data type of the given ID tensors.
validate : bool, optional - in the case of the tuple of sequence format, DGL uses int64.
If True, check if node ids are within cardinality, the check process may take
some time. (Default: True)
If False and num_nodes_dict is not None, user would receive a warning.
formats : str or list of str
It can be ``'coo'``/``'csr'``/``'csc'`` or a sublist of them,
Force the storage formats. Default: ``['coo', 'csr', 'csc']``.
idtype : int32, int64, optional
Integer ID type. Valid options are int32 or int64. If None, try infer from
the given data.
device : Device context, optional
Device on which the graph is created. Default: infer from data.
Returns Once the graph has been created, you can change the data type by using
------- :func:`dgl.DGLGraph.long` or :func:`dgl.DGLGraph.int`.
DGLHeteroGraph
If the specified :attr:`idtype` argument differs from the data type of the provided
tensors, it casts the given tensors to the specified data type first.
2. The most efficient construction approach is to provide a tuple of node tensors without
specifying :attr:`idtype` and :attr:`device`. This is because the returned graph shares
the storage with the input node-tensors in this case.
3. DGL internally maintains multiple copies of the graph structure in different sparse
formats and chooses the most efficient one depending on the computation invoked.
If memory usage becomes an issue in the case of large graphs, use
:func:`dgl.DGLGraph.formats` to restrict the allowed formats.
Examples Examples
-------- --------
>>> g = dgl.heterograph({
... ('user', 'follows', 'user'): [(0, 1), (1, 2)], The following example uses PyTorch backend.
... ('user', 'plays', 'game'): [(0, 0), (1, 0), (1, 1), (2, 1)],
... ('developer', 'develops', 'game'): [(0, 0), (1, 1)], >>> import dgl
... }) >>> import torch
Create a heterograph with three canonical edge types.
>>> data_dict = {
... ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
... ('user', 'follows', 'topic'): (torch.tensor([1, 1]), torch.tensor([1, 2])),
... ('user', 'plays', 'game'): (torch.tensor([0, 3]), torch.tensor([3, 4]))
... }
>>> g = dgl.heterograph(data_dict)
>>> g
Graph(num_nodes={'game': 5, 'topic': 3, 'user': 4},
num_edges={('user', 'follows', 'user'): 2, ('user', 'follows', 'topic'): 2,
('user', 'plays', 'game'): 2},
metagraph=[('user', 'user', 'follows'), ('user', 'topic', 'follows'),
('user', 'game', 'plays')])
Explicitly specify the number of nodes for each node type in the graph.
>>> num_nodes_dict = {'user': 4, 'topic': 4, 'game': 6}
>>> g = dgl.heterograph(data_dict, num_nodes_dict=num_nodes_dict)
Create a graph on the first GPU with data type int32.
>>> g = dgl.heterograph(data_dict, idtype=torch.int32, device='cuda:0')
""" """
# Try infer idtype # Convert all data to node tensors first
if idtype is None: node_tensor_dict = {}
for data in data_dict.values(): need_infer = num_nodes_dict is None
if isinstance(data, tuple) and len(data) == 2 and F.is_tensor(data[0]):
idtype = F.dtype(data[0])
break
# Convert all data to edge tensors first.
data_dict = {(sty, ety, dty) : utils.graphdata2tensors(data, idtype, bipartite=(sty != dty))
for (sty, ety, dty), data in data_dict.items()}
# infer number of nodes for each node type
if num_nodes_dict is None: if num_nodes_dict is None:
num_nodes_dict = defaultdict(int) num_nodes_dict = defaultdict(int)
for (srctype, etype, dsttype), data in data_dict.items(): for (sty, ety, dty), data in data_dict.items():
_, _, nsrc, ndst = data if isinstance(data, spmatrix):
num_nodes_dict[srctype] = max(num_nodes_dict[srctype], nsrc) raise DGLError("dgl.heterograph no longer supports graph construction from a SciPy "
num_nodes_dict[dsttype] = max(num_nodes_dict[dsttype], ndst) "sparse matrix, use dgl.from_scipy instead.")
if isinstance(data, nx.Graph):
raise DGLError("dgl.heterograph no longer supports graph construction from a NetworkX "
"graph, use dgl.from_networkx instead.")
is_bipartite = (sty != dty)
u, v, urange, vrange = utils.graphdata2tensors(data, idtype, bipartite=is_bipartite)
node_tensor_dict[(sty, ety, dty)] = (u, v)
if need_infer:
num_nodes_dict[sty] = max(num_nodes_dict[sty], urange)
num_nodes_dict[dty] = max(num_nodes_dict[dty], vrange)
else: # sanity check
if num_nodes_dict[sty] < urange:
raise DGLError('The given number of nodes of node type {} must be larger than'
' the max ID in the data, but got {} and {}.'.format(
sty, num_nodes_dict[sty], urange - 1))
if num_nodes_dict[dty] < vrange:
raise DGLError('The given number of nodes of node type {} must be larger than'
' the max ID in the data, but got {} and {}.'.format(
dty, num_nodes_dict[dty], vrange - 1))
# Create the graph
# Sort the ntypes and relation tuples to have a deterministic order for the same set
# of type names.
ntypes = list(sorted(num_nodes_dict.keys()))
relations = list(sorted(node_tensor_dict.keys()))
num_nodes_per_type = utils.toindex([num_nodes_dict[ntype] for ntype in ntypes], "int64")
ntype_dict = {ntype: i for i, ntype in enumerate(ntypes)}
meta_edges_src = []
meta_edges_dst = []
etypes = []
rel_graphs = [] rel_graphs = []
for (srctype, etype, dsttype), data in data_dict.items(): for srctype, etype, dsttype in relations:
u, v, _, _ = data meta_edges_src.append(ntype_dict[srctype])
if srctype == dsttype: meta_edges_dst.append(ntype_dict[dsttype])
rel_graphs.append(graph( etypes.append(etype)
(u, v), srctype, etype, src, dst = node_tensor_dict[(srctype, etype, dsttype)]
num_nodes=num_nodes_dict[srctype], g = create_from_edges(src, dst, srctype, etype, dsttype,
validate=validate, num_nodes_dict[srctype], num_nodes_dict[dsttype])
formats=formats, rel_graphs.append(g)
idtype=idtype, device=device))
else:
rel_graphs.append(bipartite(
(u, v), srctype, etype, dsttype,
num_nodes=(num_nodes_dict[srctype], num_nodes_dict[dsttype]),
validate=validate,
formats=formats,
idtype=idtype, device=device))
return hetero_from_relations(rel_graphs, num_nodes_dict) # metagraph is DGLGraph, currently still using int64 as index dtype
metagraph = graph_index.from_coo(len(ntypes), meta_edges_src, meta_edges_dst, True)
# create graph index
hgidx = heterograph_index.create_heterograph_from_relations(
metagraph, [rgrh._graph for rgrh in rel_graphs], num_nodes_per_type)
retg = DGLHeteroGraph(hgidx, ntypes, etypes)
return retg.to(device)
def to_hetero(G, ntypes, etypes, ntype_field=NTYPE, etype_field=ETYPE, def to_heterogeneous(G, ntypes, etypes, ntype_field=NTYPE,
metagraph=None): etype_field=ETYPE, metagraph=None):
"""Convert the given homogeneous graph to a heterogeneous graph. """Convert the given homogeneous graph to a heterogeneous graph.
The input graph should have only one type of nodes and edges. Each node and edge The input graph should have only one type of nodes and edges. Each node and edge
...@@ -531,10 +368,13 @@ def to_hetero(G, ntypes, etypes, ntype_field=NTYPE, etype_field=ETYPE, ...@@ -531,10 +368,13 @@ def to_hetero(G, ntypes, etypes, ntype_field=NTYPE, etype_field=ETYPE,
(0, ty_A, 1) and (2, ty_B, 3). In another word, these two edges share the same edge (0, ty_A, 1) and (2, ty_B, 3). In another word, these two edges share the same edge
type name, but can be distinguished by a canonical edge type tuple. type name, but can be distinguished by a canonical edge type tuple.
This function will copy any node/edge features from :attr:`G` to the returned heterogeneous
graph, except for node/edge types and IDs used to recover the heterogeneous graph.
Parameters Parameters
---------- ----------
G : DGLHeteroGraph G : DGLGraph
Input homogeneous graph. The homogeneous graph.
ntypes : list of str ntypes : list of str
The node type names. The node type names.
etypes : list of str etypes : list of str
...@@ -551,8 +391,8 @@ def to_hetero(G, ntypes, etypes, ntype_field=NTYPE, etype_field=ETYPE, ...@@ -551,8 +391,8 @@ def to_hetero(G, ntypes, etypes, ntype_field=NTYPE, etype_field=ETYPE,
Returns Returns
------- -------
DGLHeteroGraph DGLGraph
A heterograph. The parent node and edge ID are stored in the column A heterogeneous graph. The parent node and edge ID are stored in the column
``dgl.NID`` and ``dgl.EID`` respectively for all node/edge types. ``dgl.NID`` and ``dgl.EID`` respectively for all node/edge types.
Notes Notes
...@@ -568,47 +408,47 @@ def to_hetero(G, ntypes, etypes, ntype_field=NTYPE, etype_field=ETYPE, ...@@ -568,47 +408,47 @@ def to_hetero(G, ntypes, etypes, ntype_field=NTYPE, etype_field=ETYPE,
Examples Examples
-------- --------
>>> g1 = dgl.bipartite([(0, 1), (1, 2)], 'user', 'develops', 'activity') >>> import dgl
>>> g2 = dgl.bipartite([(0, 0), (1, 1)], 'developer', 'develops', 'game') >>> hg = dgl.heterograph({
>>> hetero_g = dgl.hetero_from_relations([g1, g2]) ... ('user', 'develops', 'activity'): ([0, 1], [1, 2]),
>>> print(hetero_g) ... ('developer', 'develops', 'game'): ([0, 1], [0, 1])
... })
>>> print(hg)
Graph(num_nodes={'user': 2, 'activity': 3, 'developer': 2, 'game': 2}, Graph(num_nodes={'user': 2, 'activity': 3, 'developer': 2, 'game': 2},
num_edges={('user', 'develops', 'activity'): 2, ('developer', 'develops', 'game'): 2}, num_edges={('user', 'develops', 'activity'): 2, ('developer', 'develops', 'game'): 2},
metagraph=[('user', 'activity'), ('developer', 'game')]) metagraph=[('user', 'activity'), ('developer', 'game')])
We first convert the heterogeneous graph to a homogeneous graph. We first convert the heterogeneous graph to a homogeneous graph.
>>> homo_g = dgl.to_homo(hetero_g) >>> g = dgl.to_homogeneous(hg)
>>> print(homo_g) >>> print(g)
Graph(num_nodes=9, num_edges=4, Graph(num_nodes=9, num_edges=4,
ndata_schemes={'_TYPE': Scheme(shape=(), dtype=torch.int64), ndata_schemes={'_TYPE': Scheme(shape=(), dtype=torch.int64),
'_ID': Scheme(shape=(), dtype=torch.int64)} '_ID': Scheme(shape=(), dtype=torch.int64)}
edata_schemes={'_TYPE': Scheme(shape=(), dtype=torch.int64), edata_schemes={'_TYPE': Scheme(shape=(), dtype=torch.int64),
'_ID': Scheme(shape=(), dtype=torch.int64)}) '_ID': Scheme(shape=(), dtype=torch.int64)})
>>> homo_g.ndata >>> g.ndata
{'_TYPE': tensor([0, 0, 1, 1, 1, 2, 2, 3, 3]), '_ID': tensor([0, 1, 0, 1, 2, 0, 1, 0, 1])} {'_TYPE': tensor([0, 0, 1, 1, 1, 2, 2, 3, 3]), '_ID': tensor([0, 1, 0, 1, 2, 0, 1, 0, 1])}
Nodes 0, 1 for 'user', 2, 3, 4 for 'activity', 5, 6 for 'developer', 7, 8 for 'game' Nodes 0, 1 for 'user', 2, 3, 4 for 'activity', 5, 6 for 'developer', 7, 8 for 'game'
>>> homo_g.edata >>> g.edata
{'_TYPE': tensor([0, 0, 1, 1]), '_ID': tensor([0, 1, 0, 1])} {'_TYPE': tensor([0, 0, 1, 1]), '_ID': tensor([0, 1, 0, 1])}
Edges 0, 1 for ('user', 'develops', 'activity'), 2, 3 for ('developer', 'develops', 'game') Edges 0, 1 for ('user', 'develops', 'activity'), 2, 3 for ('developer', 'develops', 'game')
Now convert the homogeneous graph back to a heterogeneous graph. Now convert the homogeneous graph back to a heterogeneous graph.
>>> hetero_g_2 = dgl.to_hetero(homo_g, hetero_g.ntypes, hetero_g.etypes) >>> hg_2 = dgl.to_heterogeneous(g, hg.ntypes, hg.etypes)
>>> print(hetero_g_2) >>> print(hg_2)
Graph(num_nodes={'user': 2, 'activity': 3, 'developer': 2, 'game': 2}, Graph(num_nodes={'user': 2, 'activity': 3, 'developer': 2, 'game': 2},
num_edges={('user', 'develops', 'activity'): 2, ('developer', 'develops', 'game'): 2}, num_edges={('user', 'develops', 'activity'): 2, ('developer', 'develops', 'game'): 2},
metagraph=[('user', 'activity'), ('developer', 'game')]) metagraph=[('user', 'activity'), ('developer', 'game')])
See Also See Also
-------- --------
dgl.to_homo to_homogeneous
""" """
# TODO(minjie): use hasattr to support DGLGraph input; should be fixed once
# DGLGraph is merged with DGLHeteroGraph
if (hasattr(G, 'ntypes') and len(G.ntypes) > 1 if (hasattr(G, 'ntypes') and len(G.ntypes) > 1
or hasattr(G, 'etypes') and len(G.etypes) > 1): or hasattr(G, 'etypes') and len(G.etypes) > 1):
raise DGLError('The input graph should be homogenous and have only one ' raise DGLError('The input graph should be homogeneous and have only one '
' type of nodes and edges.') ' type of nodes and edges.')
num_ntypes = len(ntypes) num_ntypes = len(ntypes)
...@@ -660,48 +500,49 @@ def to_hetero(G, ntypes, etypes, ntype_field=NTYPE, etype_field=ETYPE, ...@@ -660,48 +500,49 @@ def to_hetero(G, ntypes, etypes, ntype_field=NTYPE, etype_field=ETYPE,
etype_mask = (edge_ctids[None, :] == canonical_etids[:, None]).all(2) etype_mask = (edge_ctids[None, :] == canonical_etids[:, None]).all(2)
edge_groups = [etype_mask[i].nonzero()[0] for i in range(len(canonical_etids))] edge_groups = [etype_mask[i].nonzero()[0] for i in range(len(canonical_etids))]
rel_graphs = [] data_dict = dict()
canonical_etypes = []
for i, (stid, etid, dtid) in enumerate(canonical_etids): for i, (stid, etid, dtid) in enumerate(canonical_etids):
src_of_etype = src_local[edge_groups[i]] src_of_etype = src_local[edge_groups[i]]
dst_of_etype = dst_local[edge_groups[i]] dst_of_etype = dst_local[edge_groups[i]]
if stid == dtid: canonical_etypes.append((ntypes[stid], etypes[etid], ntypes[dtid]))
rel_graph = graph( data_dict[canonical_etypes[-1]] = \
(src_of_etype, dst_of_etype), ntypes[stid], etypes[etid], (src_of_etype, dst_of_etype)
num_nodes=ntype_count[stid], validate=False, hg = heterograph(data_dict,
idtype=idtype, device=device) {ntype: count for ntype, count in zip(ntypes, ntype_count)},
else: idtype=idtype, device=device)
rel_graph = bipartite(
(src_of_etype,
dst_of_etype), ntypes[stid], etypes[etid], ntypes[dtid],
num_nodes=(ntype_count[stid], ntype_count[dtid]),
validate=False, idtype=idtype, device=device)
rel_graphs.append(rel_graph)
hg = hetero_from_relations(rel_graphs,
{ntype: count for ntype, count in zip(
ntypes, ntype_count)})
ntype2ngrp = {ntype : node_groups[ntid] for ntid, ntype in enumerate(ntypes)} ntype2ngrp = {ntype : node_groups[ntid] for ntid, ntype in enumerate(ntypes)}
# features # features
for key, data in G.ndata.items(): for key, data in G.ndata.items():
if key in [ntype_field, NID]:
continue
for ntid, ntype in enumerate(hg.ntypes): for ntid, ntype in enumerate(hg.ntypes):
rows = F.copy_to(F.tensor(ntype2ngrp[ntype]), F.context(data)) rows = F.copy_to(F.tensor(ntype2ngrp[ntype]), F.context(data))
hg._node_frames[ntid][key] = F.gather_row(data, rows) hg._node_frames[ntid][key] = F.gather_row(data, rows)
for key, data in G.edata.items(): for key, data in G.edata.items():
if key in [etype_field, EID]:
continue
for etid in range(len(hg.canonical_etypes)): for etid in range(len(hg.canonical_etypes)):
rows = F.copy_to(F.tensor(edge_groups[etid]), F.context(data)) rows = F.copy_to(F.tensor(edge_groups[etid]), F.context(data))
hg._edge_frames[etid][key] = F.gather_row(data, rows) hg._edge_frames[hg.get_etype_id(canonical_etypes[etid])][key] = \
F.gather_row(data, rows)
for ntid, ntype in enumerate(hg.ntypes): return hg
hg._node_frames[ntid][NID] = F.tensor(ntype2ngrp[ntype])
for etid in range(len(hg.canonical_etypes)): def to_hetero(G, ntypes, etypes, ntype_field=NTYPE, etype_field=ETYPE,
hg._edge_frames[etid][EID] = F.tensor(edge_groups[etid]) metagraph=None):
"""Convert the given homogeneous graph to a heterogeneous graph.
return hg DEPRECATED: Please use to_heterogeneous
"""
dgl_warning("dgl.to_hetero is deprecated. Please use dgl.to_heterogeneous")
return to_heterogeneous(G, ntypes, etypes, ntype_field=ntype_field,
etype_field=etype_field, metagraph=metagraph)
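To illustrate the feature-copying behavior documented above for to_heterogeneous, a hedged round-trip sketch (feature values are illustrative; to_homogeneous must be asked to carry 'h' along, as described later in this diff):

>>> import dgl
>>> import torch
>>> hg = dgl.heterograph({
...     ('user', 'follows', 'user'): ([0, 1], [1, 2]),
...     ('developer', 'develops', 'game'): ([0, 1], [0, 1])
... })
>>> hg.nodes['user'].data['h'] = torch.ones(3, 1)
>>> hg.nodes['developer'].data['h'] = torch.zeros(2, 1)
>>> hg.nodes['game'].data['h'] = torch.ones(2, 1)
>>> g = dgl.to_homogeneous(hg, ndata=['h'])            # carry 'h' onto the homogeneous graph
>>> hg2 = dgl.to_heterogeneous(g, hg.ntypes, hg.etypes)
>>> hg2.nodes['user'].data['h']                        # copied back per node type
tensor([[1.],
        [1.],
        [1.]])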
def to_homo(G): def to_homogeneous(G, ndata=None, edata=None):
"""Convert the given heterogeneous graph to a homogeneous graph. """Convert the given heterogeneous graph to a homogeneous graph.
The returned graph has only one type of nodes and edges. The returned graph has only one type of nodes and edges.
...@@ -710,34 +551,62 @@ def to_homo(G): ...@@ -710,34 +551,62 @@ def to_homo(G):
is an integer representing the type id, which can be used to retrieve the type is an integer representing the type id, which can be used to retrieve the type
names stored in ``G.ntypes`` and ``G.etypes`` arguments. names stored in ``G.ntypes`` and ``G.etypes`` arguments.
If all
Parameters Parameters
---------- ----------
G : DGLHeteroGraph G : DGLGraph
Input heterogeneous graph. The heterogeneous graph.
ndata : list[str], optional
The node features to combine across all node types. For each feature ``feat`` in
:attr:`ndata`, it concatenates ``G.nodes[T].data[feat]`` across all node types ``T``.
As a result, the feature ``feat`` of all node types should have the same shape and
data type. By default, the returned graph will not have any node features.
edata : list[str], optional
The edge features to combine across all edge types. For each feature ``feat`` in
:attr:`edata`, it concatenates ``G.edges[T].data[feat]`` across all edge types ``T``.
As a result, the feature ``feat`` of all edge types should have the same shape and
data type. By default, the returned graph will not have any edge features.
Returns Returns
------- -------
DGLHeteroGraph DGLGraph
A homogeneous graph. The parent node and edge type/ID are stored in A homogeneous graph. The parent node and edge type/ID are stored in
columns ``dgl.NTYPE/dgl.NID`` and ``dgl.ETYPE/dgl.EID`` respectively. columns ``dgl.NTYPE/dgl.NID`` and ``dgl.ETYPE/dgl.EID`` respectively.
Examples Examples
-------- --------
>>> follows_g = dgl.graph([(0, 1), (1, 2)], 'user', 'follows') The following example uses PyTorch backend.
>>> devs_g = dgl.bipartite([(0, 0), (1, 1)], 'developer', 'develops', 'game')
>>> hetero_g = dgl.hetero_from_relations([follows_g, devs_g]) >>> import dgl
>>> homo_g = dgl.to_homo(hetero_g) >>> import torch
>>> homo_g.ndata
>>> hg = dgl.heterograph({
... ('user', 'follows', 'user'): ([0, 1], [1, 2]),
... ('developer', 'develops', 'game'): ([0, 1], [0, 1])
... })
>>> hg.nodes['user'].data['h'] = torch.ones(3, 1)
>>> hg.nodes['developer'].data['h'] = torch.zeros(2, 1)
>>> hg.nodes['game'].data['h'] = torch.ones(2, 1)
>>> g = dgl.to_homogeneous(hg)
>>> # The first three nodes are for 'user', the next two are for 'developer',
>>> # and the last two are for 'game'
>>> g.ndata
{'_TYPE': tensor([0, 0, 0, 1, 1, 2, 2]), '_ID': tensor([0, 1, 2, 0, 1, 0, 1])} {'_TYPE': tensor([0, 0, 0, 1, 1, 2, 2]), '_ID': tensor([0, 1, 2, 0, 1, 0, 1])}
First three nodes for 'user', next two for 'developer' and the last two for 'game' >>> # The first two edges are for 'follows', and the next two are for 'develops' edges.
>>> homo_g.edata >>> g.edata
{'_TYPE': tensor([0, 0, 1, 1]), '_ID': tensor([0, 1, 0, 1])} {'_TYPE': tensor([0, 0, 1, 1]), '_ID': tensor([0, 1, 0, 1])}
First two edges for 'follows', next two for 'develops'
Combine feature 'h' across all node types in the conversion.
>>> g = dgl.to_homogeneous(hg, ndata=['h'])
>>> g.ndata['h']
tensor([[1.], [1.], [1.], [0.], [0.], [1.], [1.]])
See Also See Also
-------- --------
dgl.to_hetero to_heterogeneous
""" """
num_nodes_per_ntype = [G.number_of_nodes(ntype) for ntype in G.ntypes] num_nodes_per_ntype = [G.number_of_nodes(ntype) for ntype in G.ntypes]
offset_per_ntype = np.insert(np.cumsum(num_nodes_per_ntype), 0, 0) offset_per_ntype = np.insert(np.cumsum(num_nodes_per_ntype), 0, 0)
...@@ -767,11 +636,15 @@ def to_homo(G): ...@@ -767,11 +636,15 @@ def to_homo(G):
eids.append(F.arange(0, num_edges, G.idtype)) eids.append(F.arange(0, num_edges, G.idtype))
retg = graph((F.cat(srcs, 0), F.cat(dsts, 0)), num_nodes=total_num_nodes, retg = graph((F.cat(srcs, 0), F.cat(dsts, 0)), num_nodes=total_num_nodes,
validate=False, idtype=G.idtype, device=G.device) idtype=G.idtype, device=G.device)
# copy features # copy features
comb_nf = combine_frames(G._node_frames, range(len(G.ntypes))) if ndata is None:
comb_ef = combine_frames(G._edge_frames, range(len(G.etypes))) ndata = []
if edata is None:
edata = []
comb_nf = combine_frames(G._node_frames, range(len(G.ntypes)), col_names=ndata)
comb_ef = combine_frames(G._edge_frames, range(len(G.etypes)), col_names=edata)
if comb_nf is not None: if comb_nf is not None:
retg.ndata.update(comb_nf) retg.ndata.update(comb_nf)
if comb_ef is not None: if comb_ef is not None:
...@@ -785,89 +658,317 @@ def to_homo(G): ...@@ -785,89 +658,317 @@ def to_homo(G):
return retg return retg
def to_homo(G):
"""Convert the given heterogeneous graph to a homogeneous graph.
DEPRECATED: Please use to_homogeneous
"""
dgl_warning("dgl.to_homo is deprecated. Please use dgl.to_homogeneous")
return to_homogeneous(G)
def from_scipy(sp_mat, def from_scipy(sp_mat,
ntype='_N', etype='_E',
eweight_name=None, eweight_name=None,
formats=['coo', 'csr', 'csc'], idtype=None,
idtype=None): device=None):
"""Create a DGLGraph from a SciPy sparse matrix. """Create a graph from a SciPy sparse matrix.
Parameters Parameters
---------- ----------
sp_mat : SciPy sparse matrix sp_mat : scipy.sparse.spmatrix
SciPy sparse matrix. The graph adjacency matrix. Each nonzero entry ``sp_mat[i, j]`` represents an edge from
ntype : str node ``i`` to ``j``. The matrix must have square shape ``(N, N)``, where ``N`` is the
Type name for both source and destination nodes number of nodes in the graph.
etype : str
Type name for edges
eweight_name : str, optional eweight_name : str, optional
If given, the edge weights in the matrix will be The edata name for storing the nonzero values of :attr:`sp_mat`. If given, DGL will
stored in ``edata[eweight_name]``. store the nonzero values of :attr:`sp_mat` in ``edata[eweight_name]`` of the returned
formats : str or list of str graph.
It can be ``'coo'``/``'csr'``/``'csc'`` or a sublist of them, idtype : int32 or int64, optional
Force the storage formats. Default: ``['coo', 'csr', 'csc']``. The data type for storing the structure-related graph information such as node and
idtype : int32, int64, optional edge IDs. It should be a framework-specific data type object (e.g., ``torch.int32``).
Integer ID type. Must be int32 or int64. Default: int64. By default, DGL uses int64.
device : device context, optional
The device of the resulting graph. It should be a framework-specific device object
(e.g., ``torch.device``). By default, DGL stores the graph on CPU.
Returns Returns
------- -------
g : DGLGraph DGLGraph
The created graph.
Notes
-----
1. The function supports all kinds of SciPy sparse matrix classes (e.g.,
:class:`scipy.sparse.csr.csr_matrix`). It converts the input matrix to the COOrdinate
format using :func:`scipy.sparse.spmatrix.tocoo` before creating a :class:`DGLGraph`.
Creating from a :class:`scipy.sparse.coo.coo_matrix` is hence the most efficient way.
2. DGL internally maintains multiple copies of the graph structure in different sparse
formats and chooses the most efficient one depending on the computation invoked.
If memory usage becomes an issue in the case of large graphs, use
:func:`dgl.DGLGraph.formats` to restrict the allowed formats.
Examples
--------
The following example uses PyTorch backend.
>>> import dgl
>>> import numpy as np
>>> import torch
>>> from scipy.sparse import coo_matrix
Create a small three-edge graph.
>>> # Source nodes for edges (2, 1), (3, 2), (4, 3)
>>> src_ids = np.array([2, 3, 4])
>>> # Destination nodes for edges (2, 1), (3, 2), (4, 3)
>>> dst_ids = np.array([1, 2, 3])
>>> # Weight for edges (2, 1), (3, 2), (4, 3)
>>> eweight = np.array([0.2, 0.3, 0.5])
>>> sp_mat = coo_matrix((eweight, (src_ids, dst_ids)), shape=(5, 5))
>>> g = dgl.from_scipy(sp_mat)
Retrieve the edge weights.
>>> g = dgl.from_scipy(sp_mat, eweight_name='w')
>>> g.edata['w']
tensor([0.2000, 0.3000, 0.5000], dtype=torch.float64)
Create a graph on the first GPU with data type int32.
>>> g = dgl.from_scipy(sp_mat, idtype=torch.int32, device='cuda:0')
See Also
--------
graph
from_networkx
""" """
# Sanity check
num_rows = sp_mat.shape[0]
num_cols = sp_mat.shape[1]
if num_rows != num_cols:
raise DGLError('Expect the number of rows to be the same as the number of columns for '
'sp_mat, got {:d} and {:d}.'.format(num_rows, num_cols))
u, v, urange, vrange = utils.graphdata2tensors(sp_mat, idtype) u, v, urange, vrange = utils.graphdata2tensors(sp_mat, idtype)
g = create_from_edges(u, v, ntype, etype, ntype, urange, vrange, g = create_from_edges(u, v, '_N', '_E', '_N', urange, vrange)
validate=False, formats=formats) if eweight_name is not None:
g.edata[eweight_name] = F.tensor(sp_mat.data)
return g.to(device)
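A quick sketch of the new square-matrix check in from_scipy; the rectangular matrix is illustrative and the error text mirrors the format string above:

>>> import dgl
>>> import numpy as np
>>> from scipy.sparse import coo_matrix
>>> rect = coo_matrix((np.ones(2), ([0, 1], [2, 3])), shape=(2, 4))
>>> dgl.from_scipy(rect)
Traceback (most recent call last):
  ...
dgl._ffi.base.DGLError: Expect the number of rows to be the same as the number of columns for sp_mat, got 2 and 4.
>>> g = dgl.bipartite_from_scipy(rect, utype='_U', etype='_E', vtype='_V')   # rectangular matrices go here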
def bipartite_from_scipy(sp_mat,
utype, etype, vtype,
eweight_name=None,
idtype=None,
device=None):
"""Create a unidirectional bipartite graph from a SciPy sparse matrix.
The created graph will have two types of nodes ``utype`` and ``vtype`` as well as one
edge type ``etype`` whose edges are from ``utype`` to ``vtype``.
Parameters
----------
sp_mat : scipy.sparse.spmatrix
The graph adjacency matrix. Each nonzero entry ``sp_mat[i, j]``
represents an edge from node ``i`` of type :attr:`utype` to ``j`` of type :attr:`vtype`.
Let the matrix shape be ``(N, M)``. There will be ``N`` nodes of type :attr:`utype`
and ``M`` nodes of type ``vtype`` in the resulting graph.
utype : str, optional
The name of the source node type.
etype : str, optional
The name of the edge type.
vtype : str, optional
The name of the destination node type.
eweight_name : str, optional
The edata name for storing the nonzero values of :attr:`sp_mat`.
If given, DGL will store the nonzero values of :attr:`sp_mat` in ``edata[eweight_name]``
of the returned graph.
idtype : int32 or int64, optional
The data type for storing the structure-related graph information such as node and
edge IDs. It should be a framework-specific data type object (e.g., ``torch.int32``).
By default, DGL uses int64.
device : device context, optional
The device of the resulting graph. It should be a framework-specific device object
(e.g., ``torch.device``). By default, DGL stores the graph on CPU.
Returns
-------
DGLGraph
The created graph.
Notes
-----
1. The function supports all kinds of SciPy sparse matrix classes (e.g.,
:class:`scipy.sparse.csr.csr_matrix`). It converts the input matrix to the COOrdinate
format using :func:`scipy.sparse.spmatrix.tocoo` before creating a :class:`DGLGraph`.
Creating from a :class:`scipy.sparse.coo.coo_matrix` is hence the most efficient way.
2. DGL internally maintains multiple copies of the graph structure in different sparse
formats and chooses the most efficient one depending on the computation invoked.
If memory usage becomes an issue in the case of large graphs, use
:func:`dgl.DGLGraph.formats` to restrict the allowed formats.
Examples
--------
The following example uses PyTorch backend.
>>> import dgl
>>> import numpy as np
>>> import torch
>>> from scipy.sparse import coo_matrix
Create a small three-edge graph.
>>> # Source nodes for edges (2, 1), (3, 2), (4, 3)
>>> src_ids = np.array([2, 3, 4])
>>> # Destination nodes for edges (2, 1), (3, 2), (4, 3)
>>> dst_ids = np.array([1, 2, 3])
>>> # Weight for edges (2, 1), (3, 2), (4, 3)
>>> eweight = np.array([0.2, 0.3, 0.5])
>>> sp_mat = coo_matrix((eweight, (src_ids, dst_ids)))
>>> g = dgl.bipartite_from_scipy(sp_mat, utype='_U', etype='_E', vtype='_V')
Retrieve the edge weights.
>>> g = dgl.bipartite_from_scipy(sp_mat, utype='_U', etype='_E', vtype='_V', eweight_name='w')
>>> g.edata['w']
tensor([0.2000, 0.3000, 0.5000], dtype=torch.float64)
Create a graph on the first GPU with data type int32.
>>> g = dgl.bipartite_from_scipy(sp_mat, utype='_U', etype='_E', vtype='_V',
... idtype=torch.int32, device='cuda:0')
See Also
--------
heterograph
bipartite_from_networkx
"""
# Sanity check
u, v, urange, vrange = utils.graphdata2tensors(sp_mat, idtype, bipartite=True)
g = create_from_edges(u, v, utype, etype, vtype, urange, vrange)
if eweight_name is not None: if eweight_name is not None:
g.edata[eweight_name] = F.tensor(sp_mat.data) g.edata[eweight_name] = F.tensor(sp_mat.data)
return g return g.to(device)
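A small sketch of how bipartite_from_scipy derives the node counts from the matrix shape, as the docstring above states (the shape (6, 7) is illustrative):

>>> import dgl
>>> import numpy as np
>>> from scipy.sparse import coo_matrix
>>> sp_mat = coo_matrix((np.array([0.2, 0.3, 0.5]),
...                      (np.array([2, 3, 4]), np.array([1, 2, 3]))), shape=(6, 7))
>>> g = dgl.bipartite_from_scipy(sp_mat, utype='user', etype='plays', vtype='game')
>>> g.number_of_nodes('user'), g.number_of_nodes('game')
(6, 7)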
def from_networkx(nx_graph, *, def from_networkx(nx_graph,
ntype='_N', etype='_E',
node_attrs=None, node_attrs=None,
edge_attrs=None, edge_attrs=None,
edge_id_attr_name='id', edge_id_attr_name=None,
formats=['coo', 'csr', 'csc'], idtype=None,
idtype=None): device=None):
"""Create a DGLGraph from networkx. """Create a graph from a NetworkX graph.
Creating a DGLGraph from a NetworkX graph is not fast, especially for large-scale graphs.
It is recommended to first convert a NetworkX graph into a tuple of node-tensors
and then construct a DGLGraph with :func:`dgl.graph`.
Parameters Parameters
---------- ----------
nx_graph : networkx.Graph nx_graph : networkx.Graph
NetworkX graph. The NetworkX graph holding the graph structure and the node/edge attributes.
ntype : str DGL will relabel the nodes using consecutive integers starting from zero if it is
Type name for both source and destination nodes not the case. If the input graph is undirected, DGL converts it to a directed graph
etype : str by :func:`networkx.Graph.to_directed`.
Type name for edges node_attrs : list[str], optional
node_attrs : list of str The names of the node attributes to retrieve from the NetworkX graph. If given, DGL
Names for node features to retrieve from the NetworkX graph (Default: None) stores the retrieved node attributes in ``ndata`` of the returned graph using their
edge_attrs : list of str original names. The attribute data must be convertible to Tensor type (e.g., scalar,
Names for edge features to retrieve from the NetworkX graph (Default: None) numpy.ndarray, list, etc.).
edge_attrs : list[str], optional
The names of the edge attributes to retrieve from the NetworkX graph. If given, DGL
stores the retrieved edge attributes in ``edata`` of the returned graph using their
original names. The attribute data must be convertible to Tensor type (e.g., scalar,
numpy.ndarray, list, etc.). It must be None if :attr:`nx_graph` is undirected.
edge_id_attr_name : str, optional edge_id_attr_name : str, optional
Key name for edge ids in the NetworkX graph. If not found, we The name of the edge attribute that stores the edge IDs. If given, DGL will assign edge
will consider the graph not to have pre-specified edge ids. (Default: 'id') IDs accordingly when creating the graph, so the attribute must be valid IDs, i.e.
formats : str or list of str consecutive integers starting from zero. By default, the edge IDs of the returned graph
It can be ``'coo'``/``'csr'``/``'csc'`` or a sublist of them, can be arbitrary. It must be None if :attr:`nx_graph` is undirected.
Force the storage formats. Default: ``['coo', 'csr', 'csc']``. idtype : int32 or int64, optional
idtype : int32, int64, optional The data type for storing the structure-related graph information such as node and
Integer ID type. Must be int32 or int64. Default: int64. edge IDs. It should be a framework-specific data type object (e.g., ``torch.int32``).
By default, DGL uses int64.
device : device context, optional
The device of the resulting graph. It should be a framework-specific device object
(e.g., ``torch.device``). By default, DGL stores the graph on CPU.
Returns Returns
------- -------
g : DGLGraph DGLGraph
The created graph.
Notes
-----
DGL internally maintains multiple copies of the graph structure in different sparse
formats and chooses the most efficient one depending on the computation invoked.
If memory usage becomes an issue in the case of large graphs, use
:func:`dgl.DGLGraph.formats` to restrict the allowed formats.
Examples
--------
The following example uses PyTorch backend.
>>> import dgl
>>> import networkx as nx
>>> import numpy as np
>>> import torch
Create a 2-edge NetworkX graph.
>>> nx_g = nx.DiGraph()
>>> # Add 3 nodes and two features for them
>>> nx_g.add_nodes_from([0, 1, 2], feat1=np.zeros((3, 1)), feat2=np.ones((3, 1)))
>>> # Add 2 edges (1, 2) and (2, 1) with two features, one being edge IDs
>>> nx_g.add_edge(1, 2, weight=np.ones((1, 1)), eid=np.array([1]))
>>> nx_g.add_edge(2, 1, weight=np.ones((1, 1)), eid=np.array([0]))
Convert it into a DGLGraph with structure only.
>>> g = dgl.from_networkx(nx_g)
Retrieve the node/edge features of the graph.
>>> g = dgl.from_networkx(nx_g, node_attrs=['feat1', 'feat2'], edge_attrs=['weight'])
Use a pre-specified ordering of the edges.
>>> g.edges()
(tensor([1, 2]), tensor([2, 1]))
>>> g = dgl.from_networkx(nx_g, edge_id_attr_name='eid')
>>> g.edges()
(tensor([2, 1]), tensor([1, 2]))
Create a graph on the first GPU with data type int32.
>>> g = dgl.from_networkx(nx_g, idtype=torch.int32, device='cuda:0')
See Also
--------
graph
from_scipy
""" """
# Relabel nodes using consecutive integers # Sanity check
if edge_id_attr_name is not None and \
edge_id_attr_name not in next(iter(nx_graph.edges(data=True)))[-1]:
raise DGLError('Failed to find the pre-specified edge IDs in the edge features of '
'the NetworkX graph with name {}'.format(edge_id_attr_name))
if not nx_graph.is_directed() and not (edge_id_attr_name is None and edge_attrs is None):
raise DGLError('Expect edge_id_attr_name and edge_attrs to be None when nx_graph is '
'undirected, got {} and {}'.format(edge_id_attr_name, edge_attrs))
# Relabel nodes using consecutive integers starting from 0
nx_graph = nx.convert_node_labels_to_integers(nx_graph, ordering='sorted') nx_graph = nx.convert_node_labels_to_integers(nx_graph, ordering='sorted')
if not nx_graph.is_directed(): if not nx_graph.is_directed():
nx_graph = nx_graph.to_directed() nx_graph = nx_graph.to_directed()
g = graph(nx_graph, ntype, etype, u, v, urange, vrange = utils.graphdata2tensors(
formats=formats, nx_graph, idtype, edge_id_attr_name=edge_id_attr_name)
idtype=idtype)
g = create_from_edges(u, v, '_N', '_E', '_N', urange, vrange)
# nx_graph.edges(data=True) returns src, dst, attr_dict # nx_graph.edges(data=True) returns src, dst, attr_dict
if nx_graph.number_of_edges() > 0: has_edge_id = nx_graph.number_of_edges() > 0 and edge_id_attr_name is not None
has_edge_id = edge_id_attr_name in next(iter(nx_graph.edges(data=True)))[-1]
else:
has_edge_id = False
# handle features # handle features
# copy attributes # copy attributes
...@@ -912,31 +1013,252 @@ def from_networkx(nx_graph, *, ...@@ -912,31 +1013,252 @@ def from_networkx(nx_graph, *,
raise DGLError('Not all edges have attribute {}.'.format(attr)) raise DGLError('Not all edges have attribute {}.'.format(attr))
g.edata[attr] = F.copy_to(_batcher(attr_dict[attr]), g.device) g.edata[attr] = F.copy_to(_batcher(attr_dict[attr]), g.device)
return g return g.to(device)
def bipartite_from_networkx(nx_graph,
utype, etype, vtype,
u_attrs=None, e_attrs=None, v_attrs=None,
edge_id_attr_name=None,
idtype=None,
device=None):
"""Create a unidirectional bipartite graph from a NetworkX graph.
The created graph will have two types of nodes ``utype`` and ``vtype`` as well as one
edge type ``etype`` whose edges are from ``utype`` to ``vtype``.
Creating a DGLGraph from a NetworkX graph can be slow, especially for large graphs.
It is recommended to first convert the NetworkX graph into a tuple of node-tensors
and then construct a DGLGraph with :func:`dgl.heterograph`.
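A minimal sketch of that faster path (the node/edge type names and sizes below are hypothetical,
and the source/destination IDs are assumed to already be relabeled as consecutive integers):
>>> import dgl
>>> import torch
>>> src = torch.tensor([0, 1, 1])   # IDs of the source-type nodes
>>> dst = torch.tensor([1, 0, 2])   # IDs of the destination-type nodes
>>> g = dgl.heterograph({('user', 'buys', 'item'): (src, dst)},
...                     num_nodes_dict={'user': 2, 'item': 3})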
Parameters
----------
nx_graph : networkx.DiGraph
The NetworkX graph holding the graph structure and the node/edge attributes.
DGL will relabel the nodes using consecutive integers starting from zero if it is
not the case. The graph must follow `NetworkX's bipartite graph convention
<https://networkx.github.io/documentation/stable/reference/algorithms/bipartite.html>`_,
and furthermore the edges must be from nodes with attribute ``bipartite=0`` to nodes
with attribute ``bipartite=1``.
utype : str
The name of the source node type.
etype : str
The name of the edge type.
vtype : str
The name of the destination node type.
u_attrs : list[str], optional
The names of the node attributes for node type :attr:`utype` to retrieve from the
NetworkX graph. If given, DGL stores the retrieved node attributes in
``nodes[utype].data`` of the returned graph using their original names. The attribute
data must be convertible to Tensor type (e.g., scalar, numpy.ndarray, list, etc.).
e_attrs : list[str], optional
The names of the edge attributes to retrieve from the NetworkX graph. If given, DGL
stores the retrieved edge attributes in ``edata`` of the returned graph using their
original names. The attribute data must be convertible to Tensor type (e.g., scalar,
numpy.ndarray, list, etc.).
v_attrs : list[str], optional
The names of the node attributes for node type :attr:`vtype` to retrieve from the
NetworkX graph. If given, DGL stores the retrieved node attributes in
``nodes[vtype].data`` of the returned graph using their original names. The attribute
data must be convertible to Tensor type (e.g., scalar, numpy.ndarray, list, etc.).
edge_id_attr_name : str, optional
The name of the edge attribute that stores the edge IDs. If given, DGL will assign edge
IDs accordingly when creating the graph, so the attribute must be valid IDs, i.e.
consecutive integers starting from zero. By default, the edge IDs of the returned graph
can be arbitrary.
idtype : int32 or int64, optional
The data type for storing the structure-related graph information such as node and
edge IDs. It should be a framework-specific data type object (e.g., ``torch.int32``).
By default, DGL uses int64.
device : device context, optional
The device of the resulting graph. It should be a framework-specific device object
(e.g., ``torch.device``). By default, DGL stores the graph on CPU.
Returns
-------
DGLGraph
The created graph.
Examples
--------
The following example uses PyTorch backend.
>>> import dgl
>>> import networkx as nx
>>> import numpy as np
>>> import torch
Create a 2-edge unidirectional bipartite graph.
>>> nx_g = nx.DiGraph()
>>> # Add nodes for the source type
>>> nx_g.add_nodes_from([1, 3], bipartite=0, feat1=np.zeros((2, 1)), feat2=np.ones((2, 1)))
>>> # Add nodes for the destination type
>>> nx_g.add_nodes_from([2, 4, 5], bipartite=1, feat3=np.zeros((3, 1)))
>>> nx_g.add_edge(1, 4, weight=np.ones((1, 1)), eid=np.array([1]))
>>> nx_g.add_edge(3, 5, weight=np.ones((1, 1)), eid=np.array([0]))
Convert it into a DGLGraph with structure only.
>>> g = dgl.bipartite_from_networkx(nx_g, utype='_U', etype='_E', vtype='_V')
Retrieve the node/edge features of the graph.
>>> g = dgl.bipartite_from_networkx(nx_g, utype='_U', etype='_E', vtype='_V',
... u_attrs=['feat1', 'feat2'],
... e_attrs=['weight'],
... v_attrs=['feat3'])
Use a pre-specified ordering of the edges.
>>> g.edges()
(tensor([0, 1]), tensor([1, 2]))
>>> g = dgl.bipartite_from_networkx(nx_g,
... utype='_U', etype='_E', vtype='_V',
... edge_id_attr_name='eid')
>>> g.edges()
(tensor([1, 0]), tensor([2, 1]))
Create a graph on the first GPU with data type int32.
>>> g = dgl.bipartite_from_networkx(nx_g, utype='_U', etype='_E', vtype='_V',
... idtype=torch.int32, device='cuda:0')
See Also
--------
heterograph
bipartite_from_scipy
"""
if not nx_graph.is_directed():
raise DGLError('Expect nx_graph to be a directed NetworkX graph.')
if edge_id_attr_name is not None and \
edge_id_attr_name not in next(iter(nx_graph.edges(data=True)))[-1]:
raise DGLError('Failed to find the pre-specified edge IDs in the edge features '
'of the NetworkX graph with name {}'.format(edge_id_attr_name))
# Get the source and destination node sets
top_nodes = set()
bottom_nodes = set()
for n, ndata in nx_graph.nodes(data=True):
if 'bipartite' not in ndata:
raise DGLError('Expect the node {} to have attribute bipartite'.format(n))
if ndata['bipartite'] == 0:
top_nodes.add(n)
elif ndata['bipartite'] == 1:
bottom_nodes.add(n)
else:
raise ValueError('Expect the bipartite attribute of the node {} to be 0 or 1, '
'got {}'.format(n, ndata['bipartite']))
# Separately relabel the source and destination nodes.
top_nodes = sorted(top_nodes)
bottom_nodes = sorted(bottom_nodes)
top_map = {n : i for i, n in enumerate(top_nodes)}
bottom_map = {n : i for i, n in enumerate(bottom_nodes)}
# Get the node tensors and the number of nodes
u, v, urange, vrange = utils.graphdata2tensors(
nx_graph, idtype, bipartite=True,
edge_id_attr_name=edge_id_attr_name,
top_map=top_map, bottom_map=bottom_map)
g = create_from_edges(u, v, utype, etype, vtype, urange, vrange)
# nx_graph.edges(data=True) returns src, dst, attr_dict
has_edge_id = nx_graph.number_of_edges() > 0 and edge_id_attr_name is not None
# handle features
# copy attributes
def _batcher(lst):
if F.is_tensor(lst[0]):
return F.cat([F.unsqueeze(x, 0) for x in lst], dim=0)
else:
return F.tensor(lst)
if u_attrs is not None:
# mapping from feature name to a list of tensors to be concatenated
src_attr_dict = defaultdict(list)
for nid in top_map.keys():
for attr in u_attrs:
src_attr_dict[attr].append(nx_graph.nodes[nid][attr])
for attr in u_attrs:
g.srcdata[attr] = F.copy_to(_batcher(src_attr_dict[attr]), g.device)
if v_attrs is not None:
# mapping from feature name to a list of tensors to be concatenated
dst_attr_dict = defaultdict(list)
for nid in bottom_map.keys():
for attr in v_attrs:
dst_attr_dict[attr].append(nx_graph.nodes[nid][attr])
for attr in v_attrs:
g.dstdata[attr] = F.copy_to(_batcher(dst_attr_dict[attr]), g.device)
if e_attrs is not None:
# mapping from feature name to a list of tensors to be concatenated
attr_dict = defaultdict(lambda: [None] * g.number_of_edges())
# each defaultdict value is initialized to be a list of None
# None here serves as placeholder to be replaced by feature with
# corresponding edge id
if has_edge_id:
for _, _, attrs in nx_graph.edges(data=True):
for key in e_attrs:
attr_dict[key][attrs[edge_id_attr_name]] = attrs[key]
else:
# XXX: assuming networkx iteration order is deterministic
# so the order is the same as graph_index.from_networkx
for eid, (_, _, attrs) in enumerate(nx_graph.edges(data=True)):
for key in e_attrs:
attr_dict[key][eid] = attrs[key]
for attr in e_attrs:
for val in attr_dict[attr]:
if val is None:
raise DGLError('Not all edges have attribute {}.'.format(attr))
g.edata[attr] = F.copy_to(_batcher(attr_dict[attr]), g.device)
return g.to(device)
def to_networkx(g, node_attrs=None, edge_attrs=None): def to_networkx(g, node_attrs=None, edge_attrs=None):
"""Convert to networkx graph. """Convert a homogeneous graph to a NetworkX graph.
The edge id will be saved as the 'id' edge attribute. It will save the edge IDs as the ``'id'`` edge attribute in the returned NetworkX graph.
Parameters Parameters
---------- ----------
g : DGLGraph or DGLHeteroGraph g : DGLGraph
For DGLHeteroGraphs, we currently only support the A homogeneous graph on CPU.
case of one node type and one edge type.
node_attrs : iterable of str, optional node_attrs : iterable of str, optional
The node attributes to be copied. (Default: None) The node attributes to copy from ``g.ndata``. (Default: None)
edge_attrs : iterable of str, optional edge_attrs : iterable of str, optional
The edge attributes to be copied. (Default: None) The edge attributes to copy from ``g.edata``. (Default: None)
Returns Returns
------- -------
networkx.DiGraph networkx.DiGraph
The nx graph The converted NetworkX graph.
Examples
--------
The following example uses PyTorch backend.
>>> import dgl
>>> import torch
>>> g = dgl.graph((torch.tensor([1, 2]), torch.tensor([1, 3])))
>>> g.ndata['h'] = torch.zeros(4, 1)
>>> g.edata['h1'] = torch.ones(2, 1)
>>> g.edata['h2'] = torch.zeros(2, 2)
>>> nx_g = dgl.to_networkx(g, node_attrs=['h'], edge_attrs=['h1', 'h2'])
>>> nx_g.nodes(data=True)
NodeDataView({0: {'h': tensor([0.])},
1: {'h': tensor([0.])},
2: {'h': tensor([0.])},
3: {'h': tensor([0.])}})
>>> nx_g.edges(data=True)
OutMultiEdgeDataView([(1, 1, {'id': 0, 'h1': tensor([1.]), 'h2': tensor([0., 0.])}),
(2, 3, {'id': 1, 'h1': tensor([1.]), 'h2': tensor([0., 0.])})])
""" """
if g.device != F.cpu(): if g.device != F.cpu():
raise DGLError('Cannot convert a CUDA graph to networkx. Call g.cpu() first.') raise DGLError('Cannot convert a CUDA graph to networkx. Call g.cpu() first.')
if not g.is_homogeneous(): if not g.is_homogeneous:
raise DGLError('dgl.to_networkx only supports homogeneous graphs.') raise DGLError('dgl.to_networkx only supports homogeneous graphs.')
src, dst = g.edges() src, dst = g.edges()
src = F.asnumpy(src) src = F.asnumpy(src)
...@@ -967,8 +1289,7 @@ DGLHeteroGraph.to_networkx = to_networkx ...@@ -967,8 +1289,7 @@ DGLHeteroGraph.to_networkx = to_networkx
def create_from_edges(u, v, def create_from_edges(u, v,
utype, etype, vtype, utype, etype, vtype,
urange, vrange, urange, vrange,
validate=True, validate=True):
formats=['coo', 'csr', 'csc']):
"""Internal function to create a graph from incident nodes with types. """Internal function to create a graph from incident nodes with types.
utype could be equal to vtype utype could be equal to vtype
...@@ -993,9 +1314,6 @@ def create_from_edges(u, v, ...@@ -993,9 +1314,6 @@ def create_from_edges(u, v,
maximum of the destination node IDs in the edge list plus 1. (Default: None) maximum of the destination node IDs in the edge list plus 1. (Default: None)
validate : bool, optional validate : bool, optional
If True, checks if node IDs are within range. If True, checks if node IDs are within range.
formats : str or list of str
It can be ``'coo'``/``'csr'``/``'csc'`` or a sublist of them,
Force the storage formats. Default: ``['coo', 'csr', 'csc']``.
Returns Returns
------- -------
...@@ -1016,12 +1334,8 @@ def create_from_edges(u, v, ...@@ -1016,12 +1334,8 @@ def create_from_edges(u, v,
else: else:
num_ntypes = 2 num_ntypes = 2
if 'coo' in formats: hgidx = heterograph_index.create_unitgraph_from_coo(
hgidx = heterograph_index.create_unitgraph_from_coo( num_ntypes, urange, vrange, u, v, ['coo', 'csr', 'csc'])
num_ntypes, urange, vrange, u, v, formats)
else:
hgidx = heterograph_index.create_unitgraph_from_coo(
num_ntypes, urange, vrange, u, v, ['coo']).formats(formats)
if utype == vtype: if utype == vtype:
return DGLHeteroGraph(hgidx, [utype], [etype]) return DGLHeteroGraph(hgidx, [utype], [etype])
else: else:
......
...@@ -19,7 +19,7 @@ from .. import convert ...@@ -19,7 +19,7 @@ from .. import convert
from .. import batch from .. import batch
from .. import backend as F from .. import backend as F
from ..convert import graph as dgl_graph from ..convert import graph as dgl_graph
from ..convert import to_networkx from ..convert import from_networkx, to_networkx
backend = os.environ.get('DGLBACKEND', 'pytorch') backend = os.environ.get('DGLBACKEND', 'pytorch')
...@@ -119,7 +119,7 @@ class CitationGraphDataset(DGLBuiltinDataset): ...@@ -119,7 +119,7 @@ class CitationGraphDataset(DGLBuiltinDataset):
test_mask = _sample_mask(idx_test, labels.shape[0]) test_mask = _sample_mask(idx_test, labels.shape[0])
self._graph = graph self._graph = graph
g = dgl_graph(graph) g = from_networkx(graph)
g.ndata['train_mask'] = generate_mask_tensor(train_mask) g.ndata['train_mask'] = generate_mask_tensor(train_mask)
g.ndata['val_mask'] = generate_mask_tensor(val_mask) g.ndata['val_mask'] = generate_mask_tensor(val_mask)
...@@ -794,13 +794,13 @@ class CoraBinary(DGLBuiltinDataset): ...@@ -794,13 +794,13 @@ class CoraBinary(DGLBuiltinDataset):
for line in f.readlines(): for line in f.readlines():
if line.startswith('graph'): if line.startswith('graph'):
if len(elist) != 0: if len(elist) != 0:
self.graphs.append(dgl_graph(elist)) self.graphs.append(dgl_graph(tuple(zip(*elist))))
elist = [] elist = []
else: else:
u, v = line.strip().split(' ') u, v = line.strip().split(' ')
elist.append((int(u), int(v))) elist.append((int(u), int(v)))
if len(elist) != 0: if len(elist) != 0:
self.graphs.append(dgl_graph(elist)) self.graphs.append(dgl_graph(tuple(zip(*elist))))
with open("{}/pmpds.pkl".format(root), 'rb') as f: with open("{}/pmpds.pkl".format(root), 'rb') as f:
self.pmpds = _pickle_load(f) self.pmpds = _pickle_load(f)
self.labels = [] self.labels = []
......
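For context, a brief illustration of the data-format change behind ``tuple(zip(*elist))``: the diff
suggests :func:`dgl.graph` now expects a pair of source and destination node lists/tensors rather
than a list of ``(u, v)`` pairs, so the edge list is transposed before being passed in (the edge
list below is made up for the example):
>>> elist = [(0, 1), (1, 2), (2, 3)]
>>> g = dgl.graph(tuple(zip(*elist)))   # same edges as dgl.graph(([0, 1, 2], [1, 2, 3]))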
...@@ -157,7 +157,7 @@ class GINDataset(DGLBuiltinDataset): ...@@ -157,7 +157,7 @@ class GINDataset(DGLBuiltinDataset):
self.labels.append(self.glabel_dict[glabel]) self.labels.append(self.glabel_dict[glabel])
g = dgl_graph([]) g = dgl_graph(([], []))
g.add_nodes(n_nodes) g.add_nodes(n_nodes)
nlabels = [] # node labels nlabels = [] # node labels
......
...@@ -86,8 +86,8 @@ def save_graphs(filename, g_list, labels=None): ...@@ -86,8 +86,8 @@ def save_graphs(filename, g_list, labels=None):
Create :code:`DGLGraph`/:code:`DGLHeteroGraph` objects and initialize node Create :code:`DGLGraph`/:code:`DGLHeteroGraph` objects and initialize node
and edge features. and edge features.
>>> g1 = dgl.graph(([0, 1, 2], [1, 2, 3]) >>> g1 = dgl.graph(([0, 1, 2], [1, 2, 3]))
>>> g2 = dgl.graph(([0, 2], [2, 3]) >>> g2 = dgl.graph(([0, 2], [2, 3]))
>>> g2.edata["e"] = th.ones(2, 4) >>> g2.edata["e"] = th.ones(2, 4)
Save Graphs into file Save Graphs into file
......
...@@ -6,7 +6,7 @@ import networkx as nx ...@@ -6,7 +6,7 @@ import networkx as nx
from .. import backend as F from .. import backend as F
from .dgl_dataset import DGLDataset from .dgl_dataset import DGLDataset
from .utils import deprecate_property from .utils import deprecate_property
from ..convert import graph as dgl_graph from ..convert import from_networkx
__all__ = ['KarateClubDataset', 'KarateClub'] __all__ = ['KarateClubDataset', 'KarateClub']
...@@ -56,7 +56,7 @@ class KarateClubDataset(DGLDataset): ...@@ -56,7 +56,7 @@ class KarateClubDataset(DGLDataset):
label = np.asarray( label = np.asarray(
[kc_graph.nodes[i]['club'] != 'Mr. Hi' for i in kc_graph.nodes]).astype(np.int64) [kc_graph.nodes[i]['club'] != 'Mr. Hi' for i in kc_graph.nodes]).astype(np.int64)
label = F.tensor(label) label = F.tensor(label)
g = dgl_graph(kc_graph) g = from_networkx(kc_graph)
g.ndata['label'] = label g.ndata['label'] = label
self._graph = g self._graph = g
self._data = [g] self._data = [g]
......
...@@ -6,7 +6,7 @@ import numpy as np ...@@ -6,7 +6,7 @@ import numpy as np
from .dgl_dataset import DGLDataset from .dgl_dataset import DGLDataset
from .utils import save_graphs, load_graphs, makedirs from .utils import save_graphs, load_graphs, makedirs
from .. import backend as F from .. import backend as F
from ..convert import graph as dgl_graph from ..convert import from_networkx
from ..transform import add_self_loop from ..transform import add_self_loop
__all__ = ['MiniGCDataset'] __all__ = ['MiniGCDataset']
...@@ -147,7 +147,7 @@ class MiniGCDataset(DGLDataset): ...@@ -147,7 +147,7 @@ class MiniGCDataset(DGLDataset):
# preprocess # preprocess
for i in range(self.num_graphs): for i in range(self.num_graphs):
# convert to DGLGraph, and add self loops # convert to DGLGraph, and add self loops
self.graphs[i] = add_self_loop(dgl_graph(self.graphs[i])) self.graphs[i] = add_self_loop(from_networkx(self.graphs[i]))
self.labels = F.tensor(np.array(self.labels).astype(np.int)) self.labels = F.tensor(np.array(self.labels).astype(np.int))
def _gen_cycle(self, n): def _gen_cycle(self, n):
......
...@@ -300,10 +300,10 @@ class RDFGraphDataset(DGLBuiltinDataset): ...@@ -300,10 +300,10 @@ class RDFGraphDataset(DGLBuiltinDataset):
# convert to heterograph # convert to heterograph
if self.verbose: if self.verbose:
print('Convert to heterograph ...') print('Convert to heterograph ...')
hg = dgl.to_hetero(g, hg = dgl.to_heterogeneous(g,
ntypes, ntypes,
etypes, etypes,
metagraph=mg) metagraph=mg)
if self.verbose: if self.verbose:
print('#Node types:', len(hg.ntypes)) print('#Node types:', len(hg.ntypes))
print('#Canonical edge types:', len(hg.etypes)) print('#Canonical edge types:', len(hg.etypes))
......
...@@ -8,7 +8,7 @@ import os ...@@ -8,7 +8,7 @@ import os
from .dgl_dataset import DGLBuiltinDataset from .dgl_dataset import DGLBuiltinDataset
from .utils import _get_dgl_url, generate_mask_tensor, load_graphs, save_graphs, deprecate_property from .utils import _get_dgl_url, generate_mask_tensor, load_graphs, save_graphs, deprecate_property
from .. import backend as F from .. import backend as F
from ..convert import graph as dgl_graph from ..convert import from_scipy
class RedditDataset(DGLBuiltinDataset): class RedditDataset(DGLBuiltinDataset):
...@@ -140,7 +140,7 @@ class RedditDataset(DGLBuiltinDataset): ...@@ -140,7 +140,7 @@ class RedditDataset(DGLBuiltinDataset):
# graph # graph
coo_adj = sp.load_npz(os.path.join( coo_adj = sp.load_npz(os.path.join(
self.raw_path, "reddit{}_graph.npz".format(self._self_loop_str))) self.raw_path, "reddit{}_graph.npz".format(self._self_loop_str)))
self._graph = dgl_graph(coo_adj) self._graph = from_scipy(coo_adj)
# features and labels # features and labels
reddit_data = np.load(os.path.join(self.raw_path, "reddit_data.npz")) reddit_data = np.load(os.path.join(self.raw_path, "reddit_data.npz"))
features = reddit_data["feature"] features = reddit_data["feature"]
......
...@@ -8,7 +8,7 @@ import numpy.random as npr ...@@ -8,7 +8,7 @@ import numpy.random as npr
import scipy as sp import scipy as sp
from .dgl_dataset import DGLDataset from .dgl_dataset import DGLDataset
from ..convert import graph as dgl_graph from ..convert import from_scipy
from .. import batch from .. import batch
from .utils import save_info, save_graphs, load_info, load_graphs from .utils import save_info, save_graphs, load_info, load_graphs
...@@ -124,7 +124,7 @@ class SBMMixtureDataset(DGLDataset): ...@@ -124,7 +124,7 @@ class SBMMixtureDataset(DGLDataset):
pq = [generator() for _ in range(self._n_graphs)] pq = [generator() for _ in range(self._n_graphs)]
else: else:
raise RuntimeError() raise RuntimeError()
self._graphs = [dgl_graph(sbm(self._n_communities, self._block_size, *x)) for x in pq] self._graphs = [from_scipy(sbm(self._n_communities, self._block_size, *x)) for x in pq]
self._line_graphs = [g.line_graph(backtracking=False) for g in self._graphs] self._line_graphs = [g.line_graph(backtracking=False) for g in self._graphs]
in_degrees = lambda g: g.in_degrees().float() in_degrees = lambda g: g.in_degrees().float()
self._graph_degrees = [in_degrees(g) for g in self._graphs] self._graph_degrees = [in_degrees(g) for g in self._graphs]
......
...@@ -100,7 +100,7 @@ class LegacyTUDataset(DGLBuiltinDataset): ...@@ -100,7 +100,7 @@ class LegacyTUDataset(DGLBuiltinDataset):
DS_graph_labels = self._idx_from_zero( DS_graph_labels = self._idx_from_zero(
np.genfromtxt(self._file_path("graph_labels"), dtype=int)) np.genfromtxt(self._file_path("graph_labels"), dtype=int))
g = dgl_graph([]) g = dgl_graph(([], []))
g.add_nodes(int(DS_edge_list.max()) + 1) g.add_nodes(int(DS_edge_list.max()) + 1)
g.add_edges(DS_edge_list[:, 0], DS_edge_list[:, 1]) g.add_edges(DS_edge_list[:, 0], DS_edge_list[:, 1])
...@@ -296,7 +296,7 @@ class TUDataset(DGLBuiltinDataset): ...@@ -296,7 +296,7 @@ class TUDataset(DGLBuiltinDataset):
DS_graph_labels = self._idx_from_zero( DS_graph_labels = self._idx_from_zero(
loadtxt(self._file_path("graph_labels"), delimiter=",").astype(int)) loadtxt(self._file_path("graph_labels"), delimiter=",").astype(int))
g = dgl_graph([]) g = dgl_graph(([], []))
g.add_nodes(int(DS_edge_list.max()) + 1) g.add_nodes(int(DS_edge_list.max()) + 1)
g.add_edges(DS_edge_list[:, 0], DS_edge_list[:, 1]) g.add_edges(DS_edge_list[:, 0], DS_edge_list[:, 1])
......
...@@ -596,8 +596,11 @@ class EdgeCollator(Collator): ...@@ -596,8 +596,11 @@ class EdgeCollator(Collator):
'graph has multiple or no edge types; '\ 'graph has multiple or no edge types; '\
'please return a dict in negative sampler.' 'please return a dict in negative sampler.'
neg_srcdst = {self.g.canonical_etypes[0]: neg_srcdst} neg_srcdst = {self.g.canonical_etypes[0]: neg_srcdst}
# Get dtype from a tuple of tensors
dtype = F.dtype(list(neg_srcdst.values())[0][0])
neg_edges = { neg_edges = {
etype: neg_srcdst.get(etype, []) for etype in self.g.canonical_etypes} etype: neg_srcdst.get(etype, (F.tensor([], dtype), F.tensor([], dtype)))
for etype in self.g.canonical_etypes}
neg_pair_graph = heterograph( neg_pair_graph = heterograph(
neg_edges, {ntype: self.g.number_of_nodes(ntype) for ntype in self.g.ntypes}) neg_edges, {ntype: self.g.number_of_nodes(ntype) for ntype in self.g.ntypes})
......
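For context, a small sketch (with made-up relation names) of why the missing relations are filled
with empty tensors of a matching dtype rather than plain empty lists: :func:`dgl.heterograph`
builds every canonical edge type from a pair of node-ID tensors, and a plain empty list would not
carry the integer dtype of the sampled IDs:
>>> import dgl
>>> import torch
>>> neg_edges = {
...     ('user', 'plays', 'game'): (torch.tensor([0, 1]), torch.tensor([1, 0])),
...     ('user', 'follows', 'user'): (torch.tensor([], dtype=torch.int64),
...                                   torch.tensor([], dtype=torch.int64))
... }
>>> neg_g = dgl.heterograph(neg_edges, {'user': 3, 'game': 2})
>>> neg_g.number_of_edges('follows')
0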
...@@ -38,12 +38,12 @@ def rand_graph(num_nodes, num_edges, idtype=F.int64, device=F.cpu(), ...@@ -38,12 +38,12 @@ def rand_graph(num_nodes, num_edges, idtype=F.int64, device=F.cpu(),
rows = F.copy_to(F.astype(eids / num_nodes, idtype), device) rows = F.copy_to(F.astype(eids / num_nodes, idtype), device)
cols = F.copy_to(F.astype(eids % num_nodes, idtype), device) cols = F.copy_to(F.astype(eids % num_nodes, idtype), device)
g = convert.graph((rows, cols), g = convert.graph((rows, cols),
num_nodes=num_nodes, validate=False, num_nodes=num_nodes,
formats=formats,
idtype=idtype, device=device) idtype=idtype, device=device)
return g return g.formats(formats)
def rand_bipartite(num_src_nodes, num_dst_nodes, num_edges, def rand_bipartite(utype, etype, vtype,
num_src_nodes, num_dst_nodes, num_edges,
idtype=F.int64, device=F.cpu(), idtype=F.int64, device=F.cpu(),
formats=['csr', 'coo', 'csc']): formats=['csr', 'coo', 'csc']):
"""Generate a random bipartite graph of the given number of src/dst nodes and """Generate a random bipartite graph of the given number of src/dst nodes and
...@@ -53,6 +53,12 @@ def rand_bipartite(num_src_nodes, num_dst_nodes, num_edges, ...@@ -53,6 +53,12 @@ def rand_bipartite(num_src_nodes, num_dst_nodes, num_edges,
Parameters Parameters
---------- ----------
utype : str
The name of the source node type.
etype : str
The name of the edge type.
vtype : str
The name of the destination node type.
num_src_nodes : int num_src_nodes : int
The number of source nodes, the :math:`|U|` in :math:`G=(U,V,E)`. The number of source nodes, the :math:`|U|` in :math:`G=(U,V,E)`.
num_dst_nodes : int num_dst_nodes : int
...@@ -75,8 +81,7 @@ def rand_bipartite(num_src_nodes, num_dst_nodes, num_edges, ...@@ -75,8 +81,7 @@ def rand_bipartite(num_src_nodes, num_dst_nodes, num_edges,
eids = random.choice(num_src_nodes * num_dst_nodes, num_edges, replace=False) eids = random.choice(num_src_nodes * num_dst_nodes, num_edges, replace=False)
rows = F.copy_to(F.astype(eids / num_dst_nodes, idtype), device) rows = F.copy_to(F.astype(eids / num_dst_nodes, idtype), device)
cols = F.copy_to(F.astype(eids % num_dst_nodes, idtype), device) cols = F.copy_to(F.astype(eids % num_dst_nodes, idtype), device)
g = convert.bipartite((rows, cols), g = convert.heterograph({(utype, etype, vtype): (rows, cols)},
num_nodes=(num_src_nodes, num_dst_nodes), validate=False, {utype: num_src_nodes, vtype: num_dst_nodes},
idtype=idtype, device=device, idtype=idtype, device=device)
formats=formats) return g.formats(formats)
return g
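A short usage sketch of the updated signature (assuming the generator is exposed as
``dgl.rand_bipartite``; the type names and sizes below are arbitrary):
>>> g = dgl.rand_bipartite('user', 'clicks', 'item', 50, 100, 30)
>>> g.number_of_edges()
30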
"""Classes for heterogeneous graphs.""" """Classes for heterogeneous graphs."""
#pylint: disable= too-many-lines #pylint: disable= too-many-lines
from collections import defaultdict from collections import defaultdict, Iterable
from collections.abc import Mapping from collections.abc import Mapping
from contextlib import contextmanager from contextlib import contextmanager
import copy import copy
...@@ -75,38 +75,21 @@ class DGLHeteroGraph(object): ...@@ -75,38 +75,21 @@ class DGLHeteroGraph(object):
One can construct the graph as follows: One can construct the graph as follows:
>>> follows_g = dgl.graph(([0, 1], [1, 2]), 'user', 'follows')
>>> plays_g = dgl.bipartite(([0, 1, 1, 2], [0, 0, 1, 1]), 'user', 'plays', 'game')
>>> devs_g = dgl.bipartite(([0, 1], [0, 1]), 'developer', 'develops', 'game')
>>> g = dgl.hetero_from_relations([follows_g, plays_g, devs_g])
Or equivalently
>>> g = dgl.heterograph({ >>> g = dgl.heterograph({
... ('user', 'follows', 'user'): ([0, 1], [1, 2]), ... ('user', 'follows', 'user'): ([0, 1], [1, 2]),
... ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 1, 1]), ... ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 1, 1]),
... ('developer', 'develops', 'game'): ([0, 1], [0, 1]), ... ('developer', 'develops', 'game'): ([0, 1], [0, 1]),
... }) ... })
:func:`dgl.graph` and :func:`dgl.bipartite` can create a graph from a variety of
data types including:
* edge list
* edge tuples
* networkx graph
* scipy sparse matrix
Click the function names for more details.
Then one can query the graph structure by specifying the ``ntype`` or ``etype`` arguments: Then one can query the graph structure by specifying the ``ntype`` or ``etype`` arguments:
>>> g.number_of_nodes('user') >>> g.number_of_nodes('user')
3 3
>>> g.number_of_edges('plays') >>> g.number_of_edges('plays')
4 4
>>> g.out_degrees(etype='develops') # out-degrees of source nodes of 'develops' relation >>> g.out_degrees(etype='develops') # out-degrees of source nodes of 'develops' edge type
tensor([1, 1]) tensor([1, 1])
>>> g.in_edges(0, etype='develops') # in-edges of destination node 0 of 'develops' relation >>> g.in_edges(0, etype='develops') # in-edges of destination node 0 of 'develops' edge type
(tensor([0]), tensor([0])) (tensor([0]), tensor([0]))
Or on the sliced graph for an edge type: Or on the sliced graph for an edge type:
...@@ -125,9 +108,10 @@ class DGLHeteroGraph(object): ...@@ -125,9 +108,10 @@ class DGLHeteroGraph(object):
For example, suppose a graph that has two types of relation "user-watches-movie" For example, suppose a graph that has two types of relation "user-watches-movie"
and "user-watches-TV" as follows: and "user-watches-TV" as follows:
>>> g0 = dgl.bipartite(([0, 1, 1], [1, 0, 1]), 'user', 'watches', 'movie') >>> GG = dgl.heterograph({
>>> g1 = dgl.bipartite(([0, 1], [0, 1]), 'user', 'watches', 'TV') ... ('user', 'watches', 'movie'): ([0, 1, 1], [1, 0, 1]),
>>> GG = dgl.hetero_from_relations([g0, g1]) # Merge the two graphs ... ('user', 'watches', 'TV'): ([0, 1], [0, 1])
... })
To distinguish between the two "watches" edge type, one must specify a full triplet: To distinguish between the two "watches" edge type, one must specify a full triplet:
...@@ -400,11 +384,11 @@ class DGLHeteroGraph(object): ...@@ -400,11 +384,11 @@ class DGLHeteroGraph(object):
**Heterogeneous Graphs with Multiple Node Types** **Heterogeneous Graphs with Multiple Node Types**
>>> g = dgl.heterograph({ >>> g = dgl.heterograph({
>>> ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]), ... ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
>>> torch.tensor([0, 0, 1, 1])), ... torch.tensor([0, 0, 1, 1])),
>>> ('developer', 'develops', 'game'): (torch.tensor([0, 1]), ... ('developer', 'develops', 'game'): (torch.tensor([0, 1]),
>>> torch.tensor([0, 1])) ... torch.tensor([0, 1]))
>>> }) ... })
>>> g.add_nodes(2) >>> g.add_nodes(2)
DGLError: Node type name must be specified DGLError: Node type name must be specified
if there are more than one node types. if there are more than one node types.
...@@ -547,7 +531,7 @@ class DGLHeteroGraph(object): ...@@ -547,7 +531,7 @@ class DGLHeteroGraph(object):
We can also assign features for the new edges in adding new edges. We can also assign features for the new edges in adding new edges.
>>> g.add_edges(torch.tensor([0, 0]), torch.tensor([2, 2]), >>> g.add_edges(torch.tensor([0, 0]), torch.tensor([2, 2]),
>>> {'h': torch.tensor([[1.], [2.]]), 'w': torch.ones(2, 1)}) ... {'h': torch.tensor([[1.], [2.]]), 'w': torch.ones(2, 1)})
>>> g.edata['h'] >>> g.edata['h']
tensor([[1.], [1.], [1.], [1.], [0.], [1.], [2.]]) tensor([[1.], [1.], [1.], [1.], [0.], [1.], [2.]])
...@@ -560,17 +544,17 @@ class DGLHeteroGraph(object): ...@@ -560,17 +544,17 @@ class DGLHeteroGraph(object):
**Heterogeneous Graphs with Multiple Edge Types** **Heterogeneous Graphs with Multiple Edge Types**
>>> g = dgl.heterograph({ >>> g = dgl.heterograph({
>>> ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]), ... ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
>>> torch.tensor([0, 0, 1, 1])), ... torch.tensor([0, 0, 1, 1])),
>>> ('developer', 'develops', 'game'): (torch.tensor([0, 1]), ... ('developer', 'develops', 'game'): (torch.tensor([0, 1]),
>>> torch.tensor([0, 1])) ... torch.tensor([0, 1]))
>>> }) ... })
>>> g.add_edges(torch.tensor([3]), torch.tensor([3])) >>> g.add_edges(torch.tensor([3]), torch.tensor([3]))
DGLError: Edge type name must be specified DGLError: Edge type name must be specified
if there are more than one edge types. if there are more than one edge types.
>>> g.number_of_edges('plays') >>> g.number_of_edges('plays')
4 4
>>> g.add_edges(torch.tensor([3]), torch.tensor([3]), etype='plays') >>> g.add_edges(torch.tensor([3]), torch.tensor([3]), etype='plays')
>>> g.number_of_edges('plays') >>> g.number_of_edges('plays')
5 5
...@@ -696,11 +680,11 @@ class DGLHeteroGraph(object): ...@@ -696,11 +680,11 @@ class DGLHeteroGraph(object):
**Heterogeneous Graphs with Multiple Edge Types** **Heterogeneous Graphs with Multiple Edge Types**
>>> g = dgl.heterograph({ >>> g = dgl.heterograph({
>>> ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]), ... ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
>>> torch.tensor([0, 0, 1, 1])), ... torch.tensor([0, 0, 1, 1])),
>>> ('developer', 'develops', 'game'): (torch.tensor([0, 1]), ... ('developer', 'develops', 'game'): (torch.tensor([0, 1]),
>>> torch.tensor([0, 1])) ... torch.tensor([0, 1]))
>>> }) ... })
>>> g.remove_edges(torch.tensor([0, 1])) >>> g.remove_edges(torch.tensor([0, 1]))
DGLError: Edge type name must be specified DGLError: Edge type name must be specified
if there are more than one edge types. if there are more than one edge types.
...@@ -784,11 +768,11 @@ class DGLHeteroGraph(object): ...@@ -784,11 +768,11 @@ class DGLHeteroGraph(object):
**Heterogeneous Graphs with Multiple Node Types** **Heterogeneous Graphs with Multiple Node Types**
>>> g = dgl.heterograph({ >>> g = dgl.heterograph({
>>> ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]), ... ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
>>> torch.tensor([0, 0, 1, 1])), ... torch.tensor([0, 0, 1, 1])),
>>> ('developer', 'develops', 'game'): (torch.tensor([0, 1]), ... ('developer', 'develops', 'game'): (torch.tensor([0, 1]),
>>> torch.tensor([0, 1])) ... torch.tensor([0, 1]))
>>> }) ... })
>>> g.remove_nodes(torch.tensor([0, 1])) >>> g.remove_nodes(torch.tensor([0, 1]))
DGLError: Node type name must be specified DGLError: Node type name must be specified
if there are more than one node types. if there are more than one node types.
...@@ -853,7 +837,7 @@ class DGLHeteroGraph(object): ...@@ -853,7 +837,7 @@ class DGLHeteroGraph(object):
A uni-bipartite heterograph can further divide its node types into two sets: A uni-bipartite heterograph can further divide its node types into two sets:
SRC and DST. All edges are from nodes in SRC to nodes in DST. The following APIs SRC and DST. All edges are from nodes in SRC to nodes in DST. The following APIs
can be used to get the nodes and types that belong to SRC and DST sets: can be used to get the type, data, and nodes that belong to SRC and DST sets:
* :func:`srctype` and :func:`dsttype` * :func:`srctype` and :func:`dsttype`
* :func:`srcdata` and :func:`dstdata` * :func:`srcdata` and :func:`dstdata`
...@@ -867,67 +851,131 @@ class DGLHeteroGraph(object): ...@@ -867,67 +851,131 @@ class DGLHeteroGraph(object):
@property @property
def ntypes(self): def ntypes(self):
"""Return the list of node types of this graph. """Return the node types of the graph.
Returns Returns
------- -------
list of str list of str
Each ``str`` is a node type.
Examples Examples
-------- --------
The following example uses PyTorch backend.
>>> import dgl
>>> import torch
>>> follows_g = dgl.graph(([0, 1], [1, 2]), 'user', 'follows') >>> g = dgl.heterograph({
>>> plays_g = dgl.bipartite(([0, 1, 1, 2], [0, 0, 1, 1]), 'user', 'plays', 'game') ... ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
>>> g = dgl.hetero_from_relations([follows_g, plays_g]) ... ('user', 'follows', 'game'): (torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3])),
... ('user', 'plays', 'game'): (torch.tensor([1, 3]), torch.tensor([2, 3]))
... })
>>> g.ntypes >>> g.ntypes
['user', 'game'] ['game', 'user']
""" """
return self._ntypes return self._ntypes
@property @property
def etypes(self): def etypes(self):
"""Return the list of edge types of this graph. """Return the edge types of the graph.
Returns Returns
------- -------
list of str list of str
Each ``str`` is an edge type.
Notes
-----
An edge type can appear in multiple canonical edge types. For example, ``'interacts'``
can appear in two canonical edge types ``('drug', 'interacts', 'drug')`` and
``('protein', 'interacts', 'protein')``. It is recommended to use
:func:`~dgl.DGLGraph.canonical_etypes` in this case.
Examples Examples
-------- --------
The following example uses PyTorch backend.
>>> import dgl
>>> import torch
>>> follows_g = dgl.graph(([0, 1], [1, 2]), 'user', 'follows') >>> g = dgl.heterograph({
>>> plays_g = dgl.bipartite(([0, 1, 1, 2], [0, 0, 1, 1]), 'user', 'plays', 'game') ... ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
>>> g = dgl.hetero_from_relations([follows_g, plays_g]) ... ('user', 'follows', 'game'): (torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3])),
... ('user', 'plays', 'game'): (torch.tensor([1, 3]), torch.tensor([2, 3]))
... })
>>> g.etypes >>> g.etypes
['follows', 'plays'] ['follows', 'follows', 'plays']
""" """
return self._etypes return self._etypes
@property @property
def canonical_etypes(self): def canonical_etypes(self):
"""Return the list of canonical edge types of this graph. """Return the canonical edge types of the graph.
A canonical edge type is a tuple of string (src_type, edge_type, dst_type). A canonical edge type is a 3-tuple of str ``src_type, edge_type, dst_type``, where
``src_type``, ``edge_type``, ``dst_type`` are the type of the source nodes, edges
and destination nodes respectively.
Returns Returns
------- -------
list of 3-tuples list of 3-tuple of str
Each 3-tuple of str is a canonical edge type.
Examples Examples
-------- --------
The following example uses PyTorch backend.
>>> import dgl
>>> import torch
>>> follows_g = dgl.graph(([0, 1], [1, 2]), 'user', 'follows') >>> g = dgl.heterograph({
>>> plays_g = dgl.bipartite(([0, 1, 1, 2], [0, 0, 1, 1]), 'user', 'plays', 'game') ... ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
>>> g = dgl.hetero_from_relations([follows_g, plays_g]) ... ('user', 'follows', 'game'): (torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3])),
... ('user', 'plays', 'game'): (torch.tensor([1, 3]), torch.tensor([2, 3]))
... })
>>> g.canonical_etypes >>> g.canonical_etypes
[('user', 'follows', 'user'), ('user', 'plays', 'game')] [('user', 'follows', 'user'),
('user', 'follows', 'game'),
('user', 'plays', 'game')]
""" """
return self._canonical_etypes return self._canonical_etypes
@property @property
def srctypes(self): def srctypes(self):
"""Return the node types in the SRC category. Return :attr:``ntypes`` if """Return the source node types.
the graph is not a uni-bipartite graph.
Returns
-------
list of str
* If the graph is a uni-bipartite graph, it returns the source node types.
For a definition of uni-bipartite, see :func:`is_unibipartite`.
* Otherwise, it returns all node types in the graph.
Examples
--------
The following example uses PyTorch backend.
>>> import dgl
>>> import torch
Query for a uni-bipartite graph.
>>> g = dgl.heterograph({
... ('user', 'plays', 'game'): (torch.tensor([0]), torch.tensor([1])),
... ('developer', 'develops', 'game'): (torch.tensor([1]), torch.tensor([2]))
... })
>>> g.srctypes
['developer', 'user']
Query for a graph that is not uni-bipartite.
>>> g = dgl.heterograph({
... ('user', 'follows', 'user'): (torch.tensor([0]), torch.tensor([1])),
... ('developer', 'develops', 'game'): (torch.tensor([1]), torch.tensor([2]))
... })
>>> g.srctypes
['developer', 'game', 'user']
""" """
if self.is_unibipartite: if self.is_unibipartite:
return sorted(list(self._srctypes_invmap.keys())) return sorted(list(self._srctypes_invmap.keys()))
...@@ -936,8 +984,41 @@ class DGLHeteroGraph(object): ...@@ -936,8 +984,41 @@ class DGLHeteroGraph(object):
@property @property
def dsttypes(self): def dsttypes(self):
"""Return the node types in the DST category. Return :attr:``ntypes`` if """Return the destination node types.
the graph is not a uni-bipartite graph.
Returns
-------
list of str
Each str is a node type.
* If the graph is a uni-bipartite graph, it returns the destination node types.
For a definition of uni-bipartite, see :func:`is_unibipartite`.
* Otherwise, it returns all node types in the graph.
Examples
--------
The following example uses PyTorch backend.
>>> import dgl
>>> import torch
Query for a uni-bipartite graph.
>>> g = dgl.heterograph({
... ('user', 'plays', 'game'): (torch.tensor([0]), torch.tensor([1])),
... ('developer', 'develops', 'game'): (torch.tensor([1]), torch.tensor([2]))
... })
>>> g.dsttypes
['game']
Query for a graph that is not uni-bipartite.
>>> g = dgl.heterograph({
... ('user', 'follows', 'user'): (torch.tensor([0]), torch.tensor([1])),
... ('developer', 'develops', 'game'): (torch.tensor([1]), torch.tensor([2]))
... })
>>> g.dsttypes
['developer', 'game', 'user']
""" """
if self.is_unibipartite: if self.is_unibipartite:
return sorted(list(self._dsttypes_invmap.keys())) return sorted(list(self._dsttypes_invmap.keys()))
...@@ -945,33 +1026,34 @@ class DGLHeteroGraph(object): ...@@ -945,33 +1026,34 @@ class DGLHeteroGraph(object):
return self.ntypes return self.ntypes
def metagraph(self): def metagraph(self):
"""Return the metagraph as networkx.MultiDiGraph. """Return the metagraph of the heterograph.
The nodes are labeled with node type names. The metagraph (or network schema) of a heterogeneous network specifies type constraints
The edges have their keys holding the edge type names. on the sets of nodes and edges between the nodes. For a formal definition, refer to
`Yizhou et al. <https://www.kdd.org/exploration_files/V14-02-03-Sun.pdf>`_.
Returns Returns
------- -------
networkx.MultiDiGraph networkx.MultiDiGraph
The metagraph.
Examples Examples
-------- --------
The following example uses PyTorch backend.
>>> follows_g = dgl.graph(([0, 1], [1, 2]), 'user', 'follows') >>> import dgl
>>> plays_g = dgl.bipartite(([0, 1, 1, 2], [0, 0, 1, 1]), 'user', 'plays', 'game') >>> import torch
>>> g = dgl.hetero_from_relations([follows_g, plays_g])
>>> meta_g = g.metagraph()
The metagraph then has two nodes and two edges.
>>> g = dgl.heterograph({
... ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
... ('user', 'follows', 'game'): (torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3])),
... ('user', 'plays', 'game'): (torch.tensor([1, 3]), torch.tensor([2, 3]))
... })
>>> meta_g = g.metagraph()
>>> meta_g.nodes() >>> meta_g.nodes()
NodeView(('user', 'game')) NodeView(('user', 'game'))
>>> meta_g.number_of_nodes()
2
>>> meta_g.edges() >>> meta_g.edges()
OutMultiEdgeDataView([('user', 'user'), ('user', 'game')]) OutMultiEdgeDataView([('user', 'user'), ('user', 'game'), ('user', 'game')])
>>> meta_g.number_of_edges()
2
""" """
nx_graph = self._graph.metagraph.to_networkx() nx_graph = self._graph.metagraph.to_networkx()
nx_metagraph = nx.MultiDiGraph() nx_metagraph = nx.MultiDiGraph()
...@@ -981,38 +1063,56 @@ class DGLHeteroGraph(object): ...@@ -981,38 +1063,56 @@ class DGLHeteroGraph(object):
return nx_metagraph return nx_metagraph
def to_canonical_etype(self, etype): def to_canonical_etype(self, etype):
"""Convert edge type to canonical etype: (srctype, etype, dsttype). """Convert an edge type to the corresponding canonical edge type in the graph.
The input can already be a canonical tuple. A canonical edge type is a 3-tuple of strings ``src_type, edge_type, dst_type``, where
``src_type``, ``edge_type``, ``dst_type`` are separately the type of source
nodes, edges and destination nodes.
Parameters Parameters
---------- ----------
etype : str or tuple of str etype : str or 3-tuple of str
Edge type If :attr:`etype` is an edge type (str), it returns the corresponding canonical edge
type in the graph. If :attr:`etype` is already a canonical edge type
(3-tuple of str), it simply returns :attr:`etype`.
Returns Returns
------- -------
tuple of str 3-tuple of str
The canonical edge type corresponding to the edge type.
Notes
-----
If :attr:`etype` is an edge type, the API expects it to appear only once in the graph. For
example, in a graph with canonical edge types ``('A', 'follows', 'B')``,
``('A', 'follows', 'C')`` and ``('B', 'watches', 'D')``, ``'follows'`` is an invalid value
for :attr:`etype` while ``'watches'`` is a valid one.
Examples Examples
-------- --------
The following example uses PyTorch backend.
Instantiate a heterograph. >>> import dgl
>>> import torch
Create a heterograph.
>>> g1 = dgl.graph(([0, 1], [1, 2]), 'user', 'follows') >>> g = dgl.heterograph({
>>> g2 = dgl.bipartite(([0, 1, 1, 2], [0, 0, 1, 1]), 'user', 'plays', 'game') ... ('user', 'follows', 'user'): ([0, 1], [1, 2]),
>>> g3 = dgl.bipartite(([0, 1], [0, 1]), 'developer', 'follows', 'game') ... ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 1, 1]),
>>> g = dgl.hetero_from_relations([g1, g2, g3]) ... ('developer', 'follows', 'game'): ([0, 1], [0, 1])
... })
Get canonical edge types. Map an edge type to its corresponding canonical edge type.
>>> g.to_canonical_etype('plays') >>> g.to_canonical_etype('plays')
('user', 'plays', 'game') ('user', 'plays', 'game')
>>> g.to_canonical_etype(('user', 'plays', 'game')) >>> g.to_canonical_etype(('user', 'plays', 'game'))
('user', 'plays', 'game') ('user', 'plays', 'game')
>>> g.to_canonical_etype('follows')
DGLError: Edge type "follows" is ambiguous. See Also
Please use canonical etype type in the form of (srctype, etype, dsttype) --------
canonical_etypes
""" """
if etype is None: if etype is None:
if len(self.etypes) != 1: if len(self.etypes) != 1:
...@@ -1026,8 +1126,8 @@ class DGLHeteroGraph(object): ...@@ -1026,8 +1126,8 @@ class DGLHeteroGraph(object):
if ret is None: if ret is None:
raise DGLError('Edge type "{}" does not exist.'.format(etype)) raise DGLError('Edge type "{}" does not exist.'.format(etype))
if len(ret) == 0: if len(ret) == 0:
raise DGLError('Edge type "%s" is ambiguous. Please use canonical etype ' raise DGLError('Edge type "%s" is ambiguous. Please use canonical edge type '
'type in the form of (srctype, etype, dsttype)' % etype) 'in the form of (srctype, etype, dsttype)' % etype)
return ret return ret
def get_ntype_id(self, ntype): def get_ntype_id(self, ntype):
...@@ -1144,11 +1244,93 @@ class DGLHeteroGraph(object): ...@@ -1144,11 +1244,93 @@ class DGLHeteroGraph(object):
################################################################# #################################################################
@property @property
def batch_size(self): def batch_size(self):
"""TBD""" """Return the number of graphs in the batched graph.
Returns
-------
int
The number of graphs in the batch. If the graph is not a batched one,
it will return 1.
Examples
--------
The following example uses PyTorch backend.
>>> import dgl
>>> import torch
Query for homogeneous graphs.
>>> g1 = dgl.graph((torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3])))
>>> g1.batch_size
1
>>> g2 = dgl.graph((torch.tensor([0, 0, 0, 1]), torch.tensor([0, 1, 2, 0])))
>>> bg = dgl.batch([g1, g2])
>>> bg.batch_size
2
Query for heterogeneous graphs.
>>> hg1 = dgl.heterograph({
... ('user', 'plays', 'game') : (torch.tensor([0, 1]), torch.tensor([0, 0]))})
>>> hg1.batch_size
1
>>> hg2 = dgl.heterograph({
... ('user', 'plays', 'game') : (torch.tensor([0, 0]), torch.tensor([1, 0]))})
>>> bg = dgl.batch([hg1, hg2])
>>> bg.batch_size
2
"""
return len(self.batch_num_nodes(self.ntypes[0])) return len(self.batch_num_nodes(self.ntypes[0]))
def batch_num_nodes(self, ntype=None): def batch_num_nodes(self, ntype=None):
"""TBD""" """Return the number of nodes for each graph in the batch with the specified node type.
Parameters
----------
ntype : str, optional
The node type for query. If the graph has multiple node types, one must
specify the argument. Otherwise, it can be omitted. If the graph is not a batched
one, it will return a tensor of length 1 that holds the number of nodes in the graph.
Returns
-------
Tensor
The number of nodes with the specified type for each graph in the batch. The i-th
element is the number of nodes with the specified type for the i-th graph.
Examples
--------
The following example uses PyTorch backend.
>>> import dgl
>>> import torch
Query for homogeneous graphs.
>>> g1 = dgl.graph((torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3])))
>>> g1.batch_num_nodes()
tensor([4])
>>> g2 = dgl.graph((torch.tensor([0, 0, 0, 1]), torch.tensor([0, 1, 2, 0])))
>>> bg = dgl.batch([g1, g2])
>>> bg.batch_num_nodes()
tensor([4, 3])
Query for heterogeneous graphs.
>>> hg1 = dgl.heterograph({
... ('user', 'plays', 'game') : (torch.tensor([0, 1]), torch.tensor([0, 0]))})
>>> hg2 = dgl.heterograph({
... ('user', 'plays', 'game') : (torch.tensor([0, 0]), torch.tensor([1, 0]))})
>>> bg = dgl.batch([hg1, hg2])
>>> bg.batch_num_nodes('user')
tensor([2, 1])
"""
if ntype is not None and ntype not in self.ntypes:
raise DGLError('Expect ntype in {}, got {}'.format(self.ntypes, ntype))
if self._batch_num_nodes is None: if self._batch_num_nodes is None:
self._batch_num_nodes = {} self._batch_num_nodes = {}
for ty in self.ntypes: for ty in self.ntypes:
...@@ -1170,7 +1352,52 @@ class DGLHeteroGraph(object): ...@@ -1170,7 +1352,52 @@ class DGLHeteroGraph(object):
self._batch_num_nodes = val self._batch_num_nodes = val
def batch_num_edges(self, etype=None): def batch_num_edges(self, etype=None):
"""TBD""" """Return the number of edges for each graph in the batch with the specified edge type.
Parameters
----------
etype : str or tuple of str, optional
The edge type for query, which can be an edge type (str) or a canonical edge type
(3-tuple of str). When an edge type appears in multiple canonical edge types, one
must use a canonical edge type. If the graph has multiple edge types, one must
specify the argument. Otherwise, it can be omitted.
Returns
-------
Tensor
The number of edges with the specified type for each graph in the batch. The i-th
element is the number of edges with the specified type for the i-th graph.
If the graph is not a batched one, it will return a tensor of length 1 that holds
the number of edges in the graph.
Examples
--------
The following example uses PyTorch backend.
>>> import dgl
>>> import torch
Query for homogeneous graphs.
>>> g1 = dgl.graph((torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3])))
>>> g1.batch_num_edges()
tensor([3])
>>> g2 = dgl.graph((torch.tensor([0, 0, 0, 1]), torch.tensor([0, 1, 2, 0])))
>>> bg = dgl.batch([g1, g2])
>>> bg.batch_num_edges()
tensor([3, 4])
Query for heterogeneous graphs.
>>> hg1 = dgl.heterograph({
... ('user', 'plays', 'game') : (torch.tensor([0, 1]), torch.tensor([0, 0]))})
>>> hg2 = dgl.heterograph({
... ('user', 'plays', 'game') : (torch.tensor([0, 0]), torch.tensor([1, 0]))})
>>> bg = dgl.batch([hg1, hg2])
>>> bg.batch_num_edges('plays')
tensor([2, 2])
"""
if self._batch_num_edges is None: if self._batch_num_edges is None:
self._batch_num_edges = {} self._batch_num_edges = {}
for ty in self.canonical_etypes: for ty in self.canonical_etypes:
...@@ -1181,6 +1408,8 @@ class DGLHeteroGraph(object): ...@@ -1181,6 +1408,8 @@ class DGLHeteroGraph(object):
raise DGLError('Edge type name must be specified if there are more than one ' raise DGLError('Edge type name must be specified if there are more than one '
'edge types.') 'edge types.')
etype = self.canonical_etypes[0] etype = self.canonical_etypes[0]
else:
etype = self.to_canonical_etype(etype)
return self._batch_num_edges[etype] return self._batch_num_edges[etype]
def set_batch_num_edges(self, val): def set_batch_num_edges(self, val):
...@@ -1197,37 +1426,114 @@ class DGLHeteroGraph(object): ...@@ -1197,37 +1426,114 @@ class DGLHeteroGraph(object):
@property @property
def nodes(self): def nodes(self):
"""Return a node view that can be used to set/get feature """Return a node view
data of a single node type.
One can use it for:
1. Getting the node IDs for a single node type.
2. Setting/getting features for all nodes of a single node type.
Examples Examples
-------- --------
The following example uses PyTorch backend. The following example uses PyTorch backend.
To set features of all users >>> import dgl
>>> import torch
Create a homogeneous graph and a heterogeneous graph of two node types.
>>> g = dgl.graph((torch.tensor([0, 1]), torch.tensor([1, 2])))
>>> hg = dgl.heterograph({
... ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
... ('user', 'plays', 'game'): (torch.tensor([3, 4]), torch.tensor([5, 6]))
... })
Get the node IDs of the homogeneous graph.
>>> g.nodes()
tensor([0, 1, 2])
Get the node IDs of the heterogeneous graph. With multiple node types introduced,
one needs to specify the node type for query.
>>> hg.nodes('user')
tensor([0, 1, 2, 3, 4])
>>> g = dgl.graph(([0, 1], [1, 2]), 'user', 'follows') Set and get a feature 'h' for all nodes of a single type in the heterogeneous graph.
>>> g.nodes['user'].data['h'] = torch.zeros(3, 5)
>>> hg.nodes['user'].data['h'] = torch.ones(5, 1)
>>> hg.nodes['user'].data['h']
tensor([[1.], [1.], [1.], [1.], [1.]])
To set node features for a graph with a single node type, use :func:`DGLGraph.ndata`.
See Also See Also
-------- --------
ndata ndata
""" """
# Todo (Mufei) Replace the syntax g.nodes[...].ndata[...] with g.nodes[...][...]
return HeteroNodeView(self, self.get_ntype_id) return HeteroNodeView(self, self.get_ntype_id)
@property @property
def srcnodes(self): def srcnodes(self):
"""Return a SRC node view that can be used to set/get feature """Return a node view for source nodes
data of a single node type.
If the graph is a uni-bipartite graph (see :func:`is_unibipartite` for reference),
this is :func:`nodes` restricted to source node types. Otherwise, it is an alias
for :func:`nodes`.
One can use it for:
1. Getting the node IDs for a single node type.
2. Setting/getting features for all nodes of a single node type.
Examples Examples
-------- --------
The following example uses PyTorch backend. The following example uses PyTorch backend.
To set features of all users >>> import dgl
>>> import torch
Create a uni-bipartite graph.
>>> g = dgl.heterograph({
... ('user', 'plays', 'game'): (torch.tensor([0]), torch.tensor([1])),
... ('developer', 'develops', 'game'): (torch.tensor([1]), torch.tensor([2]))
... })
Get the node IDs for source node types.
>>> g.srcnodes('user')
tensor([0])
>>> g.srcnodes('developer')
tensor([0, 1])
Set/get features for source node types.
>>> g.srcnodes['user'].data['h'] = torch.ones(1, 1)
>>> g.srcnodes['user'].data['h']
tensor([[1.]])
>>> g = dgl.bipartite(([0, 1], [1, 2]), 'user', 'plays', 'game') Create a graph that is not uni-bipartite.
>>> g.srcnodes['user'].data['h'] = torch.zeros(2, 5)
>>> g = dgl.heterograph({
... ('user', 'follows', 'user'): (torch.tensor([0]), torch.tensor([1])),
... ('developer', 'develops', 'game'): (torch.tensor([1]), torch.tensor([2]))
... })
:func:`dgl.DGLGraph.srcnodes` falls back to :func:`dgl.DGLGraph.nodes` and one can
get the node IDs for both source and destination node types.
>>> g.srcnodes('game')
tensor([0, 1, 2])
One can also set/get features for destination node types in this case.
>>> g.srcnodes['game'].data['h'] = torch.ones(3, 1)
>>> g.srcnodes['game'].data['h']
tensor([[1.],
[1.],
[1.]])
See Also See Also
-------- --------
...@@ -1237,17 +1543,63 @@ class DGLHeteroGraph(object): ...@@ -1237,17 +1543,63 @@ class DGLHeteroGraph(object):
@property @property
def dstnodes(self): def dstnodes(self):
"""Return a DST node view that can be used to set/get feature """Return a node view for destination nodes
data of a single node type.
If the graph is a uni-bipartite graph (see :func:`is_unibipartite` for reference),
this is :func:`nodes` restricted to destination node types. Otherwise, it is an alias
for :func:`nodes`.
One can use it for:
1. Getting the node IDs for a single node type.
2. Setting/getting features for all nodes of a single node type.
Examples Examples
-------- --------
The following example uses PyTorch backend. The following example uses PyTorch backend.
To set features of all games >>> import dgl
>>> import torch
Create a uni-bipartite graph.
>>> g = dgl.bipartite(([0, 1], [1, 2]), 'user', 'plays', 'game') >>> g = dgl.heterograph({
>>> g.dstnodes['game'].data['h'] = torch.zeros(3, 5) ... ('user', 'plays', 'game'): (torch.tensor([0]), torch.tensor([1])),
... ('developer', 'develops', 'game'): (torch.tensor([1]), torch.tensor([2]))
... })
Get the node IDs for destination node types.
>>> g.dstnodes('game')
tensor([0, 1, 2])
Set/get features for destination node types.
>>> g.dstnodes['game'].data['h'] = torch.ones(3, 1)
>>> g.dstnodes['game'].data['h']
tensor([[1.],
[1.],
[1.]])
Create a graph that is not uni-bipartite.
>>> g = dgl.heterograph({
... ('user', 'follows', 'user'): (torch.tensor([0]), torch.tensor([1])),
... ('developer', 'develops', 'game'): (torch.tensor([1]), torch.tensor([2]))
... })
:func:`dgl.DGLGraph.dstnodes` falls back to :func:`dgl.DGLGraph.nodes` and one can
get the node IDs for both source and destination node types.
>>> g.dstnodes('developer')
tensor([0, 1])
One can also set/get features for source node types in this case.
>>> g.dstnodes['developer'].data['h'] = torch.ones(2, 1)
>>> g.dstnodes['developer'].data['h']
tensor([[1.],
[1.]])
See Also See Also
-------- --------
...@@ -1257,50 +1609,53 @@ class DGLHeteroGraph(object): ...@@ -1257,50 +1609,53 @@ class DGLHeteroGraph(object):
@property @property
def ndata(self): def ndata(self):
"""Return the data view of all the nodes. """Return a node data view for setting/getting node features
Let ``g`` be a DGLGraph. If ``g`` is a graph of a single node type, ``g.ndata[feat]``
returns the node feature associated with the name ``feat``. One can also set a node
feature associated with the name ``feat`` by setting ``g.ndata[feat]`` to a tensor.
If the graph has only one node type, ``g.ndata['feat']`` gives If ``g`` is a graph of multiple node types, ``g.ndata[feat]`` returns a
the node feature data under name ``'feat'``. dict[str, Tensor] mapping node types to the node features associated with the name
If the graph has multiple node types, then ``g.ndata['feat']`` ``feat`` for the corresponding type. One can also set a node feature associated
returns a dictionary where the key is the node type and the with the name ``feat`` for some node type(s) by setting ``g.ndata[feat]`` to a
value is the node feature tensor. If the node type does not dictionary as described.
have feature `'feat'`, it is not included in the dictionary.
Notes
-----
For setting features, the device of the features must be the same as the device
of the graph.
Examples Examples
-------- --------
The following example uses PyTorch backend. The following example uses PyTorch backend.
To set features of all nodes in a heterogeneous graph >>> import dgl
with only one node type: >>> import torch
>>> g = dgl.graph(([0, 1], [1, 2]), 'user', 'follows')
>>> g.ndata['h'] = torch.zeros(3, 5)
To set features of all nodes in a heterogeneous graph Set and get feature 'h' for a graph of a single node type.
with multiple node types:
>>> g = dgl.heterograph({('user', 'like', 'movie') : ([0, 1, 1], [1, 2, 0])}) >>> g = dgl.graph((torch.tensor([0, 1]), torch.tensor([1, 2])))
>>> g.ndata['h'] = {'user': torch.zeros(2, 5), >>> g.ndata['h'] = torch.ones(3, 1)
... 'movie': torch.zeros(3, 5)}
>>> g.ndata['h'] >>> g.ndata['h']
... {'user': tensor([[0., 0., 0., 0., 0.], tensor([[1.],
... [0., 0., 0., 0., 0.]]), [1.],
... 'movie': tensor([[0., 0., 0., 0., 0.], [1.]])
... [0., 0., 0., 0., 0.],
... [0., 0., 0., 0., 0.]])}
To set features of part of nodes in a heterogeneous graph Set and get feature 'h' for a graph of multiple node types.
with multiple node types:
>>> g = dgl.heterograph({('user', 'like', 'movie') : ([0, 1, 1], [1, 2, 0])}) >>> g = dgl.heterograph({
>>> g.ndata['h'] = {'user': torch.zeros(2, 5)} ... ('user', 'follows', 'user'): (torch.tensor([1, 2]), torch.tensor([3, 4])),
... ('player', 'plays', 'game'): (torch.tensor([2, 2]), torch.tensor([1, 1]))
... })
>>> g.ndata['h'] = {'game': torch.zeros(2, 1), 'player': torch.ones(3, 1)}
>>> g.ndata['h'] >>> g.ndata['h']
... {'user': tensor([[0., 0., 0., 0., 0.], {'game': tensor([[0.], [0.]]),
... [0., 0., 0., 0., 0.]])} 'player': tensor([[1.], [1.], [1.]])}
>>> # clean the feature 'h' and no node type contains 'h' >>> g.ndata['h'] = {'game': torch.ones(2, 1)}
>>> g.ndata.pop('h')
>>> g.ndata['h'] >>> g.ndata['h']
... {} {'game': tensor([[1.], [1.]]),
'player': tensor([[1.], [1.], [1.]])}
See Also See Also
-------- --------
...@@ -1315,84 +1670,62 @@ class DGLHeteroGraph(object): ...@@ -1315,84 +1670,62 @@ class DGLHeteroGraph(object):
ntypes = self.ntypes ntypes = self.ntypes
return HeteroNodeDataView(self, ntypes, ntids, ALL) return HeteroNodeDataView(self, ntypes, ntids, ALL)
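A small sketch of the dictionary-style assignment documented above, assuming the PyTorch backend: a single-type graph exposes a plain tensor, while a multi-type graph only stores the feature for the node types listed in the assigned dict.

import dgl
import torch

g = dgl.graph((torch.tensor([0, 1]), torch.tensor([1, 2])))
g.ndata['h'] = torch.zeros(3, 2)                 # one node type: plain tensor

hg = dgl.heterograph({
    ('user', 'plays', 'game'): (torch.tensor([0, 1]), torch.tensor([1, 2]))})
hg.ndata['h'] = {'user': torch.ones(2, 2)}       # only 'user' receives the feature
print(list(hg.ndata['h'].keys()))                # ['user']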
@property @property
def srcdata(self): def srcdata(self):
"""Return the data view of all nodes in the SRC category. """Return a node data view for setting/getting source node features.
If the source nodes have only one node type, ``g.srcdata['feat']`` Let ``g`` be a DGLGraph. If ``g`` is a graph of a single source node type,
gives the node feature data under name ``'feat'``. ``g.srcdata[feat]`` returns the source node feature associated with the name ``feat``.
If the source nodes have multiple node types, then One can also set a source node feature associated with the name ``feat`` by
``g.srcdata['feat']`` returns a dictionary where the key is setting ``g.srcdata[feat]`` to a tensor.
the source node type and the value is the node feature
tensor. If the source node type does not have feature If ``g`` is a graph of multiple source node types, ``g.srcdata[feat]`` returns a
`'feat'`, it is not included in the dictionary. dict[str, Tensor] mapping source node types to the node features associated with
the name ``feat`` for the corresponding type. One can also set a node feature
associated with the name ``feat`` for some source node type(s) by setting
``g.srcdata[feat]`` to a dictionary as described.
Notes
-----
For setting features, the device of the features must be the same as the device
of the graph.
Examples Examples
-------- --------
The following example uses PyTorch backend. The following example uses PyTorch backend.
To set features of all source nodes in a graph with only one edge type: >>> import dgl
>>> import torch
>>> g = dgl.bipartite(([0, 1], [1, 2]), 'user', 'plays', 'game')
>>> g.srcdata['h'] = torch.zeros(2, 5)
This is equivalent to
>>> g.nodes['user'].data['h'] = torch.zeros(2, 5)
Also work on more complex uni-bipartite graph Set and get feature 'h' for a graph of a single source node type.
>>> g = dgl.heterograph({ >>> g = dgl.heterograph({
... ('user', 'plays', 'game') : ([0, 1], [1, 2]), ... ('user', 'plays', 'game'): (torch.tensor([0, 1]), torch.tensor([1, 2]))})
... ('user', 'reads', 'book') : ([0, 1], [1, 0]), >>> g.srcdata['h'] = torch.ones(2, 1)
... }) >>> g.srcdata['h']
>>> print(g.is_unibipartite) tensor([[1.],
True [1.]])
>>> g.srcdata['h'] = torch.zeros(2, 5)
To set features of all source nodes in a uni-bipartite graph Set and get feature 'h' for a graph of multiple source node types.
with multiple source node types:
>>> g = dgl.heterograph({ >>> g = dgl.heterograph({
... ('game', 'liked-by', 'user') : ([1, 2], [0, 1]), ... ('user', 'plays', 'game'): (torch.tensor([1, 2]), torch.tensor([3, 4])),
... ('book', 'liked-by', 'user') : ([0, 1], [1, 0]), ... ('player', 'plays', 'game'): (torch.tensor([2, 2]), torch.tensor([1, 1]))
... }) ... })
>>> print(g.is_unibipartite) >>> g.srcdata['h'] = {'user': torch.zeros(3, 1), 'player': torch.ones(3, 1)}
True
>>> g.srcdata['h'] = {'game' : torch.zeros(3, 5),
... 'book' : torch.zeros(2, 5)}
>>> g.srcdata['h']
... {'game': tensor([[0., 0., 0., 0., 0.],
... [0., 0., 0., 0., 0.],
... [0., 0., 0., 0., 0.]]),
... 'book': tensor([[0., 0., 0., 0., 0.],
... [0., 0., 0., 0., 0.]])}
To set features of part of source nodes in a uni-bipartite graph
with multiple source node types:
>>> g = dgl.heterograph({
... ('game', 'liked-by', 'user') : ([1, 2], [0, 1]),
... ('book', 'liked-by', 'user') : ([0, 1], [1, 0]),
... })
>>> g.srcdata['h'] = {'game' : torch.zeros(3, 5)}
>>> g.srcdata['h'] >>> g.srcdata['h']
>>> {'game': tensor([[0., 0., 0., 0., 0.], {'player': tensor([[1.], [1.], [1.]]),
... [0., 0., 0., 0., 0.], 'user': tensor([[0.], [0.], [0.]])}
... [0., 0., 0., 0., 0.]])} >>> g.srcdata['h'] = {'user': torch.ones(3, 1)}
>>> # clean the feature 'h' and no source node type contains 'h'
>>> g.srcdata.pop('h')
>>> g.srcdata['h'] >>> g.srcdata['h']
... {} {'player': tensor([[1.], [1.], [1.]]),
'user': tensor([[1.], [1.], [1.]])}
Notes
-----
This is identical to :any:`DGLHeteroGraph.ndata` if the graph is homogeneous.
See Also See Also
-------- --------
nodes nodes
ndata
srcnodes
""" """
if len(self.srctypes) == 1: if len(self.srctypes) == 1:
ntype = self.srctypes[0] ntype = self.srctypes[0]
...@@ -1405,81 +1738,61 @@ class DGLHeteroGraph(object): ...@@ -1405,81 +1738,61 @@ class DGLHeteroGraph(object):
@property @property
def dstdata(self): def dstdata(self):
"""Return the data view of all destination nodes. """Return a node data view for setting/getting destination node features.
Let ``g`` be a DGLGraph. If ``g`` is a graph of a single destination node type,
``g.dstdata[feat]`` returns the destination node feature associated with the name
``feat``. One can also set a destination node feature associated with the name
``feat`` by setting ``g.dstdata[feat]`` to a tensor.
If the destination nodes have only one node type, If ``g`` is a graph of multiple destination node types, ``g.dstdata[feat]`` returns a
``g.dstdata['feat']`` gives the node feature data under name dict[str, Tensor] mapping destination node types to the node features associated with
``'feat'``. the name ``feat`` for the corresponding type. One can also set a node feature
If the destination nodes have multiple node types, then associated with the name ``feat`` for some destination node type(s) by setting
``g.dstdata['feat']`` returns a dictionary where the key is ``g.dstdata[feat]`` to a dictionary as described.
the destination node type and the value is the node feature
tensor. If the destination node type does not have feature Notes
`'feat'`, it is not included in the dictionary. -----
For setting features, the device of the features must be the same as the device
of the graph.
Examples Examples
-------- --------
The following example uses PyTorch backend. The following example uses PyTorch backend.
To set features of all source nodes in a graph with only one edge type: >>> import dgl
>>> import torch
>>> g = dgl.bipartite(([0, 1], [1, 2]), 'user', 'plays', 'game') Set and get feature 'h' for a graph of a single destination node type.
>>> g.dstdata['h'] = torch.zeros(3, 5)
This is equivalent to
>>> g.nodes['game'].data['h'] = torch.zeros(3, 5)
Also work on more complex uni-bipartite graph
>>> g = dgl.heterograph({ >>> g = dgl.heterograph({
... ('user', 'plays', 'game') : ([0, 1], [1, 2]), ... ('user', 'plays', 'game'): (torch.tensor([0, 1]), torch.tensor([1, 2]))})
... ('store', 'sells', 'game') : ([0, 1], [1, 0]), >>> g.dstdata['h'] = torch.ones(3, 1)
... }) >>> g.dstdata['h']
>>> print(g.is_unibipartite) tensor([[1.],
True [1.],
>>> g.dstdata['h'] = torch.zeros(3, 5) [1.]])
To set features of all destination nodes in a uni-bipartite graph Set and get feature 'h' for a graph of multiple destination node types.
with multiple destination node types::
>>> g = dgl.heterograph({ >>> g = dgl.heterograph({
... ('user', 'plays', 'game') : ([0, 1], [1, 2]), ... ('user', 'plays', 'game'): (torch.tensor([1, 2]), torch.tensor([1, 2])),
... ('user', 'reads', 'book') : ([0, 1], [1, 0]), ... ('user', 'watches', 'movie'): (torch.tensor([2, 2]), torch.tensor([1, 1]))
... }) ... })
>>> print(g.is_unibipartite) >>> g.dstdata['h'] = {'game': torch.zeros(3, 1), 'movie': torch.ones(2, 1)}
True
>>> g.dstdata['h'] = {'game' : torch.zeros(3, 5),
... 'book' : torch.zeros(2, 5)}
>>> g.dstdata['h']
... {'game': tensor([[0., 0., 0., 0., 0.],
... [0., 0., 0., 0., 0.],
... [0., 0., 0., 0., 0.]]),
... 'book': tensor([[0., 0., 0., 0., 0.],
... [0., 0., 0., 0., 0.]])}
To set features of part of destination nodes in a uni-bipartite graph
with multiple destination node types:
>>> g = dgl.heterograph({
... ('user', 'plays', 'game') : ([0, 1], [1, 2]),
... ('user', 'reads', 'book') : ([0, 1], [1, 0]),
... })
>>> g.dstdata['h'] = {'game' : torch.zeros(3, 5)}
>>> g.dstdata['h'] >>> g.dstdata['h']
... {'game': tensor([[0., 0., 0., 0., 0.], {'game': tensor([[0.], [0.], [0.]]),
... [0., 0., 0., 0., 0.], 'movie': tensor([[1.], [1.]])}
... [0., 0., 0., 0., 0.]])} >>> g.dstdata['h'] = {'game': torch.ones(3, 1)}
>>> # clean the feature 'h' and no destination node type contains 'h'
>>> g.dstdata.pop('h')
>>> g.dstdata['h'] >>> g.dstdata['h']
... {} {'game': tensor([[1.], [1.], [1.]]),
'movie': tensor([[1.], [1.]])}
Notes
-----
This is identical to :any:`DGLHeteroGraph.ndata` if the graph is homogeneous.
See Also See Also
-------- --------
nodes nodes
ndata
dstnodes
""" """
if len(self.dsttypes) == 1: if len(self.dsttypes) == 1:
ntype = self.dsttypes[0] ntype = self.dsttypes[0]
...@@ -1492,78 +1805,142 @@ class DGLHeteroGraph(object): ...@@ -1492,78 +1805,142 @@ class DGLHeteroGraph(object):
@property @property
def edges(self): def edges(self):
"""Return an edge view that can be used to set/get feature """Return an edge view
data of a single edge type.
One can use it for:
1. Getting the edges for a single edge type. In this case, it can take the
following optional arguments:
- form : str, optional
The return form, which can be one of the following:
- ``'uv'`` (default): The returned result is a 2-tuple of 1D tensors
:math:`(U, V)`, representing the source and destination nodes of all edges.
For each :math:`i`, :math:`(U[i], V[i])` forms an edge.
- ``'eid'``: The returned result is a 1D tensor :math:`EID`, representing
the IDs of all edges.
- ``'all'``: The returned result is a 3-tuple of 1D tensors :math:`(U, V, EID)`,
representing the source nodes, destination nodes and IDs of all edges.
For each :math:`i`, :math:`(U[i], V[i])` forms an edge with ID :math:`EID[i]`.
- order : str, optional
The order of the returned edges, which can be one of the following:
- ``'eid'`` (default): The edges are sorted by their IDs.
- ``'srcdst'``: The edges are sorted first by their source node IDs and then
by their destination node IDs to break ties.
- etype : str or tuple of str, optional
The edge type for query, which can be an edge type (str) or a canonical edge
type (3-tuple of str). When an edge type appears in multiple canonical edge
types, one must use a canonical edge type. If the graph has multiple edge
types, one must specify the argument. Otherwise, it can be omitted.
2. Setting/getting features for all edges of a single edge type. To set/get a feature
``feat`` for edges of type ``etype`` in a graph ``g``, one can use
``g.edges[etype].data[feat]``.
Examples Examples
-------- --------
The following example uses PyTorch backend. The following example uses PyTorch backend.
To set features of all "play" relationships: >>> import dgl
>>> import torch
>>> g = dgl.bipartite(([0, 1, 1], [0, 0, 2]), 'user', 'plays', 'game') **Get the Edges for a Single Edge Type**
>>> g.edges['plays'].data['h'] = torch.zeros(3, 4)
Create a graph with a single edge type.
>>> g = dgl.graph((torch.tensor([1, 0, 0]), torch.tensor([1, 1, 0])))
>>> g.edges()
(tensor([1, 0, 0]), tensor([1, 1, 0]))
Specify a different value for :attr:`form` and :attr:`order`.
>>> g.edges(form='all', order='srcdst')
(tensor([0, 0, 1]), tensor([0, 1, 1]), tensor([2, 1, 0]))
For a graph of multiple edge types, it is required to specify the edge type in query.
>>> hg = dgl.heterograph({
... ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
... ('user', 'plays', 'game'): (torch.tensor([3, 4]), torch.tensor([5, 6]))
... })
>>> hg.edges(etype='plays')
(tensor([3, 4]), tensor([5, 6]))
**Set/get Features for All Edges of a Single Edge Type**
Create a heterogeneous graph of two edge types.
>>> hg = dgl.heterograph({
... ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
... ('user', 'plays', 'game'): (torch.tensor([3, 4]), torch.tensor([5, 6]))
... })
Set and get a feature 'h' for all edges of a single type in the heterogeneous graph.
>>> hg.edges['follows'].data['h'] = torch.ones(2, 1)
>>> hg.edges['follows'].data['h']
tensor([[1.], [1.]])
To set edge features for a graph with a single edge type, use :func:`DGLGraph.edata`.
See Also See Also
-------- --------
edata edata
""" """
# TODO(Mufei): Replace the syntax g.edges[...].edata[...] with g.edges[...][...]
return HeteroEdgeView(self) return HeteroEdgeView(self)
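The following sketch (assuming the PyTorch backend) reproduces the sorted query from the docstring above and shows that a canonical edge type may also be used as the key of the edge view.

import dgl
import torch

g = dgl.graph((torch.tensor([1, 0, 0]), torch.tensor([1, 1, 0])))
print(g.edges(form='all', order='srcdst'))
# (tensor([0, 0, 1]), tensor([0, 1, 1]), tensor([2, 1, 0]))

hg = dgl.heterograph({
    ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
    ('user', 'plays', 'game'): (torch.tensor([1, 2]), torch.tensor([0, 1]))})
hg.edges[('user', 'plays', 'game')].data['w'] = torch.ones(2, 1)  # canonical key
print(hg.edges['plays'].data['w'])                                # same storage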
@property @property
def edata(self): def edata(self):
"""Return the data view of all the edges. """Return an edge data view for setting/getting edge features.
Let ``g`` be a DGLGraph. If ``g`` is a graph of a single edge type, ``g.edata[feat]``
returns the edge feature associated with the name ``feat``. One can also set an
edge feature associated with the name ``feat`` by setting ``g.edata[feat]`` to a tensor.
If the graph has only one edge type, ``g.edata['feat']`` gives the If ``g`` is a graph of multiple edge types, ``g.edata[feat]`` returns a
edge feature data under name ``'feat'``. dict[str, Tensor] mapping canonical edge types to the edge features associated with
If the graph has multiple edge types, then ``g.edata['feat']`` the name ``feat`` for the corresponding type. One can also set an edge feature
returns a dictionary where the key is the edge type and the value associated with the name ``feat`` for some edge type(s) by setting
is the edge feature tensor. If the edge type does not have feature ``g.edata[feat]`` to a dictionary as described.
``'feat'``, it is not included in the dictionary.
Note: When the graph has multiple edge type, The key used in Notes
``g.edata['feat']`` should be the canonical_etypes, i.e. -----
(h_ntype, r_type, t_ntype). For setting features, the device of the features must be the same as the device
of the graph.
Examples Examples
-------- --------
The following example uses PyTorch backend. The following example uses PyTorch backend.
To set features of all edges in a heterogeneous graph >>> import dgl
with only one edge type: >>> import torch
>>> g = dgl.graph(([0, 1], [1, 2]), 'user', 'follows')
>>> g.edata['h'] = torch.zeros(2, 5)
To set features of all edges in a heterogeneous graph Set and get feature 'h' for a graph of a single edge type.
with multiple edge types:
>>> g0 = dgl.bipartite(([0, 1, 1], [1, 0, 1]), 'user', 'watches', 'movie') >>> g = dgl.graph((torch.tensor([0, 1]), torch.tensor([1, 2])))
>>> g1 = dgl.bipartite(([0, 1], [0, 1]), 'user', 'watches', 'TV') >>> g.edata['h'] = torch.ones(2, 1)
>>> g = dgl.hetero_from_relations([g0, g1])
>>> g.edata['h'] = {('user', 'watches', 'movie') : torch.zeros(3, 5),
('user', 'watches', 'TV') : torch.zeros(2, 5)}
>>> g.edata['h'] >>> g.edata['h']
... {('user', 'watches', 'movie'): tensor([[0., 0., 0., 0., 0.], tensor([[1.],
... [0., 0., 0., 0., 0.], [1.]])
... [0., 0., 0., 0., 0.]]),
... ('user', 'watches', 'TV'): tensor([[0., 0., 0., 0., 0.], Set and get feature 'h' for a graph of multiple edge types.
... [0., 0., 0., 0., 0.]])}
>>> g = dgl.heterograph({
To set features of part of edges in a heterogeneous graph ... ('user', 'follows', 'user'): (torch.tensor([1, 2]), torch.tensor([3, 4])),
with multiple edge types: ... ('user', 'plays', 'user'): (torch.tensor([2, 2]), torch.tensor([1, 1])),
>>> g0 = dgl.bipartite(([0, 1, 1], [1, 0, 1]), 'user', 'watches', 'movie') ... ('player', 'plays', 'game'): (torch.tensor([2, 2]), torch.tensor([1, 1]))
>>> g1 = dgl.bipartite(([0, 1], [0, 1]), 'user', 'watches', 'TV') ... })
>>> g = dgl.hetero_from_relations([g0, g1]) >>> g.edata['h'] = {('user', 'follows', 'user'): torch.zeros(2, 1),
>>> g.edata['h'] = {('user', 'watches', 'movie') : torch.zeros(3, 5)} ... ('user', 'plays', 'user'): torch.ones(2, 1)}
>>> g.edata['h'] >>> g.edata['h']
... {('user', 'watches', 'movie'): tensor([[0., 0., 0., 0., 0.], {('user', 'follows', 'user'): tensor([[0.], [0.]]),
... [0., 0., 0., 0., 0.], ('user', 'plays', 'user'): tensor([[1.], [1.]])}
... [0., 0., 0., 0., 0.]])} >>> g.edata['h'] = {('user', 'follows', 'user'): torch.ones(2, 1)}
>>> # clean the feature 'h' and no edge type contains 'h'
>>> g.edata.pop('h')
>>> g.edata['h'] >>> g.edata['h']
... {} {('user', 'follows', 'user'): tensor([[1.], [1.]]),
('user', 'plays', 'user'): tensor([[1.], [1.]])}
See Also See Also
-------- --------
...@@ -1598,7 +1975,7 @@ class DGLHeteroGraph(object): ...@@ -1598,7 +1975,7 @@ class DGLHeteroGraph(object):
equivalent to ``self.edge_type_subgraph(etype)``. The node and edge features equivalent to ``self.edge_type_subgraph(etype)``. The node and edge features
of the returned graph would be shared with the original graph. of the returned graph would be shared with the original graph.
If there are multiple canonical edge type found, then the source/edge/destination If there are multiple canonical edge types found, then the source/edge/destination
node types would be a *concatenation* of original node/edge types. The node types would be a *concatenation* of original node/edge types. The
new source/destination node type would have the concatenation determined by new source/destination node type would have the concatenation determined by
:func:`dgl.combine_names() <dgl.combine_names>` called on original source/destination :func:`dgl.combine_names() <dgl.combine_names>` called on original source/destination
...@@ -1680,40 +2057,65 @@ class DGLHeteroGraph(object): ...@@ -1680,40 +2057,65 @@ class DGLHeteroGraph(object):
################################################################# #################################################################
def number_of_nodes(self, ntype=None): def number_of_nodes(self, ntype=None):
"""Return the number of nodes of the given type in the heterograph. """Alias of :func:`num_nodes`"""
return self.num_nodes(ntype)
def num_nodes(self, ntype=None):
"""Return the number of nodes.
Parameters Parameters
---------- ----------
ntype : str, optional ntype : str, optional
The node type. Can be omitted if there is only one node type The node type for query. If given, it returns the number of nodes for a particular
in the graph. (Default: None) type. If not given (default), it returns the total number of nodes of all types.
Returns Returns
------- -------
int int
The number of nodes The number of nodes.
Examples Examples
-------- --------
>>> g = dgl.graph(([0, 1], [1, 2]), 'user', 'follows') The following example uses PyTorch backend.
>>> g.number_of_nodes('user')
3 >>> import dgl
>>> g.number_of_nodes() >>> import torch
3
Create a graph with two node types -- 'user' and 'game'.
>>> g = dgl.heterograph({
... ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
... ('user', 'plays', 'game'): (torch.tensor([3, 4]), torch.tensor([5, 6]))
... })
Query for the number of nodes.
>>> g.num_nodes('user')
5
>>> g.num_nodes('game')
7
>>> g.num_nodes()
12
""" """
return self._graph.number_of_nodes(self.get_ntype_id(ntype)) if ntype is None:
return sum([self._graph.number_of_nodes(ntid) for ntid in range(len(self.ntypes))])
else:
return self._graph.number_of_nodes(self.get_ntype_id(ntype))
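As a quick sketch of the no-argument behavior introduced here (assuming the PyTorch backend), the total count equals the sum of the per-type counts.

import dgl
import torch

hg = dgl.heterograph({
    ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
    ('user', 'plays', 'game'): (torch.tensor([3, 4]), torch.tensor([5, 6]))})
# 5 'user' nodes + 7 'game' nodes
assert hg.num_nodes() == sum(hg.num_nodes(nty) for nty in hg.ntypes) == 12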
def number_of_src_nodes(self, ntype=None): def number_of_src_nodes(self, ntype=None):
"""Return the number of nodes of the given SRC node type in the heterograph. """Alias of :func:`num_src_nodes`"""
return self.num_src_nodes(ntype)
The heterograph is usually a unidirectional bipartite graph. def num_src_nodes(self, ntype=None):
"""Return the number of nodes of the given source node type.
Parameters Parameters
---------- ----------
ntype : str, optional ntype : str, optional
Node type. The source node type for query. If given, it returns the number of nodes for a
If omitted, there should be only one node type in the SRC category. particular source node type. If not given (default), it returns the number of
nodes summed over all source node types.
Returns Returns
------- -------
...@@ -1722,26 +2124,52 @@ class DGLHeteroGraph(object): ...@@ -1722,26 +2124,52 @@ class DGLHeteroGraph(object):
Examples Examples
-------- --------
>>> g = dgl.bipartite(([0, 1], [1, 2]), 'user', 'plays', 'game') The following example uses PyTorch backend.
>>> g.number_of_src_nodes('user')
2 >>> import dgl
>>> g.number_of_src_nodes() >>> import torch
2
>>> g.number_of_nodes('user') Create a homogeneous graph for query.
>>> g = dgl.graph((torch.tensor([0, 1]), torch.tensor([1, 2])))
>>> g.num_src_nodes()
3
Create a heterogeneous graph with two source node types -- 'developer' and 'user'.
>>> g = dgl.heterograph({
... ('developer', 'develops', 'game'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
... ('user', 'plays', 'game'): (torch.tensor([3, 4]), torch.tensor([5, 6]))
... })
Query for the number of nodes.
>>> g.num_src_nodes('developer')
2 2
>>> g.num_src_nodes('user')
5
>>> g.num_src_nodes()
7
""" """
return self._graph.number_of_nodes(self.get_ntype_id_from_src(ntype)) if ntype is None:
return sum([self._graph.number_of_nodes(self.get_ntype_id_from_src(nty))
for nty in self.srctypes])
else:
return self._graph.number_of_nodes(self.get_ntype_id_from_src(ntype))
def number_of_dst_nodes(self, ntype=None): def number_of_dst_nodes(self, ntype=None):
"""Return the number of nodes of the given DST node type in the heterograph. """Alias of :func:`num_dst_nodes`"""
return self.num_dst_nodes(ntype)
The heterograph is usually a unidirectional bipartite graph. def num_dst_nodes(self, ntype=None):
"""Return the number of nodes of the given destination node type.
Parameters Parameters
---------- ----------
ntype : str, optional ntype : str, optional
Node type. The destination node type for query. If given, it returns the number of nodes for a
If omitted, there should be only one node type in the DST category. particular destination node type. If not given (default), it returns the number of
nodes summed over all destination node types.
Returns Returns
------- -------
...@@ -1750,42 +2178,95 @@ class DGLHeteroGraph(object): ...@@ -1750,42 +2178,95 @@ class DGLHeteroGraph(object):
Examples Examples
-------- --------
>>> g = dgl.bipartite(([0, 1], [1, 2]), 'user', 'plays', 'game') The following example uses PyTorch backend.
>>> g.number_of_dst_nodes('game')
3 >>> import dgl
>>> g.number_of_dst_nodes() >>> import torch
3
>>> g.number_of_nodes('game') Create a homogeneous graph for query.
>>> g = dgl.graph((torch.tensor([0, 1]), torch.tensor([1, 2])))
>>> g.num_dst_nodes()
3 3
Create a heterogeneous graph with two destination node types -- 'user' and 'game'.
>>> g = dgl.heterograph({
... ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
... ('user', 'plays', 'game'): (torch.tensor([3, 4]), torch.tensor([5, 6]))
... })
Query for the number of nodes.
>>> g.num_dst_nodes('user')
5
>>> g.num_dst_nodes('game')
7
>>> g.num_dst_nodes()
12
""" """
return self._graph.number_of_nodes(self.get_ntype_id_from_dst(ntype)) if ntype is None:
return sum([self._graph.number_of_nodes(self.get_ntype_id_from_dst(nty))
for nty in self.dsttypes])
else:
return self._graph.number_of_nodes(self.get_ntype_id_from_dst(ntype))
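A short sketch relating the two counters on a uni-bipartite graph, assuming the PyTorch backend: source counts sum over source node types only, and destination counts over destination node types only.

import dgl
import torch

g = dgl.heterograph({
    ('developer', 'develops', 'game'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
    ('user', 'plays', 'game'): (torch.tensor([3, 4]), torch.tensor([5, 6]))})
print(g.is_unibipartite)     # True
print(g.num_src_nodes())     # 7 = 2 developers + 5 users
print(g.num_dst_nodes())     # 7 games
print(g.num_nodes())         # 14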
def number_of_edges(self, etype=None): def number_of_edges(self, etype=None):
"""Return the number of edges of the given type in the heterograph. """Alias of :func:`num_edges`"""
return self.num_edges(etype)
def num_edges(self, etype=None):
"""Return the number of edges.
Parameters Parameters
---------- ----------
etype : str or tuple of str, optional etype : str or tuple of str, optional
The edge type. Can be omitted if there is only one edge type The edge type for query, which can be an edge type (str) or a canonical edge type
in the graph. (3-tuple of str). When an edge type appears in multiple canonical edge types, one
must use a canonical edge type. If given, it returns the number of edges for a
particular edge type. If not given (default), it returns the total number of edges
of all types.
Returns Returns
------- -------
int int
The number of edges The number of edges.
Examples Examples
-------- --------
>>> g = dgl.graph(([0, 1], [1, 2]), 'user', 'follows') The following example uses PyTorch backend.
>>> g.number_of_edges(('user', 'follows', 'user'))
2 >>> import dgl
>>> g.number_of_edges('follows') >>> import torch
Create a graph with three canonical edge types.
>>> g = dgl.heterograph({
... ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
... ('user', 'follows', 'game'): (torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3])),
... ('user', 'plays', 'game'): (torch.tensor([1, 3]), torch.tensor([2, 3]))
... })
Query for the number of edges.
>>> g.num_edges('plays')
2 2
>>> g.number_of_edges() >>> g.num_edges()
7
Use a canonical edge type instead when there is ambiguity for an edge type.
>>> g.num_edges(('user', 'follows', 'user'))
2 2
>>> g.num_edges(('user', 'follows', 'game'))
3
""" """
return self._graph.number_of_edges(self.get_etype_id(etype)) if etype is None:
return sum([self._graph.number_of_edges(etid)
for etid in range(len(self.canonical_etypes))])
else:
return self._graph.number_of_edges(self.get_etype_id(etype))
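Along the same lines, a hedged sketch of edge counting (PyTorch backend assumed): the total sums over all canonical edge types, and an ambiguous type name must be given in canonical form.

import dgl
import torch

g = dgl.heterograph({
    ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
    ('user', 'follows', 'game'): (torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3])),
    ('user', 'plays', 'game'): (torch.tensor([1, 3]), torch.tensor([2, 3]))})
assert g.num_edges() == sum(g.num_edges(c) for c in g.canonical_etypes) == 7
print(g.num_edges(('user', 'follows', 'game')))  # 3; plain 'follows' is ambiguous here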
def __len__(self): def __len__(self):
"""Deprecated: please directly call :func:`number_of_nodes` """Deprecated: please directly call :func:`number_of_nodes`
...@@ -1798,13 +2279,91 @@ class DGLHeteroGraph(object): ...@@ -1798,13 +2279,91 @@ class DGLHeteroGraph(object):
def is_multigraph(self): def is_multigraph(self):
"""Whether the graph is a multigraph """Whether the graph is a multigraph
In a multigraph, there can be multiple edges from a node ``u`` to a node ``v``.
For a heterogeneous graph of multiple canonical edge types, we consider it as a
multigraph if there are multiple edges from a node ``u`` to a node ``v`` for any
canonical edge type.
Returns Returns
------- -------
bool bool
True if the graph is a multigraph, False otherwise. Whether the graph is a multigraph.
Notes
-----
Checking whether the graph is a multigraph can be expensive for a large one.
Examples
--------
The following example uses PyTorch backend.
>>> import dgl
>>> import torch
Check for homogeneous graphs.
>>> g = dgl.graph((torch.tensor([0, 1]), torch.tensor([1, 3])))
>>> g.is_multigraph
False
>>> g = dgl.graph((torch.tensor([0, 1, 1]), torch.tensor([1, 3, 3])))
>>> g.is_multigraph
True
Check for heterogeneous graphs.
>>> g = dgl.heterograph({
... ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
... ('user', 'follows', 'game'): (torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3]))
... })
>>> g.is_multigraph
False
>>> g = dgl.heterograph({
... ('user', 'follows', 'user'): (torch.tensor([0, 1, 1]), torch.tensor([1, 2, 2])),
... ('user', 'follows', 'game'): (torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3]))
... })
>>> g.is_multigraph
True
""" """
return self._graph.is_multigraph() return self._graph.is_multigraph()
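A small sketch of the per-relation semantics described above, assuming the PyTorch backend: parallel edges are only counted within a single canonical edge type, so the same node pair connected under two different relations does not make a multigraph.

import dgl
import torch

g = dgl.heterograph({
    ('user', 'follows', 'user'): (torch.tensor([0]), torch.tensor([1])),
    ('user', 'likes', 'user'): (torch.tensor([0]), torch.tensor([1]))})
print(g.is_multigraph)    # False: each relation has at most one edge per node pair

g2 = dgl.graph((torch.tensor([0, 0]), torch.tensor([1, 1])))
print(g2.is_multigraph)   # True: duplicate (0, 1) edges of the same type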
@property
def is_homogeneous(self):
"""Whether the graph is a homogeneous graph.
A homogeneous graph only has one node type and one edge type.
Returns
-------
bool
Whether the graph is a homogeneous graph.
Examples
--------
The following example uses PyTorch backend.
>>> import dgl
>>> import torch
Create a homogeneous graph for the check.
>>> g = dgl.graph((torch.tensor([0, 0, 1, 1]), torch.tensor([1, 0, 2, 3])))
>>> g.is_homogeneous
True
Create a heterogeneous graph for the check.
>>> g = dgl.heterograph({
... ('user', 'follows', 'game'): (torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3]))})
>>> g.is_homogeneous
False
"""
return len(self.ntypes) == 1 and len(self.etypes) == 1
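A brief sketch, assuming the PyTorch backend and that ``dgl.to_homogeneous`` is available in this version: converting a typed graph collapses all types into one, after which the property returns True.

import dgl
import torch

hg = dgl.heterograph({
    ('user', 'follows', 'game'): (torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3]))})
print(hg.is_homogeneous)     # False: two node types
g = dgl.to_homogeneous(hg)   # merge all node/edge types into one
print(g.is_homogeneous)      # True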
@property @property
def is_readonly(self): def is_readonly(self):
"""Deprecated: DGLGraph will always be mutable. """Deprecated: DGLGraph will always be mutable.
...@@ -1821,12 +2380,30 @@ class DGLHeteroGraph(object): ...@@ -1821,12 +2380,30 @@ class DGLHeteroGraph(object):
@property @property
def idtype(self): def idtype(self):
"""The dtype of graph index """The data type for storing the structure-related graph information
such as node and edge IDs.
Returns Returns
------- -------
backend dtype object Framework-specific data type object
th.int32/th.int64 or tf.int32/tf.int64 etc. For example, this can be ``torch.int32`` or ``torch.int64`` for PyTorch.
Examples
--------
The following example uses PyTorch backend.
>>> import dgl
>>> import torch
>>> src_ids = torch.tensor([0, 0, 1])
>>> dst_ids = torch.tensor([1, 2, 2])
>>> g = dgl.graph((src_ids, dst_ids))
>>> g.idtype
torch.int64
>>> g = dgl.graph((src_ids, dst_ids), idtype=torch.int32)
>>> g.idtype
torch.int32
See Also See Also
-------- --------
...@@ -1854,41 +2431,68 @@ class DGLHeteroGraph(object): ...@@ -1854,41 +2431,68 @@ class DGLHeteroGraph(object):
return self.has_nodes(vid) return self.has_nodes(vid)
def has_nodes(self, vid, ntype=None): def has_nodes(self, vid, ntype=None):
"""Whether the graph has a node with a particular id and type. """Whether the graph has some particular node(s) of a given type.
Parameters Parameters
---------- ----------
vid : int, iterable, tensor vid : node ID(s)
Node ID(s). The node ID(s) for query. The allowed formats are:
- ``int``: The ID of a single node.
- ``Tensor``: A 1D tensor that contains the IDs of multiple nodes, whose data type and
device should be the same as the :py:attr:`idtype` and device of the graph.
- ``iterable[int]``: A sequence (e.g. list, tuple, numpy.ndarray)
of integers that contains the IDs of multiple nodes.
ntype : str, optional ntype : str, optional
The node type. Can be omitted if there is only one node type The node type for query. It is required if the graph has
in the graph. (Default: None) multiple node types.
Returns Returns
------- -------
bool or bool Tensor bool or bool Tensor
Each element is a bool flag, which is True if the node exists,
and is False otherwise. - If :attr:`vid` is an ``int``, the result will be a ``bool`` indicating
whether the graph has the particular node.
- If :attr:`vid` is a 1D ``Tensor`` or ``iterable[int]`` of node IDs,
the result will be a bool Tensor whose i-th element indicates whether
the graph has node :attr:`vid[i]` of the given type.
Examples Examples
-------- --------
The following example uses PyTorch backend.
>>> import dgl
>>> import torch
Create a graph with two node types -- 'user' and 'game'.
>>> g = dgl.heterograph({
... ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
... ('user', 'plays', 'game'): (torch.tensor([3, 4]), torch.tensor([0, 1]))
... })
Query for the nodes.
>>> g.has_nodes(0, 'user') >>> g.has_nodes(0, 'user')
True True
>>> g.has_nodes(4, 'user') >>> g.has_nodes(3, 'game')
False False
>>> g.has_nodes([0, 1, 2, 3, 4], 'user') >>> g.has_nodes(torch.tensor([3, 0, 1]), 'game')
tensor([True, True, True, False, False]) tensor([False, True, True])
""" """
vid_tensor = utils.prepare_tensor(self, vid, "vid")
if len(vid_tensor) > 0 and F.as_scalar(F.min(vid_tensor, 0)) < 0:
raise DGLError('All IDs must be non-negative integers.')
ret = self._graph.has_nodes( ret = self._graph.has_nodes(
self.get_ntype_id(ntype), self.get_ntype_id(ntype), vid_tensor)
utils.prepare_tensor(self, vid, "vid"))
if isinstance(vid, numbers.Integral): if isinstance(vid, numbers.Integral):
return bool(F.as_scalar(ret)) return bool(F.as_scalar(ret))
else: else:
return F.astype(ret, F.bool) return F.astype(ret, F.bool)
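A short sketch of the query together with the sanity check added above, assuming the PyTorch backend: out-of-range IDs simply return False, while negative IDs raise ``DGLError``.

import dgl
import torch
from dgl import DGLError

g = dgl.graph((torch.tensor([0, 1]), torch.tensor([1, 2])))    # 3 nodes
print(g.has_nodes(torch.tensor([0, 2, 5])))                    # tensor([ True,  True, False])
try:
    g.has_nodes(-1)
except DGLError:
    print('negative IDs are rejected')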
def has_node(self, vid, ntype=None): def has_node(self, vid, ntype=None):
"""Whether the graph has a node with ids and a particular type. """Whether the graph has a particular node of a given type.
DEPRECATED: see :func:`~DGLGraph.has_nodes` DEPRECATED: see :func:`~DGLGraph.has_nodes`
""" """
...@@ -1896,38 +2500,93 @@ class DGLHeteroGraph(object): ...@@ -1896,38 +2500,93 @@ class DGLHeteroGraph(object):
return self.has_nodes(vid, ntype) return self.has_nodes(vid, ntype)
def has_edges_between(self, u, v, etype=None): def has_edges_between(self, u, v, etype=None):
"""Whether the graph has an edge (u, v) of type ``etype``. """Whether the graph has some particular edge(s) of a given type.
Parameters Parameters
---------- ----------
u : int, iterable of int, Tensor u : source node ID(s)
Source node ID(s). The source node(s) of the edges for query. The allowed formats are:
v : int, iterable of int, Tensor
Destination node ID(s). - ``int``: The source node of an edge for query.
- ``Tensor``: A 1D tensor that contains the source node(s) of edge(s) for query.
The data type and device of the tensor must be the same as the :py:attr:`idtype` and
device of the graph. Its i-th element represents the source node ID of the
i-th edge for query.
- ``iterable[int]`` : Similar to the tensor, but stores node IDs in a sequence
(e.g. list, tuple, numpy.ndarray).
v : destination node ID(s)
The destination node(s) of the edges for query. It's a counterpart of :attr:`u`
for destination nodes and should have the same format as :attr:`u`. If :attr:`u`
and :attr:`v` are not int, they should have the same length.
etype : str or tuple of str, optional etype : str or tuple of str, optional
The edge type. Can be omitted if there is only one edge type The edge type for query, which can be an edge type (str) or a canonical edge type
in the graph. (3-tuple of str). When an edge type appears in multiple canonical edge types, one
must use a canonical edge type. If the graph has multiple edge types, one must
specify the argument. Otherwise, it can be omitted.
Returns Returns
------- -------
a : Tensor bool or bool Tensor
Binary tensor indicating the existence of edges. ``a[i]=1`` if the graph
contains edge ``(u[i], v[i])`` of type ``etype``, 0 otherwise. - If :attr:`u` and :attr:`v` are ``int`` objects, the result will be a ``bool``
indicating whether there is an edge from ``u`` to ``v`` of the given edge type.
- If :attr:`u` and :attr:`v` are ``Tensor`` or ``iterable[int]`` objects, the
result will be a bool Tensor whose i-th element indicates whether there is an
edge from ``u[i]`` to ``v[i]`` of the given edge type.
Notes
-----
The value(s) of :attr:`u` must be smaller than the number of nodes of the source
node type, and the value(s) of :attr:`v` must be smaller than the number of nodes of the destination node type.
Examples Examples
-------- --------
>>> g.has_edge_between(0, 1, ('user', 'plays', 'game')) The following example uses PyTorch backend.
>>> import dgl
>>> import torch
Create a homogeneous graph.
>>> g = dgl.graph((torch.tensor([0, 0, 1, 1]), torch.tensor([1, 0, 2, 3])))
Query for the edges.
>>> g.has_edges_between(1, 2)
True True
>>> g.has_edge_between(0, 2, ('user', 'plays', 'game')) >>> g.has_edges_between(torch.tensor([1, 2]), torch.tensor([2, 3]))
False tensor([ True, False])
>>> g.has_edge_between([0, 0], [1, 2], ('user', 'plays', 'game'))
tensor([1, 0]) If the graph has multiple edge types, one need to specify the edge type.
>>> g = dgl.heterograph({
... ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
... ('user', 'follows', 'game'): (torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3])),
... ('user', 'plays', 'game'): (torch.tensor([1, 3]), torch.tensor([2, 3]))
... })
>>> g.has_edges_between(torch.tensor([1, 2]), torch.tensor([2, 3]), 'plays')
tensor([ True, False])
Use a canonical edge type instead when there is ambiguity for an edge type.
>>> g.has_edges_between(torch.tensor([1, 2]), torch.tensor([2, 3]),
... ('user', 'follows', 'user'))
tensor([ True, False])
>>> g.has_edges_between(torch.tensor([1, 2]), torch.tensor([2, 3]),
... ('user', 'follows', 'game'))
tensor([True, True])
""" """
srctype, _, dsttype = self.to_canonical_etype(etype)
u_tensor = utils.prepare_tensor(self, u, 'u')
if F.as_scalar(F.sum(self.has_nodes(u_tensor, ntype=srctype), dim=0)) != len(u_tensor):
raise DGLError('u contains invalid node IDs')
v_tensor = utils.prepare_tensor(self, v, 'v')
if F.as_scalar(F.sum(self.has_nodes(v_tensor, ntype=dsttype), dim=0)) != len(v_tensor):
raise DGLError('v contains invalid node IDs')
ret = self._graph.has_edges_between( ret = self._graph.has_edges_between(
self.get_etype_id(etype), self.get_etype_id(etype),
utils.prepare_tensor(self, u, 'u'), u_tensor, v_tensor)
utils.prepare_tensor(self, v, 'v'))
if isinstance(u, numbers.Integral) and isinstance(v, numbers.Integral): if isinstance(u, numbers.Integral) and isinstance(v, numbers.Integral):
return bool(F.as_scalar(ret)) return bool(F.as_scalar(ret))
else: else:
...@@ -1943,79 +2602,111 @@ class DGLHeteroGraph(object): ...@@ -1943,79 +2602,111 @@ class DGLHeteroGraph(object):
return self.has_edges_between(u, v, etype) return self.has_edges_between(u, v, etype)
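A hedged sketch of the edge query with an ambiguous edge type, assuming the PyTorch backend; with the checks added above, node IDs outside the valid range raise ``DGLError`` instead of failing silently.

import dgl
import torch

g = dgl.heterograph({
    ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
    ('user', 'follows', 'game'): (torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3])),
    ('user', 'plays', 'game'): (torch.tensor([1, 3]), torch.tensor([2, 3]))})
# 'follows' alone is ambiguous, so the canonical form is required.
print(g.has_edges_between(torch.tensor([0, 2]), torch.tensor([1, 1]),
                          etype=('user', 'follows', 'user')))
# tensor([ True, False])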
def predecessors(self, v, etype=None): def predecessors(self, v, etype=None):
"""Return the predecessors of node `v` in the graph with the specified """Return the predecessor(s) of a particular node with the specified edge type.
edge type.
Node `u` is a predecessor of `v` if an edge `(u, v)` with type `etype` Node ``u`` is a predecessor of node ``v`` if there is an edge ``(u, v)`` with type
exists in the graph. ``etype`` in the graph.
Parameters Parameters
---------- ----------
v : int v : int
The destination node. The destination node for query.
etype : str or tuple of str, optional etype : str or tuple of str, optional
The edge type. Can be omitted if there is only one edge type The edge type for query, which can be an edge type (str) or a canonical edge type
in the graph. (Default: None) (3-tuple of str). When an edge type appears in multiple canonical edge types, one
must use a canonical edge type. If the graph has multiple edge types, one must
specify the argument. Otherwise, it can be omitted.
Returns Returns
------- -------
tensor Tensor
Array of predecessor node IDs with the specified edge type. The predecessors of :attr:`v` with the specified edge type.
Examples Examples
-------- --------
The following example uses PyTorch backend. The following example uses PyTorch backend.
>>> plays_g = dgl.bipartite(([0, 1, 1, 2], [0, 0, 1, 1]), 'user', 'plays', 'game') >>> import dgl
>>> devs_g = dgl.bipartite(([0, 1], [0, 1]), 'developer', 'develops', 'game') >>> import torch
>>> g = dgl.hetero_from_relations([plays_g, devs_g])
>>> g.predecessors(0, 'plays') Create a homogeneous graph.
tensor([0, 1])
>>> g.predecessors(0, 'develops') >>> g = dgl.graph((torch.tensor([0, 0, 1, 1]), torch.tensor([1, 1, 2, 3])))
Query for node 1.
>>> g.predecessors(1)
tensor([0, 0])
For a graph of multiple edge types, it is required to specify the edge type in query.
>>> hg = dgl.heterograph({
... ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
... ('user', 'plays', 'game'): (torch.tensor([3, 4]), torch.tensor([5, 6]))
... })
>>> hg.predecessors(1, etype='follows')
tensor([0]) tensor([0])
See Also See Also
-------- --------
successors successors
""" """
if not self.has_nodes(v, self.to_canonical_etype(etype)[-1]):
raise DGLError('Non-existing node ID {}'.format(v))
return self._graph.predecessors(self.get_etype_id(etype), v) return self._graph.predecessors(self.get_etype_id(etype), v)
def successors(self, v, etype=None): def successors(self, v, etype=None):
"""Return the successors of node `v` in the graph with the specified edge """Return the successor(s) of a particular node with the specified edge type.
type.
Node `u` is a successor of `v` if an edge `(v, u)` with type `etype` exists Node ``u`` is a successor of node ``v`` if there is an edge ``(v, u)`` with type
in the graph. ``etype`` in the graph.
Parameters Parameters
---------- ----------
v : int v : int
The source node. The source node for query.
etype : str or tuple of str, optional etype : str or tuple of str, optional
The edge type. Can be omitted if there is only one edge type The edge type for query, which can be an edge type (str) or a canonical edge type
in the graph. (Default: None) (3-tuple of str). When an edge type appears in multiple canonical edge types, one
must use a canonical edge type. If the graph has multiple edge types, one must
specify the argument. Otherwise, it can be omitted.
Returns Returns
------- -------
tensor Tensor
Array of successor node IDs with the specified edge type. The successors of :attr:`v` with the specified edge type.
Examples Examples
-------- --------
The following example uses PyTorch backend. The following example uses PyTorch backend.
>>> plays_g = dgl.bipartite(([0, 1, 1, 2], [0, 0, 1, 1]), 'user', 'plays', 'game') >>> import dgl
>>> follows_g = dgl.graph(([0, 1], [1, 2]), 'user', 'follows') >>> import torch
>>> g = dgl.hetero_from_relations([plays_g, follows_g])
>>> g.successors(0, 'plays') Create a homogeneous graph.
tensor([0])
>>> g.successors(0, 'follows') >>> g = dgl.graph((torch.tensor([0, 0, 1, 1]), torch.tensor([1, 1, 2, 3])))
tensor([1])
Query for node 1.
>>> g.successors(1)
tensor([2, 3])
For a graph of multiple edge types, it is required to specify the edge type in query.
>>> hg = dgl.heterograph({
... ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
... ('user', 'plays', 'game'): (torch.tensor([3, 4]), torch.tensor([5, 6]))
... })
>>> hg.successors(1, etype='follows')
tensor([2])
See Also See Also
-------- --------
predecessors predecessors
""" """
if not self.has_nodes(v, self.to_canonical_etype(etype)[0]):
raise DGLError('Non-existing node ID {}'.format(v))
return self._graph.successors(self.get_etype_id(etype), v) return self._graph.successors(self.get_etype_id(etype), v)
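The two queries are symmetric: ``u`` is a predecessor of ``v`` exactly when ``v`` is a successor of ``u``, and with the checks added above a non-existing node ID raises ``DGLError``. A small sketch assuming the PyTorch backend:

import dgl
import torch

g = dgl.graph((torch.tensor([0, 0, 1, 1]), torch.tensor([1, 1, 2, 3])))
print(g.predecessors(1))   # tensor([0, 0]): one entry per incoming edge
print(g.successors(1))     # tensor([2, 3])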
def edge_id(self, u, v, force_multi=None, return_uv=False, etype=None): def edge_id(self, u, v, force_multi=None, return_uv=False, etype=None):
...@@ -2029,67 +2720,107 @@ class DGLHeteroGraph(object): ...@@ -2029,67 +2720,107 @@ class DGLHeteroGraph(object):
return_uv=return_uv, etype=etype) return_uv=return_uv, etype=etype)
def edge_ids(self, u, v, force_multi=None, return_uv=False, etype=None): def edge_ids(self, u, v, force_multi=None, return_uv=False, etype=None):
"""Return all edge IDs between source node array `u` and destination """Return the ID(s) of edge(s) from the given source node(s) to the given destination
node array `v` with the specified edge type. node(s) with the specified edge type.
Parameters Parameters
---------- ----------
u : int, list, tensor u : source node ID(s)
The node ID array of source type. The source node(s) of the edges for query. The allowed formats are:
v : int, list, tensor
The node ID array of destination type. - ``int``: The source node of an edge for query.
- ``Tensor``: A 1D tensor that contains the source node(s) of edge(s) for query, whose
data type and device should be the same as the :py:attr:`idtype` and device of
the graph. Its i-th element is the source node of the i-th edge for query.
- ``iterable[int]``: Similar to the tensor, but stores node IDs in a sequence
(e.g. list, tuple, numpy.ndarray).
v : destination node ID(s)
The destination node(s) of the edges for query. It's a counterpart of :attr:`u`
for destination nodes and should have the same format as :attr:`u`. If :attr:`u`
and :attr:`v` are not int, they should have the same length.
force_multi : bool, optional force_multi : bool, optional
Deprecated (Will be deleted in the future). Deprecated, use :attr:`return_uv` instead. Whether to allow the graph to be a
Whether to always treat the graph as a multigraph. See the multigraph, i.e. there can be multiple edges from one node to another.
"Returns" for their effects. (Default: False) return_uv : bool, optional
return_uv : bool Whether to return the source and destination node IDs along with the edges. If
See the "Returns" for their effects. (Default: False) False (default), it assumes that the graph is a simple graph and there is only
one edge from one node to another. If True, there can be multiple edges found
from one node to another.
etype : str or tuple of str, optional etype : str or tuple of str, optional
The edge type. Can be omitted if there is only one edge type The edge type for query, which can be an edge type (str) or a canonical edge type
in the graph. (3-tuple of str). When an edge type appears in multiple canonical edge types, one
must use a canonical edge type. If the graph has multiple edge types, one must
specify the argument. Otherwise, it can be omitted.
Returns Returns
------- -------
tensor, or (tensor, tensor, tensor) tensor, or (tensor, tensor, tensor)
* If ``return_uv=False``, return a single edge ID array ``e``. * If ``return_uv=False``, it returns a 1D tensor that contains the IDs of the edges.
``e[i]`` is the edge ID between ``u[i]`` and ``v[i]``. If :attr:`u` and :attr:`v` are int, it returns a single edge ID. Otherwise, the i-th
element of the tensor is the ID of the edge ``(u[i], v[i])``.
* Otherwise, return three arrays ``(eu, ev, e)``. ``e[i]`` is the ID * If ``return_uv=True``, it returns a tuple of three 1D tensors ``(eu, ev, e)``.
of an edge between ``eu[i]`` and ``ev[i]``. All edges between ``u[i]`` ``e[i]`` is the ID of an edge from ``eu[i]`` to ``ev[i]``. It returns all edges
and ``v[i]`` are returned. from ``eu[i]`` to ``ev[i]`` in this case.
Notes Notes
----- -----
If the graph is a simple graph, ``return_uv=False``, and no edge If the graph is a simple graph, ``return_uv=False``, and there are no edges
exists between some pairs of ``u[i]`` and ``v[i]``, the result is undefined between some pairs of nodes, it will raise an error.
and an empty tensor is returned.
If the graph is a multi graph, ``return_uv=False``, and multi edges If the graph is a multigraph, ``return_uv=False``, and there are multiple edges
exist between some pairs of `u[i]` and `v[i]`, the result is undefined. between some pairs of nodes, it returns an arbitrary one of them.
Examples Examples
-------- --------
The following example uses PyTorch backend. The following example uses PyTorch backend.
Instantiate a heterograph. >>> import dgl
>>> import torch
>>> plays_g = dgl.bipartite(([0, 1, 1, 2], [0, 0, 2, 1]), 'user', 'plays', 'game') Create a homogeneous graph.
>>> follows_g = dgl.graph(([0, 1, 1], [1, 2, 2]), 'user', 'follows')
>>> g = dgl.hetero_from_relations([plays_g, follows_g])
Query for edge ids. >>> g = dgl.graph((torch.tensor([0, 0, 1, 1, 1]), torch.tensor([1, 0, 2, 3, 2])))
>>> plays_g.edge_ids([0], [2], etype=('user', 'plays', 'game')) Query for the edges.
tensor([], dtype=torch.int64)
>>> plays_g.edge_ids([1], [2], etype=('user', 'plays', 'game')) >>> g.edge_ids(0, 0)
tensor([2]) 1
>>> g.edge_ids([1], [2], return_uv=True, etype=('user', 'follows', 'user')) >>> g.edge_ids(torch.tensor([1, 0]), torch.tensor([3, 1]))
(tensor([1, 1]), tensor([2, 2]), tensor([1, 2])) tensor([3, 0])
Get all edges for pairs of nodes.
>>> g.edge_ids(torch.tensor([1, 0]), torch.tensor([3, 1]), return_uv=True)
(tensor([1, 0]), tensor([3, 1]), tensor([3, 0]))
If the graph has multiple edge types, one needs to specify the edge type.
>>> g = dgl.heterograph({
... ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
... ('user', 'follows', 'game'): (torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3])),
... ('user', 'plays', 'game'): (torch.tensor([1, 3]), torch.tensor([2, 3]))
... })
>>> g.edge_ids(torch.tensor([1]), torch.tensor([2]), etype='plays')
tensor([0])
Use a canonical edge type instead when there is ambiguity for an edge type.
>>> g.edge_ids(torch.tensor([0, 1]), torch.tensor([1, 2]),
... etype=('user', 'follows', 'user'))
tensor([0, 1])
>>> g.edge_ids(torch.tensor([1, 2]), torch.tensor([2, 3]),
... etype=('user', 'follows', 'game'))
tensor([1, 2])
""" """
is_int = isinstance(u, numbers.Integral) and isinstance(v, numbers.Integral) is_int = isinstance(u, numbers.Integral) and isinstance(v, numbers.Integral)
srctype, _, dsttype = self.to_canonical_etype(etype)
u = utils.prepare_tensor(self, u, 'u') u = utils.prepare_tensor(self, u, 'u')
if F.as_scalar(F.sum(self.has_nodes(u, ntype=srctype), dim=0)) != len(u):
raise DGLError('u contains invalid node IDs')
v = utils.prepare_tensor(self, v, 'v') v = utils.prepare_tensor(self, v, 'v')
if F.as_scalar(F.sum(self.has_nodes(v, ntype=dsttype), dim=0)) != len(v):
raise DGLError('v contains invalid node IDs')
if force_multi is not None: if force_multi is not None:
dgl_warning("force_multi will be deprecated, " \ dgl_warning("force_multi will be deprecated, " \
"Please use return_uv instead") "Please use return_uv instead")
...@@ -2109,88 +2840,145 @@ class DGLHeteroGraph(object): ...@@ -2109,88 +2840,145 @@ class DGLHeteroGraph(object):
return F.as_scalar(eid) if is_int else eid return F.as_scalar(eid) if is_int else eid
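A quick sketch of how edge IDs and endpoints relate, assuming the PyTorch backend: ``find_edges`` recovers the endpoints corresponding to the IDs returned by ``edge_ids``.

import dgl
import torch

g = dgl.graph((torch.tensor([0, 0, 1, 1, 1]), torch.tensor([1, 0, 2, 3, 2])))
u, v = torch.tensor([1, 0]), torch.tensor([3, 1])
eid = g.edge_ids(u, v)
print(eid)                  # tensor([3, 0])
print(g.find_edges(eid))    # (tensor([1, 0]), tensor([3, 1]))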
def find_edges(self, eid, etype=None): def find_edges(self, eid, etype=None):
"""Given an edge ID array with the specified type, return the source """Return the source and destination node(s) of some particular edge(s)
and destination node ID array ``s`` and ``d``. ``s[i]`` and ``d[i]`` with the specified edge type.
are source and destination node ID for edge ``eid[i]``.
Parameters Parameters
---------- ----------
eid : list, tensor eid : edge ID(s)
The edge ID array. The IDs of the edges for query. The function expects that :attr:`eid` contains
valid edge IDs only, i.e. among consecutive integers :math:`0, 1, ... E - 1`, where
:math:`E` is the number of edges with the specified edge type.
- ``int``: An edge ID for query.
- ``Tensor``: A 1D tensor that contains the edge IDs for query, whose data
type and device should be the same as the :py:attr:`idtype` and device of the graph.
- ``iterable[int]``: Similar to the tensor, but stores edge IDs in a sequence
(e.g. list, tuple, numpy.ndarray).
etype : str or tuple of str, optional etype : str or tuple of str, optional
The edge type. Can be omitted if there is only one edge type The edge type for query, which can be an edge type (str) or a canonical edge type
in the graph. (Default: None) (3-tuple of str). When an edge type appears in multiple canonical edge types, one
must use a canonical edge type.
Returns Returns
------- -------
tensor Tensor
The source node ID array. The source node IDs of the edges, whose i-th element is the source node of the edge
tensor with ID ``eid[i]``.
The destination node ID array. Tensor
The destination node IDs of the edges, whose i-th element is the destination node of
the edge with ID ``eid[i]``.
Examples Examples
-------- --------
The following example uses PyTorch backend. The following example uses PyTorch backend.
>>> g = dgl.bipartite(([0, 1, 1], [0, 0, 2]), 'user', 'plays', 'game') >>> import dgl
>>> g.find_edges([0, 2], ('user', 'plays', 'game')) >>> import torch
(tensor([0, 1]), tensor([0, 2]))
>>> g.find_edges([0, 2]) Create a homogeneous graph.
(tensor([0, 1]), tensor([0, 2]))
>>> g = dgl.graph((torch.tensor([0, 0, 1, 1]), torch.tensor([1, 0, 2, 3])))
Find edges of IDs 0 and 2.
>>> g.find_edges(torch.tensor([0, 2]))
(tensor([0, 1]), tensor([1, 2]))
For a graph of multiple edge types, it is required to specify the edge type in query.
>>> hg = dgl.heterograph({
... ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
... ('user', 'plays', 'game'): (torch.tensor([3, 4]), torch.tensor([5, 6]))
... })
>>> hg.find_edges(torch.tensor([1, 0]), 'plays')
(tensor([4, 3]), tensor([6, 5]))
""" """
eid = utils.prepare_tensor(self, eid, 'eid') eid = utils.prepare_tensor(self, eid, 'eid')
if len(eid) > 0:
min_eid = F.as_scalar(F.min(eid, 0))
if min_eid < 0:
raise DGLError('Invalid edge ID {:d}'.format(min_eid))
max_eid = F.as_scalar(F.max(eid, 0))
if max_eid >= self.num_edges(etype):
raise DGLError('Invalid edge ID {:d}'.format(max_eid))
if len(eid) == 0: if len(eid) == 0:
empty = F.copy_to(F.tensor([], self.idtype), self.device) empty = F.copy_to(F.tensor([], self.idtype), self.device)
return empty, empty return empty, empty
# sanity check
max_eid = F.as_scalar(F.max(eid, dim=0))
if max_eid >= self.number_of_edges(etype):
raise DGLError('Expect edge IDs to be smaller than number of edges ({}). '
' But got {}.'.format(self.number_of_edges(etype), max_eid))
src, dst, _ = self._graph.find_edges(self.get_etype_id(etype), eid) src, dst, _ = self._graph.find_edges(self.get_etype_id(etype), eid)
return src, dst return src, dst
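The added sanity check above makes out-of-range edge IDs fail loudly instead of returning arbitrary results. A minimal illustrative sketch of the expected behavior, assuming the homogeneous graph built in the docstring example (the exact error text shown here is indicative only):

>>> import dgl
>>> import torch
>>> g = dgl.graph((torch.tensor([0, 0, 1, 1]), torch.tensor([1, 0, 2, 3])))
>>> # g has 4 edges, so edge ID 4 is out of range and should raise dgl.DGLError
>>> g.find_edges(torch.tensor([4]))
DGLError: Invalid edge ID 4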
def in_edges(self, v, form='uv', etype=None): def in_edges(self, v, form='uv', etype=None):
"""Return the inbound edges of the node(s) with the specified type. """Return the incoming edges of some particular node(s) with the specified edge type.
Parameters Parameters
---------- ----------
v : int, list, tensor v : destination node ID(s)
The node id(s) of destination type. The destination node(s) for query. The allowed formats are:
- ``int``: The destination node for query.
- ``Tensor``: A 1D tensor that contains the destination node(s) for query, whose data
type and device should be the same as the :py:attr:`idtype` and device of the graph.
- ``iterable[int]``: Similar to the tensor, but stores node IDs in a sequence
(e.g. list, tuple, numpy.ndarray).
form : str, optional form : str, optional
The return form. Currently support: The return form, which can be one of the following:
- ``'eid'`` : one eid tensor - ``'eid'``: The returned result is a 1D tensor :math:`EID`, representing
- ``'all'`` : a tuple ``(u, v, eid)`` the IDs of all edges.
- ``'uv'`` : a pair ``(u, v)``, default - ``'uv'`` (default): The returned result is a 2-tuple of 1D tensors :math:`(U, V)`,
representing the source and destination nodes of all edges. For each :math:`i`,
:math:`(U[i], V[i])` forms an edge.
- ``'all'``: The returned result is a 3-tuple of 1D tensors :math:`(U, V, EID)`,
representing the source nodes, destination nodes and IDs of all edges.
For each :math:`i`, :math:`(U[i], V[i])` forms an edge with ID :math:`EID[i]`.
etype : str or tuple of str, optional etype : str or tuple of str, optional
The edge type. Can be omitted if there is only one edge type The edge type for query, which can be an edge type (str) or a canonical edge type
in the graph. (Default: None) (3-tuple of str). When an edge type appears in multiple canonical edge types, one
must use a canonical edge type. If the graph has multiple edge types, one must
specify the argument. Otherwise, it can be omitted.
Returns Returns
------- -------
tensor or (tensor, tensor, tensor) or (tensor, tensor) Tensor or (Tensor, Tensor) or (Tensor, Tensor, Tensor)
All inbound edges to ``v`` are returned. All incoming edges of the nodes with the specified type. For a description of the
returned result, see the description of :attr:`form`.
* If ``form='eid'``, return a tensor for the ids of the
inbound edges of the nodes with the specified type.
* If ``form='all'``, return a 3-tuple of tensors
``(eu, ev, eid)``. ``eid[i]`` gives the ID of the
edge from ``eu[i]`` to ``ev[i]``.
* If ``form='uv'``, return a 2-tuple of tensors ``(eu, ev)``.
``eu[i]`` is the source node of an edge to ``ev[i]``.
Examples Examples
-------- --------
The following example uses PyTorch backend. The following example uses PyTorch backend.
>>> g = dgl.bipartite(([0, 1, 1], [0, 1, 2]), 'user', 'plays', 'game') >>> import dgl
>>> g.in_edges([0, 2], form='eid') >>> import torch
tensor([0, 2])
>>> g.in_edges([0, 2], form='all') Create a homogeneous graph.
(tensor([0, 1]), tensor([0, 2]), tensor([0, 2]))
>>> g.in_edges([0, 2], form='uv') >>> g = dgl.graph((torch.tensor([0, 0, 1, 1]), torch.tensor([1, 0, 2, 3])))
(tensor([0, 1]), tensor([0, 2]))
Query for the nodes 1 and 0.
>>> g.in_edges(torch.tensor([1, 0]))
(tensor([0, 0]), tensor([1, 0]))
Specify a different value for :attr:`form`.
>>> g.in_edges(torch.tensor([1, 0]), form='all')
(tensor([0, 0]), tensor([1, 0]), tensor([0, 1]))
For a graph of multiple edge types, it is required to specify the edge type in query.
>>> hg = dgl.heterograph({
... ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
... ('user', 'plays', 'game'): (torch.tensor([3, 4]), torch.tensor([5, 6]))
... })
>>> hg.in_edges(torch.tensor([1, 0]), etype='follows')
(tensor([0]), tensor([1]))
See Also
--------
edges
out_edges
""" """
v = utils.prepare_tensor(self, v, 'v') v = utils.prepare_tensor(self, v, 'v')
src, dst, eid = self._graph.in_edges(self.get_etype_id(etype), v) src, dst, eid = self._graph.in_edges(self.get_etype_id(etype), v)
...@@ -2204,46 +2992,80 @@ class DGLHeteroGraph(object): ...@@ -2204,46 +2992,80 @@ class DGLHeteroGraph(object):
raise DGLError('Invalid form: {}. Must be "all", "uv" or "eid".'.format(form)) raise DGLError('Invalid form: {}. Must be "all", "uv" or "eid".'.format(form))
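The docstring above demonstrates ``form='uv'`` and ``form='all'``; for completeness, a small illustrative sketch of ``form='eid'`` on the same graph (assuming the graph built in the example above):

>>> import dgl
>>> import torch
>>> g = dgl.graph((torch.tensor([0, 0, 1, 1]), torch.tensor([1, 0, 2, 3])))
>>> # IDs of the edges entering nodes 1 and 0
>>> g.in_edges(torch.tensor([1, 0]), form='eid')
tensor([0, 1])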
def out_edges(self, u, form='uv', etype=None): def out_edges(self, u, form='uv', etype=None):
"""Return the outbound edges of the node(s) with the specified type. """Return the outgoing edges of some particular node(s) with the specified edge type.
Parameters Parameters
---------- ----------
u : int, list, tensor u : source node ID(s)
The node id(s) of source type. The source node(s) for query. The allowed formats are:
- ``int``: The source node for query.
- ``Tensor``: A 1D tensor that contains the source node(s) for query, whose data
type and device should be the same as the :py:attr:`idtype` and device of the graph.
- ``iterable[int]``: Similar to the tensor, but stores node IDs in a sequence
(e.g. list, tuple, numpy.ndarray).
form : str, optional form : str, optional
The return form. Currently support: The return form, which can be one of the following:
- ``'eid'`` : one eid tensor - ``'eid'``: The returned result is a 1D tensor :math:`EID`, representing
- ``'all'`` : a tuple ``(u, v, eid)`` the IDs of all edges.
- ``'uv'`` : a pair ``(u, v)``, default - ``'uv'`` (default): The returned result is a 2-tuple of 1D tensors :math:`(U, V)`,
representing the source and destination nodes of all edges. For each :math:`i`,
:math:`(U[i], V[i])` forms an edge.
- ``'all'``: The returned result is a 3-tuple of 1D tensors :math:`(U, V, EID)`,
representing the source nodes, destination nodes and IDs of all edges.
For each :math:`i`, :math:`(U[i], V[i])` forms an edge with ID :math:`EID[i]`.
etype : str or tuple of str, optional etype : str or tuple of str, optional
The edge type. Can be omitted if there is only one edge type The edge type for query, which can be an edge type (str) or a canonical edge type
in the graph. (Default: None) (3-tuple of str). When an edge type appears in multiple canonical edge types, one
must use a canonical edge type. If the graph has multiple edge types, one must
specify the argument. Otherwise, it can be omitted.
Returns
-------
Tensor or (Tensor, Tensor) or (Tensor, Tensor, Tensor)
All outgoing edges of the nodes with the specified type. For a description of the
returned result, see the description of :attr:`form`.
Examples
--------
The following example uses PyTorch backend.
>>> import dgl
>>> import torch
Create a homogeneous graph.
>>> g = dgl.graph((torch.tensor([0, 0, 1, 1]), torch.tensor([1, 0, 2, 3])))
Query for the nodes 1 and 2.
>>> g.out_edges(torch.tensor([1, 2]))
(tensor([1, 1]), tensor([2, 3]))
Specify a different value for :attr:`form`.
>>> g.out_edges(torch.tensor([1, 2]), form='all')
(tensor([1, 1]), tensor([2, 3]), tensor([2, 3]))
Returns For a graph of multiple edge types, it is required to specify the edge type in query.
-------
tensor or (tensor, tensor, tensor) or (tensor, tensor)
All outbound edges from ``u`` are returned.
* If ``form='eid'``, return a tensor for the ids of the outbound edges >>> hg = dgl.heterograph({
of the nodes with the specified type. ... ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
* If ``form='all'``, return a 3-tuple of tensors ``(eu, ev, eid)``. ... ('user', 'plays', 'game'): (torch.tensor([3, 4]), torch.tensor([5, 6]))
``eid[i]`` gives the ID of the edge from ``eu[i]`` to ``ev[i]``. ... })
* If ``form='uv'``, return a 2-tuple of tensors ``(eu, ev)``. >>> hg.out_edges(torch.tensor([1, 2]), etype='follows')
``ev[i]`` is the destination node of the edge from ``eu[i]``. (tensor([1]), tensor([2]))
Examples See Also
-------- --------
edges
>>> g = dgl.bipartite(([0, 1, 1], [0, 1, 2]), 'user', 'plays', 'game') in_edges
>>> g.out_edges([0, 1], form='eid')
tensor([0, 1, 2])
>>> g.out_edges([0, 1], form='all')
(tensor([0, 1, 1]), tensor([0, 1, 2]), tensor([0, 1, 2]))
>>> g.out_edges([0, 1], form='uv')
(tensor([0, 1, 1]), tensor([0, 1, 2]))
""" """
u = utils.prepare_tensor(self, u, 'u') u = utils.prepare_tensor(self, u, 'u')
srctype, _, _ = self.to_canonical_etype(etype)
if F.as_scalar(F.sum(self.has_nodes(u, ntype=srctype), dim=0)) != len(u):
raise DGLError('u contains invalid node IDs')
src, dst, eid = self._graph.out_edges(self.get_etype_id(etype), u) src, dst, eid = self._graph.out_edges(self.get_etype_id(etype), u)
if form == 'all': if form == 'all':
return src, dst, eid return src, dst, eid
...@@ -2254,49 +3076,75 @@ class DGLHeteroGraph(object): ...@@ -2254,49 +3076,75 @@ class DGLHeteroGraph(object):
else: else:
raise DGLError('Invalid form: {}. Must be "all", "uv" or "eid".'.format(form)) raise DGLError('Invalid form: {}. Must be "all", "uv" or "eid".'.format(form))
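Likewise, a small illustrative sketch of ``form='eid'`` for outgoing edges, assuming the same graph as in the docstring example above:

>>> import dgl
>>> import torch
>>> g = dgl.graph((torch.tensor([0, 0, 1, 1]), torch.tensor([1, 0, 2, 3])))
>>> # IDs of the edges leaving nodes 1 and 2
>>> g.out_edges(torch.tensor([1, 2]), form='eid')
tensor([2, 3])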
def all_edges(self, form='uv', order=None, etype=None): def all_edges(self, form='uv', order='eid', etype=None):
"""Return all edges with the specified type. """Return all edges with the specified edge type.
Parameters Parameters
---------- ----------
form : str, optional form : str, optional
The return form. Currently support: The return form, which can be one of the following:
- ``'eid'`` : one eid tensor - ``'eid'``: The returned result is a 1D tensor :math:`EID`, representing
- ``'all'`` : a tuple ``(u, v, eid)`` the IDs of all edges.
- ``'uv'`` : a pair ``(u, v)``, default - ``'uv'`` (default): The returned result is a 2-tuple of 1D tensors :math:`(U, V)`,
order : str or None representing the source and destination nodes of all edges. For each :math:`i`,
The order of the returned edges. Currently support: :math:`(U[i], V[i])` forms an edge.
- ``'all'``: The returned result is a 3-tuple of 1D tensors :math:`(U, V, EID)`,
- ``'srcdst'`` : sorted by their src and dst ids. representing the source nodes, destination nodes and IDs of all edges.
- ``'eid'`` : sorted by edge Ids. For each :math:`i`, :math:`(U[i], V[i])` forms an edge with ID :math:`EID[i]`.
- ``None`` : arbitrary order, default order : str, optional
The order of the returned edges, which can be one of the following:
- ``'srcdst'``: The edges are sorted first by their source node IDs and then
by their destination node IDs to break ties.
- ``'eid'`` (default): The edges are sorted by their IDs.
etype : str or tuple of str, optional etype : str or tuple of str, optional
The edge type. Can be omitted if there is only one edge type The edge type for query, which can be an edge type (str) or a canonical edge type
in the graph. (Default: None) (3-tuple of str). When an edge type appears in multiple canonical edge types, one
must use a canonical edge type. If the graph has multiple edge types, one must
specify the argument. Otherwise, it can be omitted.
Returns Returns
------- -------
tensor or (tensor, tensor, tensor) or (tensor, tensor) Tensor or (Tensor, Tensor) or (Tensor, Tensor, Tensor)
All edges of the specified edge type. For a description of the returned result,
* If ``form='eid'``, return a tensor for the ids of all edges see the description of :attr:`form`.
with the specified type.
* If ``form='all'``, return a 3-tuple of tensors ``(eu, ev, eid)``.
``eid[i]`` gives the ID of the edge from ``eu[i]`` to ``ev[i]``.
* If ``form='uv'``, return a 2-tuple of tensors ``(eu, ev)``.
``ev[i]`` is the destination node of the edge from ``eu[i]``.
Examples Examples
-------- --------
The following example uses PyTorch backend. The following example uses PyTorch backend.
>>> g = dgl.bipartite(([1, 0, 1], [1, 0, 2]), 'user', 'plays', 'game') >>> import dgl
>>> g.all_edges(form='eid', order='srcdst') >>> import torch
tensor([1, 0, 2])
Create a homogeneous graph.
>>> g = dgl.graph((torch.tensor([0, 0, 1, 1]), torch.tensor([1, 0, 2, 3])))
Query for edges.
>>> g.all_edges()
(tensor([0, 0, 1, 1]), tensor([1, 0, 2, 3]))
Specify a different value for :attr:`form` and :attr:`order`.
>>> g.all_edges(form='all', order='srcdst') >>> g.all_edges(form='all', order='srcdst')
(tensor([0, 1, 1]), tensor([0, 1, 2]), tensor([1, 0, 2])) (tensor([0, 0, 1, 1]), tensor([0, 1, 2, 3]), tensor([1, 0, 2, 3]))
>>> g.all_edges(form='uv', order='eid')
(tensor([1, 0, 1]), tensor([1, 0, 2])) For a graph of multiple edge types, it is required to specify the edge type in query.
>>> hg = dgl.heterograph({
... ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
... ('user', 'plays', 'game'): (torch.tensor([3, 4]), torch.tensor([5, 6]))
... })
>>> hg.all_edges(etype='plays')
(tensor([3, 4]), tensor([5, 6]))
See Also
--------
edges
in_edges
out_edges
""" """
src, dst, eid = self._graph.edges(self.get_etype_id(etype), order) src, dst, eid = self._graph.edges(self.get_etype_id(etype), order)
if form == 'all': if form == 'all':
...@@ -2317,46 +3165,78 @@ class DGLHeteroGraph(object): ...@@ -2317,46 +3165,78 @@ class DGLHeteroGraph(object):
return self.in_degrees(v, etype) return self.in_degrees(v, etype)
def in_degrees(self, v=ALL, etype=None): def in_degrees(self, v=ALL, etype=None):
"""Return the in-degrees of nodes v with edges of type ``etype``. """Return the in-degree(s) of some particular node(s) with the specified edge type.
Parameters Parameters
---------- ----------
v : int, iterable of int or tensor, optional. v : destination node ID(s), optional
The node ID array of the destination type. Default is to return the The destination node(s) for query. The allowed formats are:
degrees of all nodes.
etype : str or tuple of str or None, optional - ``int``: The destination node for query.
The edge type. Can be omitted if there is only one edge type - ``Tensor``: A 1D tensor that contains the destination node(s) for query, whose data
in the graph. (Default: None) type and device should be the same as the :py:attr:`idtype` and device of the graph.
- ``iterable[int]``: Similar to the tensor, but stores node IDs in a sequence
(e.g. list, tuple, numpy.ndarray).
By default, it considers all nodes.
etype : str or tuple of str, optional
The edge type for query, which can be an edge type (str) or a canonical edge type
(3-tuple of str). When an edge type appears in multiple canonical edge types, one
must use a canonical edge type. If the graph has multiple edge types, one must
specify the argument. Otherwise, it can be omitted.
Returns Returns
------- -------
d : tensor or int tensor or int
The in-degree array. ``d[i]`` gives the in-degree of node ``v[i]`` The in-degree(s) of the node(s).
with edges of type ``etype``. If the argument is an integer, so will
be the return. - If :attr:`v` is an ``int`` object, the return result will be an ``int``
object as well.
- If :attr:`v` is a ``Tensor`` or ``iterable[int]`` object, the return result
will be a 1D ``Tensor``. The data type of the result will be the same as the
idtype of the graph. The i-th element of the tensor is the in-degree of the
node ``v[i]``.
Examples Examples
-------- --------
The following example uses PyTorch backend. The following example uses PyTorch backend.
Instantiate a heterograph. >>> import dgl
>>> import torch
>>> plays_g = dgl.bipartite(([0, 1, 1, 2], [0, 0, 2, 1]), 'user', 'plays', 'game') Create a homogeneous graph.
>>> follows_g = dgl.graph(([0, 1, 1], [1, 2, 2]), 'user', 'follows')
>>> g = dgl.hetero_from_relations([plays_g, follows_g])
Query for node degree. >>> g = dgl.graph((torch.tensor([0, 0, 1, 1]), torch.tensor([1, 1, 2, 3])))
>>> g.in_degrees(0, 'plays') Query for all nodes.
2
>>> g.in_degrees(etype='follows') >>> g.in_degrees()
tensor([0, 1, 2]) tensor([0, 2, 1, 1])
Query for nodes 1 and 2.
>>> g.in_degrees(torch.tensor([1, 2]))
tensor([2, 1])
For a graph of multiple edge types, it is required to specify the edge type in query.
>>> hg = dgl.heterograph({
... ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
... ('user', 'plays', 'game'): (torch.tensor([3, 4]), torch.tensor([5, 6]))
... })
>>> hg.in_degrees(torch.tensor([1, 0]), etype='follows')
tensor([1, 0])
See Also
--------
out_degrees
""" """
dsttype = self.to_canonical_etype(etype)[2] dsttype = self.to_canonical_etype(etype)[2]
etid = self.get_etype_id(etype) etid = self.get_etype_id(etype)
if is_all(v): if is_all(v):
v = self.dstnodes(dsttype) v = self.dstnodes(dsttype)
deg = self._graph.in_degrees(etid, utils.prepare_tensor(self, v, 'v')) v_tensor = utils.prepare_tensor(self, v, 'v')
deg = self._graph.in_degrees(etid, v_tensor)
if isinstance(v, numbers.Integral): if isinstance(v, numbers.Integral):
return F.as_scalar(deg) return F.as_scalar(deg)
else: else:
...@@ -2371,48 +3251,78 @@ class DGLHeteroGraph(object): ...@@ -2371,48 +3251,78 @@ class DGLHeteroGraph(object):
return self.out_degrees(u, etype) return self.out_degrees(u, etype)
def out_degrees(self, u=ALL, etype=None): def out_degrees(self, u=ALL, etype=None):
"""Return the out-degrees of nodes u with edges of type ``etype``. """Return the out-degree(s) of some particular node(s) with the specified edge type.
Parameters Parameters
---------- ----------
u : list, tensor u : source node ID(s), optional
The node ID array of source type. Default is to return the degrees The source node(s) for query. The allowed formats are:
of all the nodes. - ``int``: The source node for query.
- ``Tensor``: A 1D tensor that contains the source node(s) for query, whose data
type and device should be the same as the :py:attr:`idtype` and device of the graph.
- ``iterable[int]``: Similar to the tensor, but stores node IDs in a sequence
(e.g. list, tuple, numpy.ndarray).
By default, it considers all nodes.
etype : str or tuple of str, optional etype : str or tuple of str, optional
The edge type. Can be omitted if there is only one edge type The edge type for query, which can be an edge type (str) or a canonical edge type
in the graph. (Default: None) (3-tuple of str). When an edge type appears in multiple canonical edge types, one
must use a canonical edge type. If the graph has multiple edge types, one must
specify the argument. Otherwise, it can be omitted.
Returns Returns
------- -------
d : tensor tensor or int
The out-degree array. ``d[i]`` gives the out-degree of node ``u[i]`` The out-degree(s) of the node(s).
with edges of type ``etype``.
- If :attr:`u` is an ``int`` object, the return result will be an ``int``
object as well.
- If :attr:`u` is a ``Tensor`` or ``iterable[int]`` object, the return result
will be a 1D ``Tensor``. The data type of the result will be the same as the
idtype of the graph. The i-th element of the tensor is the out-degree of the
node ``u[i]``.
Examples Examples
-------- --------
The following example uses PyTorch backend. The following example uses PyTorch backend.
Instantiate a heterograph. >>> import dgl
>>> import torch
>>> plays_g = dgl.bipartite(([0, 1, 1, 2], [0, 0, 2, 1]), 'user', 'plays', 'game') Create a homogeneous graph.
>>> follows_g = dgl.graph(([0, 1, 1], [1, 2, 2]), 'user', 'follows')
>>> g = dgl.hetero_from_relations([plays_g, follows_g])
Query for node degree. >>> g = dgl.graph((torch.tensor([0, 0, 1, 1]), torch.tensor([1, 1, 2, 3])))
>>> g.out_degrees(0, 'plays') Query for all nodes.
1
>>> g.out_degrees(etype='follows') >>> g.out_degrees()
tensor([1, 2, 0]) tensor([2, 2, 0, 0])
Query for nodes 1 and 2.
>>> g.out_degrees(torch.tensor([1, 2]))
tensor([2, 0])
For a graph of multiple edge types, it is required to specify the edge type in query.
>>> hg = dgl.heterograph({
... ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
... ('user', 'plays', 'game'): (torch.tensor([3, 4]), torch.tensor([5, 6]))
... })
>>> hg.out_degrees(torch.tensor([1, 0]), etype='follows')
tensor([1, 1])
See Also See Also
-------- --------
out_degree in_degrees
""" """
srctype = self.to_canonical_etype(etype)[0] srctype = self.to_canonical_etype(etype)[0]
etid = self.get_etype_id(etype) etid = self.get_etype_id(etype)
if is_all(u): if is_all(u):
u = self.srcnodes(srctype) u = self.srcnodes(srctype)
u_tensor = utils.prepare_tensor(self, u, 'u')
if F.as_scalar(F.sum(self.has_nodes(u_tensor, ntype=srctype), dim=0)) != len(u_tensor):
raise DGLError('u contains invalid node IDs')
deg = self._graph.out_degrees(etid, utils.prepare_tensor(self, u, 'u')) deg = self._graph.out_degrees(etid, utils.prepare_tensor(self, u, 'u'))
if isinstance(u, numbers.Integral): if isinstance(u, numbers.Integral):
return F.as_scalar(deg) return F.as_scalar(deg)
...@@ -2420,6 +3330,10 @@ class DGLHeteroGraph(object): ...@@ -2420,6 +3330,10 @@ class DGLHeteroGraph(object):
return deg return deg
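As the Returns sections above state, passing a single ``int`` node ID to either degree query yields a plain ``int`` rather than a tensor. A short sketch, assuming the graph from the docstring examples above:

>>> import dgl
>>> import torch
>>> g = dgl.graph((torch.tensor([0, 0, 1, 1]), torch.tensor([1, 1, 2, 3])))
>>> g.out_degrees(0)
2
>>> g.in_degrees(1)
2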
def adjacency_matrix(self, transpose=None, ctx=F.cpu(), scipy_fmt=None, etype=None): def adjacency_matrix(self, transpose=None, ctx=F.cpu(), scipy_fmt=None, etype=None):
"""Alias of :func:`adj`"""
return self.adj(transpose, ctx, scipy_fmt, etype)
def adj(self, transpose=None, ctx=F.cpu(), scipy_fmt=None, etype=None):
"""Return the adjacency matrix of edges of the given edge type. """Return the adjacency matrix of edges of the given edge type.
By default, a row of returned adjacency matrix represents the By default, a row of returned adjacency matrix represents the
...@@ -2437,9 +3351,12 @@ class DGLHeteroGraph(object): ...@@ -2437,9 +3351,12 @@ class DGLHeteroGraph(object):
scipy_fmt : str, optional scipy_fmt : str, optional
If specified, return a scipy sparse matrix in the given format. If specified, return a scipy sparse matrix in the given format.
Otherwise, return a backend dependent sparse tensor. (Default: None) Otherwise, return a backend dependent sparse tensor. (Default: None)
etype : str, optional etype : str or tuple of str, optional
The edge type. Can be omitted if there is only one edge type The edge type for query, which can be an edge type (str) or a canonical edge type
in the graph. (Default: None) (3-tuple of str). When an edge type appears in multiple canonical edge types, one
must use a canonical edge type. If the graph has multiple edge types, one must
specify the argument. Otherwise, it can be omitted.
Returns Returns
------- -------
...@@ -2449,15 +3366,21 @@ class DGLHeteroGraph(object): ...@@ -2449,15 +3366,21 @@ class DGLHeteroGraph(object):
Examples Examples
-------- --------
The following example uses PyTorch backend.
>>> import dgl
>>> import torch
Instantiate a heterogeneous graph. Instantiate a heterogeneous graph.
>>> follows_g = dgl.graph(([0, 1], [0, 1]), 'user', 'follows') >>> g = dgl.heterograph({
>>> devs_g = dgl.bipartite(([0, 1], [0, 2]), 'developer', 'develops', 'game') ... ('user', 'follows', 'user'): ([0, 1], [0, 1]),
>>> g = dgl.hetero_from_relations([follows_g, devs_g]) ... ('developer', 'develops', 'game'): ([0, 1], [0, 2])
... })
Get a backend dependent sparse tensor. Here we use PyTorch for example. Get a backend dependent sparse tensor. Here we use PyTorch for example.
>>> g.adjacency_matrix(etype='develops') >>> g.adj(etype='develops')
tensor(indices=tensor([[0, 2], tensor(indices=tensor([[0, 2],
[0, 1]]), [0, 1]]),
values=tensor([1., 1.]), values=tensor([1., 1.]),
...@@ -2465,7 +3388,7 @@ class DGLHeteroGraph(object): ...@@ -2465,7 +3388,7 @@ class DGLHeteroGraph(object):
Get a scipy coo sparse matrix. Get a scipy coo sparse matrix.
>>> g.adjacency_matrix(scipy_fmt='coo', etype='develops') >>> g.adj(scipy_fmt='coo', etype='develops')
<3x2 sparse matrix of type '<class 'numpy.int64'>' <3x2 sparse matrix of type '<class 'numpy.int64'>'
with 2 stored elements in COOrdinate format> with 2 stored elements in COOrdinate format>
""" """
...@@ -2482,9 +3405,6 @@ class DGLHeteroGraph(object): ...@@ -2482,9 +3405,6 @@ class DGLHeteroGraph(object):
else: else:
return self._graph.adjacency_matrix_scipy(etid, transpose, scipy_fmt, False) return self._graph.adjacency_matrix_scipy(etid, transpose, scipy_fmt, False)
# Alias of ``adjacency_matrix``
adj = adjacency_matrix
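As the docstring example suggests, the default orientation of the returned adjacency matrix places destination nodes on the rows and source nodes on the columns. A small sketch that makes the layout visible by densifying the SciPy matrix (assuming SciPy is installed; output shown indicatively):

>>> import dgl
>>> import torch
>>> g = dgl.graph((torch.tensor([0, 1]), torch.tensor([1, 2])))
>>> # Edges 0->1 and 1->2 land at (dst, src) positions (1, 0) and (2, 1)
>>> g.adj(scipy_fmt='coo').toarray()
array([[0, 0, 0],
       [1, 0, 0],
       [0, 1, 0]])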
def adjacency_matrix_scipy(self, transpose=None, fmt='csr', return_edge_ids=None): def adjacency_matrix_scipy(self, transpose=None, fmt='csr', return_edge_ids=None):
"""DEPRECATED: please use ``dgl.adjacency_matrix(transpose, scipy_fmt=fmt)``. """DEPRECATED: please use ``dgl.adjacency_matrix(transpose, scipy_fmt=fmt)``.
""" """
...@@ -2495,6 +3415,10 @@ class DGLHeteroGraph(object): ...@@ -2495,6 +3415,10 @@ class DGLHeteroGraph(object):
return self.adjacency_matrix(transpose=transpose, scipy_fmt=fmt) return self.adjacency_matrix(transpose=transpose, scipy_fmt=fmt)
def incidence_matrix(self, typestr, ctx=F.cpu(), etype=None): def incidence_matrix(self, typestr, ctx=F.cpu(), etype=None):
"""Alias of :func:`inc`"""
return self.inc(typestr, ctx, etype)
def inc(self, typestr, ctx=F.cpu(), etype=None):
"""Return the incidence matrix representation of edges with the given """Return the incidence matrix representation of edges with the given
edge type. edge type.
...@@ -2529,9 +3453,12 @@ class DGLHeteroGraph(object): ...@@ -2529,9 +3453,12 @@ class DGLHeteroGraph(object):
Can be either ``in``, ``out`` or ``both`` Can be either ``in``, ``out`` or ``both``
ctx : context, optional ctx : context, optional
The context of returned incidence matrix. (Default: cpu) The context of returned incidence matrix. (Default: cpu)
etype : str, optional etype : str or tuple of str, optional
The edge type. Can be omitted if there is only one edge type The edge type for query, which can be an edge type (str) or a canonical edge type
in the graph. (3-tuple of str). When an edge type appears in multiple canonical edge types, one
must use a canonical edge type. If the graph has multiple edge types, one must
specify the argument. Otherwise, it can be omitted.
Returns Returns
------- -------
...@@ -2541,18 +3468,22 @@ class DGLHeteroGraph(object): ...@@ -2541,18 +3468,22 @@ class DGLHeteroGraph(object):
Examples Examples
-------- --------
>>> g = dgl.graph(([0, 1], [0, 2]), 'user', 'follows') The following example uses PyTorch backend.
>>> g.incidence_matrix('in')
>>> import dgl
>>> g = dgl.graph(([0, 1], [0, 2]))
>>> g.inc('in')
tensor(indices=tensor([[0, 2], tensor(indices=tensor([[0, 2],
[0, 1]]), [0, 1]]),
values=tensor([1., 1.]), values=tensor([1., 1.]),
size=(3, 2), nnz=2, layout=torch.sparse_coo) size=(3, 2), nnz=2, layout=torch.sparse_coo)
>>> g.incidence_matrix('out') >>> g.inc('out')
tensor(indices=tensor([[0, 1], tensor(indices=tensor([[0, 1],
[0, 1]]), [0, 1]]),
values=tensor([1., 1.]), values=tensor([1., 1.]),
size=(3, 2), nnz=2, layout=torch.sparse_coo) size=(3, 2), nnz=2, layout=torch.sparse_coo)
>>> g.incidence_matrix('both') >>> g.inc('both')
tensor(indices=tensor([[1, 2], tensor(indices=tensor([[1, 2],
[1, 1]]), [1, 1]]),
values=tensor([-1., 1.]), values=tensor([-1., 1.]),
...@@ -2561,9 +3492,6 @@ class DGLHeteroGraph(object): ...@@ -2561,9 +3492,6 @@ class DGLHeteroGraph(object):
etid = self.get_etype_id(etype) etid = self.get_etype_id(etype)
return self._graph.incidence_matrix(etid, typestr, ctx)[0] return self._graph.incidence_matrix(etid, typestr, ctx)[0]
# Alias of ``incidence_matrix``
inc = incidence_matrix
################################################################# #################################################################
# Features # Features
################################################################# #################################################################
...@@ -2571,29 +3499,44 @@ class DGLHeteroGraph(object): ...@@ -2571,29 +3499,44 @@ class DGLHeteroGraph(object):
def node_attr_schemes(self, ntype=None): def node_attr_schemes(self, ntype=None):
"""Return the node feature schemes for the specified type. """Return the node feature schemes for the specified type.
Each feature scheme is a named tuple that stores the shape and data type The scheme of a feature describes its shape and data type.
of the node feature.
Parameters Parameters
---------- ----------
ntype : str, optional ntype : str, optional
The node type. Can be omitted if there is only one node The node type for query. If the graph has multiple node types, one must
type in the graph. Error will be raised otherwise. specify the argument. Otherwise, it can be omitted.
(Default: None)
Returns Returns
------- -------
dict of str to schemes dict[str, Scheme]
The schemes of node feature columns. A dictionary mapping a feature name to its associated feature scheme.
Examples Examples
-------- --------
The following uses PyTorch backend. The following example uses PyTorch backend.
>>> import dgl
>>> import torch
Query for a homogeneous graph.
>>> g = dgl.graph(([0, 1], [0, 2]), 'user', 'follows') >>> g = dgl.graph((torch.tensor([0, 1]), torch.tensor([1, 2])))
>>> g.nodes['user'].data['h'] = torch.randn(3, 4) >>> g.ndata['h1'] = torch.randn(3, 1)
>>> g.ndata['h2'] = torch.randn(3, 2)
>>> g.node_attr_schemes()
{'h1': Scheme(shape=(1,), dtype=torch.float32),
'h2': Scheme(shape=(2,), dtype=torch.float32)}
Query for a heterogeneous graph of multiple node types.
>>> g = dgl.heterograph({('user', 'plays', 'game'):
... (torch.tensor([1, 2]), torch.tensor([3, 4]))})
>>> g.nodes['user'].data['h1'] = torch.randn(3, 1)
>>> g.nodes['user'].data['h2'] = torch.randn(3, 2)
>>> g.node_attr_schemes('user') >>> g.node_attr_schemes('user')
{'h': Scheme(shape=(4,), dtype=torch.float32)} {'h1': Scheme(shape=(1,), dtype=torch.float32),
'h2': Scheme(shape=(2,), dtype=torch.float32)}
See Also See Also
-------- --------
...@@ -2604,28 +3547,48 @@ class DGLHeteroGraph(object): ...@@ -2604,28 +3547,48 @@ class DGLHeteroGraph(object):
def edge_attr_schemes(self, etype=None): def edge_attr_schemes(self, etype=None):
"""Return the edge feature schemes for the specified type. """Return the edge feature schemes for the specified type.
Each feature scheme is a named tuple that stores the shape and data type The scheme of a feature describes its shape and data type.
of the edge feature.
Parameters Parameters
---------- ----------
etype : str or tuple of str, optional etype : str or tuple of str, optional
The edge type. Can be omitted if there is only one edge type The edge type for query, which can be an edge type (str) or a canonical edge type
in the graph. (Default: None) (3-tuple of str). When an edge type appears in multiple canonical edge types, one
must use a canonical edge type. If the graph has multiple edge types, one must
specify the argument. Otherwise, it can be omitted.
Returns Returns
------- -------
dict of str to schemes dict[str, Scheme]
The schemes of edge feature columns. A dictionary mapping a feature name to its associated feature scheme.
Examples Examples
-------- --------
The following uses PyTorch backend. The following example uses PyTorch backend.
>>> import dgl
>>> import torch
Query for a homogeneous graph.
>>> g = dgl.bipartite(([0, 1, 1, 2], [0, 0, 2, 1]), 'user', 'plays', 'game') >>> g = dgl.graph((torch.tensor([0, 1]), torch.tensor([1, 2])))
>>> g.edges['user', 'plays', 'game'].data['h'] = torch.randn(4, 4) >>> g.edata['h1'] = torch.randn(2, 1)
>>> g.edge_attr_schemes(('user', 'plays', 'game')) >>> g.edata['h2'] = torch.randn(2, 2)
{'h': Scheme(shape=(4,), dtype=torch.float32)} >>> g.edge_attr_schemes()
{'h1': Scheme(shape=(1,), dtype=torch.float32),
'h2': Scheme(shape=(2,), dtype=torch.float32)}
Query for a heterogeneous graph of multiple edge types.
>>> g = dgl.heterograph({('user', 'plays', 'game'):
... (torch.tensor([1, 2]), torch.tensor([3, 4])),
... ('user', 'follows', 'user'):
... (torch.tensor([3, 4]), torch.tensor([5, 6]))})
>>> g.edges['plays'].data['h1'] = torch.randn(2, 1)
>>> g.edges['plays'].data['h2'] = torch.randn(2, 2)
>>> g.edge_attr_schemes('plays')
{'h1': Scheme(shape=(1,), dtype=torch.float32),
'h2': Scheme(shape=(2,), dtype=torch.float32)}
See Also See Also
-------- --------
...@@ -2634,67 +3597,180 @@ class DGLHeteroGraph(object): ...@@ -2634,67 +3597,180 @@ class DGLHeteroGraph(object):
return self._edge_frames[self.get_etype_id(etype)].schemes return self._edge_frames[self.get_etype_id(etype)].schemes
def set_n_initializer(self, initializer, field=None, ntype=None): def set_n_initializer(self, initializer, field=None, ntype=None):
"""Set the initializer for empty node features. """Set the initializer for node features.
Initializer is a callable that returns a tensor given the shape, data type
and device context.
When a subset of the nodes are assigned a new feature, initializer is When only part of the nodes have a feature (e.g. new nodes are added,
used to create feature for the rest of the nodes. features are set for a subset of nodes), the initializer initializes
features for the rest of the nodes.
Parameters Parameters
---------- ----------
initializer : callable initializer : callable
The initializer, mapping (shape, data type, context) to tensor. A function of signature ``func(shape, dtype, ctx, id_range) -> Tensor``.
The tensor will be the initialized features. The arguments are:
- ``shape``: The shape of the tensor to return, which is a tuple of int.
The first dimension is the number of nodes for feature initialization.
- ``dtype``: The data type of the tensor to return, which is a
framework-specific data type object.
- ``ctx``: The device of the tensor to return, which is a framework-specific
device object.
- ``id_range``: The start and end ID of the nodes for feature initialization,
which is a slice.
field : str, optional field : str, optional
The feature field name. Default is to set an initializer for all the The name of the feature that the initializer applies to. If not given, the
feature fields. initializer applies to all features.
ntype : str, optional ntype : str, optional
The node type. Can be omitted if there is only one node The type of the nodes that the initializer applies to. If the graph has
type in the graph. Error will be raised otherwise. multiple node types, one must specify the argument. Otherwise, it can
(Default: None) be omitted.
Note Notes
----- -----
User defined initializer must follow the signature of Without setting a node feature initializer, zero tensors are generated
:func:`dgl.init.base_initializer() <dgl.init.base_initializer>` for nodes without a feature.
See Also Examples
-------- --------
set_e_initializer
The following example uses PyTorch backend.
>>> import dgl
>>> import torch
Define a function for initializer.
>>> def init_feats(shape, dtype, device, id_range):
... return torch.ones(shape, dtype=dtype, device=device)
An example for a homogeneous graph.
>>> g = dgl.graph((torch.tensor([0]), torch.tensor([1])))
>>> g.ndata['h1'] = torch.zeros(2, 2)
>>> g.ndata['h2'] = torch.ones(2, 1)
>>> # Apply the initializer to feature 'h2' only.
>>> g.set_n_initializer(init_feats, field='h2')
>>> g.add_nodes(1)
>>> print(g.ndata['h1'])
tensor([[0., 0.],
[0., 0.],
[0., 0.]])
>>> print(g.ndata['h2'])
tensor([[1.], [1.], [1.]])
An example for a heterogeneous graph of multiple node types.
>>> g = dgl.heterograph({
... ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
... torch.tensor([0, 0, 1, 1])),
... ('developer', 'develops', 'game'): (torch.tensor([0, 1]),
... torch.tensor([0, 1]))
... })
>>> g.nodes['user'].data['h'] = torch.zeros(3, 2)
>>> g.nodes['game'].data['w'] = torch.ones(2, 2)
>>> g.set_n_initializer(init_feats, ntype='game')
>>> g.add_nodes(1, ntype='user')
>>> # Initializer not set for 'user', use zero tensors by default
>>> g.nodes['user'].data['h']
tensor([[0., 0.],
[0., 0.],
[0., 0.],
[0., 0.]])
>>> # Initializer set for 'game'
>>> g.add_nodes(1, ntype='game')
>>> g.nodes['game'].data['w']
tensor([[1., 1.],
[1., 1.],
[1., 1.]])
""" """
ntid = self.get_ntype_id(ntype) ntid = self.get_ntype_id(ntype)
self._node_frames[ntid].set_initializer(initializer, field) self._node_frames[ntid].set_initializer(initializer, field)
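A custom callable is not required for the common zero-filling case; DGL also ships ``dgl.init.zero_initializer``, which follows the same four-argument signature. A hedged sketch, assuming that helper keeps its historical signature:

>>> import dgl
>>> import torch
>>> g = dgl.graph((torch.tensor([0]), torch.tensor([1])))
>>> g.ndata['h'] = torch.randn(2, 3)
>>> g.set_n_initializer(dgl.init.zero_initializer)
>>> g.add_nodes(2)
>>> g.ndata['h'][2:]  # rows created for the new nodes are zero-filled
tensor([[0., 0., 0.],
        [0., 0., 0.]])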
def set_e_initializer(self, initializer, field=None, etype=None): def set_e_initializer(self, initializer, field=None, etype=None):
"""Set the initializer for empty edge features. """Set the initializer for edge features.
Initializer is a callable that returns a tensor given the shape, data When only part of the edges have a feature (e.g. new edges are added,
type and device context. features are set for a subset of edges), the initializer initializes
features for the rest of the edges.
When a subset of the edges are assigned a new feature, initializer is
used to create feature for rest of the edges.
Parameters Parameters
---------- ----------
initializer : callable initializer : callable
The initializer, mapping (shape, data type, context) to tensor. A function of signature ``func(shape, dtype, ctx, id_range) -> Tensor``.
The tensor will be the initialized features. The arguments are:
- ``shape``: The shape of the tensor to return, which is a tuple of int.
The first dimension is the number of edges for feature initialization.
- ``dtype``: The data type of the tensor to return, which is a
framework-specific data type object.
- ``ctx``: The device of the tensor to return, which is a framework-specific
device object.
- ``id_range``: The start and end ID of the edges for feature initialization,
which is a slice.
field : str, optional field : str, optional
The feature field name. Default is set an initializer for all the The name of the feature that the initializer applies to. If not given, the
feature fields. initializer applies to all features.
etype : str or tuple of str, optional etype : str or tuple of str, optional
The edge type. Can be omitted if there is only one edge type The edge type for query, which can be an edge type (str) or a canonical edge type
in the graph. Error will be raised otherwise. (3-tuple of str). When an edge type appears in multiple canonical edge types, one
(Default: None) must use a canonical edge type. If the graph has multiple edge types, one must
specify the argument. Otherwise, it can be omitted.
Note Notes
----- -----
User defined initializer must follow the signature of Without setting an edge feature initializer, zero tensors are generated
:func:`dgl.init.base_initializer() <dgl.init.base_initializer>` for edges without a feature.
See Also Examples
-------- --------
set_n_initializer
The following example uses PyTorch backend.
>>> import dgl
>>> import torch
Define a function for initializer.
>>> def init_feats(shape, dtype, device, id_range):
... return torch.ones(shape, dtype=dtype, device=device)
An example for a homogeneous graph.
>>> g = dgl.graph((torch.tensor([0]), torch.tensor([1])))
>>> g.edata['h1'] = torch.zeros(1, 2)
>>> g.edata['h2'] = torch.ones(1, 1)
>>> # Apply the initializer to feature 'h2' only.
>>> g.set_e_initializer(init_feats, field='h2')
>>> g.add_edges(torch.tensor([1]), torch.tensor([1]))
>>> print(g.edata['h1'])
tensor([[0., 0.],
[0., 0.]])
>>> print(g.edata['h2'])
tensor([[1.], [1.]])
An example for a heterogeneous graph of multiple edge types.
>>> g = dgl.heterograph({
... ('user', 'plays', 'game'): (torch.tensor([0, 1]),
... torch.tensor([0, 0])),
... ('developer', 'develops', 'game'): (torch.tensor([0, 1]),
... torch.tensor([0, 1]))
... })
>>> g.edges['plays'].data['h'] = torch.zeros(2, 2)
>>> g.edges['develops'].data['w'] = torch.ones(2, 2)
>>> g.set_e_initializer(init_feats, etype='plays')
>>> # Initializer not set for 'develops', use zero tensors by default
>>> g.add_edges(torch.tensor([1]), torch.tensor([1]), etype='develops')
>>> g.edges['develops'].data['w']
tensor([[1., 1.],
[1., 1.],
[0., 0.]])
>>> # Initializer set for 'plays'
>>> g.add_edges(torch.tensor([1]), torch.tensor([1]), etype='plays')
>>> g.edges['plays'].data['h']
tensor([[0., 0.],
[0., 0.],
[1., 1.]])
""" """
etid = self.get_etype_id(etype) etid = self.get_etype_id(etype)
self._edge_frames[etid].set_initializer(initializer, field) self._edge_frames[etid].set_initializer(initializer, field)
...@@ -2897,7 +3973,7 @@ class DGLHeteroGraph(object): ...@@ -2897,7 +3973,7 @@ class DGLHeteroGraph(object):
Examples Examples
-------- --------
>>> g = dgl.graph(([0, 1], [1, 2]), 'user', 'follows') >>> g = dgl.heterograph({('user', 'follows', 'user'): ([0, 1], [1, 2])})
>>> g.nodes['user'].data['h'] = torch.ones(3, 5) >>> g.nodes['user'].data['h'] = torch.ones(3, 5)
>>> g.apply_nodes(lambda nodes: {'h': nodes.data['h'] * 2}, ntype='user') >>> g.apply_nodes(lambda nodes: {'h': nodes.data['h'] * 2}, ntype='user')
>>> g.nodes['user'].data['h'] >>> g.nodes['user'].data['h']
...@@ -2942,7 +4018,7 @@ class DGLHeteroGraph(object): ...@@ -2942,7 +4018,7 @@ class DGLHeteroGraph(object):
Examples Examples
-------- --------
>>> g = dgl.bipartite(([0, 1, 1, 2], [0, 0, 2, 1]), 'user', 'plays', 'game') >>> g = dgl.heterograph({('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 2, 1])})
>>> g.edges[('user', 'plays', 'game')].data['h'] = torch.ones(4, 5) >>> g.edges[('user', 'plays', 'game')].data['h'] = torch.ones(4, 5)
>>> g.apply_edges(lambda edges: {'h': edges.data['h'] * 2}) >>> g.apply_edges(lambda edges: {'h': edges.data['h'] * 2})
>>> g.edges[('user', 'plays', 'game')].data['h'] >>> g.edges[('user', 'plays', 'game')].data['h']
...@@ -3021,12 +4097,13 @@ class DGLHeteroGraph(object): ...@@ -3021,12 +4097,13 @@ class DGLHeteroGraph(object):
>>> import dgl.function as fn >>> import dgl.function as fn
>>> import torch >>> import torch
>>> follows_g = dgl.graph(([0, 1], [1, 2]), 'user', 'follows') >>> g = dgl.heterograph({
>>> plays_g = dgl.bipartite(([0, 1, 1, 2], [0, 0, 1, 1]), 'user', 'plays', 'game') ... ('user', 'follows', 'user'): ([0, 1], [1, 2]),
>>> g = dgl.hetero_from_relations([follows_g, plays_g]) ... ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 1, 1])
... })
>>> g.nodes['user'].data['h'] = torch.tensor([[0.], [1.], [2.]]) >>> g.nodes['user'].data['h'] = torch.tensor([[0.], [1.], [2.]])
>>> g.send_and_recv(g['follows'].edges(), fn.copy_src('h', 'm'), >>> g.send_and_recv(g['follows'].edges(), fn.copy_src('h', 'm'),
>>> fn.sum('m', 'h'), etype='follows') ... fn.sum('m', 'h'), etype='follows')
>>> g.nodes['user'].data['h'] >>> g.nodes['user'].data['h']
tensor([[0.], tensor([[0.],
[0.], [0.],
...@@ -3045,7 +4122,8 @@ class DGLHeteroGraph(object): ...@@ -3045,7 +4122,8 @@ class DGLHeteroGraph(object):
return return
u, v = self.find_edges(eid, etype=etype) u, v = self.find_edges(eid, etype=etype)
# call message passing on subgraph # call message passing on subgraph
ndata = core.message_passing(_create_compute_graph(self, u, v, eid), g = self if etype is None else self[etype]
ndata = core.message_passing(_create_compute_graph(g, u, v, eid),
message_func, reduce_func, apply_node_func) message_func, reduce_func, apply_node_func)
dstnodes = F.unique(v) dstnodes = F.unique(v)
self._set_n_repr(dtid, dstnodes, ndata) self._set_n_repr(dtid, dstnodes, ndata)
...@@ -3106,9 +4184,10 @@ class DGLHeteroGraph(object): ...@@ -3106,9 +4184,10 @@ class DGLHeteroGraph(object):
Instantiate a heterograph. Instantiate a heterograph.
>>> follows_g = dgl.graph(([0, 1], [1, 2]), 'user', 'follows') >>> g = dgl.heterograph({
>>> plays_g = dgl.bipartite(([0, 2], [0, 1]), 'user', 'plays', 'game') ... ('user', 'follows', 'user'): ([0, 1], [1, 2]),
>>> g = dgl.hetero_from_relations([follows_g, plays_g]) ... ('user', 'plays', 'game'): ([0, 2], [0, 1])
... })
>>> g.nodes['user'].data['h'] = torch.tensor([[0.], [1.], [2.]]) >>> g.nodes['user'].data['h'] = torch.tensor([[0.], [1.], [2.]])
Pull. Pull.
...@@ -3131,7 +4210,7 @@ class DGLHeteroGraph(object): ...@@ -3131,7 +4210,7 @@ class DGLHeteroGraph(object):
g = self if etype is None else self[etype] g = self if etype is None else self[etype]
# call message passing on subgraph # call message passing on subgraph
src, dst, eid = g.in_edges(v, form='all') src, dst, eid = g.in_edges(v, form='all')
ndata = core.message_passing(_create_compute_graph(self, src, dst, eid, v), ndata = core.message_passing(_create_compute_graph(g, src, dst, eid, v),
message_func, reduce_func, apply_node_func) message_func, reduce_func, apply_node_func)
self._set_n_repr(dtid, v, ndata) self._set_n_repr(dtid, v, ndata)
...@@ -3182,7 +4261,7 @@ class DGLHeteroGraph(object): ...@@ -3182,7 +4261,7 @@ class DGLHeteroGraph(object):
Instantiate a heterograph. Instantiate a heterograph.
>>> g = dgl.graph(([0, 0], [1, 2]), 'user', 'follows') >>> g = dgl.heterograph({('user', 'follows', 'user'): ([0, 0], [1, 2])})
>>> g.nodes['user'].data['h'] = torch.tensor([[0.], [1.], [2.]]) >>> g.nodes['user'].data['h'] = torch.tensor([[0.], [1.], [2.]])
Push. Push.
...@@ -3240,7 +4319,7 @@ class DGLHeteroGraph(object): ...@@ -3240,7 +4319,7 @@ class DGLHeteroGraph(object):
Instantiate a heterograph. Instantiate a heterograph.
>>> g = dgl.graph(([0, 1, 2], [1, 2, 2]), 'user', 'follows') >>> g = dgl.heterograph({('user', 'follows', 'user'): ([0, 1, 2], [1, 2, 2])})
Update all. Update all.
...@@ -3301,18 +4380,19 @@ class DGLHeteroGraph(object): ...@@ -3301,18 +4380,19 @@ class DGLHeteroGraph(object):
Instantiate a heterograph. Instantiate a heterograph.
>>> g1 = dgl.graph(([0, 1], [1, 1]), 'user', 'follows') >>> g = dgl.heterograph({
>>> g2 = dgl.bipartite(([0], [1]), 'game', 'attracts', 'user') ... ('user', 'follows', 'user'): ([0, 1], [1, 1]),
>>> g = dgl.hetero_from_relations([g1, g2]) ... ('game', 'attracts', 'user'): ([0], [1])
... })
>>> g.nodes['user'].data['h'] = torch.tensor([[1.], [2.]]) >>> g.nodes['user'].data['h'] = torch.tensor([[1.], [2.]])
>>> g.nodes['game'].data['h'] = torch.tensor([[1.]]) >>> g.nodes['game'].data['h'] = torch.tensor([[1.]])
Update all. Update all.
>>> g.multi_update_all( >>> g.multi_update_all(
>>> {'follows': (fn.copy_src('h', 'm'), fn.sum('m', 'h')), ... {'follows': (fn.copy_src('h', 'm'), fn.sum('m', 'h')),
>>> 'attracts': (fn.copy_src('h', 'm'), fn.sum('m', 'h'))}, ... 'attracts': (fn.copy_src('h', 'm'), fn.sum('m', 'h'))},
>>> "sum") ... "sum")
>>> g.nodes['user'].data['h'] >>> g.nodes['user'].data['h']
tensor([[0.], tensor([[0.],
[4.]]) [4.]])
...@@ -3327,7 +4407,8 @@ class DGLHeteroGraph(object): ...@@ -3327,7 +4407,8 @@ class DGLHeteroGraph(object):
raise DGLError('Invalid arguments for edge type "{}". Should be ' raise DGLError('Invalid arguments for edge type "{}". Should be '
'(msg_func, reduce_func, [apply_node_func])'.format(etype)) '(msg_func, reduce_func, [apply_node_func])'.format(etype))
mfunc, rfunc, afunc = args mfunc, rfunc, afunc = args
all_out[dtid].append(core.message_passing(self[etype], mfunc, rfunc, afunc)) g = self if etype is None else self[etype]
all_out[dtid].append(core.message_passing(g, mfunc, rfunc, afunc))
merge_order[dtid].append(etid) # use edge type id as merge order hint merge_order[dtid].append(etid) # use edge type id as merge order hint
for dtid, frames in all_out.items(): for dtid, frames in all_out.items():
# merge by cross_reducer # merge by cross_reducer
...@@ -3381,10 +4462,10 @@ class DGLHeteroGraph(object): ...@@ -3381,10 +4462,10 @@ class DGLHeteroGraph(object):
Instantiate a heterograph and perform multiple rounds of message passing. Instantiate a heterograph and perform multiple rounds of message passing.
>>> g = dgl.graph(([0, 1, 2, 3], [2, 3, 4, 4]), 'user', 'follows') >>> g = dgl.heterograph({('user', 'follows', 'user'): ([0, 1, 2, 3], [2, 3, 4, 4])})
>>> g.nodes['user'].data['h'] = torch.tensor([[1.], [2.], [3.], [4.], [5.]]) >>> g.nodes['user'].data['h'] = torch.tensor([[1.], [2.], [3.], [4.], [5.]])
>>> g['follows'].prop_nodes([[2, 3], [4]], fn.copy_src('h', 'm'), >>> g['follows'].prop_nodes([[2, 3], [4]], fn.copy_src('h', 'm'),
>>> fn.sum('m', 'h'), etype='follows') ... fn.sum('m', 'h'), etype='follows')
tensor([[1.], tensor([[1.],
[2.], [2.],
[1.], [1.],
...@@ -3439,10 +4520,10 @@ class DGLHeteroGraph(object): ...@@ -3439,10 +4520,10 @@ class DGLHeteroGraph(object):
Instantiate a heterograph and perform multiple rounds of message passing. Instantiate a heterograph and perform multiple rounds of message passing.
>>> g = dgl.graph(([0, 1, 2, 3], [2, 3, 4, 4]), 'user', 'follows') >>> g = dgl.heterograph({('user', 'follows', 'user'): ([0, 1, 2, 3], [2, 3, 4, 4])})
>>> g.nodes['user'].data['h'] = torch.tensor([[1.], [2.], [3.], [4.], [5.]]) >>> g.nodes['user'].data['h'] = torch.tensor([[1.], [2.], [3.], [4.], [5.]])
>>> g['follows'].prop_edges([[0, 1], [2, 3]], fn.copy_src('h', 'm'), >>> g['follows'].prop_edges([[0, 1], [2, 3]], fn.copy_src('h', 'm'),
>>> fn.sum('m', 'h'), etype='follows') ... fn.sum('m', 'h'), etype='follows')
>>> g.nodes['user'].data['h'] >>> g.nodes['user'].data['h']
tensor([[1.], tensor([[1.],
[2.], [2.],
...@@ -3463,38 +4544,78 @@ class DGLHeteroGraph(object): ...@@ -3463,38 +4544,78 @@ class DGLHeteroGraph(object):
################################################################# #################################################################
def filter_nodes(self, predicate, nodes=ALL, ntype=None): def filter_nodes(self, predicate, nodes=ALL, ntype=None):
"""Return a tensor of node IDs with the given node type that satisfy """Return the IDs of the nodes with the given node type that satisfy
the given predicate. the given predicate.
Parameters Parameters
---------- ----------
predicate : callable predicate : callable
A function of signature ``func(nodes) -> tensor``. A function of signature ``func(nodes) -> Tensor``.
``nodes`` are :class:`NodeBatch` objects as in :mod:`~dgl.udf`. ``nodes`` are :class:`dgl.NodeBatch` objects.
The ``tensor`` returned should be a 1-D boolean tensor with Its output tensor should be a 1D boolean tensor with
each element indicating whether the corresponding node in each element indicating whether the corresponding node in
the batch satisfies the predicate. the batch satisfies the predicate.
nodes : int, iterable or tensor of ints nodes : node ID(s), optional
The nodes to filter on. Default value is all the nodes. The node(s) for query. The allowed formats are:
- Tensor: A 1D tensor that contains the node(s) for query, whose data type
and device should be the same as the :py:attr:`idtype` and device of the graph.
- iterable[int] : Similar to the tensor, but stores node IDs in a sequence
(e.g. list, tuple, numpy.ndarray).
By default, it considers all nodes.
ntype : str, optional ntype : str, optional
The node type. Can be omitted if there is only one node type The node type for query. If the graph has multiple node types, one must
in the graph. (Default: None) specify the argument. Otherwise, it can be omitted.
Returns Returns
------- -------
tensor tensor
Node ids indicating the nodes that satisfy the predicate. A 1D tensor that contains the ID(s) of the node(s) that satisfy the predicate.
Examples Examples
-------- --------
>>> import torch
The following example uses PyTorch backend.
>>> import dgl >>> import dgl
>>> import dgl.function as fn >>> import torch
>>> g = dgl.graph([], 'user', 'follows', num_nodes=4)
>>> g.nodes['user'].data['h'] = torch.tensor([[0.], [1.], [1.], [0.]]) Define a predicate function.
>>> g.filter_nodes(lambda nodes: (nodes.data['h'] == 1.).squeeze(1), ntype='user')
>>> def nodes_with_feature_one(nodes):
... # Whether a node has feature 1
... return (nodes.data['h'] == 1.).squeeze(1)
Filter nodes for a homogeneous graph.
>>> g = dgl.graph((torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3])))
>>> g.ndata['h'] = torch.tensor([[0.], [1.], [1.], [0.]])
>>> print(g.filter_nodes(nodes_with_feature_one))
tensor([1, 2])
Filter on nodes with IDs 0 and 1
>>> print(g.filter_nodes(nodes_with_feature_one, nodes=torch.tensor([0, 1])))
tensor([1])
Filter nodes for a heterogeneous graph.
>>> g = dgl.heterograph({
... ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
... torch.tensor([0, 0, 1, 1]))})
>>> g.nodes['user'].data['h'] = torch.tensor([[0.], [1.], [1.]])
>>> g.nodes['game'].data['h'] = torch.tensor([[0.], [1.]])
>>> # Filter for 'user' nodes
>>> print(g.filter_nodes(nodes_with_feature_one, ntype='user'))
tensor([1, 2]) tensor([1, 2])
""" """
if is_all(nodes):
nodes = self.nodes(ntype)
v = utils.prepare_tensor(self, nodes, 'nodes')
if F.as_scalar(F.sum(self.has_nodes(v, ntype=ntype), dim=0)) != len(v):
raise DGLError('v contains invalid node IDs')
with self.local_scope(): with self.local_scope():
self.apply_nodes(lambda nbatch: {'_mask' : predicate(nbatch)}, nodes, ntype) self.apply_nodes(lambda nbatch: {'_mask' : predicate(nbatch)}, nodes, ntype)
ntype = self.ntypes[0] if ntype is None else ntype ntype = self.ntypes[0] if ntype is None else ntype
...@@ -3502,43 +4623,105 @@ class DGLHeteroGraph(object): ...@@ -3502,43 +4623,105 @@ class DGLHeteroGraph(object):
if is_all(nodes): if is_all(nodes):
return F.nonzero_1d(mask) return F.nonzero_1d(mask)
else: else:
v = utils.prepare_tensor(self, nodes, 'nodes')
return F.boolean_mask(v, F.gather_row(mask, v)) return F.boolean_mask(v, F.gather_row(mask, v))
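The returned IDs can be used directly to index node features, e.g. to gather the rows that passed the predicate. A short sketch, reusing the homogeneous example from the docstring above:

>>> import dgl
>>> import torch
>>> g = dgl.graph((torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3])))
>>> g.ndata['h'] = torch.tensor([[0.], [1.], [1.], [0.]])
>>> nid = g.filter_nodes(lambda nodes: (nodes.data['h'] == 1.).squeeze(1))
>>> g.ndata['h'][nid]
tensor([[1.],
        [1.]])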
def filter_edges(self, predicate, edges=ALL, etype=None): def filter_edges(self, predicate, edges=ALL, etype=None):
"""Return a tensor of edge IDs with the given edge type that satisfy """Return the IDs of the edges with the given edge type that satisfy
the given predicate. the given predicate.
Parameters Parameters
---------- ----------
predicate : callable predicate : callable
A function of signature ``func(edges) -> tensor``. A function of signature ``func(edges) -> Tensor``.
``edges`` are :class:`EdgeBatch` objects as in :mod:`~dgl.udf`. ``edges`` are :class:`dgl.EdgeBatch` objects.
The ``tensor`` returned should be a 1-D boolean tensor with Its output tensor should be a 1D boolean tensor with
each element indicating whether the corresponding edge in each element indicating whether the corresponding edge in
the batch satisfies the predicate. the batch satisfies the predicate.
edges : valid edges type edges : edge ID(s) or edge end nodes, optional
Edges on which to apply ``func``. See :func:`send` for valid The edge(s) for query. The allowed formats are:
edges type. Default value is all the edges.
etype : str, optional - Tensor: A 1D tensor that contains the IDs of the edge(s) for query, whose data
The edge type. Can be omitted if there is only one edge type type and device should be the same as the :py:attr:`idtype` and device of the graph.
in the graph. (Default: None) - iterable[int]: Similar to the tensor, but stores edge IDs in a sequence
(e.g. list, tuple, numpy.ndarray).
- (Tensor, Tensor): A 2-tuple of the source and destination nodes of multiple
edges for query. Each tensor is a 1D tensor containing node IDs. DGL calls this
format "tuple of node-tensors". The data type and device of the tensors should
be the same as the :py:attr:`idtype` and device of the graph.
- (iterable[int], iterable[int]): Similar to the tuple of node-tensors format,
but stores node IDs in two sequences (e.g. list, tuple, numpy.ndarray).
By default, it considers all edges.
etype : str or tuple of str, optional
The edge type for query, which can be an edge type (str) or a canonical edge type
(3-tuple of str). When an edge type appears in multiple canonical edge types, one
must use a canonical edge type. If the graph has multiple edge types, one must
specify the argument. Otherwise, it can be omitted.
Returns Returns
------- -------
tensor tensor
Edge ids indicating the edges that satisfy the predicate. A 1D tensor that contains the ID(s) of the edge(s) that satisfy the predicate.
Examples Examples
-------- --------
>>> import torch
The following example uses PyTorch backend.
>>> import dgl >>> import dgl
>>> import dgl.function as fn >>> import torch
>>> g = dgl.graph(([0, 0, 1, 2], [0, 1, 2, 3]), 'user', 'follows')
>>> g.edges['follows'].data['h'] = torch.tensor([[0.], [1.], [1.], [0.]]) Define a predicate function.
>>> g.filter_edges(lambda edges: (edges.data['h'] == 1.).squeeze(1), etype='follows')
>>> def edges_with_feature_one(edges):
... # Whether an edge has feature 1
... return (edges.data['h'] == 1.).squeeze(1)
Filter edges for a homogeneous graph.
>>> g = dgl.graph((torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3])))
>>> g.edata['h'] = torch.tensor([[0.], [1.], [1.]])
>>> print(g.filter_edges(edges_with_feature_one))
tensor([1, 2])
Filter on edges with IDs 0 and 1
>>> print(g.filter_edges(edges_with_feature_one, edges=torch.tensor([0, 1])))
tensor([1])
Filter edges for a heterogeneous graph.
>>> g = dgl.heterograph({
... ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
... torch.tensor([0, 0, 1, 1])),
... ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2]))})
>>> g.edges['plays'].data['h'] = torch.tensor([[0.], [1.], [1.], [0.]])
>>> # Filter for 'plays' edges
>>> print(g.filter_edges(edges_with_feature_one, etype='plays'))
tensor([1, 2]) tensor([1, 2])
""" """
if is_all(edges):
pass
elif isinstance(edges, tuple):
u, v = edges
srctype, _, dsttype = self.to_canonical_etype(etype)
u = utils.prepare_tensor(self, u, 'u')
if F.as_scalar(F.sum(self.has_nodes(u, ntype=srctype), dim=0)) != len(u):
raise DGLError('edges[0] contains invalid node IDs')
v = utils.prepare_tensor(self, v, 'v')
if F.as_scalar(F.sum(self.has_nodes(v, ntype=dsttype), dim=0)) != len(v):
raise DGLError('edges[1] contains invalid node IDs')
elif isinstance(edges, Iterable) or F.is_tensor(edges):
edges = utils.prepare_tensor(self, edges, 'edges')
min_eid = F.as_scalar(F.min(edges, 0))
if len(edges) > 0 > min_eid:
raise DGLError('Invalid edge ID {:d}'.format(min_eid))
max_eid = F.as_scalar(F.max(edges, 0))
if len(edges) > 0 and max_eid >= self.num_edges(etype):
raise DGLError('Invalid edge ID {:d}'.format(max_eid))
else:
raise ValueError('Unsupported type of edges:', type(edges))
with self.local_scope():
self.apply_edges(lambda ebatch: {'_mask' : predicate(ebatch)}, edges, etype)
etype = self.canonical_etypes[0] if etype is None else etype
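The docstring examples above do not exercise the tuple-of-node-tensors form of ``edges``; the following is a small self-contained sketch of it (the printed ID assumes the query-and-filter semantics described in the parameter list above):

>>> import dgl
>>> import torch
>>> g = dgl.graph((torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3])))
>>> g.edata['h'] = torch.tensor([[0.], [1.], [1.]])
>>> def edges_with_feature_one(edges):
...     return (edges.data['h'] == 1.).squeeze(1)
>>> # query the edges (0, 1) and (1, 2) by their end nodes
>>> print(g.filter_edges(edges_with_feature_one,
...                      edges=(torch.tensor([0, 1]), torch.tensor([1, 2]))))
tensor([1])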
...@@ -3554,53 +4737,77 @@ class DGLHeteroGraph(object):
@property
def device(self):
"""Get the device of the graph.

Returns
-------
device context
The device of the graph, which should be a framework-specific device object
(e.g., ``torch.device``).

Examples
--------
The following example uses PyTorch backend.

>>> import dgl
>>> import torch

Create a homogeneous graph for demonstration.

>>> g = dgl.graph((torch.tensor([0, 1]), torch.tensor([1, 2])))
>>> print(g.device)
device(type='cpu')
>>> g = g.to('cuda:0')
>>> print(g.device)
device(type='cuda', index=0)

The case of heterogeneous graphs is the same.
"""
return F.to_backend_ctx(self._graph.ctx)
def to(self, device, **kwargs):  # pylint: disable=invalid-name
"""Move ndata, edata and graph structure to the targeted device (cpu/gpu).

If the graph is already on the specified device, the function directly returns it.
Otherwise, it returns a cloned graph on the specified device.

Parameters
----------
device : Framework-specific device context object
The context to move data to (e.g., ``torch.device``).
kwargs : Keyword arguments.
Keyword arguments fed to the framework copy function.

Returns
-------
DGLGraph
The graph on the specified device.

Examples
--------
The following example uses PyTorch backend.

>>> import dgl
>>> import torch

>>> g = dgl.graph((torch.tensor([1, 0]), torch.tensor([1, 2])))
>>> g.ndata['h'] = torch.ones(3, 1)
>>> g.edata['h'] = torch.zeros(2, 2)
>>> g1 = g.to(torch.device('cuda:0'))
>>> print(g1.device)
device(type='cuda', index=0)
>>> print(g1.ndata['h'].device)
device(type='cuda', index=0)
>>> print(g1.nodes().device)
device(type='cuda', index=0)

The original graph is still on CPU.

>>> print(g.device)
device(type='cpu')
>>> print(g.ndata['h'].device)
device(type='cpu')
>>> print(g.nodes().device)
device(type='cpu')

The case of heterogeneous graphs is the same.
"""
if device is None or self.device == device:
return self
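A common calling pattern, sketched here rather than taken from the diff, is to pick the device at runtime and move the graph once before training; only ``torch.cuda.is_available`` and ``DGLGraph.to`` are assumed:

>>> import dgl
>>> import torch
>>> g = dgl.graph((torch.tensor([0, 1]), torch.tensor([1, 2])))
>>> g.ndata['h'] = torch.ones(3, 1)
>>> device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
>>> g = g.to(device)     # graph structure, ndata and edata move together
>>> feat = g.ndata['h']  # already on the same device as the model inputs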
...@@ -3680,62 +4887,72 @@ class DGLHeteroGraph(object):
return ret
def local_var(self):
"""Return a graph object for usage in a local function scope.

The returned graph object shares the feature data and graph structure of this graph.
However, any out-place mutation to the feature data will not reflect to this graph,
thus making it easier to use in a function scope (e.g. forward computation of a model).

If set, the local graph object will use the same initializers for node features and
edge features.

Returns
-------
DGLGraph
The graph object for a local variable.

Notes
-----
Inplace operations do reflect to the original graph. This function also has little
overhead when the number of feature tensors in this graph is small.

Examples
--------
The following example uses PyTorch backend.

>>> import dgl
>>> import torch

Create a function for computation on graphs.

>>> def foo(g):
...     g = g.local_var()
...     g.edata['h'] = torch.ones((g.num_edges(), 3))
...     g.edata['h2'] = torch.ones((g.num_edges(), 3))
...     return g.edata['h']

``local_var`` avoids changing the graph features when exiting the function.

>>> g = dgl.graph((torch.tensor([0, 1, 1]), torch.tensor([0, 0, 2])))
>>> g.edata['h'] = torch.zeros((g.num_edges(), 3))
>>> newh = foo(g)
>>> print(g.edata['h'])  # still get tensor of all zeros
tensor([[0., 0., 0.],
[0., 0., 0.],
[0., 0., 0.]])
>>> 'h2' in g.edata  # new feature set in the function scope is not found
False

In-place operations will still reflect to the original graph.

>>> def foo(g):
...     g = g.local_var()
...     # in-place operation
...     g.edata['h'] += 1
...     return g.edata['h']

>>> g = dgl.graph((torch.tensor([0, 1, 1]), torch.tensor([0, 0, 2])))
>>> g.edata['h'] = torch.zeros((g.num_edges(), 1))
>>> newh = foo(g)
>>> print(g.edata['h'])  # the result changes
tensor([[1.],
[1.],
[1.]])

See Also
--------
local_scope
"""
ret = copy.copy(self)
ret._node_frames = [fr.clone() for fr in self._node_frames]
...@@ -3744,44 +4961,63 @@ class DGLHeteroGraph(object):
@contextmanager
def local_scope(self):
"""Enter a local scope context for the graph.

By entering a local scope, any out-place mutation to the feature data will
not reflect to the original graph, thus making it easier to use in a function scope
(e.g. forward computation of a model).

If set, the local scope will use the same initializers for node features and
edge features.

Notes
-----
Inplace operations do reflect to the original graph. This function also has little
overhead when the number of feature tensors in this graph is small.

Examples
--------
The following example uses PyTorch backend.

>>> import dgl
>>> import torch

Create a function for computation on graphs.

>>> def foo(g):
...     with g.local_scope():
...         g.edata['h'] = torch.ones((g.num_edges(), 3))
...         g.edata['h2'] = torch.ones((g.num_edges(), 3))
...         return g.edata['h']

``local_scope`` avoids changing the graph features when exiting the function.

>>> g = dgl.graph((torch.tensor([0, 1, 1]), torch.tensor([0, 0, 2])))
>>> g.edata['h'] = torch.zeros((g.num_edges(), 3))
>>> newh = foo(g)
>>> print(g.edata['h'])  # still get tensor of all zeros
tensor([[0., 0., 0.],
[0., 0., 0.],
[0., 0., 0.]])
>>> 'h2' in g.edata  # new feature set in the function scope is not found
False

In-place operations will still reflect to the original graph.

>>> def foo(g):
...     with g.local_scope():
...         # in-place operation
...         g.edata['h'] += 1
...         return g.edata['h']

>>> g = dgl.graph((torch.tensor([0, 1, 1]), torch.tensor([0, 0, 2])))
>>> g.edata['h'] = torch.zeros((g.num_edges(), 1))
>>> newh = foo(g)
>>> print(g.edata['h'])  # the result changes
tensor([[1.],
[1.],
[1.]])

See Also
--------
...@@ -3795,10 +5031,6 @@ class DGLHeteroGraph(object):
self._node_frames = old_nframes
self._edge_frames = old_eframes
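To tie this to the "forward computation of a model" use case mentioned in the docstrings above, here is a hedged sketch of a layer-style function that stages temporary features under ``local_scope``; the function name and feature sizes are made up, while ``update_all`` and the ``dgl.function`` built-ins are existing DGL APIs:

>>> import dgl
>>> import dgl.function as fn
>>> import torch
>>> import torch.nn as nn
>>> def mean_conv_forward(g, feat, linear):
...     # Stage temporary features under local_scope; they are dropped on exit.
...     with g.local_scope():
...         g.ndata['h'] = feat
...         g.update_all(fn.copy_u('h', 'm'), fn.mean('m', 'h_neigh'))
...         return linear(g.ndata['h_neigh'])
>>> g = dgl.graph((torch.tensor([0, 1, 1]), torch.tensor([1, 0, 2])))
>>> out = mean_conv_forward(g, torch.ones(3, 4), nn.Linear(4, 2))
>>> out.shape
torch.Size([3, 2])
>>> 'h' in g.ndata    # the temporary features did not leak out
False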
def is_homogeneous(self):
"""Return if the graph is homogeneous."""
return len(self.ntypes) == 1 and len(self.etypes) == 1
def formats(self, formats=None):
r"""Get a cloned graph with the specified sparse format(s) or query
for the usage status of sparse formats
...@@ -3835,7 +5067,7 @@ class DGLHeteroGraph(object):
**Homographs or Heterographs with A Single Edge Type**

>>> g = dgl.graph(([0, 0, 1], [2, 3, 2]))
>>> g.ndata['h'] = torch.ones(4, 1)
>>> # Check status of format usage
>>> g.formats()
...@@ -3855,11 +5087,11 @@ class DGLHeteroGraph(object):
**Heterographs with Multiple Edge Types**

>>> g = dgl.heterograph({
...     ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
...                                 torch.tensor([0, 0, 1, 1])),
...     ('developer', 'develops', 'game'): (torch.tensor([0, 1]),
...                                         torch.tensor([0, 1]))
...     })
>>> g.formats()
{'created': ['coo'], 'not created': ['csr', 'csc']}
>>> # Get a clone of the graph with 'csr' format
...@@ -3894,7 +5126,7 @@ class DGLHeteroGraph(object):
**Homographs or Heterographs with A Single Edge Type**

>>> g = dgl.graph(([0, 0, 1], [2, 3, 2]))
>>> g.formats()
{'created': ['coo'], 'not created': ['csr', 'csc']}
>>> g.create_format_()
...@@ -3904,11 +5136,11 @@ class DGLHeteroGraph(object):
**Heterographs with Multiple Edge Types**

>>> g = dgl.heterograph({
...     ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
...                                 torch.tensor([0, 0, 1, 1])),
...     ('developer', 'develops', 'game'): (torch.tensor([0, 1]),
...                                         torch.tensor([0, 1]))
...     })
>>> g.formats()
{'created': ['coo'], 'not created': ['csr', 'csc']}
>>> g.create_format_()
...@@ -3934,8 +5166,7 @@ class DGLHeteroGraph(object):
"""
if idtype is None:
return self
utils.check_valid_idtype(idtype)
if self.idtype == idtype:
return self
bits = 32 if idtype == F.int32 else 64
...@@ -3974,51 +5205,102 @@ class DGLHeteroGraph(object):
def long(self):
"""Cast the graph to one with idtype int64.

If the graph already has idtype int64, the function directly returns it. Otherwise,
it returns a cloned graph of idtype int64 with features copied (shallow copy).

Returns
-------
DGLGraph
The graph of idtype int64.

Examples
--------
The following example uses PyTorch backend.

>>> import dgl
>>> import torch

Create a graph of idtype int32.

>>> # (0, 1), (0, 2), (1, 2)
>>> g = dgl.graph((torch.tensor([0, 0, 1]).int(), torch.tensor([1, 2, 2]).int()))
>>> g.ndata['feat'] = torch.ones(3, 1)
>>> g.idtype
torch.int32

Cast the graph to one of idtype int64.

>>> # A cloned graph with an idtype of int64
>>> g_long = g.long()
>>> g_long.idtype
torch.int64
>>> # The idtype of the original graph does not change.
>>> g.idtype
torch.int32
>>> g_long.edges()
(tensor([0, 0, 1]), tensor([1, 2, 2]))
>>> g_long.ndata
{'feat': tensor([[1.],
[1.],
[1.]])}

See Also
--------
int
idtype
astype
"""
return self.astype(F.int64)
def int(self):
"""Cast the graph to one with idtype int32.

If the graph already has idtype int32, the function directly returns it. Otherwise,
it returns a cloned graph of idtype int32 with features copied (shallow copy).

Returns
-------
DGLGraph
The graph of idtype int32.

Examples
--------
The following example uses PyTorch backend.

>>> import dgl
>>> import torch

Create a graph of idtype int64.

>>> # (0, 1), (0, 2), (1, 2)
>>> g = dgl.graph((torch.tensor([0, 0, 1]), torch.tensor([1, 2, 2])))
>>> g.ndata['feat'] = torch.ones(3, 1)
>>> g.idtype
torch.int64

Cast the graph to one of idtype int32.

>>> # A cloned graph with an idtype of int32
>>> g_int = g.int()
>>> g_int.idtype
torch.int32
>>> # The idtype of the original graph does not change.
>>> g.idtype
torch.int64
>>> g_int.edges()
(tensor([0, 0, 1], dtype=torch.int32), tensor([1, 2, 2], dtype=torch.int32))
>>> g_int.ndata
{'feat': tensor([[1.],
[1.],
[1.]])}

See Also
--------
long
idtype
astype
"""
return self.astype(F.int32)
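Casting after construction is not the only route; ``dgl.graph`` also accepts an ``idtype`` argument, so a sketch of creating an int32 graph directly and widening it on demand looks like this (subject to the same int32/int64-only restriction documented above):

>>> import dgl
>>> import torch
>>> g32 = dgl.graph((torch.tensor([0, 0, 1]), torch.tensor([1, 2, 2])), idtype=torch.int32)
>>> g32.idtype
torch.int32
>>> g32.long().idtype   # cast to int64 when a consumer requires it
torch.int64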
...@@ -4280,7 +5562,7 @@ def reduce_dict_data(frames, reducer, order=None):
ret[k] = merger(flist)
return ret

def combine_frames(frames, ids, col_names=None):
"""Merge the frames into one frame, taking the common columns.

Return None if there are no common columns.
...@@ -4291,6 +5573,8 @@ def combine_frames(frames, ids):
List of frames
ids : List[int]
List of frame IDs
col_names : List[str], optional
Column names to consider. If not given, it considers all columns.

Returns
-------
...@@ -4298,7 +5582,10 @@ def combine_frames(frames, ids):
The resulting frame
"""
# find common columns and check if their schemes match
if col_names is None:
schemes = {key: scheme for key, scheme in frames[ids[0]].schemes.items()}
else:
schemes = {key: frames[ids[0]].schemes[key] for key in col_names}
for frame_id in ids:
frame = frames[frame_id]
for key, scheme in list(schemes.items()):
......
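``combine_frames`` works on DGL's internal ``Frame`` objects, so the following is only an illustrative analog in plain Python (dict-of-tensors instead of frames, all names made up) of the same idea: keep the columns common to every input whose schemes match, concatenate them, and return None when nothing is common.

>>> import torch
>>> def combine_dicts(dicts):
...     # Keep only keys present in every dict with a matching scheme
...     # (trailing shape and dtype), then concatenate along dim 0.
...     common = set(dicts[0])
...     for d in dicts[1:]:
...         common = {k for k in common if k in d
...                   and d[k].shape[1:] == dicts[0][k].shape[1:]
...                   and d[k].dtype == dicts[0][k].dtype}
...     if not common:
...         return None
...     return {k: torch.cat([d[k] for d in dicts], dim=0) for k in common}
>>> frames = [{'x': torch.zeros(2, 3), 'y': torch.zeros(2, 1)},
...           {'x': torch.ones(4, 3)}]
>>> out = combine_dicts(frames)
>>> sorted(out)    # only the common column 'x' survives
['x']
>>> out['x'].shape
torch.Size([6, 3])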
...@@ -547,6 +547,9 @@ class HeteroGraphIndex(ObjectBase):
"""
if order is None:
order = ""
elif order not in ['srcdst', 'eid']:
raise DGLError("Expect order to be one of None, 'srcdst', 'eid', "
"got {}".format(order))
edge_array = _CAPI_DGLHeteroEdges(self, int(etype), order)
src = F.from_dgl_nd(edge_array(0))
dst = F.from_dgl_nd(edge_array(1))
......
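The new ``order`` check surfaces through the Python-level ``DGLGraph.edges`` call; a small sketch of the two accepted orders (any other string now raises ``DGLError`` per the check above):

>>> import dgl
>>> import torch
>>> g = dgl.graph((torch.tensor([2, 0, 1]), torch.tensor([0, 1, 2])))
>>> g.edges(order='eid')      # insertion order
(tensor([2, 0, 1]), tensor([0, 1, 2]))
>>> g.edges(order='srcdst')   # sorted by source, then destination
(tensor([0, 1, 2]), tensor([1, 2, 0]))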
...@@ -76,8 +76,9 @@ class GatedGraphConv(nn.Block):
is the output feature size.
"""
with graph.local_scope():
assert graph.is_homogeneous, \
"not a homogeneous graph; convert it with to_homogeneous " \
"and pass in the edge type as argument"
zero_pad = nd.zeros((feat.shape[0], self._out_feats - feat.shape[1]),
ctx=feat.context)
feat = nd.concat(feat, zero_pad, dim=-1)
......
...@@ -229,8 +229,9 @@ class RelGraphConv(gluon.Block):
mx.ndarray.NDArray
New node features.
"""
assert g.is_homogeneous, \
"not a homogeneous graph; convert it with to_homogeneous " \
"and pass in the edge type as argument"
with g.local_scope():
g.ndata['h'] = x
g.edata['type'] = etypes
......
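For callers that hit the updated assertions in these modules, here is a hedged sketch of the suggested conversion path; ``dgl.to_homogeneous`` and the ``dgl.ETYPE`` edge-type field are existing DGL APIs, and the toy heterograph below is made up (PyTorch backend for illustration):

>>> import dgl
>>> import torch
>>> hg = dgl.heterograph({
...     ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
...     ('user', 'plays', 'game'): (torch.tensor([0, 2]), torch.tensor([0, 1]))})
>>> g = dgl.to_homogeneous(hg)
>>> g.is_homogeneous              # now passes the assertions above
True
>>> etypes = g.edata[dgl.ETYPE]   # per-edge type IDs, e.g. for RelGraphConv
>>> etypes.shape
torch.Size([4])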