Unverified commit be444e52, authored by Mufei Li and committed by GitHub
Browse files

[Doc/Feature] Refactor, doc update and behavior fix for graphs (#1983)



* Update graph

* Fix for dgl.graph

* from_scipy

* Replace canonical_etypes with relations

* from_networkx

* Update for hetero_from_relations

* Roll back the change of canonical_etypes to relations

* heterograph

* bipartite

* Update doc

* Fix lint

* Fix lint

* Fix test cases

* Fix

* Fix

* Fix

* Fix

* Fix

* Fix

* Update

* Fix test

* Fix

* Update

* Use DGLError

* Update

* Update

* Update

* Update

* Fix

* Fix

* Fix

* Fix

* Fix

* Fix

* Fix

* Fix

* Update

* Fix

* Update

* Fix

* Fix

* Fix

* Update

* Fix

* Update

* Fix

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Fix

* Fix

* Update

* Update

* Update

* Update

* Update

* Update

* rewrite sanity checks

* delete unnecessary checks

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Fix

* Update

* Update

* Update

* Fix

* Fix

* Fix

* Update

* Fix

* Update

* Fix

* Fix

* Update

* Fix

* Update

* Fix
Co-authored-by: xiang song(charlie.song) <classicxsong@gmail.com>
Co-authored-by: Minjie Wang <wmjlyjemaine@gmail.com>
Co-authored-by: Quan Gan <coin2028@hotmail.com>
parent 0afc3cf8
......@@ -66,7 +66,6 @@ def main(args):
hg = dataset[0]
num_rels = len(hg.canonical_etypes)
num_of_ntype = len(hg.ntypes)
category = dataset.predict_category
num_classes = dataset.num_classes
train_mask = hg.nodes[category].data.pop('train_mask')
......@@ -98,7 +97,7 @@ def main(args):
category_id = i
# edge type and normalization factor
g = dgl.to_homo(hg)
g = dgl.to_homogeneous(hg, edata=['norm'])
# check cuda
if args.gpu < 0:
......
......@@ -89,15 +89,15 @@ ToBlock(HeteroGraphPtr graph, const std::vector<IdArray> &rhs_nodes, bool includ
* * \c count : The array of edge occurrences per edge type.
* * \c edge_map : The mapping from original edge IDs to new edge IDs per edge type.
*
* \note Example: consider the following graph:
* \note Example: consider a graph with the following edges
*
* g = dgl.graph([(0, 1), (1, 3), (2, 2), (1, 3), (1, 4), (1, 4)])
* [(0, 1), (1, 3), (2, 2), (1, 3), (1, 4), (1, 4)]
*
* Then ToSimpleGraph(g) would yield the following elements:
*
* * The first element would be the simple graph itself:
* * The first element would be the simple graph itself with the following edges
*
* simple_g = dgl.graph([(0, 1), (1, 3), (1, 4), (2, 2)])
* [(0, 1), (1, 3), (1, 4), (2, 2)]
*
* * The second element is an array \c count. \c count[i] stands for the number of edges
* connecting simple_g.src[i] and simple_g.dst[i] in the original graph.
......
......@@ -4038,6 +4038,7 @@ class DGLGraph(DGLBaseGraph):
self._node_frame = old_nframe
self._edge_frame = old_eframe
@property
def is_homogeneous(self):
"""Return if the graph is homogeneous."""
return True
......
......@@ -397,7 +397,7 @@ def unbatch(g, node_split=None, edge_split=None):
for i in range(num_split)]
# Create graphs
gs = [convert.heterograph(edge_dict, num_nodes_dict, validate=True, idtype=g.idtype)
gs = [convert.heterograph(edge_dict, num_nodes_dict, idtype=g.idtype)
for edge_dict, num_nodes_dict in zip(edge_dict_per, num_nodes_dict_per)]
# Unbatch node features
......
"""Module for converting graph from/to other object."""
# pylint: disable=dangerous-default-value
from collections import defaultdict
from scipy.sparse import spmatrix
import numpy as np
import networkx as nx
......@@ -17,397 +17,165 @@ __all__ = [
'hetero_from_relations',
'hetero_from_shared_memory',
'heterograph',
'to_heterogeneous',
'to_hetero',
'to_homogeneous',
'to_homo',
'from_scipy',
'bipartite_from_scipy',
'from_networkx',
'bipartite_from_networkx',
'to_networkx',
]
def graph(data,
ntype='_N', etype='_E',
ntype=None, etype=None,
*,
num_nodes=None,
validate=True,
formats=['coo', 'csr', 'csc'],
idtype=None,
device=None,
card=None,
**deprecated_kwargs):
"""Create a graph with one type of nodes and edges.
In the sparse matrix perspective, :func:`dgl.graph` creates a graph
whose adjacency matrix must be square while :func:`dgl.bipartite`
creates a graph that does not necessarily have square adjacency matrix.
"""Create a graph.
Parameters
----------
data : graph data
Data to initialize graph structure. Supported data formats are
(1) list of edge pairs (e.g. [(0, 2), (3, 1), ...])
(2) pair of vertex IDs representing end nodes (e.g. ([0, 3, ...], [2, 1, ...]))
(3) scipy sparse matrix
(4) networkx graph
The data for constructing a graph, which takes the form of :math:`(U, V)`.
:math:`(U[i], V[i])` forms the edge with ID :math:`i` in the graph.
The allowed data formats are:
- ``(Tensor, Tensor)``: Each tensor must be a 1D tensor containing node IDs.
DGL calls this format "tuple of node-tensors". The tensors should have the same
data type of int32/int64 and device context (see below the descriptions of
:attr:`idtype` and :attr:`device`).
- ``(iterable[int], iterable[int])``: Similar to the tuple of node-tensors
format, but stores node IDs in two sequences (e.g. list, tuple, numpy.ndarray).
ntype : str, optional
Node type name. (Default: _N)
Deprecated. To construct a graph with named node types, use :func:`dgl.heterograph`.
etype : str, optional
Edge type name. (Default: _E)
Deprecated. To construct a graph with named edge types, use :func:`dgl.heterograph`.
num_nodes : int, optional
Number of nodes in the graph. If None, infer from input data, i.e.
the largest node ID plus 1. (Default: None)
validate : bool, optional
If True, check if node ids are within cardinality, the check process may take
some time. (Default: True)
If False and card is not None, user would receive a warning.
formats : str or list of str
It can be ``'coo'``/``'csr'``/``'csc'`` or a sublist of them,
Force the storage formats. Default: ``['coo', 'csr', 'csc']``.
idtype : int32, int64, optional
Integer ID type. Valid options are int32 or int64. If None, try infer from
the given data.
device : Device context, optional
Device on which the graph is created. Default: infer from data.
card : int, optional
Deprecated (see :attr:`num_nodes`). Cardinality (number of nodes in the graph).
If None, infer from input data, i.e. the largest node ID plus 1. (Default: None)
The number of nodes in the graph. If not given, this will be the largest node ID
plus 1 from the :attr:`data` argument. If given and the value is no greater than
the largest node ID from the :attr:`data` argument, DGL will raise an error.
idtype : int32 or int64, optional
The data type for storing the structure-related graph information such as node and
edge IDs. It should be a framework-specific data type object (e.g., ``torch.int32``).
If ``None`` (default), DGL infers the ID type from the :attr:`data` argument.
See "Notes" for more details.
device : device context, optional
The device of the returned graph, which should be a framework-specific device object
(e.g., ``torch.device``). If ``None`` (default), DGL uses the device of the tensors of
the :attr:`data` argument. If :attr:`data` is not a tuple of node-tensors, the
returned graph is on CPU. If the specified :attr:`device` differs from that of the
provided tensors, it casts the given tensors to the specified device first.
Returns
-------
DGLHeteroGraph
DGLGraph
The created graph.
Notes
-----
1. If the :attr:`idtype` argument is not given then:
- in the case of the tuple of node-tensors format, DGL uses the
data type of the given ID tensors.
- in the case of the tuple of sequence format, DGL uses int64.
Once the graph has been created, you can change the data type by using
:func:`dgl.DGLGraph.long` or :func:`dgl.DGLGraph.int`.
If the specified :attr:`idtype` argument differs from the data type of the provided
tensors, it casts the given tensors to the specified data type first.
2. The most efficient construction approach is to provide a tuple of node tensors without
specifying :attr:`idtype` and :attr:`device`. This is because the returned graph shares
the storage with the input node-tensors in this case.
3. DGL internally maintains multiple copies of the graph structure in different
`sparse formats <https://en.wikipedia.org/wiki/Sparse_matrix>`_ and chooses the most
efficient one depending on the computation invoked. If memory usage becomes an issue
in the case of large graphs, use :func:`dgl.DGLGraph.formats` to restrict the allowed
formats.
Examples
--------
Create from pairs of edges with form (src, dst)
>>> g = dgl.graph([(0, 2), (0, 3), (1, 2)])
The following example uses PyTorch backend.
Create from source and destination vertex ID lists
>>> import dgl
>>> import torch
>>> u = [0, 0, 1]
>>> v = [2, 3, 2]
>>> g = dgl.graph((u, v))
Create a small three-edge graph.
The IDs can also be stored in framework-specific tensors
>>> # Source nodes for edges (2, 1), (3, 2), (4, 3)
>>> src_ids = torch.tensor([2, 3, 4])
>>> # Destination nodes for edges (2, 1), (3, 2), (4, 3)
>>> dst_ids = torch.tensor([1, 2, 3])
>>> g = dgl.graph((src_ids, dst_ids))
>>> import torch
>>> u = torch.tensor([0, 0, 1])
>>> v = torch.tensor([2, 3, 2])
>>> g = dgl.graph((u, v))
Explicitly specify the number of nodes in the graph.
Create from scipy sparse matrix
>>> g = dgl.graph((src_ids, dst_ids), num_nodes=100)
>>> from scipy.sparse import coo_matrix
>>> spmat = coo_matrix(([1,1,1], ([0, 0, 1], [2, 3, 2])), shape=(4, 4))
>>> g = dgl.graph(spmat)
Create a graph on the first GPU with data type int32.
Create from networkx graph
>>> g = dgl.graph((src_ids, dst_ids), idtype=torch.int32, device='cuda:0')
>>> import networkx as nx
>>> nxg = nx.path_graph(3)
>>> g = dgl.graph(nxg)
Specify node and edge type names
>>> g = dgl.graph(..., 'user', 'follows')
>>> g.ntypes
['user']
>>> g.etypes
['follows']
>>> g.canonical_etypes
[('user', 'follows', 'user')]
Check if node ids are within num_nodes specified
>>> g = dgl.graph(([0, 1, 2], [1, 2, 0]), num_nodes=2, validate=True)
...
dgl._ffi.base.DGLError: Invalid node id 2 (should be less than cardinality 2).
>>> g = dgl.graph(([0, 1, 2], [1, 2, 0]), num_nodes=3, validate=True)
Graph(num_nodes=3, num_edges=3,
ndata_schemes={}
edata_schemes={})
See Also
--------
from_scipy
from_networkx
"""
# Deprecated arguments
if ntype is not None:
raise DGLError('The ntype argument is deprecated for dgl.graph. To construct ' \
'a graph with named node types, use dgl.heterograph.')
if etype is not None:
raise DGLError('The etype argument is deprecated for dgl.graph. To construct ' \
'a graph with named edge types, use dgl.heterograph.')
if isinstance(data, spmatrix):
raise DGLError("dgl.graph no longer supports graph construction from a SciPy "
"sparse matrix, use dgl.from_scipy instead.")
if isinstance(data, nx.Graph):
raise DGLError("dgl.graph no longer supports graph construction from a NetworkX "
"graph, use dgl.from_networkx instead.")
if len(deprecated_kwargs) != 0:
raise DGLError("Key word arguments {} have been removed from dgl.graph()."
" They are moved to dgl.from_scipy() and dgl.from_networkx()."
" Please refer to their API documents for more details.".format(
deprecated_kwargs.keys()))
if isinstance(data, DGLHeteroGraph):
return data.astype(idtype).to(device)
if card is not None:
dgl_warning("Argument 'card' will be deprecated. "
"Please use num_nodes={} instead.".format(card))
num_nodes = card
u, v, urange, vrange = utils.graphdata2tensors(data, idtype)
if num_nodes is not None: # override the number of nodes
if num_nodes < max(urange, vrange):
raise DGLError('The num_nodes argument must be larger than the max ID in the data,'
' but got {} and {}.'.format(num_nodes, max(urange, vrange) - 1))
urange, vrange = num_nodes, num_nodes
g = create_from_edges(u, v, ntype, etype, ntype, urange, vrange,
validate, formats=formats)
g = create_from_edges(u, v, '_N', '_E', '_N', urange, vrange)
return g.to(device)
def bipartite(data,
utype='_U', etype='_E', vtype='_V',
num_nodes=None,
validate=True,
formats=['coo', 'csr', 'csc'],
idtype=None,
device=None,
card=None,
**deprecated_kwargs):
"""Create a bipartite graph.
The result graph is directed and edges must be from ``utype`` nodes
to ``vtype`` nodes. Nodes of each type have their own ID counts.
In the sparse matrix perspective, :func:`dgl.graph` creates a graph
whose adjacency matrix must be square while :func:`dgl.bipartite`
creates a graph that does not necessarily have square adjacency matrix.
Parameters
----------
data : graph data
Data to initialize graph structure. Supported data formats are
(1) list of edge pairs (e.g. [(0, 2), (3, 1), ...])
(2) pair of vertex IDs representing end nodes (e.g. ([0, 3, ...], [2, 1, ...]))
(3) scipy sparse matrix
(4) networkx graph
utype : str, optional
Source node type name. (Default: _U)
etype : str, optional
Edge type name. (Default: _E)
vtype : str, optional
Destination node type name. (Default: _V)
num_nodes : 2-tuple of int, optional
Number of nodes in the source and destination group. If None, infer from input data,
i.e. the largest node ID plus 1 for each type. (Default: None)
validate : bool, optional
If True, check if node ids are within cardinality, the check process may take
some time. (Default: True)
If False and card is not None, user would receive a warning.
formats : str or list of str
It can be ``'coo'``/``'csr'``/``'csc'`` or a sublist of them,
Force the storage formats. Default: ``['coo', 'csr', 'csc']``.
idtype : int32, int64, optional
Integer ID type. Valid options are int32 or int64. If None, try infer from
the given data.
device : Device context, optional
Device on which the graph is created. Default: infer from data.
card : 2-tuple of int, optional
Deprecated (see :attr:`num_nodes`). Cardinality (number of nodes in the source and
destination group). If None, infer from input data, i.e. the largest node ID plus 1
for each type. (Default: None)
Returns
-------
DGLHeteroGraph
Examples
--------
Create from pairs of edges
>>> g = dgl.bipartite([(0, 2), (0, 3), (1, 2)], 'user', 'plays', 'game')
>>> g.ntypes
['user', 'game']
>>> g.etypes
['plays']
>>> g.canonical_etypes
[('user', 'plays', 'game')]
>>> g.number_of_nodes('user')
2
>>> g.number_of_nodes('game')
4
>>> g.number_of_edges('plays') # 'plays' could be omitted here
3
Create from source and destination vertex ID lists
>>> u = [0, 0, 1]
>>> v = [2, 3, 2]
>>> g = dgl.bipartite((u, v))
The IDs can also be stored in framework-specific tensors
>>> import torch
>>> u = torch.tensor([0, 0, 1])
>>> v = torch.tensor([2, 3, 2])
>>> g = dgl.bipartite((u, v))
Create from scipy sparse matrix. Since scipy sparse matrix has explicit
shape, the cardinality of the result graph is derived from that.
>>> from scipy.sparse import coo_matrix
>>> spmat = coo_matrix(([1,1,1], ([0, 0, 1], [2, 3, 2])), shape=(4, 4))
>>> g = dgl.bipartite(spmat, 'user', 'plays', 'game')
>>> g.number_of_nodes('user')
4
>>> g.number_of_nodes('game')
4
Create from networkx graph. The given graph must follow the bipartite
graph convention in networkx. Each node has a ``bipartite`` attribute
with values 0 or 1. The result graph has two types of nodes and only
edges from ``bipartite=0`` to ``bipartite=1`` will be included.
>>> import networkx as nx
>>> nxg = nx.complete_bipartite_graph(3, 4)
>>> g = dgl.bipartite(nxg, 'user', 'plays', 'game')
>>> g.number_of_nodes('user')
3
>>> g.number_of_nodes('game')
4
>>> g.edges()
(tensor([0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2]), tensor([0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]))
Check if node ids are within num_nodes specified
>>> g = dgl.bipartite(([0, 1, 2], [1, 2, 3]), num_nodes=(2, 4), validate=True)
...
dgl._ffi.base.DGLError: Invalid node id 2 (should be less than cardinality 2).
>>> g = dgl.bipartite(([0, 1, 2], [1, 2, 3]), num_nodes=(3, 4), validate=True)
>>> g
Graph(num_nodes={'_U': 3, '_V': 4},
num_edges={('_U', '_E', '_V'): 3},
metagraph=[('_U', '_V')])
"""
if len(deprecated_kwargs) != 0:
raise DGLError("Key word arguments {} have been removed from dgl.graph()."
" They are moved to dgl.from_scipy() and dgl.from_networkx()."
" Please refer to their API documents for more details.".format(
deprecated_kwargs.keys()))
if utype == vtype:
raise DGLError('utype should not be equal to vtype. Use ``dgl.graph`` instead.')
if card is not None:
dgl_warning("Argument 'card' will be deprecated. "
"Please use num_nodes={} instead.".format(card))
num_nodes = card
u, v, urange, vrange = utils.graphdata2tensors(data, idtype, bipartite=True)
if num_nodes is not None: # override the number of nodes
urange, vrange = num_nodes
g = create_from_edges(
u, v, utype, etype, vtype, urange, vrange, validate,
formats=formats)
return g.to(device)
validate=True,
restrict_format='any',
**kwargs):
"""DEPRECATED: use dgl.heterograph instead."""
raise DGLError(
'dgl.bipartite is deprecated. Use dgl.heterograph({' +
"('{}', '{}', '{}')".format(utype, etype, vtype) +
' : data} to create a bipartite graph instead.')
def hetero_from_relations(rel_graphs, num_nodes_per_type=None):
"""Create a heterograph from graphs representing connections of each relation.
The input is a list of heterographs where the ``i``th graph contains edges of type
:math:`(s_i, e_i, d_i)`.
If two graphs share a same node type, the number of nodes for the corresponding type
should be the same. See **Examples** for details.
Parameters
----------
rel_graphs : list of DGLHeteroGraph
Each element corresponds to a heterograph for one (src, edge, dst) relation.
num_nodes_per_type : dict[str, Tensor], optional
Number of nodes per node type. If not given, DGL will infer the number of nodes
from the given relation graphs.
Returns
-------
DGLHeteroGraph
A heterograph consisting of all relations.
Examples
--------
>>> import dgl
>>> follows_g = dgl.graph([(0, 1), (1, 2)], 'user', 'follows')
>>> plays_g = dgl.bipartite([(0, 0), (3, 1)], 'user', 'plays', 'game')
>>> devs_g = dgl.bipartite([(0, 0), (1, 1)], 'developer', 'develops', 'game')
>>> g = dgl.hetero_from_relations([follows_g, plays_g, devs_g])
will raise an error as we have 3 nodes of type 'user' in follows_g and 4 nodes of type
'user' in plays_g.
We have two possible methods to avoid the error.
**Method 1**: Manually specify the number of nodes for all types when constructing
the relation graphs.
>>> # A graph with 4 nodes of type 'user'
>>> follows_g = dgl.graph([(0, 1), (1, 2)], 'user', 'follows', num_nodes=4)
>>> # A bipartite graph with 4 nodes of src type ('user') and 2 nodes of dst type ('game')
>>> plays_g = dgl.bipartite([(0, 0), (3, 1)], 'user', 'plays', 'game', num_nodes=(4, 2))
>>> devs_g = dgl.bipartite([(0, 0), (1, 1)], 'developer', 'develops', 'game')
>>> g = dgl.hetero_from_relations([follows_g, plays_g, devs_g])
>>> print(g)
Graph(num_nodes={'user': 4, 'game': 2, 'developer': 2},
num_edges={('user', 'follows', 'user'): 2, ('user', 'plays', 'game'): 2,
('developer', 'develops', 'game'): 2},
metagraph=[('user', 'user'), ('user', 'game'), ('developer', 'game')])
``devs_g`` does not have nodes of type ``'user'`` so no error will be raised.
**Method 2**: Construct a heterograph at once without intermediate relation graphs,
in which case we will infer the number of nodes for each type.
>>> g = dgl.heterograph({
>>> ('user', 'follows', 'user'): [(0, 1), (1, 2)],
>>> ('user', 'plays', 'game'): [(0, 0), (3, 1)],
>>> ('developer', 'develops', 'game'): [(0, 0), (1, 1)]
>>> })
>>> print(g)
Graph(num_nodes={'user': 4, 'game': 2, 'developer': 2},
num_edges={('user', 'follows', 'user'): 2,
('user', 'plays', 'game'): 2,
('developer', 'develops', 'game'): 2},
metagraph=[('user', 'user'), ('user', 'game'), ('developer', 'game')])
"""
utils.check_all_same_idtype(rel_graphs, 'rel_graphs')
utils.check_all_same_device(rel_graphs, 'rel_graphs')
# TODO(minjie): this API can be generalized as a union operation of the input graphs
# TODO(minjie): handle node/edge data
# infer meta graph
meta_edges_src, meta_edges_dst = [], []
ntypes = []
etypes = []
# TODO(BarclayII): I'm keeping the node type names sorted because even if
# the metagraph is the same, the same node type name in different graphs may
# map to different node type IDs.
# In the future, we need to lower the type names into C++.
if num_nodes_per_type is None:
ntype_set = set()
for rgrh in rel_graphs:
assert len(rgrh.etypes) == 1
stype, etype, dtype = rgrh.canonical_etypes[0]
ntype_set.add(stype)
ntype_set.add(dtype)
ntypes = list(sorted(ntype_set))
else:
ntypes = list(sorted(num_nodes_per_type.keys()))
num_nodes_per_type = utils.toindex([num_nodes_per_type[ntype] for ntype in ntypes], "int64")
ntype_dict = {ntype: i for i, ntype in enumerate(ntypes)}
for rgrh in rel_graphs:
stype, etype, dtype = rgrh.canonical_etypes[0]
meta_edges_src.append(ntype_dict[stype])
meta_edges_dst.append(ntype_dict[dtype])
etypes.append(etype)
# metagraph is DGLGraph, currently still using int64 as index dtype
metagraph = graph_index.from_coo(len(ntypes), meta_edges_src, meta_edges_dst, True)
# create graph index
hgidx = heterograph_index.create_heterograph_from_relations(
metagraph, [rgrh._graph for rgrh in rel_graphs], num_nodes_per_type)
retg = DGLHeteroGraph(hgidx, ntypes, etypes)
for i, rgrh in enumerate(rel_graphs):
for ntype in rgrh.ntypes:
retg.nodes[ntype].data.update(rgrh.nodes[ntype].data)
retg._edge_frames[i].update(rgrh._edge_frames[0])
return retg
"""DEPRECATED: use dgl.heterograph instead."""
raise DGLError('dgl.hetero_from_relations is deprecated.\n\n'
'Use dgl.heterograph instead.')
def hetero_from_shared_memory(name):
"""Create a heterograph from shared memory with the given name.
......@@ -429,94 +197,163 @@ def hetero_from_shared_memory(name):
def heterograph(data_dict,
num_nodes_dict=None,
validate=True,
formats=['coo', 'csr', 'csc'],
idtype=None,
device=None):
"""Create a heterogeneous graph from a dictionary between edge types and edge lists.
"""Create a heterogeneous graph.
Parameters
----------
data_dict : dict
The dictionary between edge types and edge list data.
data_dict : graph data
The dictionary data for constructing a heterogeneous graph. The keys are in the form of
string triplets (src_type, edge_type, dst_type), specifying the source node,
edge, and destination node types. The values are graph data in the form of
:math:`(U, V)`, where :math:`(U[i], V[i])` forms the edge with ID :math:`i`.
The allowed graph data formats are:
- ``(Tensor, Tensor)``: Each tensor must be a 1D tensor containing node IDs. DGL calls
this format "tuple of node-tensors". The tensors should have the same data type,
which must be either int32 or int64. They should also have the same device context
(see below the descriptions of :attr:`idtype` and :attr:`device`).
- ``(iterable[int], iterable[int])``: Similar to the tuple of node-tensors
format, but stores node IDs in two sequences (e.g. list, tuple, numpy.ndarray).
num_nodes_dict : dict[str, int], optional
The number of nodes for some node types, which is a dictionary mapping a node type
:math:`T` to the number of :math:`T`-typed nodes. If not given for a node type
:math:`T`, DGL finds the largest ID appearing in *every* graph data whose source
or destination node type is :math:`T`, and sets the number of nodes to be that ID
plus one. If given and the value is no greater than the largest ID for some node type,
DGL will raise an error. By default, DGL infers the number of nodes for all node types.
idtype : int32 or int64, optional
The data type for storing the structure-related graph information such as node and
edge IDs. It should be a framework-specific data type object (e.g., ``torch.int32``).
If ``None`` (default), DGL infers the ID type from the :attr:`data_dict` argument.
device : device context, optional
The device of the returned graph, which should be a framework-specific device object
(e.g., ``torch.device``). If ``None`` (default), DGL uses the device of the tensors of
the :attr:`data_dict` argument. If the graph data in :attr:`data_dict` are not tuples of node-tensors, the
returned graph is on CPU. If the specified :attr:`device` differs from that of the
provided tensors, it casts the given tensors to the specified device first.
The edge types are specified as a triplet of (source node type name, edge type
name, destination node type name).
Returns
-------
DGLGraph
The created graph.
The edge list data can be anything acceptable by :func:`dgl.graph` or
:func:`dgl.bipartite`, or objects returned by the two functions themselves.
num_nodes_dict : dict[str, int]
The number of nodes for each node type.
Notes
-----
1. If the :attr:`idtype` argument is not given then:
By default DGL infers the number of nodes for each node type from ``data_dict``
by taking the maximum node ID plus one for each node type.
validate : bool, optional
If True, check if node ids are within cardinality, the check process may take
some time. (Default: True)
If False and num_nodes_dict is not None, user would receive a warning.
formats : str or list of str
It can be ``'coo'``/``'csr'``/``'csc'`` or a sublist of them,
Force the storage formats. Default: ``['coo', 'csr', 'csc']``.
idtype : int32, int64, optional
Integer ID type. Valid options are int32 or int64. If None, try infer from
the given data.
device : Device context, optional
Device on which the graph is created. Default: infer from data.
- in the case of the tuple of node-tensors format, DGL uses
the data type of the given ID tensors.
- in the case of the tuple of sequence format, DGL uses int64.
Returns
-------
DGLHeteroGraph
Once the graph has been created, you can change the data type by using
:func:`dgl.DGLGraph.long` or :func:`dgl.DGLGraph.int`.
If the specified :attr:`idtype` argument differs from the data type of the provided
tensors, it casts the given tensors to the specified data type first.
2. The most efficient construction approach is to provide a tuple of node tensors without
specifying :attr:`idtype` and :attr:`device`. This is because the returned graph shares
the storage with the input node-tensors in this case.
3. DGL internally maintains multiple copies of the graph structure in different sparse
formats and chooses the most efficient one depending on the computation invoked.
If memory usage becomes an issue in the case of large graphs, use
:func:`dgl.DGLGraph.formats` to restrict the allowed formats.
Examples
--------
>>> g = dgl.heterograph({
... ('user', 'follows', 'user'): [(0, 1), (1, 2)],
... ('user', 'plays', 'game'): [(0, 0), (1, 0), (1, 1), (2, 1)],
... ('developer', 'develops', 'game'): [(0, 0), (1, 1)],
... })
The following example uses PyTorch backend.
>>> import dgl
>>> import torch
Create a heterograph with three canonical edge types.
>>> data_dict = {
... ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
... ('user', 'follows', 'topic'): (torch.tensor([1, 1]), torch.tensor([1, 2])),
... ('user', 'plays', 'game'): (torch.tensor([0, 3]), torch.tensor([3, 4]))
... }
>>> g = dgl.heterograph(data_dict)
>>> g
Graph(num_nodes={'game': 5, 'topic': 3, 'user': 4},
num_edges={('user', 'follows', 'user'): 2, ('user', 'follows', 'topic'): 2,
('user', 'plays', 'game'): 2},
metagraph=[('user', 'user', 'follows'), ('user', 'topic', 'follows'),
('user', 'game', 'plays')])
Explicitly specify the number of nodes for each node type in the graph.
>>> num_nodes_dict = {'user': 4, 'topic': 4, 'game': 6}
>>> g = dgl.heterograph(data_dict, num_nodes_dict=num_nodes_dict)
Create a graph on the first GPU with data type int32.
>>> g = dgl.heterograph(data_dict, idtype=torch.int32, device='cuda:0')
"""
# Try infer idtype
if idtype is None:
for data in data_dict.values():
if isinstance(data, tuple) and len(data) == 2 and F.is_tensor(data[0]):
idtype = F.dtype(data[0])
break
# Convert all data to edge tensors first.
data_dict = {(sty, ety, dty) : utils.graphdata2tensors(data, idtype, bipartite=(sty != dty))
for (sty, ety, dty), data in data_dict.items()}
# infer number of nodes for each node type
# Convert all data to node tensors first
node_tensor_dict = {}
need_infer = num_nodes_dict is None
if num_nodes_dict is None:
num_nodes_dict = defaultdict(int)
for (srctype, etype, dsttype), data in data_dict.items():
_, _, nsrc, ndst = data
num_nodes_dict[srctype] = max(num_nodes_dict[srctype], nsrc)
num_nodes_dict[dsttype] = max(num_nodes_dict[dsttype], ndst)
for (sty, ety, dty), data in data_dict.items():
if isinstance(data, spmatrix):
raise DGLError("dgl.heterograph no longer supports graph construction from a SciPy "
"sparse matrix, use dgl.from_scipy instead.")
if isinstance(data, nx.Graph):
raise DGLError("dgl.heterograph no longer supports graph construction from a NetworkX "
"graph, use dgl.from_networkx instead.")
is_bipartite = (sty != dty)
u, v, urange, vrange = utils.graphdata2tensors(data, idtype, bipartite=is_bipartite)
node_tensor_dict[(sty, ety, dty)] = (u, v)
if need_infer:
num_nodes_dict[sty] = max(num_nodes_dict[sty], urange)
num_nodes_dict[dty] = max(num_nodes_dict[dty], vrange)
else: # sanity check
if num_nodes_dict[sty] < urange:
raise DGLError('The given number of nodes of node type {} must be larger than'
' the max ID in the data, but got {} and {}.'.format(
sty, num_nodes_dict[sty], urange - 1))
if num_nodes_dict[dty] < vrange:
raise DGLError('The given number of nodes of node type {} must be larger than'
' the max ID in the data, but got {} and {}.'.format(
sty, num_nodes_dict[dty], vrange - 1))
# Create the graph
# Sort the ntypes and relation tuples to have a deterministic order for the same set
# of type names.
ntypes = list(sorted(num_nodes_dict.keys()))
relations = list(sorted(node_tensor_dict.keys()))
num_nodes_per_type = utils.toindex([num_nodes_dict[ntype] for ntype in ntypes], "int64")
ntype_dict = {ntype: i for i, ntype in enumerate(ntypes)}
meta_edges_src = []
meta_edges_dst = []
etypes = []
rel_graphs = []
for (srctype, etype, dsttype), data in data_dict.items():
u, v, _, _ = data
if srctype == dsttype:
rel_graphs.append(graph(
(u, v), srctype, etype,
num_nodes=num_nodes_dict[srctype],
validate=validate,
formats=formats,
idtype=idtype, device=device))
else:
rel_graphs.append(bipartite(
(u, v), srctype, etype, dsttype,
num_nodes=(num_nodes_dict[srctype], num_nodes_dict[dsttype]),
validate=validate,
formats=formats,
idtype=idtype, device=device))
for srctype, etype, dsttype in relations:
meta_edges_src.append(ntype_dict[srctype])
meta_edges_dst.append(ntype_dict[dsttype])
etypes.append(etype)
src, dst = node_tensor_dict[(srctype, etype, dsttype)]
g = create_from_edges(src, dst, srctype, etype, dsttype,
num_nodes_dict[srctype], num_nodes_dict[dsttype])
rel_graphs.append(g)
return hetero_from_relations(rel_graphs, num_nodes_dict)
# metagraph is DGLGraph, currently still using int64 as index dtype
metagraph = graph_index.from_coo(len(ntypes), meta_edges_src, meta_edges_dst, True)
# create graph index
hgidx = heterograph_index.create_heterograph_from_relations(
metagraph, [rgrh._graph for rgrh in rel_graphs], num_nodes_per_type)
retg = DGLHeteroGraph(hgidx, ntypes, etypes)
return retg.to(device)
def to_hetero(G, ntypes, etypes, ntype_field=NTYPE, etype_field=ETYPE,
metagraph=None):
def to_heterogeneous(G, ntypes, etypes, ntype_field=NTYPE,
etype_field=ETYPE, metagraph=None):
"""Convert the given homogeneous graph to a heterogeneous graph.
The input graph should have only one type of nodes and edges. Each node and edge
......@@ -531,10 +368,13 @@ def to_hetero(G, ntypes, etypes, ntype_field=NTYPE, etype_field=ETYPE,
(0, ty_A, 1) and (2, ty_B, 3). In other words, these two edges share the same edge
type name, but can be distinguished by a canonical edge type tuple.
This function will copy any node/edge features from :attr:`G` to the returned heterogeneous
graph, except for node/edge types and IDs used to recover the heterogeneous graph.
Parameters
----------
G : DGLHeteroGraph
Input homogeneous graph.
G : DGLGraph
The homogeneous graph.
ntypes : list of str
The node type names.
etypes : list of str
......@@ -551,8 +391,8 @@ def to_hetero(G, ntypes, etypes, ntype_field=NTYPE, etype_field=ETYPE,
Returns
-------
DGLHeteroGraph
A heterograph. The parent node and edge ID are stored in the column
DGLGraph
A heterogeneous graph. The parent node and edge ID are stored in the column
``dgl.NID`` and ``dgl.EID`` respectively for all node/edge types.
Notes
......@@ -568,47 +408,47 @@ def to_hetero(G, ntypes, etypes, ntype_field=NTYPE, etype_field=ETYPE,
Examples
--------
>>> g1 = dgl.bipartite([(0, 1), (1, 2)], 'user', 'develops', 'activity')
>>> g2 = dgl.bipartite([(0, 0), (1, 1)], 'developer', 'develops', 'game')
>>> hetero_g = dgl.hetero_from_relations([g1, g2])
>>> print(hetero_g)
>>> import dgl
>>> hg = dgl.heterograph({
... ('user', 'develops', 'activity'): ([0, 1], [1, 2]),
... ('developer', 'develops', 'game'): ([0, 1], [0, 1])
... })
>>> print(hg)
Graph(num_nodes={'user': 2, 'activity': 3, 'developer': 2, 'game': 2},
num_edges={('user', 'develops', 'activity'): 2, ('developer', 'develops', 'game'): 2},
metagraph=[('user', 'activity'), ('developer', 'game')])
We first convert the heterogeneous graph to a homogeneous graph.
>>> homo_g = dgl.to_homo(hetero_g)
>>> print(homo_g)
>>> g = dgl.to_homogeneous(hg)
>>> print(g)
Graph(num_nodes=9, num_edges=4,
ndata_schemes={'_TYPE': Scheme(shape=(), dtype=torch.int64),
'_ID': Scheme(shape=(), dtype=torch.int64)}
edata_schemes={'_TYPE': Scheme(shape=(), dtype=torch.int64),
'_ID': Scheme(shape=(), dtype=torch.int64)})
>>> homo_g.ndata
>>> g.ndata
{'_TYPE': tensor([0, 0, 1, 1, 1, 2, 2, 3, 3]), '_ID': tensor([0, 1, 0, 1, 2, 0, 1, 0, 1])}
Nodes 0, 1 for 'user', 2, 3, 4 for 'activity', 5, 6 for 'developer', 7, 8 for 'game'
>>> homo_g.edata
>>> g.edata
{'_TYPE': tensor([0, 0, 1, 1]), '_ID': tensor([0, 1, 0, 1])}
Edges 0, 1 for ('user', 'develops', 'activity'), 2, 3 for ('developer', 'develops', 'game')
Now convert the homogeneous graph back to a heterogeneous graph.
>>> hetero_g_2 = dgl.to_hetero(homo_g, hetero_g.ntypes, hetero_g.etypes)
>>> print(hetero_g_2)
>>> hg_2 = dgl.to_heterogeneous(g, hg.ntypes, hg.etypes)
>>> print(hg_2)
Graph(num_nodes={'user': 2, 'activity': 3, 'developer': 2, 'game': 2},
num_edges={('user', 'develops', 'activity'): 2, ('developer', 'develops', 'game'): 2},
metagraph=[('user', 'activity'), ('developer', 'game')])
See Also
--------
dgl.to_homo
to_homogeneous
"""
# TODO(minjie): use hasattr to support DGLGraph input; should be fixed once
# DGLGraph is merged with DGLHeteroGraph
if (hasattr(G, 'ntypes') and len(G.ntypes) > 1
or hasattr(G, 'etypes') and len(G.etypes) > 1):
raise DGLError('The input graph should be homogenous and have only one '
raise DGLError('The input graph should be homogeneous and have only one '
' type of nodes and edges.')
num_ntypes = len(ntypes)
......@@ -660,48 +500,49 @@ def to_hetero(G, ntypes, etypes, ntype_field=NTYPE, etype_field=ETYPE,
etype_mask = (edge_ctids[None, :] == canonical_etids[:, None]).all(2)
edge_groups = [etype_mask[i].nonzero()[0] for i in range(len(canonical_etids))]
rel_graphs = []
data_dict = dict()
canonical_etypes = []
for i, (stid, etid, dtid) in enumerate(canonical_etids):
src_of_etype = src_local[edge_groups[i]]
dst_of_etype = dst_local[edge_groups[i]]
if stid == dtid:
rel_graph = graph(
(src_of_etype, dst_of_etype), ntypes[stid], etypes[etid],
num_nodes=ntype_count[stid], validate=False,
idtype=idtype, device=device)
else:
rel_graph = bipartite(
(src_of_etype,
dst_of_etype), ntypes[stid], etypes[etid], ntypes[dtid],
num_nodes=(ntype_count[stid], ntype_count[dtid]),
validate=False, idtype=idtype, device=device)
rel_graphs.append(rel_graph)
hg = hetero_from_relations(rel_graphs,
{ntype: count for ntype, count in zip(
ntypes, ntype_count)})
canonical_etypes.append((ntypes[stid], etypes[etid], ntypes[dtid]))
data_dict[canonical_etypes[-1]] = \
(src_of_etype, dst_of_etype)
hg = heterograph(data_dict,
{ntype: count for ntype, count in zip(ntypes, ntype_count)},
idtype=idtype, device=device)
ntype2ngrp = {ntype : node_groups[ntid] for ntid, ntype in enumerate(ntypes)}
# features
for key, data in G.ndata.items():
if key in [ntype_field, NID]:
continue
for ntid, ntype in enumerate(hg.ntypes):
rows = F.copy_to(F.tensor(ntype2ngrp[ntype]), F.context(data))
hg._node_frames[ntid][key] = F.gather_row(data, rows)
for key, data in G.edata.items():
if key in [etype_field, EID]:
continue
for etid in range(len(hg.canonical_etypes)):
rows = F.copy_to(F.tensor(edge_groups[etid]), F.context(data))
hg._edge_frames[etid][key] = F.gather_row(data, rows)
hg._edge_frames[hg.get_etype_id(canonical_etypes[etid])][key] = \
F.gather_row(data, rows)
for ntid, ntype in enumerate(hg.ntypes):
hg._node_frames[ntid][NID] = F.tensor(ntype2ngrp[ntype])
return hg
for etid in range(len(hg.canonical_etypes)):
hg._edge_frames[etid][EID] = F.tensor(edge_groups[etid])
def to_hetero(G, ntypes, etypes, ntype_field=NTYPE, etype_field=ETYPE,
              metagraph=None):
    """Convert the given homogeneous graph to a heterogeneous graph.

    DEPRECATED: Please use to_heterogeneous

    This wrapper emits a deprecation warning and forwards every argument
    unchanged to :func:`to_heterogeneous`.

    Parameters
    ----------
    G
        The input graph, passed through as the first positional argument.
    ntypes
        The node type names, passed through positionally.
    etypes
        The edge type names, passed through positionally.
    ntype_field
        Passed through as the ``ntype_field`` keyword argument.
    etype_field
        Passed through as the ``etype_field`` keyword argument.
    metagraph
        Passed through as the ``metagraph`` keyword argument.

    Returns
    -------
    The value returned by :func:`to_heterogeneous`.

    See Also
    --------
    to_heterogeneous
    """
    dgl_warning("dgl.to_hetero is deprecated. Please use dgl.to_heterogeneous")
    return to_heterogeneous(G, ntypes, etypes, ntype_field=ntype_field,
                            etype_field=etype_field, metagraph=metagraph)
def to_homo(G):
def to_homogeneous(G, ndata=None, edata=None):
"""Convert the given heterogeneous graph to a homogeneous graph.
The returned graph has only one type of nodes and edges.
......@@ -710,34 +551,62 @@ def to_homo(G):
is an integer representing the type id, which can be used to retrieve the type
names stored in ``G.ntypes`` and ``G.etypes`` arguments.
If all
Parameters
----------
G : DGLHeteroGraph
Input heterogeneous graph.
G : DGLGraph
The heterogeneous graph.
ndata : list[str], optional
The node features to combine across all node types. For each feature ``feat`` in
:attr:`ndata`, it concatenates ``G.nodes[T].data[feat]`` across all node types ``T``.
As a result, the feature ``feat`` of all node types should have the same shape and
data type. By default, the returned graph will not have any node features.
edata : list[str], optional
The edge features to combine across all edge types. For each feature ``feat`` in
:attr:`edata`, it concatenates ``G.edges[T].data[feat]`` across all edge types ``T``.
As a result, the feature ``feat`` of all edge types should have the same shape and
data type. By default, the returned graph will not have any edge features.
Returns
-------
DGLHeteroGraph
DGLGraph
A homogeneous graph. The parent node and edge type/ID are stored in
columns ``dgl.NTYPE/dgl.NID`` and ``dgl.ETYPE/dgl.EID`` respectively.
Examples
--------
>>> follows_g = dgl.graph([(0, 1), (1, 2)], 'user', 'follows')
>>> devs_g = dgl.bipartite([(0, 0), (1, 1)], 'developer', 'develops', 'game')
>>> hetero_g = dgl.hetero_from_relations([follows_g, devs_g])
>>> homo_g = dgl.to_homo(hetero_g)
>>> homo_g.ndata
The following example uses PyTorch backend.
>>> import dgl
>>> import torch
>>> hg = dgl.heterograph({
... ('user', 'follows', 'user'): ([0, 1], [1, 2]),
... ('developer', 'develops', 'game'): ([0, 1], [0, 1])
... })
>>> hg.nodes['user'].data['h'] = torch.ones(3, 1)
>>> hg.nodes['developer'].data['h'] = torch.zeros(2, 1)
>>> hg.nodes['game'].data['h'] = torch.ones(2, 1)
>>> g = dgl.to_homogeneous(hg)
>>> # The first three nodes are for 'user', the next two are for 'developer',
>>> # and the last two are for 'game'
>>> g.ndata
{'_TYPE': tensor([0, 0, 0, 1, 1, 2, 2]), '_ID': tensor([0, 1, 2, 0, 1, 0, 1])}
First three nodes for 'user', next two for 'developer' and the last two for 'game'
>>> homo_g.edata
>>> # The first two edges are for 'follows', and the next two are for 'develops' edges.
>>> g.edata
{'_TYPE': tensor([0, 0, 1, 1]), '_ID': tensor([0, 1, 0, 1])}
First two edges for 'follows', next two for 'develops'
Combine feature 'h' across all node types in the conversion.
>>> g = dgl.to_homogeneous(hg, ndata=['h'])
>>> g.ndata['h']
tensor([[1.], [1.], [1.], [0.], [0.], [1.], [1.]])
See Also
--------
dgl.to_hetero
to_heterogeneous
"""
num_nodes_per_ntype = [G.number_of_nodes(ntype) for ntype in G.ntypes]
offset_per_ntype = np.insert(np.cumsum(num_nodes_per_ntype), 0, 0)
......@@ -767,11 +636,15 @@ def to_homo(G):
eids.append(F.arange(0, num_edges, G.idtype))
retg = graph((F.cat(srcs, 0), F.cat(dsts, 0)), num_nodes=total_num_nodes,
validate=False, idtype=G.idtype, device=G.device)
idtype=G.idtype, device=G.device)
# copy features
comb_nf = combine_frames(G._node_frames, range(len(G.ntypes)))
comb_ef = combine_frames(G._edge_frames, range(len(G.etypes)))
if ndata is None:
ndata = []
if edata is None:
edata = []
comb_nf = combine_frames(G._node_frames, range(len(G.ntypes)), col_names=ndata)
comb_ef = combine_frames(G._edge_frames, range(len(G.etypes)), col_names=edata)
if comb_nf is not None:
retg.ndata.update(comb_nf)
if comb_ef is not None:
......@@ -785,89 +658,317 @@ def to_homo(G):
return retg
def to_homo(G):
    """Deprecated alias of :func:`to_homogeneous`.

    DEPRECATED: Please use to_homogeneous

    Emits a deprecation warning through ``dgl_warning`` and then calls
    ``to_homogeneous(G)`` without any feature lists, returning its result.
    """
    dgl_warning("dgl.to_homo is deprecated. Please use dgl.to_homogeneous")
    homogeneous_graph = to_homogeneous(G)
    return homogeneous_graph
def from_scipy(sp_mat,
ntype='_N', etype='_E',
eweight_name=None,
formats=['coo', 'csr', 'csc'],
idtype=None):
"""Create a DGLGraph from a SciPy sparse matrix.
idtype=None,
device=None):
"""Create a graph from a SciPy sparse matrix.
Parameters
----------
sp_mat : SciPy sparse matrix
SciPy sparse matrix.
ntype : str
Type name for both source and destination nodes
etype : str
Type name for edges
sp_mat : scipy.sparse.spmatrix
The graph adjacency matrix. Each nonzero entry ``sp_mat[i, j]`` represents an edge from
node ``i`` to ``j``. The matrix must have square shape ``(N, N)``, where ``N`` is the
number of nodes in the graph.
eweight_name : str, optional
If given, the edge weights in the matrix will be
stored in ``edata[eweight_name]``.
formats : str or list of str
It can be ``'coo'``/``'csr'``/``'csc'`` or a sublist of them,
Force the storage formats. Default: ``['coo', 'csr', 'csc']``.
idtype : int32, int64, optional
Integer ID type. Must be int32 or int64. Default: int64.
The edata name for storing the nonzero values of :attr:`sp_mat`. If given, DGL will
store the nonzero values of :attr:`sp_mat` in ``edata[eweight_name]`` of the returned
graph.
idtype : int32 or int64, optional
The data type for storing the structure-related graph information such as node and
edge IDs. It should be a framework-specific data type object (e.g., ``torch.int32``).
By default, DGL uses int64.
device : device context, optional
The device of the resulting graph. It should be a framework-specific device object
(e.g., ``torch.device``). By default, DGL stores the graph on CPU.
Returns
-------
g : DGLGraph
DGLGraph
The created graph.
Notes
-----
1. The function supports all kinds of SciPy sparse matrix classes (e.g.,
:class:`scipy.sparse.csr.csr_matrix`). It converts the input matrix to the COOrdinate
format using :func:`scipy.sparse.spmatrix.tocoo` before creating a :class:`DGLGraph`.
Creating from a :class:`scipy.sparse.coo.coo_matrix` is hence the most efficient way.
2. DGL internally maintains multiple copies of the graph structure in different sparse
formats and chooses the most efficient one depending on the computation invoked.
If memory usage becomes an issue in the case of large graphs, use
:func:`dgl.DGLGraph.formats` to restrict the allowed formats.
Examples
--------
The following example uses PyTorch backend.
>>> import dgl
>>> import numpy as np
>>> import torch
>>> from scipy.sparse import coo_matrix
Create a small three-edge graph.
>>> # Source nodes for edges (2, 1), (3, 2), (4, 3)
>>> src_ids = np.array([2, 3, 4])
>>> # Destination nodes for edges (2, 1), (3, 2), (4, 3)
>>> dst_ids = np.array([1, 2, 3])
>>> # Weight for edges (2, 1), (3, 2), (4, 3)
>>> eweight = np.array([0.2, 0.3, 0.5])
>>> sp_mat = coo_matrix((eweight, (src_ids, dst_ids)), shape=(5, 5))
>>> g = dgl.from_scipy(sp_mat)
Retrieve the edge weights.
>>> g = dgl.from_scipy(sp_mat, eweight_name='w')
>>> g.edata['w']
tensor([0.2000, 0.3000, 0.5000], dtype=torch.float64)
Create a graph on the first GPU with data type int32.
>>> g = dgl.from_scipy(sp_mat, idtype=torch.int32, device='cuda:0')
See Also
--------
graph
from_networkx
"""
# Sanity check
num_rows = sp_mat.shape[0]
num_cols = sp_mat.shape[1]
if num_rows != num_cols:
raise DGLError('Expect the number of rows to be the same as the number of columns for '
'sp_mat, got {:d} and {:d}.'.format(num_rows, num_cols))
u, v, urange, vrange = utils.graphdata2tensors(sp_mat, idtype)
g = create_from_edges(u, v, ntype, etype, ntype, urange, vrange,
validate=False, formats=formats)
g = create_from_edges(u, v, '_N', '_E', '_N', urange, vrange)
if eweight_name is not None:
g.edata[eweight_name] = F.tensor(sp_mat.data)
return g.to(device)
def bipartite_from_scipy(sp_mat,
                         utype, etype, vtype,
                         eweight_name=None,
                         idtype=None,
                         device=None):
    """Create a unidirectional bipartite graph from a SciPy sparse matrix.

    The created graph will have two types of nodes ``utype`` and ``vtype`` as well as one
    edge type ``etype`` whose edges are from ``utype`` to ``vtype``.

    Parameters
    ----------
    sp_mat : scipy.sparse.spmatrix
        The graph adjacency matrix. Each nonzero entry ``sp_mat[i, j]``
        represents an edge from node ``i`` of type :attr:`utype` to ``j`` of type :attr:`vtype`.
        Let the matrix shape be ``(N, M)``. There will be ``N`` nodes of type :attr:`utype`
        and ``M`` nodes of type :attr:`vtype` in the resulting graph.
    utype : str
        The name of the source node type.
    etype : str
        The name of the edge type.
    vtype : str
        The name of the destination node type.
    eweight_name : str, optional
        The edata name for storing the nonzero values of :attr:`sp_mat`.
        If given, DGL will store the nonzero values of :attr:`sp_mat` in ``edata[eweight_name]``
        of the returned graph.
    idtype : int32 or int64, optional
        The data type for storing the structure-related graph information such as node and
        edge IDs. It should be a framework-specific data type object (e.g., ``torch.int32``).
        By default, DGL uses int64.
    device : device context, optional
        The device of the resulting graph. It should be a framework-specific device object
        (e.g., ``torch.device``). By default, DGL stores the graph on CPU.

    Returns
    -------
    DGLGraph
        The created graph.

    Notes
    -----
    1. The function supports all kinds of SciPy sparse matrix classes (e.g.,
       :class:`scipy.sparse.csr.csr_matrix`). It converts the input matrix to the COOrdinate
       format using :func:`scipy.sparse.spmatrix.tocoo` before creating a :class:`DGLGraph`.
       Creating from a :class:`scipy.sparse.coo.coo_matrix` is hence the most efficient way.
    2. DGL internally maintains multiple copies of the graph structure in different sparse
       formats and chooses the most efficient one depending on the computation invoked.
       If memory usage becomes an issue in the case of large graphs, use
       :func:`dgl.DGLGraph.formats` to restrict the allowed formats.

    Examples
    --------

    The following example uses PyTorch backend.

    >>> import dgl
    >>> import numpy as np
    >>> import torch
    >>> from scipy.sparse import coo_matrix

    Create a small three-edge graph.

    >>> # Source nodes for edges (2, 1), (3, 2), (4, 3)
    >>> src_ids = np.array([2, 3, 4])
    >>> # Destination nodes for edges (2, 1), (3, 2), (4, 3)
    >>> dst_ids = np.array([1, 2, 3])
    >>> # Weight for edges (2, 1), (3, 2), (4, 3)
    >>> eweight = np.array([0.2, 0.3, 0.5])
    >>> sp_mat = coo_matrix((eweight, (src_ids, dst_ids)))
    >>> g = dgl.bipartite_from_scipy(sp_mat, utype='_U', etype='_E', vtype='_V')

    Retrieve the edge weights.

    >>> g = dgl.bipartite_from_scipy(sp_mat, utype='_U', etype='_E', vtype='_V', eweight_name='w')
    >>> g.edata['w']
    tensor([0.2000, 0.3000, 0.5000], dtype=torch.float64)

    Create a graph on the first GPU with data type int32.

    >>> g = dgl.bipartite_from_scipy(sp_mat, utype='_U', etype='_E', vtype='_V',
    ...                              idtype=torch.int32, device='cuda:0')

    See Also
    --------
    heterograph
    bipartite_from_networkx
    """
    # No shape check is needed here: unlike from_scipy, a bipartite adjacency
    # matrix may be rectangular.
    u, v, urange, vrange = utils.graphdata2tensors(sp_mat, idtype, bipartite=True)
    g = create_from_edges(u, v, utype, etype, vtype, urange, vrange)
    if eweight_name is not None:
        # Store the matrix's nonzero values as an edge feature.
        g.edata[eweight_name] = F.tensor(sp_mat.data)
    # Single exit point so the requested device is always honoured.
    return g.to(device)
def from_networkx(nx_graph, *,
ntype='_N', etype='_E',
def from_networkx(nx_graph,
node_attrs=None,
edge_attrs=None,
edge_id_attr_name='id',
formats=['coo', 'csr', 'csc'],
idtype=None):
"""Create a DGLGraph from networkx.
edge_id_attr_name=None,
idtype=None,
device=None):
"""Create a graph from a NetworkX graph.
Creating a DGLGraph from a NetworkX graph is not fast especially for large scales.
It is recommended to first convert a NetworkX graph into a tuple of node-tensors
and then construct a DGLGraph with :func:`dgl.graph`.
Parameters
----------
nx_graph : networkx.Graph
NetworkX graph.
ntype : str
Type name for both source and destination nodes
etype : str
Type name for edges
node_attrs : list of str
Names for node features to retrieve from the NetworkX graph (Default: None)
edge_attrs : list of str
Names for edge features to retrieve from the NetworkX graph (Default: None)
The NetworkX graph holding the graph structure and the node/edge attributes.
DGL will relabel the nodes using consecutive integers starting from zero if it is
not the case. If the input graph is undirected, DGL converts it to a directed graph
by :func:`networkx.Graph.to_directed`.
node_attrs : list[str], optional
The names of the node attributes to retrieve from the NetworkX graph. If given, DGL
stores the retrieved node attributes in ``ndata`` of the returned graph using their
original names. The attribute data must be convertible to Tensor type (e.g., scalar,
numpy.ndarray, list, etc.).
edge_attrs : list[str], optional
The names of the edge attributes to retrieve from the NetworkX graph. If given, DGL
stores the retrieved edge attributes in ``edata`` of the returned graph using their
original names. The attribute data must be convertible to Tensor type (e.g., scalar,
numpy.ndarray, list, etc.). It must be None if :attr:`nx_graph` is undirected.
edge_id_attr_name : str, optional
Key name for edge ids in the NetworkX graph. If not found, we
will consider the graph not to have pre-specified edge ids. (Default: 'id')
formats : str or list of str
It can be ``'coo'``/``'csr'``/``'csc'`` or a sublist of them,
Force the storage formats. Default: ``['coo', 'csr', 'csc']``.
idtype : int32, int64, optional
Integer ID type. Must be int32 or int64. Default: int64.
The name of the edge attribute that stores the edge IDs. If given, DGL will assign edge
IDs accordingly when creating the graph, so the attribute must be valid IDs, i.e.
consecutive integers starting from zero. By default, the edge IDs of the returned graph
can be arbitrary. It must be None if :attr:`nx_graph` is undirected.
idtype : int32 or int64, optional
The data type for storing the structure-related graph information such as node and
edge IDs. It should be a framework-specific data type object (e.g., ``torch.int32``).
By default, DGL uses int64.
device : device context, optional
The device of the resulting graph. It should be a framework-specific device object
(e.g., ``torch.device``). By default, DGL stores the graph on CPU.
Returns
-------
g : DGLGraph
DGLGraph
The created graph.
Notes
-----
DGL internally maintains multiple copies of the graph structure in different sparse
formats and chooses the most efficient one depending on the computation invoked.
If memory usage becomes an issue in the case of large graphs, use
:func:`dgl.DGLGraph.formats` to restrict the allowed formats.
Examples
--------
The following example uses PyTorch backend.
>>> import dgl
>>> import networkx as nx
>>> import numpy as np
>>> import torch
Create a 2-edge NetworkX graph.
>>> nx_g = nx.DiGraph()
>>> # Add 3 nodes and two features for them
>>> nx_g.add_nodes_from([0, 1, 2], feat1=np.zeros((3, 1)), feat2=np.ones((3, 1)))
>>> # Add 2 edges (1, 2) and (2, 1) with two features, one being edge IDs
>>> nx_g.add_edge(1, 2, weight=np.ones((1, 1)), eid=np.array([1]))
>>> nx_g.add_edge(2, 1, weight=np.ones((1, 1)), eid=np.array([0]))
Convert it into a DGLGraph with structure only.
>>> g = dgl.from_networkx(nx_g)
Retrieve the node/edge features of the graph.
>>> g = dgl.from_networkx(nx_g, node_attrs=['feat1', 'feat2'], edge_attrs=['weight'])
Use a pre-specified ordering of the edges.
>>> g.edges()
(tensor([1, 2]), tensor([2, 1]))
>>> g = dgl.from_networkx(nx_g, edge_id_attr_name='eid')
>>> g.edges()
(tensor([2, 1]), tensor([1, 2]))
Create a graph on the first GPU with data type int32.
>>> g = dgl.from_networkx(nx_g, idtype=torch.int32, device='cuda:0')
See Also
--------
graph
from_scipy
"""
# Relabel nodes using consecutive integers
# Sanity check
if edge_id_attr_name is not None and \
edge_id_attr_name not in next(iter(nx_graph.edges(data=True)))[-1]:
raise DGLError('Failed to find the pre-specified edge IDs in the edge features of '
'the NetworkX graph with name {}'.format(edge_id_attr_name))
if not nx_graph.is_directed() and not (edge_id_attr_name is None and edge_attrs is None):
raise DGLError('Expect edge_id_attr_name and edge_attrs to be None when nx_graph is '
'undirected, got {} and {}'.format(edge_id_attr_name, edge_attrs))
# Relabel nodes using consecutive integers starting from 0
nx_graph = nx.convert_node_labels_to_integers(nx_graph, ordering='sorted')
if not nx_graph.is_directed():
nx_graph = nx_graph.to_directed()
g = graph(nx_graph, ntype, etype,
formats=formats,
idtype=idtype)
u, v, urange, vrange = utils.graphdata2tensors(
nx_graph, idtype, edge_id_attr_name=edge_id_attr_name)
g = create_from_edges(u, v, '_N', '_E', '_N', urange, vrange)
# nx_graph.edges(data=True) returns src, dst, attr_dict
if nx_graph.number_of_edges() > 0:
has_edge_id = edge_id_attr_name in next(iter(nx_graph.edges(data=True)))[-1]
else:
has_edge_id = False
has_edge_id = nx_graph.number_of_edges() > 0 and edge_id_attr_name is not None
# handle features
# copy attributes
......@@ -912,31 +1013,252 @@ def from_networkx(nx_graph, *,
raise DGLError('Not all edges have attribute {}.'.format(attr))
g.edata[attr] = F.copy_to(_batcher(attr_dict[attr]), g.device)
return g
return g.to(device)
def bipartite_from_networkx(nx_graph,
                            utype, etype, vtype,
                            u_attrs=None, e_attrs=None, v_attrs=None,
                            edge_id_attr_name=None,
                            idtype=None,
                            device=None):
    """Create a unidirectional bipartite graph from a NetworkX graph.

    The created graph will have two types of nodes ``utype`` and ``vtype`` as well as one
    edge type ``etype`` whose edges are from ``utype`` to ``vtype``.

    Creating a DGLGraph from a NetworkX graph is not fast especially for large scales.
    It is recommended to first convert a NetworkX graph into a tuple of node-tensors
    and then construct a DGLGraph with :func:`dgl.heterograph`.

    Parameters
    ----------
    nx_graph : networkx.DiGraph
        The NetworkX graph holding the graph structure and the node/edge attributes.
        DGL will relabel the nodes using consecutive integers starting from zero if it is
        not the case. The graph must follow `NetworkX's bipartite graph convention
        <https://networkx.github.io/documentation/stable/reference/algorithms/bipartite.html>`_,
        and furthermore the edges must be from nodes with attribute ``bipartite=0`` to nodes
        with attribute ``bipartite=1``.
    utype : str
        The name of the source node type.
    etype : str
        The name of the edge type.
    vtype : str
        The name of the destination node type.
    u_attrs : list[str], optional
        The names of the node attributes for node type :attr:`utype` to retrieve from the
        NetworkX graph. If given, DGL stores the retrieved node attributes in
        ``nodes[utype].data`` of the returned graph using their original names. The attribute
        data must be convertible to Tensor type (e.g., scalar, numpy.array, list, etc.).
    e_attrs : list[str], optional
        The names of the edge attributes to retrieve from the NetworkX graph. If given, DGL
        stores the retrieved edge attributes in ``edata`` of the returned graph using their
        original names. The attribute data must be convertible to Tensor type (e.g., scalar,
        numpy.ndarray, list, etc.).
    v_attrs : list[str], optional
        The names of the node attributes for node type :attr:`vtype` to retrieve from the
        NetworkX graph. If given, DGL stores the retrieved node attributes in
        ``nodes[vtype].data`` of the returned graph using their original names. The attribute
        data must be convertible to Tensor type (e.g., scalar, numpy.array, list, etc.).
    edge_id_attr_name : str, optional
        The name of the edge attribute that stores the edge IDs. If given, DGL will assign edge
        IDs accordingly when creating the graph, so the attribute must be valid IDs, i.e.
        consecutive integers starting from zero. By default, the edge IDs of the returned graph
        can be arbitrary.
    idtype : int32 or int64, optional
        The data type for storing the structure-related graph information such as node and
        edge IDs. It should be a framework-specific data type object (e.g., torch.int32).
        By default, DGL uses int64.
    device : device context, optional
        The device of the resulting graph. It should be a framework-specific device object
        (e.g., torch.device). By default, DGL stores the graph on CPU.

    Returns
    -------
    DGLGraph
        The created graph.

    Raises
    ------
    DGLError
        If :attr:`nx_graph` is undirected; if :attr:`edge_id_attr_name` is given but is
        missing from the attributes of the first edge; if a node has no ``bipartite``
        attribute; or if, after collecting :attr:`e_attrs`, some edge slot was left unfilled.
    ValueError
        If the ``bipartite`` attribute of a node is neither 0 nor 1.

    Examples
    --------

    The following example uses PyTorch backend.

    >>> import dgl
    >>> import networkx as nx
    >>> import numpy as np
    >>> import torch

    Create a 2-edge unidirectional bipartite graph.

    >>> nx_g = nx.DiGraph()
    >>> # Add nodes for the source type
    >>> nx_g.add_nodes_from([1, 3], bipartite=0, feat1=np.zeros((2, 1)), feat2=np.ones((2, 1)))
    >>> # Add nodes for the destination type
    >>> nx_g.add_nodes_from([2, 4, 5], bipartite=1, feat3=np.zeros((3, 1)))
    >>> nx_g.add_edge(1, 4, weight=np.ones((1, 1)), eid=np.array([1]))
    >>> nx_g.add_edge(3, 5, weight=np.ones((1, 1)), eid=np.array([0]))

    Convert it into a DGLGraph with structure only.

    >>> g = dgl.bipartite_from_networkx(nx_g, utype='_U', etype='_E', vtype='_V')

    Retrieve the node/edge features of the graph.

    >>> g = dgl.bipartite_from_networkx(nx_g, utype='_U', etype='_E', vtype='_V',
    ...                                 u_attrs=['feat1', 'feat2'],
    ...                                 e_attrs=['weight'],
    ...                                 v_attrs=['feat3'])

    Use a pre-specified ordering of the edges.

    >>> g.edges()
    (tensor([0, 1]), tensor([1, 2]))
    >>> g = dgl.bipartite_from_networkx(nx_g,
    ...                                 utype='_U', etype='_E', vtype='_V',
    ...                                 edge_id_attr_name='eid')
    >>> g.edges()
    (tensor([1, 0]), tensor([2, 1]))

    Create a graph on the first GPU with data type int32.

    >>> g = dgl.bipartite_from_networkx(nx_g, utype='_U', etype='_E', vtype='_V',
    ...                                 idtype=torch.int32, device='cuda:0')

    See Also
    --------
    heterograph
    bipartite_from_scipy
    """
    if not nx_graph.is_directed():
        raise DGLError('Expect nx_graph to be a directed NetworkX graph.')
    if edge_id_attr_name is not None:
        # Only the first edge is inspected. A graph without edges has nothing
        # to inspect, so fall through instead of leaking a StopIteration.
        first_edge = next(iter(nx_graph.edges(data=True)), None)
        if first_edge is not None and edge_id_attr_name not in first_edge[-1]:
            raise DGLError('Failed to find the pre-specified edge IDs in the edge features '
                           'of the NetworkX graph with name {}'.format(edge_id_attr_name))

    # Get the source and destination node sets
    top_nodes = set()
    bottom_nodes = set()
    for n, ndata in nx_graph.nodes(data=True):
        if 'bipartite' not in ndata:
            raise DGLError('Expect the node {} to have attribute bipartite'.format(n))
        if ndata['bipartite'] == 0:
            top_nodes.add(n)
        elif ndata['bipartite'] == 1:
            bottom_nodes.add(n)
        else:
            raise ValueError('Expect the bipartite attribute of the node {} to be 0 or 1, '
                             'got {}'.format(n, ndata['bipartite']))

    # Separately relabel the source and destination nodes.
    top_nodes = sorted(top_nodes)
    bottom_nodes = sorted(bottom_nodes)
    top_map = {n: i for i, n in enumerate(top_nodes)}
    bottom_map = {n: i for i, n in enumerate(bottom_nodes)}

    # Get the node tensors and the number of nodes
    u, v, urange, vrange = utils.graphdata2tensors(
        nx_graph, idtype, bipartite=True,
        edge_id_attr_name=edge_id_attr_name,
        top_map=top_map, bottom_map=bottom_map)

    g = create_from_edges(u, v, utype, etype, vtype, urange, vrange)

    # Pre-specified edge IDs only matter when there is at least one edge.
    has_edge_id = nx_graph.number_of_edges() > 0 and edge_id_attr_name is not None

    def _batcher(lst):
        # Stack per-item tensors along a new leading axis; otherwise let the
        # backend build a tensor from the raw Python/NumPy values.
        if F.is_tensor(lst[0]):
            return F.cat([F.unsqueeze(x, 0) for x in lst], dim=0)
        return F.tensor(lst)

    # Copy node attributes. Nodes are visited in sorted order, which is the
    # order used to build top_map / bottom_map above.
    if u_attrs is not None:
        for attr in u_attrs:
            values = [nx_graph.nodes[nid][attr] for nid in top_nodes]
            g.srcdata[attr] = F.copy_to(_batcher(values), g.device)

    if v_attrs is not None:
        for attr in v_attrs:
            values = [nx_graph.nodes[nid][attr] for nid in bottom_nodes]
            g.dstdata[attr] = F.copy_to(_batcher(values), g.device)

    # Copy edge attributes.
    if e_attrs is not None:
        num_edges = g.number_of_edges()
        # One slot per edge; None is a placeholder that must be overwritten by
        # the attribute value of the edge with the corresponding edge ID.
        attr_dict = {key: [None] * num_edges for key in e_attrs}

        if has_edge_id:
            for _, _, attrs in nx_graph.edges(data=True):
                for key in e_attrs:
                    attr_dict[key][attrs[edge_id_attr_name]] = attrs[key]
        else:
            # XXX: assumes NetworkX iterates edges in a deterministic order, so
            # the order here matches the one used when the edge tensors were built.
            for eid, (_, _, attrs) in enumerate(nx_graph.edges(data=True)):
                for key in e_attrs:
                    attr_dict[key][eid] = attrs[key]

        for attr in e_attrs:
            for val in attr_dict[attr]:
                if val is None:
                    raise DGLError('Not all edges have attribute {}.'.format(attr))
            g.edata[attr] = F.copy_to(_batcher(attr_dict[attr]), g.device)

    return g.to(device)
def to_networkx(g, node_attrs=None, edge_attrs=None):
"""Convert to networkx graph.
"""Convert a homogeneous graph to a NetworkX graph.
The edge id will be saved as the 'id' edge attribute.
It will save the edge IDs as the ``'id'`` edge attribute in the returned NetworkX graph.
Parameters
----------
g : DGLGraph or DGLHeteroGraph
For DGLHeteroGraphs, we currently only support the
case of one node type and one edge type.
g : DGLGraph
A homogeneous graph on CPU.
node_attrs : iterable of str, optional
The node attributes to be copied. (Default: None)
The node attributes to copy from ``g.ndata``. (Default: None)
edge_attrs : iterable of str, optional
The edge attributes to be copied. (Default: None)
The edge attributes to copy from ``g.edata``. (Default: None)
Returns
-------
networkx.DiGraph
The nx graph
The converted NetworkX graph.
Examples
--------
The following example uses PyTorch backend.
>>> import dgl
>>> import torch
>>> g = dgl.graph((torch.tensor([1, 2]), torch.tensor([1, 3])))
>>> g.ndata['h'] = torch.zeros(4, 1)
>>> g.edata['h1'] = torch.ones(2, 1)
>>> g.edata['h2'] = torch.zeros(2, 2)
>>> nx_g = dgl.to_networkx(g, node_attrs=['h'], edge_attrs=['h1', 'h2'])
>>> nx_g.nodes(data=True)
NodeDataView({0: {'h': tensor([0.])},
1: {'h': tensor([0.])},
2: {'h': tensor([0.])},
3: {'h': tensor([0.])}})
>>> nx_g.edges(data=True)
OutMultiEdgeDataView([(1, 1, {'id': 0, 'h1': tensor([1.]), 'h2': tensor([0., 0.])}),
(2, 3, {'id': 1, 'h1': tensor([1.]), 'h2': tensor([0., 0.])})])
"""
if g.device != F.cpu():
raise DGLError('Cannot convert a CUDA graph to networkx. Call g.cpu() first.')
if not g.is_homogeneous():
if not g.is_homogeneous:
raise DGLError('dgl.to_networkx only supports homogeneous graphs.')
src, dst = g.edges()
src = F.asnumpy(src)
......@@ -967,8 +1289,7 @@ DGLHeteroGraph.to_networkx = to_networkx
def create_from_edges(u, v,
utype, etype, vtype,
urange, vrange,
validate=True,
formats=['coo', 'csr', 'csc']):
validate=True):
"""Internal function to create a graph from incident nodes with types.
utype could be equal to vtype
......@@ -993,9 +1314,6 @@ def create_from_edges(u, v,
maximum of the destination node IDs in the edge list plus 1. (Default: None)
validate : bool, optional
If True, checks if node IDs are within range.
formats : str or list of str
It can be ``'coo'``/``'csr'``/``'csc'`` or a sublist of them,
Force the storage formats. Default: ``['coo', 'csr', 'csc']``.
Returns
-------
......@@ -1016,12 +1334,8 @@ def create_from_edges(u, v,
else:
num_ntypes = 2
if 'coo' in formats:
hgidx = heterograph_index.create_unitgraph_from_coo(
num_ntypes, urange, vrange, u, v, formats)
else:
hgidx = heterograph_index.create_unitgraph_from_coo(
num_ntypes, urange, vrange, u, v, ['coo']).formats(formats)
hgidx = heterograph_index.create_unitgraph_from_coo(
num_ntypes, urange, vrange, u, v, ['coo', 'csr', 'csc'])
if utype == vtype:
return DGLHeteroGraph(hgidx, [utype], [etype])
else:
......
......@@ -19,7 +19,7 @@ from .. import convert
from .. import batch
from .. import backend as F
from ..convert import graph as dgl_graph
from ..convert import to_networkx
from ..convert import from_networkx, to_networkx
backend = os.environ.get('DGLBACKEND', 'pytorch')
......@@ -119,7 +119,7 @@ class CitationGraphDataset(DGLBuiltinDataset):
test_mask = _sample_mask(idx_test, labels.shape[0])
self._graph = graph
g = dgl_graph(graph)
g = from_networkx(graph)
g.ndata['train_mask'] = generate_mask_tensor(train_mask)
g.ndata['val_mask'] = generate_mask_tensor(val_mask)
......@@ -794,13 +794,13 @@ class CoraBinary(DGLBuiltinDataset):
for line in f.readlines():
if line.startswith('graph'):
if len(elist) != 0:
self.graphs.append(dgl_graph(elist))
self.graphs.append(dgl_graph(tuple(zip(*elist))))
elist = []
else:
u, v = line.strip().split(' ')
elist.append((int(u), int(v)))
if len(elist) != 0:
self.graphs.append(dgl_graph(elist))
self.graphs.append(dgl_graph(tuple(zip(*elist))))
with open("{}/pmpds.pkl".format(root), 'rb') as f:
self.pmpds = _pickle_load(f)
self.labels = []
......
......@@ -157,7 +157,7 @@ class GINDataset(DGLBuiltinDataset):
self.labels.append(self.glabel_dict[glabel])
g = dgl_graph([])
g = dgl_graph(([], []))
g.add_nodes(n_nodes)
nlabels = [] # node labels
......
......@@ -86,8 +86,8 @@ def save_graphs(filename, g_list, labels=None):
Create :code:`DGLGraph`/:code:`DGLHeteroGraph` objects and initialize node
and edge features.
>>> g1 = dgl.graph(([0, 1, 2], [1, 2, 3])
>>> g2 = dgl.graph(([0, 2], [2, 3])
>>> g1 = dgl.graph(([0, 1, 2], [1, 2, 3]))
>>> g2 = dgl.graph(([0, 2], [2, 3]))
>>> g2.edata["e"] = th.ones(2, 4)
Save Graphs into file
......
......@@ -6,7 +6,7 @@ import networkx as nx
from .. import backend as F
from .dgl_dataset import DGLDataset
from .utils import deprecate_property
from ..convert import graph as dgl_graph
from ..convert import from_networkx
__all__ = ['KarateClubDataset', 'KarateClub']
......@@ -56,7 +56,7 @@ class KarateClubDataset(DGLDataset):
label = np.asarray(
[kc_graph.nodes[i]['club'] != 'Mr. Hi' for i in kc_graph.nodes]).astype(np.int64)
label = F.tensor(label)
g = dgl_graph(kc_graph)
g = from_networkx(kc_graph)
g.ndata['label'] = label
self._graph = g
self._data = [g]
......
......@@ -6,7 +6,7 @@ import numpy as np
from .dgl_dataset import DGLDataset
from .utils import save_graphs, load_graphs, makedirs
from .. import backend as F
from ..convert import graph as dgl_graph
from ..convert import from_networkx
from ..transform import add_self_loop
__all__ = ['MiniGCDataset']
......@@ -147,7 +147,7 @@ class MiniGCDataset(DGLDataset):
# preprocess
for i in range(self.num_graphs):
# convert to DGLGraph, and add self loops
self.graphs[i] = add_self_loop(dgl_graph(self.graphs[i]))
self.graphs[i] = add_self_loop(from_networkx(self.graphs[i]))
self.labels = F.tensor(np.array(self.labels).astype(np.int))
def _gen_cycle(self, n):
......
......@@ -300,10 +300,10 @@ class RDFGraphDataset(DGLBuiltinDataset):
# convert to heterograph
if self.verbose:
print('Convert to heterograph ...')
hg = dgl.to_hetero(g,
ntypes,
etypes,
metagraph=mg)
hg = dgl.to_heterogeneous(g,
ntypes,
etypes,
metagraph=mg)
if self.verbose:
print('#Node types:', len(hg.ntypes))
print('#Canonical edge types:', len(hg.etypes))
......
......@@ -8,7 +8,7 @@ import os
from .dgl_dataset import DGLBuiltinDataset
from .utils import _get_dgl_url, generate_mask_tensor, load_graphs, save_graphs, deprecate_property
from .. import backend as F
from ..convert import graph as dgl_graph
from ..convert import from_scipy
class RedditDataset(DGLBuiltinDataset):
......@@ -140,7 +140,7 @@ class RedditDataset(DGLBuiltinDataset):
# graph
coo_adj = sp.load_npz(os.path.join(
self.raw_path, "reddit{}_graph.npz".format(self._self_loop_str)))
self._graph = dgl_graph(coo_adj)
self._graph = from_scipy(coo_adj)
# features and labels
reddit_data = np.load(os.path.join(self.raw_path, "reddit_data.npz"))
features = reddit_data["feature"]
......
......@@ -8,7 +8,7 @@ import numpy.random as npr
import scipy as sp
from .dgl_dataset import DGLDataset
from ..convert import graph as dgl_graph
from ..convert import from_scipy
from .. import batch
from .utils import save_info, save_graphs, load_info, load_graphs
......@@ -124,7 +124,7 @@ class SBMMixtureDataset(DGLDataset):
pq = [generator() for _ in range(self._n_graphs)]
else:
raise RuntimeError()
self._graphs = [dgl_graph(sbm(self._n_communities, self._block_size, *x)) for x in pq]
self._graphs = [from_scipy(sbm(self._n_communities, self._block_size, *x)) for x in pq]
self._line_graphs = [g.line_graph(backtracking=False) for g in self._graphs]
in_degrees = lambda g: g.in_degrees().float()
self._graph_degrees = [in_degrees(g) for g in self._graphs]
......
......@@ -100,7 +100,7 @@ class LegacyTUDataset(DGLBuiltinDataset):
DS_graph_labels = self._idx_from_zero(
np.genfromtxt(self._file_path("graph_labels"), dtype=int))
g = dgl_graph([])
g = dgl_graph(([], []))
g.add_nodes(int(DS_edge_list.max()) + 1)
g.add_edges(DS_edge_list[:, 0], DS_edge_list[:, 1])
......@@ -296,7 +296,7 @@ class TUDataset(DGLBuiltinDataset):
DS_graph_labels = self._idx_from_zero(
loadtxt(self._file_path("graph_labels"), delimiter=",").astype(int))
g = dgl_graph([])
g = dgl_graph(([], []))
g.add_nodes(int(DS_edge_list.max()) + 1)
g.add_edges(DS_edge_list[:, 0], DS_edge_list[:, 1])
......
......@@ -596,8 +596,11 @@ class EdgeCollator(Collator):
'graph has multiple or no edge types; '\
'please return a dict in negative sampler.'
neg_srcdst = {self.g.canonical_etypes[0]: neg_srcdst}
# Get dtype from a tuple of tensors
dtype = F.dtype(list(neg_srcdst.values())[0][0])
neg_edges = {
etype: neg_srcdst.get(etype, []) for etype in self.g.canonical_etypes}
etype: neg_srcdst.get(etype, (F.tensor([], dtype), F.tensor([], dtype)))
for etype in self.g.canonical_etypes}
neg_pair_graph = heterograph(
neg_edges, {ntype: self.g.number_of_nodes(ntype) for ntype in self.g.ntypes})
......
......@@ -38,12 +38,12 @@ def rand_graph(num_nodes, num_edges, idtype=F.int64, device=F.cpu(),
rows = F.copy_to(F.astype(eids / num_nodes, idtype), device)
cols = F.copy_to(F.astype(eids % num_nodes, idtype), device)
g = convert.graph((rows, cols),
num_nodes=num_nodes, validate=False,
formats=formats,
num_nodes=num_nodes,
idtype=idtype, device=device)
return g
return g.formats(formats)
def rand_bipartite(num_src_nodes, num_dst_nodes, num_edges,
def rand_bipartite(utype, etype, vtype,
num_src_nodes, num_dst_nodes, num_edges,
idtype=F.int64, device=F.cpu(),
formats=['csr', 'coo', 'csc']):
"""Generate a random bipartite graph of the given number of src/dst nodes and
......@@ -53,6 +53,12 @@ def rand_bipartite(num_src_nodes, num_dst_nodes, num_edges,
Parameters
----------
utype : str
The name of the source node type.
etype : str
The name of the edge type.
vtype : str
The name of the destination node type.
num_src_nodes : int
The number of source nodes, the :math:`|U|` in :math:`G=(U,V,E)`.
num_dst_nodes : int
......@@ -75,8 +81,7 @@ def rand_bipartite(num_src_nodes, num_dst_nodes, num_edges,
eids = random.choice(num_src_nodes * num_dst_nodes, num_edges, replace=False)
rows = F.copy_to(F.astype(eids / num_dst_nodes, idtype), device)
cols = F.copy_to(F.astype(eids % num_dst_nodes, idtype), device)
g = convert.bipartite((rows, cols),
num_nodes=(num_src_nodes, num_dst_nodes), validate=False,
idtype=idtype, device=device,
formats=formats)
return g
g = convert.heterograph({(utype, etype, vtype): (rows, cols)},
{utype: num_src_nodes, vtype: num_dst_nodes},
idtype=idtype, device=device)
return g.formats(formats)
"""Classes for heterogeneous graphs."""
#pylint: disable= too-many-lines
from collections import defaultdict
from collections import defaultdict, Iterable
from collections.abc import Mapping
from contextlib import contextmanager
import copy
......@@ -75,38 +75,21 @@ class DGLHeteroGraph(object):
One can construct the graph as follows:
>>> follows_g = dgl.graph(([0, 1], [1, 2]), 'user', 'follows')
>>> plays_g = dgl.bipartite(([0, 1, 1, 2], [0, 0, 1, 1]), 'user', 'plays', 'game')
>>> devs_g = dgl.bipartite(([0, 1], [0, 1]), 'developer', 'develops', 'game')
>>> g = dgl.hetero_from_relations([follows_g, plays_g, devs_g])
Or equivalently
>>> g = dgl.heterograph({
... ('user', 'follows', 'user'): ([0, 1], [1, 2]),
... ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 1, 1]),
... ('developer', 'develops', 'game'): ([0, 1], [0, 1]),
... })
:func:`dgl.graph` and :func:`dgl.bipartite` can create a graph from a variety of
data types including:
* edge list
* edge tuples
* networkx graph
* scipy sparse matrix
Click the function names for more details.
Then one can query the graph structure by specifying the ``ntype`` or ``etype`` arguments:
>>> g.number_of_nodes('user')
3
>>> g.number_of_edges('plays')
4
>>> g.out_degrees(etype='develops') # out-degrees of source nodes of 'develops' relation
>>> g.out_degrees(etype='develops') # out-degrees of source nodes of 'develops' edge type
tensor([1, 1])
>>> g.in_edges(0, etype='develops') # in-edges of destination node 0 of 'develops' relation
>>> g.in_edges(0, etype='develops') # in-edges of destination node 0 of 'develops' edge type
(tensor([0]), tensor([0]))
Or on the sliced graph for an edge type:
......@@ -125,9 +108,10 @@ class DGLHeteroGraph(object):
For example, suppose a graph that has two types of relation "user-watches-movie"
and "user-watches-TV" as follows:
>>> g0 = dgl.bipartite(([0, 1, 1], [1, 0, 1]), 'user', 'watches', 'movie')
>>> g1 = dgl.bipartite(([0, 1], [0, 1]), 'user', 'watches', 'TV')
>>> GG = dgl.hetero_from_relations([g0, g1]) # Merge the two graphs
>>> GG = dgl.heterograph({
... ('user', 'watches', 'movie'): ([0, 1, 1], [1, 0, 1]),
... ('user', 'watches', 'TV'): ([0, 1], [0, 1])
... })
To distinguish between the two "watches" edge type, one must specify a full triplet:
......@@ -400,11 +384,11 @@ class DGLHeteroGraph(object):
**Heterogeneous Graphs with Multiple Node Types**
>>> g = dgl.heterograph({
>>> ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
>>> torch.tensor([0, 0, 1, 1])),
>>> ('developer', 'develops', 'game'): (torch.tensor([0, 1]),
>>> torch.tensor([0, 1]))
>>> })
... ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
... torch.tensor([0, 0, 1, 1])),
... ('developer', 'develops', 'game'): (torch.tensor([0, 1]),
... torch.tensor([0, 1]))
... })
>>> g.add_nodes(2)
DGLError: Node type name must be specified
if there are more than one node types.
......@@ -547,7 +531,7 @@ class DGLHeteroGraph(object):
We can also assign features for the new edges in adding new edges.
>>> g.add_edges(torch.tensor([0, 0]), torch.tensor([2, 2]),
>>> {'h': torch.tensor([[1.], [2.]]), 'w': torch.ones(2, 1)})
... {'h': torch.tensor([[1.], [2.]]), 'w': torch.ones(2, 1)})
>>> g.edata['h']
tensor([[1.], [1.], [1.], [1.], [0.], [1.], [2.]])
......@@ -560,17 +544,17 @@ class DGLHeteroGraph(object):
**Heterogeneous Graphs with Multiple Edge Types**
>>> g = dgl.heterograph({
>>> ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
>>> torch.tensor([0, 0, 1, 1])),
>>> ('developer', 'develops', 'game'): (torch.tensor([0, 1]),
>>> torch.tensor([0, 1]))
>>> })
... ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
... torch.tensor([0, 0, 1, 1])),
... ('developer', 'develops', 'game'): (torch.tensor([0, 1]),
... torch.tensor([0, 1]))
... })
>>> g.add_edges(torch.tensor([3]), torch.tensor([3]))
DGLError: Edge type name must be specified
if there are more than one edge types.
>>> g.number_of_edges('plays')
4
>>> g.add_edges(torch.tensor([3]), torch.tensor([3]), etype='plays')
>>> g.add_edges(torch.tensor([3]), torch.tensor([3]), etype='plays')
>>> g.number_of_edges('plays')
5
......@@ -696,11 +680,11 @@ class DGLHeteroGraph(object):
**Heterogeneous Graphs with Multiple Edge Types**
>>> g = dgl.heterograph({
>>> ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
>>> torch.tensor([0, 0, 1, 1])),
>>> ('developer', 'develops', 'game'): (torch.tensor([0, 1]),
>>> torch.tensor([0, 1]))
>>> })
... ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
... torch.tensor([0, 0, 1, 1])),
... ('developer', 'develops', 'game'): (torch.tensor([0, 1]),
... torch.tensor([0, 1]))
... })
>>> g.remove_edges(torch.tensor([0, 1]))
DGLError: Edge type name must be specified
if there are more than one edge types.
......@@ -784,11 +768,11 @@ class DGLHeteroGraph(object):
**Heterogeneous Graphs with Multiple Node Types**
>>> g = dgl.heterograph({
>>> ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
>>> torch.tensor([0, 0, 1, 1])),
>>> ('developer', 'develops', 'game'): (torch.tensor([0, 1]),
>>> torch.tensor([0, 1]))
>>> })
... ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
... torch.tensor([0, 0, 1, 1])),
... ('developer', 'develops', 'game'): (torch.tensor([0, 1]),
... torch.tensor([0, 1]))
... })
>>> g.remove_nodes(torch.tensor([0, 1]))
DGLError: Node type name must be specified
if there are more than one node types.
......@@ -853,7 +837,7 @@ class DGLHeteroGraph(object):
A uni-bipartite heterograph can further divide its node types into two sets:
SRC and DST. All edges are from nodes in SRC to nodes in DST. The following APIs
can be used to get the nodes and types that belong to SRC and DST sets:
can be used to get the type, data, and nodes that belong to SRC and DST sets:
* :func:`srctype` and :func:`dsttype`
* :func:`srcdata` and :func:`dstdata`
......@@ -867,67 +851,131 @@ class DGLHeteroGraph(object):
@property
def ntypes(self):
"""Return the list of node types of this graph.
"""Return the node types of the graph.
Returns
-------
list of str
Each ``str`` is a node type.
Examples
--------
The following example uses PyTorch backend.
>>> import dgl
>>> import torch
>>> follows_g = dgl.graph(([0, 1], [1, 2]), 'user', 'follows')
>>> plays_g = dgl.bipartite(([0, 1, 1, 2], [0, 0, 1, 1]), 'user', 'plays', 'game')
>>> g = dgl.hetero_from_relations([follows_g, plays_g])
>>> g = dgl.heterograph({
... ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
... ('user', 'follows', 'game'): (torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3])),
... ('user', 'plays', 'game'): (torch.tensor([1, 3]), torch.tensor([2, 3]))
... })
>>> g.ntypes
['user', 'game']
['game', 'user']
"""
return self._ntypes
@property
def etypes(self):
"""Return the list of edge types of this graph.
"""Return the edge types of the graph.
Returns
-------
list of str
Each ``str`` is an edge type.
Notes
-----
An edge type can appear in multiple canonical edge types. For example, ``'interacts'``
can appear in two canonical edge types ``('drug', 'interacts', 'drug')`` and
``('protein', 'interacts', 'protein')``. It is recommended to use
:func:`~dgl.DGLGraph.canonical_etypes` in this case.
Examples
--------
The following example uses PyTorch backend.
>>> import dgl
>>> import torch
>>> follows_g = dgl.graph(([0, 1], [1, 2]), 'user', 'follows')
>>> plays_g = dgl.bipartite(([0, 1, 1, 2], [0, 0, 1, 1]), 'user', 'plays', 'game')
>>> g = dgl.hetero_from_relations([follows_g, plays_g])
>>> g = dgl.heterograph({
... ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
... ('user', 'follows', 'game'): (torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3])),
... ('user', 'plays', 'game'): (torch.tensor([1, 3]), torch.tensor([2, 3]))
... })
>>> g.etypes
['follows', 'plays']
['follows', 'follows', 'plays']
"""
return self._etypes
@property
def canonical_etypes(self):
"""Return the list of canonical edge types of this graph.
"""Return the canonical edge types of the graph.
A canonical edge type is a tuple of string (src_type, edge_type, dst_type).
A canonical edge type is a 3-tuple of str ``src_type, edge_type, dst_type``, where
``src_type``, ``edge_type``, ``dst_type`` are the type of the source nodes, edges
and destination nodes respectively.
Returns
-------
list of 3-tuples
list of 3-tuples of str
Each 3-tuple of str is a canonical edge type.
Examples
--------
The following example uses PyTorch backend.
>>> import dgl
>>> import torch
>>> follows_g = dgl.graph(([0, 1], [1, 2]), 'user', 'follows')
>>> plays_g = dgl.bipartite(([0, 1, 1, 2], [0, 0, 1, 1]), 'user', 'plays', 'game')
>>> g = dgl.hetero_from_relations([follows_g, plays_g])
>>> g = dgl.heterograph({
... ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
... ('user', 'follows', 'game'): (torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3])),
... ('user', 'plays', 'game'): (torch.tensor([1, 3]), torch.tensor([2, 3]))
... })
>>> g.canonical_etypes
[('user', 'follows', 'user'), ('user', 'plays', 'game')]
[('user', 'follows', 'user'),
('user', 'follows', 'game'),
('user', 'plays', 'game')]
"""
return self._canonical_etypes
@property
def srctypes(self):
"""Return the node types in the SRC category. Return :attr:``ntypes`` if
the graph is not a uni-bipartite graph.
"""Return the source node types.
Returns
-------
list of str
* If the graph is a uni-bipartite graph, it returns the source node types.
For a definition of uni-bipartite, see :func:`is_unibipartite`.
* Otherwise, it returns all node types in the graph.
Examples
--------
The following example uses PyTorch backend.
>>> import dgl
>>> import torch
Query for a uni-bipartite graph.
>>> g = dgl.heterograph({
... ('user', 'plays', 'game'): (torch.tensor([0]), torch.tensor([1])),
... ('developer', 'develops', 'game'): (torch.tensor([1]), torch.tensor([2]))
... })
>>> g.srctypes
['developer', 'user']
Query for a graph that is not uni-bipartite.
>>> g = dgl.heterograph({
... ('user', 'follows', 'user'): (torch.tensor([0]), torch.tensor([1])),
... ('developer', 'develops', 'game'): (torch.tensor([1]), torch.tensor([2]))
... })
>>> g.srctypes
['developer', 'game', 'user']
"""
if self.is_unibipartite:
return sorted(list(self._srctypes_invmap.keys()))
......@@ -936,8 +984,41 @@ class DGLHeteroGraph(object):
@property
def dsttypes(self):
"""Return the node types in the DST category. Return :attr:``ntypes`` if
the graph is not a uni-bipartite graph.
"""Return the destination node types.
Returns
-------
list of str
Each str is a node type.
* If the graph is a uni-bipartite graph, it returns the destination node types.
For a definition of uni-bipartite, see :func:`is_unibipartite`.
* Otherwise, it returns all node types in the graph.
Examples
--------
The following example uses PyTorch backend.
>>> import dgl
>>> import torch
Query for a uni-bipartite graph.
>>> g = dgl.heterograph({
... ('user', 'plays', 'game'): (torch.tensor([0]), torch.tensor([1])),
... ('developer', 'develops', 'game'): (torch.tensor([1]), torch.tensor([2]))
... })
>>> g.dsttypes
['game']
Query for a graph that is not uni-bipartite.
>>> g = dgl.heterograph({
... ('user', 'follows', 'user'): (torch.tensor([0]), torch.tensor([1])),
... ('developer', 'develops', 'game'): (torch.tensor([1]), torch.tensor([2]))
... })
>>> g.dsttypes
['developer', 'game', 'user']
"""
if self.is_unibipartite:
return sorted(list(self._dsttypes_invmap.keys()))
......@@ -945,33 +1026,34 @@ class DGLHeteroGraph(object):
return self.ntypes
def metagraph(self):
"""Return the metagraph as networkx.MultiDiGraph.
"""Return the metagraph of the heterograph.
The nodes are labeled with node type names.
The edges have their keys holding the edge type names.
The metagraph (or network schema) of a heterogeneous network specifies type constraints
on the sets of nodes and edges between the nodes. For a formal definition, refer to
`Sun and Han <https://www.kdd.org/exploration_files/V14-02-03-Sun.pdf>`_.
Returns
-------
networkx.MultiDiGraph
The metagraph.
Examples
--------
The following example uses PyTorch backend.
>>> follows_g = dgl.graph(([0, 1], [1, 2]), 'user', 'follows')
>>> plays_g = dgl.bipartite(([0, 1, 1, 2], [0, 0, 1, 1]), 'user', 'plays', 'game')
>>> g = dgl.hetero_from_relations([follows_g, plays_g])
>>> meta_g = g.metagraph()
The metagraph then has two nodes and two edges.
>>> import dgl
>>> import torch
>>> g = dgl.heterograph({
... ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
... ('user', 'follows', 'game'): (torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3])),
... ('user', 'plays', 'game'): (torch.tensor([1, 3]), torch.tensor([2, 3]))
... })
>>> meta_g = g.metagraph()
>>> meta_g.nodes()
NodeView(('user', 'game'))
>>> meta_g.number_of_nodes()
2
>>> meta_g.edges()
OutMultiEdgeDataView([('user', 'user'), ('user', 'game')])
>>> meta_g.number_of_edges()
2
OutMultiEdgeDataView([('user', 'user'), ('user', 'game'), ('user', 'game')])
"""
nx_graph = self._graph.metagraph.to_networkx()
nx_metagraph = nx.MultiDiGraph()
......@@ -981,38 +1063,56 @@ class DGLHeteroGraph(object):
return nx_metagraph
def to_canonical_etype(self, etype):
"""Convert edge type to canonical etype: (srctype, etype, dsttype).
"""Convert an edge type to the corresponding canonical edge type in the graph.
The input can already be a canonical tuple.
A canonical edge type is a 3-tuple of strings ``src_type, edge_type, dst_type``, where
``src_type``, ``edge_type``, ``dst_type`` are the types of the source nodes, edges and
destination nodes, respectively.
Parameters
----------
etype : str or tuple of str
Edge type
etype : str or 3-tuple of str
If :attr:`etype` is an edge type (str), it returns the corresponding canonical edge
type in the graph. If :attr:`etype` is already a canonical edge type
(3-tuple of str), it simply returns :attr:`etype`.
Returns
-------
tuple of str
3-tuple of str
The canonical edge type corresponding to the edge type.
Notes
-----
If :attr:`etype` is an edge type, the API expects it to appear only once in the graph. For
example, in a graph with canonical edge types ``('A', 'follows', 'B')``,
``('A', 'follows', 'C')`` and ``('B', 'watches', 'D')``, ``'follows'`` is an invalid value
for :attr:`etype` while ``'watches'`` is a valid one.
Examples
--------
The following example uses PyTorch backend.
Instantiate a heterograph.
>>> import dgl
>>> import torch
Create a heterograph.
>>> g1 = dgl.graph(([0, 1], [1, 2]), 'user', 'follows')
>>> g2 = dgl.bipartite(([0, 1, 1, 2], [0, 0, 1, 1]), 'user', 'plays', 'game')
>>> g3 = dgl.bipartite(([0, 1], [0, 1]), 'developer', 'follows', 'game')
>>> g = dgl.hetero_from_relations([g1, g2, g3])
>>> g = dgl.heterograph({
... ('user', 'follows', 'user'): ([0, 1], [1, 2]),
... ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 1, 1]),
... ('developer', 'follows', 'game'): ([0, 1], [0, 1])
... })
Get canonical edge types.
Map an edge type to its corresponding canonical edge type.
>>> g.to_canonical_etype('plays')
('user', 'plays', 'game')
>>> g.to_canonical_etype(('user', 'plays', 'game'))
('user', 'plays', 'game')
>>> g.to_canonical_etype('follows')
DGLError: Edge type "follows" is ambiguous.
Please use canonical edge type in the form of (srctype, etype, dsttype)
See Also
--------
canonical_etypes
"""
if etype is None:
if len(self.etypes) != 1:
......@@ -1026,8 +1126,8 @@ class DGLHeteroGraph(object):
if ret is None:
raise DGLError('Edge type "{}" does not exist.'.format(etype))
if len(ret) == 0:
raise DGLError('Edge type "%s" is ambiguous. Please use canonical etype '
'type in the form of (srctype, etype, dsttype)' % etype)
raise DGLError('Edge type "%s" is ambiguous. Please use canonical edge type '
'in the form of (srctype, etype, dsttype)' % etype)
return ret
def get_ntype_id(self, ntype):
......@@ -1144,11 +1244,93 @@ class DGLHeteroGraph(object):
#################################################################
@property
def batch_size(self):
"""TBD"""
"""Return the number of graphs in the batched graph.
Returns
-------
int
The number of graphs in the batch. If the graph is not a batched one,
it will return 1.
Examples
--------
The following example uses PyTorch backend.
>>> import dgl
>>> import torch
Query for homogeneous graphs.
>>> g1 = dgl.graph((torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3])))
>>> g1.batch_size
1
>>> g2 = dgl.graph((torch.tensor([0, 0, 0, 1]), torch.tensor([0, 1, 2, 0])))
>>> bg = dgl.batch([g1, g2])
>>> bg.batch_size
2
Query for heterogeneous graphs.
>>> hg1 = dgl.heterograph({
... ('user', 'plays', 'game') : (torch.tensor([0, 1]), torch.tensor([0, 0]))})
>>> hg1.batch_size
1
>>> hg2 = dgl.heterograph({
... ('user', 'plays', 'game') : (torch.tensor([0, 0]), torch.tensor([1, 0]))})
>>> bg = dgl.batch([hg1, hg2])
>>> bg.batch_size
2
"""
return len(self.batch_num_nodes(self.ntypes[0]))
def batch_num_nodes(self, ntype=None):
"""TBD"""
"""Return the number of nodes for each graph in the batch with the specified node type.
Parameters
----------
ntype : str, optional
The node type for query. If the graph has multiple node types, one must
specify the argument. Otherwise, it can be omitted. If the graph is not a batched
one, it will return a tensor of length 1 that holds the number of nodes in the graph.
Returns
-------
Tensor
The number of nodes with the specified type for each graph in the batch. The i-th
element of it is the number of nodes with the specified type for the i-th graph.
Examples
--------
The following example uses PyTorch backend.
>>> import dgl
>>> import torch
Query for homogeneous graphs.
>>> g1 = dgl.graph((torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3])))
>>> g1.batch_num_nodes()
tensor([4])
>>> g2 = dgl.graph((torch.tensor([0, 0, 0, 1]), torch.tensor([0, 1, 2, 0])))
>>> bg = dgl.batch([g1, g2])
>>> bg.batch_num_nodes()
tensor([4, 3])
Query for heterogeneous graphs.
>>> hg1 = dgl.heterograph({
... ('user', 'plays', 'game') : (torch.tensor([0, 1]), torch.tensor([0, 0]))})
>>> hg2 = dgl.heterograph({
... ('user', 'plays', 'game') : (torch.tensor([0, 0]), torch.tensor([1, 0]))})
>>> bg = dgl.batch([hg1, hg2])
>>> bg.batch_num_nodes('user')
tensor([2, 1])
"""
if ntype is not None and ntype not in self.ntypes:
raise DGLError('Expect ntype in {}, got {}'.format(self.ntypes, ntype))
if self._batch_num_nodes is None:
self._batch_num_nodes = {}
for ty in self.ntypes:
......@@ -1170,7 +1352,52 @@ class DGLHeteroGraph(object):
self._batch_num_nodes = val
def batch_num_edges(self, etype=None):
"""TBD"""
"""Return the number of edges for each graph in the batch with the specified edge type.
Parameters
----------
etype : str or tuple of str, optional
The edge type for query, which can be an edge type (str) or a canonical edge type
(3-tuple of str). When an edge type appears in multiple canonical edge types, one
must use a canonical edge type. If the graph has multiple edge types, one must
specify the argument. Otherwise, it can be omitted.
Returns
-------
Tensor
The number of edges with the specified type for each graph in the batch. The i-th
element of it is the number of edges with the specified type for the i-th graph.
If the graph is not a batched one, it will return a tensor of length 1 that holds
the number of edges in the graph.
Examples
--------
The following example uses PyTorch backend.
>>> import dgl
>>> import torch
Query for homogeneous graphs.
>>> g1 = dgl.graph((torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3])))
>>> g1.batch_num_edges()
tensor([3])
>>> g2 = dgl.graph((torch.tensor([0, 0, 0, 1]), torch.tensor([0, 1, 2, 0])))
>>> bg = dgl.batch([g1, g2])
>>> bg.batch_num_edges()
tensor([3, 4])
Query for heterogeneous graphs.
>>> hg1 = dgl.heterograph({
... ('user', 'plays', 'game') : (torch.tensor([0, 1]), torch.tensor([0, 0]))})
>>> hg2 = dgl.heterograph({
... ('user', 'plays', 'game') : (torch.tensor([0, 0]), torch.tensor([1, 0]))})
>>> bg = dgl.batch([hg1, hg2])
>>> bg.batch_num_edges('plays')
tensor([2, 2])
"""
if self._batch_num_edges is None:
self._batch_num_edges = {}
for ty in self.canonical_etypes:
......@@ -1181,6 +1408,8 @@ class DGLHeteroGraph(object):
raise DGLError('Edge type name must be specified if there are more than one '
'edge types.')
etype = self.canonical_etypes[0]
else:
etype = self.to_canonical_etype(etype)
return self._batch_num_edges[etype]
def set_batch_num_edges(self, val):
......@@ -1197,37 +1426,114 @@ class DGLHeteroGraph(object):
@property
def nodes(self):
"""Return a node view that can be used to set/get feature
data of a single node type.
"""Return a node view
One can use it for:
1. Getting the node IDs for a single node type.
2. Setting/getting features for all nodes of a single node type.
Examples
--------
The following example uses PyTorch backend.
To set features of all users
>>> import dgl
>>> import torch
Create a homogeneous graph and a heterogeneous graph of two node types.
>>> g = dgl.graph((torch.tensor([0, 1]), torch.tensor([1, 2])))
>>> hg = dgl.heterograph({
... ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
... ('user', 'plays', 'game'): (torch.tensor([3, 4]), torch.tensor([5, 6]))
... })
Get the node IDs of the homogeneous graph.
>>> g.nodes()
tensor([0, 1, 2])
Get the node IDs of the heterogeneous graph. With multiple node types introduced,
one needs to specify the node type for query.
>>> hg.nodes('user')
tensor([0, 1, 2, 3, 4])
>>> g = dgl.graph(([0, 1], [1, 2]), 'user', 'follows')
>>> g.nodes['user'].data['h'] = torch.zeros(3, 5)
Set and get a feature 'h' for all nodes of a single type in the heterogeneous graph.
>>> hg.nodes['user'].data['h'] = torch.ones(5, 1)
>>> hg.nodes['user'].data['h']
tensor([[1.], [1.], [1.], [1.], [1.]])
To set node features for a graph with a single node type, use :func:`DGLGraph.ndata`.
See Also
--------
ndata
"""
# Todo (Mufei) Replace the syntax g.nodes[...].ndata[...] with g.nodes[...][...]
return HeteroNodeView(self, self.get_ntype_id)
@property
def srcnodes(self):
"""Return a SRC node view that can be used to set/get feature
data of a single node type.
"""Return a node view for source nodes
If the graph is a uni-bipartite graph (see :func:`is_unibipartite` for reference),
this is :func:`nodes` restricted to source node types. Otherwise, it is an alias
for :func:`nodes`.
One can use it for:
1. Getting the node IDs for a single node type.
2. Setting/getting features for all nodes of a single node type.
Examples
--------
The following example uses PyTorch backend.
To set features of all users
>>> import dgl
>>> import torch
Create a uni-bipartite graph.
>>> g = dgl.heterograph({
... ('user', 'plays', 'game'): (torch.tensor([0]), torch.tensor([1])),
... ('developer', 'develops', 'game'): (torch.tensor([1]), torch.tensor([2]))
... })
Get the node IDs for source node types.
>>> g.srcnodes('user')
tensor([0])
>>> g.srcnodes('developer')
tensor([0, 1])
Set/get features for source node types.
>>> g.srcnodes['user'].data['h'] = torch.ones(1, 1)
>>> g.srcnodes['user'].data['h']
tensor([[1.]])
>>> g = dgl.bipartite(([0, 1], [1, 2]), 'user', 'plays', 'game')
>>> g.srcnodes['user'].data['h'] = torch.zeros(2, 5)
Create a graph that is not uni-bipartite.
>>> g = dgl.heterograph({
... ('user', 'follows', 'user'): (torch.tensor([0]), torch.tensor([1])),
... ('developer', 'develops', 'game'): (torch.tensor([1]), torch.tensor([2]))
... })
:func:`dgl.DGLGraph.srcnodes` falls back to :func:`dgl.DGLGraph.nodes` and one can
get the node IDs for both source and destination node types.
>>> g.srcnodes('game')
tensor([0, 1, 2])
One can also set/get features for destination node types in this case.
>>> g.srcnodes['game'].data['h'] = torch.ones(3, 1)
>>> g.srcnodes['game'].data['h']
tensor([[1.],
[1.],
[1.]])
See Also
--------
......@@ -1237,17 +1543,63 @@ class DGLHeteroGraph(object):
@property
def dstnodes(self):
"""Return a DST node view that can be used to set/get feature
data of a single node type.
"""Return a node view for destination nodes
If the graph is a uni-bipartite graph (see :func:`is_unibipartite` for reference),
this is :func:`nodes` restricted to destination node types. Otherwise, it is an alias
for :func:`nodes`.
One can use it for:
1. Getting the node IDs for a single node type.
2. Setting/getting features for all nodes of a single node type.
Examples
--------
The following example uses PyTorch backend.
To set features of all games
>>> import dgl
>>> import torch
Create a uni-bipartite graph.
>>> g = dgl.bipartite(([0, 1], [1, 2]), 'user', 'plays', 'game')
>>> g.dstnodes['game'].data['h'] = torch.zeros(3, 5)
>>> g = dgl.heterograph({
... ('user', 'plays', 'game'): (torch.tensor([0]), torch.tensor([1])),
... ('developer', 'develops', 'game'): (torch.tensor([1]), torch.tensor([2]))
... })
Get the node IDs for destination node types.
>>> g.dstnodes('game')
tensor([0, 1, 2])
Set/get features for destination node types.
>>> g.dstnodes['game'].data['h'] = torch.ones(3, 1)
>>> g.dstnodes['game'].data['h']
tensor([[1.],
[1.],
[1.]])
Create a graph that is not uni-bipartite.
>>> g = dgl.heterograph({
... ('user', 'follows', 'user'): (torch.tensor([0]), torch.tensor([1])),
... ('developer', 'develops', 'game'): (torch.tensor([1]), torch.tensor([2]))
... })
:func:`dgl.DGLGraph.dstnodes` falls back to :func:`dgl.DGLGraph.nodes` and one can
get the node IDs for both source and destination node types.
>>> g.dstnodes('developer')
tensor([0, 1])
One can also set/get features for source node types in this case.
>>> g.dstnodes['developer'].data['h'] = torch.ones(2, 1)
>>> g.dstnodes['developer'].data['h']
tensor([[1.],
[1.]])
See Also
--------
......@@ -1257,50 +1609,53 @@ class DGLHeteroGraph(object):
@property
def ndata(self):
"""Return the data view of all the nodes.
"""Return a node data view for setting/getting node features
Let ``g`` be a DGLGraph. If ``g`` is a graph of a single node type, ``g.ndata[feat]``
returns the node feature associated with the name ``feat``. One can also set a node
feature associated with the name ``feat`` by setting ``g.ndata[feat]`` to a tensor.
If the graph has only one node type, ``g.ndata['feat']`` gives
the node feature data under name ``'feat'``.
If the graph has multiple node types, then ``g.ndata['feat']``
returns a dictionary where the key is the node type and the
value is the node feature tensor. If the node type does not
have feature `'feat'`, it is not included in the dictionary.
If ``g`` is a graph of multiple node types, ``g.ndata[feat]`` returns a
dict[str, Tensor] mapping node types to the node features associated with the name
``feat`` for the corresponding type. One can also set a node feature associated
with the name ``feat`` for some node type(s) by setting ``g.ndata[feat]`` to a
dictionary as described.
Notes
-----
For setting features, the device of the features must be the same as the device
of the graph.
Examples
--------
The following example uses PyTorch backend.
To set features of all nodes in a heterogeneous graph
with only one node type:
>>> g = dgl.graph(([0, 1], [1, 2]), 'user', 'follows')
>>> g.ndata['h'] = torch.zeros(3, 5)
>>> import dgl
>>> import torch
To set features of all nodes in a heterogeneous graph
with multiple node types:
Set and get feature 'h' for a graph of a single node type.
>>> g = dgl.heterograph({('user', 'like', 'movie') : ([0, 1, 1], [1, 2, 0])})
>>> g.ndata['h'] = {'user': torch.zeros(2, 5),
... 'movie': torch.zeros(3, 5)}
>>> g = dgl.graph((torch.tensor([0, 1]), torch.tensor([1, 2])))
>>> g.ndata['h'] = torch.ones(3, 1)
>>> g.ndata['h']
... {'user': tensor([[0., 0., 0., 0., 0.],
... [0., 0., 0., 0., 0.]]),
... 'movie': tensor([[0., 0., 0., 0., 0.],
... [0., 0., 0., 0., 0.],
... [0., 0., 0., 0., 0.]])}
tensor([[1.],
[1.],
[1.]])
To set features of part of nodes in a heterogeneous graph
with multiple node types:
Set and get feature 'h' for a graph of multiple node types.
>>> g = dgl.heterograph({('user', 'like', 'movie') : ([0, 1, 1], [1, 2, 0])})
>>> g.ndata['h'] = {'user': torch.zeros(2, 5)}
>>> g = dgl.heterograph({
... ('user', 'follows', 'user'): (torch.tensor([1, 2]), torch.tensor([3, 4])),
... ('player', 'plays', 'game'): (torch.tensor([2, 2]), torch.tensor([1, 1]))
... })
>>> g.ndata['h'] = {'game': torch.zeros(2, 1), 'player': torch.ones(3, 1)}
>>> g.ndata['h']
... {'user': tensor([[0., 0., 0., 0., 0.],
... [0., 0., 0., 0., 0.]])}
>>> # clean the feature 'h' and no node type contains 'h'
>>> g.ndata.pop('h')
{'game': tensor([[0.], [0.]]),
'player': tensor([[1.], [1.], [1.]])}
>>> g.ndata['h'] = {'game': torch.ones(2, 1)}
>>> g.ndata['h']
... {}
{'game': tensor([[1.], [1.]]),
'player': tensor([[1.], [1.], [1.]])}
See Also
--------
......@@ -1315,84 +1670,62 @@ class DGLHeteroGraph(object):
ntypes = self.ntypes
return HeteroNodeDataView(self, ntypes, ntids, ALL)
@property
def srcdata(self):
"""Return the data view of all nodes in the SRC category.
"""Return a node data view for setting/getting source node features.
If the source nodes have only one node type, ``g.srcdata['feat']``
gives the node feature data under name ``'feat'``.
If the source nodes have multiple node types, then
``g.srcdata['feat']`` returns a dictionary where the key is
the source node type and the value is the node feature
tensor. If the source node type does not have feature
`'feat'`, it is not included in the dictionary.
Let ``g`` be a DGLGraph. If ``g`` is a graph of a single source node type,
``g.srcdata[feat]`` returns the source node feature associated with the name ``feat``.
One can also set a source node feature associated with the name ``feat`` by
setting ``g.srcdata[feat]`` to a tensor.
If ``g`` is a graph of multiple source node types, ``g.srcdata[feat]`` returns a
dict[str, Tensor] mapping source node types to the node features associated with
the name ``feat`` for the corresponding type. One can also set a node feature
associated with the name ``feat`` for some source node type(s) by setting
``g.srcdata[feat]`` to a dictionary as described.
Notes
-----
For setting features, the device of the features must be the same as the device
of the graph.
Examples
--------
The following example uses PyTorch backend.
To set features of all source nodes in a graph with only one edge type:
>>> g = dgl.bipartite(([0, 1], [1, 2]), 'user', 'plays', 'game')
>>> g.srcdata['h'] = torch.zeros(2, 5)
This is equivalent to
>>> g.nodes['user'].data['h'] = torch.zeros(2, 5)
>>> import dgl
>>> import torch
Also work on more complex uni-bipartite graph
Set and get feature 'h' for a graph of a single source node type.
>>> g = dgl.heterograph({
... ('user', 'plays', 'game') : ([0, 1], [1, 2]),
... ('user', 'reads', 'book') : ([0, 1], [1, 0]),
... })
>>> print(g.is_unibipartite)
True
>>> g.srcdata['h'] = torch.zeros(2, 5)
... ('user', 'plays', 'game'): (torch.tensor([0, 1]), torch.tensor([1, 2]))})
>>> g.srcdata['h'] = torch.ones(2, 1)
>>> g.srcdata['h']
tensor([[1.],
[1.]])
To set features of all source nodes in a uni-bipartite graph
with multiple source node types:
Set and get feature 'h' for a graph of multiple source node types.
>>> g = dgl.heterograph({
... ('game', 'liked-by', 'user') : ([1, 2], [0, 1]),
... ('book', 'liked-by', 'user') : ([0, 1], [1, 0]),
... })
>>> print(g.is_unibipartite)
True
>>> g.srcdata['h'] = {'game' : torch.zeros(3, 5),
... 'book' : torch.zeros(2, 5)}
>>> g.srcdata['h']
... {'game': tensor([[0., 0., 0., 0., 0.],
... [0., 0., 0., 0., 0.],
... [0., 0., 0., 0., 0.]]),
... 'book': tensor([[0., 0., 0., 0., 0.],
... [0., 0., 0., 0., 0.]])}
To set features of part of source nodes in a uni-bipartite graph
with multiple source node types:
>>> g = dgl.heterograph({
... ('game', 'liked-by', 'user') : ([1, 2], [0, 1]),
... ('book', 'liked-by', 'user') : ([0, 1], [1, 0]),
... })
>>> g.srcdata['h'] = {'game' : torch.zeros(3, 5)}
... ('user', 'plays', 'game'): (torch.tensor([1, 2]), torch.tensor([3, 4])),
... ('player', 'plays', 'game'): (torch.tensor([2, 2]), torch.tensor([1, 1]))
... })
>>> g.srcdata['h'] = {'user': torch.zeros(3, 1), 'player': torch.ones(3, 1)}
>>> g.srcdata['h']
>>> {'game': tensor([[0., 0., 0., 0., 0.],
... [0., 0., 0., 0., 0.],
... [0., 0., 0., 0., 0.]])}
>>> # clean the feature 'h' and no source node type contains 'h'
>>> g.srcdata.pop('h')
{'player': tensor([[1.], [1.], [1.]]),
'user': tensor([[0.], [0.], [0.]])}
>>> g.srcdata['h'] = {'user': torch.ones(3, 1)}
>>> g.srcdata['h']
... {}
Notes
-----
This is identical to :any:`DGLHeteroGraph.ndata` if the graph is homogeneous.
{'player': tensor([[1.], [1.], [1.]]),
'user': tensor([[1.], [1.], [1.]])}
See Also
--------
nodes
ndata
srcnodes
"""
if len(self.srctypes) == 1:
ntype = self.srctypes[0]
......@@ -1405,81 +1738,61 @@ class DGLHeteroGraph(object):
@property
def dstdata(self):
"""Return the data view of all destination nodes.
"""Return a node data view for setting/getting destination node features.
Let ``g`` be a DGLGraph. If ``g`` is a graph of a single destination node type,
``g.dstdata[feat]`` returns the destination node feature associated with the name
``feat``. One can also set a destination node feature associated with the name
``feat`` by setting ``g.dstdata[feat]`` to a tensor.
If the destination nodes have only one node type,
``g.dstdata['feat']`` gives the node feature data under name
``'feat'``.
If the destination nodes have multiple node types, then
``g.dstdata['feat']`` returns a dictionary where the key is
the destination node type and the value is the node feature
tensor. If the destination node type does not have feature
`'feat'`, it is not included in the dictionary.
If ``g`` is a graph of multiple destination node types, ``g.dstdata[feat]`` returns a
dict[str, Tensor] mapping destination node types to the node features associated with
the name ``feat`` for the corresponding type. One can also set a node feature
associated with the name ``feat`` for some destination node type(s) by setting
``g.dstdata[feat]`` to a dictionary as described.
Notes
-----
For setting features, the device of the features must be the same as the device
of the graph.
Examples
--------
The following example uses PyTorch backend.
To set features of all source nodes in a graph with only one edge type:
>>> import dgl
>>> import torch
>>> g = dgl.bipartite(([0, 1], [1, 2]), 'user', 'plays', 'game')
>>> g.dstdata['h'] = torch.zeros(3, 5)
This is equivalent to
>>> g.nodes['game'].data['h'] = torch.zeros(3, 5)
Also work on more complex uni-bipartite graph
Set and get feature 'h' for a graph of a single destination node type.
>>> g = dgl.heterograph({
... ('user', 'plays', 'game') : ([0, 1], [1, 2]),
... ('store', 'sells', 'game') : ([0, 1], [1, 0]),
... })
>>> print(g.is_unibipartite)
True
>>> g.dstdata['h'] = torch.zeros(3, 5)
... ('user', 'plays', 'game'): (torch.tensor([0, 1]), torch.tensor([1, 2]))})
>>> g.dstdata['h'] = torch.ones(3, 1)
>>> g.dstdata['h']
tensor([[1.],
[1.],
[1.]])
To set features of all destination nodes in a uni-bipartite graph
with multiple destination node types::
Set and get feature 'h' for a graph of multiple destination node types.
>>> g = dgl.heterograph({
... ('user', 'plays', 'game') : ([0, 1], [1, 2]),
... ('user', 'reads', 'book') : ([0, 1], [1, 0]),
... })
>>> print(g.is_unibipartite)
True
>>> g.dstdata['h'] = {'game' : torch.zeros(3, 5),
... 'book' : torch.zeros(2, 5)}
>>> g.dstdata['h']
... {'game': tensor([[0., 0., 0., 0., 0.],
... [0., 0., 0., 0., 0.],
... [0., 0., 0., 0., 0.]]),
... 'book': tensor([[0., 0., 0., 0., 0.],
... [0., 0., 0., 0., 0.]])}
To set features of part of destination nodes in a uni-bipartite graph
with multiple destination node types:
>>> g = dgl.heterograph({
... ('user', 'plays', 'game') : ([0, 1], [1, 2]),
... ('user', 'reads', 'book') : ([0, 1], [1, 0]),
... })
>>> g.dstdata['h'] = {'game' : torch.zeros(3, 5)}
... ('user', 'plays', 'game'): (torch.tensor([1, 2]), torch.tensor([1, 2])),
... ('user', 'watches', 'movie'): (torch.tensor([2, 2]), torch.tensor([1, 1]))
... })
>>> g.dstdata['h'] = {'game': torch.zeros(3, 1), 'movie': torch.ones(2, 1)}
>>> g.dstdata['h']
... {'game': tensor([[0., 0., 0., 0., 0.],
... [0., 0., 0., 0., 0.],
... [0., 0., 0., 0., 0.]])}
>>> # clean the feature 'h' and no destination node type contains 'h'
>>> g.dstdata.pop('h')
{'game': tensor([[0.], [0.], [0.]]),
'movie': tensor([[1.], [1.]])}
>>> g.dstdata['h'] = {'game': torch.ones(3, 1)}
>>> g.dstdata['h']
... {}
Notes
-----
This is identical to :any:`DGLHeteroGraph.ndata` if the graph is homogeneous.
{'game': tensor([[1.], [1.], [1.]]),
'movie': tensor([[1.], [1.]])}
See Also
--------
nodes
ndata
dstnodes
"""
if len(self.dsttypes) == 1:
ntype = self.dsttypes[0]
......@@ -1492,78 +1805,142 @@ class DGLHeteroGraph(object):
@property
def edges(self):
"""Return an edge view that can be used to set/get feature
data of a single edge type.
"""Return an edge view
One can use it for:
1. Getting the edges for a single edge type. In this case, it can take the
following optional arguments:
- form : str, optional
The return form, which can be one of the following:
- ``'uv'`` (default): The returned result is a 2-tuple of 1D tensors
:math:`(U, V)`, representing the source and destination nodes of all edges.
For each :math:`i`, :math:`(U[i], V[i])` forms an edge.
- ``'eid'``: The returned result is a 1D tensor :math:`EID`, representing
the IDs of all edges.
- ``'all'``: The returned result is a 3-tuple of 1D tensors :math:`(U, V, EID)`,
representing the source nodes, destination nodes and IDs of all edges.
For each :math:`i`, :math:`(U[i], V[i])` forms an edge with ID :math:`EID[i]`.
- order : str, optional
The order of the returned edges, which can be one of the following:
- ``'eid'`` (default): The edges are sorted by their IDs.
- ``'srcdst'``: The edges are sorted first by their source node IDs and then
by their destination node IDs to break ties.
- etype : str or tuple of str, optional
The edge type for query, which can be an edge type (str) or a canonical edge
type (3-tuple of str). When an edge type appears in multiple canonical edge
types, one must use a canonical edge type. If the graph has multiple edge
types, one must specify the argument. Otherwise, it can be omitted.
2. Setting/getting features for all edges of a single edge type. To set/get a feature
``feat`` for edges of type ``etype`` in a graph ``g``, one can use
``g.edges[etype].data[feat]``.
Examples
--------
The following example uses PyTorch backend.
To set features of all "play" relationships:
>>> import dgl
>>> import torch
>>> g = dgl.bipartite(([0, 1, 1], [0, 0, 2]), 'user', 'plays', 'game')
>>> g.edges['plays'].data['h'] = torch.zeros(3, 4)
**Get the Edges for a Single Edge Type**
Create a graph with a single edge type.
>>> g = dgl.graph((torch.tensor([1, 0, 0]), torch.tensor([1, 1, 0])))
>>> g.edges()
(tensor([1, 0, 0]), tensor([1, 1, 0]))
Specify a different value for :attr:`form` and :attr:`order`.
>>> g.edges(form='all', order='srcdst')
(tensor([0, 0, 1]), tensor([0, 1, 1]), tensor([2, 1, 0]))
For a graph of multiple edge types, it is required to specify the edge type in query.
>>> hg = dgl.heterograph({
... ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
... ('user', 'plays', 'game'): (torch.tensor([3, 4]), torch.tensor([5, 6]))
... })
>>> hg.edges(etype='plays')
(tensor([3, 4]), tensor([5, 6]))
**Set/get Features for All Edges of a Single Edge Type**
Create a heterogeneous graph of two edge types.
>>> hg = dgl.heterograph({
... ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
... ('user', 'plays', 'game'): (torch.tensor([3, 4]), torch.tensor([5, 6]))
... })
Set and get a feature 'h' for all edges of a single type in the heterogeneous graph.
>>> hg.edges['follows'].data['h'] = torch.ones(2, 1)
>>> hg.edges['follows'].data['h']
tensor([[1.], [1.]])
To set edge features for a graph with a single edge type, use :func:`DGLGraph.edata`.
See Also
--------
edata
"""
# TODO(Mufei): Replace the syntax g.edges[...].edata[...] with g.edges[...][...]
return HeteroEdgeView(self)
@property
def edata(self):
"""Return the data view of all the edges.
"""Return an edge data view for setting/getting edge features.
Let ``g`` be a DGLGraph. If ``g`` is a graph of a single edge type, ``g.edata[feat]``
returns the edge feature associated with the name ``feat``. One can also set an
edge feature associated with the name ``feat`` by setting ``g.edata[feat]`` to a tensor.
If the graph has only one edge type, ``g.edata['feat']`` gives the
edge feature data under name ``'feat'``.
If the graph has multiple edge types, then ``g.edata['feat']``
returns a dictionary where the key is the edge type and the value
is the edge feature tensor. If the edge type does not have feature
``'feat'``, it is not included in the dictionary.
If ``g`` is a graph of multiple edge types, ``g.edata[feat]`` returns a
dict[(str, str, str), Tensor] mapping canonical edge types to the edge features associated with
the name ``feat`` for the corresponding type. One can also set an edge feature
associated with the name ``feat`` for some edge type(s) by setting
``g.edata[feat]`` to a dictionary as described.
Note: When the graph has multiple edge type, The key used in
``g.edata['feat']`` should be the canonical_etypes, i.e.
(h_ntype, r_type, t_ntype).
Notes
-----
For setting features, the device of the features must be the same as the device
of the graph.
Examples
--------
The following example uses PyTorch backend.
To set features of all edges in a heterogeneous graph
with only one edge type:
>>> g = dgl.graph(([0, 1], [1, 2]), 'user', 'follows')
>>> g.edata['h'] = torch.zeros(2, 5)
>>> import dgl
>>> import torch
To set features of all edges in a heterogeneous graph
with multiple edge types:
Set and get feature 'h' for a graph of a single edge type.
>>> g0 = dgl.bipartite(([0, 1, 1], [1, 0, 1]), 'user', 'watches', 'movie')
>>> g1 = dgl.bipartite(([0, 1], [0, 1]), 'user', 'watches', 'TV')
>>> g = dgl.hetero_from_relations([g0, g1])
>>> g.edata['h'] = {('user', 'watches', 'movie') : torch.zeros(3, 5),
('user', 'watches', 'TV') : torch.zeros(2, 5)}
>>> g = dgl.graph((torch.tensor([0, 1]), torch.tensor([1, 2])))
>>> g.edata['h'] = torch.ones(2, 1)
>>> g.edata['h']
... {('user', 'watches', 'movie'): tensor([[0., 0., 0., 0., 0.],
... [0., 0., 0., 0., 0.],
... [0., 0., 0., 0., 0.]]),
... ('user', 'watches', 'TV'): tensor([[0., 0., 0., 0., 0.],
... [0., 0., 0., 0., 0.]])}
To set features of part of edges in a heterogeneous graph
with multiple edge types:
>>> g0 = dgl.bipartite(([0, 1, 1], [1, 0, 1]), 'user', 'watches', 'movie')
>>> g1 = dgl.bipartite(([0, 1], [0, 1]), 'user', 'watches', 'TV')
>>> g = dgl.hetero_from_relations([g0, g1])
>>> g.edata['h'] = {('user', 'watches', 'movie') : torch.zeros(3, 5)}
tensor([[1.],
[1.]])
Set and get feature 'h' for a graph of multiple edge types.
>>> g = dgl.heterograph({
... ('user', 'follows', 'user'): (torch.tensor([1, 2]), torch.tensor([3, 4])),
... ('user', 'plays', 'user'): (torch.tensor([2, 2]), torch.tensor([1, 1])),
... ('player', 'plays', 'game'): (torch.tensor([2, 2]), torch.tensor([1, 1]))
... })
>>> g.edata['h'] = {('user', 'follows', 'user'): torch.zeros(2, 1),
... ('user', 'plays', 'user'): torch.ones(2, 1)}
>>> g.edata['h']
... {('user', 'watches', 'movie'): tensor([[0., 0., 0., 0., 0.],
... [0., 0., 0., 0., 0.],
... [0., 0., 0., 0., 0.]])}
>>> # clean the feature 'h' and no edge type contains 'h'
>>> g.edata.pop('h')
{('user', 'follows', 'user'): tensor([[0.], [0.]]),
('user', 'plays', 'user'): tensor([[1.], [1.]])}
>>> g.edata['h'] = {('user', 'follows', 'user'): torch.ones(2, 1)}
>>> g.edata['h']
... {}
{('user', 'follows', 'user'): tensor([[1.], [1.]]),
('user', 'plays', 'user'): tensor([[1.], [1.]])}
See Also
--------
......@@ -1598,7 +1975,7 @@ class DGLHeteroGraph(object):
equivalent to ``self.edge_type_subgraph(etype)``. The node and edge features
of the returned graph would be shared with the original graph.
If there are multiple canonical edge type found, then the source/edge/destination
If there are multiple canonical edge types found, then the source/edge/destination
node types would be a *concatenation* of original node/edge types. The
new source/destination node type would have the concatenation determined by
:func:`dgl.combine_names() <dgl.combine_names>` called on original source/destination
......@@ -1680,40 +2057,65 @@ class DGLHeteroGraph(object):
#################################################################
def number_of_nodes(self, ntype=None):
"""Return the number of nodes of the given type in the heterograph.
"""Alias of :func:`num_nodes`"""
return self.num_nodes(ntype)
def num_nodes(self, ntype=None):
"""Return the number of nodes.
Parameters
----------
ntype : str, optional
The node type. Can be omitted if there is only one node type
in the graph. (Default: None)
The node type for query. If given, it returns the number of nodes for a particular
type. If not given (default), it returns the total number of nodes of all types.
Returns
-------
int
The number of nodes
The number of nodes.
Examples
--------
>>> g = dgl.graph(([0, 1], [1, 2]), 'user', 'follows')
>>> g.number_of_nodes('user')
3
>>> g.number_of_nodes()
3
The following example uses PyTorch backend.
>>> import dgl
>>> import torch
Create a graph with two node types -- 'user' and 'game'.
>>> g = dgl.heterograph({
... ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
... ('user', 'plays', 'game'): (torch.tensor([3, 4]), torch.tensor([5, 6]))
... })
Query for the number of nodes.
>>> g.num_nodes('user')
5
>>> g.num_nodes('game')
7
>>> g.num_nodes()
12
"""
return self._graph.number_of_nodes(self.get_ntype_id(ntype))
if ntype is None:
return sum([self._graph.number_of_nodes(ntid) for ntid in range(len(self.ntypes))])
else:
return self._graph.number_of_nodes(self.get_ntype_id(ntype))
def number_of_src_nodes(self, ntype=None):
"""Return the number of nodes of the given SRC node type in the heterograph.
"""Alias of :func:`num_src_nodes`"""
return self.num_src_nodes(ntype)
The heterograph is usually a unidirectional bipartite graph.
def num_src_nodes(self, ntype=None):
"""Return the number of nodes of the given source node type.
Parameters
----------
ntype : str, optional
Node type.
If omitted, there should be only one node type in the SRC category.
The source node type for query. If given, it returns the number of nodes for a
particular source node type. If not given (default), it returns the number of
nodes summed over all source node types.
Returns
-------
......@@ -1722,26 +2124,52 @@ class DGLHeteroGraph(object):
Examples
--------
>>> g = dgl.bipartite(([0, 1], [1, 2]), 'user', 'plays', 'game')
>>> g.number_of_src_nodes('user')
2
>>> g.number_of_src_nodes()
2
>>> g.number_of_nodes('user')
The following example uses PyTorch backend.
>>> import dgl
>>> import torch
Create a homogeneous graph for query.
>>> g = dgl.graph((torch.tensor([0, 1]), torch.tensor([1, 2])))
>>> g.num_src_nodes()
3
Create a heterogeneous graph with two source node types -- 'developer' and 'user'.
>>> g = dgl.heterograph({
... ('developer', 'develops', 'game'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
... ('user', 'plays', 'game'): (torch.tensor([3, 4]), torch.tensor([5, 6]))
... })
Query for the number of nodes.
>>> g.num_src_nodes('developer')
2
>>> g.num_src_nodes('user')
5
>>> g.num_src_nodes()
7
"""
return self._graph.number_of_nodes(self.get_ntype_id_from_src(ntype))
if ntype is None:
return sum([self._graph.number_of_nodes(self.get_ntype_id_from_src(nty))
for nty in self.srctypes])
else:
return self._graph.number_of_nodes(self.get_ntype_id_from_src(ntype))
def number_of_dst_nodes(self, ntype=None):
"""Return the number of nodes of the given DST node type in the heterograph.
"""Alias of :func:`num_dst_nodes`"""
return self.num_dst_nodes(ntype)
The heterograph is usually a unidirectional bipartite graph.
def num_dst_nodes(self, ntype=None):
"""Return the number of nodes of the given destination node type.
Parameters
----------
ntype : str, optional
Node type.
If omitted, there should be only one node type in the DST category.
The destination node type for query. If given, it returns the number of nodes for a
particular destination node type. If not given (default), it returns the number of
nodes summed over all destination node types.
Returns
-------
......@@ -1750,42 +2178,95 @@ class DGLHeteroGraph(object):
Examples
--------
>>> g = dgl.bipartite(([0, 1], [1, 2]), 'user', 'plays', 'game')
>>> g.number_of_dst_nodes('game')
3
>>> g.number_of_dst_nodes()
3
>>> g.number_of_nodes('game')
The following example uses PyTorch backend.
>>> import dgl
>>> import torch
Create a homogeneous graph for query.
>>> g = dgl.graph((torch.tensor([0, 1]), torch.tensor([1, 2])))
>>> g.num_dst_nodes()
3
Create a heterogeneous graph with two destination node types -- 'user' and 'game'.
>>> g = dgl.heterograph({
... ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
... ('user', 'plays', 'game'): (torch.tensor([3, 4]), torch.tensor([5, 6]))
... })
Query for the number of nodes.
>>> g.num_dst_nodes('user')
5
>>> g.num_dst_nodes('game')
7
>>> g.num_dst_nodes()
12
"""
return self._graph.number_of_nodes(self.get_ntype_id_from_dst(ntype))
if ntype is None:
return sum([self._graph.number_of_nodes(self.get_ntype_id_from_dst(nty))
for nty in self.dsttypes])
else:
return self._graph.number_of_nodes(self.get_ntype_id_from_dst(ntype))
def number_of_edges(self, etype=None):
"""Return the number of edges of the given type in the heterograph.
"""Alias of :func:`num_edges`"""
return self.num_edges(etype)
def num_edges(self, etype=None):
"""Return the number of edges.
Parameters
----------
etype : str or tuple of str, optional
The edge type. Can be omitted if there is only one edge type
in the graph.
The edge type for query, which can be an edge type (str) or a canonical edge type
(3-tuple of str). When an edge type appears in multiple canonical edge types, one
must use a canonical edge type. If given, it returns the number of edges for a
particular edge type. If not given (default), it returns the total number of edges
of all types.
Returns
-------
int
The number of edges
The number of edges.
Examples
--------
>>> g = dgl.graph(([0, 1], [1, 2]), 'user', 'follows')
>>> g.number_of_edges(('user', 'follows', 'user'))
2
>>> g.number_of_edges('follows')
The following example uses PyTorch backend.
>>> import dgl
>>> import torch
Create a graph with three canonical edge types.
>>> g = dgl.heterograph({
... ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
... ('user', 'follows', 'game'): (torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3])),
... ('user', 'plays', 'game'): (torch.tensor([1, 3]), torch.tensor([2, 3]))
... })
Query for the number of edges.
>>> g.num_edges('plays')
2
>>> g.number_of_edges()
>>> g.num_edges()
7
Use a canonical edge type instead when there is ambiguity for an edge type.
>>> g.num_edges(('user', 'follows', 'user'))
2
>>> g.num_edges(('user', 'follows', 'game'))
3
"""
return self._graph.number_of_edges(self.get_etype_id(etype))
if etype is None:
return sum([self._graph.number_of_edges(etid)
for etid in range(len(self.canonical_etypes))])
else:
return self._graph.number_of_edges(self.get_etype_id(etype))
def __len__(self):
"""Deprecated: please directly call :func:`number_of_nodes`
......@@ -1798,13 +2279,91 @@ class DGLHeteroGraph(object):
def is_multigraph(self):
"""Whether the graph is a multigraph
In a multigraph, there can be multiple edges from a node ``u`` to a node ``v``.
For a heterogeneous graph of multiple canonical edge types, we consider it as a
multigraph if there are multiple edges from a node ``u`` to a node ``v`` for any
canonical edge type.
Returns
-------
bool
True if the graph is a multigraph, False otherwise.
Whether the graph is a multigraph.
Notes
-----
Checking whether the graph is a multigraph can be expensive for a large one.
Examples
--------
The following example uses PyTorch backend.
>>> import dgl
>>> import torch
Check for homogeneous graphs.
>>> g = dgl.graph((torch.tensor([0, 1]), torch.tensor([1, 3])))
>>> g.is_multigraph
False
>>> g = dgl.graph((torch.tensor([0, 1, 1]), torch.tensor([1, 3, 3])))
>>> g.is_multigraph
True
Check for heterogeneous graphs.
>>> g = dgl.heterograph({
... ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
... ('user', 'follows', 'game'): (torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3]))
... })
>>> g.is_multigraph
False
>>> g = dgl.heterograph({
... ('user', 'follows', 'user'): (torch.tensor([0, 1, 1]), torch.tensor([1, 2, 2])),
... ('user', 'follows', 'game'): (torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3]))
... })
>>> g.is_multigraph
True
"""
return self._graph.is_multigraph()
@property
def is_homogeneous(self):
"""Whether the graph is a homogeneous graph.
A homogeneous graph only has one node type and one edge type.
Returns
-------
bool
Whether the graph is a homogeneous graph.
Examples
--------
The following example uses PyTorch backend.
>>> import dgl
>>> import torch
Create a homogeneous graph for check.
>>> g = dgl.graph((torch.tensor([0, 0, 1, 1]), torch.tensor([1, 0, 2, 3])))
>>> g.is_homogeneous
True
Create a heterogeneous graph for check.
If the graph has multiple edge types, one needs to specify the edge type.
>>> g = dgl.heterograph({
... ('user', 'follows', 'game'): (torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3]))})
>>> g.is_homogeneous
False
"""
return len(self.ntypes) == 1 and len(self.etypes) == 1
@property
def is_readonly(self):
"""Deprecated: DGLGraph will always be mutable.
......@@ -1821,12 +2380,30 @@ class DGLHeteroGraph(object):
@property
def idtype(self):
"""The dtype of graph index
"""The data type for storing the structure-related graph information
such as node and edge IDs.
Returns
-------
backend dtype object
th.int32/th.int64 or tf.int32/tf.int64 etc.
Framework-specific data type object
For example, this can be ``torch.int32`` or ``torch.int64`` for PyTorch.
Examples
--------
The following example uses PyTorch backend.
>>> import dgl
>>> import torch
>>> src_ids = torch.tensor([0, 0, 1])
>>> dst_ids = torch.tensor([1, 2, 2])
>>> g = dgl.graph((src_ids, dst_ids))
>>> g.idtype
torch.int64
>>> g = dgl.graph((src_ids, dst_ids), idtype=torch.int32)
>>> g.idtype
torch.int32
See Also
--------
......@@ -1854,41 +2431,68 @@ class DGLHeteroGraph(object):
return self.has_nodes(vid)
def has_nodes(self, vid, ntype=None):
"""Whether the graph has a node with a particular id and type.
"""Whether the graph has some particular node(s) of a given type.
Parameters
----------
vid : int, iterable, tensor
Node ID(s).
vid : node ID(s)
The node ID(s) for query. The allowed formats are:
- ``int``: The ID of a single node.
- ``Tensor``: A 1D tensor that contains the IDs of multiple nodes, whose data type and
device should be the same as the :py:attr:`idtype` and device of the graph.
- ``iterable[int]``: A sequence (e.g. list, tuple, numpy.ndarray)
of integers that contains the IDs of multiple nodes.
ntype : str, optional
The node type. Can be omitted if there is only one node type
in the graph. (Default: None)
The node type for query. It is required if the graph has
multiple node types.
Returns
-------
bool or bool Tensor
Each element is a bool flag, which is True if the node exists,
and is False otherwise.
- If :attr:`vid` is an ``int``, the result will be a ``bool`` indicating
whether the graph has the particular node.
- If :attr:`vid` is a 1D ``Tensor`` or ``iterable[int]`` of node IDs,
the result will be a bool Tensor whose i-th element indicates whether
the graph has node :attr:`vid[i]` of the given type.
Examples
--------
The following example uses PyTorch backend.
>>> import dgl
>>> import torch
Create a graph with two node types -- 'user' and 'game'.
>>> g = dgl.heterograph({
... ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
... ('user', 'plays', 'game'): (torch.tensor([3, 4]), torch.tensor([0, 1]))
... })
Query for the nodes.
>>> g.has_nodes(0, 'user')
True
>>> g.has_nodes(4, 'user')
>>> g.has_nodes(3, 'game')
False
>>> g.has_nodes([0, 1, 2, 3, 4], 'user')
tensor([True, True, True, False, False])
>>> g.has_nodes(torch.tensor([3, 0, 1]), 'game')
tensor([False, True, True])
"""
vid_tensor = utils.prepare_tensor(self, vid, "vid")
if len(vid_tensor) > 0 and F.as_scalar(F.min(vid_tensor, 0)) < 0 < len(vid_tensor):
raise DGLError('All IDs must be non-negative integers.')
ret = self._graph.has_nodes(
self.get_ntype_id(ntype),
utils.prepare_tensor(self, vid, "vid"))
self.get_ntype_id(ntype), vid_tensor)
if isinstance(vid, numbers.Integral):
return bool(F.as_scalar(ret))
else:
return F.astype(ret, F.bool)
def has_node(self, vid, ntype=None):
"""Whether the graph has a node with ids and a particular type.
"""Whether the graph has a particular node of a given type.
DEPRECATED: see :func:`~DGLGraph.has_nodes`
"""
......@@ -1896,38 +2500,93 @@ class DGLHeteroGraph(object):
return self.has_nodes(vid, ntype)
def has_edges_between(self, u, v, etype=None):
"""Whether the graph has an edge (u, v) of type ``etype``.
"""Whether the graph has some particular edge(s) of a given type.
Parameters
----------
u : int, iterable of int, Tensor
Source node ID(s).
v : int, iterable of int, Tensor
Destination node ID(s).
u : source node ID(s)
The source node(s) of the edges for query. The allowed formats are:
- ``int``: The source node of an edge for query.
- ``Tensor``: A 1D tensor that contains the source node(s) of edge(s) for query.
The data type and device of the tensor must be the same as the :py:attr:`idtype` and
device of the graph. Its i-th element represents the source node ID of the
i-th edge for query.
- ``iterable[int]`` : Similar to the tensor, but stores node IDs in a sequence
(e.g. list, tuple, numpy.ndarray).
v : destination node ID(s)
The destination node(s) of the edges for query. It's a counterpart of :attr:`u`
for destination nodes and should have the same format as :attr:`u`. If :attr:`u`
and :attr:`v` are not int, they should have the same length.
etype : str or tuple of str, optional
The edge type. Can be omitted if there is only one edge type
in the graph.
The edge type for query, which can be an edge type (str) or a canonical edge type
(3-tuple of str). When an edge type appears in multiple canonical edge types, one
must use a canonical edge type. If the graph has multiple edge types, one must
specify the argument. Otherwise, it can be omitted.
Returns
-------
a : Tensor
Binary tensor indicating the existence of edges. ``a[i]=1`` if the graph
contains edge ``(u[i], v[i])`` of type ``etype``, 0 otherwise.
bool or bool Tensor
- If :attr:`u` and :attr:`v` are ``int`` objects, the result will be a ``bool``
indicating whether there is an edge from ``u`` to ``v`` of the given edge type.
- If :attr:`u` and :attr:`v` are ``Tensor`` or ``iterable[int]`` objects, the
result will be a bool Tensor whose i-th element indicates whether there is an
edge from ``u[i]`` to ``v[i]`` of the given edge type.
Notes
-----
The value(s) of :attr:`u` and :attr:`v` must be valid node IDs, i.e. non-negative and
smaller than the number of nodes of the source and destination node type, respectively.
Examples
--------
>>> g.has_edge_between(0, 1, ('user', 'plays', 'game'))
The following example uses PyTorch backend.
>>> import dgl
>>> import torch
Create a homogeneous graph.
>>> g = dgl.graph((torch.tensor([0, 0, 1, 1]), torch.tensor([1, 0, 2, 3])))
Query for the edges.
>>> g.has_edges_between(1, 2)
True
>>> g.has_edge_between(0, 2, ('user', 'plays', 'game'))
False
>>> g.has_edge_between([0, 0], [1, 2], ('user', 'plays', 'game'))
tensor([1, 0])
>>> g.has_edges_between(torch.tensor([1, 2]), torch.tensor([2, 3]))
tensor([ True, False])
If the graph has multiple edge types, one needs to specify the edge type.
>>> g = dgl.heterograph({
... ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
... ('user', 'follows', 'game'): (torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3])),
... ('user', 'plays', 'game'): (torch.tensor([1, 3]), torch.tensor([2, 3]))
... })
>>> g.has_edges_between(torch.tensor([1, 2]), torch.tensor([2, 3]), 'plays')
tensor([ True, False])
Use a canonical edge type instead when there is ambiguity for an edge type.
>>> g.has_edges_between(torch.tensor([1, 2]), torch.tensor([2, 3]),
... ('user', 'follows', 'user'))
tensor([ True, False])
>>> g.has_edges_between(torch.tensor([1, 2]), torch.tensor([2, 3]),
... ('user', 'follows', 'game'))
tensor([True, True])
"""
srctype, _, dsttype = self.to_canonical_etype(etype)
u_tensor = utils.prepare_tensor(self, u, 'u')
if F.as_scalar(F.sum(self.has_nodes(u_tensor, ntype=srctype), dim=0)) != len(u_tensor):
raise DGLError('u contains invalid node IDs')
v_tensor = utils.prepare_tensor(self, v, 'v')
if F.as_scalar(F.sum(self.has_nodes(v_tensor, ntype=dsttype), dim=0)) != len(v_tensor):
raise DGLError('v contains invalid node IDs')
ret = self._graph.has_edges_between(
self.get_etype_id(etype),
utils.prepare_tensor(self, u, 'u'),
utils.prepare_tensor(self, v, 'v'))
u_tensor, v_tensor)
if isinstance(u, numbers.Integral) and isinstance(v, numbers.Integral):
return bool(F.as_scalar(ret))
else:
......@@ -1943,79 +2602,111 @@ class DGLHeteroGraph(object):
return self.has_edges_between(u, v, etype)
def predecessors(self, v, etype=None):
"""Return the predecessors of node `v` in the graph with the specified
edge type.
"""Return the predecessor(s) of a particular node with the specified edge type.
Node `u` is a predecessor of `v` if an edge `(u, v)` with type `etype`
exists in the graph.
Node ``u`` is a predecessor of node ``v`` if there is an edge ``(u, v)`` with type
``etype`` in the graph.
Parameters
----------
v : int
The destination node.
The destination node for query.
etype : str or tuple of str, optional
The edge type. Can be omitted if there is only one edge type
in the graph. (Default: None)
The edge type for query, which can be an edge type (str) or a canonical edge type
(3-tuple of str). When an edge type appears in multiple canonical edge types, one
must use a canonical edge type. If the graph has multiple edge types, one must
specify the argument. Otherwise, it can be omitted.
Returns
-------
tensor
Array of predecessor node IDs with the specified edge type.
Tensor
The predecessors of :attr:`v` with the specified edge type.
Examples
--------
The following example uses PyTorch backend.
>>> plays_g = dgl.bipartite(([0, 1, 1, 2], [0, 0, 1, 1]), 'user', 'plays', 'game')
>>> devs_g = dgl.bipartite(([0, 1], [0, 1]), 'developer', 'develops', 'game')
>>> g = dgl.hetero_from_relations([plays_g, devs_g])
>>> g.predecessors(0, 'plays')
tensor([0, 1])
>>> g.predecessors(0, 'develops')
>>> import dgl
>>> import torch
Create a homogeneous graph.
>>> g = dgl.graph((torch.tensor([0, 0, 1, 1]), torch.tensor([1, 1, 2, 3])))
Query for node 1.
>>> g.predecessors(1)
tensor([0, 0])
For a graph of multiple edge types, it is required to specify the edge type in query.
>>> hg = dgl.heterograph({
... ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
... ('user', 'plays', 'game'): (torch.tensor([3, 4]), torch.tensor([5, 6]))
... })
>>> hg.predecessors(1, etype='follows')
tensor([0])
See Also
--------
successors
"""
if not self.has_nodes(v, self.to_canonical_etype(etype)[-1]):
raise DGLError('Non-existing node ID {}'.format(v))
return self._graph.predecessors(self.get_etype_id(etype), v)
def successors(self, v, etype=None):
"""Return the successors of node `v` in the graph with the specified edge
type.
"""Return the successor(s) of a particular node with the specified edge type.
Node `u` is a successor of `v` if an edge `(v, u)` with type `etype` exists
in the graph.
Node ``u`` is a successor of node ``v`` if there is an edge ``(v, u)`` with type
``etype`` in the graph.
Parameters
----------
v : int
The source node.
The source node for query.
etype : str or tuple of str, optional
The edge type. Can be omitted if there is only one edge type
in the graph. (Default: None)
The edge type for query, which can be an edge type (str) or a canonical edge type
(3-tuple of str). When an edge type appears in multiple canonical edge types, one
must use a canonical edge type. If the graph has multiple edge types, one must
specify the argument. Otherwise, it can be omitted.
Returns
-------
tensor
Array of successor node IDs with the specified edge type.
Tensor
The successors of :attr:`v` with the specified edge type.
Examples
--------
The following example uses PyTorch backend.
>>> plays_g = dgl.bipartite(([0, 1, 1, 2], [0, 0, 1, 1]), 'user', 'plays', 'game')
>>> follows_g = dgl.graph(([0, 1], [1, 2]), 'user', 'follows')
>>> g = dgl.hetero_from_relations([plays_g, follows_g])
>>> g.successors(0, 'plays')
tensor([0])
>>> g.successors(0, 'follows')
tensor([1])
>>> import dgl
>>> import torch
Create a homogeneous graph.
>>> g = dgl.graph((torch.tensor([0, 0, 1, 1]), torch.tensor([1, 1, 2, 3])))
Query for node 1.
>>> g.successors(1)
tensor([2, 3])
For a graph of multiple edge types, it is required to specify the edge type in query.
>>> hg = dgl.heterograph({
... ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
... ('user', 'plays', 'game'): (torch.tensor([3, 4]), torch.tensor([5, 6]))
... })
>>> hg.successors(1, etype='follows')
tensor([2])
See Also
--------
predecessors
"""
if not self.has_nodes(v, self.to_canonical_etype(etype)[0]):
raise DGLError('Non-existing node ID {}'.format(v))
return self._graph.successors(self.get_etype_id(etype), v)
def edge_id(self, u, v, force_multi=None, return_uv=False, etype=None):
......@@ -2029,67 +2720,107 @@ class DGLHeteroGraph(object):
return_uv=return_uv, etype=etype)
def edge_ids(self, u, v, force_multi=None, return_uv=False, etype=None):
"""Return all edge IDs between source node array `u` and destination
node array `v` with the specified edge type.
"""Return the ID(s) of edge(s) from the given source node(s) to the given destination
node(s) with the specified edge type.
Parameters
----------
u : int, list, tensor
The node ID array of source type.
v : int, list, tensor
The node ID array of destination type.
u : source node ID(s)
The source node(s) of the edges for query. The allowed formats are:
- ``int``: The source node of an edge for query.
- ``Tensor``: A 1D tensor that contains the source node(s) of edge(s) for query, whose
data type and device should be the same as the :py:attr:`idtype` and device of
the graph. Its i-th element is the source node of the i-th edge for query.
- ``iterable[int]``: Similar to the tensor, but stores node IDs in a sequence
(e.g. list, tuple, numpy.ndarray).
v : destination node ID(s)
The destination node(s) of the edges for query. It's a counterpart of :attr:`u`
for destination nodes and should have the same format as :attr:`u`. If :attr:`u`
and :attr:`v` are not int, they should have the same length.
force_multi : bool, optional
Deprecated (Will be deleted in the future).
Whether to always treat the graph as a multigraph. See the
"Returns" for their effects. (Default: False)
return_uv : bool
See the "Returns" for their effects. (Default: False)
Deprecated, use :attr:`return_uv` instead. Whether to allow the graph to be a
multigraph, i.e. there can be multiple edges from one node to another.
return_uv : bool, optional
Whether to return the source and destination node IDs along with the edges. If
False (default), it assumes that the graph is a simple graph and there is only
one edge from one node to another. If True, there can be multiple edges found
from one node to another.
etype : str or tuple of str, optional
The edge type. Can be omitted if there is only one edge type
in the graph.
The edge type for query, which can be an edge type (str) or a canonical edge type
(3-tuple of str). When an edge type appears in multiple canonical edge types, one
must use a canonical edge type. If the graph has multiple edge types, one must
specify the argument. Otherwise, it can be omitted.
Returns
-------
tensor, or (tensor, tensor, tensor)
* If ``return_uv=False``, return a single edge ID array ``e``.
``e[i]`` is the edge ID between ``u[i]`` and ``v[i]``.
* Otherwise, return three arrays ``(eu, ev, e)``. ``e[i]`` is the ID
of an edge between ``eu[i]`` and ``ev[i]``. All edges between ``u[i]``
and ``v[i]`` are returned.
* If ``return_uv=False``, it returns a 1D tensor that contains the IDs of the edges.
If :attr:`u` and :attr:`v` are int, the tensor has length 1. Otherwise, the i-th
element of the tensor is the ID of the edge ``(u[i], v[i])``.
* If ``return_uv=True``, it returns a tuple of three 1D tensors ``(eu, ev, e)``.
``e[i]`` is the ID of an edge from ``eu[i]`` to ``ev[i]``. In this case, all edges
(including parallel ones) from ``u[j]`` to ``v[j]`` are returned for every queried pair.
Notes
-----
If the graph is a simple graph, ``return_uv=False``, and no edge
exists between some pairs of ``u[i]`` and ``v[i]``, the result is undefined
and an empty tensor is returned.
If the graph is a simple graph, ``return_uv=False``, and there are no edges
between some pairs of node(s), it will raise an error.
If the graph is a multi graph, ``return_uv=False``, and multi edges
exist between some pairs of `u[i]` and `v[i]`, the result is undefined.
If the graph is a multigraph, ``return_uv=False``, and there are multiple edges
between some pairs of node(s), it returns an arbitrary one from them.
Examples
--------
The following example uses PyTorch backend.
Instantiate a heterograph.
>>> import dgl
>>> import torch
>>> plays_g = dgl.bipartite(([0, 1, 1, 2], [0, 0, 2, 1]), 'user', 'plays', 'game')
>>> follows_g = dgl.graph(([0, 1, 1], [1, 2, 2]), 'user', 'follows')
>>> g = dgl.hetero_from_relations([plays_g, follows_g])
Create a homogeneous graph.
Query for edge ids.
>>> g = dgl.graph((torch.tensor([0, 0, 1, 1, 1]), torch.tensor([1, 0, 2, 3, 2])))
>>> plays_g.edge_ids([0], [2], etype=('user', 'plays', 'game'))
tensor([], dtype=torch.int64)
>>> plays_g.edge_ids([1], [2], etype=('user', 'plays', 'game'))
tensor([2])
>>> g.edge_ids([1], [2], return_uv=True, etype=('user', 'follows', 'user'))
(tensor([1, 1]), tensor([2, 2]), tensor([1, 2]))
Query for the edges.
>>> g.edge_ids(0, 0)
1
>>> g.edge_ids(torch.tensor([1, 0]), torch.tensor([3, 1]))
tensor([3, 0])
Get all edges for pairs of nodes.
>>> g.edge_ids(torch.tensor([1, 0]), torch.tensor([3, 1]), return_uv=True)
(tensor([1, 0]), tensor([3, 1]), tensor([3, 0]))
If the graph has multiple edge types, one needs to specify the edge type.
>>> g = dgl.heterograph({
... ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
... ('user', 'follows', 'game'): (torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3])),
... ('user', 'plays', 'game'): (torch.tensor([1, 3]), torch.tensor([2, 3]))
... })
>>> g.edge_ids(torch.tensor([1]), torch.tensor([2]), etype='plays')
tensor([0])
Use a canonical edge type instead when there is ambiguity for an edge type.
>>> g.edge_ids(torch.tensor([0, 1]), torch.tensor([1, 2]),
... etype=('user', 'follows', 'user'))
tensor([0, 1])
>>> g.edge_ids(torch.tensor([1, 2]), torch.tensor([2, 3]),
... etype=('user', 'follows', 'game'))
tensor([1, 2])
"""
is_int = isinstance(u, numbers.Integral) and isinstance(v, numbers.Integral)
srctype, _, dsttype = self.to_canonical_etype(etype)
u = utils.prepare_tensor(self, u, 'u')
if F.as_scalar(F.sum(self.has_nodes(u, ntype=srctype), dim=0)) != len(u):
raise DGLError('u contains invalid node IDs')
v = utils.prepare_tensor(self, v, 'v')
if F.as_scalar(F.sum(self.has_nodes(v, ntype=dsttype), dim=0)) != len(v):
raise DGLError('v contains invalid node IDs')
if force_multi is not None:
dgl_warning("force_multi will be deprecated, " \
"Please use return_uv instead")
......@@ -2109,88 +2840,145 @@ class DGLHeteroGraph(object):
return F.as_scalar(eid) if is_int else eid
def find_edges(self, eid, etype=None):
"""Given an edge ID array with the specified type, return the source
and destination node ID array ``s`` and ``d``. ``s[i]`` and ``d[i]``
are source and destination node ID for edge ``eid[i]``.
"""Return the source and destination node(s) of some particular edge(s)
with the specified edge type.
Parameters
----------
eid : list, tensor
The edge ID array.
eid : edge ID(s)
The IDs of the edges for query. The function expects that :attr:`eid` contains
valid edge IDs only, i.e. integers among :math:`0, 1, ..., E - 1`, where
:math:`E` is the number of edges with the specified edge type. The allowed formats are:
- ``int``: An edge ID for query.
- ``Tensor``: A 1D tensor that contains the edge IDs for query, whose data
type and device should be the same as the :py:attr:`idtype` and device of the graph.
- ``iterable[int]``: Similar to the tensor, but stores edge IDs in a sequence
(e.g. list, tuple, numpy.ndarray).
etype : str or tuple of str, optional
The edge type. Can be omitted if there is only one edge type
in the graph. (Default: None)
The edge type for query, which can be an edge type (str) or a canonical edge type
(3-tuple of str). When an edge type appears in multiple canonical edge types, one
must use a canonical edge type.
Returns
-------
tensor
The source node ID array.
tensor
The destination node ID array.
Tensor
The source node IDs of the edges, whose i-th element is the source node of the edge
with ID ``eid[i]``.
Tensor
The destination node IDs of the edges, whose i-th element is the destination node of
the edge with ID ``eid[i]``.
Examples
--------
The following example uses PyTorch backend.
>>> g = dgl.bipartite(([0, 1, 1], [0, 0, 2]), 'user', 'plays', 'game')
>>> g.find_edges([0, 2], ('user', 'plays', 'game'))
(tensor([0, 1]), tensor([0, 2]))
>>> g.find_edges([0, 2])
(tensor([0, 1]), tensor([0, 2]))
>>> import dgl
>>> import torch
Create a homogeneous graph.
>>> g = dgl.graph((torch.tensor([0, 0, 1, 1]), torch.tensor([1, 0, 2, 3])))
Find edges of IDs 0 and 2.
>>> g.find_edges(torch.tensor([0, 2]))
(tensor([0, 1]), tensor([1, 2]))
For a graph of multiple edge types, it is required to specify the edge type in query.
>>> hg = dgl.heterograph({
... ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
... ('user', 'plays', 'game'): (torch.tensor([3, 4]), torch.tensor([5, 6]))
... })
>>> hg.find_edges(torch.tensor([1, 0]), 'plays')
(tensor([4, 3]), tensor([6, 5]))
"""
eid = utils.prepare_tensor(self, eid, 'eid')
if len(eid) > 0:
min_eid = F.as_scalar(F.min(eid, 0))
if min_eid < 0:
raise DGLError('Invalid edge ID {:d}'.format(min_eid))
max_eid = F.as_scalar(F.max(eid, 0))
if max_eid >= self.num_edges(etype):
raise DGLError('Invalid edge ID {:d}'.format(max_eid))
if len(eid) == 0:
empty = F.copy_to(F.tensor([], self.idtype), self.device)
return empty, empty
# sanity check
max_eid = F.as_scalar(F.max(eid, dim=0))
if max_eid >= self.number_of_edges(etype):
raise DGLError('Expect edge IDs to be smaller than number of edges ({}). '
' But got {}.'.format(self.number_of_edges(etype), max_eid))
src, dst, _ = self._graph.find_edges(self.get_etype_id(etype), eid)
return src, dst
def in_edges(self, v, form='uv', etype=None):
"""Return the inbound edges of the node(s) with the specified type.
"""Return the incoming edges of some particular node(s) with the specified edge type.
Parameters
----------
v : int, list, tensor
The node id(s) of destination type.
v : destination node ID(s)
The destination node(s) for query. The allowed formats are:
- ``int``: The destination node for query.
- ``Tensor``: A 1D tensor that contains the destination node(s) for query, whose data
type and device should be the same as the :py:attr:`idtype` and device of the graph.
- ``iterable[int]``: Similar to the tensor, but stores node IDs in a sequence
(e.g. list, tuple, numpy.ndarray).
form : str, optional
The return form. Currently support:
- ``'eid'`` : one eid tensor
- ``'all'`` : a tuple ``(u, v, eid)``
- ``'uv'`` : a pair ``(u, v)``, default
The return form, which can be one of the following:
- ``'eid'``: The returned result is a 1D tensor :math:`EID`, representing
the IDs of all edges.
- ``'uv'`` (default): The returned result is a 2-tuple of 1D tensors :math:`(U, V)`,
representing the source and destination nodes of all edges. For each :math:`i`,
:math:`(U[i], V[i])` forms an edge.
- ``'all'``: The returned result is a 3-tuple of 1D tensors :math:`(U, V, EID)`,
representing the source nodes, destination nodes and IDs of all edges.
For each :math:`i`, :math:`(U[i], V[i])` forms an edge with ID :math:`EID[i]`.
etype : str or tuple of str, optional
The edge type. Can be omitted if there is only one edge type
in the graph. (Default: None)
The edge type for query, which can be an edge type (str) or a canonical edge type
(3-tuple of str). When an edge type appears in multiple canonical edge types, one
must use a canonical edge type. If the graph has multiple edge types, one must
specify the argument. Otherwise, it can be omitted.
Returns
-------
tensor or (tensor, tensor, tensor) or (tensor, tensor)
All inbound edges to ``v`` are returned.
* If ``form='eid'``, return a tensor for the ids of the
inbound edges of the nodes with the specified type.
* If ``form='all'``, return a 3-tuple of tensors
``(eu, ev, eid)``. ``eid[i]`` gives the ID of the
edge from ``eu[i]`` to ``ev[i]``.
* If ``form='uv'``, return a 2-tuple of tensors ``(eu, ev)``.
``eu[i]`` is the source node of an edge to ``ev[i]``.
Tensor or (Tensor, Tensor) or (Tensor, Tensor, Tensor)
All incoming edges of the nodes with the specified edge type. For a description of the
returned result, see the description of :attr:`form`.
Examples
--------
The following example uses PyTorch backend.
>>> g = dgl.bipartite(([0, 1, 1], [0, 1, 2]), 'user', 'plays', 'game')
>>> g.in_edges([0, 2], form='eid')
tensor([0, 2])
>>> g.in_edges([0, 2], form='all')
(tensor([0, 1]), tensor([0, 2]), tensor([0, 2]))
>>> g.in_edges([0, 2], form='uv')
(tensor([0, 1]), tensor([0, 2]))
>>> import dgl
>>> import torch
Create a homogeneous graph.
>>> g = dgl.graph((torch.tensor([0, 0, 1, 1]), torch.tensor([1, 0, 2, 3])))
Query for the nodes 1 and 0.
>>> g.in_edges(torch.tensor([1, 0]))
(tensor([0, 0]), tensor([1, 0]))
Specify a different value for :attr:`form`.
>>> g.in_edges(torch.tensor([1, 0]), form='all')
(tensor([0, 0]), tensor([1, 0]), tensor([0, 1]))
For a graph of multiple edge types, it is required to specify the edge type in query.
>>> hg = dgl.heterograph({
... ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
... ('user', 'plays', 'game'): (torch.tensor([3, 4]), torch.tensor([5, 6]))
... })
>>> hg.in_edges(torch.tensor([1, 0]), etype='follows')
(tensor([0]), tensor([1]))
See Also
--------
edges
out_edges
"""
v = utils.prepare_tensor(self, v, 'v')
src, dst, eid = self._graph.in_edges(self.get_etype_id(etype), v)
......@@ -2204,46 +2992,80 @@ class DGLHeteroGraph(object):
raise DGLError('Invalid form: {}. Must be "all", "uv" or "eid".'.format(form))
def out_edges(self, u, form='uv', etype=None):
"""Return the outbound edges of the node(s) with the specified type.
"""Return the outgoing edges of some particular node(s) with the specified edge type.
Parameters
----------
u : int, list, tensor
The node id(s) of source type.
u : source node ID(s)
The source node(s) for query. The allowed formats are:
- ``int``: The source node for query.
- ``Tensor``: A 1D tensor that contains the source node(s) for query, whose data
type and device should be the same as the :py:attr:`idtype` and device of the graph.
- ``iterable[int]``: Similar to the tensor, but stores node IDs in a sequence
(e.g. list, tuple, numpy.ndarray).
form : str, optional
The return form. Currently support:
- ``'eid'`` : one eid tensor
- ``'all'`` : a tuple ``(u, v, eid)``
- ``'uv'`` : a pair ``(u, v)``, default
The return form, which can be one of the following:
- ``'eid'``: The returned result is a 1D tensor :math:`EID`, representing
the IDs of all edges.
- ``'uv'`` (default): The returned result is a 2-tuple of 1D tensors :math:`(U, V)`,
representing the source and destination nodes of all edges. For each :math:`i`,
:math:`(U[i], V[i])` forms an edge.
- ``'all'``: The returned result is a 3-tuple of 1D tensors :math:`(U, V, EID)`,
representing the source nodes, destination nodes and IDs of all edges.
For each :math:`i`, :math:`(U[i], V[i])` forms an edge with ID :math:`EID[i]`.
etype : str or tuple of str, optional
The edge type. Can be omitted if there is only one edge type
in the graph. (Default: None)
The edge type for query, which can be an edge type (str) or a canonical edge type
(3-tuple of str). When an edge type appears in multiple canonical edge types, one
must use a canonical edge type. If the graph has multiple edge types, one must
specify the argument. Otherwise, it can be omitted.
Returns
-------
Tensor or (Tensor, Tensor) or (Tensor, Tensor, Tensor)
All outgoing edges of the nodes with the specified edge type. For a description of the
returned result, see the description of :attr:`form`.
Examples
--------
The following example uses PyTorch backend.
>>> import dgl
>>> import torch
Create a homogeneous graph.
>>> g = dgl.graph((torch.tensor([0, 0, 1, 1]), torch.tensor([1, 0, 2, 3])))
Query for the nodes 1 and 2.
>>> g.out_edges(torch.tensor([1, 2]))
(tensor([1, 1]), tensor([2, 3]))
Specify a different value for :attr:`form`.
>>> g.out_edges(torch.tensor([1, 2]), form='all')
(tensor([1, 1]), tensor([2, 3]), tensor([2, 3]))
Returns
-------
tensor or (tensor, tensor, tensor) or (tensor, tensor)
All outbound edges from ``u`` are returned.
For a graph of multiple edge types, it is required to specify the edge type in query.
* If ``form='eid'``, return a tensor for the ids of the outbound edges
of the nodes with the specified type.
* If ``form='all'``, return a 3-tuple of tensors ``(eu, ev, eid)``.
``eid[i]`` gives the ID of the edge from ``eu[i]`` to ``ev[i]``.
* If ``form='uv'``, return a 2-tuple of tensors ``(eu, ev)``.
``ev[i]`` is the destination node of the edge from ``eu[i]``.
>>> hg = dgl.heterograph({
... ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
... ('user', 'plays', 'game'): (torch.tensor([3, 4]), torch.tensor([5, 6]))
... })
>>> hg.out_edges(torch.tensor([1, 2]), etype='follows')
(tensor([1]), tensor([2]))
Examples
See Also
--------
>>> g = dgl.bipartite(([0, 1, 1], [0, 1, 2]), 'user', 'plays', 'game')
>>> g.out_edges([0, 1], form='eid')
tensor([0, 1, 2])
>>> g.out_edges([0, 1], form='all')
(tensor([0, 1, 1]), tensor([0, 1, 2]), tensor([0, 1, 2]))
>>> g.out_edges([0, 1], form='uv')
(tensor([0, 1, 1]), tensor([0, 1, 2]))
edges
in_edges
"""
u = utils.prepare_tensor(self, u, 'u')
srctype, _, _ = self.to_canonical_etype(etype)
if F.as_scalar(F.sum(self.has_nodes(u, ntype=srctype), dim=0)) != len(u):
raise DGLError('u contains invalid node IDs')
src, dst, eid = self._graph.out_edges(self.get_etype_id(etype), u)
if form == 'all':
return src, dst, eid
......@@ -2254,49 +3076,75 @@ class DGLHeteroGraph(object):
else:
raise DGLError('Invalid form: {}. Must be "all", "uv" or "eid".'.format(form))
def all_edges(self, form='uv', order=None, etype=None):
"""Return all edges with the specified type.
def all_edges(self, form='uv', order='eid', etype=None):
"""Return all edges with the specified edge type.
Parameters
----------
form : str, optional
The return form. Currently support:
- ``'eid'`` : one eid tensor
- ``'all'`` : a tuple ``(u, v, eid)``
- ``'uv'`` : a pair ``(u, v)``, default
order : str or None
The order of the returned edges. Currently support:
- ``'srcdst'`` : sorted by their src and dst ids.
- ``'eid'`` : sorted by edge Ids.
- ``None`` : arbitrary order, default
The return form, which can be one of the following:
- ``'eid'``: The returned result is a 1D tensor :math:`EID`, representing
the IDs of all edges.
- ``'uv'`` (default): The returned result is a 2-tuple of 1D tensors :math:`(U, V)`,
representing the source and destination nodes of all edges. For each :math:`i`,
:math:`(U[i], V[i])` forms an edge.
- ``'all'``: The returned result is a 3-tuple of 1D tensors :math:`(U, V, EID)`,
representing the source nodes, destination nodes and IDs of all edges.
For each :math:`i`, :math:`(U[i], V[i])` forms an edge with ID :math:`EID[i]`.
order : str, optional
The order of the returned edges, which can be one of the following:
- ``'srcdst'``: The edges are sorted first by their source node IDs and then
by their destination node IDs to break ties.
- ``'eid'`` (default): The edges are sorted by their IDs.
etype : str or tuple of str, optional
The edge type. Can be omitted if there is only one edge type
in the graph. (Default: None)
The edge type for query, which can be an edge type (str) or a canonical edge type
(3-tuple of str). When an edge type appears in multiple canonical edge types, one
must use a canonical edge type. If the graph has multiple edge types, one must
specify the argument. Otherwise, it can be omitted.
Returns
-------
tensor or (tensor, tensor, tensor) or (tensor, tensor)
* If ``form='eid'``, return a tensor for the ids of all edges
with the specified type.
* If ``form='all'``, return a 3-tuple of tensors ``(eu, ev, eid)``.
``eid[i]`` gives the ID of the edge from ``eu[i]`` to ``ev[i]``.
* If ``form='uv'``, return a 2-tuple of tensors ``(eu, ev)``.
``ev[i]`` is the destination node of the edge from ``eu[i]``.
Tensor or (Tensor, Tensor) or (Tensor, Tensor, Tensor)
All edges of the specified edge type. For a description of the returned result,
see the description of :attr:`form`.
Examples
--------
The following example uses PyTorch backend.
>>> g = dgl.bipartite(([1, 0, 1], [1, 0, 2]), 'user', 'plays', 'game')
>>> g.all_edges(form='eid', order='srcdst')
tensor([1, 0, 2])
>>> import dgl
>>> import torch
Create a homogeneous graph.
>>> g = dgl.graph((torch.tensor([0, 0, 1, 1]), torch.tensor([1, 0, 2, 3])))
Query for edges.
>>> g.all_edges()
(tensor([0, 0, 1, 1]), tensor([1, 0, 2, 3]))
Specify a different value for :attr:`form` and :attr:`order`.
>>> g.all_edges(form='all', order='srcdst')
(tensor([0, 1, 1]), tensor([0, 1, 2]), tensor([1, 0, 2]))
>>> g.all_edges(form='uv', order='eid')
(tensor([1, 0, 1]), tensor([1, 0, 2]))
(tensor([0, 0, 1, 1]), tensor([0, 1, 2, 3]), tensor([1, 0, 2, 3]))
For a graph of multiple edge types, it is required to specify the edge type in query.
>>> hg = dgl.heterograph({
... ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
... ('user', 'plays', 'game'): (torch.tensor([3, 4]), torch.tensor([5, 6]))
... })
>>> hg.all_edges(etype='plays')
(tensor([3, 4]), tensor([5, 6]))
See Also
--------
edges
in_edges
out_edges
"""
src, dst, eid = self._graph.edges(self.get_etype_id(etype), order)
if form == 'all':
......@@ -2317,46 +3165,78 @@ class DGLHeteroGraph(object):
return self.in_degrees(v, etype)
def in_degrees(self, v=ALL, etype=None):
"""Return the in-degrees of nodes v with edges of type ``etype``.
"""Return the in-degree(s) of some particular node(s) with the specified edge type.
Parameters
----------
v : int, iterable of int or tensor, optional.
The node ID array of the destination type. Default is to return the
degrees of all nodes.
etype : str or tuple of str or None, optional
The edge type. Can be omitted if there is only one edge type
in the graph. (Default: None)
v : destination node ID(s), optional
The destination node(s) for query. The allowed formats are:
- ``int``: The destination node for query.
- ``Tensor``: A 1D tensor that contains the destination node(s) for query, whose data
type and device should be the same as the :py:attr:`idtype` and device of the graph.
- ``iterable[int]``: Similar to the tensor, but stores node IDs in a sequence
(e.g. list, tuple, numpy.ndarray).
By default, it considers all nodes.
etype : str or tuple of str, optional
The edge type for query, which can be an edge type (str) or a canonical edge type
(3-tuple of str). When an edge type appears in multiple canonical edge types, one
must use a canonical edge type. If the graph has multiple edge types, one must
specify the argument. Otherwise, it can be omitted.
Returns
-------
d : tensor or int
The in-degree array. ``d[i]`` gives the in-degree of node ``v[i]``
with edges of type ``etype``. If the argument is an integer, so will
be the return.
tensor or int
The in-degree(s) of the node(s).
- If :attr:`v` is an ``int`` object, the return result will be an ``int``
object as well.
- If :attr:`v` is a ``Tensor`` or ``iterable[int]`` object, the return result
will be a 1D ``Tensor``. The data type of the result will be the same as the
idtype of the graph. The i-th element of the tensor is the in-degree of the
node ``v[i]``.
Examples
--------
The following example uses PyTorch backend.
Instantiate a heterograph.
>>> import dgl
>>> import torch
>>> plays_g = dgl.bipartite(([0, 1, 1, 2], [0, 0, 2, 1]), 'user', 'plays', 'game')
>>> follows_g = dgl.graph(([0, 1, 1], [1, 2, 2]), 'user', 'follows')
>>> g = dgl.hetero_from_relations([plays_g, follows_g])
Create a homogeneous graph.
Query for node degree.
>>> g = dgl.graph((torch.tensor([0, 0, 1, 1]), torch.tensor([1, 1, 2, 3])))
>>> g.in_degrees(0, 'plays')
2
>>> g.in_degrees(etype='follows')
tensor([0, 1, 2])
Query for all nodes.
>>> g.in_degrees()
tensor([0, 2, 1, 1])
Query for nodes 1 and 2.
>>> g.in_degrees(torch.tensor([1, 2]))
tensor([2, 1])
For a graph of multiple edge types, it is required to specify the edge type in query.
>>> hg = dgl.heterograph({
... ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
... ('user', 'plays', 'game'): (torch.tensor([3, 4]), torch.tensor([5, 6]))
... })
>>> hg.in_degrees(torch.tensor([1, 0]), etype='follows')
tensor([1, 0])
See Also
--------
out_degrees
"""
dsttype = self.to_canonical_etype(etype)[2]
etid = self.get_etype_id(etype)
if is_all(v):
v = self.dstnodes(dsttype)
deg = self._graph.in_degrees(etid, utils.prepare_tensor(self, v, 'v'))
v_tensor = utils.prepare_tensor(self, v, 'v')
deg = self._graph.in_degrees(etid, v_tensor)
if isinstance(v, numbers.Integral):
return F.as_scalar(deg)
else:
......@@ -2371,48 +3251,78 @@ class DGLHeteroGraph(object):
return self.out_degrees(u, etype)
def out_degrees(self, u=ALL, etype=None):
"""Return the out-degrees of nodes u with edges of type ``etype``.
"""Return the out-degree(s) of some particular node(s) with the specified edge type.
Parameters
----------
u : list, tensor
The node ID array of source type. Default is to return the degrees
of all the nodes.
u : source node ID(s), optional
The source node(s) for query. The allowed formats are:
- ``int``: The source node for query.
- ``Tensor``: A 1D tensor that contains the source node(s) for query, whose data
type and device should be the same as the :py:attr:`idtype` and device of the graph.
- ``iterable[int]``: Similar to the tensor, but stores node IDs in a sequence
(e.g. list, tuple, numpy.ndarray).
By default, it considers all nodes.
etype : str or tuple of str, optional
The edge type. Can be omitted if there is only one edge type
in the graph. (Default: None)
The edge type for query, which can be an edge type (str) or a canonical edge type
(3-tuple of str). When an edge type appears in multiple canonical edge types, one
must use a canonical edge type. If the graph has multiple edge types, one must
specify the argument. Otherwise, it can be omitted.
Returns
-------
d : tensor
The out-degree array. ``d[i]`` gives the out-degree of node ``u[i]``
with edges of type ``etype``.
tensor or int
The out-degree(s) of the node(s).
- If :attr:`u` is an ``int`` object, the return result will be an ``int``
object as well.
- If :attr:`u` is a ``Tensor`` or ``iterable[int]`` object, the return result
will be a 1D ``Tensor``. The data type of the result will be the same as the
idtype of the graph. The i-th element of the tensor is the out-degree of the
node ``u[i]``.
Examples
--------
The following example uses PyTorch backend.
Instantiate a heterograph.
>>> import dgl
>>> import torch
>>> plays_g = dgl.bipartite(([0, 1, 1, 2], [0, 0, 2, 1]), 'user', 'plays', 'game')
>>> follows_g = dgl.graph(([0, 1, 1], [1, 2, 2]), 'user', 'follows')
>>> g = dgl.hetero_from_relations([plays_g, follows_g])
Create a homogeneous graph.
Query for node degree.
>>> g = dgl.graph((torch.tensor([0, 0, 1, 1]), torch.tensor([1, 1, 2, 3])))
>>> g.out_degrees(0, 'plays')
1
>>> g.out_degrees(etype='follows')
tensor([1, 2, 0])
Query for all nodes.
>>> g.out_degrees()
tensor([2, 2, 0, 0])
Query for nodes 1 and 2.
>>> g.out_degrees(torch.tensor([1, 2]))
tensor([2, 0])
For a graph of multiple edge types, it is required to specify the edge type in query.
>>> hg = dgl.heterograph({
... ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
... ('user', 'plays', 'game'): (torch.tensor([3, 4]), torch.tensor([5, 6]))
... })
>>> hg.out_degrees(torch.tensor([1, 0]), etype='follows')
tensor([1, 1])
See Also
--------
out_degree
in_degrees
"""
srctype = self.to_canonical_etype(etype)[0]
etid = self.get_etype_id(etype)
if is_all(u):
u = self.srcnodes(srctype)
u_tensor = utils.prepare_tensor(self, u, 'u')
if F.as_scalar(F.sum(self.has_nodes(u_tensor, ntype=srctype), dim=0)) != len(u_tensor):
raise DGLError('u contains invalid node IDs')
deg = self._graph.out_degrees(etid, utils.prepare_tensor(self, u, 'u'))
if isinstance(u, numbers.Integral):
return F.as_scalar(deg)
......@@ -2420,6 +3330,10 @@ class DGLHeteroGraph(object):
return deg
def adjacency_matrix(self, transpose=None, ctx=F.cpu(), scipy_fmt=None, etype=None):
    """Alias of :func:`adj`.

    All arguments are forwarded to :func:`adj` unchanged and its result is
    returned as is.
    """
    return self.adj(transpose=transpose, ctx=ctx, scipy_fmt=scipy_fmt, etype=etype)
def adj(self, transpose=None, ctx=F.cpu(), scipy_fmt=None, etype=None):
"""Return the adjacency matrix of edges of the given edge type.
By default, a row of returned adjacency matrix represents the
......@@ -2437,9 +3351,12 @@ class DGLHeteroGraph(object):
scipy_fmt : str, optional
If specified, return a scipy sparse matrix in the given format.
Otherwise, return a backend dependent sparse tensor. (Default: None)
etype : str, optional
The edge type. Can be omitted if there is only one edge type
in the graph. (Default: None)
etype : str or tuple of str, optional
The edge type for query, which can be an edge type (str) or a canonical edge type
(3-tuple of str). When an edge type appears in multiple canonical edge types, one
must use a canonical edge type. If the graph has multiple edge types, one must
specify the argument. Otherwise, it can be omitted.
Returns
-------
......@@ -2449,15 +3366,21 @@ class DGLHeteroGraph(object):
Examples
--------
The following example uses PyTorch backend.
>>> import dgl
>>> import torch
Instantiate a heterogeneous graph.
>>> follows_g = dgl.graph(([0, 1], [0, 1]), 'user', 'follows')
>>> devs_g = dgl.bipartite(([0, 1], [0, 2]), 'developer', 'develops', 'game')
>>> g = dgl.hetero_from_relations([follows_g, devs_g])
>>> g = dgl.heterograph({
... ('user', 'follows', 'user'): ([0, 1], [0, 1]),
... ('developer', 'develops', 'game'): ([0, 1], [0, 2])
... })
Get a backend dependent sparse tensor. Here we use PyTorch for example.
>>> g.adjacency_matrix(etype='develops')
>>> g.adj(etype='develops')
tensor(indices=tensor([[0, 2],
[0, 1]]),
values=tensor([1., 1.]),
......@@ -2465,7 +3388,7 @@ class DGLHeteroGraph(object):
Get a scipy coo sparse matrix.
>>> g.adjacency_matrix(scipy_fmt='coo', etype='develops')
>>> g.adj(scipy_fmt='coo', etype='develops')
<3x2 sparse matrix of type '<class 'numpy.int64'>'
with 2 stored elements in COOrdinate format>
"""
......@@ -2482,9 +3405,6 @@ class DGLHeteroGraph(object):
else:
return self._graph.adjacency_matrix_scipy(etid, transpose, scipy_fmt, False)
# Alias of ``adjacency_matrix``
adj = adjacency_matrix
def adjacency_matrix_scipy(self, transpose=None, fmt='csr', return_edge_ids=None):
"""DEPRECATED: please use ``dgl.adjacency_matrix(transpose, scipy_fmt=fmt)``.
"""
......@@ -2495,6 +3415,10 @@ class DGLHeteroGraph(object):
return self.adjacency_matrix(transpose=transpose, scipy_fmt=fmt)
def incidence_matrix(self, typestr, ctx=F.cpu(), etype=None):
    """Alias of :func:`inc`.

    All arguments are forwarded to :func:`inc` unchanged and its result is
    returned as is.
    """
    return self.inc(typestr=typestr, ctx=ctx, etype=etype)
def inc(self, typestr, ctx=F.cpu(), etype=None):
"""Return the incidence matrix representation of edges with the given
edge type.
......@@ -2529,9 +3453,12 @@ class DGLHeteroGraph(object):
Can be either ``in``, ``out`` or ``both``
ctx : context, optional
The context of returned incidence matrix. (Default: cpu)
etype : str, optional
The edge type. Can be omitted if there is only one edge type
in the graph.
etype : str or tuple of str, optional
The edge type for query, which can be an edge type (str) or a canonical edge type
(3-tuple of str). When an edge type appears in multiple canonical edge types, one
must use a canonical edge type. If the graph has multiple edge types, one must
specify the argument. Otherwise, it can be omitted.
Returns
-------
......@@ -2541,18 +3468,22 @@ class DGLHeteroGraph(object):
Examples
--------
>>> g = dgl.graph(([0, 1], [0, 2]), 'user', 'follows')
>>> g.incidence_matrix('in')
The following example uses PyTorch backend.
>>> import dgl
>>> g = dgl.graph(([0, 1], [0, 2]))
>>> g.inc('in')
tensor(indices=tensor([[0, 2],
[0, 1]]),
values=tensor([1., 1.]),
size=(3, 2), nnz=2, layout=torch.sparse_coo)
>>> g.incidence_matrix('out')
>>> g.inc('out')
tensor(indices=tensor([[0, 1],
[0, 1]]),
values=tensor([1., 1.]),
size=(3, 2), nnz=2, layout=torch.sparse_coo)
>>> g.incidence_matrix('both')
>>> g.inc('both')
tensor(indices=tensor([[1, 2],
[1, 1]]),
values=tensor([-1., 1.]),
......@@ -2561,9 +3492,6 @@ class DGLHeteroGraph(object):
etid = self.get_etype_id(etype)
return self._graph.incidence_matrix(etid, typestr, ctx)[0]
# Alias of ``incidence_matrix``
inc = incidence_matrix
#################################################################
# Features
#################################################################
......@@ -2571,29 +3499,44 @@ class DGLHeteroGraph(object):
def node_attr_schemes(self, ntype=None):
"""Return the node feature schemes for the specified type.
Each feature scheme is a named tuple that stores the shape and data type
of the node feature.
A feature scheme describes the shape and data type of a feature.
Parameters
----------
ntype : str, optional
The node type. Can be omitted if there is only one node
type in the graph. Error will be raised otherwise.
(Default: None)
The node type for query. If the graph has multiple node types, one must
specify the argument. Otherwise, it can be omitted.
Returns
-------
dict of str to schemes
The schemes of node feature columns.
dict[str, Scheme]
A dictionary mapping a feature name to its associated feature scheme.
Examples
--------
The following uses PyTorch backend.
The following example uses PyTorch backend.
>>> import dgl
>>> import torch
Query for a homogeneous graph.
>>> g = dgl.graph(([0, 1], [0, 2]), 'user', 'follows')
>>> g.nodes['user'].data['h'] = torch.randn(3, 4)
>>> g = dgl.graph((torch.tensor([0, 1]), torch.tensor([1, 2])))
>>> g.ndata['h1'] = torch.randn(3, 1)
>>> g.ndata['h2'] = torch.randn(3, 2)
>>> g.node_attr_schemes()
{'h1': Scheme(shape=(1,), dtype=torch.float32),
'h2': Scheme(shape=(2,), dtype=torch.float32)}
Query for a heterogeneous graph of multiple node types.
>>> g = dgl.heterograph({('user', 'plays', 'game'):
... (torch.tensor([1, 2]), torch.tensor([3, 4]))})
>>> g.nodes['user'].data['h1'] = torch.randn(3, 1)
>>> g.nodes['user'].data['h2'] = torch.randn(3, 2)
>>> g.node_attr_schemes('user')
{'h': Scheme(shape=(4,), dtype=torch.float32)}
{'h1': Scheme(shape=(1,), dtype=torch.float32),
'h2': Scheme(shape=(2,), dtype=torch.float32)}
See Also
--------
......@@ -2604,28 +3547,48 @@ class DGLHeteroGraph(object):
def edge_attr_schemes(self, etype=None):
"""Return the edge feature schemes for the specified type.
Each feature scheme is a named tuple that stores the shape and data type
of the edge feature.
A feature scheme describes the shape and data type of a feature.
Parameters
----------
etype : str or tuple of str, optional
The edge type. Can be omitted if there is only one edge type
in the graph. (Default: None)
The edge type for query, which can be an edge type (str) or a canonical edge type
(3-tuple of str). When an edge type appears in multiple canonical edge types, one
must use a canonical edge type. If the graph has multiple edge types, one must
specify the argument. Otherwise, it can be omitted.
Returns
-------
dict of str to schemes
The schemes of edge feature columns.
dict[str, Scheme]
A dictionary mapping a feature name to its associated feature scheme.
Examples
--------
The following uses PyTorch backend.
The following example uses PyTorch backend.
>>> import dgl
>>> import torch
Query for a homogeneous graph.
>>> g = dgl.bipartite(([0, 1, 1, 2], [0, 0, 2, 1]), 'user', 'plays', 'game')
>>> g.edges['user', 'plays', 'game'].data['h'] = torch.randn(4, 4)
>>> g.edge_attr_schemes(('user', 'plays', 'game'))
{'h': Scheme(shape=(4,), dtype=torch.float32)}
>>> g = dgl.graph((torch.tensor([0, 1]), torch.tensor([1, 2])))
>>> g.edata['h1'] = torch.randn(2, 1)
>>> g.edata['h2'] = torch.randn(2, 2)
>>> g.edge_attr_schemes()
{'h1': Scheme(shape=(1,), dtype=torch.float32),
'h2': Scheme(shape=(2,), dtype=torch.float32)}
Query for a heterogeneous graph of multiple edge types.
>>> g = dgl.heterograph({('user', 'plays', 'game'):
... (torch.tensor([1, 2]), torch.tensor([3, 4])),
... ('user', 'follows', 'user'):
... (torch.tensor([3, 4]), torch.tensor([5, 6]))})
>>> g.edges['plays'].data['h1'] = torch.randn(2, 1)
>>> g.edges['plays'].data['h2'] = torch.randn(2, 2)
>>> g.edge_attr_schemes('plays')
{'h1': Scheme(shape=(1,), dtype=torch.float32),
'h2': Scheme(shape=(2,), dtype=torch.float32)}
See Also
--------
......@@ -2634,67 +3597,180 @@ class DGLHeteroGraph(object):
return self._edge_frames[self.get_etype_id(etype)].schemes
def set_n_initializer(self, initializer, field=None, ntype=None):
"""Set the initializer for empty node features.
Initializer is a callable that returns a tensor given the shape, data type
and device context.
"""Set the initializer for node features.
When a subset of the nodes are assigned a new feature, initializer is
used to create feature for the rest of the nodes.
When only some of the nodes have a feature (e.g. new nodes are added, or
features are set for only a subset of nodes), the initializer creates the
feature for the remaining nodes.
Parameters
----------
initializer : callable
The initializer, mapping (shape, data type, context) to tensor.
A function of signature ``func(shape, dtype, ctx, id_range) -> Tensor``.
The tensor will be the initialized features. The arguments are:
- ``shape``: The shape of the tensor to return, which is a tuple of int.
The first dimension is the number of nodes for feature initialization.
- ``dtype``: The data type of the tensor to return, which is a
framework-specific data type object.
- ``ctx``: The device of the tensor to return, which is a framework-specific
device object.
- ``id_range``: The start and end ID of the nodes for feature initialization,
which is a slice.
field : str, optional
The feature field name. Default is to set an initializer for all the
feature fields.
The name of the feature that the initializer applies to. If not given, the
initializer applies to all features.
ntype : str, optional
The node type. Can be omitted if there is only one node
type in the graph. Error will be raised otherwise.
(Default: None)
The type of the nodes that the initializer applies to. If the graph has
multiple node types, one must specify the argument. Otherwise, it can
be omitted.
Note
Notes
-----
User defined initializer must follow the signature of
:func:`dgl.init.base_initializer() <dgl.init.base_initializer>`
Without setting a node feature initializer, zero tensors are generated
for nodes without a feature.
See Also
Examples
--------
set_e_initializer
The following example uses PyTorch backend.
>>> import dgl
>>> import torch
Define a function for initializer.
>>> def init_feats(shape, dtype, device, id_range):
... return torch.ones(shape, dtype=dtype, device=device)
An example for a homogeneous graph.
>>> g = dgl.graph((torch.tensor([0]), torch.tensor([1])))
>>> g.ndata['h1'] = torch.zeros(2, 2)
>>> g.ndata['h2'] = torch.ones(2, 1)
>>> # Apply the initializer to feature 'h2' only.
>>> g.set_n_initializer(init_feats, field='h2')
>>> g.add_nodes(1)
>>> print(g.ndata['h1'])
tensor([[0., 0.],
[0., 0.],
[0., 0.]])
>>> print(g.ndata['h2'])
tensor([[1.], [1.], [1.]])
An example for a heterogeneous graph of multiple node types.
>>> g = dgl.heterograph({
... ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
... torch.tensor([0, 0, 1, 1])),
... ('developer', 'develops', 'game'): (torch.tensor([0, 1]),
... torch.tensor([0, 1]))
... })
>>> g.nodes['user'].data['h'] = torch.zeros(3, 2)
>>> g.nodes['game'].data['w'] = torch.ones(2, 2)
>>> g.set_n_initializer(init_feats, ntype='game')
>>> g.add_nodes(1, ntype='user')
>>> # Initializer not set for 'user', use zero tensors by default
>>> g.nodes['user'].data['h']
tensor([[0., 0.],
[0., 0.],
[0., 0.],
[0., 0.]])
>>> # Initializer set for 'game'
>>> g.add_nodes(1, ntype='game')
>>> g.nodes['game'].data['w']
tensor([[1., 1.],
[1., 1.],
[1., 1.]])
"""
ntid = self.get_ntype_id(ntype)
self._node_frames[ntid].set_initializer(initializer, field)
def set_e_initializer(self, initializer, field=None, etype=None):
"""Set the initializer for empty edge features.
"""Set the initializer for edge features.
Initializer is a callable that returns a tensor given the shape, data
type and device context.
When a subset of the edges are assigned a new feature, initializer is
used to create feature for rest of the edges.
When only some of the edges have a feature (e.g. new edges are added, or
features are set for only a subset of edges), the initializer creates the
feature for the remaining edges.
Parameters
----------
initializer : callable
The initializer, mapping (shape, data type, context) to tensor.
A function of signature ``func(shape, dtype, ctx, id_range) -> Tensor``.
The tensor will be the initialized features. The arguments are:
- ``shape``: The shape of the tensor to return, which is a tuple of int.
The first dimension is the number of edges for feature initialization.
- ``dtype``: The data type of the tensor to return, which is a
framework-specific data type object.
- ``ctx``: The device of the tensor to return, which is a framework-specific
device object.
- ``id_range``: The start and end ID of the edges for feature initialization,
which is a slice.
field : str, optional
The feature field name. Default is set an initializer for all the
feature fields.
The name of the feature that the initializer applies to. If not given, the
initializer applies to all features.
etype : str or tuple of str, optional
The edge type. Can be omitted if there is only one edge type
in the graph. Error will be raised otherwise.
(Default: None)
The edge type for query, which can be an edge type (str) or a canonical edge type
(3-tuple of str). When an edge type appears in multiple canonical edge types, one
must use a canonical edge type. If the graph has multiple edge types, one must
specify the argument. Otherwise, it can be omitted.
Note
Notes
-----
User defined initializer must follow the signature of
:func:`dgl.init.base_initializer() <dgl.init.base_initializer>`
Without setting an edge feature initializer, zero tensors are generated
for edges without a feature.
See Also
Examples
--------
set_n_initializer
The following example uses PyTorch backend.
>>> import dgl
>>> import torch
Define a function for initializer.
>>> def init_feats(shape, dtype, device, id_range):
... return torch.ones(shape, dtype=dtype, device=device)
An example for a homogeneous graph.
>>> g = dgl.graph((torch.tensor([0]), torch.tensor([1])))
>>> g.edata['h1'] = torch.zeros(1, 2)
>>> g.edata['h2'] = torch.ones(1, 1)
>>> # Apply the initializer to feature 'h2' only.
>>> g.set_e_initializer(init_feats, field='h2')
>>> g.add_edges(torch.tensor([1]), torch.tensor([1]))
>>> print(g.edata['h1'])
tensor([[0., 0.],
[0., 0.]])
>>> print(g.edata['h2'])
tensor([[1.], [1.]])
An example for a heterogeneous graph of multiple edge types.
>>> g = dgl.heterograph({
... ('user', 'plays', 'game'): (torch.tensor([0, 1]),
... torch.tensor([0, 0])),
... ('developer', 'develops', 'game'): (torch.tensor([0, 1]),
... torch.tensor([0, 1]))
... })
>>> g.edges['plays'].data['h'] = torch.zeros(2, 2)
>>> g.edges['develops'].data['w'] = torch.ones(2, 2)
>>> g.set_e_initializer(init_feats, etype='plays')
>>> # Initializer not set for 'develops', use zero tensors by default
>>> g.add_edges(torch.tensor([1]), torch.tensor([1]), etype='develops')
>>> g.edges['develops'].data['w']
tensor([[1., 1.],
[1., 1.],
[0., 0.]])
>>> # Initializer set for 'plays'
>>> g.add_edges(torch.tensor([1]), torch.tensor([1]), etype='plays')
>>> g.edges['plays'].data['h']
tensor([[0., 0.],
[0., 0.],
[1., 1.]])
"""
etid = self.get_etype_id(etype)
self._edge_frames[etid].set_initializer(initializer, field)
......@@ -2897,7 +3973,7 @@ class DGLHeteroGraph(object):
Examples
--------
>>> g = dgl.graph(([0, 1], [1, 2]), 'user', 'follows')
>>> g = dgl.heterograph({('user', 'follows', 'user'): ([0, 1], [1, 2])})
>>> g.nodes['user'].data['h'] = torch.ones(3, 5)
>>> g.apply_nodes(lambda nodes: {'h': nodes.data['h'] * 2}, ntype='user')
>>> g.nodes['user'].data['h']
......@@ -2942,7 +4018,7 @@ class DGLHeteroGraph(object):
Examples
--------
>>> g = dgl.bipartite(([0, 1, 1, 2], [0, 0, 2, 1]), 'user', 'plays', 'game')
>>> g = dgl.heterograph({('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 2, 1])})
>>> g.edges[('user', 'plays', 'game')].data['h'] = torch.ones(4, 5)
>>> g.apply_edges(lambda edges: {'h': edges.data['h'] * 2})
>>> g.edges[('user', 'plays', 'game')].data['h']
......@@ -3021,12 +4097,13 @@ class DGLHeteroGraph(object):
>>> import dgl.function as fn
>>> import torch
>>> follows_g = dgl.graph(([0, 1], [1, 2]), 'user', 'follows')
>>> plays_g = dgl.bipartite(([0, 1, 1, 2], [0, 0, 1, 1]), 'user', 'plays', 'game')
>>> g = dgl.hetero_from_relations([follows_g, plays_g])
>>> g = dgl.heterograph({
... ('user', 'follows', 'user'): ([0, 1], [1, 2]),
... ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 1, 1])
... })
>>> g.nodes['user'].data['h'] = torch.tensor([[0.], [1.], [2.]])
>>> g.send_and_recv(g['follows'].edges(), fn.copy_src('h', 'm'),
>>> fn.sum('m', 'h'), etype='follows')
... fn.sum('m', 'h'), etype='follows')
>>> g.nodes['user'].data['h']
tensor([[0.],
[0.],
......@@ -3045,7 +4122,8 @@ class DGLHeteroGraph(object):
return
u, v = self.find_edges(eid, etype=etype)
# call message passing on subgraph
ndata = core.message_passing(_create_compute_graph(self, u, v, eid),
g = self if etype is None else self[etype]
ndata = core.message_passing(_create_compute_graph(g, u, v, eid),
message_func, reduce_func, apply_node_func)
dstnodes = F.unique(v)
self._set_n_repr(dtid, dstnodes, ndata)
......@@ -3106,9 +4184,10 @@ class DGLHeteroGraph(object):
Instantiate a heterograph.
>>> follows_g = dgl.graph(([0, 1], [1, 2]), 'user', 'follows')
>>> plays_g = dgl.bipartite(([0, 2], [0, 1]), 'user', 'plays', 'game')
>>> g = dgl.hetero_from_relations([follows_g, plays_g])
>>> g = dgl.heterograph({
... ('user', 'follows', 'user'): ([0, 1], [1, 2]),
... ('user', 'plays', 'game'): ([0, 2], [0, 1])
... })
>>> g.nodes['user'].data['h'] = torch.tensor([[0.], [1.], [2.]])
Pull.
......@@ -3131,7 +4210,7 @@ class DGLHeteroGraph(object):
g = self if etype is None else self[etype]
# call message passing on subgraph
src, dst, eid = g.in_edges(v, form='all')
ndata = core.message_passing(_create_compute_graph(self, src, dst, eid, v),
ndata = core.message_passing(_create_compute_graph(g, src, dst, eid, v),
message_func, reduce_func, apply_node_func)
self._set_n_repr(dtid, v, ndata)
......@@ -3182,7 +4261,7 @@ class DGLHeteroGraph(object):
Instantiate a heterograph.
>>> g = dgl.graph(([0, 0], [1, 2]), 'user', 'follows')
>>> g = dgl.heterograph({('user', 'follows', 'user'): ([0, 0], [1, 2])})
>>> g.nodes['user'].data['h'] = torch.tensor([[0.], [1.], [2.]])
Push.
......@@ -3240,7 +4319,7 @@ class DGLHeteroGraph(object):
Instantiate a heterograph.
>>> g = dgl.graph(([0, 1, 2], [1, 2, 2]), 'user', 'follows')
>>> g = dgl.heterograph({('user', 'follows', 'user'): ([0, 1, 2], [1, 2, 2])})
Update all.
......@@ -3301,18 +4380,19 @@ class DGLHeteroGraph(object):
Instantiate a heterograph.
>>> g1 = dgl.graph(([0, 1], [1, 1]), 'user', 'follows')
>>> g2 = dgl.bipartite(([0], [1]), 'game', 'attracts', 'user')
>>> g = dgl.hetero_from_relations([g1, g2])
>>> g = dgl.heterograph({
... ('user', 'follows', 'user'): ([0, 1], [1, 1]),
... ('game', 'attracts', 'user'): ([0], [1])
... })
>>> g.nodes['user'].data['h'] = torch.tensor([[1.], [2.]])
>>> g.nodes['game'].data['h'] = torch.tensor([[1.]])
Update all.
>>> g.multi_update_all(
>>> {'follows': (fn.copy_src('h', 'm'), fn.sum('m', 'h')),
>>> 'attracts': (fn.copy_src('h', 'm'), fn.sum('m', 'h'))},
>>> "sum")
... {'follows': (fn.copy_src('h', 'm'), fn.sum('m', 'h')),
... 'attracts': (fn.copy_src('h', 'm'), fn.sum('m', 'h'))},
... "sum")
>>> g.nodes['user'].data['h']
tensor([[0.],
[4.]])
......@@ -3327,7 +4407,8 @@ class DGLHeteroGraph(object):
raise DGLError('Invalid arguments for edge type "{}". Should be '
'(msg_func, reduce_func, [apply_node_func])'.format(etype))
mfunc, rfunc, afunc = args
all_out[dtid].append(core.message_passing(self[etype], mfunc, rfunc, afunc))
g = self if etype is None else self[etype]
all_out[dtid].append(core.message_passing(g, mfunc, rfunc, afunc))
merge_order[dtid].append(etid) # use edge type id as merge order hint
for dtid, frames in all_out.items():
# merge by cross_reducer
......@@ -3381,10 +4462,10 @@ class DGLHeteroGraph(object):
Instantiate a heterograph and perform multiple rounds of message passing.
>>> g = dgl.graph(([0, 1, 2, 3], [2, 3, 4, 4]), 'user', 'follows')
>>> g = dgl.heterograph({('user', 'follows', 'user'): ([0, 1, 2, 3], [2, 3, 4, 4])})
>>> g.nodes['user'].data['h'] = torch.tensor([[1.], [2.], [3.], [4.], [5.]])
>>> g['follows'].prop_nodes([[2, 3], [4]], fn.copy_src('h', 'm'),
>>> fn.sum('m', 'h'), etype='follows')
... fn.sum('m', 'h'), etype='follows')
>>> g.nodes['user'].data['h']
tensor([[1.],
[2.],
[1.],
......@@ -3439,10 +4520,10 @@ class DGLHeteroGraph(object):
Instantiate a heterograph and perform multiple rounds of message passing.
>>> g = dgl.graph(([0, 1, 2, 3], [2, 3, 4, 4]), 'user', 'follows')
>>> g = dgl.heterograph({('user', 'follows', 'user'): ([0, 1, 2, 3], [2, 3, 4, 4])})
>>> g.nodes['user'].data['h'] = torch.tensor([[1.], [2.], [3.], [4.], [5.]])
>>> g['follows'].prop_edges([[0, 1], [2, 3]], fn.copy_src('h', 'm'),
>>> fn.sum('m', 'h'), etype='follows')
... fn.sum('m', 'h'), etype='follows')
>>> g.nodes['user'].data['h']
tensor([[1.],
[2.],
......@@ -3463,38 +4544,78 @@ class DGLHeteroGraph(object):
#################################################################
def filter_nodes(self, predicate, nodes=ALL, ntype=None):
"""Return a tensor of node IDs with the given node type that satisfy
"""Return the IDs of the nodes with the given node type that satisfy
the given predicate.
Parameters
----------
predicate : callable
A function of signature ``func(nodes) -> tensor``.
``nodes`` are :class:`NodeBatch` objects as in :mod:`~dgl.udf`.
The ``tensor`` returned should be a 1-D boolean tensor with
A function of signature ``func(nodes) -> Tensor``.
``nodes`` are :class:`dgl.NodeBatch` objects.
Its output tensor should be a 1D boolean tensor with
each element indicating whether the corresponding node in
the batch satisfies the predicate.
nodes : int, iterable or tensor of ints
The nodes to filter on. Default value is all the nodes.
nodes : node ID(s), optional
The node(s) for query. The allowed formats are:
- Tensor: A 1D tensor that contains the node(s) for query, whose data type
and device should be the same as the :py:attr:`idtype` and device of the graph.
- iterable[int] : Similar to the tensor, but stores node IDs in a sequence
(e.g. list, tuple, numpy.ndarray).
By default, it considers all nodes.
ntype : str, optional
The node type. Can be omitted if there is only one node type
in the graph. (Default: None)
The node type for query. If the graph has multiple node types, one must
specify the argument. Otherwise, it can be omitted.
Returns
-------
tensor
Node ids indicating the nodes that satisfy the predicate.
A 1D tensor that contains the ID(s) of the node(s) that satisfy the predicate.
Examples
--------
>>> import torch
The following example uses PyTorch backend.
>>> import dgl
>>> import dgl.function as fn
>>> g = dgl.graph([], 'user', 'follows', num_nodes=4)
>>> g.nodes['user'].data['h'] = torch.tensor([[0.], [1.], [1.], [0.]])
>>> g.filter_nodes(lambda nodes: (nodes.data['h'] == 1.).squeeze(1), ntype='user')
>>> import torch
Define a predicate function.
>>> def nodes_with_feature_one(nodes):
... # Whether a node has feature 1
... return (nodes.data['h'] == 1.).squeeze(1)
Filter nodes for a homogeneous graph.
>>> g = dgl.graph((torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3])))
>>> g.ndata['h'] = torch.tensor([[0.], [1.], [1.], [0.]])
>>> print(g.filter_nodes(nodes_with_feature_one))
tensor([1, 2])
Filter on nodes with IDs 0 and 1
>>> print(g.filter_nodes(nodes_with_feature_one, nodes=torch.tensor([0, 1])))
tensor([1])
Filter nodes for a heterogeneous graph.
>>> g = dgl.heterograph({
... ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
... torch.tensor([0, 0, 1, 1]))})
>>> g.nodes['user'].data['h'] = torch.tensor([[0.], [1.], [1.]])
>>> g.nodes['game'].data['h'] = torch.tensor([[0.], [1.]])
>>> # Filter for 'user' nodes
>>> print(g.filter_nodes(nodes_with_feature_one, ntype='user'))
tensor([1, 2])
"""
if is_all(nodes):
nodes = self.nodes(ntype)
v = utils.prepare_tensor(self, nodes, 'nodes')
if F.as_scalar(F.sum(self.has_nodes(v, ntype=ntype), dim=0)) != len(v):
raise DGLError('v contains invalid node IDs')
with self.local_scope():
self.apply_nodes(lambda nbatch: {'_mask' : predicate(nbatch)}, nodes, ntype)
ntype = self.ntypes[0] if ntype is None else ntype
......@@ -3502,43 +4623,105 @@ class DGLHeteroGraph(object):
if is_all(nodes):
return F.nonzero_1d(mask)
else:
v = utils.prepare_tensor(self, nodes, 'nodes')
return F.boolean_mask(v, F.gather_row(mask, v))
def filter_edges(self, predicate, edges=ALL, etype=None):
"""Return a tensor of edge IDs with the given edge type that satisfy
"""Return the IDs of the edges with the given edge type that satisfy
the given predicate.
Parameters
----------
predicate : callable
A function of signature ``func(edges) -> tensor``.
``edges`` are :class:`EdgeBatch` objects as in :mod:`~dgl.udf`.
The ``tensor`` returned should be a 1-D boolean tensor with
A function of signature ``func(edges) -> Tensor``.
``edges`` are :class:`dgl.EdgeBatch` objects.
Its output tensor should be a 1D boolean tensor with
each element indicating whether the corresponding edge in
the batch satisfies the predicate.
edges : valid edges type
Edges on which to apply ``func``. See :func:`send` for valid
edges type. Default value is all the edges.
etype : str, optional
The edge type. Can be omitted if there is only one edge type
in the graph. (Default: None)
edges : edge ID(s) or edge end nodes, optional
The edge(s) for query. The allowed formats are:
- Tensor: A 1D tensor that contains the IDs of the edge(s) for query, whose data
type and device should be the same as the :py:attr:`idtype` and device of the graph.
- iterable[int]: Similar to the tensor, but stores edge IDs in a sequence
(e.g. list, tuple, numpy.ndarray).
- (Tensor, Tensor): A 2-tuple of the source and destination nodes of multiple
edges for query. Each tensor is a 1D tensor containing node IDs. DGL calls this
format "tuple of node-tensors". The data type and device of the tensors should
be the same as the :py:attr:`idtype` and device of the graph.
- (iterable[int], iterable[int]): Similar to the tuple of node-tensors format,
but stores node IDs in two sequences (e.g. list, tuple, numpy.ndarray).
By default, it considers all edges.
etype : str or tuple of str, optional
The edge type for query, which can be an edge type (str) or a canonical edge type
(3-tuple of str). When an edge type appears in multiple canonical edge types, one
must use a canonical edge type. If the graph has multiple edge types, one must
specify the argument. Otherwise, it can be omitted.
Returns
-------
tensor
Edge ids indicating the edges that satisfy the predicate.
A 1D tensor that contains the ID(s) of the edge(s) that satisfy the predicate.
Examples
--------
>>> import torch
The following example uses PyTorch backend.
>>> import dgl
>>> import dgl.function as fn
>>> g = dgl.graph(([0, 0, 1, 2], [0, 1, 2, 3]), 'user', 'follows')
>>> g.edges['follows'].data['h'] = torch.tensor([[0.], [1.], [1.], [0.]])
>>> g.filter_edges(lambda edges: (edges.data['h'] == 1.).squeeze(1), etype='follows')
>>> import torch
Define a predicate function.
>>> def edges_with_feature_one(edges):
... # Whether an edge has feature 1
... return (edges.data['h'] == 1.).squeeze(1)
Filter edges for a homogeneous graph.
>>> g = dgl.graph((torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3])))
>>> g.edata['h'] = torch.tensor([[0.], [1.], [1.]])
>>> print(g.filter_edges(edges_with_feature_one))
tensor([1, 2])
Filter on edges with IDs 0 and 1
>>> print(g.filter_edges(edges_with_feature_one, edges=torch.tensor([0, 1])))
tensor([1])
Filter edges for a heterogeneous graph.
>>> g = dgl.heterograph({
... ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
... torch.tensor([0, 0, 1, 1])),
... ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2]))})
>>> g.edges['plays'].data['h'] = torch.tensor([[0.], [1.], [1.], [0.]])
>>> # Filter for 'plays' edges
>>> print(g.filter_edges(edges_with_feature_one, etype='plays'))
tensor([1, 2])
"""
if is_all(edges):
pass
elif isinstance(edges, tuple):
u, v = edges
srctype, _, dsttype = self.to_canonical_etype(etype)
u = utils.prepare_tensor(self, u, 'u')
if F.as_scalar(F.sum(self.has_nodes(u, ntype=srctype), dim=0)) != len(u):
raise DGLError('edges[0] contains invalid node IDs')
v = utils.prepare_tensor(self, v, 'v')
if F.as_scalar(F.sum(self.has_nodes(v, ntype=dsttype), dim=0)) != len(v):
raise DGLError('edges[1] contains invalid node IDs')
elif isinstance(edges, Iterable) or F.is_tensor(edges):
edges = utils.prepare_tensor(self, edges, 'edges')
min_eid = F.as_scalar(F.min(edges, 0))
if len(edges) > 0 > min_eid:
raise DGLError('Invalid edge ID {:d}'.format(min_eid))
max_eid = F.as_scalar(F.max(edges, 0))
if len(edges) > 0 and max_eid >= self.num_edges(etype):
raise DGLError('Invalid edge ID {:d}'.format(max_eid))
else:
raise ValueError('Unsupported type of edges:', type(edges))
with self.local_scope():
self.apply_edges(lambda ebatch: {'_mask' : predicate(ebatch)}, edges, etype)
etype = self.canonical_etypes[0] if etype is None else etype
......@@ -3554,53 +4737,77 @@ class DGLHeteroGraph(object):
@property
def device(self):
"""Get the device context of this graph.
"""Get the device of the graph.
Returns
-------
device context
The device of the graph, which should be a framework-specific device object
(e.g., ``torch.device``).
Examples
--------
The following example uses PyTorch backend.
>>> g = dgl.bipartite(([0, 1, 1, 2], [0, 0, 2, 1]), 'user', 'plays', 'game')
>>> import dgl
>>> import torch
Create a homogeneous graph for demonstration.
>>> g = dgl.graph((torch.tensor([0, 1]), torch.tensor([1, 2])))
>>> print(g.device)
device(type='cpu')
>>> g = g.to('cuda:0')
>>> print(g.device)
device(type='cuda', index=0)
Returns
-------
Device context object
The case of heterogeneous graphs is the same.
"""
return F.to_backend_ctx(self._graph.ctx)
def to(self, device, **kwargs): # pylint: disable=invalid-name
"""Move ndata, edata and graph structure to the targeted device (cpu/gpu).
If the graph is already on the specified device, the function directly returns it.
Otherwise, it returns a cloned graph on the specified device.
Parameters
----------
device : Framework-specific device context object
The context to move data to.
The context to move data to (e.g., ``torch.device``).
kwargs : Key-word arguments.
Key-word arguments fed to the framework copy function.
Returns
-------
g : DGLHeteroGraph
Moved DGLHeteroGraph of the targeted mode.
DGLGraph
The graph on the specified device.
Examples
--------
The following example uses PyTorch backend.
>>> import dgl
>>> import torch
>>> g = dgl.bipartite(([0, 1, 1, 2], [0, 0, 2, 1]), 'user', 'plays', 'game')
>>> g.nodes['user'].data['h'] = torch.tensor([[0.], [1.], [2.]])
>>> g.edges['plays'].data['h'] = torch.tensor([[0.], [1.], [2.], [3.]])
>>> g = dgl.graph((torch.tensor([1, 0]), torch.tensor([1, 2])))
>>> g.ndata['h'] = torch.ones(3, 1)
>>> g.edata['h'] = torch.zeros(2, 2)
>>> g1 = g.to(torch.device('cuda:0'))
>>> print(g1.device)
device(type='cuda', index=0)
>>> print(g1.ndata['h'].device)
device(type='cuda', index=0)
>>> print(g1.nodes().device)
device(type='cuda', index=0)
The original graph is still on CPU.
>>> print(g.device)
device(type='cpu')
>>> print(g.ndata['h'].device)
device(type='cpu')
>>> print(g.nodes().device)
device(type='cpu')
The case of heterogeneous graphs is the same.
"""
if device is None or self.device == device:
return self
......@@ -3680,62 +4887,72 @@ class DGLHeteroGraph(object):
return ret
def local_var(self):
"""Return a heterograph object that can be used in a local function scope.
"""Return a graph object for usage in a local function scope.
The returned graph object shares the feature data and graph structure of this graph.
However, any out-of-place mutation to the feature data will not be reflected in this graph,
thus making it easier to use in a function scope.
thus making it easier to use in a function scope (e.g. forward computation of a model).
If set, the local graph object will use the same initializers for node features and
edge features.
Returns
-------
DGLHeteroGraph
The graph object that can be used as a local variable.
DGLGraph
The graph object for a local variable.
Notes
-----
Internally, the returned graph shares the same feature tensors, but construct a new
dictionary structure (aka. Frame) so adding/removing feature tensors from the returned
graph will not reflect to the original graph. However, inplace operations do change
the shared tensor values, so will be reflected to the original graph. This function
also has little overhead when the number of feature tensors in this graph is small.
In-place operations are reflected in the original graph. This function also has little
overhead when the number of feature tensors in this graph is small.
Examples
--------
The following example uses PyTorch backend.
Avoid accidentally overriding existing feature data. This is quite common when
implementing a NN module:
>>> import dgl
>>> import torch
Create a function for computation on graphs.
>>> def foo(g):
>>> g = g.local_var()
>>> g.edata['h'] = torch.ones((g.number_of_edges(), 3))
>>> return g.edata['h']
>>>
>>> g = dgl.bipartite(([0, 1, 1], [0, 0, 2]), 'user', 'plays', 'game')
>>> g.edata['h'] = torch.zeros((g.number_of_edges(), 3))
>>> newh = foo(g) # get tensor of all ones
... g = g.local_var()
... g.edata['h'] = torch.ones((g.num_edges(), 3))
... g.edata['h2'] = torch.ones((g.num_edges(), 3))
... return g.edata['h']
``local_var`` avoids changing the graph features when exiting the function.
>>> g = dgl.graph((torch.tensor([0, 1, 1]), torch.tensor([0, 0, 2])))
>>> g.edata['h'] = torch.zeros((g.num_edges(), 3))
>>> newh = foo(g)
>>> print(g.edata['h']) # still get tensor of all zeros
tensor([[0., 0., 0.],
[0., 0., 0.],
[0., 0., 0.]])
>>> 'h2' in g.edata # new feature set in the function scope is not found
False
Automatically garbage collect locally-defined tensors without the need to manually
``pop`` the tensors.
In-place operations will still reflect to the original graph.
>>> def foo(g):
>>> g = g.local_var()
>>> # This 'h' feature will stay local and be GCed when the function exits
>>> g.edata['h'] = torch.ones((g.number_of_edges(), 3))
>>> return g.edata['h']
>>>
>>> g = dgl.bipartite(([0, 1, 1], [0, 0, 2]), 'user', 'plays', 'game')
>>> h = foo(g)
>>> print('h' in g.edata)
False
... g = g.local_var()
... # in-place operation
... g.edata['h'] += 1
... return g.edata['h']
>>> g = dgl.graph((torch.tensor([0, 1, 1]), torch.tensor([0, 0, 2])))
>>> g.edata['h'] = torch.zeros((g.num_edges(), 1))
>>> newh = foo(g)
>>> print(g.edata['h']) # the result changes
tensor([[1.],
[1.],
[1.]])
See Also
--------
local_var
local_scope
"""
ret = copy.copy(self)
ret._node_frames = [fr.clone() for fr in self._node_frames]
......@@ -3744,44 +4961,63 @@ class DGLHeteroGraph(object):
@contextmanager
def local_scope(self):
"""Enter a local scope context for this graph.
"""Enter a local scope context for the graph.
By entering a local scope, any out-of-place mutation to the feature data will
not be reflected in the original graph, thus making it easier to use in a function scope.
not be reflected in the original graph, thus making it easier to use in a function scope
(e.g. forward computation of a model).
If set, the local scope will use the same initializers for node features and
edge features.
Notes
-----
In-place operations are reflected in the original graph. This function also has little
overhead when the number of feature tensors in this graph is small.
Examples
--------
The following example uses PyTorch backend.
Avoid accidentally overriding existing feature data. This is quite common when
implementing a NN module:
>>> import dgl
>>> import torch
Create a function for computation on graphs.
>>> def foo(g):
>>> with g.local_scope():
>>> g.edata['h'] = torch.ones((g.number_of_edges(), 3))
>>> return g.edata['h']
>>>
>>> g = dgl.bipartite(([0, 1, 1], [0, 0, 2]), 'user', 'plays', 'game')
>>> g.edata['h'] = torch.zeros((g.number_of_edges(), 3))
>>> newh = foo(g) # get tensor of all ones
... with g.local_scope():
... g.edata['h'] = torch.ones((g.num_edges(), 3))
... g.edata['h2'] = torch.ones((g.num_edges(), 3))
... return g.edata['h']
``local_scope`` avoids changing the graph features when exiting the function.
>>> g = dgl.graph((torch.tensor([0, 1, 1]), torch.tensor([0, 0, 2])))
>>> g.edata['h'] = torch.zeros((g.num_edges(), 3))
>>> newh = foo(g)
>>> print(g.edata['h']) # still get tensor of all zeros
tensor([[0., 0., 0.],
[0., 0., 0.],
[0., 0., 0.]])
>>> 'h2' in g.edata # new feature set in the function scope is not found
False
Automatically garbage collect locally-defined tensors without the need to manually
``pop`` the tensors.
In-place operations will still reflect to the original graph.
>>> def foo(g):
>>> with g.local_scope():
>>> # This 'h' feature will stay local and be GCed when the function exits
>>> g.edata['h'] = torch.ones((g.number_of_edges(), 3))
>>> return g.edata['h']
>>>
>>> g = dgl.bipartite(([0, 1, 1], [0, 0, 2]), 'user', 'plays', 'game')
>>> h = foo(g)
>>> print('h' in g.edata)
False
... with g.local_scope():
... # in-place operation
... g.edata['h'] += 1
... return g.edata['h']
>>> g = dgl.graph((torch.tensor([0, 1, 1]), torch.tensor([0, 0, 2])))
>>> g.edata['h'] = torch.zeros((g.num_edges(), 1))
>>> newh = foo(g)
>>> print(g.edata['h']) # the result changes
tensor([[1.],
[1.],
[1.]])
See Also
--------
......@@ -3795,10 +5031,6 @@ class DGLHeteroGraph(object):
self._node_frames = old_nframes
self._edge_frames = old_eframes
def is_homogeneous(self):
    """Return whether the graph is homogeneous.

    A graph counts as homogeneous when it has exactly one node type and
    exactly one edge type.

    Returns
    -------
    bool
        True if both ``self.ntypes`` and ``self.etypes`` have length 1.
    """
    # More (or fewer) than one node type rules out a homogeneous graph;
    # the edge types are only inspected when the node-type check passes.
    if len(self.ntypes) != 1:
        return False
    return len(self.etypes) == 1
def formats(self, formats=None):
r"""Get a cloned graph with the specified sparse format(s) or query
for the usage status of sparse formats
......@@ -3835,7 +5067,7 @@ class DGLHeteroGraph(object):
**Homographs or Heterographs with A Single Edge Type**
>>> g = dgl.graph([(0, 2), (0, 3), (1, 2)])
>>> g = dgl.graph(([0, 0, 1], [2, 3, 2]))
>>> g.ndata['h'] = torch.ones(4, 1)
>>> # Check status of format usage
>>> g.formats()
......@@ -3855,11 +5087,11 @@ class DGLHeteroGraph(object):
**Heterographs with Multiple Edge Types**
>>> g = dgl.heterograph({
>>> ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
>>> torch.tensor([0, 0, 1, 1])),
>>> ('developer', 'develops', 'game'): (torch.tensor([0, 1]),
>>> torch.tensor([0, 1]))
>>> })
... ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
... torch.tensor([0, 0, 1, 1])),
... ('developer', 'develops', 'game'): (torch.tensor([0, 1]),
... torch.tensor([0, 1]))
... })
>>> g.formats()
{'created': ['coo'], 'not created': ['csr', 'csc']}
>>> # Get a clone of the graph with 'csr' format
......@@ -3894,7 +5126,7 @@ class DGLHeteroGraph(object):
**Homographs or Heterographs with A Single Edge Type**
>>> g = dgl.graph([(0, 2), (0, 3), (1, 2)])
>>> g = dgl.graph(([0, 0, 1], [2, 3, 2]))
>>> g.formats()
{'created': ['coo'], 'not created': ['csr', 'csc']}
>>> g.create_format_()
......@@ -3904,11 +5136,11 @@ class DGLHeteroGraph(object):
**Heterographs with Multiple Edge Types**
>>> g = dgl.heterograph({
>>> ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
>>> torch.tensor([0, 0, 1, 1])),
>>> ('developer', 'develops', 'game'): (torch.tensor([0, 1]),
>>> torch.tensor([0, 1]))
>>> })
... ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
... torch.tensor([0, 0, 1, 1])),
... ('developer', 'develops', 'game'): (torch.tensor([0, 1]),
... torch.tensor([0, 1]))
... })
>>> g.formats()
{'created': ['coo'], 'not created': ['csr', 'csc']}
>>> g.create_format_()
......@@ -3934,8 +5166,7 @@ class DGLHeteroGraph(object):
"""
if idtype is None:
return self
if not idtype in (F.int32, F.int64):
raise DGLError("ID type must be int32 or int64, but got {}.".format(idtype))
utils.check_valid_idtype(idtype)
if self.idtype == idtype:
return self
bits = 32 if idtype == F.int32 else 64
......@@ -3974,51 +5205,102 @@ class DGLHeteroGraph(object):
def long(self):
"""Cast this graph to use int64 IDs.
"""Cast the graph to one with idtype int64
Features are copied (shallow copy) to the new graph.
If the graph already has idtype int64, the function directly returns it. Otherwise,
it returns a cloned graph of idtype int64 with features copied (shallow copy).
Returns
-------
DGLHeteroGraph
The graph object
DGLGraph
The graph of idtype int64.
Examples
--------
>>> g = dgl.bipartite(([0, 1, 1], [0, 0, 2]), 'user', 'plays', 'game',
>>> idtype=torch.int32)
>>> g_long = g.long() # Convert g to int64 indexed, not changing the original `g`
The following example uses PyTorch backend.
>>> import dgl
>>> import torch
Create a graph of idtype int32.
>>> # (0, 1), (0, 2), (1, 2)
>>> g = dgl.graph((torch.tensor([0, 0, 1]).int(), torch.tensor([1, 2, 2]).int()))
>>> g.ndata['feat'] = torch.ones(3, 1)
>>> g.idtype
torch.int32
Cast the graph to one of idtype int64.
>>> # A cloned graph with an idtype of int64
>>> g_long = g.long()
>>> g_long.idtype
torch.int64
>>> # The idtype of the original graph does not change.
>>> g.idtype
torch.int32
>>> g_long.edges()
(tensor([0, 0, 1]), tensor([1, 2, 2]))
>>> g_long.ndata
{'feat': tensor([[1.],
[1.],
[1.]])}
See Also
--------
int
idtype
astype
"""
return self.astype(F.int64)
def int(self):
"""Return a heterograph object use int32 as index dtype,
with the ndata and edata as the original object
"""Cast the graph to one with idtype int32
If the graph already has idtype int32, the function directly returns it. Otherwise,
it returns a cloned graph of idtype int32 with features copied (shallow copy).
Returns
-------
DGLHeteroGraph
The graph object
DGLGraph
The graph of idtype int32.
Examples
--------
>>> g = dgl.bipartite(([0, 1, 1], [0, 0, 2]), 'user', 'plays', 'game',
>>> idtype=torch.int64)
>>> g_int = g.int() # Convert g to int32 indexed, not changing the original `g`
The following example uses PyTorch backend.
>>> import dgl
>>> import torch
Create a graph of idtype int64.
>>> # (0, 1), (0, 2), (1, 2)
>>> g = dgl.graph((torch.tensor([0, 0, 1]), torch.tensor([1, 2, 2])))
>>> g.ndata['feat'] = torch.ones(3, 1)
>>> g.idtype
torch.int64
Cast the graph to one of idtype int32.
>>> # A cloned graph with an idtype of int32
>>> g_int = g.int()
>>> g_int.idtype
torch.int32
>>> # The idtype of the original graph does not change.
>>> g.idtype
torch.int64
>>> g_int.edges()
(tensor([0, 0, 1], dtype=torch.int32), tensor([1, 2, 2], dtype=torch.int32))
>>> g_int.ndata
{'feat': tensor([[1.],
[1.],
[1.]])}
See Also
--------
long
idtype
astype
"""
return self.astype(F.int32)
......@@ -4280,7 +5562,7 @@ def reduce_dict_data(frames, reducer, order=None):
ret[k] = merger(flist)
return ret
def combine_frames(frames, ids):
def combine_frames(frames, ids, col_names=None):
"""Merge the frames into one frame, taking the common columns.
Return None if there are no common columns.
......@@ -4291,6 +5573,8 @@ def combine_frames(frames, ids):
List of frames
ids : List[int]
List of frame IDs
col_names : List[str], optional
Column names to consider. If not given, it considers all columns.
Returns
-------
......@@ -4298,7 +5582,10 @@ def combine_frames(frames, ids):
The resulting frame
"""
# find common columns and check if their schemes match
schemes = {key: scheme for key, scheme in frames[ids[0]].schemes.items()}
if col_names is None:
schemes = {key: scheme for key, scheme in frames[ids[0]].schemes.items()}
else:
schemes = {key: frames[ids[0]].schemes[key] for key in col_names}
for frame_id in ids:
frame = frames[frame_id]
for key, scheme in list(schemes.items()):
......
......@@ -547,6 +547,9 @@ class HeteroGraphIndex(ObjectBase):
"""
if order is None:
order = ""
elif order not in ['srcdst', 'eid']:
raise DGLError("Expect order to be one of None, 'srcdst', 'eid', "
"got {}".format(order))
edge_array = _CAPI_DGLHeteroEdges(self, int(etype), order)
src = F.from_dgl_nd(edge_array(0))
dst = F.from_dgl_nd(edge_array(1))
......
......@@ -76,8 +76,9 @@ class GatedGraphConv(nn.Block):
is the output feature size.
"""
with graph.local_scope():
assert graph.is_homogeneous(), \
"not a homograph; convert it with to_homo and pass in the edge type as argument"
assert graph.is_homogeneous, \
"not a homogeneous graph; convert it with to_homogeneous " \
"and pass in the edge type as argument"
zero_pad = nd.zeros((feat.shape[0], self._out_feats - feat.shape[1]),
ctx=feat.context)
feat = nd.concat(feat, zero_pad, dim=-1)
......
......@@ -229,8 +229,9 @@ class RelGraphConv(gluon.Block):
mx.ndarray.NDArray
New node features.
"""
assert g.is_homogeneous(), \
"not a homograph; convert it with to_homo and pass in the edge type as argument"
assert g.is_homogeneous, \
"not a homogeneous graph; convert it with to_homogeneous " \
"and pass in the edge type as argument"
with g.local_scope():
g.ndata['h'] = x
g.edata['type'] = etypes
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment