Unverified Commit cd484352 authored by Quan (Andy) Gan's avatar Quan (Andy) Gan Committed by GitHub
Browse files

[Transform] Docstring and subframes (#1962)



* update knn graph docs

* more docs

* [Doc] transform module docstrings

* remove copy_ndata and copy_edata

* fix

* lint

* fix

* fix

* fix

* clean up docstrings

* fix docstring

* dtype specifications

* addresses comments

* fix
Co-authored-by: default avatarMufei Li <mufeili1996@gmail.com>
Co-authored-by: default avatarxiang song(charlie.song) <classicxsong@gmail.com>
parent 6294677f
.. _api-sampling:
.. _api-dataloading:
dgl.dataloading
=================================
.. automodule:: dgl.dataloading
DataLoaders
-----------
PyTorch node/edge DataLoaders
-----------------------------
`````````````````````````````
.. currentmodule:: dgl.dataloading.pytorch
.. autoclass:: pytorch.NodeDataLoader
.. autoclass:: pytorch.EdgeDataLoader
.. autoclass:: NodeDataLoader
.. autoclass:: EdgeDataLoader
General collating functions
---------------------------
```````````````````````````
.. currentmodule:: dgl.dataloading
.. autoclass:: Collator
:members: dataset, collate
.. autoclass:: NodeCollator
:members: dataset, collate
.. autoclass:: EdgeCollator
:members: dataset, collate
Neighborhood Sampling Classes
-----------------------------
Base Multi-layer Neighborhood Sampling Class
--------------------------------------------
````````````````````````````````````````````
.. autoclass:: BlockSampler
:members: sample_frontier, sample_blocks
Uniform Node-wise Neighbor Sampling (GraphSAGE style)
-----------------------------------------------------
`````````````````````````````````````````````````````
.. autoclass:: MultiLayerNeighborSampler
:members: sample_frontier
.. _negative-sampling:
Negative Samplers for Link Prediction
-------------------------------------
.. autoclass:: negative_sampler.Uniform
.. currentmodule:: dgl.dataloading.negative_sampler
.. autoclass:: Uniform
:members: __call__
......@@ -11,4 +11,4 @@ API Reference
dgl.ops
dgl.function
sampling
dataloading
dgl.dataloading
......@@ -5,8 +5,6 @@ dgl.sampling
.. automodule:: dgl.sampling
Sampling algorithms on graphs.
Random walk sampling functions
------------------------------
......
.. _api-transform:
dgl.transform
=================================
.. automodule:: dgl.transform
Common algorithms on graphs.
.. autosummary::
:toctree: ../../generated/
line_graph
khop_adj
khop_graph
reverse
to_simple_graph
to_bidirected
laplacian_lambda_max
knn_graph
segmented_knn_graph
add_self_loop
remove_self_loop
metapath_reachable_graph
compact_graphs
to_block
to_simple
in_subgraph
out_subgraph
remove_edges
as_immutable_graph
as_heterograph
......@@ -108,6 +108,7 @@ Getting Started
api/python/dgl.ops
api/python/dgl.function
api/python/sampling
api/python/dgl.dataloading
.. toctree::
:maxdepth: 3
......
......@@ -3,10 +3,10 @@ computation dependency of necessary nodes with neighborhood sampling methods.
This includes
* :py:class:`~dgl.dataloading.pytorch.NodeDataLoader`` for iterating over the nodes in
* :py:class:`~dgl.dataloading.pytorch.NodeDataLoader` for iterating over the nodes in
a graph in minibatches.
* :py:class:`~dgl.dataloading.pytorch.EdgeDataLoader`` for iterating over the edges in
* :py:class:`~dgl.dataloading.pytorch.EdgeDataLoader` for iterating over the edges in
a graph in minibatches.
* Various sampler classes that perform neighborhood sampling for multi-layer GNNs.
......
......@@ -61,17 +61,18 @@ def _find_exclude_eids_with_reverse_types(g, eids, reverse_etype_map):
return exclude_eids
def _find_exclude_eids(g, exclude_mode, eids, **kwargs):
"""Find all edge IDs to exclude according to ``exclude_mode``.
"""Find all edge IDs to exclude according to :attr:`exclude_mode`.
Parameters
----------
g : DGLHeteroGraph
g : DGLGraph
The graph.
exclude_mode : str, optional
Can be either of the following,
None (default)
Does not exclude any edge.
'reverse_id'
Exclude all edges specified in ``eids``, as well as their reverse edges
of the same edge type.
......@@ -81,6 +82,7 @@ def _find_exclude_eids(g, exclude_mode, eids, **kwargs):
This mode assumes that the reverse of an edge with ID ``e`` and type
``etype`` will have ID ``reverse_eid_map[e]`` and type ``etype``.
'reverse_types'
Exclude all edges specified in ``eids``, as well as their reverse
edges of the corresponding edge types.
......@@ -110,32 +112,32 @@ def _find_exclude_eids(g, exclude_mode, eids, **kwargs):
class BlockSampler(object):
"""Abstract class specifying the neighborhood sampling strategy for DGL data loaders.
The main method for BlockSampler is :func:`~dgl.dataloading.BlockSampler.sample_blocks`,
The main method for BlockSampler is :meth:`sample_blocks`,
which generates a list of blocks for a multi-layer GNN given a set of seed nodes to
have their outputs computed.
The default implementation of :py:meth:`~dgl.dataloading.BlockSampler.sample_blocks` is
to repeat ``num_layers`` times the following procedure from the last layer to the first
The default implementation of :meth:`sample_blocks` is
to repeat :attr:`num_layers` times the following procedure from the last layer to the first
layer:
* Obtain a frontier. The frontier is defined as a graph with the same nodes as the
original graph but only the edges involved in message passing on the current layer.
Customizable via :py:meth:`~dgl.dataloading.BlockSampler.sample_frontier`.
Customizable via :meth:`sample_frontier`.
* Optionally, if the task is link prediction or edge classification, remove edges
connecting training node pairs. If the graph is undirected, also remove the
reverse edges. This is controlled by the argument :attr:`exclude_eids` in
:py:meth:``~dgl.dataloading.BlockSampler.sample_blocks`` method.
:meth:`sample_blocks` method.
* Convert the frontier into a block.
* Optionally assign the IDs of the edges in the original graph selected in the first step
to the block, controlled by the argument ``return_eids`` in
:py:meth:``~dgl.dataloading.BlockSampler.sample_blocks`` method.
:meth:`sample_blocks` method.
* Prepend the block to the block list to be returned.
All subclasses should override :py:meth:`~dgl.dataloading.BlockSampler.sample_frontier`
All subclasses should override :meth:`sample_frontier`
method while specifying the number of layers to sample in :attr:`num_layers` argument.
Parameters
......@@ -148,7 +150,7 @@ class BlockSampler(object):
Notes
-----
For the concept of frontiers and blocks, please refer to User Guide Section 6.
For the concept of frontiers and blocks, please refer to User Guide Section 6 [TODO].
"""
def __init__(self, num_layers, return_eids):
self.num_layers = num_layers
......@@ -157,11 +159,13 @@ class BlockSampler(object):
def sample_frontier(self, block_id, g, seed_nodes):
"""Generate the frontier given the output nodes.
The subclasses should override this function.
Parameters
----------
block_id : int
Represents which GNN layer the frontier is generated for.
g : DGLHeteroGraph
g : DGLGraph
The original graph.
seed_nodes : Tensor or dict[ntype, Tensor]
The output nodes by node type.
......@@ -171,12 +175,12 @@ class BlockSampler(object):
Returns
-------
DGLHeteroGraph
DGLGraph
The frontier generated for the current layer.
See also
--------
For the concept of frontiers and blocks, please refer to User Guide Section 6.
Notes
-----
For the concept of frontiers and blocks, please refer to User Guide Section 6 [TODO].
"""
raise NotImplementedError
......@@ -185,7 +189,7 @@ class BlockSampler(object):
Parameters
----------
g : DGLHeteroGraph
g : DGLGraph
The original graph.
seed_nodes : Tensor or dict[ntype, Tensor]
The output nodes by node type.
......@@ -197,12 +201,12 @@ class BlockSampler(object):
Returns
-------
list[DGLHeteroGraph]
list[DGLGraph]
The blocks generated for computing the multi-layer GNN output.
See also
--------
For the concept of frontiers and blocks, please refer to User Guide Section 6.
Notes
-----
For the concept of frontiers and blocks, please refer to User Guide Section 6 [TODO].
"""
blocks = []
exclude_eids = (
......@@ -248,13 +252,13 @@ class BlockSampler(object):
class Collator(ABC):
"""Abstract DGL collator for training GNNs on downstream tasks stochastically.
Provides a ``dataset`` object containing the collection of all nodes or edges,
as well as a ``collate`` method that combines a set of items from ``dataset`` and
obtains the blocks.
Provides a :attr:`dataset` object containing the collection of all nodes or edges,
as well as a :attr:`collate` method that combines a set of items from
:attr:`dataset` and obtains the blocks.
See also
--------
For the concept of blocks, please refer to User Guide Section 6.
Notes
-----
For the concept of blocks, please refer to User Guide Section 6 [TODO].
"""
@abstractproperty
def dataset(self):
......@@ -268,11 +272,11 @@ class Collator(ABC):
Parameters
----------
items : list[str, int]
The list of node or edge type-ID pairs.
The list of node or edge IDs or type-ID pairs.
See also
--------
For the concept of blocks, please refer to User Guide Section 6.
Notes
-----
For the concept of blocks, please refer to User Guide Section 6 [TODO].
"""
raise NotImplementedError
......@@ -282,7 +286,7 @@ class NodeCollator(Collator):
Parameters
----------
g : DGLHeteroGraph
g : DGLGraph
The graph.
nids : Tensor or dict[ntype, Tensor]
The node set to compute outputs.
......@@ -324,6 +328,12 @@ class NodeCollator(Collator):
"""Find the list of blocks necessary for computing the representation of given
nodes for a node classification/regression task.
Parameters
----------
items : list[int] or list[tuple[str, int]]
Either a list of node IDs (for homogeneous graphs), or a list of node type-ID
pairs (for heterogeneous graphs).
Returns
-------
input_nodes : Tensor or dict[ntype, Tensor]
......@@ -336,7 +346,7 @@ class NodeCollator(Collator):
If the original graph has multiple node types, return a dictionary of
node type names and node ID tensors. Otherwise, return a single tensor.
blocks : list[DGLHeteroGraph]
blocks : list[DGLGraph]
The list of blocks necessary for computing the representation.
"""
if isinstance(items[0], tuple):
......@@ -369,14 +379,14 @@ class EdgeCollator(Collator):
Parameters
----------
g : DGLHeteroGraph
g : DGLGraph
The graph from which the edges are iterated in minibatches and the subgraphs
are generated.
eids : Tensor or dict[etype, Tensor]
The edge set in graph :attr:`g` to compute outputs.
block_sampler : dgl.dataloading.BlockSampler
The neighborhood sampler.
g_sampling : DGLHeteroGraph, optional
g_sampling : DGLGraph, optional
The graph where neighborhood sampling and message passing is performed.
Note that this is not necessarily the same as :attr:`g`.
......@@ -425,7 +435,7 @@ class EdgeCollator(Collator):
or a dictionary of edge types and such pairs if the graph is heterogeneous.
A set of builtin negative samplers are provided in
:py:mod:`dgl.dataloading.negative_sampler`.
:ref:`the negative sampling module <negative-sampling>`.
Examples
--------
......@@ -613,6 +623,12 @@ class EdgeCollator(Collator):
"""Combines the sampled edges into a minibatch for edge classification, edge
regression, and link prediction tasks.
Parameters
----------
items : list[int] or list[tuple[str, int]]
Either a list of edge IDs (for homogeneous graphs), or a list of edge type-ID
pairs (for heterogeneous graphs).
Returns
-------
Either ``(input_nodes, pair_graph, blocks)``, or
......@@ -624,19 +640,19 @@ class EdgeCollator(Collator):
If the original graph has multiple node types, return a dictionary of
node type names and node ID tensors. Otherwise, return a single tensor.
pair_graph : DGLHeteroGraph
pair_graph : DGLGraph
The graph that contains only the edges in the minibatch as well as their incident
nodes.
Note that the metagraph of this graph will be identical to that of the original
graph.
negative_pair_graph : DGLHeteroGraph
negative_pair_graph : DGLGraph
The graph that contains only the edges connecting the source and destination nodes
yielded from the given negative sampler, if negative sampling is enabled.
Note that the metagraph of this graph will be identical to that of the original
graph.
blocks : list[DGLHeteroGraph]
blocks : list[DGLGraph]
The list of blocks necessary for computing the representation of the edges.
"""
if self.negative_sampler is None:
......
......@@ -11,7 +11,7 @@ class _BaseNegativeSampler(object):
Parameters
----------
g : DGLHeteroGraph
g : DGLGraph
The graph.
eids : Tensor or dict[etype, Tensor]
The sampled edges in the minibatch.
......
......@@ -9,14 +9,14 @@ class NodeDataLoader(DataLoader):
Parameters
----------
g : DGLHeteroGraph
g : DGLGraph
The graph.
nids : Tensor or dict[ntype, Tensor]
The node set to compute outputs.
block_sampler : :py:class:`~dgl.dataloading.BlockSampler`
block_sampler : dgl.dataloading.BlockSampler
The neighborhood sampler.
kwargs : dict
Arguments being passed to ``torch.utils.data.DataLoader``.
Arguments being passed to :py:class:`torch.utils.data.DataLoader`.
Examples
--------
......@@ -52,13 +52,13 @@ class EdgeDataLoader(DataLoader):
Parameters
----------
g : DGLHeteroGraph
g : DGLGraph
The graph.
nids : Tensor or dict[ntype, Tensor]
The node set to compute outputs.
block_sampler : :py:class:`~dgl.dataloading.BlockSampler`
block_sampler : dgl.dataloading.BlockSampler
The neighborhood sampler.
g_sampling : DGLHeteroGraph, optional
g_sampling : DGLGraph, optional
The graph where neighborhood sampling is performed.
One may wish to iterate over the edges in one graph while performing sampling in
......@@ -72,20 +72,28 @@ class EdgeDataLoader(DataLoader):
minibatch. Possible values are
* None,
* ``reverse``,
* ``reverse_id``,
* ``reverse_types``
See the docstring in :py:class:`~dgl.dataloading.EdgeCollator`.
See the description of the argument with the same name in the docstring of
:class:`~dgl.dataloading.EdgeCollator` for more details.
reverse_edge_ids : Tensor or dict[etype, Tensor], optional
See the docstring in :py:class:`~dgl.dataloading.EdgeCollator`.
The mapping from the original edge IDs to the ID of their reverse edges.
See the description of the argument with the same name in the docstring of
:class:`~dgl.dataloading.EdgeCollator` for more details.
reverse_etypes : dict[etype, etype], optional
See the docstring in :py:class:`~dgl.dataloading.EdgeCollator`.
The mapping from the original edge types to their reverse edge types.
See the description of the argument with the same name in the docstring of
:class:`~dgl.dataloading.EdgeCollator` for more details.
negative_sampler : callable, optional
The negative sampler.
See the docstring in :py:class:`~dgl.dataloading.EdgeCollator`.
See the description of the argument with the same name in the docstring of
:class:`~dgl.dataloading.EdgeCollator` for more details.
kwargs : dict
Arguments being passed to `torch.utils.data.DataLoader`.
Arguments being passed to :py:class:`torch.utils.data.DataLoader`.
Examples
--------
......@@ -167,7 +175,7 @@ class EdgeDataLoader(DataLoader):
See also
--------
:py:class:`~dgl.dataloading.EdgeCollator`
:class:`~dgl.dataloading.EdgeCollator`
For end-to-end usages, please refer to the following tutorial/examples:
......
......@@ -216,12 +216,12 @@ def metis_partition_assignment(g, k, balance_ntypes=None, balance_edges=False):
vwgt = F.stack(vwgt, 1)
shape = (np.prod(F.shape(vwgt),),)
vwgt = F.reshape(vwgt, shape)
vwgt = F.zerocopy_to_dgl_ndarray(vwgt)
vwgt = F.to_dgl_nd(vwgt)
print(
'Construct multi-constraint weights: {:.3f} seconds'.format(time.time() - start))
else:
vwgt = F.zeros((0,), F.int64, F.cpu())
vwgt = F.zerocopy_to_dgl_ndarray(vwgt)
vwgt = F.to_dgl_nd(vwgt)
start = time.time()
node_part = _CAPI_DGLMetisPartition_Hetero(sym_g._graph, k, vwgt)
......
"""Sampling operators.
This module contains the implementations of various sampling operators.
"""This module contains the implementations of various sampling operators.
"""
from .randomwalks import *
from .pinsage import *
......
......@@ -24,7 +24,7 @@ def sample_neighbors(g, nodes, fanout, edge_dir='in', prob=None, replace=False):
Parameters
----------
g : DGLGraph
The graph
The graph. Must be on CPU.
nodes : tensor or dict
Node IDs to sample neighbors from.
......@@ -57,7 +57,7 @@ def sample_neighbors(g, nodes, fanout, edge_dir='in', prob=None, replace=False):
Returns
-------
DGLGraph
A sampled subgraph containing only the sampled neighboring edges.
A sampled subgraph containing only the sampled neighboring edges. It is on CPU.
Examples
--------
......@@ -95,6 +95,8 @@ def sample_neighbors(g, nodes, fanout, edge_dir='in', prob=None, replace=False):
if len(g.ntypes) > 1:
raise DGLError("Must specify node type when the graph is not homogeneous.")
nodes = {g.ntypes[0] : nodes}
assert g.device == F.cpu(), "Graph must be on CPU."
nodes = utils.prepare_tensor_dict(g, nodes, 'nodes')
nodes_all_types = []
for ntype in g.ntypes:
......@@ -147,7 +149,7 @@ def select_topk(g, k, weight, nodes=None, edge_dir='in', ascending=False):
Parameters
----------
g : DGLGraph
The graph
The graph. Must be on CPU.
k : int or dict[etype, int]
The number of edges to be selected for each node on each edge type.
......@@ -178,7 +180,7 @@ def select_topk(g, k, weight, nodes=None, edge_dir='in', ascending=False):
Returns
-------
DGLGraph
A sampled subgraph containing only the sampled neighboring edges.
A sampled subgraph containing only the sampled neighboring edges. It is on CPU.
Examples
--------
......@@ -195,6 +197,7 @@ def select_topk(g, k, weight, nodes=None, edge_dir='in', ascending=False):
if len(g.ntypes) > 1:
raise DGLError("Must specify node type when the graph is not homogeneous.")
nodes = {g.ntypes[0] : nodes}
assert g.device == F.cpu(), "Graph must be on CPU."
# Parse nodes into a list of NDArrays.
nodes = utils.prepare_tensor_dict(g, nodes, 'nodes')
......
......@@ -8,6 +8,7 @@ from .. import transform
from .randomwalks import random_walk
from .neighbor import select_topk
from ..base import EID
from .. import utils
class RandomWalkNeighborSampler(object):
......@@ -29,7 +30,7 @@ class RandomWalkNeighborSampler(object):
Parameters
----------
G : DGLGraph
The graph.
The graph. It must be on CPU.
num_traversals : int
The maximum number of metapath-based traversals for a single random walk.
......@@ -53,24 +54,13 @@ class RandomWalkNeighborSampler(object):
The name of the edge feature to be stored on the returned graph with the number of
visits.
Inputs
------
seed_nodes : Tensor
A tensor of given node IDs of node type ``ntype`` to generate neighbors from. The
node type ``ntype`` is the beginning and ending node type of the given metapath.
Outputs
-------
g : DGLGraph
A homogeneous graph constructed by selecting neighbors for each given node according
to the algorithm above.
Examples
--------
See examples in :any:`PinSAGESampler`.
"""
def __init__(self, G, num_traversals, termination_prob,
num_random_walks, num_neighbors, metapath=None, weight_column='weights'):
assert G.device == F.cpu(), "Graph must be on CPU."
self.G = G
self.weight_column = weight_column
self.num_random_walks = num_random_walks
......@@ -96,6 +86,23 @@ class RandomWalkNeighborSampler(object):
# pylint: disable=no-member
def __call__(self, seed_nodes):
"""
Parameters
----------
seed_nodes : Tensor
A tensor of given node IDs of node type ``ntype`` to generate neighbors from. The
node type ``ntype`` is the beginning and ending node type of the given metapath.
It must be on CPU and have the same dtype as the ID type of the graph.
Returns
-------
g : DGLGraph
A homogeneous graph constructed by selecting neighbors for each given node according
to the algorithm above. The returned graph is on CPU.
"""
seed_nodes = utils.prepare_tensor(self.G, seed_nodes, 'seed_nodes')
seed_nodes = F.repeat(seed_nodes, self.num_random_walks, 0)
paths, _ = random_walk(
self.G, seed_nodes, metapath=self.full_metapath, restart_prob=self.restart_prob)
......@@ -163,17 +170,6 @@ class PinSAGESampler(RandomWalkNeighborSampler):
The name of the edge feature to be stored on the returned graph with the number of
visits.
Inputs
------
seed_nodes : Tensor
A tensor of given node IDs of node type ``ntype`` to generate neighbors from.
Outputs
-------
g : DGLHeteroGraph
A homogeneous graph constructed by selecting neighbors for each given node according
to PinSage algorithm.
Examples
--------
Generate a random bidirectional bipartite graph with 3000 "A" nodes and 5000 "B" nodes.
......
......@@ -29,22 +29,25 @@ def random_walk(g, nodes, *, metapath=None, length=None, prob=None, restart_prob
Parameters
----------
g : DGLGraph
The graph.
The graph. Must be on CPU.
nodes : Tensor
Node ID tensor from which the random walk traces starts.
The tensor must be on CPU, and must have the same dtype as the ID type
of the graph.
metapath : list[str or tuple of str], optional
Metapath, specified as a list of edge types.
Mutually exclusive with ``length``.
Mutually exclusive with :attr:`length`.
If omitted, DGL assumes that ``g`` only has one node & edge type. In this
case, the argument ``length`` specifies the length of random walk traces.
length : int, optional
Length of random walks.
Mutually exclusive with ``metapath``.
Mutually exclusive with :attr:`metapath`.
Only used when ``metapath`` is None.
Only used when :attr:`metapath` is None.
prob : str, optional
The name of the edge feature tensor on the graph storing the (unnormalized)
probabilities associated with each edge for choosing the next node.
......@@ -57,16 +60,23 @@ def random_walk(g, nodes, *, metapath=None, length=None, prob=None, restart_prob
restart_prob : float or Tensor, optional
Probability to terminate the current trace before each transition.
If a tensor is given, ``restart_prob`` should have the same length as ``metapath``.
If a tensor is given, :attr:`restart_prob` should have the same length as
:attr:`metapath` or :attr:`length`.
Returns
-------
traces : Tensor
A 2-dimensional node ID tensor with shape ``(num_seeds, len(metapath) + 1)``.
A 2-dimensional node ID tensor with shape ``(num_seeds, len(metapath) + 1)`` or
``(num_seeds, length + 1)`` if :attr:`metapath` is None.
types : Tensor
A 1-dimensional node type ID tensor with shape ``(len(metapath) + 1)``.
A 1-dimensional node type ID tensor with shape ``(len(metapath) + 1)`` or
``(length + 1)``.
The type IDs match the ones in the original graph ``g``.
Notes
-----
The returned tensors are on CPU.
Examples
--------
The following creates a homogeneous graph:
......@@ -126,6 +136,7 @@ def random_walk(g, nodes, *, metapath=None, length=None, prob=None, restart_prob
[ 2, 0, 1, 1, 3, 2, 2],
[ 0, 1, 1, 3, 0, 0, 0]]), tensor([0, 0, 1, 0, 0, 1, 0]))
"""
assert g.device == F.cpu(), "Graph must be on CPU."
n_etypes = len(g.canonical_etypes)
n_ntypes = len(g.ntypes)
......@@ -139,8 +150,8 @@ def random_walk(g, nodes, *, metapath=None, length=None, prob=None, restart_prob
metapath = [g.get_etype_id(etype) for etype in metapath]
gidx = g._graph
nodes = utils.toindex(nodes, g._idtype_str).todgltensor()
metapath = utils.toindex(metapath, g._idtype_str).todgltensor().copyto(nodes.ctx)
nodes = F.to_dgl_nd(utils.prepare_tensor(g, nodes, 'nodes'))
metapath = F.to_dgl_nd(utils.prepare_tensor(g, metapath, 'metapath'))
# Load the probability tensor from the edge frames
if prob is None:
......@@ -149,7 +160,7 @@ def random_walk(g, nodes, *, metapath=None, length=None, prob=None, restart_prob
p_nd = []
for etype in g.canonical_etypes:
if prob in g.edges[etype].data:
prob_nd = F.zerocopy_to_dgl_ndarray(g.edges[etype].data[prob])
prob_nd = F.to_dgl_nd(g.edges[etype].data[prob])
if prob_nd.ctx != nodes.ctx:
raise ValueError(
'context of seed node array and edges[%s].data[%s] are different' %
......@@ -162,15 +173,15 @@ def random_walk(g, nodes, *, metapath=None, length=None, prob=None, restart_prob
if restart_prob is None:
traces, types = _CAPI_DGLSamplingRandomWalk(gidx, nodes, metapath, p_nd)
elif F.is_tensor(restart_prob):
restart_prob = F.zerocopy_to_dgl_ndarray(restart_prob)
restart_prob = F.to_dgl_nd(restart_prob)
traces, types = _CAPI_DGLSamplingRandomWalkWithStepwiseRestart(
gidx, nodes, metapath, p_nd, restart_prob)
else:
traces, types = _CAPI_DGLSamplingRandomWalkWithRestart(
gidx, nodes, metapath, p_nd, restart_prob)
traces = F.zerocopy_from_dgl_ndarray(traces)
types = F.zerocopy_from_dgl_ndarray(types)
traces = F.from_dgl_nd(traces)
types = F.from_dgl_nd(types)
return traces, types
def pack_traces(traces, types):
......@@ -181,9 +192,9 @@ def pack_traces(traces, types):
Parameters
----------
traces : Tensor
A 2-dimensional node ID tensor.
A 2-dimensional node ID tensor. Must be on CPU and either ``int32`` or ``int64``.
types : Tensor
A 1-dimensional node type ID tensor.
A 1-dimensional node type ID tensor. Must be on CPU and either ``int32`` or ``int64``.
Returns
-------
......@@ -197,6 +208,10 @@ def pack_traces(traces, types):
offsets : Tensor
Offset of each trace in the original traces tensor in the new concatenated tensor.
Notes
-----
The returned tensors are on CPU.
Examples
--------
>>> g2 = dgl.heterograph({
......@@ -233,15 +248,17 @@ def pack_traces(traces, types):
>>> vids[1], vtypes[1]
(tensor([0, 1, 1, 3, 0, 0, 0]), tensor([0, 0, 1, 0, 0, 1, 0]))
"""
traces = F.zerocopy_to_dgl_ndarray(traces)
types = F.zerocopy_to_dgl_ndarray(types)
assert F.is_tensor(traces) and F.context(traces) == F.cpu(), "traces must be a CPU tensor"
assert F.is_tensor(types) and F.context(types) == F.cpu(), "types must be a CPU tensor"
traces = F.to_dgl_nd(traces)
types = F.to_dgl_nd(types)
concat_vids, concat_types, lengths, offsets = _CAPI_DGLSamplingPackTraces(traces, types)
concat_vids = F.zerocopy_from_dgl_ndarray(concat_vids)
concat_types = F.zerocopy_from_dgl_ndarray(concat_types)
lengths = F.zerocopy_from_dgl_ndarray(lengths)
offsets = F.zerocopy_from_dgl_ndarray(offsets)
concat_vids = F.from_dgl_nd(concat_vids)
concat_types = F.from_dgl_nd(concat_types)
lengths = F.from_dgl_nd(lengths)
offsets = F.from_dgl_nd(offsets)
return concat_vids, concat_types, lengths, offsets
......
......@@ -43,7 +43,7 @@ def node_subgraph(graph, nodes):
Returns
-------
G : DGLHeteroGraph
G : DGLGraph
The subgraph.
The nodes and edges in the subgraph are relabeled using consecutive
......@@ -59,16 +59,18 @@ def node_subgraph(graph, nodes):
Instantiate a heterograph.
>>> plays_g = dgl.bipartite(([0, 1, 1, 2], [0, 0, 2, 1]), 'user', 'plays', 'game')
>>> follows_g = dgl.graph(([0, 1, 1], [1, 2, 2]), 'user', 'follows')
>>> g = dgl.hetero_from_relations([plays_g, follows_g])
>>> g = dgl.heterograph({
... ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 2, 1]),
... ('user', 'follows', 'user'): ([0, 1, 1], [1, 2, 2])})
>>> # Set node features
>>> g.nodes['user'].data['h'] = torch.tensor([[0.], [1.], [2.]])
Get subgraphs.
>>> g.subgraph({'user': [4, 5]})
An error occurs as these nodes do not exist.
Traceback (most recent call last):
...
dgl._ffi.base.DGLError: ...
>>> sub_g = g.subgraph({'user': [1, 2]})
>>> print(sub_g)
Graph(num_nodes={'user': 2, 'game': 0},
......@@ -158,7 +160,7 @@ def edge_subgraph(graph, edges, preserve_nodes=False):
Returns
-------
G : DGLHeteroGraph
G : DGLGraph
The subgraph.
The nodes and edges are relabeled using consecutive integers from 0.
......@@ -173,16 +175,18 @@ def edge_subgraph(graph, edges, preserve_nodes=False):
Instantiate a heterograph.
>>> plays_g = dgl.bipartite(([0, 1, 1, 2], [0, 0, 2, 1]), 'user', 'plays', 'game')
>>> follows_g = dgl.graph(([0, 1, 1], [1, 2, 2]), 'user', 'follows')
>>> g = dgl.hetero_from_relations([plays_g, follows_g])
>>> g = dgl.heterograph({
... ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 2, 1]),
... ('user', 'follows', 'user'): ([0, 1, 1], [1, 2, 2])})
>>> # Set edge features
>>> g.edges['follows'].data['h'] = torch.tensor([[0.], [1.], [2.]])
Get subgraphs.
>>> g.edge_subgraph({('user', 'follows', 'user'): [5, 6]})
An error occurs as these edges do not exist.
Traceback (most recent call last):
...
dgl._ffi.base.DGLError: ...
>>> sub_g = g.edge_subgraph({('user', 'follows', 'user'): [1, 2],
>>> ('user', 'plays', 'game'): [2]})
>>> print(sub_g)
......@@ -244,15 +248,18 @@ def edge_subgraph(graph, edges, preserve_nodes=False):
DGLHeteroGraph.edge_subgraph = edge_subgraph
def in_subgraph(g, nodes):
"""Extract the subgraph containing only the in edges of the given nodes.
"""Return the subgraph induced on the inbound edges of all edge types of the
given nodes.
The subgraph keeps the same type schema and the cardinality of the original one.
Node/edge features are not preserved. The original IDs of
the extracted edges are stored as the `dgl.EID` feature in the returned graph.
All the nodes are preserved regardless of whether they have an edge or not.
The metagraph of the returned subgraph is the same as the parent graph.
Features are copied from the original graph.
Parameters
----------
g : DGLHeteroGraph
g : DGLGraph
Full graph structure.
nodes : tensor or dict
Node ids to sample neighbors from. The allowed types
......@@ -261,8 +268,53 @@ def in_subgraph(g, nodes):
Returns
-------
DGLHeteroGraph
DGLGraph
The subgraph.
One can retrieve the mapping from subgraph edge ID to parent
edge ID via ``dgl.EID`` edge features of the subgraph.
Examples
--------
The following example uses PyTorch backend.
Instantiate a heterograph.
>>> g = dgl.heterograph({
... ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 2, 1]),
... ('user', 'follows', 'user'): ([0, 1, 1], [1, 2, 2])})
>>> # Set edge features
>>> g.edges['follows'].data['h'] = torch.tensor([[0.], [1.], [2.]])
Get subgraphs.
>>> sub_g = g.in_subgraph({'user': [2], 'game': [2]})
>>> print(sub_g)
Graph(num_nodes={'game': 3, 'user': 3},
num_edges={('user', 'plays', 'game'): 1, ('user', 'follows', 'user'): 2},
metagraph=[('user', 'game', 'plays'), ('user', 'user', 'follows')])
Get the original node/edge indices.
>>> sub_g.edges['plays'].data[dgl.EID]
tensor([2])
>>> sub_g.edges['follows'].data[dgl.EID]
tensor([1, 2])
Get the copied edge features.
>>> sub_g.edges['follows'].data['h']
tensor([[1.],
[2.]])
>>> sub_g.edges['follows'].data['h'] += 1
>>> g.edges['follows'].data['h'] # Features are not shared.
tensor([[0.],
[1.],
[2.]])
See also
--------
out_subgraph
"""
if g.is_block:
raise DGLError('Extracting subgraph of a block graph is not allowed.')
......@@ -285,15 +337,18 @@ def in_subgraph(g, nodes):
DGLHeteroGraph.in_subgraph = in_subgraph
def out_subgraph(g, nodes):
"""Extract the subgraph containing only the out edges of the given nodes.
"""Return the subgraph induced on the outbound edges of all edge types of the
given nodes.
The subgraph keeps the same type schema and the cardinality of the original one.
Node/edge features are not preserved. The original IDs of
the extracted edges are stored as the `dgl.EID` feature in the returned graph.
All the nodes are preserved regardless of whether they have an edge or not.
The metagraph of the returned subgraph is the same as the parent graph.
Features are copied from the original graph.
Parameters
----------
g : DGLHeteroGraph
g : DGLGraph
Full graph structure.
nodes : tensor or dict
Node ids to sample neighbors from. The allowed types
......@@ -302,8 +357,53 @@ def out_subgraph(g, nodes):
Returns
-------
DGLHeteroGraph
DGLGraph
The subgraph.
One can retrieve the mapping from subgraph edge ID to parent
edge ID via ``dgl.EID`` edge features of the subgraph.
Examples
--------
The following example uses PyTorch backend.
Instantiate a heterograph.
>>> g = dgl.heterograph({
... ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 2, 1]),
... ('user', 'follows', 'user'): ([0, 1, 1], [1, 2, 2])})
>>> # Set edge features
>>> g.edges['follows'].data['h'] = torch.tensor([[0.], [1.], [2.]])
Get subgraphs.
>>> sub_g = g.out_subgraph({'user': [1]})
>>> print(sub_g)
Graph(num_nodes={'game': 3, 'user': 3},
num_edges={('user', 'plays', 'game'): 2, ('user', 'follows', 'user'): 2},
metagraph=[('user', 'game', 'plays'), ('user', 'user', 'follows')])
Get the original node/edge indices.
>>> sub_g.edges['plays'].data[dgl.EID]
tensor([1, 2])
>>> sub_g.edges['follows'].data[dgl.EID]
tensor([1, 2])
Get the copied edge features.
>>> sub_g.edges['follows'].data['h']
tensor([[1.],
[2.]])
>>> sub_g.edges['follows'].data['h'] += 1
>>> g.edges['follows'].data['h'] # Features are not shared.
tensor([[0.],
[1.],
[2.]])
See also
--------
in_subgraph
"""
if g.is_block:
raise DGLError('Extracting subgraph of a block graph is not allowed.')
......@@ -342,7 +442,7 @@ def node_type_subgraph(graph, ntypes):
Returns
-------
G : DGLHeteroGraph
G : DGLGraph
The subgraph.
Examples
......@@ -351,9 +451,9 @@ def node_type_subgraph(graph, ntypes):
Instantiate a heterograph.
>>> plays_g = dgl.bipartite(([0, 1, 1, 2], [0, 0, 2, 1]), 'user', 'plays', 'game')
>>> follows_g = dgl.graph(([0, 1, 1], [1, 2, 2]), 'user', 'follows')
>>> g = dgl.hetero_from_relations([plays_g, follows_g])
>>> g = dgl.heterograph({
... ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 2, 1]),
... ('user', 'follows', 'user'): ([0, 1, 1], [1, 2, 2])})
>>> # Set node features
>>> g.nodes['user'].data['h'] = torch.tensor([[0.], [1.], [2.]])
......@@ -409,7 +509,7 @@ def edge_type_subgraph(graph, etypes):
Returns
-------
G : DGLHeteroGraph
G : DGLGraph
The subgraph.
Examples
......@@ -418,9 +518,9 @@ def edge_type_subgraph(graph, etypes):
Instantiate a heterograph.
>>> plays_g = dgl.bipartite(([0, 1, 1, 2], [0, 0, 2, 1]), 'user', 'plays', 'game')
>>> follows_g = dgl.graph(([0, 1, 1], [1, 2, 2]), 'user', 'follows')
>>> g = dgl.hetero_from_relations([plays_g, follows_g])
>>> g = dgl.heterograph({
... ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 2, 1]),
... ('user', 'follows', 'user'): ([0, 1, 1], [1, 2, 2])})
>>> # Set edge features
>>> g.edges['follows'].data['h'] = torch.tensor([[0.], [1.], [2.]])
......@@ -495,9 +595,10 @@ def _create_hetero_subgraph(parent, sgi, induced_nodes, induced_edges):
DGLGraph
Graph
"""
node_frames, edge_frames = utils.extract_subframes(parent, induced_nodes, induced_edges)
hsg = DGLHeteroGraph(sgi.graph, parent.ntypes, parent.etypes,
node_frames, edge_frames)
node_frames = utils.extract_node_subframes(parent, induced_nodes)
edge_frames = utils.extract_edge_subframes(parent, induced_edges)
hsg = DGLHeteroGraph(sgi.graph, parent.ntypes, parent.etypes)
utils.set_new_frames(hsg, node_frames=node_frames, edge_frames=edge_frames)
return hsg
_init_api("dgl.subgraph")
This diff is collapsed.
......@@ -754,8 +754,8 @@ def relabel(x):
F.copy_to(F.arange(0, len(unique_x), dtype), ctx))
return unique_x, old_to_new
def extract_subframes(graph, nodes, edges):
"""Extract node/edge features of the given nodes and edges from :attr:`graph`
def extract_node_subframes(graph, nodes):
"""Extract node features of the given nodes from :attr:`graph`
and return them in frames.
Note that this function does not perform actual tensor memory copy but using `Frame.subframe`
......@@ -771,17 +771,11 @@ def extract_subframes(graph, nodes, edges):
Node IDs. If not None, the list length must be equal to the number of node types
in the graph. The returned frames store the node IDs in the ``dgl.NID`` field
unless it is None, which means the whole frame is shallow-copied.
edges : list[Tensor] or None
Edge IDs. If not None, the list length must be equal to the number of edge types
in the graph. The returned frames store the edge IDs in the ``dgl.EID`` field
unless it is None, which means the whole frame is shallow-copied.
Returns
-------
list[Frame]
Extracted node frames.
list[Frame]
Extracted edge frames.
"""
if nodes is None:
node_frames = [nf.clone() for nf in graph._node_frames]
......@@ -791,6 +785,67 @@ def extract_subframes(graph, nodes, edges):
subf = graph._node_frames[i].subframe(ind_nodes)
subf[NID] = ind_nodes
node_frames.append(subf)
return node_frames
def extract_node_subframes_for_block(graph, srcnodes, dstnodes):
    """Extract the input node features and output node features of the given nodes from
    :attr:`graph` and return them in frames ready for a block.

    Note that this function does not perform actual tensor memory copy but uses
    `Frame.subframe` to get the features.

    Parameters
    ----------
    graph : DGLGraph
        The graph to extract features from.
    srcnodes : list[Tensor]
        Input node IDs. The list length must be equal to the number of node types
        in the graph. The returned frames store the node IDs in the ``dgl.NID`` field.
    dstnodes : list[Tensor]
        Output node IDs. The list length must be equal to the number of node types
        in the graph. The returned frames store the node IDs in the ``dgl.NID`` field.

    Returns
    -------
    list[Frame]
        Extracted node frames: one frame per node type for the source nodes,
        followed by one frame per node type for the destination nodes.
    """
    # NOTE: unlike extract_node_subframes, None is NOT accepted here; both ID
    # lists must be provided (the old docstring claimed None was shallow-copied,
    # but enumerate(None) would raise TypeError).
    def _subframes(node_ids):
        # One subframe per node type; record the original IDs under dgl.NID.
        frames = []
        for ntid, ind_nodes in enumerate(node_ids):
            subf = graph._node_frames[ntid].subframe(ind_nodes)
            subf[NID] = ind_nodes
            frames.append(subf)
        return frames

    # A block stores source-node frames first, then destination-node frames.
    return _subframes(srcnodes) + _subframes(dstnodes)
def extract_edge_subframes(graph, edges):
"""Extract edge features of the given edges from :attr:`graph`
and return them in frames.
Note that this function does not perform actual tensor memory copy but using `Frame.subframe`
to get the features. If :attr:`edges` is None, it performs a shallow copy of the
original edge frames that only copies the dictionary structure but not the tensor
contents.
Parameters
----------
graph : DGLGraph
The graph to extract features from.
edges : list[Tensor] or None
Edge IDs. If not None, the list length must be equal to the number of edge types
in the graph. The returned frames store the edge IDs in the ``dgl.EID`` field
unless it is None, which means the whole frame is shallow-copied.
Returns
-------
list[Frame]
Extracted edge frames.
"""
if edges is None:
edge_frames = [nf.clone() for nf in graph._edge_frames]
else:
......@@ -799,7 +854,32 @@ def extract_subframes(graph, nodes, edges):
subf = graph._edge_frames[i].subframe(ind_edges)
subf[EID] = ind_edges
edge_frames.append(subf)
return node_frames, edge_frames
return edge_frames
def set_new_frames(graph, *, node_frames=None, edge_frames=None):
    """Replace the node and/or edge frames of a graph with new ones.

    Parameters
    ----------
    graph : DGLGraph
        The graph whose node and edge frames are to be updated.
    node_frames : list[Frame], optional
        New node frames. If None (default), the node frames are left untouched.
    edge_frames : list[Frame], optional
        New edge frames. If None (default), the edge frames are left untouched.
    """
    if node_frames is not None:
        # Internal invariant: one frame per node type.
        num_ntypes = len(graph.ntypes)
        assert num_ntypes == len(node_frames), \
            "[BUG] number of node frames different from number of node types"
        graph._node_frames = node_frames
    if edge_frames is not None:
        # Internal invariant: one frame per edge type.
        num_etypes = len(graph.etypes)
        assert num_etypes == len(edge_frames), \
            "[BUG] number of edge frames different from number of edge types"
        graph._edge_frames = edge_frames
def set_num_threads(num_threads):
"""Set the number of OMP threads in the process.
......
......@@ -4,6 +4,7 @@ import numpy as np
import os
import dgl
import dgl.function as fn
import dgl.partition
import backend as F
from dgl.graph_index import from_scipy_sparse_matrix
import unittest
......@@ -329,8 +330,8 @@ def test_add_reverse_edges():
ub, vb = bg.all_edges(order='eid', etype=('user', 'plays', 'game'))
assert F.array_equal(u, ub)
assert F.array_equal(v, vb)
assert len(bg.edges['plays'].data) == 0
assert len(bg.edges['follows'].data) == 0
assert set(bg.edges['plays'].data.keys()) == {dgl.EID}
assert set(bg.edges['follows'].data.keys()) == {dgl.EID}
# donot share ndata and edata
bg = dgl.add_reverse_edges(g, copy_ndata=False, copy_edata=False, ignore_bipartite=True)
......@@ -448,7 +449,7 @@ def test_khop_adj():
feat = F.randn((N, 5))
g = dgl.DGLGraph(nx.erdos_renyi_graph(N, 0.3))
for k in range(3):
adj = F.tensor(dgl.khop_adj(g, k))
adj = F.tensor(F.swapaxes(dgl.khop_adj(g, k), 0, 1))
# use original graph to do message passing for k times.
g.ndata['h'] = feat
for _ in range(k):
......@@ -484,6 +485,7 @@ def create_large_graph(num_nodes):
row = np.random.choice(num_nodes, num_nodes * 10)
col = np.random.choice(num_nodes, num_nodes * 10)
spm = spsp.coo_matrix((np.ones(len(row)), (row, col)))
spm.sum_duplicates()
return dgl.graph(spm)
......@@ -495,6 +497,7 @@ def get_nodeflow(g, node_ids, num_layers):
seed_nodes=node_ids)
return next(iter(sampler))
# Disabled since everything will be on heterogeneous graphs
@unittest.skipIf(F._default_context_str == 'gpu', reason="GPU not implemented")
def test_partition_with_halo():
g = create_large_graph(1000)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment