Commit e667545d authored by Quan (Andy) Gan, committed by GitHub

[Feature] Node2vec (#2992)



* add seal example

* 1. add paper information in examples/README
  2. adjust code
  3. option test

* use latest `to_simple` to replace coalesce graph function

* remove outdated codes

* remove useless comment

* Node2vec
  1. implement node2vec random walk C++ op
  2. implement node2vec model
  3. implement node2vec example

* modify CMakeLists file

* refine c++ codes

* refine c++ codes

* add missing whitespace

* refine python codes

* add codes

* add node2vec_impl.h

* fix codes

* fix code style problem

* fixes

* remove

* lots of changes

* add benchmark

* fixes
Co-authored-by: smilexuhc <smile.xuhc@gmail.com>
Co-authored-by: Minjie Wang <wmjlyjemaine@gmail.com>
parent 73594814
import time

import dgl
import torch

from .. import utils


def _random_walk(g, seeds, length):
    return dgl.sampling.random_walk(g, seeds, length=length)


def _node2vec(g, seeds, length):
    return dgl.sampling.node2vec_random_walk(g, seeds, 1, 1, length)


@utils.benchmark('time')
@utils.parametrize_cpu('graph_name', ['cora', 'livejournal', 'friendster'])
@utils.parametrize('num_seeds', [10, 100, 1000])
@utils.parametrize('length', [2, 5, 10, 20])
@utils.parametrize('algorithm', ['_random_walk', '_node2vec'])
def track_time(graph_name, num_seeds, length, algorithm):
    device = utils.get_bench_device()
    graph = utils.get_graph(graph_name, 'csr')
    seeds = torch.randint(0, graph.num_nodes(), (num_seeds,))
    print(graph_name, num_seeds, length)
    alg = globals()[algorithm]

    # dry run
    for i in range(5):
        _ = alg(graph, seeds, length=length)

    # timing
    with utils.Timer() as t:
        for i in range(50):
            _ = alg(graph, seeds, length=length)

    return t.elapsed_secs / 50
# DGL Implementation of Node2vec

This DGL example implements the graph embedding model proposed in the paper
[node2vec: Scalable Feature Learning for Networks](https://arxiv.org/abs/1607.00653).
The authors' reference implementation is available at [Node2vec](https://github.com/aditya-grover/node2vec).
Example implementor
----------------------
This example was implemented by [Smile](https://github.com/Smilexuhc) during his internship at the AWS Shanghai AI Lab.
The graph dataset used in this example
---------------------------------------
cora
- NumNodes: 2708
- NumEdges: 10556
ogbn-products
- NumNodes: 2449029
- NumEdges: 61859140
Dependencies
--------------------------------
- Python 3.6+
- PyTorch 1.5.0+
- ogb
How to run example files
--------------------------------
To train a node2vec model:
```shell script
python main.py --task="train"
```
To time node2vec random walks:
```shell script
python main.py --task="time" --runs=10
```
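The same training loop can also be driven from Python directly. A minimal sketch using the `load_graph` helper and `Node2vecModel` wrapper shipped with this example (hyperparameters here are illustrative and mirror the CLI defaults in `utils.py`):

```python
from model import Node2vecModel
from utils import load_graph

# Load cora and its train/val split
graph, eval_set = load_graph('cora')

trainer = Node2vecModel(graph, embedding_dim=128, walk_length=50,
                        p=0.25, q=4.0, num_walks=10,
                        eval_set=eval_set, eval_steps=1, device='cpu')
trainer.train(epochs=100, batch_size=128, learning_rate=0.01)

emb = trainer.embedding()  # (num_nodes, embedding_dim) tensor of node embeddings
```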
Performance
-------------------------
**Setting:** `walk_length=50, p=0.25, q=4.0`
| Dataset | DGL | PyG |
| -------- | :---------: | :---------: |
| cora | 0.0092s | 0.0179s |
| products | 66.22s | 77.65s |
Note that the numbers in the table are averages over multiple trials:
50 trials for cora and 10 trials for ogbn-products.
import time

from dgl.sampling import node2vec_random_walk
from model import Node2vecModel
from utils import load_graph, parse_arguments


def time_randomwalk(graph, args):
    """
    Measure the time cost of node2vec random walks.
    """
    start_time = time.time()

    # default setting for timing
    params = {'p': 0.25,
              'q': 4,
              'walk_length': 50}
    for i in range(args.runs):
        node2vec_random_walk(graph, graph.nodes(), **params)
    end_time = time.time()
    cost_time_avg = (end_time - start_time) / args.runs
    print("Ran {} trials on dataset {}, mean run time: {:.3f}s".format(args.runs, args.dataset, cost_time_avg))


def train_node2vec(graph, eval_set, args):
    """
    Train the node2vec model.
    """
    trainer = Node2vecModel(graph,
                            embedding_dim=args.embedding_dim,
                            walk_length=args.walk_length,
                            p=args.p,
                            q=args.q,
                            num_walks=args.num_walks,
                            eval_set=eval_set,
                            eval_steps=1,
                            device=args.device)

    trainer.train(epochs=args.epochs, batch_size=args.batch_size, learning_rate=0.01)


if __name__ == '__main__':
    args = parse_arguments()
    graph, eval_set = load_graph(args.dataset)

    if args.task == 'train':
        print("Training node2vec model")
        train_node2vec(graph, eval_set, args)
    elif args.task == 'time':
        print("Timing random walks")
        time_randomwalk(graph, args)
    else:
        raise ValueError('Unsupported task: {}'.format(args.task))
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from sklearn.linear_model import LogisticRegression

from dgl.sampling import node2vec_random_walk


class Node2vec(nn.Module):
    """Node2vec model from the paper `node2vec: Scalable Feature Learning for Networks
    <https://arxiv.org/abs/1607.00653>`__.

    Attributes
    ----------
    g: DGLGraph
        The graph.
    embedding_dim: int
        Dimension of node embedding.
    walk_length: int
        Length of each trace.
    p: float
        Likelihood of immediately revisiting a node in the walk. Same notation as in the paper.
    q: float
        Control parameter to interpolate between breadth-first strategy and depth-first strategy.
        Same notation as in the paper.
    num_walks: int
        Number of random walks for each node. Default: 10.
    window_size: int
        Maximum distance between the center node and predicted node. Default: 5.
    num_negatives: int
        The number of negative samples for each positive sample. Default: 5.
    use_sparse: bool
        If set to True, use PyTorch's sparse embedding and optimizer. Default: ``True``.
    weight_name : str, optional
        The name of the edge feature tensor on the graph storing the (unnormalized)
        probabilities associated with each edge for choosing the next node.

        The feature tensor must be non-negative and the sum of the probabilities
        must be positive for the outbound edges of all nodes (although they don't have
        to sum up to one). The result will be undefined otherwise.

        If omitted, DGL assumes that the neighbors are picked uniformly.
    """
    def __init__(self, g, embedding_dim, walk_length, p, q, num_walks=10, window_size=5, num_negatives=5,
                 use_sparse=True, weight_name=None):
        super(Node2vec, self).__init__()

        assert walk_length >= window_size

        self.g = g
        self.embedding_dim = embedding_dim
        self.walk_length = walk_length
        self.p = p
        self.q = q
        self.num_walks = num_walks
        self.window_size = window_size
        self.num_negatives = num_negatives
        self.N = self.g.num_nodes()
        if weight_name is not None:
            self.prob = weight_name
        else:
            self.prob = None

        self.embedding = nn.Embedding(self.N, embedding_dim, sparse=use_sparse)

    def reset_parameters(self):
        self.embedding.reset_parameters()

    def sample(self, batch):
        """
        Generate positive and negative samples.
        Positive samples are generated from random walks;
        negative samples are generated by uniform random sampling.
        """
        if not isinstance(batch, torch.Tensor):
            batch = torch.tensor(batch)

        batch = batch.repeat(self.num_walks)
        # positive
        pos_traces = node2vec_random_walk(self.g, batch, self.p, self.q, self.walk_length, self.prob)
        pos_traces = pos_traces.unfold(1, self.window_size, 1)  # rolling window
        pos_traces = pos_traces.contiguous().view(-1, self.window_size)

        # negative
        neg_batch = batch.repeat(self.num_negatives)
        neg_traces = torch.randint(self.N, (neg_batch.size(0), self.walk_length))
        neg_traces = torch.cat([neg_batch.view(-1, 1), neg_traces], dim=-1)
        neg_traces = neg_traces.unfold(1, self.window_size, 1)  # rolling window
        neg_traces = neg_traces.contiguous().view(-1, self.window_size)

        return pos_traces, neg_traces

    def forward(self, nodes=None):
        """
        Returns the embeddings of the input nodes.

        Parameters
        ----------
        nodes: Tensor, optional
            Input nodes. If `None`, returns the embeddings of all nodes.

        Returns
        -------
        Tensor
            Node embeddings.
        """
        emb = self.embedding.weight
        if nodes is None:
            return emb
        else:
            return emb[nodes]

    def loss(self, pos_trace, neg_trace):
        """
        Computes the loss given positive and negative random walks.

        Parameters
        ----------
        pos_trace: Tensor
            Positive random walk traces.
        neg_trace: Tensor
            Negative random walk traces.
        """
        e = 1e-15

        # Positive
        pos_start, pos_rest = pos_trace[:, 0], pos_trace[:, 1:].contiguous()  # start node and following trace
        w_start = self.embedding(pos_start).unsqueeze(dim=1)
        w_rest = self.embedding(pos_rest)
        pos_out = (w_start * w_rest).sum(dim=-1).view(-1)

        # Negative
        neg_start, neg_rest = neg_trace[:, 0], neg_trace[:, 1:].contiguous()
        w_start = self.embedding(neg_start).unsqueeze(dim=1)
        w_rest = self.embedding(neg_rest)
        neg_out = (w_start * w_rest).sum(dim=-1).view(-1)

        # compute loss
        pos_loss = -torch.log(torch.sigmoid(pos_out) + e).mean()
        neg_loss = -torch.log(1 - torch.sigmoid(neg_out) + e).mean()

        return pos_loss + neg_loss

    def loader(self, batch_size):
        """
        Parameters
        ----------
        batch_size: int
            Batch size.

        Returns
        -------
        DataLoader
            Node2vec training data loader.
        """
        return DataLoader(torch.arange(self.N), batch_size=batch_size, shuffle=True, collate_fn=self.sample)

    @torch.no_grad()
    def evaluate(self, x_train, y_train, x_val, y_val):
        """
        Evaluate the quality of the embeddings via a downstream node classification
        task with logistic regression.
        """
        x_train = self.forward(x_train)
        x_val = self.forward(x_val)

        x_train, y_train = x_train.cpu().numpy(), y_train.cpu().numpy()
        x_val, y_val = x_val.cpu().numpy(), y_val.cpu().numpy()
        lr = LogisticRegression(solver='lbfgs', multi_class='auto', max_iter=150).fit(x_train, y_train)

        return lr.score(x_val, y_val)
class Node2vecModel(object):
    """
    Wrapper of the ``Node2vec`` class with a ``train`` method.

    Attributes
    ----------
    g: DGLGraph
        The graph.
    embedding_dim: int
        Dimension of node embedding.
    walk_length: int
        Length of each trace.
    p: float
        Likelihood of immediately revisiting a node in the walk.
    q: float
        Control parameter to interpolate between breadth-first strategy and depth-first strategy.
    num_walks: int
        Number of random walks for each node. Default: 10.
    window_size: int
        Maximum distance between the center node and predicted node. Default: 5.
    num_negatives: int
        The number of negative samples for each positive sample. Default: 5.
    use_sparse: bool
        If set to True, uses PyTorch's sparse embedding and optimizer. Default: ``True``.
    weight_name : str, optional
        The name of the edge feature tensor on the graph storing the (unnormalized)
        probabilities associated with each edge for choosing the next node.

        The feature tensor must be non-negative and the sum of the probabilities
        must be positive for the outbound edges of all nodes (although they don't have
        to sum up to one). The result will be undefined otherwise.

        If omitted, DGL assumes that the neighbors are picked uniformly. Default: ``None``.
    eval_set: list of tuples (Tensor, Tensor)
        ``[(nodes_train, y_train), (nodes_val, y_val)]``.
        If omitted, the model will not be evaluated. Default: ``None``.
    eval_steps: int
        Interval (in epochs) between evaluations.
        If set <= 0, the model will not be evaluated. Default: ``-1``.
    device: str
        Device to train on, e.g. ``'cpu'`` or ``'cuda'``. Default: ``'cpu'``.
    """
    def __init__(self, g, embedding_dim, walk_length, p=1.0, q=1.0, num_walks=1, window_size=5,
                 num_negatives=5, use_sparse=True, weight_name=None, eval_set=None, eval_steps=-1, device='cpu'):

        self.model = Node2vec(g, embedding_dim, walk_length, p, q, num_walks,
                              window_size, num_negatives, use_sparse, weight_name)
        self.g = g
        self.use_sparse = use_sparse
        self.eval_steps = eval_steps
        self.eval_set = eval_set

        if device == 'cpu':
            self.device = device
        else:
            self.device = 'cuda' if torch.cuda.is_available() else 'cpu'

    def _train_step(self, model, loader, optimizer, device):
        model.train()
        total_loss = 0
        for pos_traces, neg_traces in loader:
            pos_traces, neg_traces = pos_traces.to(device), neg_traces.to(device)
            optimizer.zero_grad()
            loss = model.loss(pos_traces, neg_traces)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        return total_loss / len(loader)

    @torch.no_grad()
    def _evaluate_step(self):
        nodes_train, y_train = self.eval_set[0]
        nodes_val, y_val = self.eval_set[1]

        acc = self.model.evaluate(nodes_train, y_train, nodes_val, y_val)
        return acc

    def train(self, epochs, batch_size, learning_rate=0.01):
        """
        Parameters
        ----------
        epochs: int
            Number of training epochs.
        batch_size: int
            Batch size.
        learning_rate: float
            Learning rate. Default: 0.01.
        """
        self.model = self.model.to(self.device)
        loader = self.model.loader(batch_size)
        if self.use_sparse:
            optimizer = torch.optim.SparseAdam(list(self.model.parameters()), lr=learning_rate)
        else:
            optimizer = torch.optim.Adam(self.model.parameters(), lr=learning_rate)

        for i in range(epochs):
            loss = self._train_step(self.model, loader, optimizer, self.device)
            if self.eval_steps > 0:
                if (i + 1) % self.eval_steps == 0:
                    acc = self._evaluate_step()
                    print("Epoch: {}, Train Loss: {:.4f}, Val Acc: {:.4f}".format(i, loss, acc))

    def embedding(self, nodes=None):
        """
        Returns the embeddings of the input nodes.

        Parameters
        ----------
        nodes: Tensor, optional
            Input nodes. If `None`, returns the embeddings of all nodes.

        Returns
        -------
        Tensor
            Node embeddings.
        """
        return self.model(nodes)
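The rolling-window trick in ``Node2vec.sample`` above is compact but easy to misread. The standalone PyTorch sketch below (illustrative sizes only, not part of the example files) shows what ``unfold`` produces: a trace of length ``walk_length + 1`` becomes ``walk_length + 2 - window_size`` overlapping windows, whose first column ``loss`` then treats as the center node and remaining columns as its (positive or negative) contexts in a skip-gram objective with negative sampling.

```python
import torch

# One walk trace of length 6 (walk_length=5), window_size=3
trace = torch.arange(6).view(1, 6)          # tensor([[0, 1, 2, 3, 4, 5]])
windows = trace.unfold(1, 3, 1)             # shape (1, 4, 3): four rolling windows
windows = windows.contiguous().view(-1, 3)  # tensor([[0, 1, 2],
                                            #         [1, 2, 3],
                                            #         [2, 3, 4],
                                            #         [3, 4, 5]])
# In Node2vec.loss, windows[:, 0] is the "start" node and windows[:, 1:]
# are its contexts; their dot products feed the logistic loss.
```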
import argparse

from dgl.data import CitationGraphDataset
from ogb.nodeproppred import DglNodePropPredDataset


def load_graph(name):
    cite_graphs = ['cora', 'citeseer', 'pubmed']

    if name in cite_graphs:
        dataset = CitationGraphDataset(name)
        graph = dataset[0]

        nodes = graph.nodes()
        y = graph.ndata['label']
        train_mask = graph.ndata['train_mask']
        val_mask = graph.ndata['test_mask']
        nodes_train, y_train = nodes[train_mask], y[train_mask]
        nodes_val, y_val = nodes[val_mask], y[val_mask]

        eval_set = [(nodes_train, y_train), (nodes_val, y_val)]

    elif name.startswith('ogbn'):
        dataset = DglNodePropPredDataset(name)
        graph, y = dataset[0]

        split_nodes = dataset.get_idx_split()
        nodes = graph.nodes()

        train_idx = split_nodes['train']
        val_idx = split_nodes['valid']

        nodes_train, y_train = nodes[train_idx], y[train_idx]
        nodes_val, y_val = nodes[val_idx], y[val_idx]

        eval_set = [(nodes_train, y_train), (nodes_val, y_val)]
    else:
        raise ValueError("Unsupported dataset: {}".format(name))

    return graph, eval_set


def parse_arguments():
    """
    Parse command-line arguments.
    """
    parser = argparse.ArgumentParser(description='Node2vec')
    parser.add_argument('--dataset', type=str, default='cora')
    # 'train' for training the node2vec model, 'time' for timing random walks
    parser.add_argument('--task', type=str, default='train')
    parser.add_argument('--runs', type=int, default=10)
    parser.add_argument('--device', type=str, default='cpu')
    parser.add_argument('--embedding_dim', type=int, default=128)
    parser.add_argument('--walk_length', type=int, default=50)
    parser.add_argument('--p', type=float, default=0.25)
    parser.add_argument('--q', type=float, default=4.0)
    parser.add_argument('--num_walks', type=int, default=10)
    parser.add_argument('--epochs', type=int, default=100)
    parser.add_argument('--batch_size', type=int, default=128)

    args = parser.parse_args()
    return args
@@ -10,6 +10,7 @@
 #include <dgl/array.h>
 #include <vector>
 #include <utility>
+#include <tuple>

 namespace dgl {
@@ -26,9 +27,11 @@ namespace sampling {
  * \return A pair of
  * 1. One 2D array of shape (len(seeds), len(metapath) + 1) with node IDs. The
  *    paths that terminated early are padded with -1.
- * 2. One 1D array of shape (len(metapath) + 1) with node type IDs.
+ * 2. One 2D array of shape (len(seeds), len(metapath)) with edge IDs. The
+ *    paths that terminated early are padded with -1.
+ * 3. One 1D array of shape (len(metapath) + 1) with node type IDs.
  */
-std::pair<IdArray, TypeArray> RandomWalk(
+std::tuple<IdArray, IdArray, TypeArray> RandomWalk(
     const HeteroGraphPtr hg,
     const IdArray seeds,
     const TypeArray metapath,
@@ -46,9 +49,11 @@ std::pair<IdArray, TypeArray> RandomWalk(
  * \return A pair of
  * 1. One 2D array of shape (len(seeds), len(metapath) + 1) with node IDs. The
  *    paths that terminated early are padded with -1.
- * 2. One 1D array of shape (len(metapath) + 1) with node type IDs.
+ * 2. One 2D array of shape (len(seeds), len(metapath)) with edge IDs. The
+ *    paths that terminated early are padded with -1.
+ * 3. One 1D array of shape (len(metapath) + 1) with node type IDs.
  */
-std::pair<IdArray, TypeArray> RandomWalkWithRestart(
+std::tuple<IdArray, IdArray, TypeArray> RandomWalkWithRestart(
     const HeteroGraphPtr hg,
     const IdArray seeds,
     const TypeArray metapath,
@@ -69,9 +74,11 @@ std::pair<IdArray, TypeArray> RandomWalkWithRestart(
  * \return A pair of
  * 1. One 2D array of shape (len(seeds), len(metapath) + 1) with node IDs. The
  *    paths that terminated early are padded with -1.
- * 2. One 1D array of shape (len(metapath) + 1) with node type IDs.
+ * 2. One 2D array of shape (len(seeds), len(metapath)) with edge IDs. The
+ *    paths that terminated early are padded with -1.
+ * 3. One 1D array of shape (len(metapath) + 1) with node type IDs.
  */
-std::pair<IdArray, TypeArray> RandomWalkWithStepwiseRestart(
+std::tuple<IdArray, IdArray, TypeArray> RandomWalkWithStepwiseRestart(
     const HeteroGraphPtr hg,
     const IdArray seeds,
     const TypeArray metapath,
...
@@ -8,3 +8,4 @@ gives a holistic explanation on how different components work together.
 from .randomwalks import *
 from .pinsage import *
 from .neighbor import *
+from .node2vec_randomwalk import *
"""Node2vec random walk"""
from .._ffi.function import _init_api
from .. import backend as F
from .. import ndarray as nd
from .. import utils
# pylint: disable=invalid-name
__all__ = ['node2vec_random_walk']
def node2vec_random_walk(g, nodes, p, q, walk_length, prob=None, return_eids=False):
"""
Generate random walk traces from an array of starting nodes based on the node2vec model.
Paper: `node2vec: Scalable Feature Learning for Networks
<https://arxiv.org/abs/1607.00653>`__.
The returned traces all have length ``walk_length + 1``, where the first node
is the starting node itself.
Note that if a random walk stops in advance, DGL pads the trace with -1 to have the same
length.
Parameters
----------
g : DGLGraph
The graph. Must be on CPU.
Note that node2vec only support homogeneous graph.
nodes : Tensor
Node ID tensor from which the random walk traces starts.
The tensor must be on CPU, and must have the same dtype as the ID type
of the graph.
p: float
Likelihood of immediately revisiting a node in the walk.
q: float
Control parameter to interpolate between breadth-first strategy and depth-first strategy.
walk_length: int
Length of random walks.
prob : str, optional
The name of the edge feature tensor on the graph storing the (unnormalized)
probabilities associated with each edge for choosing the next node.
The feature tensor must be non-negative and the sum of the probabilities
must be positive for the outbound edges of all nodes (although they don't have
to sum up to one). The result will be undefined otherwise.
If omitted, DGL assumes that the neighbors are picked uniformly.
return_eids : bool, optional
If True, additionally return the edge IDs traversed.
Default: False.
Returns
-------
traces : Tensor
A 2-dimensional node ID tensor with shape ``(num_seeds, walk_length + 1)``.
eids : Tensor, optional
A 2-dimensional edge ID tensor with shape ``(num_seeds, length)``.
Only returned if :attr:`return_eids` is True.
Examples
--------
>>> g1 = dgl.graph(([0, 1, 1, 2, 3], [1, 2, 3, 0, 0]))
>>> dgl.sampling.node2vec_random_walk(g1, [0, 1, 2, 0], 1, 1, length=4)
tensor([[0, 1, 3, 0, 1],
[1, 2, 0, 1, 3],
[2, 0, 1, 3, 0],
[0, 1, 2, 0, 1]])
>>> dgl.sampling.node2vec_random_walk(g1, [0, 1, 2, 0], 1, 1, length=4, return_eids=True)
(tensor([[0, 1, 3, 0, 1],
[1, 2, 0, 1, 2],
[2, 0, 1, 2, 0],
[0, 1, 2, 0, 1]]),
tensor([[0, 2, 4, 0],
[1, 3, 0, 1],
[3, 0, 1, 3],
[0, 1, 3, 0]]))
"""
assert g.device == F.cpu(), "Graph must be on CPU."
gidx = g._graph
nodes = F.to_dgl_nd(utils.prepare_tensor(g, nodes, 'nodes'))
if prob is None:
prob_nd = nd.array([], ctx=nodes.ctx)
else:
prob_nd = F.to_dgl_nd(g.edata[prob])
traces, eids = _CAPI_DGLSamplingNode2vec(gidx, nodes, p, q, walk_length, prob_nd)
traces = F.from_dgl_nd(traces)
eids = F.from_dgl_nd(eids)
return (traces, eids) if return_eids else traces
_init_api('dgl.sampling.randomwalks', __name__)
@@ -11,7 +11,8 @@ __all__ = [
     'random_walk',
     'pack_traces']

-def random_walk(g, nodes, *, metapath=None, length=None, prob=None, restart_prob=None):
+def random_walk(g, nodes, *, metapath=None, length=None, prob=None, restart_prob=None,
+                return_eids=False):
     """Generate random walk traces from an array of starting nodes based on the given metapath.

     For a single starting node, ``num_traces`` traces would be generated.  A trace would
@@ -62,12 +63,20 @@ def random_walk(g, nodes, *, metapath=None, length=None, prob=None, restart_prob
         If a tensor is given, :attr:`restart_prob` should have the same length as
         :attr:`metapath` or :attr:`length`.
+    return_eids : bool, optional
+        If True, additionally return the edge IDs traversed.
+
+        Default: False.

     Returns
     -------
     traces : Tensor
         A 2-dimensional node ID tensor with shape ``(num_seeds, len(metapath) + 1)`` or
         ``(num_seeds, length + 1)`` if :attr:`metapath` is None.
+    eids : Tensor, optional
+        A 2-dimensional edge ID tensor with shape ``(num_seeds, len(metapath))`` or
+        ``(num_seeds, length)`` if :attr:`metapath` is None.  Only returned if
+        :attr:`return_eids` is True.
     types : Tensor
         A 1-dimensional node type ID tensor with shape ``(len(metapath) + 1)`` or
         ``(length + 1)``.
@@ -90,6 +99,19 @@ def random_walk(g, nodes, *, metapath=None, length=None, prob=None, restart_prob
             [2, 0, 1, 3, 0],
             [0, 1, 2, 0, 1]]), tensor([0, 0, 0, 0, 0]))

+    Or returning edge IDs:
+
+    >>> dgl.sampling.random_walk(g1, [0, 1, 2, 0], length=4, return_eids=True)
+    (tensor([[0, 1, 2, 0, 1],
+             [1, 3, 0, 1, 2],
+             [2, 0, 1, 3, 0],
+             [0, 1, 3, 0, 1]]),
+     tensor([[0, 1, 3, 0],
+             [2, 4, 0, 1],
+             [3, 0, 2, 4],
+             [0, 2, 4, 0]]),
+     tensor([0, 0, 0, 0, 0]))
+
     The first tensor indicates the random walk path for each seed node.
     The j-th element in the second tensor indicates the node type ID of the j-th node
     in every path.  In this case, it is returning all 0.
@@ -170,18 +192,19 @@ def random_walk(g, nodes, *, metapath=None, length=None, prob=None, restart_prob
     # Actual random walk
     if restart_prob is None:
-        traces, types = _CAPI_DGLSamplingRandomWalk(gidx, nodes, metapath, p_nd)
+        traces, eids, types = _CAPI_DGLSamplingRandomWalk(gidx, nodes, metapath, p_nd)
     elif F.is_tensor(restart_prob):
         restart_prob = F.to_dgl_nd(restart_prob)
-        traces, types = _CAPI_DGLSamplingRandomWalkWithStepwiseRestart(
+        traces, eids, types = _CAPI_DGLSamplingRandomWalkWithStepwiseRestart(
             gidx, nodes, metapath, p_nd, restart_prob)
     else:
-        traces, types = _CAPI_DGLSamplingRandomWalkWithRestart(
+        traces, eids, types = _CAPI_DGLSamplingRandomWalkWithRestart(
             gidx, nodes, metapath, p_nd, restart_prob)

     traces = F.from_dgl_nd(traces)
     types = F.from_dgl_nd(types)
-    return traces, types
+    eids = F.from_dgl_nd(eids)
+    return (traces, eids, types) if return_eids else (traces, types)

 def pack_traces(traces, types):
     """Pack the padded traces returned by ``random_walk()`` into a concatenated array.
...
@@ -11,6 +11,7 @@
 #include <dgl/base_heterograph.h>
 #include <dgl/random.h>
 #include <utility>
+#include <tuple>
 #include <vector>
 #include "randomwalks_impl.h"
 #include "randomwalks_cpu.h"
@@ -47,14 +48,15 @@ using TerminatePredicate = std::function<bool(IdxType *, dgl_id_t, int64_t)>;
  * \param prob Transition probability per edge type.
  * \param terminate Predicate for terminating the current random walk path.
  *
- * \return A pair of ID of next successor (-1 if not exist), as well as whether to terminate.
+ * \return A tuple of ID of next successor (-1 if not exist), the last traversed edge
+ *         ID, as well as whether to terminate.
  */
 template<DLDeviceType XPU, typename IdxType>
-std::pair<dgl_id_t, bool> MetapathRandomWalkStep(
+std::tuple<dgl_id_t, dgl_id_t, bool> MetapathRandomWalkStep(
     IdxType *data,
     dgl_id_t curr,
     int64_t len,
-    const std::vector<std::vector<IdArray> > &edges_by_type,
+    const std::vector<CSRMatrix> &edges_by_type,
     const IdxType *metapath_data,
     const std::vector<FloatArray> &prob,
     TerminatePredicate<IdxType> terminate) {
@@ -65,14 +67,16 @@ std::pair<dgl_id_t, bool> MetapathRandomWalkStep(
   // construction) as much as possible.
   // Using Successors() slows down by 2x.
   // Using OutEdges() slows down by 10x.
-  const std::vector<NDArray> &csr_arrays = edges_by_type[etype];
-  const IdxType *offsets = static_cast<IdxType *>(csr_arrays[0]->data);
-  const IdxType *all_succ = static_cast<IdxType *>(csr_arrays[1]->data);
+  const CSRMatrix &csr = edges_by_type[etype];
+  const IdxType *offsets = csr.indptr.Ptr<IdxType>();
+  const IdxType *all_succ = csr.indices.Ptr<IdxType>();
+  const IdxType *all_eids = CSRHasData(csr) ? csr.data.Ptr<IdxType>() : nullptr;
   const IdxType *succ = all_succ + offsets[curr];
+  const IdxType *eids = all_eids ? (all_eids + offsets[curr]) : nullptr;
   const int64_t size = offsets[curr + 1] - offsets[curr];
   if (size == 0)
-    return std::make_pair(-1, true);
+    return std::make_tuple(-1, -1, true);

   // Use a reference to the original array instead of copying
   // This avoids updating the ref counts atomically from different threads
@@ -83,22 +87,18 @@ std::pair<dgl_id_t, bool> MetapathRandomWalkStep(
     // empty probability array; assume uniform
     idx = RandomEngine::ThreadLocal()->RandInt(size);
   } else {
-    // non-uniform random walk
-    const IdxType *all_eids = static_cast<IdxType *>(csr_arrays[2]->data);
-    const IdxType *eids = all_eids + offsets[curr];
     ATEN_FLOAT_TYPE_SWITCH(prob_etype->dtype, DType, "probability", {
       FloatArray prob_selected = FloatArray::Empty({size}, prob_etype->dtype, prob_etype->ctx);
-      DType *prob_selected_data = static_cast<DType *>(prob_selected->data);
-      const DType *prob_etype_data = static_cast<DType *>(prob_etype->data);
+      DType *prob_selected_data = prob_selected.Ptr<DType>();
+      const DType *prob_etype_data = prob_etype.Ptr<DType>();
       for (int64_t j = 0; j < size; ++j)
-        prob_selected_data[j] = prob_etype_data[eids[j]];
+        prob_selected_data[j] = prob_etype_data[eids ? eids[j] : j + offsets[curr]];
       idx = RandomEngine::ThreadLocal()->Choice<IdxType>(prob_selected);
     });
   }

-  curr = succ[idx];
-  return std::make_pair(curr, terminate(data, curr, len));
+  dgl_id_t eid = eids ? eids[idx] : (idx + offsets[curr]);
+  return std::make_tuple(succ[idx], eid, terminate(data, curr, len));
 }

 /*!
@@ -119,11 +119,11 @@ std::pair<dgl_id_t, bool> MetapathRandomWalkStep(
  * \note This function is called only if all the probability arrays are null.
  */
 template<DLDeviceType XPU, typename IdxType>
-std::pair<dgl_id_t, bool> MetapathRandomWalkStepUniform(
+std::tuple<dgl_id_t, dgl_id_t, bool> MetapathRandomWalkStepUniform(
     IdxType *data,
     dgl_id_t curr,
     int64_t len,
-    const std::vector<std::vector<IdArray> > &edges_by_type,
+    const std::vector<CSRMatrix> &edges_by_type,
     const IdxType *metapath_data,
     const std::vector<FloatArray> &prob,
     TerminatePredicate<IdxType> terminate) {
@@ -134,21 +134,23 @@ std::pair<dgl_id_t, bool> MetapathRandomWalkStepUniform(
   // construction) as much as possible.
   // Using Successors() slows down by 2x.
   // Using OutEdges() slows down by 10x.
-  const std::vector<NDArray> &csr_arrays = edges_by_type[etype];
-  const IdxType *offsets = static_cast<IdxType *>(csr_arrays[0]->data);
-  const IdxType *all_succ = static_cast<IdxType *>(csr_arrays[1]->data);
+  const CSRMatrix &csr = edges_by_type[etype];
+  const IdxType *offsets = csr.indptr.Ptr<IdxType>();
+  const IdxType *all_succ = csr.indices.Ptr<IdxType>();
+  const IdxType *all_eids = CSRHasData(csr) ? csr.data.Ptr<IdxType>() : nullptr;
   const IdxType *succ = all_succ + offsets[curr];
+  const IdxType *eids = all_eids ? (all_eids + offsets[curr]) : nullptr;
   const int64_t size = offsets[curr + 1] - offsets[curr];
   if (size == 0)
-    return std::make_pair(-1, true);
+    return std::make_tuple(-1, -1, true);

   IdxType idx = 0;
   // Guaranteed uniform distribution
   idx = RandomEngine::ThreadLocal()->RandInt(size);

-  curr = succ[idx];
-  return std::make_pair(curr, terminate(data, curr, len));
+  dgl_id_t eid = eids ? eids[idx] : (idx + offsets[curr]);
+  return std::make_tuple(succ[idx], eid, terminate(data, curr, len));
 }

 /*!
@@ -160,10 +162,11 @@ std::pair<dgl_id_t, bool> MetapathRandomWalkStepUniform(
  * \param prob A vector of 1D float arrays, indicating the transition probability of
  *        each edge by edge type.  An empty float array assumes uniform transition.
  * \param terminate Predicate for terminating a random walk path.
- * \return A 2D array of shape (len(seeds), len(metapath) + 1) with node IDs.
+ * \return A 2D array of shape (len(seeds), len(metapath) + 1) with node IDs, and
+ *         a 2D array of shape (len(seeds), len(metapath)) with edge IDs.
  */
 template<DLDeviceType XPU, typename IdxType>
-IdArray MetapathBasedRandomWalk(
+std::pair<IdArray, IdArray> MetapathBasedRandomWalk(
     const HeteroGraphPtr hg,
     const IdArray seeds,
     const TypeArray metapath,
@@ -176,13 +179,12 @@ IdArray MetapathBasedRandomWalk(
   // This forces the heterograph to materialize all OutCSR's before the OpenMP loop;
   // otherwise data races will happen.
   // TODO(BarclayII): should we later on materialize COO/CSR/CSC anyway unless told otherwise?
-  std::vector<std::vector<IdArray> > edges_by_type;
+  std::vector<CSRMatrix> edges_by_type;
   for (dgl_type_t etype = 0; etype < hg->NumEdgeTypes(); ++etype)
-    edges_by_type.push_back(hg->GetAdj(etype, true, "csr"));
+    edges_by_type.push_back(hg->GetCSRMatrix(etype));

   // Hoist the check for Uniform vs Non uniform edge distribution
   // to avoid putting it on the hot path
-  StepFunc<IdxType> step;
   bool isUniform = true;
   for (const auto &etype_prob : prob) {
     if (!IsNullArray(etype_prob)) {
@@ -191,22 +193,22 @@ IdArray MetapathBasedRandomWalk(
     }
   }

   if (!isUniform) {
-    step =
+    StepFunc<IdxType> step =
       [&edges_by_type, metapath_data, &prob, terminate]
       (IdxType *data, dgl_id_t curr, int64_t len) {
         return MetapathRandomWalkStep<XPU, IdxType>(
             data, curr, len, edges_by_type, metapath_data, prob, terminate);
       };
+    return GenericRandomWalk<XPU, IdxType>(seeds, max_num_steps, step);
   } else {
-    step =
+    StepFunc<IdxType> step =
       [&edges_by_type, metapath_data, &prob, terminate]
       (IdxType *data, dgl_id_t curr, int64_t len) {
         return MetapathRandomWalkStepUniform<XPU, IdxType>(
             data, curr, len, edges_by_type, metapath_data, prob, terminate);
       };
-  }
   return GenericRandomWalk<XPU, IdxType>(seeds, max_num_steps, step);
+  }
 }

 };  // namespace
...
/*!
 *  Copyright (c) 2021 by Contributors
 * \file graph/sampling/node2vec.cc
 * \brief Dispatcher of DGL node2vec random walks
 */

#include <dgl/array.h>
#include <dgl/packed_func_ext.h>
#include <dgl/runtime/container.h>
#include "../../../c_api_common.h"
#include "node2vec_impl.h"

using namespace dgl::runtime;
using namespace dgl::aten;

namespace dgl {
namespace sampling {

namespace {

void CheckNode2vecInputs(const HeteroGraphPtr hg, const IdArray seeds,
                         const double p, const double q,
                         const int64_t walk_length, const FloatArray &prob) {
  CHECK_INT(seeds, "seeds");
  CHECK_NDIM(seeds, 1, "seeds");
  CHECK_FLOAT(prob, "probability");
  CHECK_NDIM(prob, 1, "probability");
}

std::pair<IdArray, IdArray> Node2vec(
    const HeteroGraphPtr hg, const IdArray seeds, const double p,
    const double q, const int64_t walk_length,
    const FloatArray &prob) {
  CheckNode2vecInputs(hg, seeds, p, q, walk_length, prob);

  std::pair<IdArray, IdArray> result;
  ATEN_XPU_SWITCH(hg->Context().device_type, XPU, "Node2vec", {
    ATEN_ID_TYPE_SWITCH(seeds->dtype, IdxType, {
      result = impl::Node2vec<XPU, IdxType>(hg, seeds, p, q, walk_length, prob);
    });
  });

  return result;
}

DGL_REGISTER_GLOBAL("sampling.randomwalks._CAPI_DGLSamplingNode2vec")
    .set_body([](DGLArgs args, DGLRetValue *rv) {
      HeteroGraphRef hg = args[0];
      IdArray seeds = args[1];
      double p = args[2];
      double q = args[3];
      int64_t walk_length = args[4];
      FloatArray prob = args[5];

      auto result =
          sampling::Node2vec(hg.sptr(), seeds, p, q, walk_length, prob);

      List<Value> ret;
      ret.push_back(Value(MakeValue(result.first)));
      ret.push_back(Value(MakeValue(result.second)));

      *rv = ret;
    });

}  // namespace
}  // namespace sampling
}  // namespace dgl
/*!
 *  Copyright (c) 2021 by Contributors
 * \file graph/sampling/node2vec_cpu.cc
 * \brief DGL sampler - CPU implementation of node2vec random walk with OpenMP
 */

#include <dgl/array.h>
#include <dgl/base_heterograph.h>
#include <utility>
#include "node2vec_randomwalk.h"

namespace dgl {

using namespace dgl::runtime;
using namespace dgl::aten;

namespace sampling {

namespace impl {

template <DLDeviceType XPU, typename IdxType>
std::pair<IdArray, IdArray> Node2vec(
    const HeteroGraphPtr hg, const IdArray seeds, const double p,
    const double q, const int64_t walk_length,
    const FloatArray &prob) {
  TerminatePredicate<IdxType> terminate = [](IdxType *data, dgl_id_t curr,
                                             int64_t len) { return false; };
  return Node2vecRandomWalk<XPU, IdxType>(hg, seeds, p, q, walk_length, prob,
                                          terminate);
}

template std::pair<IdArray, IdArray> Node2vec<kDLCPU, int32_t>(
    const HeteroGraphPtr hg,
    const IdArray seeds, const double p,
    const double q,
    const int64_t walk_length,
    const FloatArray &prob);
template std::pair<IdArray, IdArray> Node2vec<kDLCPU, int64_t>(
    const HeteroGraphPtr hg,
    const IdArray seeds, const double p,
    const double q,
    const int64_t walk_length,
    const FloatArray &prob);

};  // namespace impl

};  // namespace sampling

};  // namespace dgl
/*!
 *  Copyright (c) 2021 by Contributors
 * \file graph/sampling/node2vec_impl.h
 * \brief DGL sampler - templated implementation definition of node2vec random
 *        walks
 */
#ifndef DGL_GRAPH_SAMPLING_RANDOMWALKS_NODE2VEC_IMPL_H_
#define DGL_GRAPH_SAMPLING_RANDOMWALKS_NODE2VEC_IMPL_H_

#include <dgl/array.h>
#include <dgl/base_heterograph.h>
#include <functional>
#include <utility>
#include <vector>
#include <tuple>

namespace dgl {

using namespace dgl::runtime;
using namespace dgl::aten;

namespace sampling {

namespace impl {

/*!
 * \brief Node2vec random walk.
 * \param hg The graph (must be homogeneous).
 * \param seeds A 1D array of seed nodes.
 * \param p Float, indicating likelihood of immediately revisiting a node in the walk.
 * \param q Float, control parameter to interpolate between breadth-first strategy and
 *        depth-first strategy.
 * \param walk_length Int, length of walk.
 * \param prob A 1D float array, indicating the transition probability of each edge.
 *        An empty float array assumes uniform transition.
 * \return A pair of
 *         1. a 2D array of shape (len(seeds), walk_length + 1) with node IDs, and
 *         2. a 2D array of shape (len(seeds), walk_length) with edge IDs.
 *         The paths that terminated early are padded with -1.
 */
template <DLDeviceType XPU, typename IdxType>
std::pair<IdArray, IdArray> Node2vec(
    const HeteroGraphPtr hg, const IdArray seeds, const double p,
    const double q, const int64_t walk_length,
    const FloatArray &prob);

};  // namespace impl

};  // namespace sampling

};  // namespace dgl

#endif  // DGL_GRAPH_SAMPLING_RANDOMWALKS_NODE2VEC_IMPL_H_
/*!
 *  Copyright (c) 2021 by Contributors
 * \file graph/sampling/node2vec_randomwalk.h
 * \brief DGL sampler - CPU implementation of node2vec random walk.
 */

#ifndef DGL_GRAPH_SAMPLING_RANDOMWALKS_NODE2VEC_RANDOMWALK_H_
#define DGL_GRAPH_SAMPLING_RANDOMWALKS_NODE2VEC_RANDOMWALK_H_

#include <dgl/array.h>
#include <dgl/base_heterograph.h>
#include <dgl/random.h>
#include <algorithm>
#include <cmath>
#include <functional>
#include <utility>
#include <vector>
#include <tuple>
#include "node2vec_impl.h"
#include "randomwalks_cpu.h"
#include "metapath_randomwalk.h"  // for TerminatePredicate

namespace dgl {

using namespace dgl::runtime;
using namespace dgl::aten;

namespace sampling {

namespace impl {

namespace {

template <typename IdxType>
bool has_edge_between(const CSRMatrix &csr, dgl_id_t u,
                      dgl_id_t v) {
  const IdxType *offsets = csr.indptr.Ptr<IdxType>();
  const IdxType *all_succ = csr.indices.Ptr<IdxType>();
  const IdxType *u_succ = all_succ + offsets[u];
  const int64_t size = offsets[u + 1] - offsets[u];
  if (csr.sorted)
    return std::binary_search(u_succ, u_succ + size, v);
  else
    return std::find(u_succ, u_succ + size, v) != u_succ + size;
}

/*!
 * \brief Node2vec random walk step function
 * \param data The path generated so far, of type \c IdxType.
 * \param curr The last node ID generated.
 * \param pre The node ID generated before \c curr.
 * \param p Float, indicating likelihood of immediately revisiting a node in the
 *        walk.
 * \param q Float, control parameter to interpolate between breadth-first
 *        strategy and depth-first strategy.
 * \param len The number of nodes generated so far. Note that the seed node is
 *        always included as \c data[0], and the successors start from \c data[1].
 * \param csr The CSR matrix.
 * \param probs Transition probability.
 * \param terminate Predicate for terminating the current random walk path.
 * \return A tuple of ID of next successor (-1 if not exist), the edge ID traversed,
 *         as well as whether to terminate.
 */
template <DLDeviceType XPU, typename IdxType>
std::tuple<dgl_id_t, dgl_id_t, bool> Node2vecRandomWalkStep(
    IdxType *data, dgl_id_t curr, dgl_id_t pre, const double p, const double q,
    int64_t len, const CSRMatrix &csr, const FloatArray &probs,
    TerminatePredicate<IdxType> terminate) {
  const IdxType *offsets = csr.indptr.Ptr<IdxType>();
  const IdxType *all_succ = csr.indices.Ptr<IdxType>();
  const IdxType *all_eids = CSRHasData(csr) ? csr.data.Ptr<IdxType>() : nullptr;
  const IdxType *succ = all_succ + offsets[curr];
  const IdxType *eids = all_eids ? (all_eids + offsets[curr]) : nullptr;

  const int64_t size = offsets[curr + 1] - offsets[curr];

  // Isolated node
  if (size == 0) return std::make_tuple(-1, -1, true);

  IdxType idx = 0;

  // Normalize the biases to obtain acceptance probabilities for rejection sampling.
  double max_prob = std::max({1 / p, 1.0, 1 / q});
  // acceptance probability for returning to the previous node
  double prob0 = 1 / p / max_prob;
  // acceptance probability for visiting a node at distance 1 from the previous node
  double prob1 = 1 / max_prob;
  // acceptance probability for visiting a node at distance 2 from the previous node
  double prob2 = 1 / q / max_prob;

  dgl_id_t next_node;
  double r;  // uniform random draw for the accept/reject decision.
  if (IsNullArray(probs)) {
    if (len == 0) {
      idx = RandomEngine::ThreadLocal()->RandInt(size);
      next_node = succ[idx];
    } else {
      while (true) {
        idx = RandomEngine::ThreadLocal()->RandInt(size);
        r = RandomEngine::ThreadLocal()->Uniform(0., 1.);
        next_node = succ[idx];
        if (next_node == pre) {
          if (r < prob0) break;
        } else if (has_edge_between<IdxType>(csr, next_node, pre)) {
          if (r < prob1) break;
        } else if (r < prob2) {
          break;
        }
      }
    }
  } else {
    FloatArray prob_selected;
    ATEN_FLOAT_TYPE_SWITCH(probs->dtype, DType, "probability", {
      prob_selected = FloatArray::Empty({size}, probs->dtype, probs->ctx);
      DType *prob_selected_data = prob_selected.Ptr<DType>();
      const DType *prob_etype_data = probs.Ptr<DType>();
      for (int64_t j = 0; j < size; ++j)
        prob_selected_data[j] = prob_etype_data[eids ? eids[j] : j + offsets[curr]];
    });
    if (len == 0) {
      idx = RandomEngine::ThreadLocal()->Choice<IdxType>(prob_selected);
      next_node = succ[idx];
    } else {
      while (true) {
        idx = RandomEngine::ThreadLocal()->Choice<IdxType>(prob_selected);
        r = RandomEngine::ThreadLocal()->Uniform(0., 1.);
        next_node = succ[idx];
        if (next_node == pre) {
          if (r < prob0) break;
        } else if (has_edge_between<IdxType>(csr, next_node, pre)) {
          if (r < prob1) break;
        } else if (r < prob2) {
          break;
        }
      }
    }
  }
  dgl_id_t eid = eids ? eids[idx] : (idx + offsets[curr]);

  return std::make_tuple(next_node, eid, terminate(data, next_node, len));
}

template <DLDeviceType XPU, typename IdxType>
std::pair<IdArray, IdArray> Node2vecRandomWalk(
    const HeteroGraphPtr g, const IdArray seeds,
    const double p, const double q,
    const int64_t max_num_steps, const FloatArray &prob,
    TerminatePredicate<IdxType> terminate) {
  const CSRMatrix &edges = g->GetCSRMatrix(0);  // homogeneous graph.

  StepFunc<IdxType> step =
      [&edges, &prob, p, q, terminate]
      (IdxType *data, dgl_id_t curr, int64_t len) {
        dgl_id_t pre = (len != 0) ? data[len - 1] : curr;
        return Node2vecRandomWalkStep<XPU, IdxType>(data, curr, pre, p, q, len,
                                                    edges, prob, terminate);
      };

  return GenericRandomWalk<XPU, IdxType>(seeds, max_num_steps, step);
}

};  // namespace

};  // namespace impl

};  // namespace sampling

};  // namespace dgl

#endif  // DGL_GRAPH_SAMPLING_RANDOMWALKS_NODE2VEC_RANDOMWALK_H_
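For context on the accept/reject loop above: node2vec biases each transition by 1/p (return to the previous node, distance 0), 1 (move to a neighbor of the previous node, distance 1), or 1/q (move farther away, distance 2). Rather than materializing this biased distribution at every step, `Node2vecRandomWalkStep` draws a candidate neighbor and accepts it with the corresponding normalized probability. A minimal Python sketch of that rule (hypothetical standalone helper names, not part of the PR):

```python
import random

def accept_candidate(next_node, prev_node, next_connects_prev, p, q):
    """Accept or reject a drawn candidate neighbor, mirroring
    prob0/prob1/prob2 in Node2vecRandomWalkStep."""
    max_prob = max(1.0 / p, 1.0, 1.0 / q)
    r = random.random()                     # uniform draw in [0, 1)
    if next_node == prev_node:              # distance 0: bias 1/p
        return r < (1.0 / p) / max_prob
    elif next_connects_prev:                # distance 1: bias 1
        return r < 1.0 / max_prob
    else:                                   # distance 2: bias 1/q
        return r < (1.0 / q) / max_prob

# The walk keeps drawing candidates until one is accepted, e.g.:
#   while True:
#       cand = random.choice(neighbors[curr])
#       if accept_candidate(cand, prev, prev in neighbors[cand], p, q):
#           break
```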
@@ -7,6 +7,7 @@
 #include <dgl/array.h>
 #include <dgl/base_heterograph.h>
 #include <vector>
+#include <utility>
 #include "randomwalks_impl.h"
 #include "randomwalks_cpu.h"
 #include "metapath_randomwalk.h"
@@ -21,7 +22,7 @@ namespace sampling {
 namespace impl {

 template<DLDeviceType XPU, typename IdxType>
-IdArray RandomWalk(
+std::pair<IdArray, IdArray> RandomWalk(
     const HeteroGraphPtr hg,
     const IdArray seeds,
     const TypeArray metapath,
@@ -35,13 +36,13 @@ IdArray RandomWalk(
 }

 template
-IdArray RandomWalk<kDLCPU, int32_t>(
+std::pair<IdArray, IdArray> RandomWalk<kDLCPU, int32_t>(
     const HeteroGraphPtr hg,
     const IdArray seeds,
     const TypeArray metapath,
     const std::vector<FloatArray> &prob);

 template
-IdArray RandomWalk<kDLCPU, int64_t>(
+std::pair<IdArray, IdArray> RandomWalk<kDLCPU, int64_t>(
     const HeteroGraphPtr hg,
     const IdArray seeds,
     const TypeArray metapath,
...
@@ -23,7 +23,7 @@ namespace sampling {
 namespace impl {

 template<DLDeviceType XPU, typename IdxType>
-IdArray RandomWalkWithRestart(
+std::pair<IdArray, IdArray> RandomWalkWithRestart(
     const HeteroGraphPtr hg,
     const IdArray seeds,
     const TypeArray metapath,
@@ -37,14 +37,14 @@ IdArray RandomWalkWithRestart(
 }

 template
-IdArray RandomWalkWithRestart<kDLCPU, int32_t>(
+std::pair<IdArray, IdArray> RandomWalkWithRestart<kDLCPU, int32_t>(
     const HeteroGraphPtr hg,
     const IdArray seeds,
     const TypeArray metapath,
     const std::vector<FloatArray> &prob,
     double restart_prob);

 template
-IdArray RandomWalkWithRestart<kDLCPU, int64_t>(
+std::pair<IdArray, IdArray> RandomWalkWithRestart<kDLCPU, int64_t>(
     const HeteroGraphPtr hg,
     const IdArray seeds,
     const TypeArray metapath,
@@ -52,13 +52,13 @@ IdArray RandomWalkWithRestart<kDLCPU, int64_t>(
     double restart_prob);

 template<DLDeviceType XPU, typename IdxType>
-IdArray RandomWalkWithStepwiseRestart(
+std::pair<IdArray, IdArray> RandomWalkWithStepwiseRestart(
     const HeteroGraphPtr hg,
     const IdArray seeds,
     const TypeArray metapath,
     const std::vector<FloatArray> &prob,
     FloatArray restart_prob) {
-  IdArray result;
+  std::pair<IdArray, IdArray> result;

   ATEN_FLOAT_TYPE_SWITCH(restart_prob->dtype, DType, "restart probability", {
     DType *restart_prob_data = static_cast<DType *>(restart_prob->data);
@@ -73,14 +73,14 @@ IdArray RandomWalkWithStepwiseRestart(
 }

 template
-IdArray RandomWalkWithStepwiseRestart<kDLCPU, int32_t>(
+std::pair<IdArray, IdArray> RandomWalkWithStepwiseRestart<kDLCPU, int32_t>(
     const HeteroGraphPtr hg,
     const IdArray seeds,
     const TypeArray metapath,
     const std::vector<FloatArray> &prob,
     FloatArray restart_prob);

 template
-IdArray RandomWalkWithStepwiseRestart<kDLCPU, int64_t>(
+std::pair<IdArray, IdArray> RandomWalkWithStepwiseRestart<kDLCPU, int64_t>(
     const HeteroGraphPtr hg,
     const IdArray seeds,
     const TypeArray metapath,
...
...@@ -42,7 +42,7 @@ void CheckRandomWalkInputs( ...@@ -42,7 +42,7 @@ void CheckRandomWalkInputs(
}; // namespace }; // namespace
std::pair<IdArray, TypeArray> RandomWalk( std::tuple<IdArray, IdArray, TypeArray> RandomWalk(
const HeteroGraphPtr hg, const HeteroGraphPtr hg,
const IdArray seeds, const IdArray seeds,
const TypeArray metapath, const TypeArray metapath,
...@@ -50,18 +50,18 @@ std::pair<IdArray, TypeArray> RandomWalk( ...@@ -50,18 +50,18 @@ std::pair<IdArray, TypeArray> RandomWalk(
CheckRandomWalkInputs(hg, seeds, metapath, prob); CheckRandomWalkInputs(hg, seeds, metapath, prob);
TypeArray vtypes; TypeArray vtypes;
IdArray vids; std::pair<IdArray, IdArray> result;
ATEN_XPU_SWITCH(hg->Context().device_type, XPU, "RandomWalk", { ATEN_XPU_SWITCH(hg->Context().device_type, XPU, "RandomWalk", {
ATEN_ID_TYPE_SWITCH(seeds->dtype, IdxType, { ATEN_ID_TYPE_SWITCH(seeds->dtype, IdxType, {
vtypes = impl::GetNodeTypesFromMetapath<XPU, IdxType>(hg, metapath); vtypes = impl::GetNodeTypesFromMetapath<XPU, IdxType>(hg, metapath);
vids = impl::RandomWalk<XPU, IdxType>(hg, seeds, metapath, prob); result = impl::RandomWalk<XPU, IdxType>(hg, seeds, metapath, prob);
}); });
}); });
return std::make_pair(vids, vtypes); return std::make_tuple(result.first, result.second, vtypes);
} }
std::pair<IdArray, TypeArray> RandomWalkWithRestart( std::tuple<IdArray, IdArray, TypeArray> RandomWalkWithRestart(
const HeteroGraphPtr hg, const HeteroGraphPtr hg,
const IdArray seeds, const IdArray seeds,
const TypeArray metapath, const TypeArray metapath,
...@@ -71,18 +71,18 @@ std::pair<IdArray, TypeArray> RandomWalkWithRestart( ...@@ -71,18 +71,18 @@ std::pair<IdArray, TypeArray> RandomWalkWithRestart(
CHECK(restart_prob >= 0 && restart_prob < 1) << "restart probability must belong to [0, 1)"; CHECK(restart_prob >= 0 && restart_prob < 1) << "restart probability must belong to [0, 1)";
TypeArray vtypes; TypeArray vtypes;
IdArray vids; std::pair<IdArray, IdArray> result;
ATEN_XPU_SWITCH(hg->Context().device_type, XPU, "RandomWalkWithRestart", { ATEN_XPU_SWITCH(hg->Context().device_type, XPU, "RandomWalkWithRestart", {
ATEN_ID_TYPE_SWITCH(seeds->dtype, IdxType, { ATEN_ID_TYPE_SWITCH(seeds->dtype, IdxType, {
vtypes = impl::GetNodeTypesFromMetapath<XPU, IdxType>(hg, metapath); vtypes = impl::GetNodeTypesFromMetapath<XPU, IdxType>(hg, metapath);
vids = impl::RandomWalkWithRestart<XPU, IdxType>(hg, seeds, metapath, prob, restart_prob); result = impl::RandomWalkWithRestart<XPU, IdxType>(hg, seeds, metapath, prob, restart_prob);
}); });
}); });
-  return std::make_pair(vids, vtypes);
+  return std::make_tuple(result.first, result.second, vtypes);
 }

-std::pair<IdArray, TypeArray> RandomWalkWithStepwiseRestart(
+std::tuple<IdArray, IdArray, TypeArray> RandomWalkWithStepwiseRestart(
     const HeteroGraphPtr hg,
     const IdArray seeds,
     const TypeArray metapath,
@@ -92,16 +92,16 @@ std::pair<IdArray, TypeArray> RandomWalkWithStepwiseRestart(
   // TODO(BarclayII): check the elements of restart probability
   TypeArray vtypes;
-  IdArray vids;
+  std::pair<IdArray, IdArray> result;
   ATEN_XPU_SWITCH(hg->Context().device_type, XPU, "RandomWalkWithStepwiseRestart", {
     ATEN_ID_TYPE_SWITCH(seeds->dtype, IdxType, {
       vtypes = impl::GetNodeTypesFromMetapath<XPU, IdxType>(hg, metapath);
-      vids = impl::RandomWalkWithStepwiseRestart<XPU, IdxType>(
+      result = impl::RandomWalkWithStepwiseRestart<XPU, IdxType>(
           hg, seeds, metapath, prob, restart_prob);
     });
   });
-  return std::make_pair(vids, vtypes);
+  return std::make_tuple(result.first, result.second, vtypes);
 }

 };  // namespace sampling

@@ -117,8 +117,9 @@ DGL_REGISTER_GLOBAL("sampling.randomwalks._CAPI_DGLSamplingRandomWalk")
     auto result = sampling::RandomWalk(hg.sptr(), seeds, metapath, prob_vec);
     List<Value> ret;
-    ret.push_back(Value(MakeValue(result.first)));
-    ret.push_back(Value(MakeValue(result.second)));
+    ret.push_back(Value(MakeValue(std::get<0>(result))));
+    ret.push_back(Value(MakeValue(std::get<1>(result))));
+    ret.push_back(Value(MakeValue(std::get<2>(result))));
     *rv = ret;
   });

@@ -135,8 +136,9 @@ DGL_REGISTER_GLOBAL("sampling.randomwalks._CAPI_DGLSamplingRandomWalkWithRestart
     auto result = sampling::RandomWalkWithRestart(
         hg.sptr(), seeds, metapath, prob_vec, restart_prob);
     List<Value> ret;
-    ret.push_back(Value(MakeValue(result.first)));
-    ret.push_back(Value(MakeValue(result.second)));
+    ret.push_back(Value(MakeValue(std::get<0>(result))));
+    ret.push_back(Value(MakeValue(std::get<1>(result))));
+    ret.push_back(Value(MakeValue(std::get<2>(result))));
     *rv = ret;
   });

@@ -153,8 +155,9 @@ DGL_REGISTER_GLOBAL("sampling.randomwalks._CAPI_DGLSamplingRandomWalkWithStepwis
     auto result = sampling::RandomWalkWithStepwiseRestart(
         hg.sptr(), seeds, metapath, prob_vec, restart_prob);
     List<Value> ret;
-    ret.push_back(Value(MakeValue(result.first)));
-    ret.push_back(Value(MakeValue(result.second)));
+    ret.push_back(Value(MakeValue(std::get<0>(result))));
+    ret.push_back(Value(MakeValue(std::get<1>(result))));
+    ret.push_back(Value(MakeValue(std::get<2>(result))));
     *rv = ret;
   });
......
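For orientation before the remaining hunks: once the C API returns the third array, the Python-level `dgl.sampling.random_walk` exposes per-hop edge IDs through the `return_eids=True` path exercised by the updated tests below. A minimal sketch on a made-up cycle graph, assuming the three-value return order (traces, edge IDs, node types) shown in those tests:

```python
import dgl

# Toy 3-node cycle; every node has a successor, so no -1 padding occurs.
g = dgl.graph(([0, 1, 2], [1, 2, 0]))

traces, eids, ntypes = dgl.sampling.random_walk(
    g, [0, 1, 2], length=4, return_eids=True)

# Per the updated doc comments: the node trace has one more column than
# the edge trace, and early-terminated walks are padded with -1 in both.
assert tuple(traces.shape) == (3, 5)  # (len(seeds), length + 1)
assert tuple(eids.shape) == (3, 4)    # (len(seeds), length)
```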
@@ -9,6 +9,8 @@
 #include <dgl/base_heterograph.h>
 #include <dgl/array.h>

+#include <tuple>
+#include <utility>
 #include "randomwalks_impl.h"

 namespace dgl {
@@ -32,16 +34,18 @@ namespace {
  * \note The graph itself should be bounded in the closure of \c step.
  */
 template<DLDeviceType XPU, typename IdxType>
-IdArray GenericRandomWalk(
+std::pair<IdArray, IdArray> GenericRandomWalk(
     const IdArray seeds,
     int64_t max_num_steps,
     StepFunc<IdxType> step) {
   int64_t num_seeds = seeds->shape[0];
   int64_t trace_length = max_num_steps + 1;
   IdArray traces = IdArray::Empty({num_seeds, trace_length}, seeds->dtype, seeds->ctx);
+  IdArray eids = IdArray::Empty({num_seeds, max_num_steps}, seeds->dtype, seeds->ctx);

-  const IdxType *seed_data = static_cast<IdxType *>(seeds->data);
-  IdxType *traces_data = static_cast<IdxType *>(traces->data);
+  const IdxType *seed_data = seeds.Ptr<IdxType>();
+  IdxType *traces_data = traces.Ptr<IdxType>();
+  IdxType *eids_data = eids.Ptr<IdxType>();

 #pragma omp parallel for
   for (int64_t seed_id = 0; seed_id < num_seeds; ++seed_id) {
@@ -51,16 +55,19 @@ IdArray GenericRandomWalk(
     for (i = 0; i < max_num_steps; ++i) {
       const auto &succ = step(traces_data + seed_id * max_num_steps, curr, i);
-      traces_data[seed_id * trace_length + i + 1] = curr = succ.first;
-      if (succ.second)
+      traces_data[seed_id * trace_length + i + 1] = curr = std::get<0>(succ);
+      eids_data[seed_id * max_num_steps + i] = std::get<1>(succ);
+      if (std::get<2>(succ))
         break;
     }

-    for (; i < max_num_steps; ++i)
+    for (; i < max_num_steps; ++i) {
       traces_data[seed_id * trace_length + i + 1] = -1;
+      eids_data[seed_id * max_num_steps + i] = -1;
+    }
   }

-  return traces;
+  return std::make_pair(traces, eids);
 }

 };  // namespace
......
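The reworked `GenericRandomWalk` contract: each `step` call yields a (next node, edge ID, terminate?) triple, and when a step reports termination the slot it just wrote is reclaimed by the -1 padding loop, so the terminating hop never appears in the output. A rough NumPy analog of the driver loop, purely illustrative; the `step` callback here is a stand-in for the C++ `StepFunc`:

```python
import numpy as np

def generic_random_walk(seeds, max_num_steps, step):
    """step(curr, i) -> (next_node, edge_id, terminate), as in StepFunc."""
    num_seeds = len(seeds)
    traces = np.empty((num_seeds, max_num_steps + 1), dtype=np.int64)
    eids = np.empty((num_seeds, max_num_steps), dtype=np.int64)
    for s, seed in enumerate(seeds):
        traces[s, 0] = curr = seed
        i, stopped = 0, False
        while i < max_num_steps and not stopped:
            nxt, eid, stopped = step(curr, i)
            traces[s, i + 1] = curr = nxt
            eids[s, i] = eid
            if not stopped:
                i += 1
        # Pad from the stopping position; a stop at step i overwrites the
        # entry just written, mirroring the C++ padding loop above.
        traces[s, i + 1:] = -1
        eids[s, i:] = -1
    return traces, eids
```

A concrete `step` for a homogeneous graph would pick a random out-edge of `curr` and report termination when `curr` has none.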
@@ -11,6 +11,7 @@
 #include <dgl/array.h>

 #include <vector>
 #include <utility>
+#include <tuple>
 #include <functional>

 namespace dgl {
@@ -27,8 +28,8 @@ namespace impl {
  */
 template<typename IdxType>
 using StepFunc = std::function<
-  //          ID        terminate?
-  std::pair<dgl_id_t, bool>(
+  //          ID        Edge ID    terminate?
+  std::tuple<dgl_id_t, dgl_id_t, bool>(
       IdxType *,    // node IDs generated so far
       dgl_id_t,     // last node ID
       int64_t)>;    // # of steps
@@ -52,11 +53,13 @@ TypeArray GetNodeTypesFromMetapath(
  *        each edge by edge type. An empty float array assumes uniform transition.
  * \return A 2D array of shape (len(seeds), len(metapath) + 1) with node IDs. The
  *         paths that terminated early are padded with -1.
+ *         A 2D array of shape (len(seeds), len(metapath)) with edge IDs. The
+ *         paths that terminated early are padded with -1.
  * \note This function should be called together with GetNodeTypesFromMetapath to
  *       determine the node type of each node in the random walk traces.
  */
 template<DLDeviceType XPU, typename IdxType>
-IdArray RandomWalk(
+std::pair<IdArray, IdArray> RandomWalk(
     const HeteroGraphPtr hg,
     const IdArray seeds,
     const TypeArray metapath,
@@ -73,11 +76,13 @@ IdArray RandomWalk(
  * \param restart_prob Restart probability
  * \return A 2D array of shape (len(seeds), len(metapath) + 1) with node IDs. The
  *         paths that terminated early are padded with -1.
+ *         A 2D array of shape (len(seeds), len(metapath)) with edge IDs. The
+ *         paths that terminated early are padded with -1.
  * \note This function should be called together with GetNodeTypesFromMetapath to
  *       determine the node type of each node in the random walk traces.
  */
 template<DLDeviceType XPU, typename IdxType>
-IdArray RandomWalkWithRestart(
+std::pair<IdArray, IdArray> RandomWalkWithRestart(
     const HeteroGraphPtr hg,
     const IdArray seeds,
     const TypeArray metapath,
@@ -97,11 +102,13 @@ IdArray RandomWalkWithRestart(
  *        as \c metapath, indicating the probability to terminate after transition.
  * \return A 2D array of shape (len(seeds), len(metapath) + 1) with node IDs. The
  *         paths that terminated early are padded with -1.
+ *         A 2D array of shape (len(seeds), len(metapath)) with edge IDs. The
+ *         paths that terminated early are padded with -1.
  * \note This function should be called together with GetNodeTypesFromMetapath to
  *       determine the node type of each node in the random walk traces.
  */
 template<DLDeviceType XPU, typename IdxType>
-IdArray RandomWalkWithStepwiseRestart(
+std::pair<IdArray, IdArray> RandomWalkWithStepwiseRestart(
     const HeteroGraphPtr hg,
     const IdArray seeds,
     const TypeArray metapath,
......
@@ -4,7 +4,7 @@ import numpy as np
 import unittest
 from collections import defaultdict

-def check_random_walk(g, metapath, traces, ntypes, prob=None):
+def check_random_walk(g, metapath, traces, ntypes, prob=None, trace_eids=None):
     traces = F.asnumpy(traces)
     ntypes = F.asnumpy(ntypes)
     for j in range(traces.shape[1] - 1):
@@ -19,6 +19,9 @@ def check_random_walk(g, metapath, traces, ntypes, prob=None):
                 p = F.asnumpy(g.edges[metapath[j]].data['p'])
                 eids = g.edge_ids(traces[i, j], traces[i, j+1], etype=metapath[j])
                 assert p[eids] != 0
+            if trace_eids is not None:
+                u, v = g.find_edges(trace_eids[i, j], etype=metapath[j])
+                assert (u == traces[i, j]) and (v == traces[i, j + 1])

 @unittest.skipIf(F._default_context_str == 'gpu', reason="GPU random walk not implemented")
 def test_random_walk():
@@ -42,10 +45,10 @@ def test_random_walk():
     g4.edges['follow'].data['p'] = F.tensor([3, 0, 3, 3, 3], dtype=F.float32)
     g4.edges['viewed-by'].data['p'] = F.tensor([1, 1, 1, 1, 1, 1], dtype=F.float32)

-    traces, ntypes = dgl.sampling.random_walk(g1, [0, 1, 2, 0, 1, 2], length=4)
-    check_random_walk(g1, ['follow'] * 4, traces, ntypes)
+    traces, eids, ntypes = dgl.sampling.random_walk(g1, [0, 1, 2, 0, 1, 2], length=4, return_eids=True)
+    check_random_walk(g1, ['follow'] * 4, traces, ntypes, trace_eids=eids)

-    traces, ntypes = dgl.sampling.random_walk(g1, [0, 1, 2, 0, 1, 2], length=4, restart_prob=0.)
-    check_random_walk(g1, ['follow'] * 4, traces, ntypes)
+    traces, eids, ntypes = dgl.sampling.random_walk(g1, [0, 1, 2, 0, 1, 2], length=4, restart_prob=0., return_eids=True)
+    check_random_walk(g1, ['follow'] * 4, traces, ntypes, trace_eids=eids)

     traces, ntypes = dgl.sampling.random_walk(
         g1, [0, 1, 2, 0, 1, 2], length=4, restart_prob=F.zeros((4,), F.float32, F.cpu()))
     check_random_walk(g1, ['follow'] * 4, traces, ntypes)
@@ -56,13 +59,13 @@ def test_random_walk():
         g1, ['follow'] * 4, F.slice_axis(traces, 1, 0, 5), F.slice_axis(ntypes, 0, 0, 5))
     assert (F.asnumpy(traces)[:, 5] == -1).all()

-    traces, ntypes = dgl.sampling.random_walk(
-        g2, [0, 1, 2, 3, 0, 1, 2, 3], length=4)
-    check_random_walk(g2, ['follow'] * 4, traces, ntypes)
+    traces, eids, ntypes = dgl.sampling.random_walk(
+        g2, [0, 1, 2, 3, 0, 1, 2, 3], length=4, return_eids=True)
+    check_random_walk(g2, ['follow'] * 4, traces, ntypes, trace_eids=eids)

-    traces, ntypes = dgl.sampling.random_walk(
-        g2, [0, 1, 2, 3, 0, 1, 2, 3], length=4, prob='p')
-    check_random_walk(g2, ['follow'] * 4, traces, ntypes, 'p')
+    traces, eids, ntypes = dgl.sampling.random_walk(
+        g2, [0, 1, 2, 3, 0, 1, 2, 3], length=4, prob='p', return_eids=True)
+    check_random_walk(g2, ['follow'] * 4, traces, ntypes, 'p', trace_eids=eids)

     try:
         traces, ntypes = dgl.sampling.random_walk(
@@ -73,32 +76,55 @@ def test_random_walk():
     assert fail

     metapath = ['follow', 'view', 'viewed-by'] * 2
-    traces, ntypes = dgl.sampling.random_walk(
-        g3, [0, 1, 2, 0, 1, 2], metapath=metapath)
-    check_random_walk(g3, metapath, traces, ntypes)
+    traces, eids, ntypes = dgl.sampling.random_walk(
+        g3, [0, 1, 2, 0, 1, 2], metapath=metapath, return_eids=True)
+    check_random_walk(g3, metapath, traces, ntypes, trace_eids=eids)

     metapath = ['follow', 'view', 'viewed-by'] * 2
-    traces, ntypes = dgl.sampling.random_walk(
-        g4, [0, 1, 2, 3, 0, 1, 2, 3], metapath=metapath)
-    check_random_walk(g4, metapath, traces, ntypes)
+    traces, eids, ntypes = dgl.sampling.random_walk(
+        g4, [0, 1, 2, 3, 0, 1, 2, 3], metapath=metapath, return_eids=True)
+    check_random_walk(g4, metapath, traces, ntypes, trace_eids=eids)
+
+    traces, eids, ntypes = dgl.sampling.random_walk(
+        g4, [0, 1, 2, 0, 1, 2], metapath=metapath, return_eids=True)
+    check_random_walk(g4, metapath, traces, ntypes, trace_eids=eids)

     metapath = ['follow', 'view', 'viewed-by'] * 2
-    traces, ntypes = dgl.sampling.random_walk(
-        g4, [0, 1, 2, 3, 0, 1, 2, 3], metapath=metapath, prob='p')
-    check_random_walk(g4, metapath, traces, ntypes, 'p')
+    traces, eids, ntypes = dgl.sampling.random_walk(
+        g4, [0, 1, 2, 3, 0, 1, 2, 3], metapath=metapath, prob='p', return_eids=True)
+    check_random_walk(g4, metapath, traces, ntypes, 'p', trace_eids=eids)

-    traces, ntypes = dgl.sampling.random_walk(
-        g4, [0, 1, 2, 3, 0, 1, 2, 3], metapath=metapath, prob='p', restart_prob=0.)
-    check_random_walk(g4, metapath, traces, ntypes, 'p')
+    traces, eids, ntypes = dgl.sampling.random_walk(
+        g4, [0, 1, 2, 3, 0, 1, 2, 3], metapath=metapath, prob='p', restart_prob=0., return_eids=True)
+    check_random_walk(g4, metapath, traces, ntypes, 'p', trace_eids=eids)

-    traces, ntypes = dgl.sampling.random_walk(
+    traces, eids, ntypes = dgl.sampling.random_walk(
         g4, [0, 1, 2, 3, 0, 1, 2, 3], metapath=metapath, prob='p',
-        restart_prob=F.zeros((6,), F.float32, F.cpu()))
-    check_random_walk(g4, metapath, traces, ntypes, 'p')
+        restart_prob=F.zeros((6,), F.float32, F.cpu()), return_eids=True)
+    check_random_walk(g4, metapath, traces, ntypes, 'p', trace_eids=eids)

-    traces, ntypes = dgl.sampling.random_walk(
+    traces, eids, ntypes = dgl.sampling.random_walk(
         g4, [0, 1, 2, 3, 0, 1, 2, 3], metapath=metapath + ['follow'], prob='p',
-        restart_prob=F.tensor([0, 0, 0, 0, 0, 0, 1], F.float32))
-    check_random_walk(g4, metapath, traces[:, :7], ntypes[:7], 'p')
+        restart_prob=F.tensor([0, 0, 0, 0, 0, 0, 1], F.float32), return_eids=True)
+    check_random_walk(g4, metapath, traces[:, :7], ntypes[:7], 'p', trace_eids=eids)
     assert (F.asnumpy(traces)[:, 7] == -1).all()

+@unittest.skipIf(F._default_context_str == 'gpu', reason="GPU random walk not implemented")
+def test_node2vec():
+    g1 = dgl.heterograph({
+        ('user', 'follow', 'user'): ([0, 1, 2], [1, 2, 0])
+    })
+    g2 = dgl.heterograph({
+        ('user', 'follow', 'user'): ([0, 1, 1, 2, 3], [1, 2, 3, 0, 0])
+    })
+    g2.edata['p'] = F.tensor([3, 0, 3, 3, 3], dtype=F.float32)
+
+    ntypes = F.zeros((5,), dtype=F.int64)
+
+    traces, eids = dgl.sampling.node2vec_random_walk(g1, [0, 1, 2, 0, 1, 2], 1, 1, 4, return_eids=True)
+    check_random_walk(g1, ['follow'] * 4, traces, ntypes, trace_eids=eids)
+
+    traces, eids = dgl.sampling.node2vec_random_walk(
+        g2, [0, 1, 2, 3, 0, 1, 2, 3], 1, 1, 4, prob='p', return_eids=True)
+    check_random_walk(g2, ['follow'] * 4, traces, ntypes, 'p', trace_eids=eids)

 @unittest.skipIf(F._default_context_str == 'gpu', reason="GPU pack traces not implemented")
 def test_pack_traces():
     traces, types = (np.array(
......
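Outside the test harness, the new sampler is driven by the return parameter p and the in-out parameter q from the node2vec paper; a sketch on an arbitrary toy graph, assuming the keyword names p, q, and walk_length used elsewhere in this PR:

```python
import dgl
import torch

g = dgl.graph(([0, 1, 1, 2, 3], [1, 2, 3, 0, 0]))
seeds = torch.tensor([0, 1, 2, 3])

# node2vec weights a hop back to the previous node by 1/p and a hop
# farther away from it by 1/q, so p=0.25 favors returning while q=4.0
# keeps the walk local (BFS-like exploration).
traces = dgl.sampling.node2vec_random_walk(g, seeds, p=0.25, q=4.0, walk_length=4)

# As in test_node2vec above, return_eids=True also yields the hop edge IDs.
traces, eids = dgl.sampling.node2vec_random_walk(
    g, seeds, 0.25, 4.0, 4, return_eids=True)
```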