Commit e667545d authored by Quan (Andy) Gan, committed by GitHub

[Feature] Node2vec (#2992)



* add seal example

* 1. add paper information in examples/README
  2. adjust code
  3. option test

* use latest `to_simple` to replace coalesce graph function

* remove outdated codes

* remove useless comment

* Node2vec
  1. implement node2vec random walk C++ op
  2. implement node2vec model
  3. implement node2vec example

* modify CMakeLists file

* refine c++ codes

* refine c++ codes

* add missing whitespace

* refine python codes

* add codes

* add node2vec_impl.h

* fix codes

* fix code style problem

* fixes

* remove

* lots of changes

* add benchmark

* fixes
Co-authored-by: smilexuhc <smile.xuhc@gmail.com>
Co-authored-by: Minjie Wang <wmjlyjemaine@gmail.com>
parent 73594814
import time

import dgl
import torch

from .. import utils


def _random_walk(g, seeds, length):
    return dgl.sampling.random_walk(g, seeds, length=length)


def _node2vec(g, seeds, length):
    return dgl.sampling.node2vec_random_walk(g, seeds, 1, 1, length)


@utils.benchmark('time')
@utils.parametrize_cpu('graph_name', ['cora', 'livejournal', 'friendster'])
@utils.parametrize('num_seeds', [10, 100, 1000])
@utils.parametrize('length', [2, 5, 10, 20])
@utils.parametrize('algorithm', ['_random_walk', '_node2vec'])
def track_time(graph_name, num_seeds, length, algorithm):
    device = utils.get_bench_device()
    graph = utils.get_graph(graph_name, 'csr')
    seeds = torch.randint(0, graph.num_nodes(), (num_seeds,))
    print(graph_name, num_seeds, length)
    alg = globals()[algorithm]

    # dry run
    for i in range(5):
        _ = alg(graph, seeds, length=length)

    # timing
    with utils.Timer() as t:
        for i in range(50):
            _ = alg(graph, seeds, length=length)

    return t.elapsed_secs / 50
# DGL Implementation of Node2vec

This DGL example implements the graph embedding model proposed in the paper
[node2vec: Scalable Feature Learning for Networks](https://arxiv.org/abs/1607.00653).
The authors' reference implementation is available at [Node2vec](https://github.com/aditya-grover/node2vec).
Example implementor
----------------------
This example was implemented by [Smile](https://github.com/Smilexuhc) during his internship at the AWS Shanghai AI Lab.
The graph dataset used in this example
---------------------------------------
cora
- NumNodes: 2708
- NumEdges: 10556
ogbn-products
- NumNodes: 2449029
- NumEdges: 61859140
Dependencies
--------------------------------
- Python 3.6+
- PyTorch 1.5.0+
- ogb
How to run example files
--------------------------------
To train a node2vec model:
```shell script
python main.py --task="train"
```
To time node2vec random walks:
```shell script
python main.py --task="time" --runs=10
```
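The same training loop can also be driven from Python directly. A minimal sketch using the `load_graph` helper and `Node2vecModel` wrapper shipped with this example (hyperparameters here are illustrative and mirror the CLI defaults in `utils.py`):

```python
from model import Node2vecModel
from utils import load_graph

# Load cora and its train/val split
graph, eval_set = load_graph('cora')

trainer = Node2vecModel(graph, embedding_dim=128, walk_length=50,
                        p=0.25, q=4.0, num_walks=10,
                        eval_set=eval_set, eval_steps=1, device='cpu')
trainer.train(epochs=100, batch_size=128, learning_rate=0.01)

emb = trainer.embedding()  # (num_nodes, embedding_dim) tensor of node embeddings
```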
Performance
-------------------------
**Setting:** `walk_length=50, p=0.25, q=4.0`
| Dataset | DGL | PyG |
| -------- | :---------: | :---------: |
| cora | 0.0092s | 0.0179s |
| products | 66.22s | 77.65s |
Note that the numbers in the table are averages over multiple trials:
50 trials for cora and 10 trials for ogbn-products.
import time

from dgl.sampling import node2vec_random_walk
from model import Node2vecModel
from utils import load_graph, parse_arguments


def time_randomwalk(graph, args):
    """
    Measure the time cost of node2vec random walks.
    """
    start_time = time.time()

    # default setting for timing
    params = {'p': 0.25,
              'q': 4,
              'walk_length': 50}
    for i in range(args.runs):
        node2vec_random_walk(graph, graph.nodes(), **params)
    end_time = time.time()
    cost_time_avg = (end_time - start_time) / args.runs
    print("Ran {} trials on dataset {}, mean run time: {:.3f}s".format(args.runs, args.dataset, cost_time_avg))


def train_node2vec(graph, eval_set, args):
    """
    Train the node2vec model.
    """
    trainer = Node2vecModel(graph,
                            embedding_dim=args.embedding_dim,
                            walk_length=args.walk_length,
                            p=args.p,
                            q=args.q,
                            num_walks=args.num_walks,
                            eval_set=eval_set,
                            eval_steps=1,
                            device=args.device)

    trainer.train(epochs=args.epochs, batch_size=args.batch_size, learning_rate=0.01)


if __name__ == '__main__':
    args = parse_arguments()
    graph, eval_set = load_graph(args.dataset)

    if args.task == 'train':
        print("Training node2vec model")
        train_node2vec(graph, eval_set, args)
    elif args.task == 'time':
        print("Timing random walks")
        time_randomwalk(graph, args)
    else:
        raise ValueError('Unsupported task: {}'.format(args.task))
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from sklearn.linear_model import LogisticRegression

from dgl.sampling import node2vec_random_walk


class Node2vec(nn.Module):
    """Node2vec model from the paper `node2vec: Scalable Feature Learning for Networks
    <https://arxiv.org/abs/1607.00653>`__.

    Attributes
    ----------
    g: DGLGraph
        The graph.
    embedding_dim: int
        Dimension of node embedding.
    walk_length: int
        Length of each trace.
    p: float
        Likelihood of immediately revisiting a node in the walk. Same notation as in the paper.
    q: float
        Control parameter to interpolate between breadth-first strategy and depth-first strategy.
        Same notation as in the paper.
    num_walks: int
        Number of random walks for each node. Default: 10.
    window_size: int
        Maximum distance between the center node and predicted node. Default: 5.
    num_negatives: int
        The number of negative samples for each positive sample. Default: 5.
    use_sparse: bool
        If set to True, use PyTorch's sparse embedding and optimizer. Default: ``True``.
    weight_name : str, optional
        The name of the edge feature tensor on the graph storing the (unnormalized)
        probabilities associated with each edge for choosing the next node.

        The feature tensor must be non-negative and the sum of the probabilities
        must be positive for the outbound edges of all nodes (although they don't have
        to sum up to one). The result will be undefined otherwise.

        If omitted, DGL assumes that the neighbors are picked uniformly.
    """
    def __init__(self, g, embedding_dim, walk_length, p, q, num_walks=10, window_size=5, num_negatives=5,
                 use_sparse=True, weight_name=None):
        super(Node2vec, self).__init__()

        assert walk_length >= window_size

        self.g = g
        self.embedding_dim = embedding_dim
        self.walk_length = walk_length
        self.p = p
        self.q = q
        self.num_walks = num_walks
        self.window_size = window_size
        self.num_negatives = num_negatives
        self.N = self.g.num_nodes()
        if weight_name is not None:
            self.prob = weight_name
        else:
            self.prob = None

        self.embedding = nn.Embedding(self.N, embedding_dim, sparse=use_sparse)

    def reset_parameters(self):
        self.embedding.reset_parameters()

    def sample(self, batch):
        """
        Generate positive and negative samples.
        Positive samples are generated from random walks;
        negative samples are generated by uniform random sampling.
        """
        if not isinstance(batch, torch.Tensor):
            batch = torch.tensor(batch)

        batch = batch.repeat(self.num_walks)
        # positive
        pos_traces = node2vec_random_walk(self.g, batch, self.p, self.q, self.walk_length, self.prob)
        pos_traces = pos_traces.unfold(1, self.window_size, 1)  # rolling window
        pos_traces = pos_traces.contiguous().view(-1, self.window_size)

        # negative
        neg_batch = batch.repeat(self.num_negatives)
        neg_traces = torch.randint(self.N, (neg_batch.size(0), self.walk_length))
        neg_traces = torch.cat([neg_batch.view(-1, 1), neg_traces], dim=-1)
        neg_traces = neg_traces.unfold(1, self.window_size, 1)  # rolling window
        neg_traces = neg_traces.contiguous().view(-1, self.window_size)

        return pos_traces, neg_traces

    def forward(self, nodes=None):
        """
        Returns the embeddings of the input nodes.

        Parameters
        ----------
        nodes: Tensor, optional
            Input nodes. If `None`, returns the embeddings of all nodes.

        Returns
        -------
        Tensor
            Node embeddings.
        """
        emb = self.embedding.weight
        if nodes is None:
            return emb
        else:
            return emb[nodes]

    def loss(self, pos_trace, neg_trace):
        """
        Computes the loss given positive and negative random walks.

        Parameters
        ----------
        pos_trace: Tensor
            Positive random walk traces.
        neg_trace: Tensor
            Negative random walk traces.
        """
        e = 1e-15

        # Positive
        pos_start, pos_rest = pos_trace[:, 0], pos_trace[:, 1:].contiguous()  # start node and following trace
        w_start = self.embedding(pos_start).unsqueeze(dim=1)
        w_rest = self.embedding(pos_rest)
        pos_out = (w_start * w_rest).sum(dim=-1).view(-1)

        # Negative
        neg_start, neg_rest = neg_trace[:, 0], neg_trace[:, 1:].contiguous()
        w_start = self.embedding(neg_start).unsqueeze(dim=1)
        w_rest = self.embedding(neg_rest)
        neg_out = (w_start * w_rest).sum(dim=-1).view(-1)

        # compute loss
        pos_loss = -torch.log(torch.sigmoid(pos_out) + e).mean()
        neg_loss = -torch.log(1 - torch.sigmoid(neg_out) + e).mean()

        return pos_loss + neg_loss

    def loader(self, batch_size):
        """
        Parameters
        ----------
        batch_size: int
            Batch size.

        Returns
        -------
        DataLoader
            Node2vec training data loader.
        """
        return DataLoader(torch.arange(self.N), batch_size=batch_size, shuffle=True, collate_fn=self.sample)

    @torch.no_grad()
    def evaluate(self, x_train, y_train, x_val, y_val):
        """
        Evaluate the quality of the embeddings via a downstream node classification
        task with logistic regression.
        """
        x_train = self.forward(x_train)
        x_val = self.forward(x_val)

        x_train, y_train = x_train.cpu().numpy(), y_train.cpu().numpy()
        x_val, y_val = x_val.cpu().numpy(), y_val.cpu().numpy()
        lr = LogisticRegression(solver='lbfgs', multi_class='auto', max_iter=150).fit(x_train, y_train)

        return lr.score(x_val, y_val)
class Node2vecModel(object):
    """
    Wrapper of the ``Node2vec`` class with a ``train`` method.

    Attributes
    ----------
    g: DGLGraph
        The graph.
    embedding_dim: int
        Dimension of node embedding.
    walk_length: int
        Length of each trace.
    p: float
        Likelihood of immediately revisiting a node in the walk.
    q: float
        Control parameter to interpolate between breadth-first strategy and depth-first strategy.
    num_walks: int
        Number of random walks for each node. Default: 10.
    window_size: int
        Maximum distance between the center node and predicted node. Default: 5.
    num_negatives: int
        The number of negative samples for each positive sample. Default: 5.
    use_sparse: bool
        If set to True, uses PyTorch's sparse embedding and optimizer. Default: ``True``.
    weight_name : str, optional
        The name of the edge feature tensor on the graph storing the (unnormalized)
        probabilities associated with each edge for choosing the next node.

        The feature tensor must be non-negative and the sum of the probabilities
        must be positive for the outbound edges of all nodes (although they don't have
        to sum up to one). The result will be undefined otherwise.

        If omitted, DGL assumes that the neighbors are picked uniformly. Default: ``None``.
    eval_set: list of tuples (Tensor, Tensor)
        ``[(nodes_train, y_train), (nodes_val, y_val)]``.
        If omitted, the model will not be evaluated. Default: ``None``.
    eval_steps: int
        Interval (in epochs) between evaluations.
        If set <= 0, the model will not be evaluated. Default: ``-1``.
    device: str
        Device to train on, e.g. ``'cpu'`` or ``'cuda'``. Default: ``'cpu'``.
    """
    def __init__(self, g, embedding_dim, walk_length, p=1.0, q=1.0, num_walks=1, window_size=5,
                 num_negatives=5, use_sparse=True, weight_name=None, eval_set=None, eval_steps=-1, device='cpu'):

        self.model = Node2vec(g, embedding_dim, walk_length, p, q, num_walks,
                              window_size, num_negatives, use_sparse, weight_name)
        self.g = g
        self.use_sparse = use_sparse
        self.eval_steps = eval_steps
        self.eval_set = eval_set

        if device == 'cpu':
            self.device = device
        else:
            self.device = 'cuda' if torch.cuda.is_available() else 'cpu'

    def _train_step(self, model, loader, optimizer, device):
        model.train()
        total_loss = 0
        for pos_traces, neg_traces in loader:
            pos_traces, neg_traces = pos_traces.to(device), neg_traces.to(device)
            optimizer.zero_grad()
            loss = model.loss(pos_traces, neg_traces)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        return total_loss / len(loader)

    @torch.no_grad()
    def _evaluate_step(self):
        nodes_train, y_train = self.eval_set[0]
        nodes_val, y_val = self.eval_set[1]

        acc = self.model.evaluate(nodes_train, y_train, nodes_val, y_val)
        return acc

    def train(self, epochs, batch_size, learning_rate=0.01):
        """
        Parameters
        ----------
        epochs: int
            Number of training epochs.
        batch_size: int
            Batch size.
        learning_rate: float
            Learning rate. Default: 0.01.
        """
        self.model = self.model.to(self.device)
        loader = self.model.loader(batch_size)
        if self.use_sparse:
            optimizer = torch.optim.SparseAdam(list(self.model.parameters()), lr=learning_rate)
        else:
            optimizer = torch.optim.Adam(self.model.parameters(), lr=learning_rate)

        for i in range(epochs):
            loss = self._train_step(self.model, loader, optimizer, self.device)
            if self.eval_steps > 0:
                if (i + 1) % self.eval_steps == 0:
                    acc = self._evaluate_step()
                    print("Epoch: {}, Train Loss: {:.4f}, Val Acc: {:.4f}".format(i, loss, acc))

    def embedding(self, nodes=None):
        """
        Returns the embeddings of the input nodes.

        Parameters
        ----------
        nodes: Tensor, optional
            Input nodes. If `None`, returns the embeddings of all nodes.

        Returns
        -------
        Tensor
            Node embeddings.
        """
        return self.model(nodes)
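The rolling-window trick in ``Node2vec.sample`` above is compact but easy to misread. The standalone PyTorch sketch below (illustrative sizes only, not part of the example files) shows what ``unfold`` produces: a trace of length ``walk_length + 1`` becomes ``walk_length + 2 - window_size`` overlapping windows, whose first column ``loss`` then treats as the center node and remaining columns as its (positive or negative) contexts in a skip-gram objective with negative sampling.

```python
import torch

# One walk trace of length 6 (walk_length=5), window_size=3
trace = torch.arange(6).view(1, 6)          # tensor([[0, 1, 2, 3, 4, 5]])
windows = trace.unfold(1, 3, 1)             # shape (1, 4, 3): four rolling windows
windows = windows.contiguous().view(-1, 3)  # tensor([[0, 1, 2],
                                            #         [1, 2, 3],
                                            #         [2, 3, 4],
                                            #         [3, 4, 5]])
# In Node2vec.loss, windows[:, 0] is the "start" node and windows[:, 1:]
# are its contexts; their dot products feed the logistic loss.
```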
import argparse

from dgl.data import CitationGraphDataset
from ogb.nodeproppred import DglNodePropPredDataset


def load_graph(name):
    cite_graphs = ['cora', 'citeseer', 'pubmed']

    if name in cite_graphs:
        dataset = CitationGraphDataset(name)
        graph = dataset[0]

        nodes = graph.nodes()
        y = graph.ndata['label']
        train_mask = graph.ndata['train_mask']
        val_mask = graph.ndata['test_mask']
        nodes_train, y_train = nodes[train_mask], y[train_mask]
        nodes_val, y_val = nodes[val_mask], y[val_mask]

        eval_set = [(nodes_train, y_train), (nodes_val, y_val)]

    elif name.startswith('ogbn'):
        dataset = DglNodePropPredDataset(name)
        graph, y = dataset[0]

        split_nodes = dataset.get_idx_split()
        nodes = graph.nodes()

        train_idx = split_nodes['train']
        val_idx = split_nodes['valid']

        nodes_train, y_train = nodes[train_idx], y[train_idx]
        nodes_val, y_val = nodes[val_idx], y[val_idx]

        eval_set = [(nodes_train, y_train), (nodes_val, y_val)]
    else:
        raise ValueError("Unsupported dataset: {}".format(name))

    return graph, eval_set


def parse_arguments():
    """
    Parse command-line arguments.
    """
    parser = argparse.ArgumentParser(description='Node2vec')
    parser.add_argument('--dataset', type=str, default='cora')
    # 'train' for training the node2vec model, 'time' for timing random walks
    parser.add_argument('--task', type=str, default='train')
    parser.add_argument('--runs', type=int, default=10)
    parser.add_argument('--device', type=str, default='cpu')
    parser.add_argument('--embedding_dim', type=int, default=128)
    parser.add_argument('--walk_length', type=int, default=50)
    parser.add_argument('--p', type=float, default=0.25)
    parser.add_argument('--q', type=float, default=4.0)
    parser.add_argument('--num_walks', type=int, default=10)
    parser.add_argument('--epochs', type=int, default=100)
    parser.add_argument('--batch_size', type=int, default=128)

    args = parser.parse_args()
    return args
@@ -10,6 +10,7 @@
 #include <dgl/array.h>
 #include <vector>
 #include <utility>
+#include <tuple>

 namespace dgl {
@@ -26,9 +27,11 @@ namespace sampling {
  * \return A pair of
  * 1. One 2D array of shape (len(seeds), len(metapath) + 1) with node IDs. The
  *    paths that terminated early are padded with -1.
- * 2. One 1D array of shape (len(metapath) + 1) with node type IDs.
+ * 2. One 2D array of shape (len(seeds), len(metapath)) with edge IDs. The
+ *    paths that terminated early are padded with -1.
+ * 3. One 1D array of shape (len(metapath) + 1) with node type IDs.
  */
-std::pair<IdArray, TypeArray> RandomWalk(
+std::tuple<IdArray, IdArray, TypeArray> RandomWalk(
     const HeteroGraphPtr hg,
     const IdArray seeds,
     const TypeArray metapath,
@@ -46,9 +49,11 @@ std::pair<IdArray, TypeArray> RandomWalk(
  * \return A pair of
  * 1. One 2D array of shape (len(seeds), len(metapath) + 1) with node IDs. The
  *    paths that terminated early are padded with -1.
- * 2. One 1D array of shape (len(metapath) + 1) with node type IDs.
+ * 2. One 2D array of shape (len(seeds), len(metapath)) with edge IDs. The
+ *    paths that terminated early are padded with -1.
+ * 3. One 1D array of shape (len(metapath) + 1) with node type IDs.
  */
-std::pair<IdArray, TypeArray> RandomWalkWithRestart(
+std::tuple<IdArray, IdArray, TypeArray> RandomWalkWithRestart(
     const HeteroGraphPtr hg,
     const IdArray seeds,
     const TypeArray metapath,
@@ -69,9 +74,11 @@ std::pair<IdArray, TypeArray> RandomWalkWithRestart(
  * \return A pair of
  * 1. One 2D array of shape (len(seeds), len(metapath) + 1) with node IDs. The
  *    paths that terminated early are padded with -1.
- * 2. One 1D array of shape (len(metapath) + 1) with node type IDs.
+ * 2. One 2D array of shape (len(seeds), len(metapath)) with edge IDs. The
+ *    paths that terminated early are padded with -1.
+ * 3. One 1D array of shape (len(metapath) + 1) with node type IDs.
  */
-std::pair<IdArray, TypeArray> RandomWalkWithStepwiseRestart(
+std::tuple<IdArray, IdArray, TypeArray> RandomWalkWithStepwiseRestart(
     const HeteroGraphPtr hg,
     const IdArray seeds,
     const TypeArray metapath,
...
@@ -8,3 +8,4 @@ gives a holistic explanation on how different components work together.
 from .randomwalks import *
 from .pinsage import *
 from .neighbor import *
+from .node2vec_randomwalk import *
"""Node2vec random walk"""
from .._ffi.function import _init_api
from .. import backend as F
from .. import ndarray as nd
from .. import utils
# pylint: disable=invalid-name
__all__ = ['node2vec_random_walk']
def node2vec_random_walk(g, nodes, p, q, walk_length, prob=None, return_eids=False):
"""
Generate random walk traces from an array of starting nodes based on the node2vec model.
Paper: `node2vec: Scalable Feature Learning for Networks
<https://arxiv.org/abs/1607.00653>`__.
The returned traces all have length ``walk_length + 1``, where the first node
is the starting node itself.
Note that if a random walk stops in advance, DGL pads the trace with -1 to have the same
length.
Parameters
----------
g : DGLGraph
The graph. Must be on CPU.
Note that node2vec only support homogeneous graph.
nodes : Tensor
Node ID tensor from which the random walk traces starts.
The tensor must be on CPU, and must have the same dtype as the ID type
of the graph.
p: float
Likelihood of immediately revisiting a node in the walk.
q: float
Control parameter to interpolate between breadth-first strategy and depth-first strategy.
walk_length: int
Length of random walks.
prob : str, optional
The name of the edge feature tensor on the graph storing the (unnormalized)
probabilities associated with each edge for choosing the next node.
The feature tensor must be non-negative and the sum of the probabilities
must be positive for the outbound edges of all nodes (although they don't have
to sum up to one). The result will be undefined otherwise.
If omitted, DGL assumes that the neighbors are picked uniformly.
return_eids : bool, optional
If True, additionally return the edge IDs traversed.
Default: False.
Returns
-------
traces : Tensor
A 2-dimensional node ID tensor with shape ``(num_seeds, walk_length + 1)``.
eids : Tensor, optional
A 2-dimensional edge ID tensor with shape ``(num_seeds, length)``.
Only returned if :attr:`return_eids` is True.
Examples
--------
>>> g1 = dgl.graph(([0, 1, 1, 2, 3], [1, 2, 3, 0, 0]))
>>> dgl.sampling.node2vec_random_walk(g1, [0, 1, 2, 0], 1, 1, length=4)
tensor([[0, 1, 3, 0, 1],
[1, 2, 0, 1, 3],
[2, 0, 1, 3, 0],
[0, 1, 2, 0, 1]])
>>> dgl.sampling.node2vec_random_walk(g1, [0, 1, 2, 0], 1, 1, length=4, return_eids=True)
(tensor([[0, 1, 3, 0, 1],
[1, 2, 0, 1, 2],
[2, 0, 1, 2, 0],
[0, 1, 2, 0, 1]]),
tensor([[0, 2, 4, 0],
[1, 3, 0, 1],
[3, 0, 1, 3],
[0, 1, 3, 0]]))
"""
assert g.device == F.cpu(), "Graph must be on CPU."
gidx = g._graph
nodes = F.to_dgl_nd(utils.prepare_tensor(g, nodes, 'nodes'))
if prob is None:
prob_nd = nd.array([], ctx=nodes.ctx)
else:
prob_nd = F.to_dgl_nd(g.edata[prob])
traces, eids = _CAPI_DGLSamplingNode2vec(gidx, nodes, p, q, walk_length, prob_nd)
traces = F.from_dgl_nd(traces)
eids = F.from_dgl_nd(eids)
return (traces, eids) if return_eids else traces
_init_api('dgl.sampling.randomwalks', __name__)
@@ -11,7 +11,8 @@ __all__ = [
     'random_walk',
     'pack_traces']

-def random_walk(g, nodes, *, metapath=None, length=None, prob=None, restart_prob=None):
+def random_walk(g, nodes, *, metapath=None, length=None, prob=None, restart_prob=None,
+                return_eids=False):
     """Generate random walk traces from an array of starting nodes based on the given metapath.

     For a single starting node, ``num_traces`` traces would be generated.  A trace would
@@ -62,12 +63,20 @@ def random_walk(g, nodes, *, metapath=None, length=None, prob=None, restart_prob
         If a tensor is given, :attr:`restart_prob` should have the same length as
         :attr:`metapath` or :attr:`length`.
+    return_eids : bool, optional
+        If True, additionally return the edge IDs traversed.
+
+        Default: False.

     Returns
     -------
     traces : Tensor
         A 2-dimensional node ID tensor with shape ``(num_seeds, len(metapath) + 1)`` or
         ``(num_seeds, length + 1)`` if :attr:`metapath` is None.
+    eids : Tensor, optional
+        A 2-dimensional edge ID tensor with shape ``(num_seeds, len(metapath))`` or
+        ``(num_seeds, length)`` if :attr:`metapath` is None.  Only returned if
+        :attr:`return_eids` is True.
     types : Tensor
         A 1-dimensional node type ID tensor with shape ``(len(metapath) + 1)`` or
         ``(length + 1)``.
@@ -90,6 +99,19 @@ def random_walk(g, nodes, *, metapath=None, length=None, prob=None, restart_prob
             [2, 0, 1, 3, 0],
             [0, 1, 2, 0, 1]]), tensor([0, 0, 0, 0, 0]))

+    Or returning edge IDs:
+
+    >>> dgl.sampling.random_walk(g1, [0, 1, 2, 0], length=4, return_eids=True)
+    (tensor([[0, 1, 2, 0, 1],
+             [1, 3, 0, 1, 2],
+             [2, 0, 1, 3, 0],
+             [0, 1, 3, 0, 1]]),
+     tensor([[0, 1, 3, 0],
+             [2, 4, 0, 1],
+             [3, 0, 2, 4],
+             [0, 2, 4, 0]]),
+     tensor([0, 0, 0, 0, 0]))
+
     The first tensor indicates the random walk path for each seed node.
     The j-th element in the second tensor indicates the node type ID of the j-th node
     in every path.  In this case, it is returning all 0.
@@ -170,18 +192,19 @@ def random_walk(g, nodes, *, metapath=None, length=None, prob=None, restart_prob
     # Actual random walk
     if restart_prob is None:
-        traces, types = _CAPI_DGLSamplingRandomWalk(gidx, nodes, metapath, p_nd)
+        traces, eids, types = _CAPI_DGLSamplingRandomWalk(gidx, nodes, metapath, p_nd)
     elif F.is_tensor(restart_prob):
         restart_prob = F.to_dgl_nd(restart_prob)
-        traces, types = _CAPI_DGLSamplingRandomWalkWithStepwiseRestart(
+        traces, eids, types = _CAPI_DGLSamplingRandomWalkWithStepwiseRestart(
             gidx, nodes, metapath, p_nd, restart_prob)
     else:
-        traces, types = _CAPI_DGLSamplingRandomWalkWithRestart(
+        traces, eids, types = _CAPI_DGLSamplingRandomWalkWithRestart(
             gidx, nodes, metapath, p_nd, restart_prob)

     traces = F.from_dgl_nd(traces)
     types = F.from_dgl_nd(types)
-    return traces, types
+    eids = F.from_dgl_nd(eids)
+    return (traces, eids, types) if return_eids else (traces, types)

 def pack_traces(traces, types):
     """Pack the padded traces returned by ``random_walk()`` into a concatenated array.
...
@@ -11,6 +11,7 @@
 #include <dgl/base_heterograph.h>
 #include <dgl/random.h>
 #include <utility>
+#include <tuple>
 #include <vector>
 #include "randomwalks_impl.h"
 #include "randomwalks_cpu.h"
@@ -47,14 +48,15 @@ using TerminatePredicate = std::function<bool(IdxType *, dgl_id_t, int64_t)>;
  * \param prob Transition probability per edge type.
  * \param terminate Predicate for terminating the current random walk path.
  *
- * \return A pair of ID of next successor (-1 if not exist), as well as whether to terminate.
+ * \return A tuple of ID of next successor (-1 if not exist), the last traversed edge
+ *         ID, as well as whether to terminate.
  */
 template<DLDeviceType XPU, typename IdxType>
-std::pair<dgl_id_t, bool> MetapathRandomWalkStep(
+std::tuple<dgl_id_t, dgl_id_t, bool> MetapathRandomWalkStep(
     IdxType *data,
     dgl_id_t curr,
     int64_t len,
-    const std::vector<std::vector<IdArray> > &edges_by_type,
+    const std::vector<CSRMatrix> &edges_by_type,
     const IdxType *metapath_data,
     const std::vector<FloatArray> &prob,
     TerminatePredicate<IdxType> terminate) {
@@ -65,14 +67,16 @@ std::pair<dgl_id_t, bool> MetapathRandomWalkStep(
   // construction) as much as possible.
   // Using Successors() slows down by 2x.
   // Using OutEdges() slows down by 10x.
-  const std::vector<NDArray> &csr_arrays = edges_by_type[etype];
-  const IdxType *offsets = static_cast<IdxType *>(csr_arrays[0]->data);
-  const IdxType *all_succ = static_cast<IdxType *>(csr_arrays[1]->data);
+  const CSRMatrix &csr = edges_by_type[etype];
+  const IdxType *offsets = csr.indptr.Ptr<IdxType>();
+  const IdxType *all_succ = csr.indices.Ptr<IdxType>();
+  const IdxType *all_eids = CSRHasData(csr) ? csr.data.Ptr<IdxType>() : nullptr;
   const IdxType *succ = all_succ + offsets[curr];
+  const IdxType *eids = all_eids ? (all_eids + offsets[curr]) : nullptr;
   const int64_t size = offsets[curr + 1] - offsets[curr];
   if (size == 0)
-    return std::make_pair(-1, true);
+    return std::make_tuple(-1, -1, true);

   // Use a reference to the original array instead of copying
   // This avoids updating the ref counts atomically from different threads
@@ -83,22 +87,18 @@ std::pair<dgl_id_t, bool> MetapathRandomWalkStep(
     // empty probability array; assume uniform
     idx = RandomEngine::ThreadLocal()->RandInt(size);
   } else {
-    // non-uniform random walk
-    const IdxType *all_eids = static_cast<IdxType *>(csr_arrays[2]->data);
-    const IdxType *eids = all_eids + offsets[curr];
     ATEN_FLOAT_TYPE_SWITCH(prob_etype->dtype, DType, "probability", {
       FloatArray prob_selected = FloatArray::Empty({size}, prob_etype->dtype, prob_etype->ctx);
-      DType *prob_selected_data = static_cast<DType *>(prob_selected->data);
-      const DType *prob_etype_data = static_cast<DType *>(prob_etype->data);
+      DType *prob_selected_data = prob_selected.Ptr<DType>();
+      const DType *prob_etype_data = prob_etype.Ptr<DType>();
       for (int64_t j = 0; j < size; ++j)
-        prob_selected_data[j] = prob_etype_data[eids[j]];
+        prob_selected_data[j] = prob_etype_data[eids ? eids[j] : j + offsets[curr]];
       idx = RandomEngine::ThreadLocal()->Choice<IdxType>(prob_selected);
     });
   }

-  curr = succ[idx];
-  return std::make_pair(curr, terminate(data, curr, len));
+  dgl_id_t eid = eids ? eids[idx] : (idx + offsets[curr]);
+  return std::make_tuple(succ[idx], eid, terminate(data, curr, len));
 }

 /*!
@@ -119,11 +119,11 @@ std::pair<dgl_id_t, bool> MetapathRandomWalkStep(
  * \note This function is called only if all the probability arrays are null.
  */
 template<DLDeviceType XPU, typename IdxType>
-std::pair<dgl_id_t, bool> MetapathRandomWalkStepUniform(
+std::tuple<dgl_id_t, dgl_id_t, bool> MetapathRandomWalkStepUniform(
     IdxType *data,
     dgl_id_t curr,
     int64_t len,
-    const std::vector<std::vector<IdArray> > &edges_by_type,
+    const std::vector<CSRMatrix> &edges_by_type,
     const IdxType *metapath_data,
     const std::vector<FloatArray> &prob,
     TerminatePredicate<IdxType> terminate) {
@@ -134,21 +134,23 @@ std::pair<dgl_id_t, bool> MetapathRandomWalkStepUniform(
   // construction) as much as possible.
   // Using Successors() slows down by 2x.
   // Using OutEdges() slows down by 10x.
-  const std::vector<NDArray> &csr_arrays = edges_by_type[etype];
-  const IdxType *offsets = static_cast<IdxType *>(csr_arrays[0]->data);
-  const IdxType *all_succ = static_cast<IdxType *>(csr_arrays[1]->data);
+  const CSRMatrix &csr = edges_by_type[etype];
+  const IdxType *offsets = csr.indptr.Ptr<IdxType>();
+  const IdxType *all_succ = csr.indices.Ptr<IdxType>();
+  const IdxType *all_eids = CSRHasData(csr) ? csr.data.Ptr<IdxType>() : nullptr;
   const IdxType *succ = all_succ + offsets[curr];
+  const IdxType *eids = all_eids ? (all_eids + offsets[curr]) : nullptr;
   const int64_t size = offsets[curr + 1] - offsets[curr];
   if (size == 0)
-    return std::make_pair(-1, true);
+    return std::make_tuple(-1, -1, true);

   IdxType idx = 0;
   // Guaranteed uniform distribution
   idx = RandomEngine::ThreadLocal()->RandInt(size);

-  curr = succ[idx];
-  return std::make_pair(curr, terminate(data, curr, len));
+  dgl_id_t eid = eids ? eids[idx] : (idx + offsets[curr]);
+  return std::make_tuple(succ[idx], eid, terminate(data, curr, len));
 }

 /*!
@@ -160,10 +162,11 @@ std::pair<dgl_id_t, bool> MetapathRandomWalkStepUniform(
  * \param prob A vector of 1D float arrays, indicating the transition probability of
  *        each edge by edge type.  An empty float array assumes uniform transition.
  * \param terminate Predicate for terminating a random walk path.
- * \return A 2D array of shape (len(seeds), len(metapath) + 1) with node IDs.
+ * \return A 2D array of shape (len(seeds), len(metapath) + 1) with node IDs, and
+ *         a 2D array of shape (len(seeds), len(metapath)) with edge IDs.
  */
 template<DLDeviceType XPU, typename IdxType>
-IdArray MetapathBasedRandomWalk(
+std::pair<IdArray, IdArray> MetapathBasedRandomWalk(
     const HeteroGraphPtr hg,
     const IdArray seeds,
     const TypeArray metapath,
@@ -176,13 +179,12 @@ IdArray MetapathBasedRandomWalk(
   // This forces the heterograph to materialize all OutCSR's before the OpenMP loop;
   // otherwise data races will happen.
   // TODO(BarclayII): should we later on materialize COO/CSR/CSC anyway unless told otherwise?
-  std::vector<std::vector<IdArray> > edges_by_type;
+  std::vector<CSRMatrix> edges_by_type;
   for (dgl_type_t etype = 0; etype < hg->NumEdgeTypes(); ++etype)
-    edges_by_type.push_back(hg->GetAdj(etype, true, "csr"));
+    edges_by_type.push_back(hg->GetCSRMatrix(etype));

   // Hoist the check for Uniform vs Non uniform edge distribution
   // to avoid putting it on the hot path
-  StepFunc<IdxType> step;
   bool isUniform = true;
   for (const auto &etype_prob : prob) {
     if (!IsNullArray(etype_prob)) {
@@ -191,22 +193,22 @@ IdArray MetapathBasedRandomWalk(
     }
   }

   if (!isUniform) {
-    step =
+    StepFunc<IdxType> step =
       [&edges_by_type, metapath_data, &prob, terminate]
       (IdxType *data, dgl_id_t curr, int64_t len) {
         return MetapathRandomWalkStep<XPU, IdxType>(
             data, curr, len, edges_by_type, metapath_data, prob, terminate);
       };
+    return GenericRandomWalk<XPU, IdxType>(seeds, max_num_steps, step);
   } else {
-    step =
+    StepFunc<IdxType> step =
       [&edges_by_type, metapath_data, &prob, terminate]
       (IdxType *data, dgl_id_t curr, int64_t len) {
         return MetapathRandomWalkStepUniform<XPU, IdxType>(
             data, curr, len, edges_by_type, metapath_data, prob, terminate);
       };
-  }
   return GenericRandomWalk<XPU, IdxType>(seeds, max_num_steps, step);
+  }
 }

 };  // namespace
...
/*!
 *  Copyright (c) 2021 by Contributors
 * \file graph/sampling/node2vec.cc
 * \brief Dispatcher of DGL node2vec random walks
 */

#include <dgl/array.h>
#include <dgl/packed_func_ext.h>
#include <dgl/runtime/container.h>
#include "../../../c_api_common.h"
#include "node2vec_impl.h"

using namespace dgl::runtime;
using namespace dgl::aten;

namespace dgl {
namespace sampling {

namespace {

void CheckNode2vecInputs(const HeteroGraphPtr hg, const IdArray seeds,
                         const double p, const double q,
                         const int64_t walk_length, const FloatArray &prob) {
  CHECK_INT(seeds, "seeds");
  CHECK_NDIM(seeds, 1, "seeds");
  CHECK_FLOAT(prob, "probability");
  CHECK_NDIM(prob, 1, "probability");
}

std::pair<IdArray, IdArray> Node2vec(
    const HeteroGraphPtr hg, const IdArray seeds, const double p,
    const double q, const int64_t walk_length,
    const FloatArray &prob) {
  CheckNode2vecInputs(hg, seeds, p, q, walk_length, prob);

  std::pair<IdArray, IdArray> result;
  ATEN_XPU_SWITCH(hg->Context().device_type, XPU, "Node2vec", {
    ATEN_ID_TYPE_SWITCH(seeds->dtype, IdxType, {
      result = impl::Node2vec<XPU, IdxType>(hg, seeds, p, q, walk_length, prob);
    });
  });

  return result;
}

DGL_REGISTER_GLOBAL("sampling.randomwalks._CAPI_DGLSamplingNode2vec")
    .set_body([](DGLArgs args, DGLRetValue *rv) {
      HeteroGraphRef hg = args[0];
      IdArray seeds = args[1];
      double p = args[2];
      double q = args[3];
      int64_t walk_length = args[4];
      FloatArray prob = args[5];

      auto result =
          sampling::Node2vec(hg.sptr(), seeds, p, q, walk_length, prob);

      List<Value> ret;
      ret.push_back(Value(MakeValue(result.first)));
      ret.push_back(Value(MakeValue(result.second)));

      *rv = ret;
    });

}  // namespace
}  // namespace sampling
}  // namespace dgl
/*!
 *  Copyright (c) 2021 by Contributors
 * \file graph/sampling/node2vec_cpu.cc
 * \brief DGL sampler - CPU implementation of node2vec random walk with OpenMP
 */

#include <dgl/array.h>
#include <dgl/base_heterograph.h>
#include <utility>
#include "node2vec_randomwalk.h"

namespace dgl {

using namespace dgl::runtime;
using namespace dgl::aten;

namespace sampling {

namespace impl {

template <DLDeviceType XPU, typename IdxType>
std::pair<IdArray, IdArray> Node2vec(
    const HeteroGraphPtr hg, const IdArray seeds, const double p,
    const double q, const int64_t walk_length,
    const FloatArray &prob) {
  TerminatePredicate<IdxType> terminate = [](IdxType *data, dgl_id_t curr,
                                             int64_t len) { return false; };
  return Node2vecRandomWalk<XPU, IdxType>(hg, seeds, p, q, walk_length, prob,
                                          terminate);
}

template std::pair<IdArray, IdArray> Node2vec<kDLCPU, int32_t>(
    const HeteroGraphPtr hg,
    const IdArray seeds, const double p,
    const double q,
    const int64_t walk_length,
    const FloatArray &prob);
template std::pair<IdArray, IdArray> Node2vec<kDLCPU, int64_t>(
    const HeteroGraphPtr hg,
    const IdArray seeds, const double p,
    const double q,
    const int64_t walk_length,
    const FloatArray &prob);

};  // namespace impl

};  // namespace sampling

};  // namespace dgl
/*!
 *  Copyright (c) 2021 by Contributors
 * \file graph/sampling/node2vec_impl.h
 * \brief DGL sampler - templated implementation definition of node2vec random
 *        walks
 */
#ifndef DGL_GRAPH_SAMPLING_RANDOMWALKS_NODE2VEC_IMPL_H_
#define DGL_GRAPH_SAMPLING_RANDOMWALKS_NODE2VEC_IMPL_H_

#include <dgl/array.h>
#include <dgl/base_heterograph.h>
#include <functional>
#include <utility>
#include <vector>
#include <tuple>

namespace dgl {

using namespace dgl::runtime;
using namespace dgl::aten;

namespace sampling {

namespace impl {

/*!
 * \brief Node2vec random walk.
 * \param hg The graph (must be homogeneous).
 * \param seeds A 1D array of seed nodes.
 * \param p Float, indicating likelihood of immediately revisiting a node in the walk.
 * \param q Float, control parameter to interpolate between breadth-first strategy and
 *        depth-first strategy.
 * \param walk_length Int, length of walk.
 * \param prob A 1D float array, indicating the transition probability of each edge.
 *        An empty float array assumes uniform transition.
 * \return A pair of
 *         1. a 2D array of shape (len(seeds), walk_length + 1) with node IDs, and
 *         2. a 2D array of shape (len(seeds), walk_length) with edge IDs.
 *         The paths that terminated early are padded with -1.
 */
template <DLDeviceType XPU, typename IdxType>
std::pair<IdArray, IdArray> Node2vec(
    const HeteroGraphPtr hg, const IdArray seeds, const double p,
    const double q, const int64_t walk_length,
    const FloatArray &prob);

};  // namespace impl

};  // namespace sampling

};  // namespace dgl

#endif  // DGL_GRAPH_SAMPLING_RANDOMWALKS_NODE2VEC_IMPL_H_
/*!
 *  Copyright (c) 2021 by Contributors
 * \file graph/sampling/node2vec_randomwalk.h
 * \brief DGL sampler - CPU implementation of node2vec random walk.
 */

#ifndef DGL_GRAPH_SAMPLING_RANDOMWALKS_NODE2VEC_RANDOMWALK_H_
#define DGL_GRAPH_SAMPLING_RANDOMWALKS_NODE2VEC_RANDOMWALK_H_

#include <dgl/array.h>
#include <dgl/base_heterograph.h>
#include <dgl/random.h>
#include <algorithm>
#include <cmath>
#include <functional>
#include <utility>
#include <vector>
#include <tuple>
#include "node2vec_impl.h"
#include "randomwalks_cpu.h"
#include "metapath_randomwalk.h"  // for TerminatePredicate

namespace dgl {

using namespace dgl::runtime;
using namespace dgl::aten;

namespace sampling {

namespace impl {

namespace {

template <typename IdxType>
bool has_edge_between(const CSRMatrix &csr, dgl_id_t u,
                      dgl_id_t v) {
  const IdxType *offsets = csr.indptr.Ptr<IdxType>();
  const IdxType *all_succ = csr.indices.Ptr<IdxType>();
  const IdxType *u_succ = all_succ + offsets[u];
  const int64_t size = offsets[u + 1] - offsets[u];
  if (csr.sorted)
    return std::binary_search(u_succ, u_succ + size, v);
  else
    return std::find(u_succ, u_succ + size, v) != u_succ + size;
}

/*!
 * \brief Node2vec random walk step function
 * \param data The path generated so far, of type \c IdxType.
 * \param curr The last node ID generated.
 * \param pre The node ID generated before \c curr.
 * \param p Float, indicating likelihood of immediately revisiting a node in the
 *        walk.
 * \param q Float, control parameter to interpolate between breadth-first
 *        strategy and depth-first strategy.
 * \param len The number of nodes generated so far. Note that the seed node is
 *        always included as \c data[0], and the successors start from \c data[1].
 * \param csr The CSR matrix.
 * \param probs Transition probability.
 * \param terminate Predicate for terminating the current random walk path.
 * \return A tuple of ID of next successor (-1 if not exist), the edge ID traversed,
 *         as well as whether to terminate.
 */
template <DLDeviceType XPU, typename IdxType>
std::tuple<dgl_id_t, dgl_id_t, bool> Node2vecRandomWalkStep(
    IdxType *data, dgl_id_t curr, dgl_id_t pre, const double p, const double q,
    int64_t len, const CSRMatrix &csr, const FloatArray &probs,
    TerminatePredicate<IdxType> terminate) {
  const IdxType *offsets = csr.indptr.Ptr<IdxType>();
  const IdxType *all_succ = csr.indices.Ptr<IdxType>();
  const IdxType *all_eids = CSRHasData(csr) ? csr.data.Ptr<IdxType>() : nullptr;
  const IdxType *succ = all_succ + offsets[curr];
  const IdxType *eids = all_eids ? (all_eids + offsets[curr]) : nullptr;

  const int64_t size = offsets[curr + 1] - offsets[curr];

  // Isolated node
  if (size == 0) return std::make_tuple(-1, -1, true);

  IdxType idx = 0;

  // Normalize the biases to obtain acceptance probabilities for rejection sampling.
  double max_prob = std::max({1 / p, 1.0, 1 / q});
  // acceptance probability for returning to the previous node
  double prob0 = 1 / p / max_prob;
  // acceptance probability for visiting a node at distance 1 from the previous node
  double prob1 = 1 / max_prob;
  // acceptance probability for visiting a node at distance 2 from the previous node
  double prob2 = 1 / q / max_prob;

  dgl_id_t next_node;
  double r;  // uniform random draw for the accept/reject decision.
  if (IsNullArray(probs)) {
    if (len == 0) {
      idx = RandomEngine::ThreadLocal()->RandInt(size);
      next_node = succ[idx];
    } else {
      while (true) {
        idx = RandomEngine::ThreadLocal()->RandInt(size);
        r = RandomEngine::ThreadLocal()->Uniform(0., 1.);
        next_node = succ[idx];
        if (next_node == pre) {
          if (r < prob0) break;
        } else if (has_edge_between<IdxType>(csr, next_node, pre)) {
          if (r < prob1) break;
        } else if (r < prob2) {
          break;
        }
      }
    }
  } else {
    FloatArray prob_selected;
    ATEN_FLOAT_TYPE_SWITCH(probs->dtype, DType, "probability", {
      prob_selected = FloatArray::Empty({size}, probs->dtype, probs->ctx);
      DType *prob_selected_data = prob_selected.Ptr<DType>();
      const DType *prob_etype_data = probs.Ptr<DType>();
      for (int64_t j = 0; j < size; ++j)
        prob_selected_data[j] = prob_etype_data[eids ? eids[j] : j + offsets[curr]];
    });
    if (len == 0) {
      idx = RandomEngine::ThreadLocal()->Choice<IdxType>(prob_selected);
      next_node = succ[idx];
    } else {
      while (true) {
        idx = RandomEngine::ThreadLocal()->Choice<IdxType>(prob_selected);
        r = RandomEngine::ThreadLocal()->Uniform(0., 1.);
        next_node = succ[idx];
        if (next_node == pre) {
          if (r < prob0) break;
        } else if (has_edge_between<IdxType>(csr, next_node, pre)) {
          if (r < prob1) break;
        } else if (r < prob2) {
          break;
        }
      }
    }
  }
  dgl_id_t eid = eids ? eids[idx] : (idx + offsets[curr]);

  return std::make_tuple(next_node, eid, terminate(data, next_node, len));
}

template <DLDeviceType XPU, typename IdxType>
std::pair<IdArray, IdArray> Node2vecRandomWalk(
    const HeteroGraphPtr g, const IdArray seeds,
    const double p, const double q,
    const int64_t max_num_steps, const FloatArray &prob,
    TerminatePredicate<IdxType> terminate) {
  const CSRMatrix &edges = g->GetCSRMatrix(0);  // homogeneous graph.

  StepFunc<IdxType> step =
      [&edges, &prob, p, q, terminate]
      (IdxType *data, dgl_id_t curr, int64_t len) {
        dgl_id_t pre = (len != 0) ? data[len - 1] : curr;
        return Node2vecRandomWalkStep<XPU, IdxType>(data, curr, pre, p, q, len,
                                                    edges, prob, terminate);
      };

  return GenericRandomWalk<XPU, IdxType>(seeds, max_num_steps, step);
}

};  // namespace

};  // namespace impl

};  // namespace sampling

};  // namespace dgl

#endif  // DGL_GRAPH_SAMPLING_RANDOMWALKS_NODE2VEC_RANDOMWALK_H_
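For context on the accept/reject loop above: node2vec biases each transition by 1/p (return to the previous node, distance 0), 1 (move to a neighbor of the previous node, distance 1), or 1/q (move farther away, distance 2). Rather than materializing this biased distribution at every step, `Node2vecRandomWalkStep` draws a candidate neighbor and accepts it with the corresponding normalized probability. A minimal Python sketch of that rule (hypothetical standalone helper names, not part of the PR):

```python
import random

def accept_candidate(next_node, prev_node, next_connects_prev, p, q):
    """Accept or reject a drawn candidate neighbor, mirroring
    prob0/prob1/prob2 in Node2vecRandomWalkStep."""
    max_prob = max(1.0 / p, 1.0, 1.0 / q)
    r = random.random()                     # uniform draw in [0, 1)
    if next_node == prev_node:              # distance 0: bias 1/p
        return r < (1.0 / p) / max_prob
    elif next_connects_prev:                # distance 1: bias 1
        return r < 1.0 / max_prob
    else:                                   # distance 2: bias 1/q
        return r < (1.0 / q) / max_prob

# The walk keeps drawing candidates until one is accepted, e.g.:
#   while True:
#       cand = random.choice(neighbors[curr])
#       if accept_candidate(cand, prev, prev in neighbors[cand], p, q):
#           break
```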
@@ -7,6 +7,7 @@
 #include <dgl/array.h>
 #include <dgl/base_heterograph.h>
 #include <vector>
+#include <utility>
 #include "randomwalks_impl.h"
 #include "randomwalks_cpu.h"
 #include "metapath_randomwalk.h"
@@ -21,7 +22,7 @@ namespace sampling {
 namespace impl {

 template<DLDeviceType XPU, typename IdxType>
-IdArray RandomWalk(
+std::pair<IdArray, IdArray> RandomWalk(
     const HeteroGraphPtr hg,
     const IdArray seeds,
     const TypeArray metapath,
@@ -35,13 +36,13 @@ IdArray RandomWalk(
 }

 template
-IdArray RandomWalk<kDLCPU, int32_t>(
+std::pair<IdArray, IdArray> RandomWalk<kDLCPU, int32_t>(
     const HeteroGraphPtr hg,
     const IdArray seeds,
     const TypeArray metapath,
     const std::vector<FloatArray> &prob);

 template
-IdArray RandomWalk<kDLCPU, int64_t>(
+std::pair<IdArray, IdArray> RandomWalk<kDLCPU, int64_t>(
     const HeteroGraphPtr hg,
     const IdArray seeds,
     const TypeArray metapath,
...
@@ -23,7 +23,7 @@ namespace sampling {
 namespace impl {

 template<DLDeviceType XPU, typename IdxType>
-IdArray RandomWalkWithRestart(
+std::pair<IdArray, IdArray> RandomWalkWithRestart(
     const HeteroGraphPtr hg,
     const IdArray seeds,
     const TypeArray metapath,
@@ -37,14 +37,14 @@ IdArray RandomWalkWithRestart(
 }

 template
-IdArray RandomWalkWithRestart<kDLCPU, int32_t>(
+std::pair<IdArray, IdArray> RandomWalkWithRestart<kDLCPU, int32_t>(
     const HeteroGraphPtr hg,
     const IdArray seeds,
     const TypeArray metapath,
     const std::vector<FloatArray> &prob,
     double restart_prob);

 template
-IdArray RandomWalkWithRestart<kDLCPU, int64_t>(
+std::pair<IdArray, IdArray> RandomWalkWithRestart<kDLCPU, int64_t>(
     const HeteroGraphPtr hg,
     const IdArray seeds,
     const TypeArray metapath,
@@ -52,13 +52,13 @@ IdArray RandomWalkWithRestart<kDLCPU, int64_t>(
     double restart_prob);

 template<DLDeviceType XPU, typename IdxType>
-IdArray RandomWalkWithStepwiseRestart(
+std::pair<IdArray, IdArray> RandomWalkWithStepwiseRestart(
     const HeteroGraphPtr hg,
     const IdArray seeds,
     const TypeArray metapath,
     const std::vector<FloatArray> &prob,
     FloatArray restart_prob) {
-  IdArray result;
+  std::pair<IdArray, IdArray> result;

   ATEN_FLOAT_TYPE_SWITCH(restart_prob->dtype, DType, "restart probability", {
     DType *restart_prob_data = static_cast<DType *>(restart_prob->data);
@@ -73,14 +73,14 @@ IdArray RandomWalkWithStepwiseRestart(
 }

 template
-IdArray RandomWalkWithStepwiseRestart<kDLCPU, int32_t>(
+std::pair<IdArray, IdArray> RandomWalkWithStepwiseRestart<kDLCPU, int32_t>(
     const HeteroGraphPtr hg,
     const IdArray seeds,
     const TypeArray metapath,
     const std::vector<FloatArray> &prob,
     FloatArray restart_prob);

 template
-IdArray RandomWalkWithStepwiseRestart<kDLCPU, int64_t>(
+std::pair<IdArray, IdArray> RandomWalkWithStepwiseRestart<kDLCPU, int64_t>(
     const HeteroGraphPtr hg,
     const IdArray seeds,
     const TypeArray metapath,
...
...@@ -42,7 +42,7 @@ void CheckRandomWalkInputs( ...@@ -42,7 +42,7 @@ void CheckRandomWalkInputs(
}; // namespace }; // namespace
std::pair<IdArray, TypeArray> RandomWalk( std::tuple<IdArray, IdArray, TypeArray> RandomWalk(
const HeteroGraphPtr hg, const HeteroGraphPtr hg,
const IdArray seeds, const IdArray seeds,
const TypeArray metapath, const TypeArray metapath,
...@@ -50,18 +50,18 @@ std::pair<IdArray, TypeArray> RandomWalk( ...@@ -50,18 +50,18 @@ std::pair<IdArray, TypeArray> RandomWalk(
CheckRandomWalkInputs(hg, seeds, metapath, prob); CheckRandomWalkInputs(hg, seeds, metapath, prob);
TypeArray vtypes; TypeArray vtypes;
IdArray vids; std::pair<IdArray, IdArray> result;
ATEN_XPU_SWITCH(hg->Context().device_type, XPU, "RandomWalk", { ATEN_XPU_SWITCH(hg->Context().device_type, XPU, "RandomWalk", {
ATEN_ID_TYPE_SWITCH(seeds->dtype, IdxType, { ATEN_ID_TYPE_SWITCH(seeds->dtype, IdxType, {
vtypes = impl::GetNodeTypesFromMetapath<XPU, IdxType>(hg, metapath); vtypes = impl::GetNodeTypesFromMetapath<XPU, IdxType>(hg, metapath);
vids = impl::RandomWalk<XPU, IdxType>(hg, seeds, metapath, prob); result = impl::RandomWalk<XPU, IdxType>(hg, seeds, metapath, prob);
}); });
}); });
return std::make_pair(vids, vtypes); return std::make_tuple(result.first, result.second, vtypes);
} }
std::pair<IdArray, TypeArray> RandomWalkWithRestart( std::tuple<IdArray, IdArray, TypeArray> RandomWalkWithRestart(
const HeteroGraphPtr hg, const HeteroGraphPtr hg,
const IdArray seeds, const IdArray seeds,
const TypeArray metapath, const TypeArray metapath,
...@@ -71,18 +71,18 @@ std::pair<IdArray, TypeArray> RandomWalkWithRestart( ...@@ -71,18 +71,18 @@ std::pair<IdArray, TypeArray> RandomWalkWithRestart(
CHECK(restart_prob >= 0 && restart_prob < 1) << "restart probability must belong to [0, 1)"; CHECK(restart_prob >= 0 && restart_prob < 1) << "restart probability must belong to [0, 1)";
TypeArray vtypes; TypeArray vtypes;
IdArray vids; std::pair<IdArray, IdArray> result;
ATEN_XPU_SWITCH(hg->Context().device_type, XPU, "RandomWalkWithRestart", { ATEN_XPU_SWITCH(hg->Context().device_type, XPU, "RandomWalkWithRestart", {
ATEN_ID_TYPE_SWITCH(seeds->dtype, IdxType, { ATEN_ID_TYPE_SWITCH(seeds->dtype, IdxType, {
vtypes = impl::GetNodeTypesFromMetapath<XPU, IdxType>(hg, metapath); vtypes = impl::GetNodeTypesFromMetapath<XPU, IdxType>(hg, metapath);
vids = impl::RandomWalkWithRestart<XPU, IdxType>(hg, seeds, metapath, prob, restart_prob); result = impl::RandomWalkWithRestart<XPU, IdxType>(hg, seeds, metapath, prob, restart_prob);
}); });
}); });
-  return std::make_pair(vids, vtypes);
+  return std::make_tuple(result.first, result.second, vtypes);
 }

-std::pair<IdArray, TypeArray> RandomWalkWithStepwiseRestart(
+std::tuple<IdArray, IdArray, TypeArray> RandomWalkWithStepwiseRestart(
     const HeteroGraphPtr hg,
     const IdArray seeds,
     const TypeArray metapath,
@@ -92,16 +92,16 @@ std::pair<IdArray, TypeArray> RandomWalkWithStepwiseRestart(
   // TODO(BarclayII): check the elements of restart probability
   TypeArray vtypes;
-  IdArray vids;
+  std::pair<IdArray, IdArray> result;
   ATEN_XPU_SWITCH(hg->Context().device_type, XPU, "RandomWalkWithStepwiseRestart", {
     ATEN_ID_TYPE_SWITCH(seeds->dtype, IdxType, {
       vtypes = impl::GetNodeTypesFromMetapath<XPU, IdxType>(hg, metapath);
-      vids = impl::RandomWalkWithStepwiseRestart<XPU, IdxType>(
+      result = impl::RandomWalkWithStepwiseRestart<XPU, IdxType>(
           hg, seeds, metapath, prob, restart_prob);
     });
   });
-  return std::make_pair(vids, vtypes);
+  return std::make_tuple(result.first, result.second, vtypes);
 }

 };  // namespace sampling

@@ -117,8 +117,9 @@ DGL_REGISTER_GLOBAL("sampling.randomwalks._CAPI_DGLSamplingRandomWalk")
     auto result = sampling::RandomWalk(hg.sptr(), seeds, metapath, prob_vec);
     List<Value> ret;
-    ret.push_back(Value(MakeValue(result.first)));
-    ret.push_back(Value(MakeValue(result.second)));
+    ret.push_back(Value(MakeValue(std::get<0>(result))));
+    ret.push_back(Value(MakeValue(std::get<1>(result))));
+    ret.push_back(Value(MakeValue(std::get<2>(result))));
     *rv = ret;
   });

@@ -135,8 +136,9 @@ DGL_REGISTER_GLOBAL("sampling.randomwalks._CAPI_DGLSamplingRandomWalkWithRestart
     auto result = sampling::RandomWalkWithRestart(
         hg.sptr(), seeds, metapath, prob_vec, restart_prob);
     List<Value> ret;
-    ret.push_back(Value(MakeValue(result.first)));
-    ret.push_back(Value(MakeValue(result.second)));
+    ret.push_back(Value(MakeValue(std::get<0>(result))));
+    ret.push_back(Value(MakeValue(std::get<1>(result))));
+    ret.push_back(Value(MakeValue(std::get<2>(result))));
     *rv = ret;
   });

@@ -153,8 +155,9 @@ DGL_REGISTER_GLOBAL("sampling.randomwalks._CAPI_DGLSamplingRandomWalkWithStepwis
     auto result = sampling::RandomWalkWithStepwiseRestart(
         hg.sptr(), seeds, metapath, prob_vec, restart_prob);
     List<Value> ret;
-    ret.push_back(Value(MakeValue(result.first)));
-    ret.push_back(Value(MakeValue(result.second)));
+    ret.push_back(Value(MakeValue(std::get<0>(result))));
+    ret.push_back(Value(MakeValue(std::get<1>(result))));
+    ret.push_back(Value(MakeValue(std::get<2>(result))));
     *rv = ret;
   });
......
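For orientation before the remaining hunks: once the C API returns the third array, the Python-level `dgl.sampling.random_walk` exposes per-hop edge IDs through the `return_eids=True` path exercised by the updated tests below. A minimal sketch on a made-up cycle graph, assuming the three-value return order (traces, edge IDs, node types) shown in those tests:

```python
import dgl

# Toy 3-node cycle; every node has a successor, so no -1 padding occurs.
g = dgl.graph(([0, 1, 2], [1, 2, 0]))

traces, eids, ntypes = dgl.sampling.random_walk(
    g, [0, 1, 2], length=4, return_eids=True)

# Per the updated doc comments: the node trace has one more column than
# the edge trace, and early-terminated walks are padded with -1 in both.
assert tuple(traces.shape) == (3, 5)  # (len(seeds), length + 1)
assert tuple(eids.shape) == (3, 4)    # (len(seeds), length)
```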
@@ -9,6 +9,8 @@
 #include <dgl/base_heterograph.h>
 #include <dgl/array.h>

+#include <tuple>
+#include <utility>
 #include "randomwalks_impl.h"

 namespace dgl {
@@ -32,16 +34,18 @@ namespace {
  * \note The graph itself should be bounded in the closure of \c step.
  */
 template<DLDeviceType XPU, typename IdxType>
-IdArray GenericRandomWalk(
+std::pair<IdArray, IdArray> GenericRandomWalk(
     const IdArray seeds,
     int64_t max_num_steps,
     StepFunc<IdxType> step) {
   int64_t num_seeds = seeds->shape[0];
   int64_t trace_length = max_num_steps + 1;
   IdArray traces = IdArray::Empty({num_seeds, trace_length}, seeds->dtype, seeds->ctx);
+  IdArray eids = IdArray::Empty({num_seeds, max_num_steps}, seeds->dtype, seeds->ctx);

-  const IdxType *seed_data = static_cast<IdxType *>(seeds->data);
-  IdxType *traces_data = static_cast<IdxType *>(traces->data);
+  const IdxType *seed_data = seeds.Ptr<IdxType>();
+  IdxType *traces_data = traces.Ptr<IdxType>();
+  IdxType *eids_data = eids.Ptr<IdxType>();

 #pragma omp parallel for
   for (int64_t seed_id = 0; seed_id < num_seeds; ++seed_id) {
@@ -51,16 +55,19 @@ IdArray GenericRandomWalk(
     for (i = 0; i < max_num_steps; ++i) {
       const auto &succ = step(traces_data + seed_id * max_num_steps, curr, i);
-      traces_data[seed_id * trace_length + i + 1] = curr = succ.first;
-      if (succ.second)
+      traces_data[seed_id * trace_length + i + 1] = curr = std::get<0>(succ);
+      eids_data[seed_id * max_num_steps + i] = std::get<1>(succ);
+      if (std::get<2>(succ))
         break;
     }

-    for (; i < max_num_steps; ++i)
+    for (; i < max_num_steps; ++i) {
       traces_data[seed_id * trace_length + i + 1] = -1;
+      eids_data[seed_id * max_num_steps + i] = -1;
+    }
   }

-  return traces;
+  return std::make_pair(traces, eids);
 }

 };  // namespace
......
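The reworked `GenericRandomWalk` contract: each `step` call yields a (next node, edge ID, terminate?) triple, and when a step reports termination the slot it just wrote is reclaimed by the -1 padding loop, so the terminating hop never appears in the output. A rough NumPy analog of the driver loop, purely illustrative; the `step` callback here is a stand-in for the C++ `StepFunc`:

```python
import numpy as np

def generic_random_walk(seeds, max_num_steps, step):
    """step(curr, i) -> (next_node, edge_id, terminate), as in StepFunc."""
    num_seeds = len(seeds)
    traces = np.empty((num_seeds, max_num_steps + 1), dtype=np.int64)
    eids = np.empty((num_seeds, max_num_steps), dtype=np.int64)
    for s, seed in enumerate(seeds):
        traces[s, 0] = curr = seed
        i, stopped = 0, False
        while i < max_num_steps and not stopped:
            nxt, eid, stopped = step(curr, i)
            traces[s, i + 1] = curr = nxt
            eids[s, i] = eid
            if not stopped:
                i += 1
        # Pad from the stopping position; a stop at step i overwrites the
        # entry just written, mirroring the C++ padding loop above.
        traces[s, i + 1:] = -1
        eids[s, i:] = -1
    return traces, eids
```

A concrete `step` for a homogeneous graph would pick a random out-edge of `curr` and report termination when `curr` has none.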
@@ -11,6 +11,7 @@
 #include <dgl/array.h>

 #include <vector>
 #include <utility>
+#include <tuple>
 #include <functional>

 namespace dgl {
@@ -27,8 +28,8 @@ namespace impl {
  */
 template<typename IdxType>
 using StepFunc = std::function<
-  //          ID        terminate?
-  std::pair<dgl_id_t, bool>(
+  //          ID        Edge ID    terminate?
+  std::tuple<dgl_id_t, dgl_id_t, bool>(
       IdxType *,    // node IDs generated so far
       dgl_id_t,     // last node ID
       int64_t)>;    // # of steps
@@ -52,11 +53,13 @@ TypeArray GetNodeTypesFromMetapath(
  *        each edge by edge type. An empty float array assumes uniform transition.
  * \return A 2D array of shape (len(seeds), len(metapath) + 1) with node IDs. The
  *         paths that terminated early are padded with -1.
+ *         A 2D array of shape (len(seeds), len(metapath)) with edge IDs. The
+ *         paths that terminated early are padded with -1.
  * \note This function should be called together with GetNodeTypesFromMetapath to
  *       determine the node type of each node in the random walk traces.
  */
 template<DLDeviceType XPU, typename IdxType>
-IdArray RandomWalk(
+std::pair<IdArray, IdArray> RandomWalk(
     const HeteroGraphPtr hg,
     const IdArray seeds,
     const TypeArray metapath,
@@ -73,11 +76,13 @@ IdArray RandomWalk(
  * \param restart_prob Restart probability
  * \return A 2D array of shape (len(seeds), len(metapath) + 1) with node IDs. The
  *         paths that terminated early are padded with -1.
+ *         A 2D array of shape (len(seeds), len(metapath)) with edge IDs. The
+ *         paths that terminated early are padded with -1.
  * \note This function should be called together with GetNodeTypesFromMetapath to
  *       determine the node type of each node in the random walk traces.
  */
 template<DLDeviceType XPU, typename IdxType>
-IdArray RandomWalkWithRestart(
+std::pair<IdArray, IdArray> RandomWalkWithRestart(
     const HeteroGraphPtr hg,
     const IdArray seeds,
     const TypeArray metapath,
@@ -97,11 +102,13 @@ IdArray RandomWalkWithRestart(
  *        as \c metapath, indicating the probability to terminate after transition.
  * \return A 2D array of shape (len(seeds), len(metapath) + 1) with node IDs. The
  *         paths that terminated early are padded with -1.
+ *         A 2D array of shape (len(seeds), len(metapath)) with edge IDs. The
+ *         paths that terminated early are padded with -1.
  * \note This function should be called together with GetNodeTypesFromMetapath to
  *       determine the node type of each node in the random walk traces.
  */
 template<DLDeviceType XPU, typename IdxType>
-IdArray RandomWalkWithStepwiseRestart(
+std::pair<IdArray, IdArray> RandomWalkWithStepwiseRestart(
     const HeteroGraphPtr hg,
     const IdArray seeds,
     const TypeArray metapath,
......
@@ -4,7 +4,7 @@ import numpy as np
 import unittest
 from collections import defaultdict

-def check_random_walk(g, metapath, traces, ntypes, prob=None):
+def check_random_walk(g, metapath, traces, ntypes, prob=None, trace_eids=None):
     traces = F.asnumpy(traces)
     ntypes = F.asnumpy(ntypes)
     for j in range(traces.shape[1] - 1):
@@ -19,6 +19,9 @@ def check_random_walk(g, metapath, traces, ntypes, prob=None):
                 p = F.asnumpy(g.edges[metapath[j]].data['p'])
                 eids = g.edge_ids(traces[i, j], traces[i, j+1], etype=metapath[j])
                 assert p[eids] != 0
+            if trace_eids is not None:
+                u, v = g.find_edges(trace_eids[i, j], etype=metapath[j])
+                assert (u == traces[i, j]) and (v == traces[i, j + 1])

 @unittest.skipIf(F._default_context_str == 'gpu', reason="GPU random walk not implemented")
 def test_random_walk():
@@ -42,10 +45,10 @@ def test_random_walk():
     g4.edges['follow'].data['p'] = F.tensor([3, 0, 3, 3, 3], dtype=F.float32)
     g4.edges['viewed-by'].data['p'] = F.tensor([1, 1, 1, 1, 1, 1], dtype=F.float32)

-    traces, ntypes = dgl.sampling.random_walk(g1, [0, 1, 2, 0, 1, 2], length=4)
-    check_random_walk(g1, ['follow'] * 4, traces, ntypes)
+    traces, eids, ntypes = dgl.sampling.random_walk(g1, [0, 1, 2, 0, 1, 2], length=4, return_eids=True)
+    check_random_walk(g1, ['follow'] * 4, traces, ntypes, trace_eids=eids)

-    traces, ntypes = dgl.sampling.random_walk(g1, [0, 1, 2, 0, 1, 2], length=4, restart_prob=0.)
-    check_random_walk(g1, ['follow'] * 4, traces, ntypes)
+    traces, eids, ntypes = dgl.sampling.random_walk(g1, [0, 1, 2, 0, 1, 2], length=4, restart_prob=0., return_eids=True)
+    check_random_walk(g1, ['follow'] * 4, traces, ntypes, trace_eids=eids)

     traces, ntypes = dgl.sampling.random_walk(
         g1, [0, 1, 2, 0, 1, 2], length=4, restart_prob=F.zeros((4,), F.float32, F.cpu()))
     check_random_walk(g1, ['follow'] * 4, traces, ntypes)
@@ -56,13 +59,13 @@ def test_random_walk():
         g1, ['follow'] * 4, F.slice_axis(traces, 1, 0, 5), F.slice_axis(ntypes, 0, 0, 5))
     assert (F.asnumpy(traces)[:, 5] == -1).all()

-    traces, ntypes = dgl.sampling.random_walk(
-        g2, [0, 1, 2, 3, 0, 1, 2, 3], length=4)
-    check_random_walk(g2, ['follow'] * 4, traces, ntypes)
+    traces, eids, ntypes = dgl.sampling.random_walk(
+        g2, [0, 1, 2, 3, 0, 1, 2, 3], length=4, return_eids=True)
+    check_random_walk(g2, ['follow'] * 4, traces, ntypes, trace_eids=eids)

-    traces, ntypes = dgl.sampling.random_walk(
-        g2, [0, 1, 2, 3, 0, 1, 2, 3], length=4, prob='p')
-    check_random_walk(g2, ['follow'] * 4, traces, ntypes, 'p')
+    traces, eids, ntypes = dgl.sampling.random_walk(
+        g2, [0, 1, 2, 3, 0, 1, 2, 3], length=4, prob='p', return_eids=True)
+    check_random_walk(g2, ['follow'] * 4, traces, ntypes, 'p', trace_eids=eids)

     try:
         traces, ntypes = dgl.sampling.random_walk(
@@ -73,32 +76,55 @@ def test_random_walk():
     assert fail

     metapath = ['follow', 'view', 'viewed-by'] * 2
-    traces, ntypes = dgl.sampling.random_walk(
-        g3, [0, 1, 2, 0, 1, 2], metapath=metapath)
-    check_random_walk(g3, metapath, traces, ntypes)
+    traces, eids, ntypes = dgl.sampling.random_walk(
+        g3, [0, 1, 2, 0, 1, 2], metapath=metapath, return_eids=True)
+    check_random_walk(g3, metapath, traces, ntypes, trace_eids=eids)

     metapath = ['follow', 'view', 'viewed-by'] * 2
-    traces, ntypes = dgl.sampling.random_walk(
-        g4, [0, 1, 2, 3, 0, 1, 2, 3], metapath=metapath)
-    check_random_walk(g4, metapath, traces, ntypes)
+    traces, eids, ntypes = dgl.sampling.random_walk(
+        g4, [0, 1, 2, 3, 0, 1, 2, 3], metapath=metapath, return_eids=True)
+    check_random_walk(g4, metapath, traces, ntypes, trace_eids=eids)
+
+    traces, eids, ntypes = dgl.sampling.random_walk(
+        g4, [0, 1, 2, 0, 1, 2], metapath=metapath, return_eids=True)
+    check_random_walk(g4, metapath, traces, ntypes, trace_eids=eids)

     metapath = ['follow', 'view', 'viewed-by'] * 2
-    traces, ntypes = dgl.sampling.random_walk(
-        g4, [0, 1, 2, 3, 0, 1, 2, 3], metapath=metapath, prob='p')
-    check_random_walk(g4, metapath, traces, ntypes, 'p')
+    traces, eids, ntypes = dgl.sampling.random_walk(
+        g4, [0, 1, 2, 3, 0, 1, 2, 3], metapath=metapath, prob='p', return_eids=True)
+    check_random_walk(g4, metapath, traces, ntypes, 'p', trace_eids=eids)

-    traces, ntypes = dgl.sampling.random_walk(
-        g4, [0, 1, 2, 3, 0, 1, 2, 3], metapath=metapath, prob='p', restart_prob=0.)
-    check_random_walk(g4, metapath, traces, ntypes, 'p')
+    traces, eids, ntypes = dgl.sampling.random_walk(
+        g4, [0, 1, 2, 3, 0, 1, 2, 3], metapath=metapath, prob='p', restart_prob=0., return_eids=True)
+    check_random_walk(g4, metapath, traces, ntypes, 'p', trace_eids=eids)

-    traces, ntypes = dgl.sampling.random_walk(
+    traces, eids, ntypes = dgl.sampling.random_walk(
         g4, [0, 1, 2, 3, 0, 1, 2, 3], metapath=metapath, prob='p',
-        restart_prob=F.zeros((6,), F.float32, F.cpu()))
-    check_random_walk(g4, metapath, traces, ntypes, 'p')
+        restart_prob=F.zeros((6,), F.float32, F.cpu()), return_eids=True)
+    check_random_walk(g4, metapath, traces, ntypes, 'p', trace_eids=eids)

-    traces, ntypes = dgl.sampling.random_walk(
+    traces, eids, ntypes = dgl.sampling.random_walk(
         g4, [0, 1, 2, 3, 0, 1, 2, 3], metapath=metapath + ['follow'], prob='p',
-        restart_prob=F.tensor([0, 0, 0, 0, 0, 0, 1], F.float32))
-    check_random_walk(g4, metapath, traces[:, :7], ntypes[:7], 'p')
+        restart_prob=F.tensor([0, 0, 0, 0, 0, 0, 1], F.float32), return_eids=True)
+    check_random_walk(g4, metapath, traces[:, :7], ntypes[:7], 'p', trace_eids=eids)
     assert (F.asnumpy(traces)[:, 7] == -1).all()

+@unittest.skipIf(F._default_context_str == 'gpu', reason="GPU random walk not implemented")
+def test_node2vec():
+    g1 = dgl.heterograph({
+        ('user', 'follow', 'user'): ([0, 1, 2], [1, 2, 0])
+    })
+    g2 = dgl.heterograph({
+        ('user', 'follow', 'user'): ([0, 1, 1, 2, 3], [1, 2, 3, 0, 0])
+    })
+    g2.edata['p'] = F.tensor([3, 0, 3, 3, 3], dtype=F.float32)
+
+    ntypes = F.zeros((5,), dtype=F.int64)
+
+    traces, eids = dgl.sampling.node2vec_random_walk(g1, [0, 1, 2, 0, 1, 2], 1, 1, 4, return_eids=True)
+    check_random_walk(g1, ['follow'] * 4, traces, ntypes, trace_eids=eids)
+
+    traces, eids = dgl.sampling.node2vec_random_walk(
+        g2, [0, 1, 2, 3, 0, 1, 2, 3], 1, 1, 4, prob='p', return_eids=True)
+    check_random_walk(g2, ['follow'] * 4, traces, ntypes, 'p', trace_eids=eids)

 @unittest.skipIf(F._default_context_str == 'gpu', reason="GPU pack traces not implemented")
 def test_pack_traces():
     traces, types = (np.array(
......
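Outside the test harness, the new sampler is driven by the return parameter p and the in-out parameter q from the node2vec paper; a sketch on an arbitrary toy graph, assuming the keyword names p, q, and walk_length used elsewhere in this PR:

```python
import dgl
import torch

g = dgl.graph(([0, 1, 1, 2, 3], [1, 2, 3, 0, 0]))
seeds = torch.tensor([0, 1, 2, 3])

# node2vec weights a hop back to the previous node by 1/p and a hop
# farther away from it by 1/q, so p=0.25 favors returning while q=4.0
# keeps the walk local (BFS-like exploration).
traces = dgl.sampling.node2vec_random_walk(g, seeds, p=0.25, q=4.0, walk_length=4)

# As in test_node2vec above, return_eids=True also yields the hop edge IDs.
traces, eids = dgl.sampling.node2vec_random_walk(
    g, seeds, 0.25, 4.0, 4, return_eids=True)
```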