randomwalk.py 5.1 KB
Newer Older
1
2
3

from ... import utils
from ... import backend as F
4
from ..._ffi.function import _init_api
5

6
7
8
9
__all__ = ['random_walk',
           'random_walk_with_restart',
           'bipartite_single_sided_random_walk_with_restart',
           ]
10
11
12
13
14
15
16
17


def random_walk(g, seeds, num_traces, num_hops):
    """Batch-generate random walk traces on given graph with the same length.

    Parameters
    ----------
    g : DGLGraph
18
        The graph.
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
    seeds : Tensor
        The node ID tensor from which the random walk traces starts.
    num_traces : int
        Number of traces to generate for each seed.
    num_hops : int
        Number of hops for each trace.

    Returns
    -------
    traces : Tensor
        A 3-dimensional node ID tensor with shape

            (num_seeds, num_traces, num_hops + 1)

        traces[i, j, 0] are always starting nodes (i.e. seed[i]).
    """
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
    if len(seeds) == 0:
        return utils.toindex([]).tousertensor()
    seeds = utils.toindex(seeds).todgltensor()
    traces = _CAPI_DGLRandomWalk(g._graph._handle, seeds, num_traces, num_hops)
    return F.zerocopy_from_dlpack(traces.to_dlpack())


def _split_traces(traces):
    """Splits the flattened RandomWalkTraces structure into list of list
    of tensors.

    Parameters
    ----------
    traces : PackedFunc object of RandomWalkTraces structure

    Returns
    -------
    traces : list[list[Tensor]]
        traces[i][j] is the j-th trace generated for i-th seed.
    """
    trace_counts = F.zerocopy_to_numpy(
            F.zerocopy_from_dlpack(traces(0).to_dlpack())).tolist()
    trace_lengths = F.zerocopy_from_dlpack(traces(1).to_dlpack())
    trace_vertices = F.zerocopy_from_dlpack(traces(2).to_dlpack())

    trace_vertices = F.split(
            trace_vertices, F.zerocopy_to_numpy(trace_lengths).tolist(), 0)

    traces = []
    s = 0
    for c in trace_counts:
        traces.append(trace_vertices[s:s+c])
        s += c

    return traces


def random_walk_with_restart(
        g, seeds, restart_prob, max_nodes_per_seed,
        max_visit_counts=0, max_frequent_visited_nodes=0):
    """Batch-generate random walk traces on given graph with restart probability.

    Parameters
    ----------
    g : DGLGraph
        The graph.
    seeds : Tensor
        The node ID tensor from which the random walk traces starts.
    restart_prob : float
        Probability to stop a random walk after each step.
    max_nodes_per_seed : int
        Stop generating traces for a seed if the total number of nodes
        visited exceeds this number. [1]
    max_visit_counts : int, optional
    max_frequent_visited_nodes : int, optional
        Alternatively, stop generating traces for a seed if no less than
        ``max_frequent_visited_nodes`` are visited no less than
        ``max_visit_counts`` times.  [1]

    Returns
    -------
    traces : list[list[Tensor]]
        traces[i][j] is the j-th trace generated for i-th seed.

    Notes
    -----
    The traces does **not** include the seed nodes themselves.

    Reference
    ---------
    [1] Eksombatchai et al., 2017 https://arxiv.org/abs/1711.07601
    """
    if len(seeds) == 0:
        return []
    seeds = utils.toindex(seeds).todgltensor()
    traces = _CAPI_DGLRandomWalkWithRestart(
            g._graph._handle, seeds, restart_prob, max_nodes_per_seed,
            max_visit_counts, max_frequent_visited_nodes)
    return _split_traces(traces)


def bipartite_single_sided_random_walk_with_restart(
        g, seeds, restart_prob, max_nodes_per_seed,
        max_visit_counts=0, max_frequent_visited_nodes=0):
    """Batch-generate random walk traces on given graph with restart probability.

    The graph must be a bipartite graph.

    A single random walk step involves two normal steps, so that the "visited"
    nodes always stay on the same side. [1]

    Parameters
    ----------
    g : DGLGraph
        The graph.
    seeds : Tensor
        The node ID tensor from which the random walk traces starts.
    restart_prob : float
        Probability to stop a random walk after each step.
    max_nodes_per_seed : int
        Stop generating traces for a seed if the total number of nodes
        visited exceeds this number. [1]
    max_visit_counts : int, optional
    max_frequent_visited_nodes : int, optional
        Alternatively, stop generating traces for a seed if no less than
        ``max_frequent_visited_nodes`` are visited no less than
        ``max_visit_counts`` times.  [1]

    Returns
    -------
    traces : list[list[Tensor]]
        traces[i][j] is the j-th trace generated for i-th seed.

    Notes
    -----
    The current implementation does not ensure that the graph is a bipartite
    graph.

    The traces does **not** include the seed nodes themselves.

    Reference
    ---------
    [1] Eksombatchai et al., 2017 https://arxiv.org/abs/1711.07601
    """
    if len(seeds) == 0:
        return []
    seeds = utils.toindex(seeds).todgltensor()
    traces = _CAPI_DGLBipartiteSingleSidedRandomWalkWithRestart(
            g._graph._handle, seeds, restart_prob, max_nodes_per_seed,
            max_visit_counts, max_frequent_visited_nodes)
    return _split_traces(traces)

_init_api('dgl.randomwalk', __name__)