test_sampler.py 7.04 KB
Newer Older
1
import backend as F
Da Zheng's avatar
Da Zheng committed
2
3
4
5
6
import numpy as np
import scipy as sp
import dgl
from dgl import utils

7
8
np.random.seed(42)

Da Zheng's avatar
Da Zheng committed
9
10
11
12
def generate_rand_graph(n):
    arr = (sp.sparse.random(n, n, density=0.1, format='coo') != 0).astype(np.int64)
    return dgl.DGLGraph(arr, readonly=True)

13
14
15
def test_create_full():
    g = generate_rand_graph(100)
    full_nf = dgl.contrib.sampling.sampler.create_full_nodeflow(g, 5)
Da Zheng's avatar
Da Zheng committed
16
    assert full_nf.number_of_nodes() == g.number_of_nodes() * 6
17
18
    assert full_nf.number_of_edges() == g.number_of_edges() * 5

Da Zheng's avatar
Da Zheng committed
19
20
21
def test_1neighbor_sampler_all():
    g = generate_rand_graph(100)
    # In this case, NeighborSampling simply gets the neighborhood of a single vertex.
22
    for i, subg in enumerate(dgl.contrib.sampling.NeighborSampler(
Da Zheng's avatar
Da Zheng committed
23
            g, 1, g.number_of_nodes(), neighbor_type='in', num_workers=4)):
24
        seed_ids = subg.layer_parent_nid(-1)
Da Zheng's avatar
Da Zheng committed
25
        assert len(seed_ids) == 1
26
        src, dst, eid = g.in_edges(seed_ids, form='all')
Da Zheng's avatar
Da Zheng committed
27
28
        assert subg.number_of_nodes() == len(src) + 1
        assert subg.number_of_edges() == len(src)
Da Zheng's avatar
Da Zheng committed
29

Da Zheng's avatar
Da Zheng committed
30
31
32
        assert seed_ids == subg.layer_parent_nid(-1)
        child_src, child_dst, child_eid = subg.in_edges(subg.layer_nid(-1), form='all')
        assert F.array_equal(child_src, subg.layer_nid(0))
Da Zheng's avatar
Da Zheng committed
33

Da Zheng's avatar
Da Zheng committed
34
35
        src1 = subg.map_to_parent_nid(child_src)
        assert F.array_equal(src1, src)
Da Zheng's avatar
Da Zheng committed
36
37

def is_sorted(arr):
38
    return np.sum(np.sort(arr) == arr, 0) == len(arr)
Da Zheng's avatar
Da Zheng committed
39
40

def verify_subgraph(g, subg, seed_id):
Da Zheng's avatar
Da Zheng committed
41
42
43
44
    seed_id = F.asnumpy(seed_id)
    seeds = F.asnumpy(subg.map_to_parent_nid(subg.layer_nid(-1)))
    assert seed_id in seeds
    child_seed = F.asnumpy(subg.layer_nid(-1))[seeds == seed_id]
45
    src, dst, eid = g.in_edges(seed_id, form='all')
Da Zheng's avatar
Da Zheng committed
46
47
    child_src, child_dst, child_eid = subg.in_edges(child_seed, form='all')

48
    child_src = F.asnumpy(child_src)
Da Zheng's avatar
Da Zheng committed
49
50
51
52
53
    # We don't allow duplicate elements in the neighbor list.
    assert(len(np.unique(child_src)) == len(child_src))
    # The neighbor list also needs to be sorted.
    assert(is_sorted(child_src))

Da Zheng's avatar
Da Zheng committed
54
55
56
    # a neighbor in the subgraph must also exist in parent graph.
    for i in subg.map_to_parent_nid(child_src):
        assert i in src
Da Zheng's avatar
Da Zheng committed
57
58
59
60

def test_1neighbor_sampler():
    g = generate_rand_graph(100)
    # In this case, NeighborSampling simply gets the neighborhood of a single vertex.
61
62
63
    for subg in dgl.contrib.sampling.NeighborSampler(g, 1, 5, neighbor_type='in',
                                                     num_workers=4):
        seed_ids = subg.layer_parent_nid(-1)
Da Zheng's avatar
Da Zheng committed
64
65
66
67
68
        assert len(seed_ids) == 1
        assert subg.number_of_nodes() <= 6
        assert subg.number_of_edges() <= 5
        verify_subgraph(g, subg, seed_ids)

69
70
71
def test_prefetch_neighbor_sampler():
    g = generate_rand_graph(100)
    # In this case, NeighborSampling simply gets the neighborhood of a single vertex.
72
73
74
    for subg in dgl.contrib.sampling.NeighborSampler(g, 1, 5, neighbor_type='in',
                                                     num_workers=4, prefetch=True):
        seed_ids = subg.layer_parent_nid(-1)
75
76
77
78
79
        assert len(seed_ids) == 1
        assert subg.number_of_nodes() <= 6
        assert subg.number_of_edges() <= 5
        verify_subgraph(g, subg, seed_ids)

Da Zheng's avatar
Da Zheng committed
80
81
82
def test_10neighbor_sampler_all():
    g = generate_rand_graph(100)
    # In this case, NeighborSampling simply gets the neighborhood of a single vertex.
Da Zheng's avatar
Da Zheng committed
83
84
    for subg in dgl.contrib.sampling.NeighborSampler(g, 10, g.number_of_nodes(),
                                                     neighbor_type='in', num_workers=4):
85
        seed_ids = subg.layer_parent_nid(-1)
Da Zheng's avatar
Da Zheng committed
86
        assert F.array_equal(seed_ids, subg.map_to_parent_nid(subg.layer_nid(-1)))
Da Zheng's avatar
Da Zheng committed
87

Da Zheng's avatar
Da Zheng committed
88
89
90
91
        src, dst, eid = g.in_edges(seed_ids, form='all')
        child_src, child_dst, child_eid = subg.in_edges(subg.layer_nid(-1), form='all')
        src1 = subg.map_to_parent_nid(child_src)
        assert F.array_equal(src1, src)
Da Zheng's avatar
Da Zheng committed
92
93
94

def check_10neighbor_sampler(g, seeds):
    # In this case, NeighborSampling simply gets the neighborhood of a single vertex.
95
96
97
    for subg in dgl.contrib.sampling.NeighborSampler(g, 10, 5, neighbor_type='in',
                                                     num_workers=4, seed_nodes=seeds):
        seed_ids = subg.layer_parent_nid(-1)
Da Zheng's avatar
Da Zheng committed
98
99
100
101
102
103
104
105
106
107
108
        assert subg.number_of_nodes() <= 6 * len(seed_ids)
        assert subg.number_of_edges() <= 5 * len(seed_ids)
        for seed_id in seed_ids:
            verify_subgraph(g, subg, seed_id)

def test_10neighbor_sampler():
    g = generate_rand_graph(100)
    check_10neighbor_sampler(g, None)
    check_10neighbor_sampler(g, seeds=np.unique(np.random.randint(0, g.number_of_nodes(),
                                                                  size=int(g.number_of_nodes() / 10))))

109
def _test_layer_sampler(prefetch=False):
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
    g = generate_rand_graph(100)
    nid = g.nodes()
    src, dst, eid = g.all_edges(form='all', order='eid')
    n_batches = 5
    batch_size = 50
    seed_batches = [np.sort(np.random.choice(F.asnumpy(nid), batch_size, replace=False))
                    for i in range(n_batches)]
    seed_nodes = np.hstack(seed_batches)
    layer_sizes = [50] * 3
    LayerSampler = getattr(dgl.contrib.sampling, 'LayerSampler')
    sampler = LayerSampler(g, batch_size, layer_sizes, 'in',
                           seed_nodes=seed_nodes, num_workers=4, prefetch=prefetch)
    for sub_g in sampler:
        assert all(sub_g.layer_size(i) < size for i, size in enumerate(layer_sizes))
        sub_nid = F.arange(0, sub_g.number_of_nodes())
        assert all(np.all(np.isin(F.asnumpy(sub_g.layer_nid(i)), F.asnumpy(sub_nid)))
                   for i in range(sub_g.num_layers))
        assert np.all(np.isin(F.asnumpy(sub_g.map_to_parent_nid(sub_nid)),
                              F.asnumpy(nid)))
        sub_eid = F.arange(0, sub_g.number_of_edges())
        assert np.all(np.isin(F.asnumpy(sub_g.map_to_parent_eid(sub_eid)),
                              F.asnumpy(eid)))
        assert any(np.all(np.sort(F.asnumpy(sub_g.layer_parent_nid(-1))) == seed_batch)
                   for seed_batch in seed_batches)

        sub_src, sub_dst = sub_g.all_edges(order='eid')
        for i in range(sub_g.num_blocks):
            block_eid = sub_g.block_eid(i)
            block_src = sub_g.map_to_parent_nid(sub_src[block_eid])
            block_dst = sub_g.map_to_parent_nid(sub_dst[block_eid])

            block_parent_eid = sub_g.block_parent_eid(i)
            block_parent_src = src[block_parent_eid]
            block_parent_dst = dst[block_parent_eid]

            assert np.all(F.asnumpy(block_src == block_parent_src))

        n_layers = sub_g.num_layers
        sub_n = sub_g.number_of_nodes()
        assert sum(F.shape(sub_g.layer_nid(i))[0] for i in range(n_layers)) == sub_n
        n_blocks = sub_g.num_blocks
        sub_m = sub_g.number_of_edges()
        assert sum(F.shape(sub_g.block_eid(i))[0] for i in range(n_blocks)) == sub_m

Da Zheng's avatar
Da Zheng committed
154
155
156
157
def test_layer_sampler():
    _test_layer_sampler()
    _test_layer_sampler(prefetch=True)

Da Zheng's avatar
Da Zheng committed
158
if __name__ == '__main__':
159
    test_create_full()
Da Zheng's avatar
Da Zheng committed
160
161
162
163
    test_1neighbor_sampler_all()
    test_10neighbor_sampler_all()
    test_1neighbor_sampler()
    test_10neighbor_sampler()
Da Zheng's avatar
Da Zheng committed
164
    test_layer_sampler()