# test_dataloader.py
# Tests for DGL neighbor-sampling collators and the graph/node/edge data loaders.
import dgl
import backend as F
import unittest
from torch.utils.data import DataLoader
from collections import defaultdict
6
from collections.abc import Iterator
7
from itertools import product
8

9
def _check_neighbor_sampling_dataloader(g, nids, dl, mode, collator):
10
11
    seeds = defaultdict(list)

12
13
    for item in dl:
        if mode == 'node':
Quan (Andy) Gan's avatar
Quan (Andy) Gan committed
14
            input_nodes, output_nodes, blocks = item
15
        elif mode == 'edge':
Quan (Andy) Gan's avatar
Quan (Andy) Gan committed
16
            input_nodes, pair_graph, blocks = item
17
18
            output_nodes = pair_graph.ndata[dgl.NID]
        elif mode == 'link':
Quan (Andy) Gan's avatar
Quan (Andy) Gan committed
19
            input_nodes, pair_graph, neg_graph, blocks = item
20
21
22
23
            output_nodes = pair_graph.ndata[dgl.NID]
            for ntype in pair_graph.ntypes:
                assert F.array_equal(pair_graph.nodes[ntype].data[dgl.NID], neg_graph.nodes[ntype].data[dgl.NID])

24
25
26
27
28
29
30
        if len(g.ntypes) > 1:
            for ntype in g.ntypes:
                assert F.array_equal(input_nodes[ntype], blocks[0].srcnodes[ntype].data[dgl.NID])
                assert F.array_equal(output_nodes[ntype], blocks[-1].dstnodes[ntype].data[dgl.NID])
        else:
            assert F.array_equal(input_nodes, blocks[0].srcdata[dgl.NID])
            assert F.array_equal(output_nodes, blocks[-1].dstdata[dgl.NID])
31

32
33
34
35
36
37
38
        prev_dst = {ntype: None for ntype in g.ntypes}
        for block in blocks:
            for canonical_etype in block.canonical_etypes:
                utype, etype, vtype = canonical_etype
                uu, vv = block.all_edges(order='eid', etype=canonical_etype)
                src = block.srcnodes[utype].data[dgl.NID]
                dst = block.dstnodes[vtype].data[dgl.NID]
39
40
41
42
                assert F.array_equal(
                    block.srcnodes[utype].data['feat'], g.nodes[utype].data['feat'][src])
                assert F.array_equal(
                    block.dstnodes[vtype].data['feat'], g.nodes[vtype].data['feat'][dst])
43
44
45
46
47
48
                if prev_dst[utype] is not None:
                    assert F.array_equal(src, prev_dst[utype])
                u = src[uu]
                v = dst[vv]
                assert F.asnumpy(g.has_edges_between(u, v, etype=canonical_etype)).all()
                eid = block.edges[canonical_etype].data[dgl.EID]
49
50
51
                assert F.array_equal(
                    block.edges[canonical_etype].data['feat'],
                    g.edges[canonical_etype].data['feat'][eid])
52
53
54
55
56
57
58
59
60
                ufound, vfound = g.find_edges(eid, etype=canonical_etype)
                assert F.array_equal(ufound, u)
                assert F.array_equal(vfound, v)
            for ntype in block.dsttypes:
                src = block.srcnodes[ntype].data[dgl.NID]
                dst = block.dstnodes[ntype].data[dgl.NID]
                assert F.array_equal(src[:block.number_of_dst_nodes(ntype)], dst)
                prev_dst[ntype] = dst

61
62
63
64
65
66
67
68
        if mode == 'node':
            for ntype in blocks[-1].dsttypes:
                seeds[ntype].append(blocks[-1].dstnodes[ntype].data[dgl.NID])
        elif mode == 'edge' or mode == 'link':
            for etype in pair_graph.canonical_etypes:
                seeds[etype].append(pair_graph.edges[etype].data[dgl.EID])

    # Check if all nodes/edges are iterated
69
70
    seeds = {k: F.cat(v, 0) for k, v in seeds.items()}
    for k, v in seeds.items():
71
72
73
74
75
76
77
        if k in nids:
            seed_set = set(F.asnumpy(nids[k]))
        elif isinstance(k, tuple) and k[1] in nids:
            seed_set = set(F.asnumpy(nids[k[1]]))
        else:
            continue

78
79
80
81
        v_set = set(F.asnumpy(v))
        assert v_set == seed_set

def test_neighbor_sampler_dataloader():
    """Exercise NodeCollator/EdgeCollator with neighbor samplers.

    Builds one single-relation graph and one multi-relation graph, wraps a
    range of collator configurations (node, edge, and link-prediction modes,
    with and without edge exclusion) in a PyTorch ``DataLoader``, and hands
    every loader to ``_check_neighbor_sampling_dataloader``.
    """
    NodeCollator = dgl.dataloading.NodeCollator
    EdgeCollator = dgl.dataloading.EdgeCollator
    Uniform = dgl.dataloading.negative_sampler.Uniform

    # Single node type / single edge type graph.
    g = dgl.heterograph({('user', 'follow', 'user'): ([0, 0, 0, 1, 1], [1, 2, 3, 3, 4])},
                        {'user': 6}).long()
    g = dgl.to_bidirected(g).to(F.ctx())
    g.ndata['feat'] = F.randn((6, 8))
    g.edata['feat'] = F.randn((10, 4))
    # Edge-ID mapping passed as `reverse_eids` to the 'reverse_id' exclusion.
    reverse_eids = F.tensor([5, 6, 7, 8, 9, 0, 1, 2, 3, 4], dtype=F.int64)
    g_sampler1 = dgl.dataloading.MultiLayerNeighborSampler([2, 2], return_eids=True)
    g_sampler2 = dgl.dataloading.MultiLayerFullNeighborSampler(2, return_eids=True)

    # Graph with two node types and four edge types.
    hg = dgl.heterograph({
        ('user', 'follow', 'user'): ([0, 0, 0, 1, 1, 1, 2], [1, 2, 3, 0, 2, 3, 0]),
        ('user', 'followed-by', 'user'): ([1, 2, 3, 0, 2, 3, 0], [0, 0, 0, 1, 1, 1, 2]),
        ('user', 'play', 'game'): ([0, 1, 1, 3, 5], [0, 1, 2, 0, 2]),
        ('game', 'played-by', 'user'): ([0, 1, 2, 0, 2], [0, 1, 1, 3, 5])
    }).long().to(F.ctx())
    for ntype in hg.ntypes:
        hg.nodes[ntype].data['feat'] = F.randn((hg.number_of_nodes(ntype), 8))
    for etype in hg.canonical_etypes:
        hg.edges[etype].data['feat'] = F.randn((hg.number_of_edges(etype), 4))
    hg_sampler1 = dgl.dataloading.MultiLayerNeighborSampler(
        [{'play': 1, 'played-by': 1, 'follow': 2, 'followed-by': 1}] * 2, return_eids=True)
    hg_sampler2 = dgl.dataloading.MultiLayerFullNeighborSampler(2, return_eids=True)
    reverse_etypes = {'follow': 'followed-by', 'followed-by': 'follow', 'play': 'played-by', 'played-by': 'play'}

    # Each case is (graph, expected seeds, mode, collator).  Keeping the four
    # values in one tuple guarantees they cannot drift out of step.
    cases = []

    for seeds, sampler in product(
            [F.tensor([0, 1, 2, 3, 5], dtype=F.int64), F.tensor([4, 5], dtype=F.int64)],
            [g_sampler1, g_sampler2]):
        cases.append((g, {'user': seeds}, 'node', NodeCollator(g, seeds, sampler)))

        for mode, kwargs in [
                ('edge', {}),
                ('edge', {'exclude': 'self'}),
                ('edge', {'exclude': 'reverse_id', 'reverse_eids': reverse_eids}),
                ('link', {'negative_sampler': Uniform(2)}),
                ('link', {'exclude': 'self', 'negative_sampler': Uniform(2)}),
                ('link', {'exclude': 'reverse_id', 'reverse_eids': reverse_eids,
                          'negative_sampler': Uniform(2)}),
        ]:
            cases.append(
                (g, {'follow': seeds}, mode, EdgeCollator(g, seeds, sampler, **kwargs)))

    for seeds, sampler in product(
            [{'user': F.tensor([0, 1, 3, 5], dtype=F.int64), 'game': F.tensor([0, 1, 2], dtype=F.int64)},
             {'user': F.tensor([4, 5], dtype=F.int64), 'game': F.tensor([0, 1, 2], dtype=F.int64)}],
            [hg_sampler1, hg_sampler2]):
        cases.append((hg, seeds, 'node', NodeCollator(hg, seeds, sampler)))

    for seeds, sampler in product(
            [{'follow': F.tensor([0, 1, 3, 5], dtype=F.int64), 'play': F.tensor([1, 3], dtype=F.int64)},
             {'follow': F.tensor([4, 5], dtype=F.int64), 'play': F.tensor([1, 3], dtype=F.int64)}],
            [hg_sampler1, hg_sampler2]):
        for mode, kwargs in [
                ('edge', {}),
                ('edge', {'exclude': 'reverse_types', 'reverse_etypes': reverse_etypes}),
                ('link', {'negative_sampler': Uniform(2)}),
                ('link', {'exclude': 'reverse_types', 'reverse_etypes': reverse_etypes,
                          'negative_sampler': Uniform(2)}),
        ]:
            cases.append((hg, seeds, mode, EdgeCollator(hg, seeds, sampler, **kwargs)))

    for _g, nid, mode, collator in cases:
        dl = DataLoader(
            collator.dataset, collate_fn=collator.collate, batch_size=2, shuffle=True, drop_last=False)
        assert isinstance(iter(dl), Iterator)
        _check_neighbor_sampling_dataloader(_g, nid, dl, mode, collator)
196

197
198
199
200
201
def test_graph_dataloader():
    """Check that GraphDataLoader yields batched graphs with matching labels.

    The dataset holds ``batch_size * num_batches`` graphs, so the loader is
    expected to produce exactly ``num_batches`` batches of ``batch_size``
    labels each.
    """
    batch_size = 16
    num_batches = 2
    minigc_dataset = dgl.data.MiniGCDataset(batch_size * num_batches, 10, 20)
    data_loader = dgl.dataloading.GraphDataLoader(minigc_dataset, batch_size=batch_size, shuffle=True)
    assert isinstance(iter(data_loader), Iterator)

    seen_batches = 0
    for graph, label in data_loader:
        assert isinstance(graph, dgl.DGLGraph)
        assert F.asnumpy(label).shape[0] == batch_size
        seen_batches += 1
    # Guard against the loop above passing vacuously on an empty loader.
    assert seen_batches == num_batches
206

207
208
209
210
211
212
213
214
215
216
217
218
219
def _check_device(data):
    if isinstance(data, dict):
        for k, v in data.items():
            assert v.device == F.ctx()
    elif isinstance(data, list):
        for v in data:
            assert v.device == F.ctx()
    else:
        assert data.device == F.ctx()

def test_node_dataloader():
    """Check that NodeDataLoader places its outputs on the requested device.

    Runs a two-layer full-neighbor sampler over a single-relation graph and a
    multi-relation graph and verifies, for every batch, that the input nodes,
    output nodes and blocks are all on ``F.ctx()``.
    """
    sampler = dgl.dataloading.MultiLayerFullNeighborSampler(2)

    # Single-relation graph.
    g1 = dgl.graph(([0, 0, 0, 1, 1], [1, 2, 3, 3, 4])).to(F.ctx())
    g1.ndata['feat'] = F.copy_to(F.randn((5, 8)), F.ctx())

    dataloader = dgl.dataloading.NodeDataLoader(
        g1, g1.nodes(), sampler, device=F.ctx(), batch_size=g1.num_nodes())
    for input_nodes, output_nodes, blocks in dataloader:
        _check_device(input_nodes)
        _check_device(output_nodes)
        _check_device(blocks)

    # Graph with two node types and four edge types.
    g2 = dgl.heterograph({
        ('user', 'follow', 'user'): ([0, 0, 0, 1, 1, 1, 2], [1, 2, 3, 0, 2, 3, 0]),
        ('user', 'followed-by', 'user'): ([1, 2, 3, 0, 2, 3, 0], [0, 0, 0, 1, 1, 1, 2]),
        ('user', 'play', 'game'): ([0, 1, 1, 3, 5], [0, 1, 2, 0, 2]),
        ('game', 'played-by', 'user'): ([0, 1, 2, 0, 2], [0, 1, 1, 3, 5])
    }).to(F.ctx())
    for ntype in g2.ntypes:
        g2.nodes[ntype].data['feat'] = F.copy_to(F.randn((g2.num_nodes(ntype), 8)), F.ctx())
    # Batch size is the largest per-type node count.
    batch_size = max(g2.num_nodes(nty) for nty in g2.ntypes)

    dataloader = dgl.dataloading.NodeDataLoader(
        g2, {nty: g2.nodes(nty) for nty in g2.ntypes},
        sampler, device=F.ctx(), batch_size=batch_size)
    assert isinstance(iter(dataloader), Iterator)
    for input_nodes, output_nodes, blocks in dataloader:
        _check_device(input_nodes)
        _check_device(output_nodes)
        _check_device(blocks)

def test_edge_dataloader():
    """Check that EdgeDataLoader places its outputs on the requested device.

    Covers a single-relation graph and a multi-relation graph, each with and
    without a negative sampler.  For every batch, the input nodes, the pair
    graph(s) and the blocks must all be on ``F.ctx()``.
    """
    sampler = dgl.dataloading.MultiLayerFullNeighborSampler(2)
    neg_sampler = dgl.dataloading.negative_sampler.Uniform(2)

    # Single-relation graph.
    g1 = dgl.graph(([0, 0, 0, 1, 1], [1, 2, 3, 3, 4])).to(F.ctx())
    g1.ndata['feat'] = F.copy_to(F.randn((5, 8)), F.ctx())

    # no negative sampler
    dataloader = dgl.dataloading.EdgeDataLoader(
        g1, g1.edges(form='eid'), sampler, device=F.ctx(), batch_size=g1.num_edges())
    for input_nodes, pos_pair_graph, blocks in dataloader:
        _check_device(input_nodes)
        _check_device(pos_pair_graph)
        _check_device(blocks)

    # negative sampler
    dataloader = dgl.dataloading.EdgeDataLoader(
        g1, g1.edges(form='eid'), sampler, device=F.ctx(),
        negative_sampler=neg_sampler, batch_size=g1.num_edges())
    for input_nodes, pos_pair_graph, neg_pair_graph, blocks in dataloader:
        _check_device(input_nodes)
        _check_device(pos_pair_graph)
        _check_device(neg_pair_graph)
        _check_device(blocks)

    # Graph with two node types and four edge types.
    g2 = dgl.heterograph({
        ('user', 'follow', 'user'): ([0, 0, 0, 1, 1, 1, 2], [1, 2, 3, 0, 2, 3, 0]),
        ('user', 'followed-by', 'user'): ([1, 2, 3, 0, 2, 3, 0], [0, 0, 0, 1, 1, 1, 2]),
        ('user', 'play', 'game'): ([0, 1, 1, 3, 5], [0, 1, 2, 0, 2]),
        ('game', 'played-by', 'user'): ([0, 1, 2, 0, 2], [0, 1, 1, 3, 5])
    }).to(F.ctx())
    for ntype in g2.ntypes:
        g2.nodes[ntype].data['feat'] = F.copy_to(F.randn((g2.num_nodes(ntype), 8)), F.ctx())
    # Batch size is the largest per-type edge count.
    batch_size = max(g2.num_edges(ety) for ety in g2.canonical_etypes)

    # no negative sampler
    dataloader = dgl.dataloading.EdgeDataLoader(
        g2, {ety: g2.edges(form='eid', etype=ety) for ety in g2.canonical_etypes},
        sampler, device=F.ctx(), batch_size=batch_size)
    for input_nodes, pos_pair_graph, blocks in dataloader:
        _check_device(input_nodes)
        _check_device(pos_pair_graph)
        _check_device(blocks)

    # negative sampler
    dataloader = dgl.dataloading.EdgeDataLoader(
        g2, {ety: g2.edges(form='eid', etype=ety) for ety in g2.canonical_etypes},
        sampler, device=F.ctx(), negative_sampler=neg_sampler,
        batch_size=batch_size)

    assert isinstance(iter(dataloader), Iterator)
    for input_nodes, pos_pair_graph, neg_pair_graph, blocks in dataloader:
        _check_device(input_nodes)
        _check_device(pos_pair_graph)
        _check_device(neg_pair_graph)
        _check_device(blocks)

306
307
if __name__ == '__main__':
    # Run every test in file order when executed as a script.
    test_neighbor_sampler_dataloader()
    test_graph_dataloader()
    test_node_dataloader()
    test_edge_dataloader()