# test_integration.py -- integration tests for dgl.graphbolt data-loading pipelines.
import dgl
import dgl.graphbolt as gb
import dgl.sparse as dglsp
import torch


def test_integration_link_prediction():
    """Snapshot-test a GraphBolt link-prediction data-loading pipeline.

    Builds a 6-node / 10-edge graph from CSC arrays, wraps its edges as an
    ``ItemSet`` of node pairs, and runs them through: item sampling
    (batch size 4), uniform negative sampling, two layers of neighbor
    sampling (fanout 1, with replacement), seed-edge exclusion and feature
    fetching. The string form of every produced minibatch is compared to a
    recorded snapshot.

    The RNG is seeded so the sampled negatives and neighbors are expected to
    be reproducible; the snapshots below depend on that seed.
    """
    torch.manual_seed(926)

    indptr = torch.tensor([0, 0, 1, 3, 6, 8, 10])
    indices = torch.tensor([5, 3, 3, 3, 3, 4, 4, 0, 5, 4])

    # Derive the (num_edges, 2) node-pair tensor from the COO view of the
    # same CSC structure used to build the sampling graph.
    matrix_a = dglsp.from_csc(indptr, indices)
    node_pairs = torch.t(torch.stack(matrix_a.coo()))
    node_feature_data = torch.tensor(
        [
            [0.9634, 0.2294],
            [0.6172, 0.7865],
            [0.2109, 0.1089],
            [0.8672, 0.2276],
            [0.5503, 0.8223],
            [0.5160, 0.2486],
        ]
    )
    edge_feature_data = torch.tensor(
        [
            [0.5123, 0.1709, 0.6150],
            [0.1476, 0.1902, 0.1314],
            [0.2582, 0.5203, 0.6228],
            [0.3708, 0.7631, 0.2683],
            [0.2126, 0.7878, 0.7225],
            [0.7885, 0.3414, 0.5485],
            [0.4088, 0.8200, 0.1851],
            [0.0056, 0.9469, 0.4432],
            [0.8972, 0.7511, 0.3617],
            [0.5773, 0.2199, 0.3366],
        ]
    )

    item_set = gb.ItemSet(node_pairs, names="node_pairs")
    graph = gb.fused_csc_sampling_graph(indptr, indices)

    node_feature = gb.TorchBasedFeature(node_feature_data)
    edge_feature = gb.TorchBasedFeature(edge_feature_data)
    features = {
        ("node", None, "feat"): node_feature,
        ("edge", None, "feat"): edge_feature,
    }
    feature_store = gb.BasicFeatureStore(features)
    datapipe = gb.ItemSampler(item_set, batch_size=4)
    # Second argument is 2; the snapshots show two negative destinations per
    # positive pair.
    datapipe = datapipe.sample_uniform_negative(graph, 2)
    fanouts = torch.LongTensor([1])
    datapipe = datapipe.sample_neighbor(graph, [fanouts, fanouts], replace=True)
    datapipe = datapipe.transform(gb.exclude_seed_edges)
    datapipe = datapipe.fetch_feature(
        feature_store, node_feature_keys=["feat"], edge_feature_keys=["feat"]
    )
    dataloader = gb.DataLoader(
        datapipe,
    )
    # Whitespace inside these literals is significant: they are compared
    # verbatim against ``str(minibatch)``.
    expected = [
        str(
            """MiniBatch(seed_nodes=None,
          sampled_subgraphs=[SampledSubgraphImpl(sampled_csc=CSCFormatBase(indptr=tensor([0, 1, 1, 1, 1, 1, 2]),
                                                                         indices=tensor([0, 4]),
                                                           ),
                                               original_row_node_ids=tensor([5, 3, 1, 2, 0, 4]),
                                               original_edge_ids=None,
                                               original_column_node_ids=tensor([5, 3, 1, 2, 0, 4]),
                            ),
                            SampledSubgraphImpl(sampled_csc=CSCFormatBase(indptr=tensor([0, 1, 1, 1, 1, 1, 2]),
                                                                         indices=tensor([5, 4]),
                                                           ),
                                               original_row_node_ids=tensor([5, 3, 1, 2, 0, 4]),
                                               original_edge_ids=None,
                                               original_column_node_ids=tensor([5, 3, 1, 2, 0, 4]),
                            )],
          positive_node_pairs=(tensor([0, 1, 1, 1]),
                              tensor([2, 3, 3, 1])),
          node_pairs_with_labels=((tensor([0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1]), tensor([2, 3, 3, 1, 4, 4, 1, 4, 0, 1, 1, 5])),
                                 tensor([1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.])),
          node_pairs=(tensor([5, 3, 3, 3]),
                     tensor([1, 2, 2, 3])),
          node_features={'feat': tensor([[0.5160, 0.2486],
                                [0.8672, 0.2276],
                                [0.6172, 0.7865],
                                [0.2109, 0.1089],
                                [0.9634, 0.2294],
                                [0.5503, 0.8223]])},
          negative_srcs=None,
          negative_node_pairs=(tensor([[0, 0],
                                      [1, 1],
                                      [1, 1],
                                      [1, 1]]),
                              tensor([[4, 4],
                                      [1, 4],
                                      [0, 1],
                                      [1, 5]])),
          negative_dsts=tensor([[0, 0],
                                [3, 0],
                                [5, 3],
                                [3, 4]]),
          labels=None,
          input_nodes=tensor([5, 3, 1, 2, 0, 4]),
          edge_features=[{},
                        {}],
          compacted_node_pairs=(tensor([0, 1, 1, 1]),
                               tensor([2, 3, 3, 1])),
          compacted_negative_srcs=None,
          compacted_negative_dsts=tensor([[4, 4],
                                          [1, 4],
                                          [0, 1],
                                          [1, 5]]),
          blocks=[Block(num_src_nodes=6, num_dst_nodes=6, num_edges=2),
                 Block(num_src_nodes=6, num_dst_nodes=6, num_edges=2)],
       )"""
        ),
        str(
            """MiniBatch(seed_nodes=None,
          sampled_subgraphs=[SampledSubgraphImpl(sampled_csc=CSCFormatBase(indptr=tensor([0, 0, 0, 0, 1, 2, 3]),
                                                                         indices=tensor([4, 1, 0]),
                                                           ),
                                               original_row_node_ids=tensor([3, 4, 0, 1, 5, 2]),
                                               original_edge_ids=None,
                                               original_column_node_ids=tensor([3, 4, 0, 1, 5, 2]),
                            ),
                            SampledSubgraphImpl(sampled_csc=CSCFormatBase(indptr=tensor([0, 0, 0, 0, 1, 2, 3]),
                                                                         indices=tensor([4, 4, 0]),
                                                           ),
                                               original_row_node_ids=tensor([3, 4, 0, 1, 5, 2]),
                                               original_edge_ids=None,
                                               original_column_node_ids=tensor([3, 4, 0, 1, 5, 2]),
                            )],
          positive_node_pairs=(tensor([0, 1, 1, 2]),
                              tensor([0, 0, 1, 1])),
          node_pairs_with_labels=((tensor([0, 1, 1, 2, 0, 0, 1, 1, 1, 1, 2, 2]), tensor([0, 0, 1, 1, 3, 4, 5, 4, 1, 0, 3, 4])),
                                 tensor([1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.])),
          node_pairs=(tensor([3, 4, 4, 0]),
                     tensor([3, 3, 4, 4])),
          node_features={'feat': tensor([[0.8672, 0.2276],
                                [0.5503, 0.8223],
                                [0.9634, 0.2294],
                                [0.6172, 0.7865],
                                [0.5160, 0.2486],
                                [0.2109, 0.1089]])},
          negative_srcs=None,
          negative_node_pairs=(tensor([[0, 0],
                                      [1, 1],
                                      [1, 1],
                                      [2, 2]]),
                              tensor([[3, 4],
                                      [5, 4],
                                      [1, 0],
                                      [3, 4]])),
          negative_dsts=tensor([[1, 5],
                                [2, 5],
                                [4, 3],
                                [1, 5]]),
          labels=None,
          input_nodes=tensor([3, 4, 0, 1, 5, 2]),
          edge_features=[{},
                        {}],
          compacted_node_pairs=(tensor([0, 1, 1, 2]),
                               tensor([0, 0, 1, 1])),
          compacted_negative_srcs=None,
          compacted_negative_dsts=tensor([[3, 4],
                                          [5, 4],
                                          [1, 0],
                                          [3, 4]]),
          blocks=[Block(num_src_nodes=6, num_dst_nodes=6, num_edges=3),
                 Block(num_src_nodes=6, num_dst_nodes=6, num_edges=3)],
       )"""
        ),
        str(
            """MiniBatch(seed_nodes=None,
          sampled_subgraphs=[SampledSubgraphImpl(sampled_csc=CSCFormatBase(indptr=tensor([0, 0, 1, 1, 2]),
                                                                         indices=tensor([1, 0]),
                                                           ),
                                               original_row_node_ids=tensor([5, 4, 0, 1]),
                                               original_edge_ids=None,
                                               original_column_node_ids=tensor([5, 4, 0, 1]),
                            ),
                            SampledSubgraphImpl(sampled_csc=CSCFormatBase(indptr=tensor([0, 0, 1, 1, 2]),
                                                                         indices=tensor([1, 0]),
                                                           ),
                                               original_row_node_ids=tensor([5, 4, 0, 1]),
                                               original_edge_ids=None,
                                               original_column_node_ids=tensor([5, 4, 0, 1]),
                            )],
          positive_node_pairs=(tensor([0, 1]),
                              tensor([0, 0])),
          node_pairs_with_labels=((tensor([0, 1, 0, 0, 1, 1]), tensor([0, 0, 2, 1, 2, 3])),
                                 tensor([1., 1., 0., 0., 0., 0.])),
          node_pairs=(tensor([5, 4]),
                     tensor([5, 5])),
          node_features={'feat': tensor([[0.5160, 0.2486],
                                [0.5503, 0.8223],
                                [0.9634, 0.2294],
                                [0.6172, 0.7865]])},
          negative_srcs=None,
          negative_node_pairs=(tensor([[0, 0],
                                      [1, 1]]),
                              tensor([[2, 1],
                                      [2, 3]])),
          negative_dsts=tensor([[0, 4],
                                [0, 1]]),
          labels=None,
          input_nodes=tensor([5, 4, 0, 1]),
          edge_features=[{},
                        {}],
          compacted_node_pairs=(tensor([0, 1]),
                               tensor([0, 0])),
          compacted_negative_srcs=None,
          compacted_negative_dsts=tensor([[2, 1],
                                          [2, 3]]),
          blocks=[Block(num_src_nodes=4, num_dst_nodes=4, num_edges=2),
                 Block(num_src_nodes=4, num_dst_nodes=4, num_edges=2)],
       )"""
        ),
    ]
    num_batches = 0
    for step, data in enumerate(dataloader):
        assert expected[step] == str(
            data
        ), f"Minibatch {step} does not match its snapshot:\n{data}"
        num_batches += 1
    # Without this check the test would pass vacuously if the dataloader
    # yielded fewer minibatches than there are snapshots.
    assert num_batches == len(
        expected
    ), f"Expected {len(expected)} minibatches, got {num_batches}"


def test_integration_node_classification():
    """Snapshot-test a GraphBolt pipeline without negative sampling.

    Builds a 6-node / 10-edge graph from CSC arrays and runs its edges
    through: item sampling (batch size 4), two layers of neighbor sampling
    (fanout 1, with replacement) and feature fetching. The string form of
    every produced minibatch is compared to a recorded snapshot.

    NOTE(review): despite the test name, the item set is built from node
    pairs (``names="node_pairs"``), not seed nodes; this is kept as in the
    original, since the snapshots depend on it.

    The RNG is seeded so the sampled neighbors are expected to be
    reproducible; the snapshots below depend on that seed.
    """
    torch.manual_seed(926)

    indptr = torch.tensor([0, 0, 1, 3, 6, 8, 10])
    indices = torch.tensor([5, 3, 3, 3, 3, 4, 4, 0, 5, 4])

    # Derive the (num_edges, 2) node-pair tensor from the COO view of the
    # same CSC structure used to build the sampling graph.
    matrix_a = dglsp.from_csc(indptr, indices)
    node_pairs = torch.t(torch.stack(matrix_a.coo()))
    node_feature_data = torch.tensor(
        [
            [0.9634, 0.2294],
            [0.6172, 0.7865],
            [0.2109, 0.1089],
            [0.8672, 0.2276],
            [0.5503, 0.8223],
            [0.5160, 0.2486],
        ]
    )
    edge_feature_data = torch.tensor(
        [
            [0.5123, 0.1709, 0.6150],
            [0.1476, 0.1902, 0.1314],
            [0.2582, 0.5203, 0.6228],
            [0.3708, 0.7631, 0.2683],
            [0.2126, 0.7878, 0.7225],
            [0.7885, 0.3414, 0.5485],
            [0.4088, 0.8200, 0.1851],
            [0.0056, 0.9469, 0.4432],
            [0.8972, 0.7511, 0.3617],
            [0.5773, 0.2199, 0.3366],
        ]
    )

    item_set = gb.ItemSet(node_pairs, names="node_pairs")
    graph = gb.fused_csc_sampling_graph(indptr, indices)

    node_feature = gb.TorchBasedFeature(node_feature_data)
    edge_feature = gb.TorchBasedFeature(edge_feature_data)
    features = {
        ("node", None, "feat"): node_feature,
        ("edge", None, "feat"): edge_feature,
    }
    feature_store = gb.BasicFeatureStore(features)
    datapipe = gb.ItemSampler(item_set, batch_size=4)
    fanouts = torch.LongTensor([1])
    datapipe = datapipe.sample_neighbor(graph, [fanouts, fanouts], replace=True)
    datapipe = datapipe.fetch_feature(
        feature_store, node_feature_keys=["feat"], edge_feature_keys=["feat"]
    )
    dataloader = gb.DataLoader(
        datapipe,
    )
    # Whitespace inside these literals is significant: they are compared
    # verbatim against ``str(minibatch)``.
    expected = [
        str(
            """MiniBatch(seed_nodes=None,
          sampled_subgraphs=[SampledSubgraphImpl(sampled_csc=CSCFormatBase(indptr=tensor([0, 1, 2, 3, 4]),
                                                                         indices=tensor([4, 1, 0, 1]),
                                                           ),
                                               original_row_node_ids=tensor([5, 3, 1, 2, 4]),
                                               original_edge_ids=None,
                                               original_column_node_ids=tensor([5, 3, 1, 2]),
                            ),
                            SampledSubgraphImpl(sampled_csc=CSCFormatBase(indptr=tensor([0, 1, 2, 3, 4]),
                                                                         indices=tensor([0, 1, 0, 1]),
                                                           ),
                                               original_row_node_ids=tensor([5, 3, 1, 2]),
                                               original_edge_ids=None,
                                               original_column_node_ids=tensor([5, 3, 1, 2]),
                            )],
          positive_node_pairs=(tensor([0, 1, 1, 1]),
                              tensor([2, 3, 3, 1])),
          node_pairs_with_labels=None,
          node_pairs=(tensor([5, 3, 3, 3]),
                     tensor([1, 2, 2, 3])),
          node_features={'feat': tensor([[0.5160, 0.2486],
                                [0.8672, 0.2276],
                                [0.6172, 0.7865],
                                [0.2109, 0.1089],
                                [0.5503, 0.8223]])},
          negative_srcs=None,
          negative_node_pairs=None,
          negative_dsts=None,
          labels=None,
          input_nodes=tensor([5, 3, 1, 2, 4]),
          edge_features=[{},
                        {}],
          compacted_node_pairs=(tensor([0, 1, 1, 1]),
                               tensor([2, 3, 3, 1])),
          compacted_negative_srcs=None,
          compacted_negative_dsts=None,
          blocks=[Block(num_src_nodes=5, num_dst_nodes=4, num_edges=4),
                 Block(num_src_nodes=4, num_dst_nodes=4, num_edges=4)],
       )"""
        ),
        str(
            """MiniBatch(seed_nodes=None,
          sampled_subgraphs=[SampledSubgraphImpl(sampled_csc=CSCFormatBase(indptr=tensor([0, 1, 2, 2]),
                                                                         indices=tensor([0, 2]),
                                                           ),
                                               original_row_node_ids=tensor([3, 4, 0]),
                                               original_edge_ids=None,
                                               original_column_node_ids=tensor([3, 4, 0]),
                            ),
                            SampledSubgraphImpl(sampled_csc=CSCFormatBase(indptr=tensor([0, 1, 2, 2]),
                                                                         indices=tensor([0, 2]),
                                                           ),
                                               original_row_node_ids=tensor([3, 4, 0]),
                                               original_edge_ids=None,
                                               original_column_node_ids=tensor([3, 4, 0]),
                            )],
          positive_node_pairs=(tensor([0, 1, 1, 2]),
                              tensor([0, 0, 1, 1])),
          node_pairs_with_labels=None,
          node_pairs=(tensor([3, 4, 4, 0]),
                     tensor([3, 3, 4, 4])),
          node_features={'feat': tensor([[0.8672, 0.2276],
                                [0.5503, 0.8223],
                                [0.9634, 0.2294]])},
          negative_srcs=None,
          negative_node_pairs=None,
          negative_dsts=None,
          labels=None,
          input_nodes=tensor([3, 4, 0]),
          edge_features=[{},
                        {}],
          compacted_node_pairs=(tensor([0, 1, 1, 2]),
                               tensor([0, 0, 1, 1])),
          compacted_negative_srcs=None,
          compacted_negative_dsts=None,
          blocks=[Block(num_src_nodes=3, num_dst_nodes=3, num_edges=2),
                 Block(num_src_nodes=3, num_dst_nodes=3, num_edges=2)],
       )"""
        ),
        str(
            """MiniBatch(seed_nodes=None,
          sampled_subgraphs=[SampledSubgraphImpl(sampled_csc=CSCFormatBase(indptr=tensor([0, 1, 2]),
                                                                         indices=tensor([0, 2]),
                                                           ),
                                               original_row_node_ids=tensor([5, 4, 0]),
                                               original_edge_ids=None,
                                               original_column_node_ids=tensor([5, 4]),
                            ),
                            SampledSubgraphImpl(sampled_csc=CSCFormatBase(indptr=tensor([0, 1, 2]),
                                                                         indices=tensor([1, 1]),
                                                           ),
                                               original_row_node_ids=tensor([5, 4]),
                                               original_edge_ids=None,
                                               original_column_node_ids=tensor([5, 4]),
                            )],
          positive_node_pairs=(tensor([0, 1]),
                              tensor([0, 0])),
          node_pairs_with_labels=None,
          node_pairs=(tensor([5, 4]),
                     tensor([5, 5])),
          node_features={'feat': tensor([[0.5160, 0.2486],
                                [0.5503, 0.8223],
                                [0.9634, 0.2294]])},
          negative_srcs=None,
          negative_node_pairs=None,
          negative_dsts=None,
          labels=None,
          input_nodes=tensor([5, 4, 0]),
          edge_features=[{},
                        {}],
          compacted_node_pairs=(tensor([0, 1]),
                               tensor([0, 0])),
          compacted_negative_srcs=None,
          compacted_negative_dsts=None,
          blocks=[Block(num_src_nodes=3, num_dst_nodes=2, num_edges=2),
                 Block(num_src_nodes=2, num_dst_nodes=2, num_edges=2)],
       )"""
        ),
    ]
    num_batches = 0
    for step, data in enumerate(dataloader):
        assert expected[step] == str(
            data
        ), f"Minibatch {step} does not match its snapshot:\n{data}"
        num_batches += 1
    # Without this check the test would pass vacuously if the dataloader
    # yielded fewer minibatches than there are snapshots.
    assert num_batches == len(
        expected
    ), f"Expected {len(expected)} minibatches, got {num_batches}"