"vscode:/vscode.git/clone" did not exist on "e3ddbe25edeadaa5afc3f8f5bb0d645098a8b26a"
test_integration.py 18.1 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import dgl
import dgl.graphbolt as gb
import dgl.sparse as dglsp
import torch


def test_integration_link_prediction():
    """Check the printed form of every minibatch from a link-prediction pipeline.

    The RNG is seeded with 926 so that the sampling stages are reproducible.
    A small graph (6 nodes, 10 edges, given in CSC form) is turned into an
    item set of node pairs and pushed through the following stages:

    1. ``ItemSampler`` with ``batch_size=4``.
    2. ``sample_uniform_negative`` (called with ``2`` as its second argument).
    3. ``sample_neighbor`` with two layers of fanout 1, ``replace=True``.
    4. ``transform(gb.exclude_seed_edges)``.
    5. ``fetch_feature`` for the ``"feat"`` node and edge features.

    The ``str()`` of each resulting minibatch must equal the corresponding
    entry of ``expected`` exactly, and the number of minibatches must match.
    """
    torch.manual_seed(926)

    indptr = torch.tensor([0, 0, 1, 3, 6, 8, 10])
    indices = torch.tensor([5, 3, 3, 3, 3, 4, 4, 0, 5, 4])

    matrix_a = dglsp.from_csc(indptr, indices)
    node_pairs = torch.t(torch.stack(matrix_a.coo()))
    node_feature_data = torch.tensor(
        [
            [0.9634, 0.2294],
            [0.6172, 0.7865],
            [0.2109, 0.1089],
            [0.8672, 0.2276],
            [0.5503, 0.8223],
            [0.5160, 0.2486],
        ]
    )
    edge_feature_data = torch.tensor(
        [
            [0.5123, 0.1709, 0.6150],
            [0.1476, 0.1902, 0.1314],
            [0.2582, 0.5203, 0.6228],
            [0.3708, 0.7631, 0.2683],
            [0.2126, 0.7878, 0.7225],
            [0.7885, 0.3414, 0.5485],
            [0.4088, 0.8200, 0.1851],
            [0.0056, 0.9469, 0.4432],
            [0.8972, 0.7511, 0.3617],
            [0.5773, 0.2199, 0.3366],
        ]
    )

    item_set = gb.ItemSet(node_pairs, names="node_pairs")
    graph = gb.fused_csc_sampling_graph(indptr, indices)

    node_feature = gb.TorchBasedFeature(node_feature_data)
    edge_feature = gb.TorchBasedFeature(edge_feature_data)
    features = {
        ("node", None, "feat"): node_feature,
        ("edge", None, "feat"): edge_feature,
    }
    feature_store = gb.BasicFeatureStore(features)
    datapipe = gb.ItemSampler(item_set, batch_size=4)
    datapipe = datapipe.sample_uniform_negative(graph, 2)
    fanouts = torch.LongTensor([1])
    datapipe = datapipe.sample_neighbor(graph, [fanouts, fanouts], replace=True)
    datapipe = datapipe.transform(gb.exclude_seed_edges)
    datapipe = datapipe.fetch_feature(
        feature_store, node_feature_keys=["feat"], edge_feature_keys=["feat"]
    )
    dataloader = gb.DataLoader(
        datapipe,
    )
    # The literals below are compared character-for-character against
    # ``str(minibatch)``, so their internal whitespace must not be reformatted.
    expected = [
        str(
            """MiniBatch(seed_nodes=None,
          sampled_subgraphs=[SampledSubgraphImpl(sampled_csc=CSCFormatBase(indptr=tensor([0, 1, 1, 1, 1, 1, 2]),
                                                                         indices=tensor([0, 4]),
                                                           ),
                                               original_row_node_ids=tensor([5, 3, 1, 2, 0, 4]),
                                               original_edge_ids=None,
                                               original_column_node_ids=tensor([5, 3, 1, 2, 0, 4]),
                            ),
                            SampledSubgraphImpl(sampled_csc=CSCFormatBase(indptr=tensor([0, 1, 1, 1, 1, 1, 2]),
                                                                         indices=tensor([5, 4]),
                                                           ),
                                               original_row_node_ids=tensor([5, 3, 1, 2, 0, 4]),
                                               original_edge_ids=None,
                                               original_column_node_ids=tensor([5, 3, 1, 2, 0, 4]),
                            )],
          positive_node_pairs=(tensor([0, 1, 1, 1]),
                              tensor([2, 3, 3, 1])),
          node_pairs_with_labels=((tensor([0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1]), tensor([2, 3, 3, 1, 4, 4, 1, 4, 0, 1, 1, 5])),
                                 tensor([1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.])),
          node_pairs=(tensor([5, 3, 3, 3]),
                     tensor([1, 2, 2, 3])),
          node_features={'feat': tensor([[0.5160, 0.2486],
                                [0.8672, 0.2276],
                                [0.6172, 0.7865],
                                [0.2109, 0.1089],
                                [0.9634, 0.2294],
                                [0.5503, 0.8223]])},
          negative_srcs=None,
          negative_node_pairs=(tensor([0, 0, 1, 1, 1, 1, 1, 1]),
                              tensor([4, 4, 1, 4, 0, 1, 1, 5])),
          negative_dsts=tensor([[0, 0],
                                [3, 0],
                                [5, 3],
                                [3, 4]]),
          labels=None,
          input_nodes=tensor([5, 3, 1, 2, 0, 4]),
          edge_features=[{},
                        {}],
          compacted_node_pairs=(tensor([0, 1, 1, 1]),
                               tensor([2, 3, 3, 1])),
          compacted_negative_srcs=None,
          compacted_negative_dsts=tensor([[4, 4],
                                          [1, 4],
                                          [0, 1],
                                          [1, 5]]),
          blocks=[Block(num_src_nodes=6, num_dst_nodes=6, num_edges=2),
                 Block(num_src_nodes=6, num_dst_nodes=6, num_edges=2)],
       )"""
        ),
        str(
            """MiniBatch(seed_nodes=None,
          sampled_subgraphs=[SampledSubgraphImpl(sampled_csc=CSCFormatBase(indptr=tensor([0, 0, 0, 0, 1, 2, 3]),
                                                                         indices=tensor([4, 1, 0]),
                                                           ),
                                               original_row_node_ids=tensor([3, 4, 0, 1, 5, 2]),
                                               original_edge_ids=None,
                                               original_column_node_ids=tensor([3, 4, 0, 1, 5, 2]),
                            ),
                            SampledSubgraphImpl(sampled_csc=CSCFormatBase(indptr=tensor([0, 0, 0, 0, 1, 2, 3]),
                                                                         indices=tensor([4, 4, 0]),
                                                           ),
                                               original_row_node_ids=tensor([3, 4, 0, 1, 5, 2]),
                                               original_edge_ids=None,
                                               original_column_node_ids=tensor([3, 4, 0, 1, 5, 2]),
                            )],
          positive_node_pairs=(tensor([0, 1, 1, 2]),
                              tensor([0, 0, 1, 1])),
          node_pairs_with_labels=((tensor([0, 1, 1, 2, 0, 0, 1, 1, 1, 1, 2, 2]), tensor([0, 0, 1, 1, 3, 4, 5, 4, 1, 0, 3, 4])),
                                 tensor([1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.])),
          node_pairs=(tensor([3, 4, 4, 0]),
                     tensor([3, 3, 4, 4])),
          node_features={'feat': tensor([[0.8672, 0.2276],
                                [0.5503, 0.8223],
                                [0.9634, 0.2294],
                                [0.6172, 0.7865],
                                [0.5160, 0.2486],
                                [0.2109, 0.1089]])},
          negative_srcs=None,
          negative_node_pairs=(tensor([0, 0, 1, 1, 1, 1, 2, 2]),
                              tensor([3, 4, 5, 4, 1, 0, 3, 4])),
          negative_dsts=tensor([[1, 5],
                                [2, 5],
                                [4, 3],
                                [1, 5]]),
          labels=None,
          input_nodes=tensor([3, 4, 0, 1, 5, 2]),
          edge_features=[{},
                        {}],
          compacted_node_pairs=(tensor([0, 1, 1, 2]),
                               tensor([0, 0, 1, 1])),
          compacted_negative_srcs=None,
          compacted_negative_dsts=tensor([[3, 4],
                                          [5, 4],
                                          [1, 0],
                                          [3, 4]]),
          blocks=[Block(num_src_nodes=6, num_dst_nodes=6, num_edges=3),
                 Block(num_src_nodes=6, num_dst_nodes=6, num_edges=3)],
       )"""
        ),
        str(
            """MiniBatch(seed_nodes=None,
          sampled_subgraphs=[SampledSubgraphImpl(sampled_csc=CSCFormatBase(indptr=tensor([0, 0, 1, 1, 2]),
                                                                         indices=tensor([1, 0]),
                                                           ),
                                               original_row_node_ids=tensor([5, 4, 0, 1]),
                                               original_edge_ids=None,
                                               original_column_node_ids=tensor([5, 4, 0, 1]),
                            ),
                            SampledSubgraphImpl(sampled_csc=CSCFormatBase(indptr=tensor([0, 0, 1, 1, 2]),
                                                                         indices=tensor([1, 0]),
                                                           ),
                                               original_row_node_ids=tensor([5, 4, 0, 1]),
                                               original_edge_ids=None,
                                               original_column_node_ids=tensor([5, 4, 0, 1]),
                            )],
          positive_node_pairs=(tensor([0, 1]),
                              tensor([0, 0])),
          node_pairs_with_labels=((tensor([0, 1, 0, 0, 1, 1]), tensor([0, 0, 2, 1, 2, 3])),
                                 tensor([1., 1., 0., 0., 0., 0.])),
          node_pairs=(tensor([5, 4]),
                     tensor([5, 5])),
          node_features={'feat': tensor([[0.5160, 0.2486],
                                [0.5503, 0.8223],
                                [0.9634, 0.2294],
                                [0.6172, 0.7865]])},
          negative_srcs=None,
          negative_node_pairs=(tensor([0, 0, 1, 1]),
                              tensor([2, 1, 2, 3])),
          negative_dsts=tensor([[0, 4],
                                [0, 1]]),
          labels=None,
          input_nodes=tensor([5, 4, 0, 1]),
          edge_features=[{},
                        {}],
          compacted_node_pairs=(tensor([0, 1]),
                               tensor([0, 0])),
          compacted_negative_srcs=None,
          compacted_negative_dsts=tensor([[2, 1],
                                          [2, 3]]),
          blocks=[Block(num_src_nodes=4, num_dst_nodes=4, num_edges=2),
                 Block(num_src_nodes=4, num_dst_nodes=4, num_edges=2)],
       )"""
        ),
    ]
    # Render every minibatch first so a wrong batch count is reported as an
    # assertion failure rather than an IndexError or a silent pass.
    actual = [str(data) for data in dataloader]
    assert len(actual) == len(
        expected
    ), f"Expected {len(expected)} minibatches, got {len(actual)}"
    for step, (want, got) in enumerate(zip(expected, actual)):
        # Put the offending minibatch in the assertion message; print()
        # returns None and would leave the message empty.
        assert want == got, f"Unexpected minibatch at step {step}:\n{got}"


def test_integration_node_classification():
    """Check the printed form of every minibatch from a neighbor-sampling pipeline.

    The RNG is seeded with 926 so that the sampling stage is reproducible.
    A small graph (6 nodes, 10 edges, given in CSC form) is turned into an
    item set (named ``"node_pairs"``) and pushed through the following stages:

    1. ``ItemSampler`` with ``batch_size=4``.
    2. ``sample_neighbor`` with two layers of fanout 1, ``replace=True``.
    3. ``fetch_feature`` for the ``"feat"`` node and edge features.

    No negative sampling or seed-edge exclusion is applied here. The ``str()``
    of each resulting minibatch must equal the corresponding entry of
    ``expected`` exactly, and the number of minibatches must match.
    """
    torch.manual_seed(926)

    indptr = torch.tensor([0, 0, 1, 3, 6, 8, 10])
    indices = torch.tensor([5, 3, 3, 3, 3, 4, 4, 0, 5, 4])

    matrix_a = dglsp.from_csc(indptr, indices)
    node_pairs = torch.t(torch.stack(matrix_a.coo()))
    node_feature_data = torch.tensor(
        [
            [0.9634, 0.2294],
            [0.6172, 0.7865],
            [0.2109, 0.1089],
            [0.8672, 0.2276],
            [0.5503, 0.8223],
            [0.5160, 0.2486],
        ]
    )
    edge_feature_data = torch.tensor(
        [
            [0.5123, 0.1709, 0.6150],
            [0.1476, 0.1902, 0.1314],
            [0.2582, 0.5203, 0.6228],
            [0.3708, 0.7631, 0.2683],
            [0.2126, 0.7878, 0.7225],
            [0.7885, 0.3414, 0.5485],
            [0.4088, 0.8200, 0.1851],
            [0.0056, 0.9469, 0.4432],
            [0.8972, 0.7511, 0.3617],
            [0.5773, 0.2199, 0.3366],
        ]
    )

    item_set = gb.ItemSet(node_pairs, names="node_pairs")
    graph = gb.fused_csc_sampling_graph(indptr, indices)

    node_feature = gb.TorchBasedFeature(node_feature_data)
    edge_feature = gb.TorchBasedFeature(edge_feature_data)
    features = {
        ("node", None, "feat"): node_feature,
        ("edge", None, "feat"): edge_feature,
    }
    feature_store = gb.BasicFeatureStore(features)
    datapipe = gb.ItemSampler(item_set, batch_size=4)
    fanouts = torch.LongTensor([1])
    datapipe = datapipe.sample_neighbor(graph, [fanouts, fanouts], replace=True)
    datapipe = datapipe.fetch_feature(
        feature_store, node_feature_keys=["feat"], edge_feature_keys=["feat"]
    )
    dataloader = gb.DataLoader(
        datapipe,
    )
    # The literals below are compared character-for-character against
    # ``str(minibatch)``, so their internal whitespace must not be reformatted.
    expected = [
        str(
            """MiniBatch(seed_nodes=None,
          sampled_subgraphs=[SampledSubgraphImpl(sampled_csc=CSCFormatBase(indptr=tensor([0, 1, 2, 3, 4]),
                                                                         indices=tensor([4, 1, 0, 1]),
                                                           ),
                                               original_row_node_ids=tensor([5, 3, 1, 2, 4]),
                                               original_edge_ids=None,
                                               original_column_node_ids=tensor([5, 3, 1, 2]),
                            ),
                            SampledSubgraphImpl(sampled_csc=CSCFormatBase(indptr=tensor([0, 1, 2, 3, 4]),
                                                                         indices=tensor([0, 1, 0, 1]),
                                                           ),
                                               original_row_node_ids=tensor([5, 3, 1, 2]),
                                               original_edge_ids=None,
                                               original_column_node_ids=tensor([5, 3, 1, 2]),
                            )],
          positive_node_pairs=(tensor([0, 1, 1, 1]),
                              tensor([2, 3, 3, 1])),
          node_pairs_with_labels=None,
          node_pairs=(tensor([5, 3, 3, 3]),
                     tensor([1, 2, 2, 3])),
          node_features={'feat': tensor([[0.5160, 0.2486],
                                [0.8672, 0.2276],
                                [0.6172, 0.7865],
                                [0.2109, 0.1089],
                                [0.5503, 0.8223]])},
          negative_srcs=None,
          negative_node_pairs=None,
          negative_dsts=None,
          labels=None,
          input_nodes=tensor([5, 3, 1, 2, 4]),
          edge_features=[{},
                        {}],
          compacted_node_pairs=(tensor([0, 1, 1, 1]),
                               tensor([2, 3, 3, 1])),
          compacted_negative_srcs=None,
          compacted_negative_dsts=None,
          blocks=[Block(num_src_nodes=5, num_dst_nodes=4, num_edges=4),
                 Block(num_src_nodes=4, num_dst_nodes=4, num_edges=4)],
       )"""
        ),
        str(
            """MiniBatch(seed_nodes=None,
          sampled_subgraphs=[SampledSubgraphImpl(sampled_csc=CSCFormatBase(indptr=tensor([0, 1, 2, 2]),
                                                                         indices=tensor([0, 2]),
                                                           ),
                                               original_row_node_ids=tensor([3, 4, 0]),
                                               original_edge_ids=None,
                                               original_column_node_ids=tensor([3, 4, 0]),
                            ),
                            SampledSubgraphImpl(sampled_csc=CSCFormatBase(indptr=tensor([0, 1, 2, 2]),
                                                                         indices=tensor([0, 2]),
                                                           ),
                                               original_row_node_ids=tensor([3, 4, 0]),
                                               original_edge_ids=None,
                                               original_column_node_ids=tensor([3, 4, 0]),
                            )],
          positive_node_pairs=(tensor([0, 1, 1, 2]),
                              tensor([0, 0, 1, 1])),
          node_pairs_with_labels=None,
          node_pairs=(tensor([3, 4, 4, 0]),
                     tensor([3, 3, 4, 4])),
          node_features={'feat': tensor([[0.8672, 0.2276],
                                [0.5503, 0.8223],
                                [0.9634, 0.2294]])},
          negative_srcs=None,
          negative_node_pairs=None,
          negative_dsts=None,
          labels=None,
          input_nodes=tensor([3, 4, 0]),
          edge_features=[{},
                        {}],
          compacted_node_pairs=(tensor([0, 1, 1, 2]),
                               tensor([0, 0, 1, 1])),
          compacted_negative_srcs=None,
          compacted_negative_dsts=None,
          blocks=[Block(num_src_nodes=3, num_dst_nodes=3, num_edges=2),
                 Block(num_src_nodes=3, num_dst_nodes=3, num_edges=2)],
       )"""
        ),
        str(
            """MiniBatch(seed_nodes=None,
          sampled_subgraphs=[SampledSubgraphImpl(sampled_csc=CSCFormatBase(indptr=tensor([0, 1, 2]),
                                                                         indices=tensor([0, 2]),
                                                           ),
                                               original_row_node_ids=tensor([5, 4, 0]),
                                               original_edge_ids=None,
                                               original_column_node_ids=tensor([5, 4]),
                            ),
                            SampledSubgraphImpl(sampled_csc=CSCFormatBase(indptr=tensor([0, 1, 2]),
                                                                         indices=tensor([1, 1]),
                                                           ),
                                               original_row_node_ids=tensor([5, 4]),
                                               original_edge_ids=None,
                                               original_column_node_ids=tensor([5, 4]),
                            )],
          positive_node_pairs=(tensor([0, 1]),
                              tensor([0, 0])),
          node_pairs_with_labels=None,
          node_pairs=(tensor([5, 4]),
                     tensor([5, 5])),
          node_features={'feat': tensor([[0.5160, 0.2486],
                                [0.5503, 0.8223],
                                [0.9634, 0.2294]])},
          negative_srcs=None,
          negative_node_pairs=None,
          negative_dsts=None,
          labels=None,
          input_nodes=tensor([5, 4, 0]),
          edge_features=[{},
                        {}],
          compacted_node_pairs=(tensor([0, 1]),
                               tensor([0, 0])),
          compacted_negative_srcs=None,
          compacted_negative_dsts=None,
          blocks=[Block(num_src_nodes=3, num_dst_nodes=2, num_edges=2),
                 Block(num_src_nodes=2, num_dst_nodes=2, num_edges=2)],
       )"""
        ),
    ]
    # Render every minibatch first so a wrong batch count is reported as an
    # assertion failure rather than an IndexError or a silent pass.
    actual = [str(data) for data in dataloader]
    assert len(actual) == len(
        expected
    ), f"Expected {len(expected)} minibatches, got {len(actual)}"
    for step, (want, got) in enumerate(zip(expected, actual)):
        # Put the offending minibatch in the assertion message; print()
        # returns None and would leave the message empty.
        assert want == got, f"Unexpected minibatch at step {step}:\n{got}"