test_heterograph-misc.py 15.3 KB
Newer Older
1
import math
2
import numbers
3
4
5
6
7

import backend as F

import dgl
import networkx as nx
8
import numpy as np
9
import pytest
10
import scipy.sparse as sp
11
from dgl import DGLError
12

13

14
15
16
17
18
19
20
21
22
23
24
25
26
27
# graph generation: a random graph with 10 nodes
#  and 20 edges.
#  - has self loop
#  - no multi edge
def edge_pair_input(sort=False):
    if sort:
        src = [0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 4, 4, 5, 5, 6, 7, 7, 7, 9]
        dst = [4, 6, 9, 3, 5, 3, 7, 5, 8, 1, 3, 4, 9, 1, 9, 6, 2, 8, 9, 2]
        return src, dst
    else:
        src = [0, 0, 4, 5, 0, 4, 7, 4, 4, 3, 2, 7, 7, 5, 3, 2, 1, 9, 6, 1]
        dst = [9, 6, 3, 9, 4, 4, 9, 9, 1, 8, 3, 2, 8, 1, 5, 7, 3, 2, 6, 5]
        return src, dst

28

29
30
31
32
33
34
35
def nx_input():
    g = nx.DiGraph()
    src, dst = edge_pair_input()
    for i, e in enumerate(zip(src, dst)):
        g.add_edge(*e, id=i)
    return g

36

37
38
39
40
def elist_input():
    src, dst = edge_pair_input()
    return list(zip(src, dst))

41

42
43
def scipy_coo_input():
    src, dst = edge_pair_input()
44
45
    return sp.coo_matrix((np.ones((20,)), (src, dst)), shape=(10, 10))

46
47
48

def scipy_csr_input():
    src, dst = edge_pair_input()
49
    csr = sp.coo_matrix((np.ones((20,)), (src, dst)), shape=(10, 10)).tocsr()
50
51
52
53
54
    csr.sort_indices()
    # src = [0 0 0 1 1 2 2 3 3 4 4 4 4 5 5 6 7 7 7 9]
    # dst = [4 6 9 3 5 3 7 5 8 1 3 4 9 1 9 6 2 8 9 2]
    return csr

55

56
57
58
59
60
61
62
def gen_by_mutation():
    g = dgl.DGLGraph()
    src, dst = edge_pair_input()
    g.add_nodes(10)
    g.add_edges(src, dst)
    return g

63

Da Zheng's avatar
Da Zheng committed
64
65
def gen_from_data(data, readonly, sort):
    return dgl.DGLGraph(data, readonly=readonly, sort_csr=True)
66

67

68
69
def test_query():
    def _test_one(g):
Hongzhi (Steve), Chen's avatar
Hongzhi (Steve), Chen committed
70
71
        assert g.num_nodes() == 10
        assert g.num_edges() == 20
72
73

        for i in range(10):
74
75
            assert g.has_nodes(i)
        assert not g.has_nodes(11)
76
        assert F.allclose(g.has_nodes([0, 2, 10, 11]), F.tensor([1, 1, 0, 0]))
77
78
79

        src, dst = edge_pair_input()
        for u, v in zip(src, dst):
80
81
            assert g.has_edges_between(u, v)
        assert not g.has_edges_between(0, 0)
82
83
84
85
86
        assert F.allclose(
            g.has_edges_between([0, 0, 3], [0, 9, 8]), F.tensor([0, 1, 1])
        )
        assert set(F.asnumpy(g.predecessors(9))) == set([0, 5, 7, 4])
        assert set(F.asnumpy(g.successors(2))) == set([7, 3])
87

88
89
        assert g.edge_ids(4, 4) == 5
        assert F.allclose(g.edge_ids([4, 0], [4, 9]), F.tensor([5, 0]))
90
91
92
93
94

        src, dst = g.find_edges([3, 6, 5])
        assert F.allclose(src, F.tensor([5, 7, 4]))
        assert F.allclose(dst, F.tensor([9, 9, 4]))

95
        src, dst, eid = g.in_edges(9, form="all")
96
        tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid)))
97
98
99
100
        assert set(tup) == set([(0, 9, 0), (5, 9, 3), (7, 9, 6), (4, 9, 7)])
        src, dst, eid = g.in_edges(
            [9, 0, 8], form="all"
        )  # test node#0 has no in edges
101
        tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid)))
102
103
104
        assert set(tup) == set(
            [(0, 9, 0), (5, 9, 3), (7, 9, 6), (4, 9, 7), (3, 8, 9), (7, 8, 12)]
        )
105

106
        src, dst, eid = g.out_edges(0, form="all")
107
        tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid)))
108
109
110
111
        assert set(tup) == set([(0, 9, 0), (0, 6, 1), (0, 4, 4)])
        src, dst, eid = g.out_edges(
            [0, 4, 8], form="all"
        )  # test node#8 has no out edges
112
        tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid)))
113
114
115
116
117
118
119
120
121
122
123
124
125
        assert set(tup) == set(
            [
                (0, 9, 0),
                (0, 6, 1),
                (0, 4, 4),
                (4, 3, 2),
                (4, 4, 5),
                (4, 9, 7),
                (4, 1, 8),
            ]
        )

        src, dst, eid = g.edges("all", "eid")
126
127
128
129
130
131
        t_src, t_dst = edge_pair_input()
        t_tup = list(zip(t_src, t_dst, list(range(20))))
        tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid)))
        assert set(tup) == set(t_tup)
        assert list(F.asnumpy(eid)) == list(range(20))

132
        src, dst, eid = g.edges("all", "srcdst")
133
134
135
136
137
138
        t_src, t_dst = edge_pair_input()
        t_tup = list(zip(t_src, t_dst, list(range(20))))
        tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid)))
        assert set(tup) == set(t_tup)
        assert list(F.asnumpy(src)) == sorted(list(F.asnumpy(src)))

139
140
        assert g.in_degrees(0) == 0
        assert g.in_degrees(9) == 4
141
        assert F.allclose(g.in_degrees([0, 9]), F.tensor([0, 4]))
142
143
        assert g.out_degrees(8) == 0
        assert g.out_degrees(9) == 1
144
145
        assert F.allclose(g.out_degrees([8, 9]), F.tensor([0, 1]))

Mufei Li's avatar
Mufei Li committed
146
        assert np.array_equal(
147
            F.sparse_to_numpy(g.adj_external(transpose=True)),
148
149
            scipy_coo_input().toarray().T,
        )
Mufei Li's avatar
Mufei Li committed
150
        assert np.array_equal(
151
            F.sparse_to_numpy(g.adj_external(transpose=False)),
152
153
            scipy_coo_input().toarray(),
        )
154
155
156
157
158
159
160

    def _test(g):
        # test twice to see whether the cached format works or not
        _test_one(g)
        _test_one(g)

    def _test_csr_one(g):
Hongzhi (Steve), Chen's avatar
Hongzhi (Steve), Chen committed
161
162
        assert g.num_nodes() == 10
        assert g.num_edges() == 20
163
164

        for i in range(10):
165
166
            assert g.has_nodes(i)
        assert not g.has_nodes(11)
167
        assert F.allclose(g.has_nodes([0, 2, 10, 11]), F.tensor([1, 1, 0, 0]))
168
169
170

        src, dst = edge_pair_input(sort=True)
        for u, v in zip(src, dst):
171
172
            assert g.has_edges_between(u, v)
        assert not g.has_edges_between(0, 0)
173
174
175
176
177
        assert F.allclose(
            g.has_edges_between([0, 0, 3], [0, 9, 8]), F.tensor([0, 1, 1])
        )
        assert set(F.asnumpy(g.predecessors(9))) == set([0, 5, 7, 4])
        assert set(F.asnumpy(g.successors(2))) == set([7, 3])
178
179
180
181

        # src = [0 0 0 1 1 2 2 3 3 4 4 4 4 5 5 6 7 7 7 9]
        # dst = [4 6 9 3 5 3 7 5 8 1 3 4 9 1 9 6 2 8 9 2]
        # eid = [0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9]
182
183
        assert g.edge_ids(4, 4) == 11
        assert F.allclose(g.edge_ids([4, 0], [4, 9]), F.tensor([11, 2]))
184
185
186
187
188

        src, dst = g.find_edges([3, 6, 5])
        assert F.allclose(src, F.tensor([1, 2, 2]))
        assert F.allclose(dst, F.tensor([3, 7, 3]))

189
        src, dst, eid = g.in_edges(9, form="all")
190
        tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid)))
191
192
193
194
        assert set(tup) == set([(0, 9, 2), (5, 9, 14), (7, 9, 18), (4, 9, 12)])
        src, dst, eid = g.in_edges(
            [9, 0, 8], form="all"
        )  # test node#0 has no in edges
195
        tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid)))
196
197
198
199
200
201
202
203
204
205
206
207
        assert set(tup) == set(
            [
                (0, 9, 2),
                (5, 9, 14),
                (7, 9, 18),
                (4, 9, 12),
                (3, 8, 8),
                (7, 8, 17),
            ]
        )

        src, dst, eid = g.out_edges(0, form="all")
208
        tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid)))
209
210
211
212
        assert set(tup) == set([(0, 9, 2), (0, 6, 1), (0, 4, 0)])
        src, dst, eid = g.out_edges(
            [0, 4, 8], form="all"
        )  # test node#8 has no out edges
213
        tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid)))
214
215
216
217
218
219
220
221
222
223
224
225
226
        assert set(tup) == set(
            [
                (0, 9, 2),
                (0, 6, 1),
                (0, 4, 0),
                (4, 3, 10),
                (4, 4, 11),
                (4, 9, 12),
                (4, 1, 9),
            ]
        )

        src, dst, eid = g.edges("all", "eid")
227
228
229
230
231
232
        t_src, t_dst = edge_pair_input(sort=True)
        t_tup = list(zip(t_src, t_dst, list(range(20))))
        tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid)))
        assert set(tup) == set(t_tup)
        assert list(F.asnumpy(eid)) == list(range(20))

233
        src, dst, eid = g.edges("all", "srcdst")
234
235
236
237
238
239
        t_src, t_dst = edge_pair_input(sort=True)
        t_tup = list(zip(t_src, t_dst, list(range(20))))
        tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid)))
        assert set(tup) == set(t_tup)
        assert list(F.asnumpy(src)) == sorted(list(F.asnumpy(src)))

240
241
        assert g.in_degrees(0) == 0
        assert g.in_degrees(9) == 4
242
        assert F.allclose(g.in_degrees([0, 9]), F.tensor([0, 4]))
243
244
        assert g.out_degrees(8) == 0
        assert g.out_degrees(9) == 1
245
246
        assert F.allclose(g.out_degrees([8, 9]), F.tensor([0, 1]))

Mufei Li's avatar
Mufei Li committed
247
        assert np.array_equal(
248
            F.sparse_to_numpy(g.adj_external(transpose=True)),
249
250
            scipy_coo_input().toarray().T,
        )
Mufei Li's avatar
Mufei Li committed
251
        assert np.array_equal(
252
            F.sparse_to_numpy(g.adj_external(transpose=False)),
253
254
            scipy_coo_input().toarray(),
        )
255
256
257
258
259
260

    def _test_csr(g):
        # test twice to see whether the cached format works or not
        _test_csr_one(g)
        _test_csr_one(g)

261
262
    def _test_edge_ids():
        g = gen_by_mutation()
263
        eids = g.edge_ids([4, 0], [4, 9])
264
        assert eids.shape[0] == 2
265
        eid = g.edge_ids(4, 4)
266
267
        assert isinstance(eid, numbers.Number)
        with pytest.raises(DGLError):
268
            eids = g.edge_ids([9, 0], [4, 9])
269

270
        with pytest.raises(DGLError):
271
            eid = g.edge_ids(4, 5)
272

273
        g.add_edges(0, 4)
274
        eids = g.edge_ids([0, 0], [4, 9])
275
        eid = g.edge_ids(0, 4)
276

277
    _test(gen_by_mutation())
Da Zheng's avatar
Da Zheng committed
278
279
280
281
282
283
284
285
    _test(gen_from_data(elist_input(), False, False))
    _test(gen_from_data(elist_input(), True, False))
    _test(gen_from_data(elist_input(), True, True))
    _test(gen_from_data(scipy_coo_input(), False, False))
    _test(gen_from_data(scipy_coo_input(), True, False))

    _test_csr(gen_from_data(scipy_csr_input(), False, False))
    _test_csr(gen_from_data(scipy_csr_input(), True, False))
286
    _test_edge_ids()
287

288

289
def test_mutation():
290
    g = dgl.DGLGraph()
291
    g = g.to(F.ctx())
292
293
    # test add nodes with data
    g.add_nodes(5)
294
    g.add_nodes(5, {"h": F.ones((5, 2))})
295
    ans = F.cat([F.zeros((5, 2)), F.ones((5, 2))], 0)
296
297
298
    assert F.allclose(ans, g.ndata["h"])
    g.ndata["w"] = 2 * F.ones((10, 2))
    assert F.allclose(2 * F.ones((10, 2)), g.ndata["w"])
299
300
    # test add edges with data
    g.add_edges([2, 3], [3, 4])
301
    g.add_edges([0, 1], [1, 2], {"m": F.ones((2, 2))})
302
    ans = F.cat([F.zeros((2, 2)), F.ones((2, 2))], 0)
303
304
    assert F.allclose(ans, g.edata["m"])

305

306
307
308
309
310
def test_scipy_adjmat():
    g = dgl.DGLGraph()
    g.add_nodes(10)
    g.add_edges(range(9), range(1, 10))

311
312
    adj_0 = g.adj_external(scipy_fmt="csr")
    adj_1 = g.adj_external(scipy_fmt="coo")
313
314
    assert np.array_equal(adj_0.toarray(), adj_1.toarray())

315
316
    adj_t0 = g.adj_external(transpose=False, scipy_fmt="csr")
    adj_t_1 = g.adj_external(transpose=False, scipy_fmt="coo")
317
318
    assert np.array_equal(adj_0.toarray(), adj_1.toarray())

319

320
321
322
def test_incmat():
    g = dgl.DGLGraph()
    g.add_nodes(4)
323
324
325
326
327
328
329
330
    g.add_edges(0, 1)  # 0
    g.add_edges(0, 2)  # 1
    g.add_edges(0, 3)  # 2
    g.add_edges(2, 3)  # 3
    g.add_edges(1, 1)  # 4
    inc_in = F.sparse_to_numpy(g.incidence_matrix("in"))
    inc_out = F.sparse_to_numpy(g.incidence_matrix("out"))
    inc_both = F.sparse_to_numpy(g.incidence_matrix("both"))
331
332
333
334
    print(inc_in)
    print(inc_out)
    print(inc_both)
    assert np.allclose(
335
336
337
338
339
340
341
342
343
344
        inc_in,
        np.array(
            [
                [0.0, 0.0, 0.0, 0.0, 0.0],
                [1.0, 0.0, 0.0, 0.0, 1.0],
                [0.0, 1.0, 0.0, 0.0, 0.0],
                [0.0, 0.0, 1.0, 1.0, 0.0],
            ]
        ),
    )
345
    assert np.allclose(
346
347
348
349
350
351
352
353
354
355
        inc_out,
        np.array(
            [
                [1.0, 1.0, 1.0, 0.0, 0.0],
                [0.0, 0.0, 0.0, 0.0, 1.0],
                [0.0, 0.0, 0.0, 1.0, 0.0],
                [0.0, 0.0, 0.0, 0.0, 0.0],
            ]
        ),
    )
356
    assert np.allclose(
357
358
359
360
361
362
363
364
365
366
367
        inc_both,
        np.array(
            [
                [-1.0, -1.0, -1.0, 0.0, 0.0],
                [1.0, 0.0, 0.0, 0.0, 0.0],
                [0.0, 1.0, 0.0, -1.0, 0.0],
                [0.0, 0.0, 1.0, 1.0, 0.0],
            ]
        ),
    )

368

369
370
371
372
373
def test_find_edges():
    g = dgl.DGLGraph()
    g.add_nodes(10)
    g.add_edges(range(9), range(1, 10))
    e = g.find_edges([1, 3, 2, 4])
374
375
376
377
378
379
380
381
382
383
384
385
    assert (
        F.asnumpy(e[0][0]) == 1
        and F.asnumpy(e[0][1]) == 3
        and F.asnumpy(e[0][2]) == 2
        and F.asnumpy(e[0][3]) == 4
    )
    assert (
        F.asnumpy(e[1][0]) == 2
        and F.asnumpy(e[1][1]) == 4
        and F.asnumpy(e[1][2]) == 3
        and F.asnumpy(e[1][3]) == 5
    )
386
387
388
389
390
391
392
393
394

    try:
        g.find_edges([10])
        fail = False
    except DGLError:
        fail = True
    finally:
        assert fail

395

396
397
398
399
400
401
402
403
404
405
406
def test_ismultigraph():
    g = dgl.DGLGraph()
    g.add_nodes(10)
    assert g.is_multigraph == False
    g.add_edges([0], [0])
    assert g.is_multigraph == False
    g.add_edges([1], [2])
    assert g.is_multigraph == False
    g.add_edges([0, 2], [0, 3])
    assert g.is_multigraph == True

407

408
409
410
411
412
413
def test_hypersparse_query():
    g = dgl.DGLGraph()
    g = g.to(F.ctx())
    g.add_nodes(1000001)
    g.add_edges([0], [1])
    for i in range(10):
414
415
416
        assert g.has_nodes(i)
    assert not g.has_nodes(1000002)
    assert g.edge_ids(0, 1) == 0
417
    src, dst = g.find_edges([0])
418
419
    src, dst, eid = g.in_edges(1, form="all")
    src, dst, eid = g.out_edges(0, form="all")
420
    src, dst = g.edges()
421
422
423
424
    assert g.in_degrees(0) == 0
    assert g.in_degrees(1) == 1
    assert g.out_degrees(0) == 1
    assert g.out_degrees(1) == 0
425

426

427
428
429
430
431
432
433
434
def test_empty_data_initialized():
    g = dgl.DGLGraph()
    g = g.to(F.ctx())
    g.ndata["ha"] = F.tensor([])
    g.add_nodes(1, {"hb": F.tensor([1])})
    assert "ha" in g.ndata
    assert len(g.ndata["ha"]) == 1

435

436
def test_is_sorted():
437
438
    u_src, u_dst = edge_pair_input(False)
    s_src, s_dst = edge_pair_input(True)
439

440
441
442
443
    u_src = F.tensor(u_src, dtype=F.int32)
    u_dst = F.tensor(u_dst, dtype=F.int32)
    s_src = F.tensor(s_src, dtype=F.int32)
    s_dst = F.tensor(s_dst, dtype=F.int32)
444

445
446
447
    src_sorted, dst_sorted = dgl.utils.is_sorted_srcdst(u_src, u_dst)
    assert src_sorted == False
    assert dst_sorted == False
448

449
450
451
    src_sorted, dst_sorted = dgl.utils.is_sorted_srcdst(s_src, s_dst)
    assert src_sorted == True
    assert dst_sorted == True
452

453
454
455
    src_sorted, dst_sorted = dgl.utils.is_sorted_srcdst(u_src, u_dst)
    assert src_sorted == False
    assert dst_sorted == False
456

457
458
459
    src_sorted, dst_sorted = dgl.utils.is_sorted_srcdst(s_src, u_dst)
    assert src_sorted == True
    assert dst_sorted == False
460

461
462
463
464
465
466
467

def test_default_types():
    dg = dgl.DGLGraph()
    g = dgl.graph(([], []))
    assert dg.ntypes == g.ntypes
    assert dg.etypes == g.etypes

468
469
470
471
472
473
474
475

def test_formats():
    g = dgl.rand_graph(10, 20)
    # in_degrees works if coo or csc available
    # out_degrees works if coo or csr available
    try:
        g.in_degrees()
        g.out_degrees()
476
477
478
479
        g.formats("coo").in_degrees()
        g.formats("coo").out_degrees()
        g.formats("csc").in_degrees()
        g.formats("csr").out_degrees()
480
481
482
483
484
485
486
        fail = False
    except DGLError:
        fail = True
    finally:
        assert not fail
    # in_degrees NOT works if csc available only
    try:
487
        g.formats("csc").out_degrees()
488
489
490
491
492
493
494
        fail = True
    except DGLError:
        fail = False
    finally:
        assert not fail
    # out_degrees NOT works if csr available only
    try:
495
        g.formats("csr").in_degrees()
496
497
498
499
500
        fail = True
    except DGLError:
        fail = False
    finally:
        assert not fail
501

502
503

if __name__ == "__main__":
504
505
    test_query()
    test_mutation()
506
    test_scipy_adjmat()
507
    test_incmat()
508
    test_find_edges()
509
    test_hypersparse_query()
510
    test_is_sorted()
511
    test_default_types()
512
    test_formats()