test_heterograph-misc.py 16.3 KB
Newer Older
1
import math
2
import numbers
3
4
5
6
7

import backend as F

import dgl
import networkx as nx
8
import numpy as np
9
import pytest
10
import scipy.sparse as sp
11
from dgl import DGLError
12

13

14
15
16
17
18
19
20
21
22
23
24
25
26
27
# graph generation: a random graph with 10 nodes
#  and 20 edges.
#  - has self loop
#  - no multi edge
def edge_pair_input(sort=False):
    if sort:
        src = [0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 4, 4, 5, 5, 6, 7, 7, 7, 9]
        dst = [4, 6, 9, 3, 5, 3, 7, 5, 8, 1, 3, 4, 9, 1, 9, 6, 2, 8, 9, 2]
        return src, dst
    else:
        src = [0, 0, 4, 5, 0, 4, 7, 4, 4, 3, 2, 7, 7, 5, 3, 2, 1, 9, 6, 1]
        dst = [9, 6, 3, 9, 4, 4, 9, 9, 1, 8, 3, 2, 8, 1, 5, 7, 3, 2, 6, 5]
        return src, dst

28

29
30
31
32
33
34
35
def nx_input():
    g = nx.DiGraph()
    src, dst = edge_pair_input()
    for i, e in enumerate(zip(src, dst)):
        g.add_edge(*e, id=i)
    return g

36

37
38
39
40
def elist_input():
    src, dst = edge_pair_input()
    return list(zip(src, dst))

41

42
43
def scipy_coo_input():
    src, dst = edge_pair_input()
44
45
    return sp.coo_matrix((np.ones((20,)), (src, dst)), shape=(10, 10))

46
47
48

def scipy_csr_input():
    src, dst = edge_pair_input()
49
    csr = sp.coo_matrix((np.ones((20,)), (src, dst)), shape=(10, 10)).tocsr()
50
51
52
53
54
    csr.sort_indices()
    # src = [0 0 0 1 1 2 2 3 3 4 4 4 4 5 5 6 7 7 7 9]
    # dst = [4 6 9 3 5 3 7 5 8 1 3 4 9 1 9 6 2 8 9 2]
    return csr

55

56
def gen_by_mutation():
57
    g = dgl.graph([])
58
59
60
61
62
    src, dst = edge_pair_input()
    g.add_nodes(10)
    g.add_edges(src, dst)
    return g

63

64
65
def test_query():
    def _test_one(g):
Hongzhi (Steve), Chen's avatar
Hongzhi (Steve), Chen committed
66
67
        assert g.num_nodes() == 10
        assert g.num_edges() == 20
68
69

        for i in range(10):
70
71
            assert g.has_nodes(i)
        assert not g.has_nodes(11)
72
        assert F.allclose(g.has_nodes([0, 2, 10, 11]), F.tensor([1, 1, 0, 0]))
73
74
75

        src, dst = edge_pair_input()
        for u, v in zip(src, dst):
76
77
            assert g.has_edges_between(u, v)
        assert not g.has_edges_between(0, 0)
78
79
80
81
82
        assert F.allclose(
            g.has_edges_between([0, 0, 3], [0, 9, 8]), F.tensor([0, 1, 1])
        )
        assert set(F.asnumpy(g.predecessors(9))) == set([0, 5, 7, 4])
        assert set(F.asnumpy(g.successors(2))) == set([7, 3])
83

84
85
        assert g.edge_ids(4, 4) == 5
        assert F.allclose(g.edge_ids([4, 0], [4, 9]), F.tensor([5, 0]))
86
87
88
89
90

        src, dst = g.find_edges([3, 6, 5])
        assert F.allclose(src, F.tensor([5, 7, 4]))
        assert F.allclose(dst, F.tensor([9, 9, 4]))

91
        src, dst, eid = g.in_edges(9, form="all")
92
        tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid)))
93
94
95
96
        assert set(tup) == set([(0, 9, 0), (5, 9, 3), (7, 9, 6), (4, 9, 7)])
        src, dst, eid = g.in_edges(
            [9, 0, 8], form="all"
        )  # test node#0 has no in edges
97
        tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid)))
98
99
100
        assert set(tup) == set(
            [(0, 9, 0), (5, 9, 3), (7, 9, 6), (4, 9, 7), (3, 8, 9), (7, 8, 12)]
        )
101

102
        src, dst, eid = g.out_edges(0, form="all")
103
        tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid)))
104
105
106
107
        assert set(tup) == set([(0, 9, 0), (0, 6, 1), (0, 4, 4)])
        src, dst, eid = g.out_edges(
            [0, 4, 8], form="all"
        )  # test node#8 has no out edges
108
        tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid)))
109
110
111
112
113
114
115
116
117
118
119
120
121
        assert set(tup) == set(
            [
                (0, 9, 0),
                (0, 6, 1),
                (0, 4, 4),
                (4, 3, 2),
                (4, 4, 5),
                (4, 9, 7),
                (4, 1, 8),
            ]
        )

        src, dst, eid = g.edges("all", "eid")
122
123
124
125
126
127
        t_src, t_dst = edge_pair_input()
        t_tup = list(zip(t_src, t_dst, list(range(20))))
        tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid)))
        assert set(tup) == set(t_tup)
        assert list(F.asnumpy(eid)) == list(range(20))

128
        src, dst, eid = g.edges("all", "srcdst")
129
130
131
132
133
134
        t_src, t_dst = edge_pair_input()
        t_tup = list(zip(t_src, t_dst, list(range(20))))
        tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid)))
        assert set(tup) == set(t_tup)
        assert list(F.asnumpy(src)) == sorted(list(F.asnumpy(src)))

135
136
        assert g.in_degrees(0) == 0
        assert g.in_degrees(9) == 4
137
        assert F.allclose(g.in_degrees([0, 9]), F.tensor([0, 4]))
138
139
        assert g.out_degrees(8) == 0
        assert g.out_degrees(9) == 1
140
141
        assert F.allclose(g.out_degrees([8, 9]), F.tensor([0, 1]))

Mufei Li's avatar
Mufei Li committed
142
        assert np.array_equal(
143
            F.sparse_to_numpy(g.adj_external(transpose=True)),
144
145
            scipy_coo_input().toarray().T,
        )
Mufei Li's avatar
Mufei Li committed
146
        assert np.array_equal(
147
            F.sparse_to_numpy(g.adj_external(transpose=False)),
148
149
            scipy_coo_input().toarray(),
        )
150
151
152
153
154
155
156

    def _test(g):
        # test twice to see whether the cached format works or not
        _test_one(g)
        _test_one(g)

    def _test_csr_one(g):
Hongzhi (Steve), Chen's avatar
Hongzhi (Steve), Chen committed
157
158
        assert g.num_nodes() == 10
        assert g.num_edges() == 20
159
160

        for i in range(10):
161
162
            assert g.has_nodes(i)
        assert not g.has_nodes(11)
163
        assert F.allclose(g.has_nodes([0, 2, 10, 11]), F.tensor([1, 1, 0, 0]))
164
165
166

        src, dst = edge_pair_input(sort=True)
        for u, v in zip(src, dst):
167
168
            assert g.has_edges_between(u, v)
        assert not g.has_edges_between(0, 0)
169
170
171
172
173
        assert F.allclose(
            g.has_edges_between([0, 0, 3], [0, 9, 8]), F.tensor([0, 1, 1])
        )
        assert set(F.asnumpy(g.predecessors(9))) == set([0, 5, 7, 4])
        assert set(F.asnumpy(g.successors(2))) == set([7, 3])
174
175
176
177

        # src = [0 0 0 1 1 2 2 3 3 4 4 4 4 5 5 6 7 7 7 9]
        # dst = [4 6 9 3 5 3 7 5 8 1 3 4 9 1 9 6 2 8 9 2]
        # eid = [0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9]
178
179
        assert g.edge_ids(4, 4) == 11
        assert F.allclose(g.edge_ids([4, 0], [4, 9]), F.tensor([11, 2]))
180
181
182
183
184

        src, dst = g.find_edges([3, 6, 5])
        assert F.allclose(src, F.tensor([1, 2, 2]))
        assert F.allclose(dst, F.tensor([3, 7, 3]))

185
        src, dst, eid = g.in_edges(9, form="all")
186
        tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid)))
187
188
189
190
        assert set(tup) == set([(0, 9, 2), (5, 9, 14), (7, 9, 18), (4, 9, 12)])
        src, dst, eid = g.in_edges(
            [9, 0, 8], form="all"
        )  # test node#0 has no in edges
191
        tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid)))
192
193
194
195
196
197
198
199
200
201
202
203
        assert set(tup) == set(
            [
                (0, 9, 2),
                (5, 9, 14),
                (7, 9, 18),
                (4, 9, 12),
                (3, 8, 8),
                (7, 8, 17),
            ]
        )

        src, dst, eid = g.out_edges(0, form="all")
204
        tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid)))
205
206
207
208
        assert set(tup) == set([(0, 9, 2), (0, 6, 1), (0, 4, 0)])
        src, dst, eid = g.out_edges(
            [0, 4, 8], form="all"
        )  # test node#8 has no out edges
209
        tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid)))
210
211
212
213
214
215
216
217
218
219
220
221
222
        assert set(tup) == set(
            [
                (0, 9, 2),
                (0, 6, 1),
                (0, 4, 0),
                (4, 3, 10),
                (4, 4, 11),
                (4, 9, 12),
                (4, 1, 9),
            ]
        )

        src, dst, eid = g.edges("all", "eid")
223
224
225
226
227
228
        t_src, t_dst = edge_pair_input(sort=True)
        t_tup = list(zip(t_src, t_dst, list(range(20))))
        tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid)))
        assert set(tup) == set(t_tup)
        assert list(F.asnumpy(eid)) == list(range(20))

229
        src, dst, eid = g.edges("all", "srcdst")
230
231
232
233
234
235
        t_src, t_dst = edge_pair_input(sort=True)
        t_tup = list(zip(t_src, t_dst, list(range(20))))
        tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid)))
        assert set(tup) == set(t_tup)
        assert list(F.asnumpy(src)) == sorted(list(F.asnumpy(src)))

236
237
        assert g.in_degrees(0) == 0
        assert g.in_degrees(9) == 4
238
        assert F.allclose(g.in_degrees([0, 9]), F.tensor([0, 4]))
239
240
        assert g.out_degrees(8) == 0
        assert g.out_degrees(9) == 1
241
242
        assert F.allclose(g.out_degrees([8, 9]), F.tensor([0, 1]))

Mufei Li's avatar
Mufei Li committed
243
        assert np.array_equal(
244
            F.sparse_to_numpy(g.adj_external(transpose=True)),
245
246
            scipy_coo_input().toarray().T,
        )
Mufei Li's avatar
Mufei Li committed
247
        assert np.array_equal(
248
            F.sparse_to_numpy(g.adj_external(transpose=False)),
249
250
            scipy_coo_input().toarray(),
        )
251
252
253
254
255
256

    def _test_csr(g):
        # test twice to see whether the cached format works or not
        _test_csr_one(g)
        _test_csr_one(g)

257
258
    def _test_edge_ids():
        g = gen_by_mutation()
259
        eids = g.edge_ids([4, 0], [4, 9])
260
        assert eids.shape[0] == 2
261
        eid = g.edge_ids(4, 4)
262
263
        assert isinstance(eid, numbers.Number)
        with pytest.raises(DGLError):
264
            eids = g.edge_ids([9, 0], [4, 9])
265

266
        with pytest.raises(DGLError):
267
            eid = g.edge_ids(4, 5)
268

269
        g.add_edges(0, 4)
270
        eids = g.edge_ids([0, 0], [4, 9])
271
        eid = g.edge_ids(0, 4)
272

273
    _test(gen_by_mutation())
274
275
276
    _test(dgl.graph(elist_input()))
    _test(dgl.from_scipy(scipy_coo_input()))
    _test_csr(dgl.from_scipy(scipy_csr_input()))
277
    _test_edge_ids()
278

279

280
def test_mutation():
281
    g = dgl.graph([])
282
    g = g.to(F.ctx())
283
284
    # test add nodes with data
    g.add_nodes(5)
285
    g.add_nodes(5, {"h": F.ones((5, 2))})
286
    ans = F.cat([F.zeros((5, 2)), F.ones((5, 2))], 0)
287
288
289
    assert F.allclose(ans, g.ndata["h"])
    g.ndata["w"] = 2 * F.ones((10, 2))
    assert F.allclose(2 * F.ones((10, 2)), g.ndata["w"])
290
291
    # test add edges with data
    g.add_edges([2, 3], [3, 4])
292
    g.add_edges([0, 1], [1, 2], {"m": F.ones((2, 2))})
293
    ans = F.cat([F.zeros((2, 2)), F.ones((2, 2))], 0)
294
295
    assert F.allclose(ans, g.edata["m"])

296

297
def test_scipy_adjmat():
298
    g = dgl.graph([])
299
300
301
    g.add_nodes(10)
    g.add_edges(range(9), range(1, 10))

302
303
    adj_0 = g.adj_external(scipy_fmt="csr")
    adj_1 = g.adj_external(scipy_fmt="coo")
304
305
    assert np.array_equal(adj_0.toarray(), adj_1.toarray())

306
307
    adj_t0 = g.adj_external(transpose=False, scipy_fmt="csr")
    adj_t_1 = g.adj_external(transpose=False, scipy_fmt="coo")
308
309
    assert np.array_equal(adj_0.toarray(), adj_1.toarray())

310

311
def test_incmat():
312
    g = dgl.graph([])
313
    g.add_nodes(4)
314
315
316
317
318
319
320
321
    g.add_edges(0, 1)  # 0
    g.add_edges(0, 2)  # 1
    g.add_edges(0, 3)  # 2
    g.add_edges(2, 3)  # 3
    g.add_edges(1, 1)  # 4
    inc_in = F.sparse_to_numpy(g.incidence_matrix("in"))
    inc_out = F.sparse_to_numpy(g.incidence_matrix("out"))
    inc_both = F.sparse_to_numpy(g.incidence_matrix("both"))
322
323
324
325
    print(inc_in)
    print(inc_out)
    print(inc_both)
    assert np.allclose(
326
327
328
329
330
331
332
333
334
335
        inc_in,
        np.array(
            [
                [0.0, 0.0, 0.0, 0.0, 0.0],
                [1.0, 0.0, 0.0, 0.0, 1.0],
                [0.0, 1.0, 0.0, 0.0, 0.0],
                [0.0, 0.0, 1.0, 1.0, 0.0],
            ]
        ),
    )
336
    assert np.allclose(
337
338
339
340
341
342
343
344
345
346
        inc_out,
        np.array(
            [
                [1.0, 1.0, 1.0, 0.0, 0.0],
                [0.0, 0.0, 0.0, 0.0, 1.0],
                [0.0, 0.0, 0.0, 1.0, 0.0],
                [0.0, 0.0, 0.0, 0.0, 0.0],
            ]
        ),
    )
347
    assert np.allclose(
348
349
350
351
352
353
354
355
356
357
358
        inc_both,
        np.array(
            [
                [-1.0, -1.0, -1.0, 0.0, 0.0],
                [1.0, 0.0, 0.0, 0.0, 0.0],
                [0.0, 1.0, 0.0, -1.0, 0.0],
                [0.0, 0.0, 1.0, 1.0, 0.0],
            ]
        ),
    )

359

360
def test_find_edges():
361
    g = dgl.graph([])
362
363
364
    g.add_nodes(10)
    g.add_edges(range(9), range(1, 10))
    e = g.find_edges([1, 3, 2, 4])
365
366
367
368
369
370
371
372
373
374
375
376
    assert (
        F.asnumpy(e[0][0]) == 1
        and F.asnumpy(e[0][1]) == 3
        and F.asnumpy(e[0][2]) == 2
        and F.asnumpy(e[0][3]) == 4
    )
    assert (
        F.asnumpy(e[1][0]) == 2
        and F.asnumpy(e[1][1]) == 4
        and F.asnumpy(e[1][2]) == 3
        and F.asnumpy(e[1][3]) == 5
    )
377
378
379
380
381
382
383
384
385

    try:
        g.find_edges([10])
        fail = False
    except DGLError:
        fail = True
    finally:
        assert fail

386

387
def test_ismultigraph():
388
    g = dgl.graph([])
389
390
391
392
393
394
395
396
397
    g.add_nodes(10)
    assert g.is_multigraph == False
    g.add_edges([0], [0])
    assert g.is_multigraph == False
    g.add_edges([1], [2])
    assert g.is_multigraph == False
    g.add_edges([0, 2], [0, 3])
    assert g.is_multigraph == True

398

399
def test_hypersparse_query():
400
    g = dgl.graph([])
401
402
403
404
    g = g.to(F.ctx())
    g.add_nodes(1000001)
    g.add_edges([0], [1])
    for i in range(10):
405
406
407
        assert g.has_nodes(i)
    assert not g.has_nodes(1000002)
    assert g.edge_ids(0, 1) == 0
408
    src, dst = g.find_edges([0])
409
410
    src, dst, eid = g.in_edges(1, form="all")
    src, dst, eid = g.out_edges(0, form="all")
411
    src, dst = g.edges()
412
413
414
415
    assert g.in_degrees(0) == 0
    assert g.in_degrees(1) == 1
    assert g.out_degrees(0) == 1
    assert g.out_degrees(1) == 0
416

417

418
def test_empty_data_initialized():
419
    g = dgl.graph([])
420
421
422
423
424
425
    g = g.to(F.ctx())
    g.ndata["ha"] = F.tensor([])
    g.add_nodes(1, {"hb": F.tensor([1])})
    assert "ha" in g.ndata
    assert len(g.ndata["ha"]) == 1

426

427
def test_is_sorted():
428
429
    u_src, u_dst = edge_pair_input(False)
    s_src, s_dst = edge_pair_input(True)
430

431
432
433
434
    u_src = F.tensor(u_src, dtype=F.int32)
    u_dst = F.tensor(u_dst, dtype=F.int32)
    s_src = F.tensor(s_src, dtype=F.int32)
    s_dst = F.tensor(s_dst, dtype=F.int32)
435

436
437
438
    src_sorted, dst_sorted = dgl.utils.is_sorted_srcdst(u_src, u_dst)
    assert src_sorted == False
    assert dst_sorted == False
439

440
441
442
    src_sorted, dst_sorted = dgl.utils.is_sorted_srcdst(s_src, s_dst)
    assert src_sorted == True
    assert dst_sorted == True
443

444
445
446
    src_sorted, dst_sorted = dgl.utils.is_sorted_srcdst(u_src, u_dst)
    assert src_sorted == False
    assert dst_sorted == False
447

448
449
450
    src_sorted, dst_sorted = dgl.utils.is_sorted_srcdst(s_src, u_dst)
    assert src_sorted == True
    assert dst_sorted == False
451

452
453

def test_default_types():
454
    dg = dgl.graph([])
455
456
457
458
    g = dgl.graph(([], []))
    assert dg.ntypes == g.ntypes
    assert dg.etypes == g.etypes

459
460
461
462
463
464
465
466

def test_formats():
    g = dgl.rand_graph(10, 20)
    # in_degrees works if coo or csc available
    # out_degrees works if coo or csr available
    try:
        g.in_degrees()
        g.out_degrees()
467
468
469
470
        g.formats("coo").in_degrees()
        g.formats("coo").out_degrees()
        g.formats("csc").in_degrees()
        g.formats("csr").out_degrees()
471
472
473
474
475
476
477
        fail = False
    except DGLError:
        fail = True
    finally:
        assert not fail
    # in_degrees NOT works if csc available only
    try:
478
        g.formats("csc").out_degrees()
479
480
481
482
483
484
485
        fail = True
    except DGLError:
        fail = False
    finally:
        assert not fail
    # out_degrees NOT works if csr available only
    try:
486
        g.formats("csr").in_degrees()
487
488
489
490
491
        fail = True
    except DGLError:
        fail = False
    finally:
        assert not fail
492

493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
    # If the intersection of created formats and allowed formats is
    # not empty, then retain the intersection.
    # Case1: intersection is not empty and intersected is equal to
    # created formats.
    g = g.formats(["coo", "csr"])
    g.create_formats_()
    g = g.formats(["coo", "csr", "csc"])
    assert sorted(g.formats()["created"]) == sorted(["coo", "csr"])
    assert sorted(g.formats()["not created"]) == sorted(["csc"])

    # Case2: intersection is not empty and intersected is not equal
    # to created formats.
    g = g.formats(["coo", "csr"])
    g.create_formats_()
    g = g.formats(["coo", "csc"])
    assert sorted(g.formats()["created"]) == sorted(["coo"])
    assert sorted(g.formats()["not created"]) == sorted(["csc"])

    # If the intersection of created formats and allowed formats is
    # empty, then create a format in the order of `coo` -> `csr` ->
    # `csc`.
    # Case1: intersection is empty and just one format is allowed.
    g = g.formats(["coo", "csr"])
    g.create_formats_()
    g = g.formats(["csc"])
    assert sorted(g.formats()["created"]) == sorted(["csc"])
    assert sorted(g.formats()["not created"]) == sorted([])

    # Case2: intersection is empty and more than one format is allowed.
    g = g.formats("csc")
    g.create_formats_()
    g = g.formats(["csr", "coo"])
    assert sorted(g.formats()["created"]) == sorted(["coo"])
    assert sorted(g.formats()["not created"]) == sorted(["csr"])

528
529

if __name__ == "__main__":
530
531
    test_query()
    test_mutation()
532
    test_scipy_adjmat()
533
    test_incmat()
534
    test_find_edges()
535
    test_hypersparse_query()
536
    test_is_sorted()
537
    test_default_types()
538
    test_formats()