test_transform.py 31.2 KB
Newer Older
1
from scipy import sparse as spsp
2
import unittest
3
4
5
6
import networkx as nx
import numpy as np
import dgl
import dgl.function as fn
7
import backend as F
8
from dgl.graph_index import from_scipy_sparse_matrix
9
import unittest
10
from utils import parametrize_dtype
11
12
13
14

D = 5

# line graph related
15

16
17
18
def test_line_graph():
    N = 5
    G = dgl.DGLGraph(nx.star_graph(N))
19
    G.edata['h'] = F.randn((2 * N, D))
20
21
22
    n_edges = G.number_of_edges()
    L = G.line_graph(shared=True)
    assert L.number_of_nodes() == 2 * N
23
    L.ndata['h'] = F.randn((2 * N, D))
24
25
26
27
28
    # update node features on line graph should reflect to edge features on
    # original graph.
    u = [0, 0, 2, 3]
    v = [1, 2, 0, 0]
    eid = G.edge_ids(u, v)
29
30
    L.nodes[eid].data['h'] = F.zeros((4, D))
    assert F.allclose(G.edges[u, v].data['h'], F.zeros((4, D)))
31
32
33

    # adding a new node feature on line graph should also reflect to a new
    # edge feature on original graph
34
    data = F.randn((n_edges, D))
35
    L.ndata['w'] = data
36
    assert F.allclose(G.edata['w'], data)
37

38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
@parametrize_dtype
def test_hetero_linegraph(index_dtype):
    g = dgl.graph(([0, 1, 1, 2, 2],[2, 0, 2, 0, 1]),
        'user', 'follows', index_dtype=index_dtype)
    lg = dgl.line_heterograph(g)
    assert lg.number_of_nodes() == 5
    assert lg.number_of_edges() == 8
    row, col = lg.edges()
    assert np.array_equal(F.asnumpy(row),
                          np.array([0, 0, 1, 2, 2, 3, 4, 4]))
    assert np.array_equal(F.asnumpy(col),
                          np.array([3, 4, 0, 3, 4, 0, 1, 2]))

    lg = dgl.line_heterograph(g, backtracking=False)
    assert lg.number_of_nodes() == 5
    assert lg.number_of_edges() == 4
    row, col = lg.edges()
    assert np.array_equal(F.asnumpy(row),
                          np.array([0, 1, 2, 4]))
    assert np.array_equal(F.asnumpy(col),
                          np.array([4, 0, 3, 1]))
    g = dgl.graph(([0, 1, 1, 2, 2],[2, 0, 2, 0, 1]), 
        'user', 'follows', restrict_format='csr', index_dtype=index_dtype)
    lg = dgl.line_heterograph(g)
    assert lg.number_of_nodes() == 5
    assert lg.number_of_edges() == 8
    row, col = lg.edges()
    assert np.array_equal(F.asnumpy(row),
                          np.array([0, 0, 1, 2, 2, 3, 4, 4]))
    assert np.array_equal(F.asnumpy(col),
                          np.array([3, 4, 0, 3, 4, 0, 1, 2]))

    g = dgl.graph(([0, 1, 1, 2, 2],[2, 0, 2, 0, 1]), 
        'user', 'follows', restrict_format='csc', index_dtype=index_dtype)
    lg = dgl.line_heterograph(g)
    assert lg.number_of_nodes() == 5
    assert lg.number_of_edges() == 8
    row, col, eid = lg.edges('all')
    row = F.asnumpy(row)
    col = F.asnumpy(col)
    eid = F.asnumpy(eid).astype(int)
    order = np.argsort(eid)
    assert np.array_equal(row[order],
                          np.array([0, 0, 1, 2, 2, 3, 4, 4]))
    assert np.array_equal(col[order],
                          np.array([3, 4, 0, 3, 4, 0, 1, 2]))
84

85
86
87
88
89
90
91
92
93
94
95
96
def test_no_backtracking():
    N = 5
    G = dgl.DGLGraph(nx.star_graph(N))
    L = G.line_graph(backtracking=False)
    assert L.number_of_nodes() == 2 * N
    for i in range(1, N):
        e1 = G.edge_id(0, i)
        e2 = G.edge_id(i, 0)
        assert not L.has_edge_between(e1, e2)
        assert not L.has_edge_between(e2, e1)

# reverse graph related
97
98


99
100
101
102
103
def test_reverse():
    g = dgl.DGLGraph()
    g.add_nodes(5)
    # The graph need not to be completely connected.
    g.add_edges([0, 1, 2], [1, 2, 1])
104
105
    g.ndata['h'] = F.tensor([[0.], [1.], [2.], [3.], [4.]])
    g.edata['h'] = F.tensor([[5.], [6.], [7.]])
106
107
108
109
110
111
    rg = g.reverse()

    assert g.is_multigraph == rg.is_multigraph

    assert g.number_of_nodes() == rg.number_of_nodes()
    assert g.number_of_edges() == rg.number_of_edges()
112
113
    assert F.allclose(F.astype(rg.has_edges_between(
        [1, 2, 1], [0, 1, 2]), F.float32), F.ones((3,)))
114
115
116
117
    assert g.edge_id(0, 1) == rg.edge_id(1, 0)
    assert g.edge_id(1, 2) == rg.edge_id(2, 1)
    assert g.edge_id(2, 1) == rg.edge_id(1, 2)

118

119
120
121
122
def test_reverse_shared_frames():
    g = dgl.DGLGraph()
    g.add_nodes(3)
    g.add_edges([0, 1, 2], [1, 2, 1])
123
124
    g.ndata['h'] = F.tensor([[0.], [1.], [2.]])
    g.edata['h'] = F.tensor([[3.], [4.], [5.]])
125
126

    rg = g.reverse(share_ndata=True, share_edata=True)
127
128
129
    assert F.allclose(g.ndata['h'], rg.ndata['h'])
    assert F.allclose(g.edata['h'], rg.edata['h'])
    assert F.allclose(g.edges[[0, 2], [1, 1]].data['h'],
130
131
132
                      rg.edges[[1, 1], [0, 2]].data['h'])

    rg.ndata['h'] = rg.ndata['h'] + 1
133
    assert F.allclose(rg.ndata['h'], g.ndata['h'])
134
135

    g.edata['h'] = g.edata['h'] - 1
136
    assert F.allclose(rg.edata['h'], g.edata['h'])
137
138
139
140
141

    src_msg = fn.copy_src(src='h', out='m')
    sum_reduce = fn.sum(msg='m', out='h')

    rg.update_all(src_msg, sum_reduce)
142
    assert F.allclose(g.ndata['h'], rg.ndata['h'])
143

144

145
146
147
148
149
150
151
152
153
154
def test_simple_graph():
    elist = [(0, 1), (0, 2), (1, 2), (0, 1)]
    g = dgl.DGLGraph(elist, readonly=True)
    assert g.is_multigraph
    sg = dgl.to_simple_graph(g)
    assert not sg.is_multigraph
    assert sg.number_of_edges() == 3
    src, dst = sg.edges()
    eset = set(zip(list(F.asnumpy(src)), list(F.asnumpy(dst))))
    assert eset == set(elist)
155

156

157
158
def test_bidirected_graph():
    def _test(in_readonly, out_readonly):
159
160
161
        elist = [(0, 0), (0, 1), (1, 0),
                (1, 1), (2, 1), (2, 2)]
        num_edges = 7
162
163
164
165
        g = dgl.DGLGraph(elist, readonly=in_readonly)
        elist.append((1, 2))
        elist = set(elist)
        big = dgl.to_bidirected(g, out_readonly)
166
        assert big.number_of_edges() == num_edges
167
168
169
170
171
172
173
174
175
        src, dst = big.edges()
        eset = set(zip(list(F.asnumpy(src)), list(F.asnumpy(dst))))
        assert eset == set(elist)

    _test(True, True)
    _test(True, False)
    _test(False, True)
    _test(False, False)

176

177
178
179
180
def test_khop_graph():
    N = 20
    feat = F.randn((N, 5))

Mufei Li's avatar
Mufei Li committed
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
    def _test(g):
        for k in range(4):
            g_k = dgl.khop_graph(g, k)
            # use original graph to do message passing for k times.
            g.ndata['h'] = feat
            for _ in range(k):
                g.update_all(fn.copy_u('h', 'm'), fn.sum('m', 'h'))
            h_0 = g.ndata.pop('h')
            # use k-hop graph to do message passing for one time.
            g_k.ndata['h'] = feat
            g_k.update_all(fn.copy_u('h', 'm'), fn.sum('m', 'h'))
            h_1 = g_k.ndata.pop('h')
            assert F.allclose(h_0, h_1, rtol=1e-3, atol=1e-3)

    # Test for random undirected graphs
    g = dgl.DGLGraph(nx.erdos_renyi_graph(N, 0.3))
    _test(g)
    # Test for random directed graphs
    g = dgl.DGLGraph(nx.erdos_renyi_graph(N, 0.3, directed=True))
    _test(g)
201

202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
def test_khop_adj():
    N = 20
    feat = F.randn((N, 5))
    g = dgl.DGLGraph(nx.erdos_renyi_graph(N, 0.3))
    for k in range(3):
        adj = F.tensor(dgl.khop_adj(g, k))
        # use original graph to do message passing for k times.
        g.ndata['h'] = feat
        for _ in range(k):
            g.update_all(fn.copy_u('h', 'm'), fn.sum('m', 'h'))
        h_0 = g.ndata.pop('h')
        # use k-hop adj to do message passing for one time.
        h_1 = F.matmul(adj, feat)
        assert F.allclose(h_0, h_1, rtol=1e-3, atol=1e-3)

217

218
219
220
221
222
223
224
def test_laplacian_lambda_max():
    N = 20
    eps = 1e-6
    # test DGLGraph
    g = dgl.DGLGraph(nx.erdos_renyi_graph(N, 0.3))
    l_max = dgl.laplacian_lambda_max(g)
    assert (l_max[0] < 2 + eps)
Zihao Ye's avatar
Zihao Ye committed
225
    # test batched DGLGraph
226
227
228
229
230
231
232
233
234
235
    N_arr = [20, 30, 10, 12]
    bg = dgl.batch([
        dgl.DGLGraph(nx.erdos_renyi_graph(N, 0.3))
        for N in N_arr
    ])
    l_max_arr = dgl.laplacian_lambda_max(bg)
    assert len(l_max_arr) == len(N_arr)
    for l_max in l_max_arr:
        assert l_max < 2 + eps

236

VoVAllen's avatar
VoVAllen committed
237
def test_add_self_loop():
238
239
240
    g = dgl.DGLGraph()
    g.add_nodes(5)
    g.add_edges([0, 1, 2], [1, 1, 2])
VoVAllen's avatar
VoVAllen committed
241
242
    # Nodes 0, 3, 4 don't have self-loop
    new_g = dgl.transform.add_self_loop(g)
243
244
245
246
247
248
249
250
251
252
253
254
    assert F.allclose(new_g.edges()[0], F.tensor([0, 0, 1, 2, 3, 4]))
    assert F.allclose(new_g.edges()[1], F.tensor([1, 0, 1, 2, 3, 4]))


def test_remove_self_loop():
    g = dgl.DGLGraph()
    g.add_nodes(5)
    g.add_edges([0, 1, 2], [1, 1, 2])
    new_g = dgl.transform.remove_self_loop(g)
    assert F.allclose(new_g.edges()[0], F.tensor([0]))
    assert F.allclose(new_g.edges()[1], F.tensor([1]))

255
256
257
258
def create_large_graph_index(num_nodes):
    row = np.random.choice(num_nodes, num_nodes * 10)
    col = np.random.choice(num_nodes, num_nodes * 10)
    spm = spsp.coo_matrix((np.ones(len(row)), (row, col)))
259
260

    return from_scipy_sparse_matrix(spm, True)
261
262
263
264
265
266
267
268
269

def get_nodeflow(g, node_ids, num_layers):
    batch_size = len(node_ids)
    expand_factor = g.number_of_nodes()
    sampler = dgl.contrib.sampling.NeighborSampler(g, batch_size,
            expand_factor=expand_factor, num_hops=num_layers,
            seed_nodes=node_ids)
    return next(iter(sampler))

270
def test_partition_with_halo():
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
    g = dgl.DGLGraph(create_large_graph_index(1000), readonly=True)
    node_part = np.random.choice(4, g.number_of_nodes())
    subgs = dgl.transform.partition_graph_with_halo(g, node_part, 2)
    for part_id, subg in subgs.items():
        node_ids = np.nonzero(node_part == part_id)[0]
        lnode_ids = np.nonzero(F.asnumpy(subg.ndata['inner_node']))[0]
        nf = get_nodeflow(g, node_ids, 2)
        lnf = get_nodeflow(subg, lnode_ids, 2)
        for i in range(nf.num_layers):
            layer_nids1 = F.asnumpy(nf.layer_parent_nid(i))
            layer_nids2 = lnf.layer_parent_nid(i)
            layer_nids2 = F.asnumpy(F.gather_row(subg.parent_nid, layer_nids2))
            assert np.all(np.sort(layer_nids1) == np.sort(layer_nids2))

        for i in range(nf.num_blocks):
            block_eids1 = F.asnumpy(nf.block_parent_eid(i))
            block_eids2 = lnf.block_parent_eid(i)
            block_eids2 = F.asnumpy(F.gather_row(subg.parent_eid, block_eids2))
            assert np.all(np.sort(block_eids1) == np.sort(block_eids2))
290

291
292
293
294
295
296
    subgs = dgl.transform.partition_graph_with_halo(g, node_part, 2, reshuffle=True)
    for part_id, subg in subgs.items():
        node_ids = np.nonzero(node_part == part_id)[0]
        lnode_ids = np.nonzero(F.asnumpy(subg.ndata['inner_node']))[0]
        assert np.all(np.sort(F.asnumpy(subg.ndata['orig_id'])[lnode_ids]) == node_ids)

297
298
@unittest.skipIf(F._default_context_str == 'gpu', reason="METIS doesn't support GPU")
def test_metis_partition():
Da Zheng's avatar
Da Zheng committed
299
    # TODO(zhengda) Metis fails to partition a small graph.
300
    g = dgl.DGLGraph(create_large_graph_index(1000), readonly=True)
Da Zheng's avatar
Da Zheng committed
301
302
303
    check_metis_partition(g, 0)
    check_metis_partition(g, 1)
    check_metis_partition(g, 2)
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
    check_metis_partition_with_constraint(g)

def check_metis_partition_with_constraint(g):
    ntypes = np.zeros((g.number_of_nodes(),), dtype=np.int32)
    ntypes[0:int(g.number_of_nodes()/4)] = 1
    ntypes[int(g.number_of_nodes()*3/4):] = 2
    subgs = dgl.transform.metis_partition(g, 4, extra_cached_hops=1, balance_ntypes=ntypes)
    if subgs is not None:
        for i in subgs:
            subg = subgs[i]
            parent_nids = F.asnumpy(subg.ndata[dgl.NID])
            sub_ntypes = ntypes[parent_nids]
            print('type0:', np.sum(sub_ntypes == 0))
            print('type1:', np.sum(sub_ntypes == 1))
            print('type2:', np.sum(sub_ntypes == 2))
    subgs = dgl.transform.metis_partition(g, 4, extra_cached_hops=1,
                                          balance_ntypes=ntypes, balance_edges=True)
    if subgs is not None:
        for i in subgs:
            subg = subgs[i]
            parent_nids = F.asnumpy(subg.ndata[dgl.NID])
            sub_ntypes = ntypes[parent_nids]
            print('type0:', np.sum(sub_ntypes == 0))
            print('type1:', np.sum(sub_ntypes == 1))
            print('type2:', np.sum(sub_ntypes == 2))
Da Zheng's avatar
Da Zheng committed
329
330
331

def check_metis_partition(g, extra_hops):
    subgs = dgl.transform.metis_partition(g, 4, extra_cached_hops=extra_hops)
332
333
334
335
    num_inner_nodes = 0
    num_inner_edges = 0
    if subgs is not None:
        for part_id, subg in subgs.items():
Da Zheng's avatar
Da Zheng committed
336
337
338
339
340
            lnode_ids = np.nonzero(F.asnumpy(subg.ndata['inner_node']))[0]
            ledge_ids = np.nonzero(F.asnumpy(subg.edata['inner_edge']))[0]
            num_inner_nodes += len(lnode_ids)
            num_inner_edges += len(ledge_ids)
            assert np.sum(F.asnumpy(subg.ndata['part_id']) == part_id) == len(lnode_ids)
341
342
343
        assert num_inner_nodes == g.number_of_nodes()
        print(g.number_of_edges() - num_inner_edges)

Da Zheng's avatar
Da Zheng committed
344
345
346
    if extra_hops == 0:
        return

347
    # partitions with node reshuffling
Da Zheng's avatar
Da Zheng committed
348
    subgs = dgl.transform.metis_partition(g, 4, extra_cached_hops=extra_hops, reshuffle=True)
349
350
    num_inner_nodes = 0
    num_inner_edges = 0
Da Zheng's avatar
Da Zheng committed
351
    edge_cnts = np.zeros((g.number_of_edges(),))
352
353
354
355
356
357
358
    if subgs is not None:
        for part_id, subg in subgs.items():
            lnode_ids = np.nonzero(F.asnumpy(subg.ndata['inner_node']))[0]
            ledge_ids = np.nonzero(F.asnumpy(subg.edata['inner_edge']))[0]
            num_inner_nodes += len(lnode_ids)
            num_inner_edges += len(ledge_ids)
            assert np.sum(F.asnumpy(subg.ndata['part_id']) == part_id) == len(lnode_ids)
Da Zheng's avatar
Da Zheng committed
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
            nids = F.asnumpy(subg.ndata[dgl.NID])

            # ensure the local node Ids are contiguous.
            parent_ids = F.asnumpy(subg.ndata[dgl.NID])
            parent_ids = parent_ids[:len(lnode_ids)]
            assert np.all(parent_ids == np.arange(parent_ids[0], parent_ids[-1] + 1))

            # count the local edges.
            parent_ids = F.asnumpy(subg.edata[dgl.EID])[ledge_ids]
            edge_cnts[parent_ids] += 1

            orig_ids = subg.ndata['orig_id']
            inner_node = F.asnumpy(subg.ndata['inner_node'])
            for nid in range(subg.number_of_nodes()):
                neighs = subg.predecessors(nid)
                old_neighs1 = F.gather_row(orig_ids, neighs)
                old_nid = F.asnumpy(orig_ids[nid])
                old_neighs2 = g.predecessors(old_nid)
                # If this is an inner node, it should have the full neighborhood.
                if inner_node[nid]:
                    assert np.all(np.sort(F.asnumpy(old_neighs1)) == np.sort(F.asnumpy(old_neighs2)))
        # Normally, local edges are only counted once.
        assert np.all(edge_cnts == 1)

383
384
385
        assert num_inner_nodes == g.number_of_nodes()
        print(g.number_of_edges() - num_inner_edges)

Da Zheng's avatar
Da Zheng committed
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
@unittest.skipIf(F._default_context_str == 'gpu', reason="It doesn't support GPU")
def test_reorder_nodes():
    g = dgl.DGLGraph(create_large_graph_index(1000), readonly=True)
    new_nids = np.random.permutation(g.number_of_nodes())
    # TODO(zhengda) we need to test both CSR and COO.
    new_g = dgl.transform.reorder_nodes(g, new_nids)
    new_in_deg = new_g.in_degrees()
    new_out_deg = new_g.out_degrees()
    in_deg = g.in_degrees()
    out_deg = g.out_degrees()
    new_in_deg1 = F.scatter_row(in_deg, F.tensor(new_nids), in_deg)
    new_out_deg1 = F.scatter_row(out_deg, F.tensor(new_nids), out_deg)
    assert np.all(F.asnumpy(new_in_deg == new_in_deg1))
    assert np.all(F.asnumpy(new_out_deg == new_out_deg1))
    orig_ids = F.asnumpy(new_g.ndata['orig_id'])
401
402
403
404
405
406
407
    for nid in range(g.number_of_nodes()):
        neighs = F.asnumpy(g.successors(nid))
        new_neighs1 = new_nids[neighs]
        new_nid = new_nids[nid]
        new_neighs2 = new_g.successors(new_nid)
        assert np.all(np.sort(new_neighs1) == np.sort(F.asnumpy(new_neighs2)))

Da Zheng's avatar
Da Zheng committed
408
409
410
411
412
413
414
415
416
417
418
419
420
    for nid in range(new_g.number_of_nodes()):
        neighs = F.asnumpy(new_g.successors(nid))
        old_neighs1 = orig_ids[neighs]
        old_nid = orig_ids[nid]
        old_neighs2 = g.successors(old_nid)
        assert np.all(np.sort(old_neighs1) == np.sort(F.asnumpy(old_neighs2)))

        neighs = F.asnumpy(new_g.predecessors(nid))
        old_neighs1 = orig_ids[neighs]
        old_nid = orig_ids[nid]
        old_neighs2 = g.predecessors(old_nid)
        assert np.all(np.sort(old_neighs1) == np.sort(F.asnumpy(old_neighs2)))

421
@unittest.skipIf(F._default_context_str == 'gpu', reason="GPU not implemented")
422
423
424
425
426
427
@parametrize_dtype
def test_in_subgraph(index_dtype):
    g1 = dgl.graph([(1,0),(2,0),(3,0),(0,1),(2,1),(3,1),(0,2)], 'user', 'follow', index_dtype=index_dtype)
    g2 = dgl.bipartite([(0,0),(0,1),(1,2),(3,2)], 'user', 'play', 'game', index_dtype=index_dtype)
    g3 = dgl.bipartite([(2,0),(2,1),(2,2),(1,0),(1,3),(0,0)], 'game', 'liked-by', 'user', index_dtype=index_dtype)
    g4 = dgl.bipartite([(0,0),(1,0),(2,0),(3,0)], 'user', 'flips', 'coin', index_dtype=index_dtype)
428
429
    hg = dgl.hetero_from_relations([g1, g2, g3, g4])
    subg = dgl.in_subgraph(hg, {'user' : [0,1], 'game' : 0})
430
    assert subg._idtype_str == index_dtype
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
    assert len(subg.ntypes) == 3
    assert len(subg.etypes) == 4
    u, v = subg['follow'].edges()
    edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
    assert F.array_equal(hg['follow'].edge_ids(u, v), subg['follow'].edata[dgl.EID])
    assert edge_set == {(1,0),(2,0),(3,0),(0,1),(2,1),(3,1)}
    u, v = subg['play'].edges()
    edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
    assert F.array_equal(hg['play'].edge_ids(u, v), subg['play'].edata[dgl.EID])
    assert edge_set == {(0,0)}
    u, v = subg['liked-by'].edges()
    edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
    assert F.array_equal(hg['liked-by'].edge_ids(u, v), subg['liked-by'].edata[dgl.EID])
    assert edge_set == {(2,0),(2,1),(1,0),(0,0)}
    assert subg['flips'].number_of_edges() == 0

@unittest.skipIf(F._default_context_str == 'gpu', reason="GPU not implemented")
448
449
450
451
452
453
@parametrize_dtype
def test_out_subgraph(index_dtype):
    g1 = dgl.graph([(1,0),(2,0),(3,0),(0,1),(2,1),(3,1),(0,2)], 'user', 'follow', index_dtype=index_dtype)
    g2 = dgl.bipartite([(0,0),(0,1),(1,2),(3,2)], 'user', 'play', 'game', index_dtype=index_dtype)
    g3 = dgl.bipartite([(2,0),(2,1),(2,2),(1,0),(1,3),(0,0)], 'game', 'liked-by', 'user', index_dtype=index_dtype)
    g4 = dgl.bipartite([(0,0),(1,0),(2,0),(3,0)], 'user', 'flips', 'coin', index_dtype=index_dtype)
454
455
    hg = dgl.hetero_from_relations([g1, g2, g3, g4])
    subg = dgl.out_subgraph(hg, {'user' : [0,1], 'game' : 0})
456
    assert subg._idtype_str == index_dtype
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
    assert len(subg.ntypes) == 3
    assert len(subg.etypes) == 4
    u, v = subg['follow'].edges()
    edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
    assert edge_set == {(1,0),(0,1),(0,2)}
    assert F.array_equal(hg['follow'].edge_ids(u, v), subg['follow'].edata[dgl.EID])
    u, v = subg['play'].edges()
    edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
    assert edge_set == {(0,0),(0,1),(1,2)}
    assert F.array_equal(hg['play'].edge_ids(u, v), subg['play'].edata[dgl.EID])
    u, v = subg['liked-by'].edges()
    edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
    assert edge_set == {(0,0)}
    assert F.array_equal(hg['liked-by'].edge_ids(u, v), subg['liked-by'].edata[dgl.EID])
    u, v = subg['flips'].edges()
    edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
    assert edge_set == {(0,0),(1,0)}
    assert F.array_equal(hg['flips'].edge_ids(u, v), subg['flips'].edata[dgl.EID])
475

476
@unittest.skipIf(F._default_context_str == 'gpu', reason="GPU compaction not implemented")
477
478
@parametrize_dtype
def test_compact(index_dtype):
479
480
481
482
    g1 = dgl.heterograph({
        ('user', 'follow', 'user'): [(1, 3), (3, 5)],
        ('user', 'plays', 'game'): [(2, 4), (3, 4), (2, 5)],
        ('game', 'wished-by', 'user'): [(6, 7), (5, 7)]},
483
        {'user': 20, 'game': 10}, index_dtype=index_dtype)
484
485
486
487

    g2 = dgl.heterograph({
        ('game', 'clicked-by', 'user'): [(3, 1)],
        ('user', 'likes', 'user'): [(1, 8), (8, 9)]},
488
        {'user': 20, 'game': 10}, index_dtype=index_dtype)
489

490
491
    g3 = dgl.graph([(0, 1), (1, 2)], num_nodes=10, ntype='user', index_dtype=index_dtype)
    g4 = dgl.graph([(1, 3), (3, 5)], num_nodes=10, ntype='user', index_dtype=index_dtype)
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513

    def _check(g, new_g, induced_nodes):
        assert g.ntypes == new_g.ntypes
        assert g.canonical_etypes == new_g.canonical_etypes

        for ntype in g.ntypes:
            assert -1 not in induced_nodes[ntype]

        for etype in g.canonical_etypes:
            g_src, g_dst = g.all_edges(order='eid', etype=etype)
            g_src = F.asnumpy(g_src)
            g_dst = F.asnumpy(g_dst)
            new_g_src, new_g_dst = new_g.all_edges(order='eid', etype=etype)
            new_g_src_mapped = induced_nodes[etype[0]][F.asnumpy(new_g_src)]
            new_g_dst_mapped = induced_nodes[etype[2]][F.asnumpy(new_g_dst)]
            assert (g_src == new_g_src_mapped).all()
            assert (g_dst == new_g_dst_mapped).all()

    # Test default
    new_g1 = dgl.compact_graphs(g1)
    induced_nodes = {ntype: new_g1.nodes[ntype].data[dgl.NID] for ntype in new_g1.ntypes}
    induced_nodes = {k: F.asnumpy(v) for k, v in induced_nodes.items()}
514
    assert new_g1._idtype_str == index_dtype
515
516
517
518
519
520
    assert set(induced_nodes['user']) == set([1, 3, 5, 2, 7])
    assert set(induced_nodes['game']) == set([4, 5, 6])
    _check(g1, new_g1, induced_nodes)

    # Test with always_preserve given a dict
    new_g1 = dgl.compact_graphs(
521
522
        g1, always_preserve={'game': F.tensor([4, 7], dtype=getattr(F, index_dtype))})
    assert new_g1._idtype_str == index_dtype
523
524
525
526
527
528
529
530
    induced_nodes = {ntype: new_g1.nodes[ntype].data[dgl.NID] for ntype in new_g1.ntypes}
    induced_nodes = {k: F.asnumpy(v) for k, v in induced_nodes.items()}
    assert set(induced_nodes['user']) == set([1, 3, 5, 2, 7])
    assert set(induced_nodes['game']) == set([4, 5, 6, 7])
    _check(g1, new_g1, induced_nodes)

    # Test with always_preserve given a tensor
    new_g3 = dgl.compact_graphs(
531
        g3, always_preserve=F.tensor([1, 7], dtype=getattr(F, index_dtype)))
532
533
    induced_nodes = {ntype: new_g3.nodes[ntype].data[dgl.NID] for ntype in new_g3.ntypes}
    induced_nodes = {k: F.asnumpy(v) for k, v in induced_nodes.items()}
534
535
    
    assert new_g3._idtype_str == index_dtype
536
537
538
539
540
541
542
    assert set(induced_nodes['user']) == set([0, 1, 2, 7])
    _check(g3, new_g3, induced_nodes)

    # Test multiple graphs
    new_g1, new_g2 = dgl.compact_graphs([g1, g2])
    induced_nodes = {ntype: new_g1.nodes[ntype].data[dgl.NID] for ntype in new_g1.ntypes}
    induced_nodes = {k: F.asnumpy(v) for k, v in induced_nodes.items()}
543
544
    assert new_g1._idtype_str == index_dtype
    assert new_g2._idtype_str == index_dtype
545
546
547
548
549
550
551
    assert set(induced_nodes['user']) == set([1, 3, 5, 2, 7, 8, 9])
    assert set(induced_nodes['game']) == set([3, 4, 5, 6])
    _check(g1, new_g1, induced_nodes)
    _check(g2, new_g2, induced_nodes)

    # Test multiple graphs with always_preserve given a dict
    new_g1, new_g2 = dgl.compact_graphs(
552
        [g1, g2], always_preserve={'game': F.tensor([4, 7], dtype=getattr(F, index_dtype))})
553
    induced_nodes = {ntype: new_g1.nodes[ntype].data[dgl.NID] for ntype in new_g1.ntypes}
554
555
556
    induced_nodes = {k: F.asnumpy(v) for k, v in induced_nodes.items()}    
    assert new_g1._idtype_str == index_dtype
    assert new_g2._idtype_str == index_dtype
557
558
559
560
561
562
563
    assert set(induced_nodes['user']) == set([1, 3, 5, 2, 7, 8, 9])
    assert set(induced_nodes['game']) == set([3, 4, 5, 6, 7])
    _check(g1, new_g1, induced_nodes)
    _check(g2, new_g2, induced_nodes)

    # Test multiple graphs with always_preserve given a tensor
    new_g3, new_g4 = dgl.compact_graphs(
564
        [g3, g4], always_preserve=F.tensor([1, 7], dtype=getattr(F, index_dtype)))
565
566
    induced_nodes = {ntype: new_g3.nodes[ntype].data[dgl.NID] for ntype in new_g3.ntypes}
    induced_nodes = {k: F.asnumpy(v) for k, v in induced_nodes.items()}
567
568
569
    
    assert new_g3._idtype_str == index_dtype
    assert new_g4._idtype_str == index_dtype
570
571
572
573
    assert set(induced_nodes['user']) == set([0, 1, 2, 3, 5, 7])
    _check(g3, new_g3, induced_nodes)
    _check(g4, new_g4, induced_nodes)

574
575
@parametrize_dtype
def test_to_simple(index_dtype):
576
577
    g = dgl.heterograph({
        ('user', 'follow', 'user'): [(0, 1), (1, 3), (2, 2), (1, 3), (1, 4), (1, 4)],
578
        ('user', 'plays', 'game'): [(3, 5), (2, 3), (1, 4), (1, 4), (3, 5), (2, 3), (2, 3)]}, index_dtype=index_dtype)
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
    sg = dgl.to_simple(g, return_counts='weights', writeback_mapping='new_eid')

    for etype in g.canonical_etypes:
        u, v = g.all_edges(form='uv', order='eid', etype=etype)
        u = F.asnumpy(u).tolist()
        v = F.asnumpy(v).tolist()
        uv = list(zip(u, v))
        eid_map = F.asnumpy(g.edges[etype].data['new_eid'])

        su, sv = sg.all_edges(form='uv', order='eid', etype=etype)
        su = F.asnumpy(su).tolist()
        sv = F.asnumpy(sv).tolist()
        suv = list(zip(su, sv))
        sw = F.asnumpy(sg.edges[etype].data['weights'])

        assert set(uv) == set(suv)
        for i, e in enumerate(suv):
            assert sw[i] == sum(e == _e for _e in uv)
        for i, e in enumerate(uv):
            assert eid_map[i] == suv.index(e)

600
@unittest.skipIf(F._default_context_str == 'gpu', reason="GPU compaction not implemented")
601
602
@parametrize_dtype
def test_to_block(index_dtype):
603
    def check(g, bg, ntype, etype, dst_nodes, include_dst_in_src=True):
604
605
606
        if dst_nodes is not None:
            assert F.array_equal(bg.dstnodes[ntype].data[dgl.NID], dst_nodes)
        n_dst_nodes = bg.number_of_nodes('DST/' + ntype)
607
608
609
610
        if include_dst_in_src:
            assert F.array_equal(
                bg.srcnodes[ntype].data[dgl.NID][:n_dst_nodes],
                bg.dstnodes[ntype].data[dgl.NID])
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627

        g = g[etype]
        bg = bg[etype]
        induced_src = bg.srcdata[dgl.NID]
        induced_dst = bg.dstdata[dgl.NID]
        induced_eid = bg.edata[dgl.EID]
        bg_src, bg_dst = bg.all_edges(order='eid')
        src_ans, dst_ans = g.all_edges(order='eid')

        induced_src_bg = F.gather_row(induced_src, bg_src)
        induced_dst_bg = F.gather_row(induced_dst, bg_dst)
        induced_src_ans = F.gather_row(src_ans, induced_eid)
        induced_dst_ans = F.gather_row(dst_ans, induced_eid)

        assert F.array_equal(induced_src_bg, induced_src_ans)
        assert F.array_equal(induced_dst_bg, induced_dst_ans)

628
    def checkall(g, bg, dst_nodes, include_dst_in_src=True):
629
630
        for etype in g.etypes:
            ntype = g.to_canonical_etype(etype)[2]
631
            if dst_nodes is not None and ntype in dst_nodes:
632
                check(g, bg, ntype, etype, dst_nodes[ntype], include_dst_in_src)
633
            else:
634
                check(g, bg, ntype, etype, None, include_dst_in_src)
635
636
637
638

    g = dgl.heterograph({
        ('A', 'AA', 'A'): [(0, 1), (2, 3), (1, 2), (3, 4)],
        ('A', 'AB', 'B'): [(0, 1), (1, 3), (3, 5), (1, 6)],
639
        ('B', 'BA', 'A'): [(2, 3), (3, 2)]}, index_dtype=index_dtype)
640
641
642
643
    g_a = g['AA']

    bg = dgl.to_block(g_a)
    check(g_a, bg, 'A', 'AA', None)
644
645
646
647
648
649
650
    assert bg.number_of_src_nodes() == 5
    assert bg.number_of_dst_nodes() == 4

    bg = dgl.to_block(g_a, include_dst_in_src=False)
    check(g_a, bg, 'A', 'AA', None, False)
    assert bg.number_of_src_nodes() == 4
    assert bg.number_of_dst_nodes() == 4
651

652
    dst_nodes = F.tensor([4, 3, 2, 1], dtype=getattr(F, index_dtype))
653
654
    bg = dgl.to_block(g_a, dst_nodes)
    check(g_a, bg, 'A', 'AA', dst_nodes)
655
656
657
658

    g_ab = g['AB']

    bg = dgl.to_block(g_ab)
659
    assert bg._idtype_str == index_dtype
660
661
662
    assert bg.number_of_nodes('SRC/B') == 4
    assert F.array_equal(bg.srcnodes['B'].data[dgl.NID], bg.dstnodes['B'].data[dgl.NID])
    assert bg.number_of_nodes('DST/A') == 0
663
664
    checkall(g_ab, bg, None)

665
    dst_nodes = {'B': F.tensor([5, 6, 3, 1], dtype=getattr(F, index_dtype))}
666
    bg = dgl.to_block(g, dst_nodes)
667
    assert bg.number_of_nodes('SRC/B') == 4
668
669
670
    assert F.array_equal(bg.srcnodes['B'].data[dgl.NID], bg.dstnodes['B'].data[dgl.NID])
    assert bg.number_of_nodes('DST/A') == 0
    checkall(g, bg, dst_nodes)
671

672
    dst_nodes = {'A': F.tensor([4, 3, 2, 1], dtype=getattr(F, index_dtype)), 'B': F.tensor([3, 5, 6, 1], dtype=getattr(F, index_dtype))}
673
674
    bg = dgl.to_block(g, dst_nodes=dst_nodes)
    checkall(g, bg, dst_nodes)
675
676

@unittest.skipIf(F._default_context_str == 'gpu', reason="GPU not implemented")
677
678
@parametrize_dtype
def test_remove_edges(index_dtype):
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
    def check(g1, etype, g, edges_removed):
        src, dst, eid = g.edges(etype=etype, form='all')
        src1, dst1 = g1.edges(etype=etype, order='eid')
        if etype is not None:
            eid1 = g1.edges[etype].data[dgl.EID]
        else:
            eid1 = g1.edata[dgl.EID]
        src1 = F.asnumpy(src1)
        dst1 = F.asnumpy(dst1)
        eid1 = F.asnumpy(eid1)
        src = F.asnumpy(src)
        dst = F.asnumpy(dst)
        eid = F.asnumpy(eid)
        sde_set = set(zip(src, dst, eid))

        for s, d, e in zip(src1, dst1, eid1):
            assert (s, d, e) in sde_set
        assert not np.isin(edges_removed, eid1).any()
697
        assert g1.idtype == g.idtype
698
699
700

    for fmt in ['coo', 'csr', 'csc']:
        for edges_to_remove in [[2], [2, 2], [3, 2], [1, 3, 1, 2]]:
701
702
            g = dgl.graph([(0, 1), (2, 3), (1, 2), (3, 4)], restrict_format=fmt, index_dtype=index_dtype)
            g1 = dgl.remove_edges(g, F.tensor(edges_to_remove, getattr(F, index_dtype)))
703
704
705
706
            check(g1, None, g, edges_to_remove)

            g = dgl.graph(
                spsp.csr_matrix(([1, 1, 1, 1], ([0, 2, 1, 3], [1, 3, 2, 4])), shape=(5, 5)),
707
708
                restrict_format=fmt, index_dtype=index_dtype)
            g1 = dgl.remove_edges(g, F.tensor(edges_to_remove, getattr(F, index_dtype)))
709
710
711
712
713
            check(g1, None, g, edges_to_remove)

    g = dgl.heterograph({
        ('A', 'AA', 'A'): [(0, 1), (2, 3), (1, 2), (3, 4)],
        ('A', 'AB', 'B'): [(0, 1), (1, 3), (3, 5), (1, 6)],
714
715
        ('B', 'BA', 'A'): [(2, 3), (3, 2)]}, index_dtype=index_dtype)
    g2 = dgl.remove_edges(g, {'AA': F.tensor([2], getattr(F, index_dtype)), 'AB': F.tensor([3], getattr(F, index_dtype)), 'BA': F.tensor([1], getattr(F, index_dtype))})
716
717
718
    check(g2, 'AA', g, [2])
    check(g2, 'AB', g, [3])
    check(g2, 'BA', g, [1])
719

720
    g3 = dgl.remove_edges(g, {'AA': F.tensor([], getattr(F, index_dtype)), 'AB': F.tensor([3], getattr(F, index_dtype)), 'BA': F.tensor([1], getattr(F, index_dtype))})
721
722
723
724
    check(g3, 'AA', g, [])
    check(g3, 'AB', g, [3])
    check(g3, 'BA', g, [1])

725
    g4 = dgl.remove_edges(g, {'AB': F.tensor([3, 1, 2, 0], getattr(F, index_dtype))})
726
    check(g4, 'AA', g, [])
727
    check(g4, 'AB', g, [3, 1, 2, 0])
728
729
    check(g4, 'BA', g, [])

730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
def test_cast():
    m = spsp.coo_matrix(([1, 1], ([0, 1], [1, 2])), (4, 4))
    g = dgl.DGLGraph(m, readonly=True)
    gsrc, gdst = g.edges(order='eid')
    ndata = F.randn((4, 5))
    edata = F.randn((2, 4))
    g.ndata['x'] = ndata
    g.edata['y'] = edata

    hg = dgl.as_heterograph(g, 'A', 'AA')
    assert hg.ntypes == ['A']
    assert hg.etypes == ['AA']
    assert hg.canonical_etypes == [('A', 'AA', 'A')]
    assert hg.number_of_nodes() == 4
    assert hg.number_of_edges() == 2
    hgsrc, hgdst = hg.edges(order='eid')
    assert F.array_equal(gsrc, hgsrc)
    assert F.array_equal(gdst, hgdst)

    g2 = dgl.as_immutable_graph(hg)
    assert g2.number_of_nodes() == 4
    assert g2.number_of_edges() == 2
    g2src, g2dst = hg.edges(order='eid')
    assert F.array_equal(g2src, gsrc)
    assert F.array_equal(g2dst, gdst)

756
if __name__ == '__main__':
Da Zheng's avatar
Da Zheng committed
757
    test_reorder_nodes()
758
759
760
761
762
763
764
765
766
767
768
769
    # test_line_graph()
    # test_no_backtracking()
    # test_reverse()
    # test_reverse_shared_frames()
    # test_simple_graph()
    # test_bidirected_graph()
    # test_khop_adj()
    # test_khop_graph()
    # test_laplacian_lambda_max()
    # test_remove_self_loop()
    # test_add_self_loop()
    # test_partition_with_halo()
770
    # test_metis_partition()
771
    test_hetero_linegraph('int32')
772
    # test_compact()
773
    test_to_simple("int32")
774
775
    # test_in_subgraph("int32")
    # test_out_subgraph()
776
    # test_to_block("int32")
777
    # test_remove_edges()