# test_batch-graph.py: tests for batching and unbatching DGL graphs.
import unittest

import backend as F

import dgl
import numpy as np
from utils import parametrize_idtype


def tree1(idtype):
    r"""Generate a tree

         0
        / \
       1   2
      / \
     3   4

    Edges are from leaves to root.

    Node feature ``"h"`` holds each node's own ID (0..4); edge feature
    ``"h"`` is a random ``(4, 10)`` tensor. The graph is cast to ``idtype``
    and moved to ``F.ctx()``.
    """
    # NOTE: the docstring is raw because the ASCII art has lines ending in a
    # backslash, which a normal string literal treats as a line continuation.
    g = dgl.graph(([], [])).astype(idtype).to(F.ctx())
    g.add_nodes(5)
    # Each edge points from a child to its parent.
    g.add_edges(3, 1)
    g.add_edges(4, 1)
    g.add_edges(1, 0)
    g.add_edges(2, 0)
    g.ndata["h"] = F.tensor([0, 1, 2, 3, 4])
    g.edata["h"] = F.randn((4, 10))
    return g


def tree2(idtype):
    r"""Generate a tree

         1
        / \
       4   3
      / \
     2   0

    Edges are from leaves to root.

    Node feature ``"h"`` holds each node's own ID (0..4); edge feature
    ``"h"`` is a random ``(4, 10)`` tensor. The graph is cast to ``idtype``
    and moved to ``F.ctx()``.
    """
    # NOTE: the docstring is raw because the ASCII art has lines ending in a
    # backslash, which a normal string literal treats as a line continuation.
    g = dgl.graph(([], [])).astype(idtype).to(F.ctx())
    g.add_nodes(5)
    # Each edge points from a child to its parent.
    g.add_edges(2, 4)
    g.add_edges(0, 4)
    g.add_edges(4, 1)
    g.add_edges(3, 1)
    g.ndata["h"] = F.tensor([0, 1, 2, 3, 4])
    g.edata["h"] = F.randn((4, 10))
    return g


@parametrize_idtype
def test_batch_unbatch(idtype):
    """Batch two trees, check the batch metadata, then round-trip via unbatch."""
    t1 = tree1(idtype)
    t2 = tree2(idtype)

    bg = dgl.batch([t1, t2])
    assert bg.num_nodes() == 10
    assert bg.num_edges() == 8
    assert bg.batch_size == 2
    assert F.allclose(bg.batch_num_nodes(), F.tensor([5, 5]))
    assert F.allclose(bg.batch_num_edges(), F.tensor([4, 4]))

    # Unbatching must give back the original per-graph features.
    tt1, tt2 = dgl.unbatch(bg)
    assert F.allclose(t1.ndata["h"], tt1.ndata["h"])
    assert F.allclose(t1.edata["h"], tt1.edata["h"])
    assert F.allclose(t2.ndata["h"], tt2.ndata["h"])
    assert F.allclose(t2.edata["h"], tt2.edata["h"])


@parametrize_idtype
def test_batch_unbatch1(idtype):
    """Batch a graph together with an already-batched graph and unbatch it.

    ``b2`` is built from ``t2`` and the batch ``[t1, t2]``, so unbatching is
    expected to yield three graphs in the order ``t2, t1, t2``.
    """
    t1 = tree1(idtype)
    t2 = tree2(idtype)
    b1 = dgl.batch([t1, t2])
    b2 = dgl.batch([t2, b1])
    assert b2.num_nodes() == 15
    assert b2.num_edges() == 12
    assert b2.batch_size == 3
    assert F.allclose(b2.batch_num_nodes(), F.tensor([5, 5, 5]))
    assert F.allclose(b2.batch_num_edges(), F.tensor([4, 4, 4]))

    s1, s2, s3 = dgl.unbatch(b2)
    assert F.allclose(t2.ndata["h"], s1.ndata["h"])
    assert F.allclose(t2.edata["h"], s1.edata["h"])
    assert F.allclose(t1.ndata["h"], s2.ndata["h"])
    assert F.allclose(t1.edata["h"], s2.edata["h"])
    assert F.allclose(t2.ndata["h"], s3.ndata["h"])
    assert F.allclose(t2.edata["h"], s3.edata["h"])


@unittest.skipIf(
    dgl.backend.backend_name == "tensorflow",
    reason="TF doesn't support inplace update",
)
@parametrize_idtype
def test_batch_unbatch_frame(idtype):
    """Test module of node/edge frames of batched/unbatched DGLGraphs.

    Also address the bug mentioned in https://github.com/dmlc/dgl/issues/1475.

    In-place writes to a batched graph's features must not leak back into
    the input graphs, and must be visible in the graphs produced by
    ``dgl.unbatch``.
    """
    t1 = tree1(idtype)
    t2 = tree2(idtype)
    N1 = t1.num_nodes()
    E1 = t1.num_edges()
    N2 = t2.num_nodes()
    E2 = t2.num_edges()
    D = 10
    t1.ndata["h"] = F.randn((N1, D))
    t1.edata["h"] = F.randn((E1, D))
    t2.ndata["h"] = F.randn((N2, D))
    t2.edata["h"] = F.randn((E2, D))

    b1 = dgl.batch([t1, t2])
    b2 = dgl.batch([t2])  # single-graph batch

    # Zero the leading rows of each batched frame in place ...
    b1.ndata["h"][:N1] = F.zeros((N1, D))
    b1.edata["h"][:E1] = F.zeros((E1, D))
    b2.ndata["h"][:N2] = F.zeros((N2, D))
    b2.edata["h"][:E2] = F.zeros((E2, D))
    # ... and check the input graphs were left untouched.
    assert not F.allclose(t1.ndata["h"], F.zeros((N1, D)))
    assert not F.allclose(t1.edata["h"], F.zeros((E1, D)))
    assert not F.allclose(t2.ndata["h"], F.zeros((N2, D)))
    assert not F.allclose(t2.edata["h"], F.zeros((E2, D)))

    g1, g2 = dgl.unbatch(b1)
    (_g2,) = dgl.unbatch(b2)
    # The first graph of b1 was zeroed; the second keeps t2's features.
    assert F.allclose(g1.ndata["h"], F.zeros((N1, D)))
    assert F.allclose(g1.edata["h"], F.zeros((E1, D)))
    assert F.allclose(g2.ndata["h"], t2.ndata["h"])
    assert F.allclose(g2.edata["h"], t2.edata["h"])
    # The only graph of b2 was zeroed.
    assert F.allclose(_g2.ndata["h"], F.zeros((N2, D)))
    assert F.allclose(_g2.edata["h"], F.zeros((E2, D)))


@parametrize_idtype
def test_batch_unbatch2(idtype):
    """Test setting/getting features after batch."""
    a = dgl.graph(([], [])).astype(idtype).to(F.ctx())
    a.add_nodes(4)
    a.add_edges(0, [1, 2, 3])
    b = dgl.graph(([], [])).astype(idtype).to(F.ctx())
    b.add_nodes(3)
    b.add_edges(0, [1, 2])

    # The batch has 4 + 3 = 7 nodes and 3 + 2 = 5 edges.
    c = dgl.batch([a, b])
    c.ndata["h"] = F.ones((7, 1))
    c.edata["w"] = F.ones((5, 1))
    assert F.allclose(c.ndata["h"], F.ones((7, 1)))
    assert F.allclose(c.edata["w"], F.ones((5, 1)))


@parametrize_idtype
def test_batch_send_and_recv(idtype):
    """Run one send_and_recv step on a batched graph and check both trees."""
    t1 = tree1(idtype)
    t2 = tree2(idtype)

    bg = dgl.batch([t1, t2])

    def _mfunc(edges):
        # Message: the source node's feature.
        return {"m": edges.src["h"]}

    def _rfunc(nodes):
        # Reduce: sum of incoming messages.
        return {"h": F.sum(nodes.mailbox["m"], 1)}

    # Leaf-to-parent edges of both trees; IDs of the second tree are shifted
    # by 5, the number of nodes in the first tree.
    u = [3, 4, 2 + 5, 0 + 5]
    v = [1, 1, 4 + 5, 4 + 5]

    bg.send_and_recv((u, v), _mfunc, _rfunc)

    t1, t2 = dgl.unbatch(bg)
    # Node features start as node IDs: 3 + 4 = 7 and 2 + 0 = 2.
    assert F.asnumpy(t1.ndata["h"][1]) == 7
    assert F.asnumpy(t2.ndata["h"][4]) == 2


@parametrize_idtype
def test_batch_propagate(idtype):
    """Propagate messages leaf-to-root on a batched graph in two steps."""
    t1 = tree1(idtype)
    t2 = tree2(idtype)

    bg = dgl.batch([t1, t2])

    def _mfunc(edges):
        # Message: the source node's feature.
        return {"m": edges.src["h"]}

    def _rfunc(nodes):
        # Reduce: sum of incoming messages.
        return {"h": F.sum(nodes.mailbox["m"], 1)}

    # Edge frontiers, in the order they are propagated. IDs of the second
    # tree are shifted by 5, the number of nodes in the first tree.
    order = []

    # step 1: leaves -> internal nodes
    u = [3, 4, 2 + 5, 0 + 5]
    v = [1, 1, 4 + 5, 4 + 5]
    order.append((u, v))

    # step 2: children of the root -> root
    u = [1, 2, 4 + 5, 3 + 5]
    v = [0, 0, 1 + 5, 1 + 5]
    order.append((u, v))

    bg.prop_edges(order, _mfunc, _rfunc)
    t1, t2 = dgl.unbatch(bg)

    # t1 root: (3 + 4) + 2 = 9; t2 root: (2 + 0) + 3 = 5.
    assert F.asnumpy(t1.ndata["h"][0]) == 9
    assert F.asnumpy(t2.ndata["h"][1]) == 5


@parametrize_idtype
def test_batched_edge_ordering(idtype):
    """Check that an edge's feature in the batch matches the source graph.

    Edge ``4 -> 5`` is looked up by endpoints in both the batched graph and
    ``g1`` (the first graph in the batch); the two feature rows must be
    equal.
    """
    g1 = dgl.graph(([], [])).astype(idtype).to(F.ctx())
    g1.add_nodes(6)
    g1.add_edges([4, 4, 2, 2, 0], [5, 3, 3, 1, 1])
    e1 = F.randn((5, 10))
    g1.edata["h"] = e1
    g2 = dgl.graph(([], [])).astype(idtype).to(F.ctx())
    g2.add_nodes(6)
    g2.add_edges([0, 1, 2, 5, 4, 5], [1, 2, 3, 4, 3, 0])
    e2 = F.randn((6, 10))
    g2.edata["h"] = e2
    g = dgl.batch([g1, g2])
    r1 = g.edata["h"][g.edge_ids(4, 5)]
    r2 = g1.edata["h"][g1.edge_ids(4, 5)]
    assert F.array_equal(r1, r2)


@parametrize_idtype
def test_batch_no_edge(idtype):
    """Batching must work when one of the graphs has no edges."""
    g1 = dgl.graph(([], [])).astype(idtype).to(F.ctx())
    g1.add_nodes(6)
    g1.add_edges([4, 4, 2, 2, 0], [5, 3, 3, 1, 1])
    g2 = dgl.graph(([], [])).astype(idtype).to(F.ctx())
    g2.add_nodes(6)
    g2.add_edges([0, 1, 2, 5, 4, 5], [1, 2, 3, 4, 3, 0])
    g3 = dgl.graph(([], [])).astype(idtype).to(F.ctx())
    g3.add_nodes(1)  # no edges
    g = dgl.batch([g1, g3, g2])  # should not throw an error
    # Totals over the three inputs: 6 + 1 + 6 nodes and 5 + 0 + 6 edges.
    assert g.batch_size == 3
    assert g.num_nodes() == 13
    assert g.num_edges() == 11


@parametrize_idtype
def test_batch_keeps_empty_data(idtype):
    """Feature keys must survive batching even when the graphs are empty."""
    g1 = dgl.graph(([], [])).astype(idtype).to(F.ctx())
    g1.ndata["nh"] = F.tensor([])
    g1.edata["eh"] = F.tensor([])
    g2 = dgl.graph(([], [])).astype(idtype).to(F.ctx())
    g2.ndata["nh"] = F.tensor([])
    g2.edata["eh"] = F.tensor([])
    g = dgl.batch([g1, g2])
    assert "nh" in g.ndata
    assert "eh" in g.edata


def _get_subgraph_batch_info(keys, induced_indices_arr, batch_num_objs):
    """Internal function to compute batch information for subgraphs.

    Parameters
    ----------
    keys : List[str]
        The node/edge type keys.
    induced_indices_arr : List[Tensor]
        The induced node/edge index tensor for all node/edge types.
    batch_num_objs : Tensor
        Number of nodes/edges for each graph in the original batch.

    Returns
    -------
    Mapping[str, Tensor]
        A dictionary mapping all node/edge type keys to the ``batch_num_objs``
        array of corresponding graph.
    """
    # Cumulative sizes: entry i is the exclusive upper bound of the ID range
    # that graph i occupies in the original batch.
    bucket_offset = np.expand_dims(
        np.cumsum(F.asnumpy(batch_num_objs), 0), -1
    )  # (num_bkts, 1)
    ret = {}
    for key, induced_indices in zip(keys, induced_indices_arr):
        # NOTE(Zihao): this implementation is not efficient and we can replace it with
        # binary search in the future.
        induced_indices = np.expand_dims(
            F.asnumpy(induced_indices), 0
        )  # (1, num_nodes)
        # new_offset[i] counts the induced IDs that fall below bucket i's
        # upper bound, i.e. a cumulative count over graphs 0..i.
        new_offset = np.sum((induced_indices < bucket_offset), 1)  # (num_bkts,)
        # Adjacent differences (with an implicit leading 0) turn cumulative
        # counts into per-graph counts while keeping an integer dtype.
        new_batch_num_objs = np.diff(new_offset, prepend=0)
        ret[key] = F.tensor(new_batch_num_objs, dtype=F.dtype(batch_num_objs))
    return ret


@parametrize_idtype
def test_set_batch_info(idtype):
    """Check set_batch_num_nodes/edges on subgraphs of a batched graph.

    A node subgraph and an edge subgraph are taken from a batch of two
    random graphs; after the recomputed batch information is set, unbatching
    must agree with subgraphs taken directly from the two input graphs.
    """
    ctx = F.ctx()

    g1 = dgl.rand_graph(30, 100).astype(idtype).to(ctx)
    g2 = dgl.rand_graph(40, 200).astype(idtype).to(ctx)
    bg = dgl.batch([g1, g2])
    batch_num_nodes = F.astype(bg.batch_num_nodes(), idtype)
    batch_num_edges = F.astype(bg.batch_num_edges(), idtype)

    # test homogeneous node subgraph
    # Batched nodes 10..19 belong to g1; 50..59 are g2's nodes 20..29
    # (g1 contributes the first 30 node IDs).
    sg_n = dgl.node_subgraph(bg, list(range(10, 20)) + list(range(50, 60)))
    induced_nodes = sg_n.ndata["_ID"]
    induced_edges = sg_n.edata["_ID"]
    new_batch_num_nodes = _get_subgraph_batch_info(
        bg.ntypes, [induced_nodes], batch_num_nodes
    )
    new_batch_num_edges = _get_subgraph_batch_info(
        bg.canonical_etypes, [induced_edges], batch_num_edges
    )
    sg_n.set_batch_num_nodes(new_batch_num_nodes)
    sg_n.set_batch_num_edges(new_batch_num_edges)
    subg_n1, subg_n2 = dgl.unbatch(sg_n)
    subg1 = dgl.node_subgraph(g1, list(range(10, 20)))
    subg2 = dgl.node_subgraph(g2, list(range(20, 30)))
    assert subg_n1.num_edges() == subg1.num_edges()
    assert subg_n2.num_edges() == subg2.num_edges()

    # test homogeneous edge subgraph
    # Batched edges 40..69 belong to g1; 150..199 are g2's edges 50..99
    # (g1 contributes the first 100 edge IDs).
    sg_e = dgl.edge_subgraph(
        bg, list(range(40, 70)) + list(range(150, 200)), relabel_nodes=False
    )
    # With relabel_nodes=False every node of bg is kept.
    induced_nodes = F.arange(0, bg.num_nodes(), idtype)
    induced_edges = sg_e.edata["_ID"]
    new_batch_num_nodes = _get_subgraph_batch_info(
        bg.ntypes, [induced_nodes], batch_num_nodes
    )
    new_batch_num_edges = _get_subgraph_batch_info(
        bg.canonical_etypes, [induced_edges], batch_num_edges
    )
    sg_e.set_batch_num_nodes(new_batch_num_nodes)
    sg_e.set_batch_num_edges(new_batch_num_edges)
    subg_e1, subg_e2 = dgl.unbatch(sg_e)
    subg1 = dgl.edge_subgraph(g1, list(range(40, 70)), relabel_nodes=False)
    subg2 = dgl.edge_subgraph(g2, list(range(50, 100)), relabel_nodes=False)
    assert subg_e1.num_nodes() == subg1.num_nodes()
    assert subg_e2.num_nodes() == subg2.num_nodes()


if __name__ == "__main__":
    # Ad-hoc entry point for running a single test without pytest. Every
    # parametrized test in this file takes an idtype argument.
    test_set_batch_info(F.int32)