test_pickle.py 8.28 KB
Newer Older
1
import networkx as nx
2
import scipy.sparse as ssp
Gan Quan's avatar
Gan Quan committed
3
import dgl
4
import dgl.contrib as contrib
Gan Quan's avatar
Gan Quan committed
5
6
7
from dgl.frame import Frame, FrameRef, Column
from dgl.graph_index import create_graph_index
from dgl.utils import toindex
8
9
import backend as F
import dgl.function as fn
Gan Quan's avatar
Gan Quan committed
10
11
12
import pickle
import io

13
14
15
def _assert_is_identical(g, g2):
    assert g.is_readonly == g2.is_readonly
    assert g.number_of_nodes() == g2.number_of_nodes()
16
17
    src, dst = g.all_edges(order='eid')
    src2, dst2 = g2.all_edges(order='eid')
18
19
20
21
22
23
24
25
26
27
    assert F.array_equal(src, src2)
    assert F.array_equal(dst, dst2)

    assert len(g.ndata) == len(g2.ndata)
    assert len(g.edata) == len(g2.edata)
    for k in g.ndata:
        assert F.allclose(g.ndata[k], g2.ndata[k])
    for k in g.edata:
        assert F.allclose(g.edata[k], g2.edata[k])

28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
def _assert_is_identical_hetero(g, g2):
    assert g.is_readonly == g2.is_readonly
    assert g.ntypes == g2.ntypes
    assert g.canonical_etypes == g2.canonical_etypes

    # check if two metagraphs are identical
    for edges, features in g.metagraph.edges(keys=True).items():
        assert g2.metagraph.edges(keys=True)[edges] == features

    # check if node ID spaces and feature spaces are equal
    for ntype in g.ntypes:
        assert g.number_of_nodes(ntype) == g2.number_of_nodes(ntype)
        assert len(g.nodes[ntype].data) == len(g2.nodes[ntype].data)
        for k in g.nodes[ntype].data:
            assert F.allclose(g.nodes[ntype].data[k], g2.nodes[ntype].data[k])

    # check if edge ID spaces and feature spaces are equal
    for etype in g.canonical_etypes:
        src, dst = g.all_edges(etype=etype, order='eid')
        src2, dst2 = g2.all_edges(etype=etype, order='eid')
        assert F.array_equal(src, src2)
        assert F.array_equal(dst, dst2)
        for k in g.edges[etype].data:
            assert F.allclose(g.edges[etype].data[k], g2.edges[etype].data[k])

53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
def _assert_is_identical_nodeflow(nf1, nf2):
    assert nf1.is_readonly == nf2.is_readonly
    assert nf1.number_of_nodes() == nf2.number_of_nodes()
    src, dst = nf1.all_edges()
    src2, dst2 = nf2.all_edges()
    assert F.array_equal(src, src2)
    assert F.array_equal(dst, dst2)

    assert nf1.num_layers == nf2.num_layers
    for i in range(nf1.num_layers):
        assert nf1.layer_size(i) == nf2.layer_size(i)
        assert nf1.layers[i].data.keys() == nf2.layers[i].data.keys()
        for k in nf1.layers[i].data:
            assert F.allclose(nf1.layers[i].data[k], nf2.layers[i].data[k])
    assert nf1.num_blocks == nf2.num_blocks
    for i in range(nf1.num_blocks):
        assert nf1.block_size(i) == nf2.block_size(i)
        assert nf1.blocks[i].data.keys() == nf2.blocks[i].data.keys()
        for k in nf1.blocks[i].data:
            assert F.allclose(nf1.blocks[i].data[k], nf2.blocks[i].data[k])

def _assert_is_identical_batchedgraph(bg1, bg2):
    _assert_is_identical(bg1, bg2)
    assert bg1.batch_size == bg2.batch_size
    assert bg1.batch_num_nodes == bg2.batch_num_nodes
    assert bg1.batch_num_edges == bg2.batch_num_edges

def _assert_is_identical_index(i1, i2):
    assert i1.slice_data() == i2.slice_data()
    assert F.array_equal(i1.tousertensor(), i2.tousertensor())

Gan Quan's avatar
Gan Quan committed
84
85
86
87
88
89
90
91
92
93
def _reconstruct_pickle(obj):
    f = io.BytesIO()
    pickle.dump(obj, f)
    f.seek(0)
    obj = pickle.load(f)
    f.close()

    return obj

def test_pickling_index():
94
    # normal index
Gan Quan's avatar
Gan Quan committed
95
96
97
98
    i = toindex([1, 2, 3])
    i.tousertensor()
    i.todgltensor() # construct a dgl tensor which is unpicklable
    i2 = _reconstruct_pickle(i)
99
    _assert_is_identical_index(i, i2)
Gan Quan's avatar
Gan Quan committed
100

101
102
103
104
    # slice index
    i = toindex(slice(5, 10))
    i2 = _reconstruct_pickle(i)
    _assert_is_identical_index(i, i2)
Gan Quan's avatar
Gan Quan committed
105
106

def test_pickling_graph_index():
107
    gi = create_graph_index(None, False)
Gan Quan's avatar
Gan Quan committed
108
109
110
111
112
113
114
115
116
    gi.add_nodes(3)
    src_idx = toindex([0, 0])
    dst_idx = toindex([1, 2])
    gi.add_edges(src_idx, dst_idx)

    gi2 = _reconstruct_pickle(gi)

    assert gi2.number_of_nodes() == gi.number_of_nodes()
    src_idx2, dst_idx2, _ = gi2.edges()
117
118
    assert F.array_equal(src_idx.tousertensor(), src_idx2.tousertensor())
    assert F.array_equal(dst_idx.tousertensor(), dst_idx2.tousertensor())
Gan Quan's avatar
Gan Quan committed
119
120
121


def test_pickling_frame():
122
123
    x = F.randn((3, 7))
    y = F.randn((3, 5))
Gan Quan's avatar
Gan Quan committed
124
125
126
127

    c = Column(x)

    c2 = _reconstruct_pickle(c)
128
    assert F.allclose(c.data, c2.data)
Gan Quan's avatar
Gan Quan committed
129
130
131
132

    fr = Frame({'x': x, 'y': y})

    fr2 = _reconstruct_pickle(fr)
133
134
    assert F.allclose(fr2['x'].data, x)
    assert F.allclose(fr2['y'].data, y)
Gan Quan's avatar
Gan Quan committed
135
136
137
138
139
140
141
142
143
144
145

    fr = Frame()


def _global_message_func(nodes):
    return {'x': nodes.data['x']}

def test_pickling_graph():
    # graph structures and frames are pickled
    g = dgl.DGLGraph()
    g.add_nodes(3)
146
147
    src = F.tensor([0, 0])
    dst = F.tensor([1, 2])
Gan Quan's avatar
Gan Quan committed
148
149
    g.add_edges(src, dst)

150
151
152
153
    x = F.randn((3, 7))
    y = F.randn((3, 5))
    a = F.randn((2, 6))
    b = F.randn((2, 4))
Gan Quan's avatar
Gan Quan committed
154
155
156
157
158
159
160
161

    g.ndata['x'] = x
    g.ndata['y'] = y
    g.edata['a'] = a
    g.edata['b'] = b

    # registered functions are pickled
    g.register_message_func(_global_message_func)
162
    reduce_func = fn.sum('x', 'x')
Gan Quan's avatar
Gan Quan committed
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
    g.register_reduce_func(reduce_func)

    # custom attributes should be pickled
    g.foo = 2

    new_g = _reconstruct_pickle(g)

    _assert_is_identical(g, new_g)
    assert new_g.foo == 2
    assert new_g._message_func == _global_message_func
    assert isinstance(new_g._reduce_func, type(reduce_func))
    assert new_g._reduce_func._name == 'sum'
    assert new_g._reduce_func.msg_field == 'x'
    assert new_g._reduce_func.out_field == 'x'

    # test batched graph with partial set case
    g2 = dgl.DGLGraph()
    g2.add_nodes(4)
181
182
    src2 = F.tensor([0, 1])
    dst2 = F.tensor([2, 3])
Gan Quan's avatar
Gan Quan committed
183
184
    g2.add_edges(src2, dst2)

185
186
187
188
    x2 = F.randn((4, 7))
    y2 = F.randn((3, 5))
    a2 = F.randn((2, 6))
    b2 = F.randn((2, 4))
Gan Quan's avatar
Gan Quan committed
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203

    g2.ndata['x'] = x2
    g2.nodes[[0, 1, 3]].data['y'] = y2
    g2.edata['a'] = a2
    g2.edata['b'] = b2

    bg = dgl.batch([g, g2])

    bg2 = _reconstruct_pickle(bg)

    _assert_is_identical(bg, bg2)
    new_g, new_g2 = dgl.unbatch(bg2)
    _assert_is_identical(g, new_g)
    _assert_is_identical(g2, new_g2)

204
205
206
207
208
209
    # readonly graph
    g = dgl.DGLGraph([(0, 1), (1, 2)], readonly=True)
    new_g = _reconstruct_pickle(g)
    _assert_is_identical(g, new_g)

    # multigraph
210
    g = dgl.DGLGraph([(0, 1), (0, 1), (1, 2)])
211
212
213
214
    new_g = _reconstruct_pickle(g)
    _assert_is_identical(g, new_g)

    # readonly multigraph
215
    g = dgl.DGLGraph([(0, 1), (0, 1), (1, 2)], readonly=True)
216
217
218
    new_g = _reconstruct_pickle(g)
    _assert_is_identical(g, new_g)

219
220
221
222
223
224
225
226
227
def test_pickling_nodeflow():
    elist = [(0, 1), (1, 2), (2, 3), (3, 0)]
    g = dgl.DGLGraph(elist, readonly=True)
    g.ndata['x'] = F.randn((4, 5))
    g.edata['y'] = F.randn((4, 3))
    nf = contrib.sampling.sampler.create_full_nodeflow(g, 5)
    nf.copy_from_parent()  # add features
    new_nf = _reconstruct_pickle(nf)
    _assert_is_identical_nodeflow(nf, new_nf)
Gan Quan's avatar
Gan Quan committed
228

229
230
231
232
233
234
235
236
237
def test_pickling_batched_graph():
    glist = [nx.path_graph(i + 5) for i in range(5)]
    glist = [dgl.DGLGraph(g) for g in glist]
    bg = dgl.batch(glist)
    bg.ndata['x'] = F.randn((35, 5))
    bg.edata['y'] = F.randn((60, 3))
    new_bg = _reconstruct_pickle(bg)
    _assert_is_identical_batchedgraph(bg, new_bg)

238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
def test_pickling_heterograph():
    # copied from test_heterograph.create_test_heterograph()
    plays_spmat = ssp.coo_matrix(([1, 1, 1, 1], ([0, 1, 2, 1], [0, 0, 1, 1])))
    wishes_nx = nx.DiGraph()
    wishes_nx.add_nodes_from(['u0', 'u1', 'u2'], bipartite=0)
    wishes_nx.add_nodes_from(['g0', 'g1'], bipartite=1)
    wishes_nx.add_edge('u0', 'g1', id=0)
    wishes_nx.add_edge('u2', 'g0', id=1)

    follows_g = dgl.graph([(0, 1), (1, 2)], 'user', 'follows')
    plays_g = dgl.bipartite(plays_spmat, 'user', 'plays', 'game')
    wishes_g = dgl.bipartite(wishes_nx, 'user', 'wishes', 'game')
    develops_g = dgl.bipartite([(0, 0), (1, 1)], 'developer', 'develops', 'game')
    g = dgl.hetero_from_relations([follows_g, plays_g, wishes_g, develops_g])

    g.nodes['user'].data['u_h'] = F.randn((3, 4))
    g.nodes['game'].data['g_h'] = F.randn((2, 5))
    g.edges['plays'].data['p_h'] = F.randn((4, 6))

    new_g = _reconstruct_pickle(g)
    _assert_is_identical_hetero(g, new_g)


Gan Quan's avatar
Gan Quan committed
261
262
263
264
265
if __name__ == '__main__':
    test_pickling_index()
    test_pickling_graph_index()
    test_pickling_frame()
    test_pickling_graph()
266
    test_pickling_nodeflow()
267
    test_pickling_batched_graph()
268
    test_pickling_heterograph()