test_serialize.py 12.2 KB
Newer Older
VoVAllen's avatar
VoVAllen committed
1
2
3
4
5
6
import backend as F
import numpy as np
import scipy as sp
import time
import tempfile
import os
7
import pytest
8
import unittest
VoVAllen's avatar
VoVAllen committed
9
10
11

from dgl import DGLGraph
import dgl
12
import dgl.ndarray as nd
13
from dgl.data.utils import load_labels, save_tensors, load_tensors
VoVAllen's avatar
VoVAllen committed
14
15
16
17

np.random.seed(44)


18
def generate_rand_graph(n, is_hetero):
    """Create a random graph on ``n`` nodes from a sparse adjacency matrix.

    The adjacency matrix is an ``n x n`` scipy COO matrix drawn with
    ``density=0.1``; every stored non-zero entry becomes an int64 ``1``.

    Parameters
    ----------
    n : int
        Number of rows/columns of the adjacency matrix.
    is_hetero : bool
        If truthy the graph is built with ``dgl.from_scipy``; otherwise the
        ``DGLGraph`` constructor is called with ``readonly=True``.
    """
    random_adj = sp.sparse.random(n, n, density=0.1, format='coo')
    adj = (random_adj != 0).astype(np.int64)
    if not is_hetero:
        return DGLGraph(adj, readonly=True)
    return dgl.from_scipy(adj)
VoVAllen's avatar
VoVAllen committed
25
26


27
def construct_graph(n, is_hetero):
    """Return a list of ``n`` random 30-node graphs with features attached.

    Each graph comes from ``generate_rand_graph(30, is_hetero)`` and gets:

    * ``edata['e1']`` -- ``F.randn`` of shape (num_edges, 32)
    * ``edata['e2']`` -- ``F.ones`` of shape (num_edges, 32)
    * ``ndata['n1']`` -- ``F.randn`` of shape (num_nodes, 64)
    """
    graphs = []
    for _ in range(n):
        graph = generate_rand_graph(30, is_hetero)
        num_edges = graph.number_of_edges()
        num_nodes = graph.number_of_nodes()
        # Feature tensors are created in the same order as before so the
        # random draws line up with the original implementation.
        graph.edata['e1'] = F.randn((num_edges, 32))
        graph.edata['e2'] = F.ones((num_edges, 32))
        graph.ndata['n1'] = F.randn((num_nodes, 64))
        graphs.append(graph)
    return graphs


38
@unittest.skipIf(F._default_context_str == 'gpu', reason="GPU not implemented")
@pytest.mark.parametrize('is_hetero', [True, False])
def test_graph_serialize_with_feature(is_hetero):
    """Round-trip graphs that carry node and edge features through save/load.

    Builds 100 featured graphs with ``construct_graph``, saves them to a
    temporary file, loads them back using a shuffled index list, and compares
    the first loaded graph (nodes, edges, and the 'e1', 'e2', 'n1' features)
    with the original graph at the corresponding index. Construction, save
    and load timings are printed for reference.

    Parameters
    ----------
    is_hetero : bool
        Forwarded to ``construct_graph`` to select how graphs are created.
    """
    num_graphs = 100

    t0 = time.time()

    g_list = construct_graph(num_graphs, is_hetero)

    t1 = time.time()

    # create a temporary file and immediately release it so DGL can open it.
    f = tempfile.NamedTemporaryFile(delete=False)
    path = f.name
    f.close()

    # try/finally guarantees the temporary file is removed even when saving,
    # loading or one of the assertions fails.
    try:
        dgl.save_graphs(path, g_list)

        t2 = time.time()
        idx_list = np.random.permutation(np.arange(num_graphs)).tolist()
        loadg_list, _ = dgl.load_graphs(path, idx_list)

        t3 = time.time()
        idx = idx_list[0]
        load_g = loadg_list[0]
        print("Save time: {} s".format(t2 - t1))
        print("Load time: {} s".format(t3 - t2))
        print("Graph Construction time: {} s".format(t1 - t0))

        assert F.allclose(load_g.nodes(), g_list[idx].nodes())

        load_edges = load_g.all_edges('uv', 'eid')
        g_edges = g_list[idx].all_edges('uv', 'eid')
        assert F.allclose(load_edges[0], g_edges[0])
        assert F.allclose(load_edges[1], g_edges[1])
        assert F.allclose(load_g.edata['e1'], g_list[idx].edata['e1'])
        assert F.allclose(load_g.edata['e2'], g_list[idx].edata['e2'])
        assert F.allclose(load_g.ndata['n1'], g_list[idx].ndata['n1'])
    finally:
        os.unlink(path)


80
@unittest.skipIf(F._default_context_str == 'gpu', reason="GPU not implemented")
@pytest.mark.parametrize('is_hetero', [True, False])
def test_graph_serialize_without_feature(is_hetero):
    """Round-trip feature-less graphs through save/load.

    Saves 100 random 30-node graphs to a temporary file, loads them back
    using a shuffled index list, and checks that the first loaded graph has
    the same nodes and edges as the original at the corresponding index.

    Parameters
    ----------
    is_hetero : bool
        Forwarded to ``generate_rand_graph`` to select how graphs are created.
    """
    num_graphs = 100
    g_list = [generate_rand_graph(30, is_hetero) for _ in range(num_graphs)]

    # create a temporary file and immediately release it so DGL can open it.
    f = tempfile.NamedTemporaryFile(delete=False)
    path = f.name
    f.close()

    # try/finally guarantees the temporary file is removed even when saving,
    # loading or one of the assertions fails.
    try:
        dgl.save_graphs(path, g_list)

        idx_list = np.random.permutation(np.arange(num_graphs)).tolist()
        loadg_list, _ = dgl.load_graphs(path, idx_list)

        idx = idx_list[0]
        load_g = loadg_list[0]

        assert F.allclose(load_g.nodes(), g_list[idx].nodes())

        load_edges = load_g.all_edges('uv', 'eid')
        g_edges = g_list[idx].all_edges('uv', 'eid')
        assert F.allclose(load_edges[0], g_edges[0])
        assert F.allclose(load_edges[1], g_edges[1])
    finally:
        os.unlink(path)

108
@unittest.skipIf(F._default_context_str == 'gpu', reason="GPU not implemented")
@pytest.mark.parametrize('is_hetero', [True, False])
def test_graph_serialize_with_labels(is_hetero):
    """Round-trip graphs together with a label dictionary through save/load.

    Saves 100 random graphs plus a ``{"label": zeros((100, 1))}`` dictionary,
    then checks that the labels returned by both ``dgl.load_graphs`` and
    ``load_labels`` match what was saved, and that the first loaded graph has
    the same nodes and edges as the original at the corresponding index.

    Parameters
    ----------
    is_hetero : bool
        Forwarded to ``generate_rand_graph`` to select how graphs are created.
    """
    num_graphs = 100
    g_list = [generate_rand_graph(30, is_hetero) for _ in range(num_graphs)]
    labels = {"label": F.zeros((num_graphs, 1))}

    # create a temporary file and immediately release it so DGL can open it.
    f = tempfile.NamedTemporaryFile(delete=False)
    path = f.name
    f.close()

    # try/finally guarantees the temporary file is removed even when saving,
    # loading or one of the assertions fails.
    try:
        dgl.save_graphs(path, g_list, labels)

        idx_list = np.random.permutation(np.arange(num_graphs)).tolist()
        loadg_list, l_labels0 = dgl.load_graphs(path, idx_list)
        l_labels = load_labels(path)
        assert F.allclose(l_labels['label'], labels['label'])
        assert F.allclose(l_labels0['label'], labels['label'])

        idx = idx_list[0]
        load_g = loadg_list[0]

        assert F.allclose(load_g.nodes(), g_list[idx].nodes())

        load_edges = load_g.all_edges('uv', 'eid')
        g_edges = g_list[idx].all_edges('uv', 'eid')
        assert F.allclose(load_edges[0], g_edges[0])
        assert F.allclose(load_edges[1], g_edges[1])
    finally:
        os.unlink(path)


141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
def test_serialize_tensors():
    """Round-trip a dictionary of tensors through save_tensors/load_tensors.

    Saves an int64 and a float32 tensor (one under the key ``"1@1"``), then
    loads them twice: once with the default return type and once with
    ``return_dgl_ndarray=True``, where each value must be an ``nd.NDArray``.
    Both results must contain every saved key with equal contents.
    """
    # create a temporary file and immediately release it so DGL can open it.
    f = tempfile.NamedTemporaryFile(delete=False)
    path = f.name
    f.close()

    # try/finally guarantees the temporary file is removed even when saving,
    # loading or one of the assertions fails.
    try:
        tensor_dict = {"a": F.tensor(
            [1, 3, -1, 0], dtype=F.int64), "1@1": F.tensor([1.5, 2], dtype=F.float32)}

        save_tensors(path, tensor_dict)

        load_tensor_dict = load_tensors(path)

        for key in tensor_dict:
            assert key in load_tensor_dict
            assert np.array_equal(
                F.asnumpy(load_tensor_dict[key]), F.asnumpy(tensor_dict[key]))

        load_nd_dict = load_tensors(path, return_dgl_ndarray=True)

        for key in tensor_dict:
            assert key in load_nd_dict
            assert isinstance(load_nd_dict[key], nd.NDArray)
            assert np.array_equal(
                load_nd_dict[key].asnumpy(), F.asnumpy(tensor_dict[key]))
    finally:
        os.unlink(path)

169

170
171
172
173
174
175
176
177
178
179
180
181
def test_serialize_empty_dict():
    """Saving an empty tensor dictionary must load back as an empty dict."""
    # create a temporary file and immediately release it so DGL can open it.
    f = tempfile.NamedTemporaryFile(delete=False)
    path = f.name
    f.close()

    # try/finally guarantees the temporary file is removed even when saving,
    # loading or one of the assertions fails.
    try:
        tensor_dict = {}

        save_tensors(path, tensor_dict)

        load_tensor_dict = load_tensors(path)
        assert isinstance(load_tensor_dict, dict)
        assert len(load_tensor_dict) == 0
    finally:
        os.unlink(path)
185

186
187

def test_load_old_files1():
    """Load the stored ``data/1.bin`` file and compare with ``data/1.npy``.

    The ``.npy`` file supplies the index of the graph to inspect together
    with the expected edge endpoints and the 'e1', 'e2' and 'n1' features.
    """
    here = os.path.dirname(__file__)
    graphs, _ = dgl.load_graphs(os.path.join(here, "data/1.bin"))
    reference = np.load(os.path.join(here, "data/1.npy"), allow_pickle=True)
    # The stored record has seven fields; num_nodes is unpacked but unused.
    idx, num_nodes, edge0, edge1, edata_e1, edata_e2, ndata_n1 = reference

    graph = graphs[idx]
    loaded_edges = graph.all_edges('uv', 'eid')

    assert np.allclose(F.asnumpy(loaded_edges[0]), edge0)
    assert np.allclose(F.asnumpy(loaded_edges[1]), edge1)
    assert np.allclose(F.asnumpy(graph.edata['e1']), edata_e1)
    assert np.allclose(F.asnumpy(graph.edata['e2']), edata_e2)
    assert np.allclose(F.asnumpy(graph.ndata['n1']), ndata_n1)


def test_load_old_files2():
    """Load the stored ``data/2.bin`` file and compare with ``data/2.npy``.

    Labels are read both through ``dgl.load_graphs`` and ``load_labels`` and
    must match the stored reference; the graph at the stored index must have
    the stored edge endpoints.
    """
    here = os.path.dirname(__file__)
    bin_path = os.path.join(here, "data/2.bin")

    graphs, labels_from_graphs = dgl.load_graphs(bin_path)
    labels_only = load_labels(bin_path)
    reference = np.load(os.path.join(here, "data/2.npy"), allow_pickle=True)
    idx, edges0, edges1, np_labels = reference

    assert np.allclose(F.asnumpy(labels_from_graphs['label']), np_labels)
    assert np.allclose(F.asnumpy(labels_only['label']), np_labels)

    graph = graphs[idx]
    print(graph)
    loaded_edges = graph.all_edges('uv', 'eid')
    assert np.allclose(F.asnumpy(loaded_edges[0]), edges0)
    assert np.allclose(F.asnumpy(loaded_edges[1]), edges1)


220
def create_heterographs(idtype):
    """Build three small heterographs over a single 'user' node type.

    Returns ``[g, g_x, g_y]``:

    * ``g_x`` -- only the 'follows' relation, with node feature 'h' and
      edge feature 'w'.
    * ``g_y`` -- only the 'knows' relation, passed through
      ``.formats('csr')``, with node feature 'hh' and edge feature 'ww'.
    * ``g`` -- both relations, reusing the feature tensors of ``g_x`` and
      ``g_y``.

    Parameters
    ----------
    idtype
        Passed as ``idtype`` to every ``dgl.heterograph`` call.
    """
    follows = ('user', 'follows', 'user')
    knows = ('user', 'knows', 'user')

    g_x = dgl.heterograph({follows: ([0, 1, 2], [1, 2, 3])}, idtype=idtype)
    g_y = dgl.heterograph({knows: ([0, 2], [2, 3])}, idtype=idtype)
    g_y = g_y.formats('csr')

    # Feature tensors are created in the original order so random draws match.
    g_x.ndata['h'] = F.randn((4, 3))
    g_x.edata['w'] = F.randn((3, 2))
    g_y.ndata['hh'] = F.ones((4, 5))
    g_y.edata['ww'] = F.randn((2, 10))

    g = dgl.heterograph(
        {
            follows: ([0, 1, 2], [1, 2, 3]),
            knows: ([0, 2], [2, 3]),
        },
        idtype=idtype,
    )
    g.nodes['user'].data['h'] = g_x.ndata['h']
    g.nodes['user'].data['hh'] = g_y.ndata['hh']
    g.edges['follows'].data['w'] = g_x.edata['w']
    g.edges['knows'].data['ww'] = g_y.edata['ww']

    return [g, g_x, g_y]

239
def create_heterographs2(idtype):
    """Build four small heterographs, including one with two node types.

    Returns ``[g, g_x, g_y, g_z]``:

    * ``g_x`` -- only ('user', 'follows', 'user'), with node feature 'h'
      and edge feature 'w'.
    * ``g_y`` -- only ('user', 'knows', 'user'), passed through
      ``.formats('csr')``, with node feature 'hh' and edge feature 'ww'.
    * ``g_z`` -- only ('user', 'knows', 'knowledge'), without features.
    * ``g`` -- all three relations, reusing the feature tensors of ``g_x``
      and ``g_y``.

    Parameters
    ----------
    idtype
        Passed as ``idtype`` to every ``dgl.heterograph`` call.
    """
    follows = ('user', 'follows', 'user')
    knows_user = ('user', 'knows', 'user')
    knows_knowledge = ('user', 'knows', 'knowledge')

    g_x = dgl.heterograph({follows: ([0, 1, 2], [1, 2, 3])}, idtype=idtype)
    g_y = dgl.heterograph({knows_user: ([0, 2], [2, 3])}, idtype=idtype)
    g_y = g_y.formats('csr')
    g_z = dgl.heterograph(
        {knows_knowledge: ([0, 1, 3], [2, 3, 4])}, idtype=idtype)

    # Feature tensors are created in the original order so random draws match.
    g_x.ndata['h'] = F.randn((4, 3))
    g_x.edata['w'] = F.randn((3, 2))
    g_y.ndata['hh'] = F.ones((4, 5))
    g_y.edata['ww'] = F.randn((2, 10))

    g = dgl.heterograph(
        {
            follows: ([0, 1, 2], [1, 2, 3]),
            knows_user: ([0, 2], [2, 3]),
            knows_knowledge: ([0, 1, 3], [2, 3, 4]),
        },
        idtype=idtype,
    )
    g.nodes['user'].data['h'] = g_x.ndata['h']
    g.edges['follows'].data['w'] = g_x.edata['w']
    g.nodes['user'].data['hh'] = g_y.ndata['hh']
    # 'knows' is shared by two relations here, so the canonical triple is used.
    g.edges[knows_user].data['ww'] = g_y.edata['ww']
    return [g, g_x, g_y, g_z]
260
261
262
263

def test_deserialize_old_heterograph_file():
    """Load the stored ``data/hetero1.bin`` file and verify its contents.

    Checks the id types of graphs 0 and 3, the all-ones 'hh' user feature of
    graphs 2 and 5, the 'follows' edges of graph 0, and the 'graph_label'
    entry of the label dictionary.
    """
    bin_path = os.path.join(os.path.dirname(__file__), "data/hetero1.bin")
    graphs, label_dict = dgl.load_graphs(bin_path)

    assert graphs[0].idtype == F.int64
    assert graphs[3].idtype == F.int32

    for position in (2, 5):
        hh = graphs[position].nodes['user'].data['hh']
        assert np.allclose(F.asnumpy(hh), np.ones((4, 5)))

    follows_edges = graphs[0]['follows'].edges()
    assert np.allclose(F.asnumpy(follows_edges[0]), np.array([0, 1, 2]))
    assert np.allclose(F.asnumpy(follows_edges[1]), np.array([1, 2, 3]))
    assert F.allclose(label_dict['graph_label'], F.ones(54))

def create_old_heterograph_files():
    """Write ``data/hetero1.bin`` next to this file.

    Saves the int64 and int32 graphs from ``create_heterographs`` together
    with a ``graph_label`` tensor of 54 ones. Not a test itself.
    """
    target = os.path.join(os.path.dirname(__file__), "data/hetero1.bin")
    graphs = create_heterographs(F.int64) + create_heterographs(F.int32)
    dgl.save_graphs(target, graphs, {"graph_label": F.ones(54)})
282
283


284
@unittest.skipIf(F._default_context_str == 'gpu', reason="GPU not implemented")
def test_serialize_heterograph():
    """Round-trip the graphs from ``create_heterographs2`` through save/load.

    Saves the int64 and int32 graph sets to a temporary file, loads them all
    back and checks id types, canonical edge types, node/edge type names, the
    all-ones 'hh' user feature and the 'follows' edges. Finally verifies that
    new features can still be assigned to a loaded graph.
    """
    # create a temporary file and immediately release it so DGL can open it.
    f = tempfile.NamedTemporaryFile(delete=False)
    path = f.name
    f.close()

    # try/finally guarantees the temporary file is removed even when saving,
    # loading or one of the assertions fails.
    try:
        g_list0 = create_heterographs2(F.int64) + create_heterographs2(F.int32)
        dgl.save_graphs(path, g_list0)

        g_list, _ = dgl.load_graphs(path)
        # Explicit length check: the pairwise loops below would otherwise
        # surface a count mismatch only as an IndexError.
        assert len(g_list) == len(g_list0)
        assert g_list[0].idtype == F.int64
        assert len(g_list[0].canonical_etypes) == 3
        for loaded, original in zip(g_list, g_list0):
            for j, etypes in enumerate(original.canonical_etypes):
                assert loaded.canonical_etypes[j] == etypes
        # NOTE: the format checks below are disabled in the original test.
        #assert g_list[1].restrict_format() == 'any'
        #assert g_list[2].restrict_format() == 'csr'

        assert g_list[4].idtype == F.int32
        assert np.allclose(
            F.asnumpy(g_list[2].nodes['user'].data['hh']), np.ones((4, 5)))
        assert np.allclose(
            F.asnumpy(g_list[6].nodes['user'].data['hh']), np.ones((4, 5)))
        edges = g_list[0]['follows'].edges()
        assert np.allclose(F.asnumpy(edges[0]), np.array([0, 1, 2]))
        assert np.allclose(F.asnumpy(edges[1]), np.array([1, 2, 3]))
        for loaded, original in zip(g_list, g_list0):
            assert loaded.ntypes == original.ntypes
            assert loaded.etypes == original.etypes

        # test set feature after load_graph
        g_list[3].nodes['user'].data['test'] = F.tensor([0, 1, 2, 4])
        g_list[3].edata['test'] = F.tensor([0, 1, 2])
    finally:
        os.unlink(path)

319
@unittest.skipIf(F._default_context_str == 'gpu', reason="GPU not implemented")
@pytest.mark.skip(reason="lack of permission on CI")
def test_serialize_heterograph_s3():
    """Round-trip the graphs from ``create_heterographs`` through an S3 path.

    Saves the int64 and int32 graph sets to ``s3://dglci-data-test/graph2.bin``
    and loads back the graphs at indices 0, 2 and 5, then checks the id type
    and 'follows' edges of the first and the all-ones 'hh' user feature of
    the other two. Currently skipped unconditionally by the pytest marker.
    """
    path = "s3://dglci-data-test/graph2.bin"
    g_list0 = create_heterographs(F.int64) + create_heterographs(F.int32)
    dgl.save_graphs(path, g_list0)

    # load_graphs returns a (graphs, labels) pair, as every other call in this
    # file relies on; unpack it so g_list is the list of graphs rather than
    # the pair itself.
    g_list, _ = dgl.load_graphs(path, [0, 2, 5])
    assert g_list[0].idtype == F.int64
    #assert g_list[1].restrict_format() == 'csr'
    assert np.allclose(
        F.asnumpy(g_list[1].nodes['user'].data['hh']), np.ones((4, 5)))
    assert np.allclose(
        F.asnumpy(g_list[2].nodes['user'].data['hh']), np.ones((4, 5)))
    edges = g_list[0]['follows'].edges()
    assert np.allclose(F.asnumpy(edges[0]), np.array([0, 1, 2]))
    assert np.allclose(F.asnumpy(edges[1]), np.array([1, 2, 3]))



VoVAllen's avatar
VoVAllen committed
339
if __name__ == "__main__":
    # Manual entry point for running individual cases without pytest.
    # Only test_load_old_files2 is currently enabled; uncomment others as
    # needed.
    #test_graph_serialize_with_feature(True)
    #test_graph_serialize_with_feature(False)
    #test_graph_serialize_without_feature(True)
    #test_graph_serialize_without_feature(False)
    #test_graph_serialize_with_labels(True)
    #test_graph_serialize_with_labels(False)
    #test_serialize_tensors()
    #test_serialize_empty_dict()
    # test_load_old_files1()
    test_load_old_files2()
    #test_serialize_heterograph()
    #test_serialize_heterograph_s3()
    #test_deserialize_old_heterograph_file()
    #create_old_heterograph_files()