test_serialize.py 14.1 KB
Newer Older
1
2
3
4
5
import os
import tempfile
import time
import unittest

VoVAllen's avatar
VoVAllen committed
6
7
8
import backend as F

import dgl
9
import dgl.ndarray as nd
10
11
12
import numpy as np
import pytest
import scipy as sp
13
14
from dgl import DGLGraph
from dgl.data.utils import load_labels, load_tensors, save_tensors
VoVAllen's avatar
VoVAllen committed
15
16
17
18

# Seed NumPy's global RNG so the random index permutations drawn by the tests
# below (np.random.permutation) are reproducible from run to run.
np.random.seed(44)


19
def generate_rand_graph(n, is_hetero):
    """Build a random graph from an ``n`` x ``n`` sparse 0/1 adjacency matrix.

    The adjacency matrix is a SciPy sparse random matrix requested with
    density 0.1; its non-zero pattern is cast to ``int64``.

    Parameters
    ----------
    n : int
        Number of rows and columns of the adjacency matrix.
    is_hetero : bool
        When true the graph is created with ``dgl.from_scipy``; otherwise it
        is created with the ``DGLGraph`` constructor and ``readonly=True``.

    Returns
    -------
    The graph produced by the selected constructor.
    """
    nonzero_pattern = sp.sparse.random(n, n, density=0.1, format="coo") != 0
    adjacency = nonzero_pattern.astype(np.int64)
    if not is_hetero:
        return DGLGraph(adjacency, readonly=True)
    return dgl.from_scipy(adjacency)
VoVAllen's avatar
VoVAllen committed
27
28


29
def construct_graph(n, is_hetero):
    """Create ``n`` random 30-node graphs, each carrying edge and node features.

    Every graph gets two edge features, ``"e1"`` (random normal) and ``"e2"``
    (all ones), each with 32 columns, and one node feature ``"n1"`` (random
    normal) with 64 columns.

    Parameters
    ----------
    n : int
        Number of graphs to create.
    is_hetero : bool
        Forwarded to ``generate_rand_graph`` to select the graph constructor.

    Returns
    -------
    list
        The ``n`` featured graphs, in creation order.
    """

    def _featured_graph():
        graph = generate_rand_graph(30, is_hetero)
        edge_count = graph.num_edges()
        graph.edata["e1"] = F.randn((edge_count, 32))
        graph.edata["e2"] = F.ones((edge_count, 32))
        graph.ndata["n1"] = F.randn((graph.num_nodes(), 64))
        return graph

    return [_featured_graph() for _ in range(n)]


40
41
@unittest.skipIf(F._default_context_str == "gpu", reason="GPU not implemented")
@pytest.mark.parametrize("is_hetero", [True, False])
def test_graph_serialize_with_feature(is_hetero):
    """Round-trip featured graphs through ``save_graphs`` / ``load_graphs``.

    Saves 100 graphs built by ``construct_graph``, loads them back in a random
    order, and checks that the first loaded graph matches the original it was
    loaded from (nodes, edges, and the ``e1`` / ``e2`` / ``n1`` features).
    Construction, save and load timings are printed for information only.
    """
    num_graphs = 100

    t0 = time.time()

    g_list = construct_graph(num_graphs, is_hetero)

    t1 = time.time()

    # create a temporary file and immediately release it so DGL can open it.
    with tempfile.NamedTemporaryFile(delete=False) as f:
        path = f.name

    try:
        dgl.save_graphs(path, g_list)

        t2 = time.time()
        idx_list = np.random.permutation(np.arange(num_graphs)).tolist()
        loadg_list, _ = dgl.load_graphs(path, idx_list)

        t3 = time.time()
        # loadg_list[0] was loaded from position idx_list[0] of the saved list.
        idx = idx_list[0]
        load_g = loadg_list[0]
        print("Save time: {} s".format(t2 - t1))
        print("Load time: {} s".format(t3 - t2))
        print("Graph Construction time: {} s".format(t1 - t0))

        assert F.allclose(load_g.nodes(), g_list[idx].nodes())

        load_edges = load_g.all_edges("uv", "eid")
        g_edges = g_list[idx].all_edges("uv", "eid")
        assert F.allclose(load_edges[0], g_edges[0])
        assert F.allclose(load_edges[1], g_edges[1])
        assert F.allclose(load_g.edata["e1"], g_list[idx].edata["e1"])
        assert F.allclose(load_g.edata["e2"], g_list[idx].edata["e2"])
        assert F.allclose(load_g.ndata["n1"], g_list[idx].ndata["n1"])
    finally:
        # Remove the temp file even when a save/load call or assertion fails.
        os.unlink(path)


82
83
@unittest.skipIf(F._default_context_str == "gpu", reason="GPU not implemented")
@pytest.mark.parametrize("is_hetero", [True, False])
def test_graph_serialize_without_feature(is_hetero):
    """Round-trip feature-less graphs through ``save_graphs`` / ``load_graphs``.

    Saves 100 random graphs, loads them back in a random order, and checks
    that the first loaded graph has the same nodes and edges as the original
    it was loaded from.
    """
    num_graphs = 100
    g_list = [generate_rand_graph(30, is_hetero) for _ in range(num_graphs)]

    # create a temporary file and immediately release it so DGL can open it.
    with tempfile.NamedTemporaryFile(delete=False) as f:
        path = f.name

    try:
        dgl.save_graphs(path, g_list)

        idx_list = np.random.permutation(np.arange(num_graphs)).tolist()
        loadg_list, _ = dgl.load_graphs(path, idx_list)

        # loadg_list[0] was loaded from position idx_list[0] of the saved list.
        idx = idx_list[0]
        load_g = loadg_list[0]

        assert F.allclose(load_g.nodes(), g_list[idx].nodes())

        load_edges = load_g.all_edges("uv", "eid")
        g_edges = g_list[idx].all_edges("uv", "eid")
        assert F.allclose(load_edges[0], g_edges[0])
        assert F.allclose(load_edges[1], g_edges[1])
    finally:
        # Remove the temp file even when a save/load call or assertion fails.
        os.unlink(path)

110
111
112

@unittest.skipIf(F._default_context_str == "gpu", reason="GPU not implemented")
@pytest.mark.parametrize("is_hetero", [True, False])
def test_graph_serialize_with_labels(is_hetero):
    """Round-trip graphs together with a label dict.

    Saves 100 random graphs plus a ``{"label": zeros}`` dict, then checks that
    the labels returned by both ``dgl.load_graphs`` and ``load_labels`` equal
    the saved ones, and that the first loaded graph has the same nodes and
    edges as the original it was loaded from.
    """
    num_graphs = 100
    g_list = [generate_rand_graph(30, is_hetero) for _ in range(num_graphs)]
    labels = {"label": F.zeros((num_graphs, 1))}

    # create a temporary file and immediately release it so DGL can open it.
    with tempfile.NamedTemporaryFile(delete=False) as f:
        path = f.name

    try:
        dgl.save_graphs(path, g_list, labels)

        idx_list = np.random.permutation(np.arange(num_graphs)).tolist()
        loadg_list, l_labels0 = dgl.load_graphs(path, idx_list)
        l_labels = load_labels(path)
        assert F.allclose(l_labels["label"], labels["label"])
        assert F.allclose(l_labels0["label"], labels["label"])

        # loadg_list[0] was loaded from position idx_list[0] of the saved list.
        idx = idx_list[0]
        load_g = loadg_list[0]

        assert F.allclose(load_g.nodes(), g_list[idx].nodes())

        load_edges = load_g.all_edges("uv", "eid")
        g_edges = g_list[idx].all_edges("uv", "eid")
        assert F.allclose(load_edges[0], g_edges[0])
        assert F.allclose(load_edges[1], g_edges[1])
    finally:
        # Remove the temp file even when a save/load call or assertion fails.
        os.unlink(path)


144
145
146
147
148
149
def test_serialize_tensors():
    """Round-trip a dict of tensors through ``save_tensors`` / ``load_tensors``.

    The dict is loaded twice: once with the default return type and once with
    ``return_dgl_ndarray=True``. In both cases every saved key must be present
    and its values must equal the saved tensor; in the second case each value
    must also be an ``nd.NDArray``.
    """
    # create a temporary file and immediately release it so DGL can open it.
    with tempfile.NamedTemporaryFile(delete=False) as f:
        path = f.name

    try:
        tensor_dict = {
            "a": F.tensor([1, 3, -1, 0], dtype=F.int64),
            "1@1": F.tensor([1.5, 2], dtype=F.float32),
        }

        save_tensors(path, tensor_dict)

        load_tensor_dict = load_tensors(path)

        for key in tensor_dict:
            assert key in load_tensor_dict
            assert np.array_equal(
                F.asnumpy(load_tensor_dict[key]), F.asnumpy(tensor_dict[key])
            )

        load_nd_dict = load_tensors(path, return_dgl_ndarray=True)

        for key in tensor_dict:
            assert key in load_nd_dict
            assert isinstance(load_nd_dict[key], nd.NDArray)
            assert np.array_equal(
                load_nd_dict[key].asnumpy(), F.asnumpy(tensor_dict[key])
            )
    finally:
        # Remove the temp file even when a save/load call or assertion fails.
        os.unlink(path)

176

177
178
179
180
181
182
183
184
185
186
187
188
def test_serialize_empty_dict():
    """Check that an empty tensor dict survives a save/load round trip.

    ``load_tensors`` must return a ``dict`` with no entries after an empty
    dict was written with ``save_tensors``.
    """
    # create a temporary file and immediately release it so DGL can open it.
    with tempfile.NamedTemporaryFile(delete=False) as f:
        path = f.name

    try:
        tensor_dict = {}

        save_tensors(path, tensor_dict)

        load_tensor_dict = load_tensors(path)
        assert isinstance(load_tensor_dict, dict)
        assert len(load_tensor_dict) == 0
    finally:
        # Remove the temp file even when a save/load call or assertion fails.
        os.unlink(path)
192

193
194

def test_load_old_files1():
    """Load the checked-in ``data/1.bin`` file and compare with ``data/1.npy``.

    ``1.npy`` holds the reference values: the index of the graph to check,
    a node count (not used by this test), the two edge endpoint arrays, and
    the ``e1`` / ``e2`` / ``n1`` feature arrays.
    """
    here = os.path.dirname(__file__)
    graphs, _ = dgl.load_graphs(os.path.join(here, "data/1.bin"))
    reference = np.load(os.path.join(here, "data/1.npy"), allow_pickle=True)
    idx, _num_nodes, edge0, edge1, edata_e1, edata_e2, ndata_n1 = reference

    graph = graphs[idx]
    loaded_edges = graph.all_edges("uv", "eid")

    for position, expected in enumerate((edge0, edge1)):
        assert np.allclose(F.asnumpy(loaded_edges[position]), expected)
    for name, expected in (("e1", edata_e1), ("e2", edata_e2)):
        assert np.allclose(F.asnumpy(graph.edata[name]), expected)
    assert np.allclose(F.asnumpy(graph.ndata["n1"]), ndata_n1)
210
211
212


def test_load_old_files2():
    """Load the checked-in ``data/2.bin`` file and compare with ``data/2.npy``.

    The labels returned by both ``dgl.load_graphs`` and ``load_labels`` must
    match the reference labels, and the edges of the reference graph index
    must match the reference edge endpoint arrays.
    """
    here = os.path.dirname(__file__)
    bin_path = os.path.join(here, "data/2.bin")

    graphs, labels0 = dgl.load_graphs(bin_path)
    labels1 = load_labels(os.path.join(here, "data/2.bin"))
    reference = np.load(os.path.join(here, "data/2.npy"), allow_pickle=True)
    idx, edges0, edges1, np_labels = reference

    for loaded_labels in (labels0, labels1):
        assert np.allclose(F.asnumpy(loaded_labels["label"]), np_labels)

    graph = graphs[idx]
    print(graph)
    loaded_edges = graph.all_edges("uv", "eid")
    for position, expected in enumerate((edges0, edges1)):
        assert np.allclose(F.asnumpy(loaded_edges[position]), expected)


230
def create_heterographs(idtype):
    """Build three small heterographs over ``user`` nodes for serialization tests.

    * ``g_x`` has only the ``follows`` relation, with node feature ``h`` and
      edge feature ``w``.
    * ``g_y`` has only the ``knows`` relation, is restricted with
      ``.formats("csr")``, and has node feature ``hh`` (all ones) and edge
      feature ``ww``.
    * ``g`` has both relations and reuses the feature tensors of ``g_x`` and
      ``g_y``.

    Parameters
    ----------
    idtype
        Index dtype forwarded to every ``dgl.heterograph`` call.

    Returns
    -------
    list
        ``[g, g_x, g_y]``.
    """
    follows_rel = ("user", "follows", "user")
    knows_rel = ("user", "knows", "user")

    g_x = dgl.heterograph({follows_rel: ([0, 1, 2], [1, 2, 3])}, idtype=idtype)
    g_y = dgl.heterograph({knows_rel: ([0, 2], [2, 3])}, idtype=idtype)
    g_y = g_y.formats("csr")

    g_x.ndata["h"] = F.randn((4, 3))
    g_x.edata["w"] = F.randn((3, 2))
    g_y.ndata["hh"] = F.ones((4, 5))
    g_y.edata["ww"] = F.randn((2, 10))

    combined_edges = {
        follows_rel: ([0, 1, 2], [1, 2, 3]),
        knows_rel: ([0, 2], [2, 3]),
    }
    g = dgl.heterograph(combined_edges, idtype=idtype)

    user_data = g.nodes["user"].data
    user_data["h"] = g_x.ndata["h"]
    user_data["hh"] = g_y.ndata["hh"]
    g.edges["follows"].data["w"] = g_x.edata["w"]
    g.edges["knows"].data["ww"] = g_y.edata["ww"]
    return [g, g_x, g_y]

254

255
def create_heterographs2(idtype):
    """Build four small heterographs, including one with a second node type.

    * ``g_x`` has only ``user-follows-user``, with node feature ``h`` and edge
      feature ``w``.
    * ``g_y`` has only ``user-knows-user``, is restricted with
      ``.formats("csr")``, and has node feature ``hh`` (all ones) and edge
      feature ``ww``.
    * ``g_z`` has only ``user-knows-knowledge`` and no features.
    * ``g`` has all three relations and reuses the feature tensors of ``g_x``
      and ``g_y``.

    Parameters
    ----------
    idtype
        Index dtype forwarded to every ``dgl.heterograph`` call.

    Returns
    -------
    list
        ``[g, g_x, g_y, g_z]``.
    """
    follows_rel = ("user", "follows", "user")
    knows_user_rel = ("user", "knows", "user")
    knows_knowledge_rel = ("user", "knows", "knowledge")

    g_x = dgl.heterograph({follows_rel: ([0, 1, 2], [1, 2, 3])}, idtype=idtype)
    g_y = dgl.heterograph({knows_user_rel: ([0, 2], [2, 3])}, idtype=idtype)
    g_y = g_y.formats("csr")
    g_z = dgl.heterograph(
        {knows_knowledge_rel: ([0, 1, 3], [2, 3, 4])}, idtype=idtype
    )

    g_x.ndata["h"] = F.randn((4, 3))
    g_x.edata["w"] = F.randn((3, 2))
    g_y.ndata["hh"] = F.ones((4, 5))
    g_y.edata["ww"] = F.randn((2, 10))

    combined_edges = {
        follows_rel: ([0, 1, 2], [1, 2, 3]),
        knows_user_rel: ([0, 2], [2, 3]),
        knows_knowledge_rel: ([0, 1, 3], [2, 3, 4]),
    }
    g = dgl.heterograph(combined_edges, idtype=idtype)

    g.nodes["user"].data["h"] = g_x.ndata["h"]
    g.edges["follows"].data["w"] = g_x.edata["w"]
    g.nodes["user"].data["hh"] = g_y.ndata["hh"]
    # "knows" names two relations here, so the full canonical type is used.
    g.edges[("user", "knows", "user")].data["ww"] = g_y.edata["ww"]
    return [g, g_x, g_y, g_z]
282

283

284
def test_deserialize_old_heterograph_file():
    """Load the checked-in ``data/hetero1.bin`` file and spot-check its content.

    Checks the index dtype of graphs 0 and 3, the all-ones ``hh`` user feature
    of graphs 2 and 5, the ``follows`` edges of graph 0, and the
    ``graph_label`` entry of the label dict.
    """
    bin_path = os.path.join(os.path.dirname(__file__), "data/hetero1.bin")
    graphs, label_dict = dgl.load_graphs(bin_path)

    for position, expected_idtype in ((0, F.int64), (3, F.int32)):
        assert graphs[position].idtype == expected_idtype

    for position in (2, 5):
        hh = graphs[position].nodes["user"].data["hh"]
        assert np.allclose(F.asnumpy(hh), np.ones((4, 5)))

    follows_edges = graphs[0]["follows"].edges()
    for position, expected in enumerate(([0, 1, 2], [1, 2, 3])):
        assert np.allclose(
            F.asnumpy(follows_edges[position]), np.array(expected)
        )

    assert F.allclose(label_dict["graph_label"], F.ones(54))

300
301

def create_old_heterograph_files():
    """Write ``data/hetero1.bin`` next to this file.

    The file contains the int64 and int32 variants of the graphs built by
    ``create_heterographs`` plus a ``graph_label`` tensor of 54 ones. This is
    a helper, not a test: nothing in this file calls it.
    """
    graphs = create_heterographs(F.int64) + create_heterographs(F.int32)
    target = os.path.join(os.path.dirname(__file__), "data/hetero1.bin")
    dgl.save_graphs(target, graphs, {"graph_label": F.ones(54)})
306
307


308
@unittest.skipIf(F._default_context_str == "gpu", reason="GPU not implemented")
def test_serialize_heterograph():
    """Round-trip the ``create_heterographs2`` graphs (int64 and int32).

    After saving and reloading all eight graphs, checks index dtypes,
    canonical edge types, node/edge type names, the all-ones ``hh`` user
    feature, the ``follows`` edges of the first graph, and that new features
    can still be assigned to a loaded graph.
    """
    # create a temporary file and immediately release it so DGL can open it.
    with tempfile.NamedTemporaryFile(delete=False) as f:
        path = f.name

    try:
        g_list0 = create_heterographs2(F.int64) + create_heterographs2(F.int32)
        dgl.save_graphs(path, g_list0)

        g_list, _ = dgl.load_graphs(path)
        # Guard the pairwise loops below: zip() would silently stop early if
        # fewer graphs were loaded than saved.
        assert len(g_list) == len(g_list0)
        assert g_list[0].idtype == F.int64
        assert len(g_list[0].canonical_etypes) == 3
        for loaded, original in zip(g_list, g_list0):
            for j, etypes in enumerate(original.canonical_etypes):
                assert loaded.canonical_etypes[j] == etypes
        # assert g_list[1].restrict_format() == 'any'
        # assert g_list[2].restrict_format() == 'csr'

        assert g_list[4].idtype == F.int32
        assert np.allclose(
            F.asnumpy(g_list[2].nodes["user"].data["hh"]), np.ones((4, 5))
        )
        assert np.allclose(
            F.asnumpy(g_list[6].nodes["user"].data["hh"]), np.ones((4, 5))
        )
        edges = g_list[0]["follows"].edges()
        assert np.allclose(F.asnumpy(edges[0]), np.array([0, 1, 2]))
        assert np.allclose(F.asnumpy(edges[1]), np.array([1, 2, 3]))
        for loaded, original in zip(g_list, g_list0):
            assert loaded.ntypes == original.ntypes
            assert loaded.etypes == original.etypes

        # test set feature after load_graph
        g_list[3].nodes["user"].data["test"] = F.tensor([0, 1, 2, 4])
        g_list[3].edata["test"] = F.tensor([0, 1, 2])
    finally:
        # Remove the temp file even when a save/load call or assertion fails.
        os.unlink(path)

345
346

@unittest.skipIf(F._default_context_str == "gpu", reason="GPU not implemented")
@pytest.mark.skip(reason="lack of permission on CI")
def test_serialize_heterograph_s3():
    """Round-trip the ``create_heterographs`` graphs through an S3 path.

    Saves the int64 and int32 graph sets, loads back graphs 0, 2 and 5, and
    checks the index dtype, the all-ones ``hh`` user feature, and the
    ``follows`` edges. Currently skipped on CI.
    """
    path = "s3://dglci-data-test/graph2.bin"
    g_list0 = create_heterographs(F.int64) + create_heterographs(F.int32)
    dgl.save_graphs(path, g_list0)

    # load_graphs returns a (graphs, labels) pair, as unpacked by every other
    # test in this file; indexing the pair directly would not yield graphs.
    g_list, _ = dgl.load_graphs(path, [0, 2, 5])
    assert g_list[0].idtype == F.int64
    # assert g_list[1].restrict_format() == 'csr'
    assert np.allclose(
        F.asnumpy(g_list[1].nodes["user"].data["hh"]), np.ones((4, 5))
    )
    assert np.allclose(
        F.asnumpy(g_list[2].nodes["user"].data["hh"]), np.ones((4, 5))
    )
    edges = g_list[0]["follows"].edges()
    assert np.allclose(F.asnumpy(edges[0]), np.array([0, 1, 2]))
    assert np.allclose(F.asnumpy(edges[1]), np.array([1, 2, 3]))


367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
@unittest.skipIf(F._default_context_str == "gpu", reason="GPU not implemented")
@pytest.mark.parametrize("is_hetero", [True, False])
@pytest.mark.parametrize(
    "formats",
    [
        "coo",
        "csr",
        "csc",
        ["coo", "csc"],
        ["coo", "csr"],
        ["csc", "csr"],
        ["coo", "csr", "csc"],
    ],
)
def test_graph_serialize_with_formats(is_hetero, formats):
    """Round-trip graphs saved with an explicit ``formats`` argument.

    ``formats`` is either a single sparse format name or a list of names.
    After loading, every requested format must be listed under ``"created"``
    in the loaded graph's ``formats()`` result, and the graph's nodes and
    edges must match the original it was loaded from.
    """
    num_graphs = 100
    g_list = [generate_rand_graph(30, is_hetero) for _ in range(num_graphs)]

    # create a temporary file and immediately release it so DGL can open it.
    with tempfile.NamedTemporaryFile(delete=False) as f:
        path = f.name

    try:
        dgl.save_graphs(path, g_list, formats=formats)

        idx_list = np.random.permutation(np.arange(num_graphs)).tolist()
        loadg_list, _ = dgl.load_graphs(path, idx_list)

        # loadg_list[0] was loaded from position idx_list[0] of the saved list.
        idx = idx_list[0]
        load_g = loadg_list[0]
        g_formats = load_g.formats()

        # verify formats (normalize a single name to a one-element list)
        expected_formats = formats if isinstance(formats, list) else [formats]
        for fmt in expected_formats:
            assert fmt in g_formats["created"]

        assert F.allclose(load_g.nodes(), g_list[idx].nodes())

        load_edges = load_g.all_edges("uv", "eid")
        g_edges = g_list[idx].all_edges("uv", "eid")
        assert F.allclose(load_edges[0], g_edges[0])
        assert F.allclose(load_edges[1], g_edges[1])
    finally:
        # Remove the temp file even when a save/load call or assertion fails.
        os.unlink(path)


@unittest.skipIf(F._default_context_str == "gpu", reason="GPU not implemented")
def test_graph_serialize_with_restricted_formats():
    """Saving must fail when the requested format is not allowed by the graph.

    The graph is restricted to ``coo`` via ``formats(["coo"])``; asking
    ``save_graphs`` for ``csr`` is expected to raise.
    """
    g = dgl.rand_graph(100, 200)
    g = g.formats(["coo"])
    g_list = [g]

    # create a temporary file and immediately release it so DGL can open it.
    with tempfile.NamedTemporaryFile(delete=False) as f:
        path = f.name

    try:
        # Exception rather than a bare except, so KeyboardInterrupt and
        # SystemExit are not mistaken for the expected failure.
        with pytest.raises(Exception):
            dgl.save_graphs(path, g_list, formats=["csr"])
    finally:
        # Remove the temp file whether or not the expected error was raised.
        os.unlink(path)


@unittest.skipIf(F._default_context_str == "gpu", reason="GPU not implemented")
def test_deserialize_old_graph():
    """Load the checked-in ``data/graph_0.9a220622.dgl`` file and check it.

    The first loaded graph must report ``coo`` as created, ``csr`` and ``csc``
    as not created, and have 100 nodes and 200 edges.
    """
    expected_nodes, expected_edges = 100, 200
    here = os.path.dirname(__file__)
    graphs, _ = dgl.load_graphs(os.path.join(here, "data/graph_0.9a220622.dgl"))
    graph = graphs[0]

    assert "coo" in graph.formats()["created"]
    for missing_format in ("csr", "csc"):
        assert missing_format in graph.formats()["not created"]
    assert expected_nodes == graph.num_nodes()
    assert expected_edges == graph.num_edges()