"vscode:/vscode.git/clone" did not exist on "234bba0cdc7517647e58764a2d5b99375b0d0579"
test_serialize.py 12.3 KB
Newer Older
1
2
3
4
5
import os
import tempfile
import time
import unittest

VoVAllen's avatar
VoVAllen committed
6
7
import backend as F
import numpy as np
8
import pytest
9
import scipy as sp
VoVAllen's avatar
VoVAllen committed
10
11

import dgl
12
import dgl.ndarray as nd
13
14
from dgl import DGLGraph
from dgl.data.utils import load_labels, load_tensors, save_tensors
VoVAllen's avatar
VoVAllen committed
15
16
17
18

np.random.seed(44)


19
def generate_rand_graph(n, is_hetero):
    """Build a random ``n``-node graph from a sparse 0/1 adjacency matrix.

    A random ``n x n`` COO matrix with density 0.1 is drawn and its non-zero
    pattern is converted to an ``int64`` matrix. That matrix is passed to
    ``dgl.from_scipy`` when ``is_hetero`` is true, and to the read-only
    ``DGLGraph`` constructor otherwise.
    """
    nonzero_pattern = sp.sparse.random(n, n, density=0.1, format="coo") != 0
    adjacency = nonzero_pattern.astype(np.int64)
    if not is_hetero:
        return DGLGraph(adjacency, readonly=True)
    return dgl.from_scipy(adjacency)
VoVAllen's avatar
VoVAllen committed
27
28


29
def construct_graph(n, is_hetero):
    """Return a list of ``n`` random 30-node graphs with attached features.

    Each graph gets edge features ``e1`` (random, width 32) and ``e2`` (ones,
    width 32) and a node feature ``n1`` (random, width 64).
    """
    graphs = []
    for _ in range(n):
        graph = generate_rand_graph(30, is_hetero)
        edge_count = graph.number_of_edges()
        node_count = graph.number_of_nodes()
        graph.edata["e1"] = F.randn((edge_count, 32))
        graph.edata["e2"] = F.ones((edge_count, 32))
        graph.ndata["n1"] = F.randn((node_count, 64))
        graphs.append(graph)
    return graphs


40
41
@unittest.skipIf(F._default_context_str == "gpu", reason="GPU not implemented")
@pytest.mark.parametrize("is_hetero", [True, False])
def test_graph_serialize_with_feature(is_hetero):
    """Save graphs that carry features, reload them shuffled, and compare.

    One hundred graphs from ``construct_graph`` are written with
    ``dgl.save_graphs`` and read back with ``dgl.load_graphs`` using a random
    permutation of indices. The first loaded graph is compared against the
    original at the matching index: nodes, edges, and the ``e1``/``e2``/``n1``
    features. Construction, save and load timings are printed.
    """
    num_graphs = 100

    t0 = time.time()
    g_list = construct_graph(num_graphs, is_hetero)
    t1 = time.time()

    # create a temporary file and immediately release it so DGL can open it.
    f = tempfile.NamedTemporaryFile(delete=False)
    path = f.name
    f.close()

    try:
        dgl.save_graphs(path, g_list)

        t2 = time.time()
        idx_list = np.random.permutation(np.arange(num_graphs)).tolist()
        loadg_list, _ = dgl.load_graphs(path, idx_list)

        t3 = time.time()
        idx = idx_list[0]
        load_g = loadg_list[0]
        print("Save time: {} s".format(t2 - t1))
        print("Load time: {} s".format(t3 - t2))
        print("Graph Construction time: {} s".format(t1 - t0))

        assert F.allclose(load_g.nodes(), g_list[idx].nodes())

        load_edges = load_g.all_edges("uv", "eid")
        g_edges = g_list[idx].all_edges("uv", "eid")
        assert F.allclose(load_edges[0], g_edges[0])
        assert F.allclose(load_edges[1], g_edges[1])
        assert F.allclose(load_g.edata["e1"], g_list[idx].edata["e1"])
        assert F.allclose(load_g.edata["e2"], g_list[idx].edata["e2"])
        assert F.allclose(load_g.ndata["n1"], g_list[idx].ndata["n1"])
    finally:
        # The file was created with delete=False, so remove it here even when
        # saving, loading or an assertion above fails.
        os.unlink(path)


82
83
@unittest.skipIf(F._default_context_str == "gpu", reason="GPU not implemented")
@pytest.mark.parametrize("is_hetero", [True, False])
def test_graph_serialize_without_feature(is_hetero):
    """Save feature-less graphs, reload them shuffled, and compare structure.

    One hundred graphs from ``generate_rand_graph`` are written with
    ``dgl.save_graphs`` and read back in a random index order. The first
    loaded graph must have the same nodes and edges as the original at the
    matching index.
    """
    num_graphs = 100
    g_list = [generate_rand_graph(30, is_hetero) for _ in range(num_graphs)]

    # create a temporary file and immediately release it so DGL can open it.
    f = tempfile.NamedTemporaryFile(delete=False)
    path = f.name
    f.close()

    try:
        dgl.save_graphs(path, g_list)

        idx_list = np.random.permutation(np.arange(num_graphs)).tolist()
        loadg_list, _ = dgl.load_graphs(path, idx_list)

        idx = idx_list[0]
        load_g = loadg_list[0]

        assert F.allclose(load_g.nodes(), g_list[idx].nodes())

        load_edges = load_g.all_edges("uv", "eid")
        g_edges = g_list[idx].all_edges("uv", "eid")
        assert F.allclose(load_edges[0], g_edges[0])
        assert F.allclose(load_edges[1], g_edges[1])
    finally:
        # The file was created with delete=False, so remove it here even when
        # saving, loading or an assertion above fails.
        os.unlink(path)

110
111
112

@unittest.skipIf(F._default_context_str == "gpu", reason="GPU not implemented")
@pytest.mark.parametrize("is_hetero", [True, False])
def test_graph_serialize_with_labels(is_hetero):
    """Save graphs together with a label dict and verify both on reload.

    The labels are read back twice, once as the second return value of
    ``dgl.load_graphs`` and once through ``load_labels``, and both must match
    what was saved. The first loaded graph is also compared structurally with
    the original at the matching shuffled index.
    """
    num_graphs = 100
    g_list = [generate_rand_graph(30, is_hetero) for _ in range(num_graphs)]
    labels = {"label": F.zeros((num_graphs, 1))}

    # create a temporary file and immediately release it so DGL can open it.
    f = tempfile.NamedTemporaryFile(delete=False)
    path = f.name
    f.close()

    try:
        dgl.save_graphs(path, g_list, labels)

        idx_list = np.random.permutation(np.arange(num_graphs)).tolist()
        loadg_list, l_labels0 = dgl.load_graphs(path, idx_list)
        l_labels = load_labels(path)
        assert F.allclose(l_labels["label"], labels["label"])
        assert F.allclose(l_labels0["label"], labels["label"])

        idx = idx_list[0]
        load_g = loadg_list[0]

        assert F.allclose(load_g.nodes(), g_list[idx].nodes())

        load_edges = load_g.all_edges("uv", "eid")
        g_edges = g_list[idx].all_edges("uv", "eid")
        assert F.allclose(load_edges[0], g_edges[0])
        assert F.allclose(load_edges[1], g_edges[1])
    finally:
        # The file was created with delete=False, so remove it here even when
        # saving, loading or an assertion above fails.
        os.unlink(path)


144
145
146
147
148
149
def test_serialize_tensors():
    """Round-trip a dict of tensors through ``save_tensors``/``load_tensors``.

    The saved dict is loaded twice: once with default arguments and once with
    ``return_dgl_ndarray=True``, in which case every value must be an
    ``nd.NDArray``. In both cases every key must be present and the values
    must equal the originals element-wise.
    """
    # create a temporary file and immediately release it so DGL can open it.
    f = tempfile.NamedTemporaryFile(delete=False)
    path = f.name
    f.close()

    try:
        tensor_dict = {
            "a": F.tensor([1, 3, -1, 0], dtype=F.int64),
            "1@1": F.tensor([1.5, 2], dtype=F.float32),
        }

        save_tensors(path, tensor_dict)

        load_tensor_dict = load_tensors(path)

        for key in tensor_dict:
            assert key in load_tensor_dict
            assert np.array_equal(
                F.asnumpy(load_tensor_dict[key]), F.asnumpy(tensor_dict[key])
            )

        load_nd_dict = load_tensors(path, return_dgl_ndarray=True)

        for key in tensor_dict:
            assert key in load_nd_dict
            assert isinstance(load_nd_dict[key], nd.NDArray)
            assert np.array_equal(
                load_nd_dict[key].asnumpy(), F.asnumpy(tensor_dict[key])
            )
    finally:
        # The file was created with delete=False, so remove it here even when
        # saving, loading or an assertion above fails.
        os.unlink(path)

176

177
178
179
180
181
182
183
184
185
186
187
188
def test_serialize_empty_dict():
    """Check that saving an empty tensor dict loads back as an empty dict."""
    # create a temporary file and immediately release it so DGL can open it.
    f = tempfile.NamedTemporaryFile(delete=False)
    path = f.name
    f.close()

    try:
        tensor_dict = {}

        save_tensors(path, tensor_dict)

        load_tensor_dict = load_tensors(path)
        assert isinstance(load_tensor_dict, dict)
        assert len(load_tensor_dict) == 0
    finally:
        # The file was created with delete=False, so remove it here even when
        # saving, loading or an assertion above fails.
        os.unlink(path)
192

193
194

def test_load_old_files1():
    """Load the checked-in ``data/1.bin`` and compare it with ``data/1.npy``.

    The ``.npy`` file holds the index of the graph to inspect followed by its
    expected edge endpoints and ``e1``/``e2``/``n1`` feature arrays.
    """
    here = os.path.dirname(__file__)
    loadg_list, _ = dgl.load_graphs(os.path.join(here, "data/1.bin"))
    expected = np.load(os.path.join(here, "data/1.npy"), allow_pickle=True)
    idx, _num_nodes, edge0, edge1, edata_e1, edata_e2, ndata_n1 = expected

    load_g = loadg_list[idx]
    load_edges = load_g.all_edges("uv", "eid")

    loaded_src = F.asnumpy(load_edges[0])
    loaded_dst = F.asnumpy(load_edges[1])
    assert np.allclose(loaded_src, edge0)
    assert np.allclose(loaded_dst, edge1)

    assert np.allclose(F.asnumpy(load_g.edata["e1"]), edata_e1)
    assert np.allclose(F.asnumpy(load_g.edata["e2"]), edata_e2)
    assert np.allclose(F.asnumpy(load_g.ndata["n1"]), ndata_n1)
210
211
212


def test_load_old_files2():
    """Load the checked-in ``data/2.bin`` and compare it with ``data/2.npy``.

    Labels are read both via ``dgl.load_graphs`` and via ``load_labels`` and
    must match the stored array. The graph at the stored index is printed and
    its edge endpoints are compared with the stored ones.
    """
    here = os.path.dirname(__file__)
    loadg_list, labels0 = dgl.load_graphs(os.path.join(here, "data/2.bin"))
    labels1 = load_labels(os.path.join(here, "data/2.bin"))
    expected = np.load(os.path.join(here, "data/2.npy"), allow_pickle=True)
    idx, edges0, edges1, np_labels = expected

    assert np.allclose(F.asnumpy(labels0["label"]), np_labels)
    assert np.allclose(F.asnumpy(labels1["label"]), np_labels)

    load_g = loadg_list[idx]
    print(load_g)
    load_edges = load_g.all_edges("uv", "eid")
    loaded_src = F.asnumpy(load_edges[0])
    loaded_dst = F.asnumpy(load_edges[1])
    assert np.allclose(loaded_src, edges0)
    assert np.allclose(loaded_dst, edges1)


230
def create_heterographs(idtype):
    """Create three small heterographs that share their feature tensors.

    Returns ``[g, g_x, g_y]``: ``g_x`` has only the ``follows`` relation,
    ``g_y`` has only the ``knows`` relation (built with ``.formats("csr")``),
    and ``g`` has both relations and reuses the node/edge features of ``g_x``
    and ``g_y``.
    """
    follows_rel = ("user", "follows", "user")
    knows_rel = ("user", "knows", "user")

    g_x = dgl.heterograph({follows_rel: ([0, 1, 2], [1, 2, 3])}, idtype=idtype)
    g_y = dgl.heterograph({knows_rel: ([0, 2], [2, 3])}, idtype=idtype)
    g_y = g_y.formats("csr")

    # Feature tensors are created in this fixed order so that the sequence of
    # random draws stays the same.
    g_x.ndata["h"] = F.randn((4, 3))
    g_x.edata["w"] = F.randn((3, 2))
    g_y.ndata["hh"] = F.ones((4, 5))
    g_y.edata["ww"] = F.randn((2, 10))

    combined_edges = {
        follows_rel: ([0, 1, 2], [1, 2, 3]),
        knows_rel: ([0, 2], [2, 3]),
    }
    g = dgl.heterograph(combined_edges, idtype=idtype)
    g.nodes["user"].data["h"] = g_x.ndata["h"]
    g.nodes["user"].data["hh"] = g_y.ndata["hh"]
    g.edges["follows"].data["w"] = g_x.edata["w"]
    g.edges["knows"].data["ww"] = g_y.edata["ww"]
    return [g, g_x, g_y]

254

255
def create_heterographs2(idtype):
    """Create four small heterographs, one of them with two node types.

    Returns ``[g, g_x, g_y, g_z]``: ``g_x`` has the ``follows`` relation,
    ``g_y`` the user-to-user ``knows`` relation (built with
    ``.formats("csr")``), ``g_z`` the user-to-knowledge ``knows`` relation,
    and ``g`` all three relations while reusing the features of ``g_x`` and
    ``g_y``.
    """
    follows_rel = ("user", "follows", "user")
    knows_user_rel = ("user", "knows", "user")
    knows_knowledge_rel = ("user", "knows", "knowledge")

    g_x = dgl.heterograph({follows_rel: ([0, 1, 2], [1, 2, 3])}, idtype=idtype)
    g_y = dgl.heterograph({knows_user_rel: ([0, 2], [2, 3])}, idtype=idtype)
    g_y = g_y.formats("csr")
    g_z = dgl.heterograph(
        {knows_knowledge_rel: ([0, 1, 3], [2, 3, 4])}, idtype=idtype
    )

    # Feature tensors are created in this fixed order so that the sequence of
    # random draws stays the same.
    g_x.ndata["h"] = F.randn((4, 3))
    g_x.edata["w"] = F.randn((3, 2))
    g_y.ndata["hh"] = F.ones((4, 5))
    g_y.edata["ww"] = F.randn((2, 10))

    combined_edges = {
        follows_rel: ([0, 1, 2], [1, 2, 3]),
        knows_user_rel: ([0, 2], [2, 3]),
        knows_knowledge_rel: ([0, 1, 3], [2, 3, 4]),
    }
    g = dgl.heterograph(combined_edges, idtype=idtype)
    g.nodes["user"].data["h"] = g_x.ndata["h"]
    g.edges["follows"].data["w"] = g_x.edata["w"]
    g.nodes["user"].data["hh"] = g_y.ndata["hh"]
    # "knows" is ambiguous in this graph, so the canonical triple is used.
    g.edges[("user", "knows", "user")].data["ww"] = g_y.edata["ww"]
    return [g, g_x, g_y, g_z]
282

283

284
def test_deserialize_old_heterograph_file():
    """Load the checked-in ``data/hetero1.bin`` and spot-check its contents.

    Checks the id types of graphs 0 and 3, the ``hh`` node feature of graphs
    2 and 5, the ``follows`` edges of graph 0, and the ``graph_label`` entry
    of the label dict.
    """
    path = os.path.join(os.path.dirname(__file__), "data/hetero1.bin")
    g_list, label_dict = dgl.load_graphs(path)

    assert g_list[0].idtype == F.int64
    assert g_list[3].idtype == F.int32

    expected_hh = np.ones((4, 5))
    hh_graph2 = F.asnumpy(g_list[2].nodes["user"].data["hh"])
    assert np.allclose(hh_graph2, expected_hh)
    hh_graph5 = F.asnumpy(g_list[5].nodes["user"].data["hh"])
    assert np.allclose(hh_graph5, expected_hh)

    edges = g_list[0]["follows"].edges()
    assert np.allclose(F.asnumpy(edges[0]), np.array([0, 1, 2]))
    assert np.allclose(F.asnumpy(edges[1]), np.array([1, 2, 3]))

    assert F.allclose(label_dict["graph_label"], F.ones(54))

300
301

def create_old_heterograph_files():
    """Write ``data/hetero1.bin`` next to this file.

    The file contains the int64 and int32 graphs from ``create_heterographs``
    plus a ``graph_label`` tensor of 54 ones.
    """
    graphs = create_heterographs(F.int64) + create_heterographs(F.int32)
    labels = {"graph_label": F.ones(54)}
    target = os.path.join(os.path.dirname(__file__), "data/hetero1.bin")
    dgl.save_graphs(target, graphs, labels)
306
307


308
@unittest.skipIf(F._default_context_str == "gpu", reason="GPU not implemented")
def test_serialize_heterograph():
    """Round-trip the graphs from ``create_heterographs2`` through a temp file.

    Checks id types, canonical edge types, node/edge type names, the ``hh``
    node feature and the ``follows`` edges of the loaded graphs, then verifies
    that new features can still be assigned to a loaded graph.
    """
    # create a temporary file and immediately release it so DGL can open it.
    f = tempfile.NamedTemporaryFile(delete=False)
    path = f.name
    f.close()

    try:
        g_list0 = create_heterographs2(F.int64) + create_heterographs2(F.int32)
        dgl.save_graphs(path, g_list0)

        g_list, _ = dgl.load_graphs(path)
        # The pairwise comparisons below use zip, which would silently stop at
        # the shorter list, so pin the length first.
        assert len(g_list) == len(g_list0)
        assert g_list[0].idtype == F.int64
        assert len(g_list[0].canonical_etypes) == 3
        for loaded, saved in zip(g_list, g_list0):
            for j, etypes in enumerate(saved.canonical_etypes):
                assert loaded.canonical_etypes[j] == etypes
        # assert g_list[1].restrict_format() == 'any'
        # assert g_list[2].restrict_format() == 'csr'

        assert g_list[4].idtype == F.int32
        assert np.allclose(
            F.asnumpy(g_list[2].nodes["user"].data["hh"]), np.ones((4, 5))
        )
        assert np.allclose(
            F.asnumpy(g_list[6].nodes["user"].data["hh"]), np.ones((4, 5))
        )
        edges = g_list[0]["follows"].edges()
        assert np.allclose(F.asnumpy(edges[0]), np.array([0, 1, 2]))
        assert np.allclose(F.asnumpy(edges[1]), np.array([1, 2, 3]))
        for loaded, saved in zip(g_list, g_list0):
            assert loaded.ntypes == saved.ntypes
            assert loaded.etypes == saved.etypes

        # test set feature after load_graph
        g_list[3].nodes["user"].data["test"] = F.tensor([0, 1, 2, 4])
        g_list[3].edata["test"] = F.tensor([0, 1, 2])
    finally:
        # The file was created with delete=False, so remove it here even when
        # saving, loading or an assertion above fails.
        os.unlink(path)

345
346

@unittest.skipIf(F._default_context_str == "gpu", reason="GPU not implemented")
@pytest.mark.skip(reason="lack of permission on CI")
def test_serialize_heterograph_s3():
    """Round-trip the graphs from ``create_heterographs`` through an S3 path.

    Saves six graphs to ``s3://dglci-data-test/graph2.bin``, loads graphs 0, 2
    and 5 back, and spot-checks the id type, the ``hh`` node feature and the
    ``follows`` edges. The test is currently skipped unconditionally.
    """
    path = "s3://dglci-data-test/graph2.bin"
    g_list0 = create_heterographs(F.int64) + create_heterographs(F.int32)
    dgl.save_graphs(path, g_list0)

    # load_graphs returns a (graph_list, label_dict) pair; unpack it so the
    # indexing below addresses graphs rather than the pair itself.
    g_list, _ = dgl.load_graphs(path, [0, 2, 5])
    assert g_list[0].idtype == F.int64
    # assert g_list[1].restrict_format() == 'csr'
    assert np.allclose(
        F.asnumpy(g_list[1].nodes["user"].data["hh"]), np.ones((4, 5))
    )
    assert np.allclose(
        F.asnumpy(g_list[2].nodes["user"].data["hh"]), np.ones((4, 5))
    )
    edges = g_list[0]["follows"].edges()
    assert np.allclose(F.asnumpy(edges[0]), np.array([0, 1, 2]))
    assert np.allclose(F.asnumpy(edges[1]), np.array([1, 2, 3]))


VoVAllen's avatar
VoVAllen committed
367
if __name__ == "__main__":
    # Manual entry point for running a single test without pytest.
    # Uncomment the call(s) to run.
    # test_graph_serialize_with_feature(True)
    # test_graph_serialize_with_feature(False)
    # test_graph_serialize_without_feature(True)
    # test_graph_serialize_without_feature(False)
    # test_graph_serialize_with_labels(True)
    # test_graph_serialize_with_labels(False)
    # test_serialize_tensors()
    # test_serialize_empty_dict()
    # test_load_old_files1()
    test_load_old_files2()
    # test_serialize_heterograph()
    # test_serialize_heterograph_s3()
    # test_deserialize_old_heterograph_file()
    # create_old_heterograph_files()