"git@developer.sourcefind.cn:OpenDAS/torchani.git" did not exist on "14b9a395bdc4ab1cf09f45f1aeb0c8505d0338fa"
Unverified Commit 0fb13f7b authored by VoVAllen's avatar VoVAllen Committed by GitHub
Browse files

[Feature] Data format (#728)

* Add serialization

* add serialization

* add serialization

* lalalalalalalala

* lalalalalalalala

* serialize

* serialize

* nnn

* WIP: import tvm runtime node system

* WIP: object system

* containers

* tested basic container composition

* tested custom object

* tmp

* fix setattr bug

* tested object container return

* fix lint

* some comments about get/set state

* fix lint

* fix lint

* update cython

* fix cython

* ffi doc

* fix doc

* WIP: using object system for graph

* c++ side refactoring done; compiled

* remove stale apis

* fix bug in DGLGraphCreate; passed test_graph.py

* fix bug in python modify; passed utest for pytorch/cpu

* fix lint

* Add serialization

* Add serialization

* fix

* fix typo

* serialize with new ffi

* commit

* commit

* commit

* save

* save

* save

* save

* commit

* clean

* Delete tt2.py

* fix lint

* Add serialization

* fix lint 2

* fix lint

* fix lint

* fix lint

* fix lint

* Fix Lint

* Add serialization

* Change to Macro

* fix

* fix

* fix bugs

* refactor

* refactor

* updating dmlc-core to include force flag

* trying tempfile

* delete leaked pointer

* Fix assert

* fix assert

* add comment and test case

* add graph labels

* add load labels

* lint

* lint

* add graph labels

* lint

*  fix windows

* fix

* update dmlc-core to latest

* fix

* fix camel naming
parent 6a4b5ae9
...@@ -114,6 +114,11 @@ else(USE_CUDA) ...@@ -114,6 +114,11 @@ else(USE_CUDA)
add_library(dgl SHARED ${DGL_SRC}) add_library(dgl SHARED ${DGL_SRC})
endif(USE_CUDA) endif(USE_CUDA)
# For serialization: add the dmlc-core sources under third_party/ to the build
# and append its `dmlc` target to the libraries DGL links against.
add_subdirectory("third_party/dmlc-core")
list(APPEND DGL_LINKER_LIBS dmlc)
# NOTE(review): GOOGLE_TEST is set after add_subdirectory() has already
# processed dmlc-core, so it may not affect that sub-build -- confirm ordering.
set(GOOGLE_TEST 0) # Turn off dmlc-core test
target_link_libraries(dgl ${DGL_LINKER_LIBS} ${DGL_RUNTIME_LINKER_LIBS}) target_link_libraries(dgl ${DGL_LINKER_LIBS} ${DGL_RUNTIME_LINKER_LIBS})
# Installation rules # Installation rules
......
...@@ -16,6 +16,9 @@ Utils ...@@ -16,6 +16,9 @@ Utils
utils.check_sha1 utils.check_sha1
utils.extract_archive utils.extract_archive
utils.split_dataset utils.split_dataset
utils.save_graphs
utils.load_graphs
utils.load_labels
.. autoclass:: dgl.data.utils.Subset .. autoclass:: dgl.data.utils.Subset
:members: __getitem__, __len__ :members: __getitem__, __len__
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
* Copyright (c) 2019 by Contributors * Copyright (c) 2019 by Contributors
* \file packed_func_ext.h * \file packed_func_ext.h
* \brief Extension package to PackedFunc * \brief Extension package to PackedFunc
* This enales pass ObjectRef types into/from PackedFunc. * This enables pass ObjectRef types into/from PackedFunc.
*/ */
#ifndef DGL_PACKED_FUNC_EXT_H_ #ifndef DGL_PACKED_FUNC_EXT_H_
#define DGL_PACKED_FUNC_EXT_H_ #define DGL_PACKED_FUNC_EXT_H_
......
...@@ -165,7 +165,7 @@ class IterAdapter { ...@@ -165,7 +165,7 @@ class IterAdapter {
* values, try use the constructor to create the list at once (for example * values, try use the constructor to create the list at once (for example
* from an existing vector). * from an existing vector).
* *
* operator[] only provide const acces, use Set to mutate the content. * operator[] only provide const access, use Set to mutate the content.
* *
* \tparam T The content ObjectRef type. * \tparam T The content ObjectRef type.
*/ */
......
...@@ -68,7 +68,7 @@ class StrMap(Map): ...@@ -68,7 +68,7 @@ class StrMap(Map):
def items(self): def items(self):
"""Get the items from the map""" """Get the items from the map"""
akvs = _api_internal._MapItems(self) akvs = _api_internal._MapItems(self)
return [(akvs[i].value, akvs[i+1]) for i in range(0, len(akvs), 2)] return [(akvs[i].data, akvs[i+1]) for i in range(0, len(akvs), 2)]
@register_object @register_object
class Value(ObjectBase): class Value(ObjectBase):
......
"""For Graph Serialization"""
from __future__ import absolute_import
from ..graph import DGLGraph
from ..batched_graph import BatchedDGLGraph
from .._ffi.object import ObjectBase, register_object
from .._ffi.function import _init_api
from .. import backend as F
_init_api("dgl.data.graph_serialize")
__all__ = ['save_graphs', "load_graphs", "load_labels"]
@register_object("graph_serialize.StorageMetaData")
class StorageMetaData(ObjectBase):
    """Python handle for the ``graph_serialize.StorageMetaData`` object.

    This is the object returned by ``_CAPI_DGLLoadGraphs``; it describes the
    content of a serialized graph file.

    Attributes
    ----------
    num_graph : int
        Number of graphs.
    nodes_num_list : Value of NDArray
        Number of nodes for each graph.
    edges_num_list : Value of NDArray
        Number of edges for each graph.
    labels : map of str to Value
        Stored labels keyed by name. ``load_graphs`` and ``load_labels``
        convert each entry to a backend tensor through its ``.data`` field.
    graph_data : list of GraphData
        The loaded ``GraphData`` objects.
    """
@register_object("graph_serialize.GraphData")
class GraphData(ObjectBase):
    """One serializable graph: its structure plus node and edge feature tensors."""

    @staticmethod
    def create(g: DGLGraph):
        """Pack a DGLGraph and its features into a GraphData object.

        Parameters
        ----------
        g : DGLGraph
            Graph to pack. ``BatchedDGLGraph`` instances are rejected.

        Returns
        -------
        The object produced by ``_CAPI_MakeGraphData``.
        """
        assert not isinstance(g, BatchedDGLGraph), "BatchedDGLGraph is not supported for serialization"
        graph_index = g._graph

        # An empty feature frame is handed to the C API as None, not as {}.
        node_feats = None
        if len(g.ndata) != 0:
            node_feats = {name: F.zerocopy_to_dgl_ndarray(feat)
                          for name, feat in g.ndata.items()}

        edge_feats = None
        if len(g.edata) != 0:
            edge_feats = {name: F.zerocopy_to_dgl_ndarray(feat)
                          for name, feat in g.edata.items()}

        return _CAPI_MakeGraphData(graph_index, node_feats, edge_feats)

    def get_graph(self):
        """Rebuild a read-only DGLGraph, including its features, from this object."""
        graph = DGLGraph(graph_data=_CAPI_GDataGraphHandle(self), readonly=True)
        node_items = _CAPI_GDataNodeTensors(self).items()
        edge_items = _CAPI_GDataEdgeTensors(self).items()
        # Each stored value wraps a DGL NDArray in its ``data`` field.
        for name, boxed in node_items:
            graph.ndata[name] = F.zerocopy_from_dgl_ndarray(boxed.data)
        for name, boxed in edge_items:
            graph.edata[name] = F.zerocopy_from_dgl_ndarray(boxed.data)
        return graph
def save_graphs(filename, g_list, labels=None):
    r"""
    Save DGLGraphs and graph labels to file

    Parameters
    ----------
    filename : str
        File name to store DGLGraphs.
    g_list: list
        DGLGraph or list of DGLGraph
    labels: dict (Default: None)
        labels should be dict of tensors/ndarray, with str as keys

    Examples
    ----------
    >>> import dgl
    >>> import torch as th

    Create :code:`DGLGraph` objects and initialize node and edge features.

    >>> g1 = dgl.DGLGraph()
    >>> g1.add_nodes(3)
    >>> g1.add_edges([0, 0, 0, 1, 1, 2], [0, 1, 2, 1, 2, 2])
    >>> g1.ndata["e"] = th.ones(3, 5)
    >>> g2 = dgl.DGLGraph()
    >>> g2.add_nodes(3)
    >>> g2.add_edges([0, 1, 2], [1, 2, 1])
    >>> g2.edata["e"] = th.ones(3, 4)

    Save Graphs into file

    >>> from dgl.data.utils import save_graphs
    >>> graph_labels = {"glabel": th.tensor([0, 1])}
    >>> save_graphs("./data.bin", [g1, g2], graph_labels)
    """
    # Accept a single graph as a convenience.
    if isinstance(g_list, DGLGraph):
        g_list = [g_list]
    # A missing or empty label dict is passed to the C API as None.
    if (labels is not None) and (len(labels) != 0):
        label_dict = {key: F.zerocopy_to_dgl_ndarray(value)
                      for key, value in labels.items()}
    else:
        label_dict = None
    gdata_list = [GraphData.create(g) for g in g_list]
    _CAPI_DGLSaveGraphs(filename, gdata_list, label_dict)
def load_graphs(filename, idx_list=None):
    """
    Load DGLGraphs from file

    Parameters
    ----------
    filename: str
        filename to load DGLGraphs
    idx_list: list of int
        list of index of graph to be loaded. If not specified, will
        load all graphs from file

    Returns
    ----------
    graph_list: list of immutable DGLGraphs
    labels: dict of labels stored in file (empty dict returned if no
            label stored)

    Examples
    ----------
    Following the example in save_graphs.

    >>> from dgl.data.utils import load_graphs
    >>> glist, label_dict = load_graphs("./data.bin") # glist will be [g1, g2]
    >>> glist, label_dict = load_graphs("./data.bin", [0]) # glist will be [g1]
    """
    # Resolve the default before validating the type; otherwise the
    # documented ``idx_list=None`` call would always fail the check below.
    if idx_list is None:
        idx_list = []
    assert isinstance(idx_list, list), "idx_list must be a list of int or None"
    metadata = _CAPI_DGLLoadGraphs(filename, idx_list, False)
    # Each stored label wraps a DGL NDArray in its ``data`` field.
    label_dict = {k: F.zerocopy_from_dgl_ndarray(v.data)
                  for k, v in metadata.labels.items()}
    return [gdata.get_graph() for gdata in metadata.graph_data], label_dict
def load_labels(filename):
    """
    Load label dict from file

    Parameters
    ----------
    filename: str
        filename to load DGLGraphs

    Returns
    ----------
    labels: dict
        dict of labels stored in file (empty dict returned if no
        label stored)

    Examples
    ----------
    Following the example in save_graphs.

    >>> from dgl.data.utils import load_labels
    >>> label_dict = load_labels("./data.bin")
    """
    # The final ``True`` asks the C API for metadata only.
    metadata = _CAPI_DGLLoadGraphs(filename, [], True)
    # Each stored label wraps a DGL NDArray in its ``data`` field.
    return {k: F.zerocopy_from_dgl_ndarray(v.data)
            for k, v in metadata.labels.items()}
...@@ -8,6 +8,9 @@ import warnings ...@@ -8,6 +8,9 @@ import warnings
import zipfile import zipfile
import tarfile import tarfile
import numpy as np import numpy as np
from .graph_serialize import save_graphs, load_graphs, load_labels
try: try:
import requests import requests
except ImportError: except ImportError:
...@@ -16,7 +19,8 @@ except ImportError: ...@@ -16,7 +19,8 @@ except ImportError:
requests = requests_failed_to_import requests = requests_failed_to_import
__all__ = ['download', 'check_sha1', 'extract_archive', __all__ = ['download', 'check_sha1', 'extract_archive',
'get_download_dir', 'Subset', 'split_dataset'] 'get_download_dir', 'Subset', 'split_dataset',
'save_graphs', "load_graphs", "load_labels"]
def _get_dgl_url(file_url): def _get_dgl_url(file_url):
......
/*!
* Copyright (c) 2019 by Contributors
* \file graph/graph_serialize.cc
* \brief Graph serialization implementation
*
* The storage structure is
* {
* // MetaData Section
* uint64_t kDGLSerializeMagic
* uint64_t kVersion
* uint64_t GraphType
* ** Reserved Area till 4kB **
*
* dgl_id_t num_graphs
* vector<dgl_id_t> graph_indices (start address of each graph)
* vector<int64_t> nodes_num_list (list of number of nodes for each graph)
* vector<int64_t> edges_num_list (list of number of edges for each graph)
* vector<pair<string, NDArray>> labels_list (named label tensors)
*
* vector<GraphData> graph_datas;
*
* }
*
* Storage of GraphData is
* {
* // Everything uses in csr
* NDArray indptr
* NDArray indices
* NDArray edge_ids
* vector<pair<string, NDArray>> node_tensors;
* vector<pair<string, NDArray>> edge_tensors;
* }
*
*/
#include "graph_serialize.h"
#include <dmlc/io.h>
#include <dmlc/type_traits.h>
#include <dgl/runtime/container.h>
#include <dgl/immutable_graph.h>
#include <dgl/runtime/object.h>
#include <dgl/graph_op.h>
#include <iostream>
#include <memory>
#include <string>
#include <vector>
#include <algorithm>
#include <utility>
using namespace dgl::runtime;
using dgl::COO;
using dgl::COOPtr;
using dgl::ImmutableGraph;
using dmlc::SeekStream;
using dgl::runtime::NDArray;
using std::vector;
using dgl::serialize::GraphData;
using dgl::serialize::GraphDataObject;
// Mark NDArray and GraphDataObject with dmlc's `has_saveload` trait.
// GraphDataObject declares Save()/Load() members; presumably this trait is
// what lets fs->Write()/fs->Read() dispatch to them -- confirm against dmlc-core.
namespace dmlc {
DMLC_DECLARE_TRAITS(has_saveload, NDArray, true);
DMLC_DECLARE_TRAITS(has_saveload, GraphDataObject, true);
}
namespace dgl {
namespace serialize {
/*!
 * \brief Graph kind tag stored in the file header.
 *
 * SaveDGLGraphs writes kImmutableGraph and LoadDGLGraphs rejects any other
 * value; kMutableGraph is not written anywhere in this file.
 */
enum GraphType {
  kMutableGraph = 0ull,
  kImmutableGraph = 1ull
};
// args: (graph handle, node feature map, edge feature map) -> GraphData
DGL_REGISTER_GLOBAL("data.graph_serialize._CAPI_MakeGraphData")
.set_body([](DGLArgs args, DGLRetValue *rv) {
    GraphRef graph_ref = args[0];
    // The graph is converted to its immutable form before it is stored.
    ImmutableGraphPtr immutable_graph = ToImmutableGraph(graph_ref.sptr());
    Map<std::string, Value> node_feats = args[1];
    Map<std::string, Value> edge_feats = args[2];
    GraphData result = GraphData::Create();
    result->SetData(immutable_graph, node_feats, edge_feats);
    *rv = result;
  });
// args: (filename, list of GraphData, label map); no value is returned.
DGL_REGISTER_GLOBAL("data.graph_serialize._CAPI_DGLSaveGraphs")
.set_body([](DGLArgs args, DGLRetValue *rv) {
    std::string filename = args[0];
    List<GraphData> graph_data = args[1];
    Map<std::string, Value> labels = args[2];
    // Flatten the label map into (name, NDArray) pairs for the writer.
    std::vector<NamedTensor> named_labels;
    for (auto entry : labels) {
      Value boxed = entry.second;
      named_labels.emplace_back(entry.first, static_cast<NDArray>(boxed->data));
    }
    SaveDGLGraphs(filename, graph_data, named_labels);
  });
// args: (filename, list of graph indices, only-metadata flag) -> StorageMetaData
DGL_REGISTER_GLOBAL("data.graph_serialize._CAPI_DGLLoadGraphs")
.set_body([](DGLArgs args, DGLRetValue *rv) {
    std::string filename = args[0];
    List<Value> idxs = args[1];
    bool onlyMeta = args[2];
    // Use dgl_id_t elements so the vector type matches LoadDGLGraphs'
    // parameter exactly instead of relying on size_t being the same type.
    std::vector<dgl_id_t> idx_list(idxs.size());
    for (uint64_t i = 0; i < idxs.size(); ++i) {
      idx_list[i] = static_cast<dgl_id_t>(idxs[i]->data);
    }
    *rv = LoadDGLGraphs(filename, idx_list, onlyMeta);
  });
// args: (GraphData) -> the graph pointer it holds
DGL_REGISTER_GLOBAL("data.graph_serialize._CAPI_GDataGraphHandle")
.set_body([](DGLArgs args, DGLRetValue *rv) {
    GraphData graph_data = args[0];
    *rv = graph_data->gptr;
  });
// args: (GraphData) -> map from feature name to boxed node tensor
DGL_REGISTER_GLOBAL("data.graph_serialize._CAPI_GDataNodeTensors")
.set_body([](DGLArgs args, DGLRetValue *rv) {
    GraphData graph_data = args[0];
    Map<std::string, Value> boxed_tensors;
    for (const auto &named : graph_data->node_tensors) {
      boxed_tensors.Set(named.first, Value(MakeValue(named.second)));
    }
    *rv = boxed_tensors;
  });
// args: (GraphData) -> map from feature name to boxed edge tensor
DGL_REGISTER_GLOBAL("data.graph_serialize._CAPI_GDataEdgeTensors")
.set_body([](DGLArgs args, DGLRetValue *rv) {
    GraphData graph_data = args[0];
    Map<std::string, Value> boxed_tensors;
    for (const auto &named : graph_data->edge_tensors) {
      boxed_tensors.Set(named.first, Value(MakeValue(named.second)));
    }
    *rv = boxed_tensors;
  });
/*! \brief Magic number written first by SaveDGLGraphs and verified by LoadDGLGraphs. */
constexpr uint64_t kDGLSerializeMagic = 0xDD2E4FF046B4A13F;
bool SaveDGLGraphs(std::string filename,
List<GraphData> graph_data,
std::vector<NamedTensor> labels_list) {
auto *fs = dynamic_cast<SeekStream *>(SeekStream::Create(filename.c_str(), "w",
true));
CHECK(fs) << "File name is not a valid local file name";
// Write DGL MetaData
const uint64_t kVersion = 1;
fs->Write(kDGLSerializeMagic);
fs->Write(kVersion);
fs->Write(kImmutableGraph);
fs->Seek(4096);
// Write Graph Meta Data
dgl_id_t num_graph = graph_data.size();
std::vector<dgl_id_t> graph_indices(num_graph);
std::vector<int64_t> nodes_num_list(num_graph);
std::vector<int64_t> edges_num_list(num_graph);
for (uint64_t i = 0; i < num_graph; ++i) {
nodes_num_list[i] = graph_data[i]->gptr->NumVertices();
edges_num_list[i] = graph_data[i]->gptr->NumEdges();
}
// Reserve spaces for graph indices
fs->Write(num_graph);
dgl_id_t indices_start_ptr = fs->Tell();
fs->Write(graph_indices);
fs->Write(nodes_num_list);
fs->Write(edges_num_list);
fs->Write(labels_list);
// Write GraphData
for (uint64_t i = 0; i < num_graph; ++i) {
graph_indices[i] = fs->Tell();
GraphDataObject gdata = *graph_data[i].as<GraphDataObject>();
fs->Write(gdata);
}
fs->Seek(indices_start_ptr);
fs->Write(graph_indices);
std::vector<dgl_id_t> test;
fs->Seek(indices_start_ptr);
fs->Read(&test);
delete fs;
return true;
}
/*!
 * \brief Read graph metadata, and optionally graphs, from a serialized file.
 *
 * \param filename Path of a file written by SaveDGLGraphs.
 * \param idx_list Indices of the graphs to load, returned in this order.
 *        An empty list loads every graph in the file.
 * \param onlyMeta When true, return after the metadata (counts and labels)
 *        has been read, without loading any graph.
 * \return The populated StorageMetaData.
 */
StorageMetaData LoadDGLGraphs(const std::string &filename,
                              std::vector<dgl_id_t> idx_list,
                              bool onlyMeta) {
  // CreateForRead is called with allow_null=true, so a failed open yields a
  // null stream; check it before use. unique_ptr releases the stream on
  // every exit path.
  std::unique_ptr<SeekStream> fs(
    SeekStream::CreateForRead(filename.c_str(), true));
  CHECK(fs != nullptr) << "Cannot open file for reading: " << filename;
  StorageMetaData metadata = StorageMetaData::Create();

  // Read DGL MetaData in the order SaveDGLGraphs writes it:
  // magic, version, graph type.
  uint64_t magicNum = 0, version = 0, graphType = 0;
  CHECK(fs->Read(&magicNum)) << "Invalid DGL files";
  CHECK(fs->Read(&version)) << "Invalid DGL files";
  CHECK(fs->Read(&graphType)) << "Invalid DGL files";
  fs->Seek(4096);

  CHECK_EQ(magicNum, kDGLSerializeMagic) << "Invalid DGL files";
  CHECK_EQ(version, 1) << "Invalid Serialization Version";
  CHECK_EQ(graphType, kImmutableGraph) << "Invalid DGL files";

  // Read Graph MetaData
  dgl_id_t num_graph;
  CHECK(fs->Read(&num_graph)) << "Invalid num of graph";
  std::vector<dgl_id_t> graph_indices;
  std::vector<int64_t> nodes_num_list;
  std::vector<int64_t> edges_num_list;
  std::vector<NamedTensor> labels_list;
  CHECK(fs->Read(&graph_indices)) << "Invalid graph indices";
  CHECK(fs->Read(&nodes_num_list)) << "Invalid node num list";
  CHECK(fs->Read(&edges_num_list)) << "Invalid edge num list";
  CHECK(fs->Read(&labels_list)) << "Invalid label list";

  metadata->SetMetaData(num_graph, nodes_num_list, edges_num_list, labels_list);

  // Early Return
  if (onlyMeta) {
    return metadata;
  }

  // Deserializes one graph from the current stream position.
  auto read_graph = [&fs]() {
    GraphData gdata = GraphData::Create();
    GraphDataObject *gdata_ptr =
      const_cast<GraphDataObject *>(gdata.as<GraphDataObject>());
    CHECK(fs->Read(gdata_ptr)) << "Invalid graph data";
    return gdata;
  };

  std::vector<GraphData> gdata_refs;
  if (idx_list.empty()) {
    // Read All Graphs; they are stored back to back after the metadata.
    gdata_refs.reserve(num_graph);
    for (uint64_t i = 0; i < num_graph; ++i) {
      gdata_refs.push_back(read_graph());
    }
  } else {
    // Read Selected Graphs
    gdata_refs.reserve(idx_list.size());
    // Sorted indices would read the file sequentially, but the returned
    // graphs must follow the order of idx_list, so seek per request.
    for (dgl_id_t idx : idx_list) {
      CHECK_LT(idx, graph_indices.size()) << "Graph index out of range: " << idx;
      fs->Seek(graph_indices[idx]);
      gdata_refs.push_back(read_graph());
    }
  }

  metadata->SetGraphData(gdata_refs);
  return metadata;
}
/*!
 * \brief Store the graph pointer and append the given feature tensors.
 *
 * Each map entry is unboxed to an NDArray and appended to the matching
 * (name, tensor) list; existing list entries are kept.
 */
void GraphDataObject::SetData(ImmutableGraphPtr gptr,
                              Map<std::string, Value> node_tensors,
                              Map<std::string, Value> edge_tensors) {
  this->gptr = gptr;

  for (auto entry : node_tensors) {
    Value boxed = entry.second;
    this->node_tensors.emplace_back(entry.first,
                                    static_cast<NDArray>(boxed->data));
  }

  for (auto entry : edge_tensors) {
    Value boxed = entry.second;
    this->edge_tensors.emplace_back(entry.first,
                                    static_cast<NDArray>(boxed->data));
  }
}
/*!
 * \brief Write this graph to the stream: in-CSR arrays first (indptr,
 *        indices, edge ids), then node tensors, then edge tensors.
 */
void GraphDataObject::Save(dmlc::Stream *fs) const {
  // The structure is stored through its in-CSR representation.
  const CSRPtr in_csr = this->gptr->GetInCSR();
  fs->Write(in_csr->indptr());
  fs->Write(in_csr->indices());
  fs->Write(in_csr->edge_ids());
  fs->Write(this->node_tensors);
  fs->Write(this->edge_tensors);
}
/*!
 * \brief Read one graph from the stream, in the order Save() writes it.
 *
 * \param fs Stream positioned at the start of a serialized graph.
 * \return true when every field was read successfully, false otherwise.
 */
bool GraphDataObject::Load(dmlc::Stream *fs) {
  NDArray indptr, indices, edge_ids;
  // Stop on the first failed read rather than building a graph from
  // partially read arrays.
  if (!fs->Read(&indptr) || !fs->Read(&indices) || !fs->Read(&edge_ids)) {
    return false;
  }
  this->gptr = ImmutableGraph::CreateFromCSR(indptr, indices, edge_ids, "in");

  return fs->Read(&this->node_tensors) && fs->Read(&this->edge_tensors);
}
/*! \brief Merge the graphs of all given GraphData into one disjoint-union graph. */
ImmutableGraphPtr BatchLoadedGraphs(std::vector<GraphData> gdata_list) {
  std::vector<GraphPtr> graphs;
  graphs.reserve(gdata_list.size());
  for (const auto &gdata : gdata_list) {
    graphs.push_back(static_cast<GraphPtr>(gdata->gptr));
  }
  return std::dynamic_pointer_cast<ImmutableGraph>(
    GraphOp::DisjointUnion(graphs));
}
/*!
 * \brief Return an immutable view of the graph.
 *
 * An ImmutableGraph is returned as is; a mutable Graph is rebuilt from its
 * edge list. Any other graph type fails the CHECK.
 */
ImmutableGraphPtr ToImmutableGraph(GraphPtr g) {
  ImmutableGraphPtr already_immutable = std::dynamic_pointer_cast<ImmutableGraph>(g);
  if (already_immutable) {
    return already_immutable;
  }

  MutableGraphPtr mutable_graph = std::dynamic_pointer_cast<Graph>(g);
  CHECK(mutable_graph) << "Invalid Graph Pointer";
  // Edges are requested in "eid" order before building the COO graph.
  EdgeArray edges = mutable_graph->Edges("eid");
  IdArray src_ids = edges.src;
  IdArray dst_ids = edges.dst;
  return ImmutableGraph::CreateFromCOO(mutable_graph->NumVertices(), src_ids,
                                       dst_ids);
}
/*!
 * \brief Fill in the graph count, the per-graph node/edge counts (boxed as
 *        id arrays) and the label map.
 */
void StorageMetaDataObject::SetMetaData(dgl_id_t num_graph,
                                        std::vector<int64_t> nodes_num_list,
                                        std::vector<int64_t> edges_num_list,
                                        std::vector<NamedTensor> labels_list) {
  this->num_graph = num_graph;
  this->nodes_num_list = Value(MakeValue(aten::VecToIdArray(nodes_num_list)));
  this->edges_num_list = Value(MakeValue(aten::VecToIdArray(edges_num_list)));
  // Box each label tensor and index it by name.
  for (const auto &named : labels_list) {
    this->labels_list.Set(named.first, Value(MakeValue(named.second)));
  }
}
/*! \brief Replace the stored graph list with the given GraphData objects. */
void StorageMetaDataObject::SetGraphData(std::vector<GraphData> gdata) {
  this->graph_data = List<GraphData>(gdata);
}
} // namespace serialize
} // namespace dgl
/*!
* Copyright (c) 2019 by Contributors
* \file graph/graph_serialize.h
* \brief Graph serialization header
*/
#ifndef DGL_GRAPH_GRAPH_SERIALIZE_H_
#define DGL_GRAPH_GRAPH_SERIALIZE_H_
#include <dgl/graph.h>
#include <dgl/array.h>
#include <dgl/immutable_graph.h>
#include <dmlc/io.h>
#include <dmlc/type_traits.h>
#include <dgl/runtime/ndarray.h>
#include <dgl/runtime/container.h>
#include <dgl/runtime/object.h>
#include <dgl/packed_func_ext.h>
#include <iostream>
#include <string>
#include <vector>
#include <algorithm>
#include <utility>
#include "../c_api_common.h"
using dgl::runtime::NDArray;
using dgl::ImmutableGraph;
using namespace dgl::runtime;
namespace dgl {
namespace serialize {
/*! \brief A tensor paired with its name (feature or label key). */
typedef std::pair<std::string, NDArray> NamedTensor;
/*!
 * \brief One serializable graph: its structure plus named node and edge
 *        tensors. Registered under the type key "graph_serialize.GraphData".
 */
class GraphDataObject : public runtime::Object {
 public:
  /*! \brief The graph structure. */
  ImmutableGraphPtr gptr;
  /*! \brief Node feature tensors as (name, tensor) pairs. */
  std::vector<NamedTensor> node_tensors;
  /*! \brief Edge feature tensors as (name, tensor) pairs. */
  std::vector<NamedTensor> edge_tensors;
  static constexpr const char *_type_key = "graph_serialize.GraphData";

  /*! \brief Set the graph pointer and the node/edge feature tensors. */
  void SetData(ImmutableGraphPtr gptr,
               Map<std::string, Value> node_tensors,
               Map<std::string, Value> edge_tensors);

  /*! \brief Write this object to the stream. */
  void Save(dmlc::Stream *fs) const;

  /*! \brief Read this object from the stream. */
  bool Load(dmlc::Stream *fs);

  DGL_DECLARE_OBJECT_TYPE_INFO(GraphDataObject, runtime::Object);
};
/*! \brief Reference type for GraphDataObject. */
class GraphData : public runtime::ObjectRef {
 public:
  DGL_DEFINE_OBJECT_REF_METHODS(GraphData, runtime::ObjectRef, GraphDataObject);

  /*! \brief create a new GraphData reference */
  static GraphData Create() {
    return GraphData(std::make_shared<GraphDataObject>());
  }
};
/*!
 * \brief Description of a serialized graph file, plus any graphs loaded from
 *        it. Registered under the type key "graph_serialize.StorageMetaData".
 */
class StorageMetaDataObject : public runtime::Object {
 public:
  // NOTE(review): num_graph has no initializer; it is only assigned in SetMetaData.
  /*! \brief Number of graphs. */
  dgl_id_t num_graph;
  /*! \brief Boxed id array with the number of nodes of each graph. */
  Value nodes_num_list;
  /*! \brief Boxed id array with the number of edges of each graph. */
  Value edges_num_list;
  /*! \brief Label tensors keyed by name; exposed as attribute "labels". */
  Map<std::string, Value> labels_list;
  /*! \brief The loaded graphs. */
  List<GraphData> graph_data;
  static constexpr const char *_type_key = "graph_serialize.StorageMetaData";

  /*! \brief Set the graph count, per-graph node/edge counts and labels. */
  void SetMetaData(dgl_id_t num_graph,
                   std::vector<int64_t> nodes_num_list,
                   std::vector<int64_t> edges_num_list,
                   std::vector<NamedTensor> labels_list);

  /*! \brief Set the list of loaded graphs. */
  void SetGraphData(std::vector<GraphData> gdata);

  /*! \brief Expose the fields as named attributes; labels_list is visited as "labels". */
  void VisitAttrs(AttrVisitor *v) final {
    v->Visit("num_graph", &num_graph);
    v->Visit("nodes_num_list", &nodes_num_list);
    v->Visit("edges_num_list", &edges_num_list);
    v->Visit("labels", &labels_list);
    v->Visit("graph_data", &graph_data);
  }

  DGL_DECLARE_OBJECT_TYPE_INFO(StorageMetaDataObject, runtime::Object);
};
/*! \brief Reference type for StorageMetaDataObject. */
class StorageMetaData : public runtime::ObjectRef {
 public:
  DGL_DEFINE_OBJECT_REF_METHODS(StorageMetaData, runtime::ObjectRef, StorageMetaDataObject);

  /*! \brief create a new StorageMetaData reference */
  static StorageMetaData Create() {
    return StorageMetaData(std::make_shared<StorageMetaDataObject>());
  }
};
/*!
 * \brief Write the graphs and labels to a file.
 * \param filename Destination file name.
 * \param graph_data Graphs to write.
 * \param labels_list Named label tensors to write.
 */
bool SaveDGLGraphs(std::string filename,
                   List<GraphData> graph_data,
                   std::vector<NamedTensor> labels_list);

/*!
 * \brief Read metadata, and optionally graphs, from a file.
 * \param filename Source file name.
 * \param idx_list Indices of the graphs to load.
 * \param onlyMeta Whether to read the metadata only (default false).
 */
StorageMetaData LoadDGLGraphs(const std::string &filename,
                              std::vector<dgl_id_t> idx_list,
                              bool onlyMeta = false);

/*! \brief Convert a graph pointer to an immutable graph pointer. */
ImmutableGraphPtr ToImmutableGraph(GraphPtr g);
} // namespace serialize
} // namespace dgl
#endif // DGL_GRAPH_GRAPH_SERIALIZE_H_
import backend as F
import numpy as np
import scipy as sp
import time
import tempfile
import os
from dgl import DGLGraph
import dgl
from dgl.data.utils import save_graphs, load_graphs, load_labels
np.random.seed(44)
def generate_rand_graph(n):
    """Return a read-only DGLGraph built from a random n x n sparse matrix (density 0.1)."""
    nonzero_mask = sp.sparse.random(n, n, density=0.1, format='coo') != 0
    adjacency = nonzero_mask.astype(np.int64)
    return DGLGraph(adjacency, readonly=True)
def construct_graph(n, readonly=True):
    """Build n random 30-node graphs with edge features e1/e2 and node feature n1.

    Graphs at even positions are set read-only and the others mutable.
    The ``readonly`` argument is accepted but not used.
    """
    graphs = []
    for index in range(n):
        graph = generate_rand_graph(30)
        graph.edata['e1'] = F.randn((graph.number_of_edges(), 32))
        graph.edata['e2'] = F.ones((graph.number_of_edges(), 32))
        graph.ndata['n1'] = F.randn((graph.number_of_nodes(), 64))
        is_even = index % 2 == 0
        graph.readonly(is_even)
        graphs.append(graph)
    return graphs
def test_graph_serialize_with_feature():
    """Round-trip graphs with node/edge features and compare the first loaded graph."""
    num_graphs = 100

    t0 = time.time()

    g_list = construct_graph(num_graphs)

    t1 = time.time()

    # create a temporary file and immediately release it so DGL can open it.
    with tempfile.NamedTemporaryFile(delete=False) as f:
        path = f.name

    # Remove the temporary file even when an assertion below fails.
    try:
        save_graphs(path, g_list)

        t2 = time.time()
        # Load in shuffled order: results must follow idx_list, not file order.
        idx_list = np.random.permutation(np.arange(num_graphs)).tolist()
        loadg_list, _ = load_graphs(path, idx_list)

        t3 = time.time()
        idx = idx_list[0]
        load_g = loadg_list[0]
        print("Save time: {} s".format(t2 - t1))
        print("Load time: {} s".format(t3 - t2))
        print("Graph Construction time: {} s".format(t1 - t0))

        assert F.allclose(load_g.nodes(), g_list[idx].nodes())

        load_edges = load_g.all_edges('uv', 'eid')
        g_edges = g_list[idx].all_edges('uv', 'eid')
        assert F.allclose(load_edges[0], g_edges[0])
        assert F.allclose(load_edges[1], g_edges[1])
        assert F.allclose(load_g.edata['e1'], g_list[idx].edata['e1'])
        assert F.allclose(load_g.edata['e2'], g_list[idx].edata['e2'])
        assert F.allclose(load_g.ndata['n1'], g_list[idx].ndata['n1'])

        t4 = time.time()
        # Only the batching time is of interest; the result is discarded.
        dgl.batch(loadg_list)
        t5 = time.time()
        print("Batch time: {} s".format(t5 - t4))
    finally:
        os.unlink(path)
def test_graph_serialize_without_feature():
    """Round-trip feature-less graphs and compare the first loaded graph's structure."""
    num_graphs = 100
    g_list = [generate_rand_graph(30) for _ in range(num_graphs)]

    # create a temporary file and immediately release it so DGL can open it.
    with tempfile.NamedTemporaryFile(delete=False) as f:
        path = f.name

    # Remove the temporary file even when an assertion below fails.
    try:
        save_graphs(path, g_list)

        # Load in shuffled order: results must follow idx_list, not file order.
        idx_list = np.random.permutation(np.arange(num_graphs)).tolist()
        loadg_list, _ = load_graphs(path, idx_list)

        idx = idx_list[0]
        load_g = loadg_list[0]

        assert F.allclose(load_g.nodes(), g_list[idx].nodes())

        load_edges = load_g.all_edges('uv', 'eid')
        g_edges = g_list[idx].all_edges('uv', 'eid')
        assert F.allclose(load_edges[0], g_edges[0])
        assert F.allclose(load_edges[1], g_edges[1])
    finally:
        os.unlink(path)
def test_graph_serialize_with_labels():
    """Round-trip graphs with a label dict and check labels from both loaders."""
    num_graphs = 100
    g_list = [generate_rand_graph(30) for _ in range(num_graphs)]
    labels = {"label": F.zeros((num_graphs, 1))}

    # create a temporary file and immediately release it so DGL can open it.
    with tempfile.NamedTemporaryFile(delete=False) as f:
        path = f.name

    # Remove the temporary file even when an assertion below fails.
    try:
        save_graphs(path, g_list, labels)

        # Load in shuffled order: results must follow idx_list, not file order.
        idx_list = np.random.permutation(np.arange(num_graphs)).tolist()
        loadg_list, l_labels0 = load_graphs(path, idx_list)
        l_labels = load_labels(path)
        assert F.allclose(l_labels['label'], labels['label'])
        assert F.allclose(l_labels0['label'], labels['label'])

        idx = idx_list[0]
        load_g = loadg_list[0]

        assert F.allclose(load_g.nodes(), g_list[idx].nodes())

        load_edges = load_g.all_edges('uv', 'eid')
        g_edges = g_list[idx].all_edges('uv', 'eid')
        assert F.allclose(load_edges[0], g_edges[0])
        assert F.allclose(load_edges[1], g_edges[1])
    finally:
        os.unlink(path)
if __name__ == "__main__":
    # Run every serialization test in order when executed as a script.
    for run_test in (test_graph_serialize_with_feature,
                     test_graph_serialize_without_feature,
                     test_graph_serialize_with_labels):
        run_test()
...@@ -8,7 +8,7 @@ MD build ...@@ -8,7 +8,7 @@ MD build
PUSHD build PUSHD build
CALL "C:\Program Files (x86)\Microsoft Visual Studio\2017\BuildTools\VC\Auxiliary\Build\vcvars64.bat" CALL "C:\Program Files (x86)\Microsoft Visual Studio\2017\BuildTools\VC\Auxiliary\Build\vcvars64.bat"
cmake -DCMAKE_CXX_FLAGS="/DDGL_EXPORTS" -DUSE_OPENMP=ON -Dgtest_force_shared_crt=ON -DBUILD_CPP_TEST=1 -DCMAKE_CONFIGURATION_TYPES="Release" .. -G "Visual Studio 15 2017 Win64" || EXIT /B 1 cmake -DCMAKE_CXX_FLAGS="/DDGL_EXPORTS" -DUSE_OPENMP=ON -Dgtest_force_shared_crt=ON -DDMLC_FORCE_SHARED_CRT=ON -DBUILD_CPP_TEST=1 -DCMAKE_CONFIGURATION_TYPES="Release" .. -G "Visual Studio 15 2017 Win64" || EXIT /B 1
msbuild dgl.sln || EXIT /B 1 msbuild dgl.sln || EXIT /B 1
COPY Release\dgl.dll . COPY Release\dgl.dll .
COPY Release\runUnitTests.exe . COPY Release\runUnitTests.exe .
......
Subproject commit ee773cd6ab2a32c07cf3f09ebaf9205ddf0a616e Subproject commit 7ce90a342b0bda9b7f88e707a326496324d60efd
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment