"router/vscode:/vscode.git/clone" did not exist on "55106ec4766c787823361db80ea461715aa57a7a"
Unverified Commit 605b5185 authored by Minjie Wang's avatar Minjie Wang Committed by GitHub
Browse files

[Refactor] Immutable graph index (#543)

* WIP

* header

* WIP .cc

* WIP

* transpose

* wip

* immutable graph .h and .cc

* WIP: nodeflow.cc

* compile

* remove all tmp dl managed ctx; they caused refcount issue

* one simple test

* WIP: testing

* test_graph

* fix graph index

* fix bug in sampler; pass pytorch utest

* WIP on mxnet

* fix lint

* fix mxnet unittest w/ unfortunate workaround

* fix msvc

* fix lint

* SliceRows and test_nodeflow

* resolve reviews

* resolve reviews

* try fix win ci

* try fix win ci

* poke win ci again

* poke

* lazy multigraph flag; stackoverflow error

* revert node subgraph test

* lazy object

* try fix win build

* try fix win build

* poke ci

* fix build script

* fix compile

* add a todo

* fix reviews

* fix compile
parent b2b8be25
......@@ -219,7 +219,7 @@ RandomWalkTraces BipartiteSingleSidedRandomWalkWithRestart(
DGL_REGISTER_GLOBAL("randomwalk._CAPI_DGLRandomWalk")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphHandle ghandle = args[0];
const IdArray seeds = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
const IdArray seeds = args[1];
const int num_traces = args[2];
const int num_hops = args[3];
const GraphInterface *ptr = static_cast<const GraphInterface *>(ghandle);
......@@ -230,7 +230,7 @@ DGL_REGISTER_GLOBAL("randomwalk._CAPI_DGLRandomWalk")
DGL_REGISTER_GLOBAL("randomwalk._CAPI_DGLRandomWalkWithRestart")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphHandle ghandle = args[0];
const IdArray seeds = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
const IdArray seeds = args[1];
const double restart_prob = args[2];
const uint64_t visit_threshold_per_seed = args[3];
const uint64_t max_visit_counts = args[4];
......@@ -245,7 +245,7 @@ DGL_REGISTER_GLOBAL("randomwalk._CAPI_DGLRandomWalkWithRestart")
DGL_REGISTER_GLOBAL("randomwalk._CAPI_DGLBipartiteSingleSidedRandomWalkWithRestart")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphHandle ghandle = args[0];
const IdArray seeds = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
const IdArray seeds = args[1];
const double restart_prob = args[2];
const uint64_t visit_threshold_per_seed = args[3];
const uint64_t max_visit_counts = args[4];
......
......@@ -248,14 +248,10 @@ NodeFlow ConstructNodeFlow(std::vector<dgl_id_t> neighbor_list,
int64_t num_edges, int num_hops, bool is_multigraph) {
NodeFlow nf;
uint64_t num_vertices = sub_vers->size();
nf.node_mapping = IdArray::Empty({static_cast<int64_t>(num_vertices)},
DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
nf.edge_mapping = IdArray::Empty({static_cast<int64_t>(num_edges)},
DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
nf.layer_offsets = IdArray::Empty({static_cast<int64_t>(num_hops + 1)},
DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
nf.flow_offsets = IdArray::Empty({static_cast<int64_t>(num_hops)},
DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
nf.node_mapping = NewIdArray(num_vertices);
nf.edge_mapping = NewIdArray(num_edges);
nf.layer_offsets = NewIdArray(num_hops + 1);
nf.flow_offsets = NewIdArray(num_hops);
dgl_id_t *node_map_data = static_cast<dgl_id_t *>(nf.node_mapping->data);
dgl_id_t *layer_off_data = static_cast<dgl_id_t *>(nf.layer_offsets->data);
......@@ -263,11 +259,11 @@ NodeFlow ConstructNodeFlow(std::vector<dgl_id_t> neighbor_list,
dgl_id_t *edge_map_data = static_cast<dgl_id_t *>(nf.edge_mapping->data);
// Construct sub_csr_graph
auto subg_csr = std::make_shared<ImmutableGraph::CSR>(num_vertices, num_edges);
subg_csr->indices.resize(num_edges);
subg_csr->edge_ids.resize(num_edges);
dgl_id_t* col_list_out = subg_csr->indices.data();
int64_t* indptr_out = subg_csr->indptr.data();
// TODO(minjie): is nodeflow a multigraph?
auto subg_csr = CSRPtr(new CSR(num_vertices, num_edges, is_multigraph));
dgl_id_t* indptr_out = static_cast<dgl_id_t*>(subg_csr->indptr()->data);
dgl_id_t* col_list_out = static_cast<dgl_id_t*>(subg_csr->indices()->data);
dgl_id_t* eid_out = static_cast<dgl_id_t*>(subg_csr->edge_ids()->data);
size_t collected_nedges = 0;
// The data from the previous steps:
......@@ -303,10 +299,8 @@ NodeFlow ConstructNodeFlow(std::vector<dgl_id_t> neighbor_list,
// When we expose the sampled graph to a Python user, we say the input nodes
// are in the first layer and the seed nodes are in the last layer.
// Thus, when we copy sampled results to a CSR, we need to reverse the order of layers.
size_t row_idx = 0;
for (size_t i = layer_offsets[num_hops - 1]; i < layer_offsets[num_hops]; i++) {
indptr_out[row_idx++] = 0;
}
std::fill(indptr_out, indptr_out + num_vertices + 1, 0);
size_t row_idx = layer_offsets[num_hops] - layer_offsets[num_hops - 1];
layer_off_data[0] = 0;
layer_off_data[1] = layer_offsets[num_hops] - layer_offsets[num_hops - 1];
int out_layer_idx = 1;
......@@ -322,51 +316,49 @@ NodeFlow ConstructNodeFlow(std::vector<dgl_id_t> neighbor_list,
CHECK_EQ(dst_id, neigh_pos->at(i).id);
size_t pos = neigh_pos->at(i).pos;
CHECK_LE(pos, neighbor_list.size());
size_t num_edges = neigh_pos->at(i).num_edges;
if (neighbor_list.empty()) CHECK_EQ(num_edges, 0);
const size_t nedges = neigh_pos->at(i).num_edges;
if (neighbor_list.empty()) CHECK_EQ(nedges, 0);
// We need to map the Ids of the neighbors to the subgraph.
auto neigh_it = neighbor_list.begin() + pos;
for (size_t i = 0; i < num_edges; i++) {
for (size_t i = 0; i < nedges; i++) {
dgl_id_t neigh = *(neigh_it + i);
CHECK(layer_ver_maps[layer_id + 1].find(neigh) != layer_ver_maps[layer_id + 1].end());
col_list_out[collected_nedges + i] = layer_ver_maps[layer_id + 1][neigh];
}
// We can simply copy the edge Ids.
std::copy_n(edge_list.begin() + pos,
num_edges, edge_map_data + collected_nedges);
collected_nedges += num_edges;
indptr_out[row_idx+1] = indptr_out[row_idx] + num_edges;
nedges, edge_map_data + collected_nedges);
collected_nedges += nedges;
indptr_out[row_idx+1] = indptr_out[row_idx] + nedges;
row_idx++;
}
layer_off_data[out_layer_idx + 1] = layer_off_data[out_layer_idx]
+ layer_offsets[layer_id + 1] - layer_offsets[layer_id];
out_layer_idx++;
}
CHECK(row_idx == num_vertices);
CHECK(indptr_out[row_idx] == num_edges);
CHECK(out_layer_idx == num_hops);
CHECK(layer_off_data[out_layer_idx] == num_vertices);
CHECK_EQ(row_idx, num_vertices);
CHECK_EQ(indptr_out[row_idx], num_edges);
CHECK_EQ(out_layer_idx, num_hops);
CHECK_EQ(layer_off_data[out_layer_idx], num_vertices);
// Copy flow offsets.
flow_off_data[0] = 0;
int out_flow_idx = 0;
for (size_t i = 0; i < layer_offsets.size() - 2; i++) {
size_t num_edges = subg_csr->GetDegree(layer_off_data[i + 1], layer_off_data[i + 2]);
size_t num_edges = indptr_out[layer_off_data[i + 2]] - indptr_out[layer_off_data[i + 1]];
flow_off_data[out_flow_idx + 1] = flow_off_data[out_flow_idx] + num_edges;
out_flow_idx++;
}
CHECK(out_flow_idx == num_hops - 1);
CHECK(flow_off_data[num_hops - 1] == static_cast<uint64_t>(num_edges));
for (size_t i = 0; i < subg_csr->edge_ids.size(); i++) {
subg_csr->edge_ids[i] = i;
}
std::iota(eid_out, eid_out + num_edges, 0);
if (edge_type == "in") {
nf.graph = GraphPtr(new ImmutableGraph(subg_csr, nullptr, is_multigraph));
if (edge_type == std::string("in")) {
nf.graph = GraphPtr(new ImmutableGraph(subg_csr, nullptr));
} else {
nf.graph = GraphPtr(new ImmutableGraph(nullptr, subg_csr, is_multigraph));
nf.graph = GraphPtr(new ImmutableGraph(nullptr, subg_csr));
}
return nf;
......@@ -382,9 +374,9 @@ NodeFlow SampleSubgraph(const ImmutableGraph *graph,
unsigned int time_seed = randseed();
const size_t num_seeds = seeds.size();
auto orig_csr = edge_type == "in" ? graph->GetInCSR() : graph->GetOutCSR();
const dgl_id_t* val_list = orig_csr->edge_ids.data();
const dgl_id_t* col_list = orig_csr->indices.data();
const int64_t* indptr = orig_csr->indptr.data();
const dgl_id_t* val_list = static_cast<dgl_id_t*>(orig_csr->edge_ids()->data);
const dgl_id_t* col_list = static_cast<dgl_id_t*>(orig_csr->indices()->data);
const dgl_id_t* indptr = static_cast<dgl_id_t*>(orig_csr->indptr()->data);
std::unordered_set<dgl_id_t> sub_ver_map; // The vertex Ids in a layer.
std::vector<std::pair<dgl_id_t, int> > sub_vers;
......@@ -535,7 +527,7 @@ NodeFlow SamplerOp::NeighborUniformSample(const ImmutableGraph *graph,
}
namespace {
void ConstructLayers(const int64_t *indptr,
void ConstructLayers(const dgl_id_t *indptr,
const dgl_id_t *indices,
const std::vector<dgl_id_t>& seed_array,
IdArray layer_sizes,
......@@ -596,14 +588,14 @@ namespace {
}
}
void ConstructFlows(const int64_t *indptr,
void ConstructFlows(const dgl_id_t *indptr,
const dgl_id_t *indices,
const dgl_id_t *eids,
const std::vector<dgl_id_t> &node_mapping,
const std::vector<int64_t> &actl_layer_sizes,
ImmutableGraph::CSR::vector<int64_t> *sub_indptr,
ImmutableGraph::CSR::vector<dgl_id_t> *sub_indices,
ImmutableGraph::CSR::vector<dgl_id_t> *sub_eids,
std::vector<dgl_id_t> *sub_indptr,
std::vector<dgl_id_t> *sub_indices,
std::vector<dgl_id_t> *sub_eids,
std::vector<dgl_id_t> *flow_offsets,
std::vector<dgl_id_t> *edge_mapping) {
/*
......@@ -626,7 +618,7 @@ namespace {
auto dst = node_mapping[first + src_size + j];
typedef std::pair<dgl_id_t, dgl_id_t> id_pair;
std::vector<id_pair> neighbor_indices;
for (int64_t k = indptr[dst]; k < indptr[dst + 1]; ++k) {
for (dgl_id_t k = indptr[dst]; k < indptr[dst + 1]; ++k) {
// TODO(gaiyu): accelerate hash table lookup
auto ret = source_map.find(indices[k]);
if (ret != source_map.end()) {
......@@ -654,9 +646,9 @@ NodeFlow SamplerOp::LayerUniformSample(const ImmutableGraph *graph,
const std::string &neighbor_type,
IdArray layer_sizes) {
const auto g_csr = neighbor_type == "in" ? graph->GetInCSR() : graph->GetOutCSR();
const int64_t *indptr = g_csr->indptr.data();
const dgl_id_t *indices = g_csr->indices.data();
const dgl_id_t *eids = g_csr->edge_ids.data();
const dgl_id_t *indptr = static_cast<dgl_id_t*>(g_csr->indptr()->data);
const dgl_id_t *indices = static_cast<dgl_id_t*>(g_csr->indices()->data);
const dgl_id_t *eids = static_cast<dgl_id_t*>(g_csr->edge_ids()->data);
std::vector<dgl_id_t> layer_offsets;
std::vector<dgl_id_t> node_mapping;
......@@ -671,13 +663,7 @@ NodeFlow SamplerOp::LayerUniformSample(const ImmutableGraph *graph,
&actl_layer_sizes,
&probabilities);
NodeFlow nf;
int64_t n_nodes = node_mapping.size();
// TODO(gaiyu): a better estimate for the expected number of nodes
auto sub_csr = std::make_shared<ImmutableGraph::CSR>(n_nodes, n_nodes);
sub_csr->indptr.clear(); // TODO(zhengda): Why indptr.resize(num_vertices + 1)?
std::vector<dgl_id_t> sub_indptr, sub_indices, sub_edge_ids;
std::vector<dgl_id_t> flow_offsets;
std::vector<dgl_id_t> edge_mapping;
ConstructFlows(indptr,
......@@ -685,35 +671,31 @@ NodeFlow SamplerOp::LayerUniformSample(const ImmutableGraph *graph,
eids,
node_mapping,
actl_layer_sizes,
&(sub_csr->indptr),
&(sub_csr->indices),
&(sub_csr->edge_ids),
&sub_indptr,
&sub_indices,
&sub_edge_ids,
&flow_offsets,
&edge_mapping);
// sanity check
CHECK_GT(sub_indptr.size(), 0);
CHECK_EQ(sub_indptr[0], 0);
CHECK_EQ(sub_indptr.back(), sub_indices.size());
CHECK_EQ(sub_indices.size(), sub_edge_ids.size());
if (neighbor_type == "in") {
nf.graph = GraphPtr(new ImmutableGraph(sub_csr, nullptr, graph->IsMultigraph()));
NodeFlow nf;
auto sub_csr = CSRPtr(new CSR(
VecToIdArray(sub_indptr), VecToIdArray(sub_indices), VecToIdArray(sub_edge_ids)));
if (neighbor_type == std::string("in")) {
nf.graph = GraphPtr(new ImmutableGraph(sub_csr, nullptr));
} else {
nf.graph = GraphPtr(new ImmutableGraph(nullptr, sub_csr, graph->IsMultigraph()));
}
nf.node_mapping = IdArray::Empty({n_nodes},
DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
nf.edge_mapping = IdArray::Empty({static_cast<int64_t>(edge_mapping.size())},
DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
nf.layer_offsets = IdArray::Empty({static_cast<int64_t>(layer_offsets.size())},
DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
nf.flow_offsets = IdArray::Empty({static_cast<int64_t>(flow_offsets.size())},
DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
std::copy(node_mapping.begin(), node_mapping.end(),
static_cast<dgl_id_t*>(nf.node_mapping->data));
std::copy(edge_mapping.begin(), edge_mapping.end(),
static_cast<dgl_id_t*>(nf.edge_mapping->data));
std::copy(layer_offsets.begin(), layer_offsets.end(),
static_cast<dgl_id_t*>(nf.layer_offsets->data));
std::copy(flow_offsets.begin(), flow_offsets.end(),
static_cast<dgl_id_t*>(nf.flow_offsets->data));
nf.graph = GraphPtr(new ImmutableGraph(nullptr, sub_csr));
}
nf.node_mapping = VecToIdArray(node_mapping);
nf.edge_mapping = VecToIdArray(edge_mapping);
nf.layer_offsets = VecToIdArray(layer_offsets);
nf.flow_offsets = VecToIdArray(flow_offsets);
return nf;
}
......@@ -722,7 +704,7 @@ DGL_REGISTER_GLOBAL("sampling._CAPI_UniformSampling")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
// arguments
const GraphHandle ghdl = args[0];
const IdArray seed_nodes = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
const IdArray seed_nodes = args[1];
const int64_t batch_start_id = args[2];
const int64_t batch_size = args[3];
const int64_t max_num_workers = args[4];
......@@ -761,11 +743,11 @@ DGL_REGISTER_GLOBAL("sampling._CAPI_LayerSampling")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
// arguments
const GraphHandle ghdl = args[0];
const IdArray seed_nodes = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
const IdArray seed_nodes = args[1];
const int64_t batch_start_id = args[2];
const int64_t batch_size = args[3];
const int64_t max_num_workers = args[4];
const IdArray layer_sizes = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[5]));
const IdArray layer_sizes = args[5];
const std::string neigh_type = args[6];
// process args
const GraphInterface *ptr = static_cast<const GraphInterface *>(ghdl);
......@@ -794,6 +776,4 @@ DGL_REGISTER_GLOBAL("sampling._CAPI_LayerSampling")
*rv = WrapVectorReturn(nflows);
});
} // namespace dgl
......@@ -133,7 +133,7 @@ DGL_REGISTER_GLOBAL("traversal._CAPI_DGLBFSNodes")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphHandle ghandle = args[0];
const Graph* gptr = static_cast<Graph*>(ghandle);
const IdArray src = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
const IdArray src = args[1];
bool reversed = args[2];
const auto& front = BFSNodesFrontiers(*gptr, src, reversed);
IdArray node_ids = CopyVectorToNDArray(front.ids);
......@@ -164,7 +164,7 @@ DGL_REGISTER_GLOBAL("traversal._CAPI_DGLBFSEdges")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphHandle ghandle = args[0];
const Graph* gptr = static_cast<Graph*>(ghandle);
const IdArray src = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
const IdArray src = args[1];
bool reversed = args[2];
const auto& front = BFSEdgesFrontiers(*gptr, src, reversed);
IdArray edge_ids = CopyVectorToNDArray(front.ids);
......@@ -202,7 +202,7 @@ DGL_REGISTER_GLOBAL("traversal._CAPI_DGLDFSEdges")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphHandle ghandle = args[0];
const Graph* gptr = static_cast<Graph*>(ghandle);
const IdArray source = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
const IdArray source = args[1];
const bool reversed = args[2];
CHECK(IsValidIdArray(source)) << "Invalid source node id array.";
const int64_t len = source->shape[0];
......@@ -221,7 +221,7 @@ DGL_REGISTER_GLOBAL("traversal._CAPI_DGLDFSLabeledEdges")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphHandle ghandle = args[0];
const Graph* gptr = static_cast<Graph*>(ghandle);
const IdArray source = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
const IdArray source = args[1];
const bool reversed = args[2];
const bool has_reverse_edge = args[3];
const bool has_nontree_edge = args[4];
......
......@@ -121,11 +121,23 @@ size_t NDArray::GetSize() const {
return GetDataSize(data_->dl_tensor);
}
// Report whether this array's strides describe a dense row-major layout.
//
// Returns true when:
//   * the strides pointer is null (treated here as the compact layout), or
//   * the array has no dimensions, or
//   * every stride equals the next dimension's extent times the next stride
//     and the innermost stride is 1.
// Aborts via CHECK when the array holds no data container.
bool NDArray::IsContiguous() const {
  CHECK(data_ != nullptr);
  const auto& tensor = data_->dl_tensor;
  if (tensor.strides == nullptr)
    return true;
  // A zero-dimensional tensor has no stride entries; without this guard the
  // final return below would read strides[-1].
  if (tensor.ndim <= 0)
    return true;
  for (int i = 0; i < tensor.ndim - 1; ++i) {
    if (tensor.strides[i] != tensor.shape[i+1] * tensor.strides[i+1])
      return false;
  }
  return tensor.strides[tensor.ndim - 1] == 1;
}
NDArray NDArray::CreateView(std::vector<int64_t> shape,
DLDataType dtype) {
DLDataType dtype,
int64_t offset) {
CHECK(data_ != nullptr);
CHECK(data_->dl_tensor.strides == nullptr)
<< "Can only create view for compact tensor";
CHECK(IsContiguous()) << "Can only create view for compact tensor";
NDArray ret = Internal::Create(shape, dtype, data_->dl_tensor.ctx);
ret.data_->dl_tensor.byte_offset =
this->data_->dl_tensor.byte_offset;
......@@ -136,7 +148,8 @@ NDArray NDArray::CreateView(std::vector<int64_t> shape,
// increase ref count
this->data_->IncRef();
ret.data_->manager_ctx = this->data_;
ret.data_->dl_tensor.data = this->data_->dl_tensor.data;
ret.data_->dl_tensor.data =
static_cast<char*>(this->data_->dl_tensor.data) + offset;
return ret;
}
......
......@@ -15,17 +15,17 @@ namespace dgl {
DGL_REGISTER_GLOBAL("runtime.degree_bucketing._CAPI_DGLDegreeBucketing")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
const IdArray msg_ids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[0]));
const IdArray vids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
const IdArray nids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[2]));
const IdArray msg_ids = args[0];
const IdArray vids = args[1];
const IdArray nids = args[2];
*rv = ConvertNDArrayVectorToPackedFunc(sched::DegreeBucketing(msg_ids, vids, nids));
});
DGL_REGISTER_GLOBAL("runtime.degree_bucketing._CAPI_DGLGroupEdgeByNodeDegree")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
const IdArray uids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[0]));
const IdArray vids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
const IdArray eids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[2]));
const IdArray uids = args[0];
const IdArray vids = args[1];
const IdArray eids = args[2];
*rv = ConvertNDArrayVectorToPackedFunc(
sched::GroupEdgeByNodeDegree(uids, vids, eids));
});
......
......@@ -207,9 +207,9 @@ def test_batch_no_edge():
if __name__ == '__main__':
test_batch_unbatch()
test_batch_unbatch1()
test_batch_unbatch2()
test_batched_edge_ordering()
test_batch_send_then_recv()
test_batch_send_and_recv()
test_batch_propagate()
test_batch_no_edge()
#test_batch_unbatch2()
#test_batched_edge_ordering()
#test_batch_send_then_recv()
#test_batch_send_and_recv()
#test_batch_propagate()
#test_batch_no_edge()
......@@ -7,7 +7,214 @@ import dgl
import backend as F
from dgl import DGLError
def test_graph_creation():
# graph generation: a random graph with 10 nodes
# and 20 edges.
# - has self loop
# - no multi edge
# Return the (src, dst) endpoint lists of the 20-edge, 10-node fixture graph.
# With sort=True the same edge set is returned ordered by (src, dst);
# otherwise the edges come back in the unsorted order whose positions the
# non-CSR tests use as expected edge ids.
def edge_pair_input(sort=False):
if sort:
src = [0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 4, 4, 5, 5, 6, 7, 7, 7, 9]
dst = [4, 6, 9, 3, 5, 3, 7, 5, 8, 1, 3, 4, 9, 1, 9, 6, 2, 8, 9, 2]
return src, dst
else:
src = [0, 0, 4, 5, 0, 4, 7, 4, 4, 3, 2, 7, 7, 5, 3, 2, 1, 9, 6, 1]
dst = [9, 6, 3, 9, 4, 4, 9, 9, 1, 8, 3, 2, 8, 1, 5, 7, 3, 2, 6, 5]
return src, dst
# Build a networkx DiGraph from the unsorted edge list, recording each edge's
# position in that list as its 'id' attribute.
def nx_input():
g = nx.DiGraph()
src, dst = edge_pair_input()
for i, e in enumerate(zip(src, dst)):
g.add_edge(*e, id=i)
return g
# Return the unsorted fixture edges as a list of (src, dst) tuples.
def elist_input():
src, dst = edge_pair_input()
return list(zip(src, dst))
# Return the fixture graph as a 10x10 scipy COO matrix with a 1 stored at
# every (src, dst) position of the unsorted edge list.
def scipy_coo_input():
src, dst = edge_pair_input()
return sp.coo_matrix((np.ones((20,)), (src, dst)), shape=(10,10))
# Return the fixture graph as a 10x10 scipy CSR matrix with sorted column
# indices, built by converting the COO form of the unsorted edge list.
def scipy_csr_input():
src, dst = edge_pair_input()
csr = sp.coo_matrix((np.ones((20,)), (src, dst)), shape=(10,10)).tocsr()
csr.sort_indices()
# After conversion and sorting, the stored edges are expected in this order:
# src = [0 0 0 1 1 2 2 3 3 4 4 4 4 5 5 6 7 7 7 9]
# dst = [4 6 9 3 5 3 7 5 8 1 3 4 9 1 9 6 2 8 9 2]
return csr
# Build the fixture graph incrementally: start from an empty DGLGraph, add
# 10 nodes, then add the unsorted edge list in one call.
def gen_by_mutation():
g = dgl.DGLGraph()
src, dst = edge_pair_input()
g.add_nodes(10)
g.add_edges(src, dst)
return g
# Construct a DGLGraph directly from `data`, forwarding the `readonly` flag
# to the DGLGraph constructor.
def gen_from_data(data, readonly):
g = dgl.DGLGraph(data, readonly=readonly)
return g
# Exercise the graph query API (node/edge existence, predecessors/successors,
# edge ids, in/out edges, degrees, adjacency matrix) on the same 10-node,
# 20-edge fixture graph built through several construction paths.
def test_query():
# Checks for graphs whose edge ids follow the unsorted edge list order
# (i.e. edge id == position in edge_pair_input()).
def _test_one(g):
assert g.number_of_nodes() == 10
assert g.number_of_edges() == 20
assert len(g) == 10
assert not g.is_multigraph
for i in range(10):
assert g.has_node(i)
assert i in g
assert not g.has_node(11)
assert not g.has_node(-1)
assert not -1 in g
assert F.allclose(g.has_nodes([-1,0,2,10,11]), F.tensor([0,1,1,0,0]))
src, dst = edge_pair_input()
for u, v in zip(src, dst):
assert g.has_edge_between(u, v)
assert not g.has_edge_between(0, 0)
assert F.allclose(g.has_edges_between([0, 0, 3], [0, 9, 8]), F.tensor([0,1,1]))
assert set(F.asnumpy(g.predecessors(9))) == set([0,5,7,4])
assert set(F.asnumpy(g.successors(2))) == set([7,3])
assert g.edge_id(4,4) == 5
assert F.allclose(g.edge_ids([4,0], [4,9]), F.tensor([5,0]))
src, dst = g.find_edges([3, 6, 5])
assert F.allclose(src, F.tensor([5, 7, 4]))
assert F.allclose(dst, F.tensor([9, 9, 4]))
src, dst, eid = g.in_edges(9, form='all')
tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid)))
assert set(tup) == set([(0,9,0),(5,9,3),(7,9,6),(4,9,7)])
src, dst, eid = g.in_edges([9,0,8], form='all') # test node#0 has no in edges
tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid)))
assert set(tup) == set([(0,9,0),(5,9,3),(7,9,6),(4,9,7),(3,8,9),(7,8,12)])
src, dst, eid = g.out_edges(0, form='all')
tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid)))
assert set(tup) == set([(0,9,0),(0,6,1),(0,4,4)])
src, dst, eid = g.out_edges([0,4,8], form='all') # test node#8 has no out edges
tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid)))
assert set(tup) == set([(0,9,0),(0,6,1),(0,4,4),(4,3,2),(4,4,5),(4,9,7),(4,1,8)])
# 'eid' order: edge ids must come back as 0..19 in sequence.
src, dst, eid = g.edges('all', 'eid')
t_src, t_dst = edge_pair_input()
t_tup = list(zip(t_src, t_dst, list(range(20))))
tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid)))
assert set(tup) == set(t_tup)
assert list(F.asnumpy(eid)) == list(range(20))
# 'srcdst' order: same edge set, with source ids non-decreasing.
src, dst, eid = g.edges('all', 'srcdst')
t_src, t_dst = edge_pair_input()
t_tup = list(zip(t_src, t_dst, list(range(20))))
tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid)))
assert set(tup) == set(t_tup)
assert list(F.asnumpy(src)) == sorted(list(F.asnumpy(src)))
assert g.in_degree(0) == 0
assert g.in_degree(9) == 4
assert F.allclose(g.in_degrees([0, 9]), F.tensor([0, 4]))
assert g.out_degree(8) == 0
assert g.out_degree(9) == 1
assert F.allclose(g.out_degrees([8, 9]), F.tensor([0, 1]))
assert np.array_equal(F.sparse_to_numpy(g.adjacency_matrix()), scipy_coo_input().toarray().T)
assert np.array_equal(F.sparse_to_numpy(g.adjacency_matrix(transpose=True)), scipy_coo_input().toarray())
def _test(g):
# test twice to see whether the cached format works or not
_test_one(g)
_test_one(g)
# Same checks for graphs built from the CSR input, where the expected edge
# ids are positions in the (src, dst)-sorted edge list instead.
def _test_csr_one(g):
assert g.number_of_nodes() == 10
assert g.number_of_edges() == 20
assert len(g) == 10
assert not g.is_multigraph
for i in range(10):
assert g.has_node(i)
assert i in g
assert not g.has_node(11)
assert not g.has_node(-1)
assert not -1 in g
assert F.allclose(g.has_nodes([-1,0,2,10,11]), F.tensor([0,1,1,0,0]))
src, dst = edge_pair_input(sort=True)
for u, v in zip(src, dst):
assert g.has_edge_between(u, v)
assert not g.has_edge_between(0, 0)
assert F.allclose(g.has_edges_between([0, 0, 3], [0, 9, 8]), F.tensor([0,1,1]))
assert set(F.asnumpy(g.predecessors(9))) == set([0,5,7,4])
assert set(F.asnumpy(g.successors(2))) == set([7,3])
# src = [0 0 0 1 1 2 2 3 3 4 4 4 4 5 5 6 7 7 7 9]
# dst = [4 6 9 3 5 3 7 5 8 1 3 4 9 1 9 6 2 8 9 2]
# eid = [0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9] (last digit of each edge id, 0..19)
assert g.edge_id(4,4) == 11
assert F.allclose(g.edge_ids([4,0], [4,9]), F.tensor([11,2]))
src, dst = g.find_edges([3, 6, 5])
assert F.allclose(src, F.tensor([1, 2, 2]))
assert F.allclose(dst, F.tensor([3, 7, 3]))
src, dst, eid = g.in_edges(9, form='all')
tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid)))
assert set(tup) == set([(0,9,2),(5,9,14),(7,9,18),(4,9,12)])
src, dst, eid = g.in_edges([9,0,8], form='all') # test node#0 has no in edges
tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid)))
assert set(tup) == set([(0,9,2),(5,9,14),(7,9,18),(4,9,12),(3,8,8),(7,8,17)])
src, dst, eid = g.out_edges(0, form='all')
tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid)))
assert set(tup) == set([(0,9,2),(0,6,1),(0,4,0)])
src, dst, eid = g.out_edges([0,4,8], form='all') # test node#8 has no out edges
tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid)))
assert set(tup) == set([(0,9,2),(0,6,1),(0,4,0),(4,3,10),(4,4,11),(4,9,12),(4,1,9)])
src, dst, eid = g.edges('all', 'eid')
t_src, t_dst = edge_pair_input(sort=True)
t_tup = list(zip(t_src, t_dst, list(range(20))))
tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid)))
assert set(tup) == set(t_tup)
assert list(F.asnumpy(eid)) == list(range(20))
src, dst, eid = g.edges('all', 'srcdst')
t_src, t_dst = edge_pair_input(sort=True)
t_tup = list(zip(t_src, t_dst, list(range(20))))
tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid)))
assert set(tup) == set(t_tup)
assert list(F.asnumpy(src)) == sorted(list(F.asnumpy(src)))
assert g.in_degree(0) == 0
assert g.in_degree(9) == 4
assert F.allclose(g.in_degrees([0, 9]), F.tensor([0, 4]))
assert g.out_degree(8) == 0
assert g.out_degree(9) == 1
assert F.allclose(g.out_degrees([8, 9]), F.tensor([0, 1]))
assert np.array_equal(F.sparse_to_numpy(g.adjacency_matrix()), scipy_coo_input().toarray().T)
assert np.array_equal(F.sparse_to_numpy(g.adjacency_matrix(transpose=True)), scipy_coo_input().toarray())
def _test_csr(g):
# test twice to see whether the cached format works or not
_test_csr_one(g)
_test_csr_one(g)
# Run the checks over every construction path, both mutable and readonly.
_test(gen_by_mutation())
_test(gen_from_data(elist_input(), False))
_test(gen_from_data(elist_input(), True))
_test(gen_from_data(nx_input(), False))
_test(gen_from_data(nx_input(), True))
_test(gen_from_data(scipy_coo_input(), False))
_test(gen_from_data(scipy_coo_input(), True))
_test_csr(gen_from_data(scipy_csr_input(), False))
_test_csr(gen_from_data(scipy_csr_input(), True))
def test_mutation():
g = dgl.DGLGraph()
# test add nodes with data
g.add_nodes(5)
......@@ -31,17 +238,6 @@ def test_graph_creation():
g.init_edata('h2', (g.number_of_edges(), 3), 'float32')
assert F.allclose(F.zeros((g.number_of_edges(), 3)), g.edata['h2'])
# Check that a graph built from an edge list assigns edge ids in list order:
# the i-th (u, v) pair must be reported as edge id i.
def test_create_from_elist():
elist = [(2, 1), (1, 0), (2, 0), (3, 0), (0, 2)]
g = dgl.DGLGraph(elist)
for i, (u, v) in enumerate(elist):
assert g.edge_id(u, v) == i
# immutable graph
# XXX: not enabled for pytorch
#g = dgl.DGLGraph(elist, readonly=True)
#for i, (u, v) in enumerate(elist):
# assert g.edge_id(u, v) == i
def test_scipy_adjmat():
g = dgl.DGLGraph()
g.add_nodes(10)
......@@ -66,34 +262,6 @@ def test_scipy_adjmat():
assert np.array_equal(adj_t2.toarray(), adj_t3.toarray())
assert np.array_equal(adj_t0.toarray(), adj_t2.toarray())
# Check that adjacency_matrix() results are cached per argument set and that
# the cache is dropped by clear_cache() and by graph mutation. Object identity
# (id) is used to tell a cached result from a rebuilt one.
# NOTE(review): the `dur2 < dur1` assertion depends on wall-clock timing and
# may be flaky on a loaded machine.
def test_adjmat_cache():
n = 1000
p = 10 * math.log(n) / n
a = sp.random(n, n, p, data_rvs=lambda n: np.ones(n))
g = dgl.DGLGraph(a)
# the first call should construct the adj
t0 = time.time()
adj1 = g.adjacency_matrix()
dur1 = time.time() - t0
# the second call should be cached and should be very fast
t0 = time.time()
adj2 = g.adjacency_matrix()
dur2 = time.time() - t0
print('first time {}, second time {}'.format(dur1, dur2))
assert dur2 < dur1
assert id(adj1) == id(adj2)
# different arg should result in different cache
adj3 = g.adjacency_matrix(transpose=True)
assert id(adj3) != id(adj2)
# manually clear the cache
g.clear_cache()
adj35 = g.adjacency_matrix()
assert id(adj35) != id(adj2)
# mutating the graph should invalidate the cache
g.add_nodes(10)
adj4 = g.adjacency_matrix()
assert id(adj4) != id(adj35)
def test_incmat():
g = dgl.DGLGraph()
g.add_nodes(4)
......@@ -127,34 +295,6 @@ def test_incmat():
[0., 1., 0., -1., 0.],
[0., 0., 1., 1., 0.]]))
# Check that incidence_matrix() results are cached per argument and that the
# cache is dropped by clear_cache() and by graph mutation. Object identity
# (id) is used to tell a cached result from a rebuilt one.
# NOTE(review): the `dur2 < dur1` assertion depends on wall-clock timing and
# may be flaky on a loaded machine.
def test_incmat_cache():
n = 1000
p = 10 * math.log(n) / n
a = sp.random(n, n, p, data_rvs=lambda n: np.ones(n))
g = dgl.DGLGraph(a)
# the first call should construct the inc
t0 = time.time()
inc1 = g.incidence_matrix("in")
dur1 = time.time() - t0
# the second call should be cached and should be very fast
t0 = time.time()
inc2 = g.incidence_matrix("in")
dur2 = time.time() - t0
print('first time {}, second time {}'.format(dur1, dur2))
assert dur2 < dur1
assert id(inc1) == id(inc2)
# different arg should result in different cache
inc3 = g.incidence_matrix("both")
assert id(inc3) != id(inc2)
# manually clear the cache
g.clear_cache()
inc35 = g.incidence_matrix("in")
assert id(inc35) != id(inc2)
# mutating the graph should invalidate the cache
g.add_nodes(10)
inc4 = g.incidence_matrix("in")
assert id(inc4) != id(inc35)
def test_readonly():
g = dgl.DGLGraph()
g.add_nodes(5)
......@@ -242,11 +382,9 @@ def test_find_edges():
assert fail
if __name__ == '__main__':
test_graph_creation()
test_create_from_elist()
test_adjmat_cache()
test_query()
test_mutation()
test_scipy_adjmat()
test_incmat()
test_incmat_cache()
test_readonly()
test_find_edges()
#!/bin/bash
set -e
if [ -d build ]; then
rm -rf build
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment