Unverified Commit 8651be54 authored by Da Zheng's avatar Da Zheng Committed by GitHub
Browse files

[Perf] Accelerate block_compute when all nodes are invoked. (#434)

* refactor.

* accelerate update_all in nodeflow.

* fix.

* refactor.

* fix lint.

* fix lint.

* reorganize.

* reorg.

* remove.

* add doc.

* impl block_incidence_matrix

* fix lint.

* fix.

* simple fix.

* fix test.

* fix interface.

* fix eid.

* fix comments.
parent ca2a7e1c
......@@ -534,23 +534,27 @@ class ImmutableGraph: public GraphInterface {
return edge_list_;
}
protected:
DGLIdIters GetInEdgeIdRef(dgl_id_t src, dgl_id_t dst) const;
DGLIdIters GetOutEdgeIdRef(dgl_id_t src, dgl_id_t dst) const;
/*!
* \brief Get the CSR array that represents the in-edges.
* This method copies data from std::vector to IdArray.
* \param start the first row to copy.
* \param end the last row to copy (exclusive).
* \return the CSR array.
*/
CSRArray GetInCSRArray() const;
CSRArray GetInCSRArray(size_t start, size_t end) const;
/*!
* \brief Get the CSR array that represents the out-edges.
* This method copies data from std::vector to IdArray.
* \param start the first row to copy.
* \param end the last row to copy (exclusive).
* \return the CSR array.
*/
CSRArray GetOutCSRArray() const;
CSRArray GetOutCSRArray(size_t start, size_t end) const;
protected:
DGLIdIters GetInEdgeIdRef(dgl_id_t src, dgl_id_t dst) const;
DGLIdIters GetOutEdgeIdRef(dgl_id_t src, dgl_id_t dst) const;
/*!
* \brief Compact a subgraph.
......
/*!
* Copyright (c) 2019 by Contributors
* \file dgl/nodeflow.h
* \brief DGL NodeFlow class.
*/
#ifndef DGL_NODEFLOW_H_
#define DGL_NODEFLOW_H_
#include <vector>
#include <string>
#include "graph_interface.h"
namespace dgl {
class ImmutableGraph;
/*!
* \brief A NodeFlow graph stores the sampling results for a sampler that samples
* nodes/edges in layers.
*
* We store multiple layers of the sampling results in a single graph, which results
* in a more compact format. We store extra information,
* such as the node and edge mapping from the NodeFlow graph to the parent graph.
*/
/*!
 * \brief A NodeFlow graph stores the sampling results for a sampler that samples
 * nodes/edges in layers.
 *
 * We store multiple layers of the sampling results in a single graph, which results
 * in a more compact format. We store extra information,
 * such as the node and edge mapping from the NodeFlow graph to the parent graph.
 */
struct NodeFlow {
  /*! \brief The graph that stores all sampled layers compactly. */
  GraphPtr graph;
  /*!
   * \brief The offsets of each layer: layer i occupies node ids
   * [layer_offsets[i], layer_offsets[i+1]) in `graph`.
   */
  IdArray layer_offsets;
  /*!
   * \brief The offsets of each flow (the edge block connecting two
   * consecutive layers).
   */
  IdArray flow_offsets;
  /*!
   * \brief The node mapping from the NodeFlow graph to the parent graph.
   */
  IdArray node_mapping;
  /*!
   * \brief The edge mapping from the NodeFlow graph to the parent graph.
   */
  IdArray edge_mapping;
};
/*!
* \brief Get a slice on a graph that represents a NodeFlow.
*
* The entire block has to be taken as a slice. Users have to specify the
* correct starting and ending location of a layer.
*
* If remap is false, the returned arrays can be viewed as a sub-matrix slice
* of the adjmat of the input graph. Let the adjmat of the input graph be A,
* then the slice is equal to (in numpy syntax):
* A[layer1_start:layer1_end, layer0_start:layer0_end]
*
* If remap is true, the returned arrays represents an adjacency matrix
* of shape NxM, where N is the number of nodes in layer1 and M is
* the number of nodes in layer0. Nodes in layer0 will be remapped to
* [0, M) and nodes in layer1 will be remapped to [0, N).
*
* A row of the returned adjacency matrix represents the destination
* of an edge and the column represents the source.
*
* If fmt == "csr", the function returns three arrays: indptr, indices, eid.
* If fmt == "coo", the function returns two arrays: idx, eid. Here, the idx array
* is the concatenation of src and dst node id arrays.
*
* \param graph An immutable graph.
* \param fmt the format of the returned adjacency matrix.
* \param layer0_size the size of the first layer in the block.
* \param layer1_start the location where the second layer starts.
 * \param layer1_end the location where the second layer ends.
* \param remap Indicates to remap all vertex ids and edge Ids to local Id
* space.
* \return a vector of IdArrays.
*/
std::vector<IdArray> GetNodeFlowSlice(const ImmutableGraph &graph, const std::string &fmt,
size_t layer0_size, size_t layer1_start,
size_t layer1_end, bool remap);
} // namespace dgl
#endif // DGL_NODEFLOW_H_
......@@ -9,40 +9,12 @@
#include <vector>
#include <string>
#include "graph_interface.h"
#include "nodeflow.h"
namespace dgl {
class ImmutableGraph;
/*!
* \brief A NodeFlow graph stores the sampling results for a sampler that samples
* nodes/edges in layers.
*
* We store multiple layers of the sampling results in a single graph, which results
* in a more compact format. We store extra information,
* such as the node and edge mapping from the NodeFlow graph to the parent graph.
*/
struct NodeFlow {
/*! \brief The graph. */
GraphPtr graph;
/*!
* \brief the offsets of each layer.
*/
IdArray layer_offsets;
/*!
* \brief the offsets of each flow.
*/
IdArray flow_offsets;
/*!
* \brief The node mapping from the NodeFlow graph to the parent graph.
*/
IdArray node_mapping;
/*!
* \brief The edge mapping from the NodeFlow graph to the parent graph.
*/
IdArray edge_mapping;
};
class SamplerOp {
public:
/*!
......
......@@ -395,6 +395,161 @@ class NodeFlow(DGLBaseGraph):
assert F.asnumpy(F.sum(ret == -1, 0)) == 0, "The eid in the parent graph is invalid."
return ret
def block_edges(self, block_id):
    """Return the edges in a block.

    Parameters
    ----------
    block_id : int
        The specified block to return the edges.

    Returns
    -------
    Tensor
        The src nodes.
    Tensor
        The dst nodes.
    Tensor
        The edge ids.
    """
    # Size of the source layer; the C API uses it to remap global node ids
    # into ids local to each layer.
    layer0_size = self._layer_offsets[block_id + 1] - self._layer_offsets[block_id]
    rst = _CAPI_NodeFlowGetBlockAdj(self._graph._handle, "coo", layer0_size,
                                    self._layer_offsets[block_id + 1],
                                    self._layer_offsets[block_id + 2])
    idx = utils.toindex(rst(0)).tousertensor()
    eid = utils.toindex(rst(1))
    # The "coo" index array stores dst ids in the first half and src ids in
    # the second half. Use floor division instead of int(len/2) to avoid
    # the needless float round-trip.
    num_edges = len(idx) // 2
    assert len(eid) == num_edges
    return idx[num_edges:], idx[:num_edges], eid.tousertensor()
def block_adjacency_matrix(self, block_id, ctx):
    """Return the adjacency matrix representation for a specific block in a NodeFlow.

    A row of the returned adjacency matrix represents the destination
    of an edge and the column represents the source.

    Parameters
    ----------
    block_id : int
        The specified block to return the adjacency matrix.
    ctx : context
        The context of the returned matrix.

    Returns
    -------
    SparseTensor
        The adjacency matrix.
    Tensor
        A index for data shuffling due to sparse format change. Return None
        if shuffle is not required.
    """
    fmt = F.get_preferred_sparse_format()
    # We need to extract two layers: the C API takes the source-layer size
    # plus the start/end offsets of the destination layer.
    layer0_size = self._layer_offsets[block_id + 1] - self._layer_offsets[block_id]
    rst = _CAPI_NodeFlowGetBlockAdj(self._graph._handle, fmt, layer0_size,
                                    self._layer_offsets[block_id + 1],
                                    self._layer_offsets[block_id + 2])
    num_rows = self.layer_size(block_id + 1)
    num_cols = self.layer_size(block_id)
    if fmt == "csr":
        indptr = F.copy_to(utils.toindex(rst(0)).tousertensor(), ctx)
        indices = F.copy_to(utils.toindex(rst(1)).tousertensor(), ctx)
        shuffle = utils.toindex(rst(2))
        dat = F.ones(indices.shape, dtype=F.float32, ctx=ctx)
        spmat = F.sparse_matrix(dat, ('csr', indices, indptr),
                                (num_rows, num_cols))[0]
        return spmat, shuffle.tousertensor()
    if fmt == "coo":
        ## FIXME(minjie): data type
        idx = F.copy_to(utils.toindex(rst(0)).tousertensor(), ctx)
        nnz = self.block_size(block_id)
        idx = F.reshape(idx, (2, nnz))
        dat = F.ones((nnz,), dtype=F.float32, ctx=ctx)
        adj, shuffle_idx = F.sparse_matrix(dat, ('coo', idx), (num_rows, num_cols))
        return adj, shuffle_idx
    raise Exception("unknown format")
def block_incidence_matrix(self, block_id, typestr, ctx):
    """Return the incidence matrix representation of the block.

    An incidence matrix is an n x m sparse matrix, where n is
    the number of nodes and m is the number of edges. Each nnz
    value indicating whether the edge is incident to the node
    or not.

    There are three types of an incidence matrix `I`:

    * "in":
      - I[v, e] = 1 if e is the in-edge of v (or v is the dst node of e);
      - I[v, e] = 0 otherwise.
    * "out":
      - I[v, e] = 1 if e is the out-edge of v (or v is the src node of e);
      - I[v, e] = 0 otherwise.
    * "both":
      - I[v, e] = 1 if e is the in-edge of v;
      - I[v, e] = -1 if e is the out-edge of v;
      - I[v, e] = 0 otherwise (including self-loop).

    Parameters
    ----------
    block_id : int
        The specified block to return the incidence matrix.
    typestr : str
        Can be either "in", "out" or "both"
    ctx : context
        The context of returned incidence matrix.

    Returns
    -------
    SparseTensor
        The incidence matrix.
    Tensor
        A index for data shuffling due to sparse format change. Return None
        if shuffle is not required.
    """
    src, dst, eid = self.block_edges(block_id)
    src = F.copy_to(src, ctx)  # the index of the ctx will be cached
    dst = F.copy_to(dst, ctx)  # the index of the ctx will be cached
    eid = F.copy_to(eid, ctx)  # the index of the ctx will be cached
    if typestr == 'in':
        n = self.layer_size(block_id + 1)
        m = self.block_size(block_id)
        row = F.unsqueeze(dst, 0)
        col = F.unsqueeze(eid, 0)
        idx = F.cat([row, col], dim=0)
        # FIXME(minjie): data type
        dat = F.ones((m,), dtype=F.float32, ctx=ctx)
        inc, shuffle_idx = F.sparse_matrix(dat, ('coo', idx), (n, m))
    elif typestr == 'out':
        n = self.layer_size(block_id)
        m = self.block_size(block_id)
        row = F.unsqueeze(src, 0)
        col = F.unsqueeze(eid, 0)
        idx = F.cat([row, col], dim=0)
        # FIXME(minjie): data type
        dat = F.ones((m,), dtype=F.float32, ctx=ctx)
        inc, shuffle_idx = F.sparse_matrix(dat, ('coo', idx), (n, m))
    elif typestr == 'both':
        # TODO does it work for bipartite graph?
        # BUGFIX: n and m were previously unbound in this branch, raising a
        # NameError. src ids are local to layer `block_id` and dst ids are
        # local to layer `block_id + 1`; use the larger of the two layer
        # sizes so every row index fits in the matrix.
        n = max(self.layer_size(block_id), self.layer_size(block_id + 1))
        m = self.block_size(block_id)
        # first remove entries for self loops
        mask = F.logical_not(F.equal(src, dst))
        src = F.boolean_mask(src, mask)
        dst = F.boolean_mask(dst, mask)
        eid = F.boolean_mask(eid, mask)
        n_entries = F.shape(src)[0]
        # create index
        row = F.unsqueeze(F.cat([src, dst], dim=0), 0)
        col = F.unsqueeze(F.cat([eid, eid], dim=0), 0)
        idx = F.cat([row, col], dim=0)
        # FIXME(minjie): data type
        x = -F.ones((n_entries,), dtype=F.float32, ctx=ctx)
        y = F.ones((n_entries,), dtype=F.float32, ctx=ctx)
        dat = F.cat([x, y], dim=0)
        inc, shuffle_idx = F.sparse_matrix(dat, ('coo', idx), (n, m))
    else:
        raise DGLError('Invalid incidence matrix type: %s' % str(typestr))
    return inc, shuffle_idx
def set_n_initializer(self, initializer, layer_id=ALL, field=None):
"""Set the initializer for empty node features.
......@@ -651,12 +806,13 @@ class NodeFlow(DGLBaseGraph):
assert reduce_func is not None
if is_all(v):
dest_nodes = utils.toindex(self.layer_nid(block_id + 1))
u, v, _ = self._graph.in_edges(dest_nodes)
u = utils.toindex(self._glb2lcl_nid(u.tousertensor(), block_id))
v = utils.toindex(self._glb2lcl_nid(v.tousertensor(), block_id + 1))
dest_nodes = utils.toindex(F.arange(0, self.layer_size(block_id + 1)))
eid = utils.toindex(F.arange(0, self.block_size(block_id)))
with ir.prog() as prog:
scheduler.schedule_nodeflow_update_all(graph=self,
block_id=block_id,
message_func=message_func,
reduce_func=reduce_func,
apply_func=apply_node_func)
Runtime.run(prog)
else:
dest_nodes = utils.toindex(v)
u, v, eid = self._graph.in_edges(dest_nodes)
......
......@@ -180,6 +180,7 @@ def schedule_update_all(graph,
nodes = utils.toindex(slice(0, graph.number_of_nodes()))
schedule_apply_nodes(graph, nodes, apply_func, inplace=False)
else:
# TODO is the eid here correct?
eid = utils.toindex(slice(0, graph.number_of_edges())) # shortcut for ALL
recv_nodes = utils.toindex(slice(0, graph.number_of_nodes())) # shortcut for ALL
# create vars
......@@ -243,8 +244,8 @@ def schedule_nodeflow_apply_nodes(graph,
Parameters
----------
graph: DGLGraph
The DGLGraph to use
graph: NodeFlow
The NodeFlow to use
layer_id : int
The layer where we apply node update function.
v : utils.Index
......@@ -266,6 +267,7 @@ def schedule_nodeflow_apply_nodes(graph,
return apply_func(nbatch)
afunc = var.FUNC(_afunc_wrapper)
applied_feat = ir.NODE_UDF(afunc, v_nf)
# TODO we need to avoid index_copy here.
if inplace:
ir.WRITE_ROW_INPLACE_(var_nf, var_v, applied_feat)
else:
......@@ -324,8 +326,8 @@ def schedule_nodeflow_apply_edges(graph, block_id,
Parameters
----------
graph: DGLGraph
The DGLGraph to use
graph: NodeFlow
The NodeFlow to use
block_id : int
The block whose edges we apply edge update function.
u : utils.Index
......@@ -359,6 +361,7 @@ def schedule_nodeflow_apply_edges(graph, block_id,
return apply_func(ebatch)
_efunc = var.FUNC(_efunc_wrapper)
new_fdedge = ir.EDGE_UDF(_efunc, fdsrc, fdedge, fddst)
# TODO we need to avoid index_copy here.
if inplace:
ir.WRITE_ROW_INPLACE_(var_ef, var_eid, new_fdedge)
else:
......@@ -490,6 +493,53 @@ def schedule_group_apply_edge(graph,
else:
ir.WRITE_ROW_(var_ef, var_eid, var_out)
def schedule_nodeflow_update_all(graph,
                                 block_id,
                                 message_func,
                                 reduce_func,
                                 apply_func):
    """get update_all schedule in a block.

    Parameters
    ----------
    graph: NodeFlow
        The NodeFlow to use
    block_id : int
        The block where we perform computation.
    message_func: callable or list of callable
        The message function
    reduce_func: callable or list of callable
        The reduce function
    apply_func: callable
        The apply node function
    """
    # A NodeFlow shouldn't have 0 edges.
    assert graph.block_size(block_id) > 0
    # Slices covering every edge of the block and every node of the
    # destination layer (shortcuts for ALL).
    all_eid = utils.toindex(slice(0, graph.block_size(block_id)))
    all_dest = utils.toindex(slice(0, graph.layer_size(block_id + 1)))
    # create vars
    out_frame = var.FEAT_DICT(graph._get_node_frame(block_id + 1), name='out_nf')
    dest_var = var.IDX(all_dest, name='dest_nodes')
    eid_var = var.IDX(all_eid)

    def uv_getter():
        # TODO get all edges in the block.
        src, dst, _ = graph.block_edges(block_id)
        return var.IDX(utils.toindex(src)), var.IDX(utils.toindex(dst))

    def adj_creator():
        return spmv.build_block_adj_matrix_graph(graph, block_id)

    def inc_creator():
        return spmv.build_block_inc_matrix_graph(graph, block_id)

    # generate send + reduce
    reduced_feat = _gen_send_reduce(graph, graph._get_node_frame(block_id),
                                    graph._get_node_frame(block_id + 1),
                                    graph._get_edge_frame(block_id),
                                    message_func, reduce_func,
                                    eid_var, dest_var,
                                    uv_getter, adj_creator, inc_creator)
    # generate optional apply
    final_feat = _apply_with_accum(graph, dest_var, out_frame, reduced_feat, apply_func)
    ir.WRITE_DICT_(out_frame, final_feat)
def schedule_nodeflow_compute(graph,
block_id,
u, v, eid,
......@@ -502,8 +552,8 @@ def schedule_nodeflow_compute(graph,
Parameters
----------
graph: DGLGraph
The DGLGraph to use
graph: NodeFlow
The NodeFlow to use
block_id : int
The block where we perform computation.
u : utils.Index
......@@ -527,7 +577,7 @@ def schedule_nodeflow_compute(graph,
if len(eid) == 0:
# All the nodes are 0deg; downgrades to apply.
if apply_func is not None:
schedule_nodeflow_apply_nodes(graph, block_id + 1, v, apply_func, inplace)
schedule_nodeflow_apply_nodes(graph, block_id + 1, dest_nodes, apply_func, inplace)
else:
# create vars
var_nf = var.FEAT_DICT(graph._get_node_frame(block_id + 1), name='out_nf')
......
......@@ -131,6 +131,30 @@ def gen_e2v_spmv_schedule(inc, spmv_rfunc, mfr, out):
ftdst = ir.SPMV(inc_var, ftmsg)
ir.WRITE_COL_(out, var.STR(rfn.out_field), ftdst)
def build_block_adj_matrix_graph(graph, block_id):
    """Build the adjacency matrix of one block of a NodeFlow.

    Parameters
    ----------
    graph : NodeFlow
        The NodeFlow
    block_id : int
        the block Id

    Returns
    -------
    callable
        Maps a ctx to the block's adjacency matrix on that ctx.
    utils.Index
        A index for data shuffling due to sparse format change. Return None
        if shuffle is not required.
    """
    #TODO why is this constructed twice?
    # The first call is made only to obtain the shuffle index on CPU; the
    # returned lambda rebuilds the matrix on the requested ctx.
    _, shuffle_idx = graph.block_adjacency_matrix(block_id, F.cpu())
    shuffle_idx = utils.toindex(shuffle_idx) if shuffle_idx is not None else None
    return lambda ctx: graph.block_adjacency_matrix(block_id, ctx)[0], shuffle_idx
def build_adj_matrix_graph(graph):
"""Build adjacency matrix of the whole graph.
......@@ -148,6 +172,7 @@ def build_adj_matrix_graph(graph):
if shuffle is not required.
"""
gidx = graph._graph
# TODO Why invoking adjacency_matrix twice?
_, shuffle_idx = gidx.adjacency_matrix(False, F.cpu())
return lambda ctx: gidx.adjacency_matrix(False, ctx)[0], shuffle_idx
......@@ -226,6 +251,28 @@ def build_adj_matrix_uv(edges, reduce_nodes, num_sources):
shuffle_idx = utils.toindex(shuffle_idx) if shuffle_idx is not None else None
return utils.CtxCachedObject(lambda ctx: F.copy_to(mat, ctx)), shuffle_idx
def build_block_inc_matrix_graph(graph, block_id):
    """Build the "in" incidence matrix for one block of a NodeFlow.

    Parameters
    ----------
    graph : NodeFlow
        The NodeFlow.
    block_id : int
        The block Id

    Returns
    -------
    callable
        Maps a ctx to the block's incidence matrix on that ctx.
    None
        The incidence matrix does not use the data tensor, so no
        shuffle index is needed.
    """
    # inc mat will not use data tensor so conversion index is not needed
    def get_inc_matrix(ctx):
        return graph.block_incidence_matrix(block_id, 'in', ctx)[0]
    return get_inc_matrix, None
def build_inc_matrix_graph(graph):
"""Build incidence matrix.
......
......@@ -7,6 +7,7 @@
#include <dgl/immutable_graph.h>
#include <dgl/graph_op.h>
#include <dgl/sampler.h>
#include <dgl/nodeflow.h>
#include "../c_api_common.h"
using dgl::runtime::DGLArgs;
......@@ -419,6 +420,19 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphGetAdj")
*rv = ConvertAdjToPackedFunc(res);
});
// C API entry point: slice one block of a NodeFlow out of an immutable graph
// and return its adjacency arrays (format is "csr" or "coo"); ids are always
// remapped to block-local id space (remap = true).
DGL_REGISTER_GLOBAL("nodeflow._CAPI_NodeFlowGetBlockAdj")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
    GraphHandle ghandle = args[0];
    std::string format = args[1];
    int64_t layer0_size = args[2];  // number of nodes in the source layer
    int64_t start = args[3];        // first node id of the destination layer
    int64_t end = args[4];          // one past the last node id of the destination layer
    const GraphInterface *ptr = static_cast<const GraphInterface *>(ghandle);
    const ImmutableGraph* gptr = dynamic_cast<const ImmutableGraph*>(ptr);
    // BUGFIX: the dynamic_cast result was dereferenced unchecked; fail loudly
    // instead of crashing when a mutable graph handle is passed in.
    CHECK(gptr) << "_CAPI_NodeFlowGetBlockAdj only supports ImmutableGraph";
    auto res = GetNodeFlowSlice(*gptr, format, layer0_size, start, end, true);
    *rv = ConvertAdjToPackedFunc(res);
  });
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphRandomWalk")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphHandle ghandle = args[0];
......
......@@ -604,43 +604,39 @@ Subgraph ImmutableGraph::EdgeSubgraph(IdArray eids) const {
return subg;
}
ImmutableGraph::CSRArray ImmutableGraph::GetInCSRArray() const {
auto in_csr = GetInCSR();
IdArray indptr = IdArray::Empty({static_cast<int64_t>(in_csr->indptr.size())},
/*
 * Copy rows [start, end) of a CSR structure into freshly allocated IdArrays.
 * The returned indptr is rebased so that indptr[0] == 0; indices and edge
 * ids are copied verbatim.
 */
ImmutableGraph::CSRArray GetCSRArray(ImmutableGraph::CSR::Ptr csr, size_t start, size_t end) {
  const size_t num_rows = end - start;
  const auto row_begin = csr->indptr[start];
  const auto row_end = csr->indptr[end];
  const size_t nnz = row_end - row_begin;
  IdArray indptr = IdArray::Empty({static_cast<int64_t>(num_rows + 1)},
                                  DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
  IdArray indices = IdArray::Empty({static_cast<int64_t>(nnz)},
                                   DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
  IdArray eids = IdArray::Empty({static_cast<int64_t>(nnz)},
                                DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
  int64_t *indptr_out = static_cast<int64_t*>(indptr->data);
  dgl_id_t *indices_out = static_cast<dgl_id_t*>(indices->data);
  dgl_id_t *eids_out = static_cast<dgl_id_t*>(eids->data);
  // Rebase the row offsets relative to the first copied row.
  for (size_t r = 0; r <= num_rows; ++r)
    indptr_out[r] = csr->indptr[start + r] - row_begin;
  std::copy(csr->indices.begin() + row_begin,
            csr->indices.begin() + row_end, indices_out);
  std::copy(csr->edge_ids.begin() + row_begin,
            csr->edge_ids.begin() + row_end, eids_out);
  return ImmutableGraph::CSRArray{indptr, indices, eids};
}
ImmutableGraph::CSRArray ImmutableGraph::GetOutCSRArray() const {
auto out_csr = GetOutCSR();
IdArray indptr = IdArray::Empty({static_cast<int64_t>(out_csr->indptr.size())},
DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
IdArray indices = IdArray::Empty({static_cast<int64_t>(out_csr->NumEdges())},
DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
IdArray eids = IdArray::Empty({static_cast<int64_t>(out_csr->NumEdges())},
DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
int64_t *indptr_data = static_cast<int64_t*>(indptr->data);
dgl_id_t* indices_data = static_cast<dgl_id_t*>(indices->data);
dgl_id_t* eid_data = static_cast<dgl_id_t*>(eids->data);
std::copy(out_csr->indptr.begin(), out_csr->indptr.end(), indptr_data);
std::copy(out_csr->indices.begin(), out_csr->indices.end(), indices_data);
std::copy(out_csr->edge_ids.begin(), out_csr->edge_ids.end(), eid_data);
return CSRArray{indptr, indices, eids};
// Materialize rows [start, end) of the in-edge CSR as IdArrays.
ImmutableGraph::CSRArray ImmutableGraph::GetInCSRArray(size_t start, size_t end) const {
  return GetCSRArray(GetInCSR(), start, end);
}
// Materialize rows [start, end) of the out-edge CSR as IdArrays.
ImmutableGraph::CSRArray ImmutableGraph::GetOutCSRArray(size_t start, size_t end) const {
  return GetCSRArray(GetOutCSR(), start, end);
}
std::vector<IdArray> ImmutableGraph::GetAdj(bool transpose, const std::string &fmt) const {
if (fmt == "csr") {
CSRArray arrs = transpose ? this->GetOutCSRArray() : this->GetInCSRArray();
CSRArray arrs = transpose ? this->GetOutCSRArray(0, NumVertices())
: this->GetInCSRArray(0, NumVertices());
return std::vector<IdArray>{arrs.indptr, arrs.indices, arrs.id};
} else if (fmt == "coo") {
int64_t num_edges = this->NumEdges();
......
/*!
* Copyright (c) 2019 by Contributors
* \file graph/nodeflow.cc
* \brief DGL NodeFlow related functions.
*/
#include <dgl/immutable_graph.h>
#include <dgl/nodeflow.h>
#include <string.h>
#include "../c_api_common.h"
namespace dgl {
std::vector<IdArray> GetNodeFlowSlice(const ImmutableGraph &graph, const std::string &fmt,
                                      size_t layer0_size, size_t layer1_start,
                                      size_t layer1_end, bool remap) {
  CHECK_GE(layer1_start, layer0_size);
  if (fmt == "csr") {
    // First node id of the source layer; subtracting it remaps source ids
    // to [0, layer0_size).
    dgl_id_t first_vid = layer1_start - layer0_size;
    ImmutableGraph::CSRArray arrs = graph.GetInCSRArray(layer1_start, layer1_end);
    if (remap) {
      dgl_id_t *indices_data = static_cast<dgl_id_t*>(arrs.indices->data);
      dgl_id_t *eid_data = static_cast<dgl_id_t*>(arrs.id->data);
      const size_t len = arrs.indices->shape[0];
      // BUGFIX: guard the empty slice — eid_data[0] would read out of bounds.
      if (len > 0) {
        // NOTE(review): remapping uses eid_data[0] as the base, which assumes
        // the slice's edge ids form a contiguous ascending range — TODO confirm
        // this invariant of the sampler.
        dgl_id_t first_eid = eid_data[0];
        for (size_t i = 0; i < len; i++) {
          CHECK_GE(indices_data[i], first_vid);
          indices_data[i] -= first_vid;
          CHECK_GE(eid_data[i], first_eid);
          eid_data[i] -= first_eid;
        }
      }
    }
    return std::vector<IdArray>{arrs.indptr, arrs.indices, arrs.id};
  } else if (fmt == "coo") {
    ImmutableGraph::CSR::Ptr csr = graph.GetInCSR();
    int64_t nnz = csr->indptr[layer1_end] - csr->indptr[layer1_start];
    // idx packs dst ids in the first half and src ids in the second half.
    IdArray idx = IdArray::Empty({2 * nnz}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
    IdArray eid = IdArray::Empty({nnz}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
    int64_t *idx_data = static_cast<int64_t*>(idx->data);
    dgl_id_t *eid_data = static_cast<dgl_id_t*>(eid->data);
    size_t num_edges = 0;
    for (size_t i = layer1_start; i < layer1_end; i++) {
      for (int64_t j = csr->indptr[i]; j < csr->indptr[i + 1]; j++) {
        // These nodes are all in a layer. We need to remap them to the node id
        // local to the layer.
        idx_data[num_edges] = remap ? i - layer1_start : i;
        num_edges++;
      }
    }
    // Cast avoids a signed/unsigned comparison inside CHECK_EQ.
    CHECK_EQ(num_edges, static_cast<size_t>(nnz));
    if (remap) {
      size_t edge_start = csr->indptr[layer1_start];
      dgl_id_t first_vid = layer1_start - layer0_size;
      // BUGFIX: guard the empty slice — edge_ids[edge_start] would read out
      // of bounds when nnz == 0.
      if (nnz > 0) {
        // NOTE(review): assumes the slice's edge ids start at
        // edge_ids[edge_start] and are contiguous — TODO confirm.
        dgl_id_t first_eid = csr->edge_ids[edge_start];
        for (int64_t i = 0; i < nnz; i++) {
          CHECK_GE(csr->indices[edge_start + i], first_vid);
          idx_data[nnz + i] = csr->indices[edge_start + i] - first_vid;
          eid_data[i] = csr->edge_ids[edge_start + i] - first_eid;
        }
      }
    } else {
      std::copy(csr->indices.begin() + csr->indptr[layer1_start],
                csr->indices.begin() + csr->indptr[layer1_end], idx_data + nnz);
      std::copy(csr->edge_ids.begin() + csr->indptr[layer1_start],
                csr->edge_ids.begin() + csr->indptr[layer1_end], eid_data);
    }
    return std::vector<IdArray>{idx, eid};
  } else {
    LOG(FATAL) << "unsupported adjacency matrix format";
    return std::vector<IdArray>();
  }
}
} // namespace dgl
......@@ -250,8 +250,33 @@ def test_copy():
nf.block_compute(i, partial(msg_func, ind=i), partial(reduce_func, ind=i))
def test_block_adj_matrix():
    """Check block_edges and block_adjacency_matrix against the parent graph."""
    num_layers = 3
    g = generate_rand_graph(100)
    nf = create_mini_batch(g, num_layers)
    assert nf.num_layers == num_layers + 1
    for blk in range(nf.num_blocks):
        src, dst, eid = nf.block_edges(blk)
        # Reference edges computed directly from the parent graph, remapped
        # to block-local node ids.
        dest_nodes = utils.toindex(nf.layer_nid(blk + 1))
        par_u, par_v, _ = nf._graph.in_edges(dest_nodes)
        exp_src = nf._glb2lcl_nid(par_u.tousertensor(), blk)
        exp_dst = nf._glb2lcl_nid(par_v.tousertensor(), blk + 1)
        assert F.array_equal(src, exp_src)
        assert F.array_equal(dst, exp_dst)
        # The dense form of the block adjacency must equal a COO matrix with
        # dst as rows and src as columns.
        adj, _ = nf.block_adjacency_matrix(blk, F.cpu())
        dense = F.sparse_to_numpy(adj)
        ones = np.ones((len(exp_src)), dtype=np.float32)
        rows = utils.toindex(exp_dst)
        cols = utils.toindex(exp_src)
        expected = sp.sparse.coo_matrix((ones, (rows.tonumpy(), cols.tonumpy())),
                                        shape=dense.shape).todense()
        assert np.array_equal(dense, expected)
if __name__ == '__main__':
test_basic()
test_block_adj_matrix()
test_copy()
test_apply_nodes()
test_apply_edges()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment