Unverified Commit 605b5185 authored by Minjie Wang's avatar Minjie Wang Committed by GitHub
Browse files

[Refactor] Immutable graph index (#543)

* WIP

* header

* WIP .cc

* WIP

* transpose

* wip

* immutable graph .h and .cc

* WIP: nodeflow.cc

* compile

* remove all tmp dl managed ctx; they caused refcount issue

* one simple test

* WIP: testing

* test_graph

* fix graph index

* fix bug in sampler; pass pytorch utest

* WIP on mxnet

* fix lint

* fix mxnet unittest w/ unfortunate workaround

* fix msvc

* fix lint

* SliceRows and test_nodeflow

* resolve reviews

* resolve reviews

* try fix win ci

* try fix win ci

* poke win ci again

* poke

* lazy multigraph flag; stackoverflow error

* revert node subgraph test

* lazy object

* try fix win build

* try fix win build

* poke ci

* fix build script

* fix compile

* add a todo

* fix reviews

* fix compile
parent b2b8be25
/*!
 * Copyright (c) 2019 by Contributors
 * \file dgl/array.h
 * \brief Array types and common array operations required by DGL.
 *
 * Note that this is not meant for a full support of array library such as ATen.
 * Only a limited set of operators required by DGL are implemented.
 */
#ifndef DGL_ARRAY_H_
#define DGL_ARRAY_H_
#include <dgl/runtime/ndarray.h>
#include <cstdint>
#include <vector>
namespace dgl {
/*! \brief Type for node/edge ids. */
typedef uint64_t dgl_id_t;
/*! \brief Aliases of NDArray used to document intent at call sites. */
typedef dgl::runtime::NDArray IdArray;
typedef dgl::runtime::NDArray DegreeArray;
typedef dgl::runtime::NDArray BoolArray;
typedef dgl::runtime::NDArray IntArray;
typedef dgl::runtime::NDArray FloatArray;
/*! \brief Create a new id array with given length (on CPU) */
IdArray NewIdArray(int64_t length);
/*! \brief Create a new id array with the given vector data (on CPU) */
IdArray VecToIdArray(const std::vector<dgl_id_t>& vec);
/*! \brief Create a copy of the given array */
IdArray Clone(IdArray arr);
/*!
 * \brief Elementwise arithmetic functions.
 *
 * Scalar overloads broadcast the scalar against every array element.
 */
IdArray Add(IdArray lhs, IdArray rhs);
IdArray Sub(IdArray lhs, IdArray rhs);
IdArray Mul(IdArray lhs, IdArray rhs);
IdArray Div(IdArray lhs, IdArray rhs);
IdArray Add(IdArray lhs, dgl_id_t rhs);
IdArray Sub(IdArray lhs, dgl_id_t rhs);
IdArray Mul(IdArray lhs, dgl_id_t rhs);
IdArray Div(IdArray lhs, dgl_id_t rhs);
IdArray Add(dgl_id_t lhs, IdArray rhs);
IdArray Sub(dgl_id_t lhs, IdArray rhs);
IdArray Mul(dgl_id_t lhs, IdArray rhs);
IdArray Div(dgl_id_t lhs, IdArray rhs);
/*! \brief Stack two arrays (of len L) into a 2*L length array */
IdArray HStack(IdArray arr1, IdArray arr2);
/*! \brief Plain CSR matrix */
struct CSRMatrix {
IdArray indptr, indices, data;
};
/*! \brief Plain COO structure */
struct COOMatrix {
IdArray row, col, data;
};
/*! \brief Slice rows of the given matrix and return. */
CSRMatrix SliceRows(const CSRMatrix& csr, int64_t start, int64_t end);
/*! \brief Convert COO matrix to CSR matrix. */
CSRMatrix ToCSR(const COOMatrix);
/*! \brief Convert CSR matrix to COO matrix. */
COOMatrix ToCOO(const CSRMatrix);
} // namespace dgl
#endif // DGL_ARRAY_H_
......@@ -40,8 +40,7 @@ class Graph: public GraphInterface {
explicit Graph(bool multigraph = false) : is_multigraph_(multigraph) {}
/*! \brief construct a graph from the coo format. */
Graph(IdArray src_ids, IdArray dst_ids, IdArray edge_ids, size_t num_nodes,
bool multigraph = false);
Graph(IdArray src_ids, IdArray dst_ids, size_t num_nodes, bool multigraph = false);
/*! \brief default copy constructor */
Graph(const Graph& other) = default;
......
......@@ -9,16 +9,11 @@
#include <string>
#include <vector>
#include <utility>
#include "runtime/ndarray.h"
#include <algorithm>
namespace dgl {
#include "array.h"
typedef uint64_t dgl_id_t;
typedef dgl::runtime::NDArray IdArray;
typedef dgl::runtime::NDArray DegreeArray;
typedef dgl::runtime::NDArray BoolArray;
typedef dgl::runtime::NDArray IntArray;
typedef dgl::runtime::NDArray FloatArray;
namespace dgl {
struct Subgraph;
struct NodeFlow;
......@@ -32,8 +27,10 @@ const dgl_id_t DGL_INVALID_ID = static_cast<dgl_id_t>(-1);
* but it doesn't own the data itself. Instead, it only references data stored in a std::vector.
*/
class DGLIdIters {
const dgl_id_t *begin_, *end_;
public:
/* !\brief default constructor to create an empty range */
DGLIdIters() {}
/* !\brief constructor with given begin and end */
DGLIdIters(const dgl_id_t *begin, const dgl_id_t *end) {
this->begin_ = begin;
this->end_ = end;
......@@ -50,6 +47,8 @@ class DGLIdIters {
size_t size() const {
return this->end_ - this->begin_;
}
private:
const dgl_id_t *begin_{nullptr}, *end_{nullptr};
};
class GraphInterface;
......@@ -118,13 +117,49 @@ class GraphInterface {
virtual bool HasVertex(dgl_id_t vid) const = 0;
/*! \return a 0-1 array indicating whether the given vertices are in the graph.*/
virtual BoolArray HasVertices(IdArray vids) const = 0;
virtual BoolArray HasVertices(IdArray vids) const {
  // Default implementation: a vertex id is valid iff it is smaller than the
  // number of vertices. dgl_id_t is unsigned, so no lower-bound check needed.
  const int64_t len = vids->shape[0];
  BoolArray out = BoolArray::Empty({len}, vids->dtype, vids->ctx);
  const dgl_id_t* ids = static_cast<dgl_id_t*>(vids->data);
  dgl_id_t* flags = static_cast<dgl_id_t*>(out->data);
  const uint64_t num_vertices = NumVertices();
  for (int64_t i = 0; i < len; ++i) {
    flags[i] = (ids[i] < num_vertices) ? 1 : 0;
  }
  return out;
}
/*! \return true if the given edge is in the graph.*/
virtual bool HasEdgeBetween(dgl_id_t src, dgl_id_t dst) const = 0;
/*! \return a 0-1 array indicating whether the given edges are in the graph.*/
virtual BoolArray HasEdgesBetween(IdArray src_ids, IdArray dst_ids) const = 0;
virtual BoolArray HasEdgesBetween(IdArray src_ids, IdArray dst_ids) const {
  // Default implementation with numpy-style broadcasting: either side may
  // have length 1 (one-many / many-one); otherwise lengths must match.
  const int64_t srclen = src_ids->shape[0];
  const int64_t dstlen = dst_ids->shape[0];
  const int64_t outlen = std::max(srclen, dstlen);
  BoolArray out = BoolArray::Empty({outlen}, src_ids->dtype, src_ids->ctx);
  dgl_id_t* flags = static_cast<dgl_id_t*>(out->data);
  const dgl_id_t* src = static_cast<dgl_id_t*>(src_ids->data);
  const dgl_id_t* dst = static_cast<dgl_id_t*>(dst_ids->data);
  if (srclen == 1) {
    // one-many: broadcast the single source against every destination
    for (int64_t i = 0; i < dstlen; ++i) {
      flags[i] = HasEdgeBetween(src[0], dst[i]) ? 1 : 0;
    }
  } else if (dstlen == 1) {
    // many-one: broadcast the single destination against every source
    for (int64_t i = 0; i < srclen; ++i) {
      flags[i] = HasEdgeBetween(src[i], dst[0]) ? 1 : 0;
    }
  } else {
    // many-many: pairwise check
    CHECK(srclen == dstlen) << "Invalid src and dst id array.";
    for (int64_t i = 0; i < srclen; ++i) {
      flags[i] = HasEdgeBetween(src[i], dst[i]) ? 1 : 0;
    }
  }
  return out;
}
/*!
* \brief Find the predecessors of a vertex.
......@@ -329,6 +364,13 @@ class GraphInterface {
*
* By default, a row of returned adjacency matrix represents the destination
* of an edge and the column represents the source.
*
* If the fmt is 'csr', the function should return three arrays, representing
* indptr, indices and edge ids
*
* If the fmt is 'coo', the function should return one array of shape (2, nnz),
* representing a horizontal stack of row and col indices.
*
* \param transpose A flag to transpose the returned adjacency matrix.
* \param fmt the format of the returned adjacency matrix.
* \return a vector of IdArrays.
......
This diff is collapsed.
/*!
 * Copyright (c) 2019 by Contributors
 * \file dgl/lazy.h
 * \brief Lazy object that will be materialized only when being queried.
 */
#ifndef DGL_LAZY_H_
#define DGL_LAZY_H_
#include <memory>
namespace dgl {
/*!
 * \brief Wrapper for a value that is computed only on first access.
 *
 * The wrapped value is immutable -- no mutation once materialized.
 * The class is currently not thread safe.
 */
template <typename T>
class LazyObject {
 public:
  /*!\brief Construct an empty (not yet materialized) lazy object. */
  LazyObject() = default;
  /*!\brief Construct an already-materialized object holding the given value. */
  explicit LazyObject(const T& val): ptr_(std::make_shared<T>(val)) {}
  /*!\brief destructor */
  ~LazyObject() = default;
  /*!
   * \brief Return the stored value, materializing it on first use.
   * \param fn Zero-argument creator; invoked only if the value is absent.
   * \return the object value.
   */
  template <typename Fn>
  const T& Get(Fn fn) {
    if (ptr_ == nullptr) {
      ptr_ = std::make_shared<T>(fn());
    }
    return *ptr_;
  }
 private:
  /*!\brief Pointer to the materialized value; null until created. */
  std::shared_ptr<T> ptr_;
};
} // namespace dgl
#endif // DGL_LAZY_H_
......@@ -92,6 +92,8 @@ class NDArray {
inline int use_count() const;
/*! \return Pointer to content of DLTensor */
inline const DLTensor* operator->() const;
/*! \return True if the ndarray is contiguous. */
bool IsContiguous() const;
/*!
* \brief Copy data content from another array.
* \param other The source array to be copied from.
......@@ -129,10 +131,11 @@ class NDArray {
* \brief Create a NDArray that shares the data memory with the current one.
* \param shape The shape of the new array.
* \param dtype The data type of the new array.
* \param offset The offset (in bytes) of the starting pointer.
* \note The memory size of new array must be smaller than the current one.
*/
DGL_DLL NDArray CreateView(
std::vector<int64_t> shape, DLDataType dtype);
std::vector<int64_t> shape, DLDataType dtype, int64_t offset = 0);
/*!
* \brief Create a reference view of NDArray that
* represents as DLManagedTensor.
......
......@@ -898,6 +898,7 @@ class DGLGraph(DGLBaseGraph):
readonly=False):
# graph
super(DGLGraph, self).__init__(create_graph_index(graph_data, multigraph, readonly))
# node and edge frame
if node_frame is None:
self._node_frame = FrameRef(Frame(num_rows=self.number_of_nodes()))
......@@ -1977,7 +1978,7 @@ class DGLGraph(DGLBaseGraph):
assert func is not None
if is_all(edges):
u, v, _ = self._graph.edges()
u, v, _ = self._graph.edges('eid')
eid = utils.toindex(slice(0, self.number_of_edges()))
elif isinstance(edges, tuple):
u, v = edges
......
......@@ -51,21 +51,19 @@ class GraphIndex(object):
self._multigraph = multigraph
self._readonly = readonly
if readonly:
self._init(src, dst, utils.toindex(F.arange(0, len(src))), n_nodes)
self._init(src, dst, n_nodes)
else:
self._handle = _CAPI_DGLGraphCreateMutable(multigraph)
self.clear()
self.add_nodes(n_nodes)
self.add_edges(src, dst)
def _init(self, src_ids, dst_ids, edge_ids, num_nodes):
def _init(self, src_ids, dst_ids, num_nodes):
"""The actual init function"""
assert len(src_ids) == len(dst_ids)
assert len(src_ids) == len(edge_ids)
self._handle = _CAPI_DGLGraphCreate(
src_ids.todgltensor(),
dst_ids.todgltensor(),
edge_ids.todgltensor(),
self._multigraph,
int(num_nodes),
self._readonly)
......@@ -631,8 +629,9 @@ class GraphIndex(object):
indices = F.copy_to(utils.toindex(rst(1)).tousertensor(), ctx)
shuffle = utils.toindex(rst(2))
dat = F.ones(indices.shape, dtype=F.float32, ctx=ctx)
return F.sparse_matrix(dat, ('csr', indices, indptr),
(self.number_of_nodes(), self.number_of_nodes()))[0], shuffle
spmat = F.sparse_matrix(dat, ('csr', indices, indptr),
(self.number_of_nodes(), self.number_of_nodes()))[0]
return spmat, shuffle
elif fmt == "coo":
## FIXME(minjie): data type
idx = F.copy_to(utils.toindex(rst(0)).tousertensor(), ctx)
......@@ -786,13 +785,11 @@ class GraphIndex(object):
for e in nx_graph.edges:
src.append(e[0])
dst.append(e[1])
eid = np.arange(0, len(src), dtype=np.int64)
num_nodes = nx_graph.number_of_nodes()
# We store edge Ids as an edge attribute.
eid = utils.toindex(eid)
src = utils.toindex(src)
dst = utils.toindex(dst)
self._init(src, dst, eid, num_nodes)
self._init(src, dst, num_nodes)
def from_scipy_sparse_matrix(self, adj):
......@@ -808,9 +805,7 @@ class GraphIndex(object):
adj_coo = adj.tocoo()
src = utils.toindex(adj_coo.row)
dst = utils.toindex(adj_coo.col)
edge_ids = utils.toindex(F.arange(0, len(adj_coo.row)))
self._init(src, dst, edge_ids, num_nodes)
self._init(src, dst, num_nodes)
def from_csr_matrix(self, indptr, indices, edge_dir, shared_mem_name=""):
"""Load a graph from the CSR matrix.
......@@ -881,8 +876,7 @@ class GraphIndex(object):
min_nodes = min(src.min(), dst.min())
if min_nodes != 0:
raise DGLError('Invalid edge list. Nodes must start from 0.')
edge_ids = utils.toindex(F.arange(0, len(src)))
self._init(src_ids, dst_ids, edge_ids, num_nodes)
self._init(src_ids, dst_ids, num_nodes)
def line_graph(self, backtracking=True):
"""Return the line graph of this graph.
......
......@@ -191,7 +191,7 @@ def schedule_update_all(graph,
var_eid = var.IDX(eid)
# generate send + reduce
def uv_getter():
src, dst, _ = graph._graph.edges()
src, dst, _ = graph._graph.edges('eid')
return var.IDX(src), var.IDX(dst)
adj_creator = lambda: spmv.build_adj_matrix_graph(graph)
inc_creator = lambda: spmv.build_inc_matrix_graph(graph)
......
/*!
* Copyright (c) 2019 by Contributors
* \file array.cc
* \brief DGL array utilities implementation
*/
#include <dgl/array.h>
namespace dgl {
// TODO(minjie): currently these operators are only on CPU.
IdArray NewIdArray(int64_t length) {
  // Allocate an uninitialized 1-D int64 array on CPU.
  const DLDataType dtype{kDLInt, 64, 1};
  const DLContext ctx{kDLCPU, 0};
  return IdArray::Empty({length}, dtype, ctx);
}
IdArray VecToIdArray(const std::vector<dgl_id_t>& vec) {
  // Copy the vector contents into a freshly allocated id array.
  IdArray out = NewIdArray(vec.size());
  dgl_id_t* out_data = static_cast<dgl_id_t*>(out->data);
  std::copy(vec.begin(), vec.end(), out_data);
  return out;
}
IdArray Clone(IdArray arr) {
  // Deep-copy the 1-D id array into newly allocated CPU storage.
  IdArray copy = NewIdArray(arr->shape[0]);
  copy.CopyFrom(arr);
  return copy;
}
IdArray Add(IdArray lhs, IdArray rhs) {
  // Elementwise addition; assumes both operands have the same length.
  const int64_t len = lhs->shape[0];
  IdArray out = NewIdArray(len);
  const dgl_id_t* a = static_cast<dgl_id_t*>(lhs->data);
  const dgl_id_t* b = static_cast<dgl_id_t*>(rhs->data);
  dgl_id_t* o = static_cast<dgl_id_t*>(out->data);
  for (int64_t i = 0; i < len; ++i) {
    o[i] = a[i] + b[i];
  }
  return out;
}
IdArray Sub(IdArray lhs, IdArray rhs) {
  // Elementwise subtraction; assumes both operands have the same length.
  const int64_t len = lhs->shape[0];
  IdArray out = NewIdArray(len);
  const dgl_id_t* a = static_cast<dgl_id_t*>(lhs->data);
  const dgl_id_t* b = static_cast<dgl_id_t*>(rhs->data);
  dgl_id_t* o = static_cast<dgl_id_t*>(out->data);
  for (int64_t i = 0; i < len; ++i) {
    o[i] = a[i] - b[i];
  }
  return out;
}
IdArray Mul(IdArray lhs, IdArray rhs) {
  // Elementwise multiplication; assumes both operands have the same length.
  const int64_t len = lhs->shape[0];
  IdArray out = NewIdArray(len);
  const dgl_id_t* a = static_cast<dgl_id_t*>(lhs->data);
  const dgl_id_t* b = static_cast<dgl_id_t*>(rhs->data);
  dgl_id_t* o = static_cast<dgl_id_t*>(out->data);
  for (int64_t i = 0; i < len; ++i) {
    o[i] = a[i] * b[i];
  }
  return out;
}
IdArray Div(IdArray lhs, IdArray rhs) {
  // Elementwise (integer) division; assumes both operands have the same length.
  const int64_t len = lhs->shape[0];
  IdArray out = NewIdArray(len);
  const dgl_id_t* a = static_cast<dgl_id_t*>(lhs->data);
  const dgl_id_t* b = static_cast<dgl_id_t*>(rhs->data);
  dgl_id_t* o = static_cast<dgl_id_t*>(out->data);
  for (int64_t i = 0; i < len; ++i) {
    o[i] = a[i] / b[i];
  }
  return out;
}
IdArray Add(IdArray lhs, dgl_id_t rhs) {
  // Add a scalar to every element of the array.
  const int64_t len = lhs->shape[0];
  IdArray out = NewIdArray(len);
  const dgl_id_t* a = static_cast<dgl_id_t*>(lhs->data);
  dgl_id_t* o = static_cast<dgl_id_t*>(out->data);
  for (int64_t i = 0; i < len; ++i) {
    o[i] = a[i] + rhs;
  }
  return out;
}
IdArray Sub(IdArray lhs, dgl_id_t rhs) {
  // Subtract a scalar from every element of the array.
  const int64_t len = lhs->shape[0];
  IdArray out = NewIdArray(len);
  const dgl_id_t* a = static_cast<dgl_id_t*>(lhs->data);
  dgl_id_t* o = static_cast<dgl_id_t*>(out->data);
  for (int64_t i = 0; i < len; ++i) {
    o[i] = a[i] - rhs;
  }
  return out;
}
IdArray Mul(IdArray lhs, dgl_id_t rhs) {
  // Multiply every element of the array by a scalar.
  const int64_t len = lhs->shape[0];
  IdArray out = NewIdArray(len);
  const dgl_id_t* a = static_cast<dgl_id_t*>(lhs->data);
  dgl_id_t* o = static_cast<dgl_id_t*>(out->data);
  for (int64_t i = 0; i < len; ++i) {
    o[i] = a[i] * rhs;
  }
  return out;
}
IdArray Div(IdArray lhs, dgl_id_t rhs) {
  // Divide every element of the array by a scalar (integer division).
  const int64_t len = lhs->shape[0];
  IdArray out = NewIdArray(len);
  const dgl_id_t* a = static_cast<dgl_id_t*>(lhs->data);
  dgl_id_t* o = static_cast<dgl_id_t*>(out->data);
  for (int64_t i = 0; i < len; ++i) {
    o[i] = a[i] / rhs;
  }
  return out;
}
IdArray Add(dgl_id_t lhs, IdArray rhs) {
  // Addition is commutative; reuse the array + scalar kernel.
  return Add(rhs, lhs);
}
IdArray Sub(dgl_id_t lhs, IdArray rhs) {
  // Scalar-minus-array: subtraction does not commute, so compute directly.
  const int64_t len = rhs->shape[0];
  IdArray out = NewIdArray(len);
  const dgl_id_t* b = static_cast<dgl_id_t*>(rhs->data);
  dgl_id_t* o = static_cast<dgl_id_t*>(out->data);
  for (int64_t i = 0; i < len; ++i) {
    o[i] = lhs - b[i];
  }
  return out;
}
IdArray Mul(dgl_id_t lhs, IdArray rhs) {
  // Multiplication is commutative; reuse the array + scalar kernel.
  return Mul(rhs, lhs);
}
IdArray Div(dgl_id_t lhs, IdArray rhs) {
  // Scalar-over-array: division does not commute, so compute directly.
  const int64_t len = rhs->shape[0];
  IdArray out = NewIdArray(len);
  const dgl_id_t* b = static_cast<dgl_id_t*>(rhs->data);
  dgl_id_t* o = static_cast<dgl_id_t*>(out->data);
  for (int64_t i = 0; i < len; ++i) {
    o[i] = lhs / b[i];
  }
  return out;
}
IdArray HStack(IdArray arr1, IdArray arr2) {
  // Concatenate two equal-length arrays into one: [arr1 | arr2].
  CHECK_EQ(arr1->shape[0], arr2->shape[0]);
  const int64_t len = arr1->shape[0];
  IdArray out = NewIdArray(2 * len);
  const dgl_id_t* first = static_cast<dgl_id_t*>(arr1->data);
  const dgl_id_t* second = static_cast<dgl_id_t*>(arr2->data);
  dgl_id_t* o = static_cast<dgl_id_t*>(out->data);
  std::copy(first, first + len, o);
  std::copy(second, second + len, o + len);
  return out;
}
CSRMatrix SliceRows(const CSRMatrix& csr, int64_t start, int64_t end) {
  // Extract rows [start, end) of the CSR matrix into a new matrix whose
  // row offsets are rebased so that the first sliced row starts at zero.
  const dgl_id_t* indptr = static_cast<dgl_id_t*>(csr.indptr->data);
  const dgl_id_t* indices = static_cast<dgl_id_t*>(csr.indices->data);
  const dgl_id_t* data = static_cast<dgl_id_t*>(csr.data->data);
  const int64_t num_rows = end - start;
  const dgl_id_t nz_begin = indptr[start];
  const dgl_id_t nz_end = indptr[end];
  const int64_t nnz = nz_end - nz_begin;
  CSRMatrix out;
  out.indptr = NewIdArray(num_rows + 1);
  out.indices = NewIdArray(nnz);
  out.data = NewIdArray(nnz);
  dgl_id_t* o_indptr = static_cast<dgl_id_t*>(out.indptr->data);
  for (int64_t i = 0; i <= num_rows; ++i) {
    o_indptr[i] = indptr[start + i] - nz_begin;
  }
  std::copy(indices + nz_begin, indices + nz_end,
            static_cast<dgl_id_t*>(out.indices->data));
  std::copy(data + nz_begin, data + nz_end,
            static_cast<dgl_id_t*>(out.data->data));
  return out;
}
} // namespace dgl
......@@ -13,15 +13,6 @@ using dgl::runtime::NDArray;
namespace dgl {
DLManagedTensor* CreateTmpDLManagedTensor(const DGLArgValue& arg) {
const DLTensor* dl_tensor = arg;
DLManagedTensor* ret = new DLManagedTensor();
ret->deleter = [] (DLManagedTensor* self) { delete self; };
ret->manager_ctx = nullptr;
ret->dl_tensor = *dl_tensor;
return ret;
}
PackedFunc ConvertNDArrayVectorToPackedFunc(const std::vector<NDArray>& vec) {
auto body = [vec](DGLArgs args, DGLRetValue* rv) {
const uint64_t which = args[0];
......
......@@ -19,13 +19,6 @@ typedef void* GraphHandle;
// Communicator handler type
typedef void* CommunicatorHandle;
/*!
* \brief Convert the given DLTensor to DLManagedTensor.
*
* Return a temporary DLManagedTensor that does not own memory.
*/
DLManagedTensor* CreateTmpDLManagedTensor(
const dgl::runtime::DGLArgValue& arg);
/*!
* \brief Convert a vector of NDArray to PackedFunc.
......
......@@ -14,33 +14,28 @@
namespace dgl {
Graph::Graph(IdArray src_ids, IdArray dst_ids, IdArray edge_ids, size_t num_nodes,
Graph::Graph(IdArray src_ids, IdArray dst_ids, size_t num_nodes,
bool multigraph): is_multigraph_(multigraph) {
CHECK(IsValidIdArray(src_ids));
CHECK(IsValidIdArray(dst_ids));
CHECK(IsValidIdArray(edge_ids));
this->AddVertices(num_nodes);
num_edges_ = src_ids->shape[0];
CHECK(static_cast<int64_t>(num_edges_) == dst_ids->shape[0])
<< "vectors in COO must have the same length";
CHECK(static_cast<int64_t>(num_edges_) == edge_ids->shape[0])
<< "vectors in COO must have the same length";
const dgl_id_t *src_data = static_cast<dgl_id_t*>(src_ids->data);
const dgl_id_t *dst_data = static_cast<dgl_id_t*>(dst_ids->data);
const dgl_id_t *edge_data = static_cast<dgl_id_t*>(edge_ids->data);
all_edges_src_.reserve(num_edges_);
all_edges_dst_.reserve(num_edges_);
for (uint64_t i = 0; i < num_edges_; i++) {
auto src = src_data[i];
auto dst = dst_data[i];
auto eid = edge_data[i];
CHECK(HasVertex(src) && HasVertex(dst))
<< "Invalid vertices: src=" << src << " dst=" << dst;
adjlist_[src].succ.push_back(dst);
adjlist_[src].edge_id.push_back(eid);
adjlist_[src].edge_id.push_back(i);
reverse_adjlist_[dst].succ.push_back(src);
reverse_adjlist_[dst].edge_id.push_back(eid);
reverse_adjlist_[dst].edge_id.push_back(i);
all_edges_src_.push_back(src);
all_edges_dst_.push_back(dst);
......@@ -104,7 +99,7 @@ BoolArray Graph::HasVertices(IdArray vids) const {
int64_t* rst_data = static_cast<int64_t*>(rst->data);
const int64_t nverts = NumVertices();
for (int64_t i = 0; i < len; ++i) {
rst_data[i] = (vid_data[i] < nverts)? 1 : 0;
rst_data[i] = (vid_data[i] < nverts && vid_data[i] >= 0)? 1 : 0;
}
return rst;
}
......
......@@ -77,14 +77,13 @@ namespace {
template<typename T>
void DGLDisjointPartitionByNum(const T *gptr, DGLArgs args, DGLRetValue *rv) {
int64_t num = args[1];
const std::vector<T> &&rst = GraphOp::DisjointPartitionByNum(gptr, num);
std::vector<T> &&rst = GraphOp::DisjointPartitionByNum(gptr, num);
// return the pointer array as an integer array
const int64_t len = rst.size();
NDArray ptr_array = NDArray::Empty({len}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
int64_t *ptr_array_data = static_cast<int64_t *>(ptr_array->data);
for (size_t i = 0; i < rst.size(); ++i) {
T *ptr = new T();
*ptr = std::move(rst[i]);
GraphInterface *ptr = rst[i].Reset();
ptr_array_data[i] = reinterpret_cast<std::intptr_t>(ptr);
}
*rv = ptr_array;
......@@ -100,9 +99,7 @@ void DGLDisjointUnion(GraphHandle *inhandles, int list_size, DGLRetValue *rv) {
graphs.push_back(gr);
}
T *gptr = new T();
*gptr = GraphOp::DisjointUnion(std::move(graphs));
GraphHandle ghandle = gptr;
GraphHandle ghandle = GraphOp::DisjointUnion(std::move(graphs)).Reset();
*rv = ghandle;
}
......@@ -114,8 +111,7 @@ void DGLDisjointPartitionBySizes(const T *gptr, const IdArray sizes, DGLRetValue
NDArray ptr_array = NDArray::Empty({len}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
int64_t *ptr_array_data = static_cast<int64_t *>(ptr_array->data);
for (size_t i = 0; i < rst.size(); ++i) {
T *ptr = new T();
*ptr = std::move(rst[i]);
GraphInterface *ptr = rst[i].Reset();
ptr_array_data[i] = reinterpret_cast<std::intptr_t>(ptr);
}
*rv = ptr_array;
......@@ -133,39 +129,48 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphCreateMutable")
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphCreate")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
const IdArray src_ids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[0]));
const IdArray dst_ids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
const IdArray edge_ids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[2]));
const bool multigraph = static_cast<bool>(args[3]);
const int64_t num_nodes = static_cast<int64_t>(args[4]);
const bool readonly = static_cast<bool>(args[5]);
const IdArray src_ids = args[0];
const IdArray dst_ids = args[1];
const bool multigraph = static_cast<bool>(args[2]);
const int64_t num_nodes = static_cast<int64_t>(args[3]);
const bool readonly = static_cast<bool>(args[4]);
GraphHandle ghandle;
if (readonly)
ghandle = new ImmutableGraph(src_ids, dst_ids, edge_ids, num_nodes, multigraph);
else
ghandle = new Graph(src_ids, dst_ids, edge_ids, num_nodes, multigraph);
if (readonly) {
// TODO(minjie): The array copy here is unnecessary and adds extra overhead.
// However, with MXNet backend, the memory would be corrupted if we directly
// save the passed-in ndarrays into DGL's graph object. We hope MXNet team
// could help look into this.
COOPtr coo(new COO(num_nodes, Clone(src_ids), Clone(dst_ids), multigraph));
ghandle = new ImmutableGraph(coo);
} else {
ghandle = new Graph(src_ids, dst_ids, num_nodes, multigraph);
}
*rv = ghandle;
});
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphCSRCreate")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
const IdArray indptr = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[0]));
const IdArray indices = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
const IdArray edge_ids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[2]));
const IdArray indptr = args[0];
const IdArray indices = args[1];
const IdArray edge_ids = args[2];
const std::string shared_mem_name = args[3];
const bool multigraph = static_cast<bool>(args[4]);
const std::string edge_dir = args[5];
ImmutableGraph::CSR::Ptr csr;
CSRPtr csr;
if (shared_mem_name.empty())
csr.reset(new ImmutableGraph::CSR(indptr, indices, edge_ids));
// TODO(minjie): The array copy here is unnecessary and adds extra overhead.
// However, with MXNet backend, the memory would be corrupted if we directly
// save the passed-in ndarrays into DGL's graph object. We hope MXNet team
// could help look into this.
csr.reset(new CSR(Clone(indptr), Clone(indices), Clone(edge_ids), multigraph));
else
csr.reset(new ImmutableGraph::CSR(indptr, indices, edge_ids, shared_mem_name));
csr.reset(new CSR(indptr, indices, edge_ids, multigraph, shared_mem_name));
GraphHandle ghandle;
if (edge_dir == "in")
ghandle = new ImmutableGraph(csr, nullptr, multigraph);
ghandle = new ImmutableGraph(csr, nullptr);
else
ghandle = new ImmutableGraph(nullptr, csr, multigraph);
ghandle = new ImmutableGraph(nullptr, csr);
*rv = ghandle;
});
......@@ -176,13 +181,13 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphCSRCreateMMap")
const int64_t num_edges = args[2];
const bool multigraph = static_cast<bool>(args[3]);
const std::string edge_dir = args[4];
ImmutableGraph::CSR::Ptr csr(new ImmutableGraph::CSR(shared_mem_name,
num_vertices, num_edges));
// TODO(minjie): how to know multigraph
CSRPtr csr(new CSR(shared_mem_name, num_vertices, num_edges, multigraph));
GraphHandle ghandle;
if (edge_dir == "in")
ghandle = new ImmutableGraph(csr, nullptr, multigraph);
ghandle = new ImmutableGraph(csr, nullptr);
else
ghandle = new ImmutableGraph(nullptr, csr, multigraph);
ghandle = new ImmutableGraph(nullptr, csr);
*rv = ghandle;
});
......@@ -214,8 +219,8 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphAddEdges")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphHandle ghandle = args[0];
GraphInterface* gptr = static_cast<GraphInterface*>(ghandle);
const IdArray src = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
const IdArray dst = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[2]));
const IdArray src = args[1];
const IdArray dst = args[2];
gptr->AddEdges(src, dst);
});
......@@ -268,14 +273,14 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphHasVertices")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphHandle ghandle = args[0];
const GraphInterface* gptr = static_cast<GraphInterface*>(ghandle);
const IdArray vids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
const IdArray vids = args[1];
*rv = gptr->HasVertices(vids);
});
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLMapSubgraphNID")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
const IdArray parent_vids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[0]));
const IdArray query = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
const IdArray parent_vids = args[0];
const IdArray query = args[1];
*rv = GraphOp::MapParentIdToSubgraphId(parent_vids, query);
});
......@@ -292,8 +297,8 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphHasEdgesBetween")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphHandle ghandle = args[0];
const GraphInterface* gptr = static_cast<GraphInterface*>(ghandle);
const IdArray src = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
const IdArray dst = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[2]));
const IdArray src = args[1];
const IdArray dst = args[2];
*rv = gptr->HasEdgesBetween(src, dst);
});
......@@ -328,8 +333,8 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphEdgeIds")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphHandle ghandle = args[0];
const GraphInterface* gptr = static_cast<GraphInterface*>(ghandle);
const IdArray src = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
const IdArray dst = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[2]));
const IdArray src = args[1];
const IdArray dst = args[2];
*rv = ConvertEdgeArrayToPackedFunc(gptr->EdgeIds(src, dst));
});
......@@ -337,7 +342,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphFindEdges")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphHandle ghandle = args[0];
const GraphInterface* gptr = static_cast<GraphInterface*>(ghandle);
const IdArray eids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
const IdArray eids = args[1];
*rv = ConvertEdgeArrayToPackedFunc(gptr->FindEdges(eids));
});
......@@ -353,7 +358,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphInEdges_2")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphHandle ghandle = args[0];
const GraphInterface* gptr = static_cast<GraphInterface*>(ghandle);
const IdArray vids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
const IdArray vids = args[1];
*rv = ConvertEdgeArrayToPackedFunc(gptr->InEdges(vids));
});
......@@ -369,7 +374,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphOutEdges_2")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphHandle ghandle = args[0];
const GraphInterface* gptr = static_cast<GraphInterface*>(ghandle);
const IdArray vids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
const IdArray vids = args[1];
*rv = ConvertEdgeArrayToPackedFunc(gptr->OutEdges(vids));
});
......@@ -393,7 +398,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphInDegrees")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphHandle ghandle = args[0];
const GraphInterface* gptr = static_cast<GraphInterface*>(ghandle);
const IdArray vids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
const IdArray vids = args[1];
*rv = gptr->InDegrees(vids);
});
......@@ -409,7 +414,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphOutDegrees")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphHandle ghandle = args[0];
const GraphInterface* gptr = static_cast<GraphInterface*>(ghandle);
const IdArray vids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
const IdArray vids = args[1];
*rv = gptr->OutDegrees(vids);
});
......@@ -417,7 +422,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphVertexSubgraph")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphHandle ghandle = args[0];
const GraphInterface* gptr = static_cast<GraphInterface*>(ghandle);
const IdArray vids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
const IdArray vids = args[1];
*rv = ConvertSubgraphToPackedFunc(gptr->VertexSubgraph(vids));
});
......@@ -425,7 +430,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphEdgeSubgraph")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphHandle ghandle = args[0];
const GraphInterface *gptr = static_cast<GraphInterface*>(ghandle);
const IdArray eids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
const IdArray eids = args[1];
*rv = ConvertSubgraphToPackedFunc(gptr->EdgeSubgraph(eids));
});
......@@ -462,7 +467,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLDisjointPartitionByNum")
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLDisjointPartitionBySizes")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphHandle ghandle = args[0];
const IdArray sizes = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
const IdArray sizes = args[1];
const GraphInterface *ptr = static_cast<const GraphInterface *>(ghandle);
const Graph* gptr = dynamic_cast<const Graph*>(ptr);
const ImmutableGraph* im_gptr = dynamic_cast<const ImmutableGraph*>(ptr);
......@@ -497,17 +502,4 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphGetAdj")
*rv = ConvertAdjToPackedFunc(res);
});
DGL_REGISTER_GLOBAL("nodeflow._CAPI_NodeFlowGetBlockAdj")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphHandle ghandle = args[0];
std::string format = args[1];
int64_t layer0_size = args[2];
int64_t start = args[3];
int64_t end = args[4];
const GraphInterface *ptr = static_cast<const GraphInterface *>(ghandle);
const ImmutableGraph* gptr = dynamic_cast<const ImmutableGraph*>(ptr);
auto res = GetNodeFlowSlice(*gptr, format, layer0_size, start, end, true);
*rv = ConvertAdjToPackedFunc(res);
});
} // namespace dgl
......@@ -108,40 +108,44 @@ std::vector<Graph> GraphOp::DisjointPartitionBySizes(const Graph* graph, IdArray
ImmutableGraph GraphOp::DisjointUnion(std::vector<const ImmutableGraph *> graphs) {
dgl_id_t num_nodes = 0;
dgl_id_t num_edges = 0;
int64_t num_nodes = 0;
int64_t num_edges = 0;
for (const ImmutableGraph *gr : graphs) {
num_nodes += gr->NumVertices();
num_edges += gr->NumEdges();
}
ImmutableGraph::CSR::Ptr batched_csr_ptr = std::make_shared<ImmutableGraph::CSR>(num_nodes,
num_edges);
batched_csr_ptr->indptr[0] = 0;
IdArray indptr_arr = NewIdArray(num_nodes + 1);
IdArray indices_arr = NewIdArray(num_edges);
IdArray edge_ids_arr = NewIdArray(num_edges);
dgl_id_t* indptr = static_cast<dgl_id_t*>(indptr_arr->data);
dgl_id_t* indices = static_cast<dgl_id_t*>(indices_arr->data);
dgl_id_t* edge_ids = static_cast<dgl_id_t*>(edge_ids_arr->data);
indptr[0] = 0;
dgl_id_t cum_num_nodes = 0;
dgl_id_t cum_num_edges = 0;
dgl_id_t indptr_idx = 1;
for (const ImmutableGraph *gr : graphs) {
const ImmutableGraph::CSR::Ptr &g_csrptr = gr->GetInCSR();
dgl_id_t g_num_nodes = g_csrptr->NumVertices();
dgl_id_t g_num_edges = g_csrptr->NumEdges();
ImmutableGraph::CSR::vector<dgl_id_t> &g_indices = g_csrptr->indices;
ImmutableGraph::CSR::vector<int64_t> &g_indptr = g_csrptr->indptr;
ImmutableGraph::CSR::vector<dgl_id_t> &g_edge_ids = g_csrptr->edge_ids;
for (dgl_id_t i = 1; i < g_indptr.size(); ++i) {
batched_csr_ptr->indptr[indptr_idx] = g_indptr[i] + cum_num_edges;
indptr_idx++;
const CSRPtr g_csrptr = gr->GetInCSR();
const int64_t g_num_nodes = g_csrptr->NumVertices();
const int64_t g_num_edges = g_csrptr->NumEdges();
dgl_id_t* g_indptr = static_cast<dgl_id_t*>(g_csrptr->indptr()->data);
dgl_id_t* g_indices = static_cast<dgl_id_t*>(g_csrptr->indices()->data);
dgl_id_t* g_edge_ids = static_cast<dgl_id_t*>(g_csrptr->edge_ids()->data);
for (dgl_id_t i = 1; i < g_num_nodes + 1; ++i) {
indptr[cum_num_nodes + i] = g_indptr[i] + cum_num_edges;
}
for (dgl_id_t i = 0; i < g_indices.size(); ++i) {
batched_csr_ptr->indices.push_back(g_indices[i] + cum_num_nodes);
for (dgl_id_t i = 0; i < g_num_edges; ++i) {
indices[cum_num_edges + i] = g_indices[i] + cum_num_nodes;
}
for (dgl_id_t i = 0; i < g_edge_ids.size(); ++i) {
batched_csr_ptr->edge_ids.push_back(g_edge_ids[i] + cum_num_edges);
for (dgl_id_t i = 0; i < g_num_edges; ++i) {
edge_ids[cum_num_edges + i] = g_edge_ids[i] + cum_num_edges;
}
cum_num_nodes += g_num_nodes;
cum_num_edges += g_num_edges;
}
CSRPtr batched_csr_ptr = CSRPtr(new CSR(indptr_arr, indices_arr, edge_ids_arr));
return ImmutableGraph(batched_csr_ptr, nullptr);
}
......@@ -157,9 +161,11 @@ std::vector<ImmutableGraph> GraphOp::DisjointPartitionByNum(const ImmutableGraph
std::vector<ImmutableGraph> GraphOp::DisjointPartitionBySizes(const ImmutableGraph *batched_graph,
IdArray sizes) {
// TODO(minjie): use array views to speedup this operation
const int64_t len = sizes->shape[0];
const int64_t *sizes_data = static_cast<int64_t *>(sizes->data);
std::vector<int64_t> cumsum;
cumsum.reserve(len + 1);
cumsum.push_back(0);
for (int64_t i = 0; i < len; ++i) {
cumsum.push_back(cumsum[i] + sizes_data[i]);
......@@ -167,35 +173,40 @@ std::vector<ImmutableGraph> GraphOp::DisjointPartitionBySizes(const ImmutableGra
CHECK_EQ(cumsum[len], batched_graph->NumVertices())
<< "Sum of the given sizes must equal to the number of nodes.";
std::vector<ImmutableGraph> rst;
const ImmutableGraph::CSR::Ptr &in_csr_ptr = batched_graph->GetInCSR();
ImmutableGraph::CSR::vector<int64_t> &bg_indptr = in_csr_ptr->indptr;
ImmutableGraph::CSR::vector<dgl_id_t> &bg_indices = in_csr_ptr->indices;
CSRPtr in_csr_ptr = batched_graph->GetInCSR();
const dgl_id_t* indptr = static_cast<dgl_id_t*>(in_csr_ptr->indptr()->data);
const dgl_id_t* indices = static_cast<dgl_id_t*>(in_csr_ptr->indices()->data);
const dgl_id_t* edge_ids = static_cast<dgl_id_t*>(in_csr_ptr->edge_ids()->data);
dgl_id_t cum_sum_edges = 0;
for (int64_t i = 0; i < len; ++i) {
int64_t start_pos = cumsum[i];
int64_t end_pos = cumsum[i + 1];
int64_t g_num_edges = bg_indptr[end_pos] - bg_indptr[start_pos];
ImmutableGraph::CSR::Ptr g_in_csr_ptr = std::make_shared<ImmutableGraph::CSR>(sizes_data[i],
g_num_edges);
ImmutableGraph::CSR::vector<int64_t> &g_indptr = g_in_csr_ptr->indptr;
ImmutableGraph::CSR::vector<dgl_id_t> &g_indices = g_in_csr_ptr->indices;
ImmutableGraph::CSR::vector<dgl_id_t> &g_edge_ids = g_in_csr_ptr->edge_ids;
const int64_t start_pos = cumsum[i];
const int64_t end_pos = cumsum[i + 1];
const int64_t g_num_nodes = sizes_data[i];
const int64_t g_num_edges = indptr[end_pos] - indptr[start_pos];
IdArray indptr_arr = NewIdArray(g_num_nodes + 1);
IdArray indices_arr = NewIdArray(g_num_edges);
IdArray edge_ids_arr = NewIdArray(g_num_edges);
dgl_id_t* g_indptr = static_cast<dgl_id_t*>(indptr_arr->data);
dgl_id_t* g_indices = static_cast<dgl_id_t*>(indices_arr->data);
dgl_id_t* g_edge_ids = static_cast<dgl_id_t*>(edge_ids_arr->data);
const dgl_id_t idoff = indptr[start_pos];
g_indptr[0] = 0;
for (int l = start_pos + 1; l < end_pos + 1; ++l) {
g_indptr[l - start_pos] = bg_indptr[l] - bg_indptr[start_pos];
g_indptr[l - start_pos] = indptr[l] - indptr[start_pos];
}
for (int j = bg_indptr[start_pos]; j < bg_indptr[end_pos]; ++j) {
g_indices.push_back(bg_indices[j] - cumsum[i]);
for (int j = indptr[start_pos]; j < indptr[end_pos]; ++j) {
g_indices[j - idoff] = indices[j] - cumsum[i];
}
for (int k = bg_indptr[start_pos]; k < bg_indptr[end_pos]; ++k) {
g_edge_ids.push_back(in_csr_ptr->edge_ids[k] - cum_sum_edges);
for (int k = indptr[start_pos]; k < indptr[end_pos]; ++k) {
g_edge_ids[k - idoff] = edge_ids[k] - cum_sum_edges;
}
cum_sum_edges += g_num_edges;
ImmutableGraph graph(g_in_csr_ptr, nullptr);
rst.push_back(graph);
CSRPtr g_in_csr_ptr = CSRPtr(new CSR(indptr_arr, indices_arr, edge_ids_arr));
rst.emplace_back(g_in_csr_ptr, nullptr);
}
return rst;
}
......
This diff is collapsed.
......@@ -85,10 +85,10 @@ DGL_REGISTER_GLOBAL("network._CAPI_SenderSendSubgraph")
CommunicatorHandle chandle = args[0];
int recv_id = args[1];
GraphHandle ghandle = args[2];
const IdArray node_mapping = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[3]));
const IdArray edge_mapping = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[4]));
const IdArray layer_offsets = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[5]));
const IdArray flow_offsets = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[6]));
const IdArray node_mapping = args[3];
const IdArray edge_mapping = args[4];
const IdArray layer_offsets = args[5];
const IdArray flow_offsets = args[6];
ImmutableGraph *ptr = static_cast<ImmutableGraph*>(ghandle);
network::Sender* sender = static_cast<network::Sender*>(chandle);
auto csr = ptr->GetInCSR();
......@@ -160,7 +160,7 @@ DGL_REGISTER_GLOBAL("network._CAPI_ReceiverRecvSubgraph")
int control = *buffer;
if (control == CONTROL_NODEFLOW) {
NodeFlow* nf = new NodeFlow();
ImmutableGraph::CSR::Ptr csr;
CSRPtr csr;
// Deserialize nodeflow from recv_data_buffer
network::DeserializeSampledSubgraph(buffer+sizeof(CONTROL_NODEFLOW),
&(csr),
......@@ -168,7 +168,7 @@ DGL_REGISTER_GLOBAL("network._CAPI_ReceiverRecvSubgraph")
&(nf->edge_mapping),
&(nf->layer_offsets),
&(nf->flow_offsets));
nf->graph = GraphPtr(new ImmutableGraph(csr, nullptr, false));
nf->graph = GraphPtr(new ImmutableGraph(csr, nullptr));
std::vector<NodeFlow*> subgs(1);
subgs[0] = nf;
*rv = WrapVectorReturn(subgs);
......
......@@ -18,7 +18,7 @@ namespace network {
const int kNumTensor = 7; // We need to serialize 7 conponents (tensor) here
int64_t SerializeSampledSubgraph(char* data,
const ImmutableGraph::CSR::Ptr csr,
const CSRPtr csr,
const IdArray& node_mapping,
const IdArray& edge_mapping,
const IdArray& layer_offsets,
......@@ -30,9 +30,9 @@ int64_t SerializeSampledSubgraph(char* data,
int64_t edge_mapping_size = edge_mapping->shape[0] * sizeof(dgl_id_t);
int64_t layer_offsets_size = layer_offsets->shape[0] * sizeof(dgl_id_t);
int64_t flow_offsets_size = flow_offsets->shape[0] * sizeof(dgl_id_t);
int64_t indptr_size = csr->indptr.size() * sizeof(int64_t);
int64_t indices_size = csr->indices.size() * sizeof(dgl_id_t);
int64_t edge_ids_size = csr->edge_ids.size() * sizeof(dgl_id_t);
int64_t indptr_size = csr->indptr().GetSize();
int64_t indices_size = csr->indices().GetSize();
int64_t edge_ids_size = csr->edge_ids().GetSize();
total_size += node_mapping_size;
total_size += edge_mapping_size;
total_size += layer_offsets_size;
......@@ -52,9 +52,9 @@ int64_t SerializeSampledSubgraph(char* data,
dgl_id_t* edge_map_data = static_cast<dgl_id_t*>(edge_mapping->data);
dgl_id_t* layer_off_data = static_cast<dgl_id_t*>(layer_offsets->data);
dgl_id_t* flow_off_data = static_cast<dgl_id_t*>(flow_offsets->data);
int64_t* indptr = static_cast<int64_t*>(csr->indptr.data());
dgl_id_t* indices = static_cast<dgl_id_t*>(csr->indices.data());
dgl_id_t* edge_ids = static_cast<dgl_id_t*>(csr->edge_ids.data());
dgl_id_t* indptr = static_cast<dgl_id_t*>(csr->indptr()->data);
dgl_id_t* indices = static_cast<dgl_id_t*>(csr->indices()->data);
dgl_id_t* edge_ids = static_cast<dgl_id_t*>(csr->edge_ids()->data);
// node_mapping
*(reinterpret_cast<int64_t*>(data_ptr)) = node_mapping_size;
data_ptr += sizeof(int64_t);
......@@ -94,7 +94,7 @@ int64_t SerializeSampledSubgraph(char* data,
}
void DeserializeSampledSubgraph(char* data,
ImmutableGraph::CSR::Ptr* csr,
CSRPtr* csr,
IdArray* node_mapping,
IdArray* edge_mapping,
IdArray* layer_offsets,
......@@ -139,25 +139,24 @@ void DeserializeSampledSubgraph(char* data,
memcpy(edge_mapping_data, data_ptr, tensor_size);
data_ptr += tensor_size;
// Construct sub_csr_graph
*csr = std::make_shared<ImmutableGraph::CSR>(num_vertices, num_edges);
(*csr)->indices.resize(num_edges);
(*csr)->edge_ids.resize(num_edges);
// TODO(minjie): multigraph flag
*csr = CSRPtr(new CSR(num_vertices, num_edges, false));
// indices (CSR)
tensor_size = *(reinterpret_cast<int64_t*>(data_ptr));
data_ptr += sizeof(int64_t);
dgl_id_t* col_list_out = (*csr)->indices.data();
dgl_id_t* col_list_out = static_cast<dgl_id_t*>((*csr)->indices()->data);
memcpy(col_list_out, data_ptr, tensor_size);
data_ptr += tensor_size;
// edge_ids (CSR)
tensor_size = *(reinterpret_cast<int64_t*>(data_ptr));
data_ptr += sizeof(int64_t);
dgl_id_t* edge_ids = (*csr)->edge_ids.data();
dgl_id_t* edge_ids = static_cast<dgl_id_t*>((*csr)->edge_ids()->data);
memcpy(edge_ids, data_ptr, tensor_size);
data_ptr += tensor_size;
// indptr (CSR)
tensor_size = *(reinterpret_cast<int64_t*>(data_ptr));
data_ptr += sizeof(int64_t);
int64_t* indptr_out = (*csr)->indptr.data();
dgl_id_t* indptr_out = static_cast<dgl_id_t*>((*csr)->indptr()->data);
memcpy(indptr_out, data_ptr, tensor_size);
data_ptr += tensor_size;
}
......
......@@ -23,7 +23,7 @@ namespace network {
* \return the total size of the serialized binary data
*/
int64_t SerializeSampledSubgraph(char* data,
const ImmutableGraph::CSR::Ptr csr,
const CSRPtr csr,
const IdArray& node_mapping,
const IdArray& edge_mapping,
const IdArray& layer_offsets,
......@@ -39,7 +39,7 @@ int64_t SerializeSampledSubgraph(char* data,
* \param flow_offsets flow offsets in NodeFlowIndex
*/
void DeserializeSampledSubgraph(char* data,
ImmutableGraph::CSR::Ptr* csr,
CSRPtr* csr,
IdArray* node_mapping,
IdArray* edge_mapping,
IdArray* layer_offsets,
......
......@@ -11,38 +11,42 @@
#include "../c_api_common.h"
using dgl::runtime::DGLArgs;
using dgl::runtime::DGLArgValue;
using dgl::runtime::DGLRetValue;
using dgl::runtime::PackedFunc;
namespace dgl {
std::vector<IdArray> GetNodeFlowSlice(const ImmutableGraph &graph, const std::string &fmt,
size_t layer0_size, size_t layer1_start,
size_t layer1_end, bool remap) {
CHECK_GE(layer1_start, layer0_size);
if (fmt == "csr") {
if (fmt == std::string("csr")) {
dgl_id_t first_vid = layer1_start - layer0_size;
ImmutableGraph::CSRArray arrs = graph.GetInCSRArray(layer1_start, layer1_end);
CSRMatrix csr = SliceRows(graph.GetInCSR()->ToCSRMatrix(), layer1_start, layer1_end);
if (remap) {
dgl_id_t *indices_data = static_cast<dgl_id_t*>(arrs.indices->data);
dgl_id_t *eid_data = static_cast<dgl_id_t*>(arrs.id->data);
const size_t len = arrs.indices->shape[0];
dgl_id_t first_eid = eid_data[0];
for (size_t i = 0; i < len; i++) {
CHECK_GE(indices_data[i], first_vid);
indices_data[i] -= first_vid;
CHECK_GE(eid_data[i], first_eid);
eid_data[i] -= first_eid;
}
dgl_id_t *eid_data = static_cast<dgl_id_t*>(csr.data->data);
const dgl_id_t first_eid = eid_data[0];
IdArray new_indices = Sub(csr.indices, first_vid);
IdArray new_data = Sub(csr.data, first_eid);
return {csr.indptr, new_indices, new_data};
} else {
return {csr.indptr, csr.indices, csr.data};
}
return std::vector<IdArray>{arrs.indptr, arrs.indices, arrs.id};
} else if (fmt == "coo") {
ImmutableGraph::CSR::Ptr csr = graph.GetInCSR();
int64_t nnz = csr->indptr[layer1_end] - csr->indptr[layer1_start];
IdArray idx = IdArray::Empty({2 * nnz}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
IdArray eid = IdArray::Empty({nnz}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
} else if (fmt == std::string("coo")) {
CSRMatrix csr = graph.GetInCSR()->ToCSRMatrix();
const dgl_id_t* indptr = static_cast<dgl_id_t*>(csr.indptr->data);
const dgl_id_t* indices = static_cast<dgl_id_t*>(csr.indices->data);
const dgl_id_t* edge_ids = static_cast<dgl_id_t*>(csr.data->data);
int64_t nnz = indptr[layer1_end] - indptr[layer1_start];
IdArray idx = NewIdArray(2 * nnz);
IdArray eid = NewIdArray(nnz);
int64_t *idx_data = static_cast<int64_t*>(idx->data);
dgl_id_t *eid_data = static_cast<dgl_id_t*>(eid->data);
size_t num_edges = 0;
for (size_t i = layer1_start; i < layer1_end; i++) {
for (int64_t j = csr->indptr[i]; j < csr->indptr[i + 1]; j++) {
for (dgl_id_t j = indptr[i]; j < indptr[i + 1]; j++) {
// These nodes are all in a layer. We need to remap them to the node id
// local to the layer.
idx_data[num_edges] = remap ? i - layer1_start : i;
......@@ -51,25 +55,38 @@ std::vector<IdArray> GetNodeFlowSlice(const ImmutableGraph &graph, const std::st
}
CHECK_EQ(num_edges, nnz);
if (remap) {
size_t edge_start = csr->indptr[layer1_start];
dgl_id_t first_eid = csr->edge_ids[edge_start];
size_t edge_start = indptr[layer1_start];
dgl_id_t first_eid = edge_ids[edge_start];
dgl_id_t first_vid = layer1_start - layer0_size;
for (int64_t i = 0; i < nnz; i++) {
CHECK_GE(csr->indices[edge_start + i], first_vid);
idx_data[nnz + i] = csr->indices[edge_start + i] - first_vid;
eid_data[i] = csr->edge_ids[edge_start + i] - first_eid;
CHECK_GE(indices[edge_start + i], first_vid);
idx_data[nnz + i] = indices[edge_start + i] - first_vid;
eid_data[i] = edge_ids[edge_start + i] - first_eid;
}
} else {
std::copy(csr->indices.begin() + csr->indptr[layer1_start],
csr->indices.begin() + csr->indptr[layer1_end], idx_data + nnz);
std::copy(csr->edge_ids.begin() + csr->indptr[layer1_start],
csr->edge_ids.begin() + csr->indptr[layer1_end], eid_data);
std::copy(indices + indptr[layer1_start],
indices + indptr[layer1_end], idx_data + nnz);
std::copy(edge_ids + indptr[layer1_start],
edge_ids + indptr[layer1_end], eid_data);
}
return std::vector<IdArray>{idx, eid};
} else {
LOG(FATAL) << "unsupported adjacency matrix format";
return std::vector<IdArray>();
return {};
}
}
DGL_REGISTER_GLOBAL("nodeflow._CAPI_NodeFlowGetBlockAdj")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphHandle ghandle = args[0];
std::string format = args[1];
int64_t layer0_size = args[2];
int64_t start = args[3];
int64_t end = args[4];
const GraphInterface *ptr = static_cast<const GraphInterface *>(ghandle);
const ImmutableGraph* gptr = dynamic_cast<const ImmutableGraph*>(ptr);
auto res = GetNodeFlowSlice(*gptr, format, layer0_size, start, end, true);
*rv = ConvertNDArrayVectorToPackedFunc(res);
});
} // namespace dgl
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment