Unverified Commit 605b5185 authored by Minjie Wang's avatar Minjie Wang Committed by GitHub
Browse files

[Refactor] Immutable graph index (#543)

* WIP

* header

* WIP .cc

* WIP

* transpose

* wip

* immutable graph .h and .cc

* WIP: nodeflow.cc

* compile

* remove all tmp dl managed ctx; they caused refcount issue

* one simple test

* WIP: testing

* test_graph

* fix graph index

* fix bug in sampler; pass pytorch utest

* WIP on mxnet

* fix lint

* fix mxnet unittest w/ unfortunate workaround

* fix msvc

* fix lint

* SliceRows and test_nodeflow

* resolve reviews

* resolve reviews

* try fix win ci

* try fix win ci

* poke win ci again

* poke

* lazy multigraph flag; stackoverflow error

* revert node subgraph test

* lazy object

* try fix win build

* try fix win build

* poke ci

* fix build script

* fix compile

* add a todo

* fix reviews

* fix compile
parent b2b8be25
/*!
* Copyright (c) 2019 by Contributors
* \file dgl/array.h
* \brief Array types and common array operations required by DGL.
*
* Note that this is not meant for a full support of array library such as ATen.
* Only a limited set of operators required by DGL are implemented.
*/
#ifndef DGL_ARRAY_H_
#define DGL_ARRAY_H_
#include <dgl/runtime/ndarray.h>
#include <vector>
namespace dgl {
typedef uint64_t dgl_id_t;
typedef dgl::runtime::NDArray IdArray;
typedef dgl::runtime::NDArray DegreeArray;
typedef dgl::runtime::NDArray BoolArray;
typedef dgl::runtime::NDArray IntArray;
typedef dgl::runtime::NDArray FloatArray;
/*! \brief Create a new id array with given length (on CPU) */
IdArray NewIdArray(int64_t length);
/*! \brief Create a new id array with the given vector data (on CPU) */
IdArray VecToIdArray(const std::vector<dgl_id_t>& vec);
/*! \brief Create a copy of the given array */
IdArray Clone(IdArray arr);
/*! \brief Arithmetic functions */
IdArray Add(IdArray lhs, IdArray rhs);
IdArray Sub(IdArray lhs, IdArray rhs);
IdArray Mul(IdArray lhs, IdArray rhs);
IdArray Div(IdArray lhs, IdArray rhs);
IdArray Add(IdArray lhs, dgl_id_t rhs);
IdArray Sub(IdArray lhs, dgl_id_t rhs);
IdArray Mul(IdArray lhs, dgl_id_t rhs);
IdArray Div(IdArray lhs, dgl_id_t rhs);
IdArray Add(dgl_id_t lhs, IdArray rhs);
IdArray Sub(dgl_id_t lhs, IdArray rhs);
IdArray Mul(dgl_id_t lhs, IdArray rhs);
IdArray Div(dgl_id_t lhs, IdArray rhs);
/*! \brief Stack two arrays (of len L) into a 2*L length array */
IdArray HStack(IdArray arr1, IdArray arr2);
/*!
 * \brief Plain CSR matrix.
 *
 * A passive aggregate of three id arrays; no invariants are enforced here.
 */
struct CSRMatrix {
  IdArray indptr;
  IdArray indices;
  IdArray data;
};
/*!
 * \brief Plain COO structure.
 *
 * A passive aggregate of three id arrays; no invariants are enforced here.
 */
struct COOMatrix {
  IdArray row;
  IdArray col;
  IdArray data;
};
/*! \brief Slice rows of the given matrix and return. */
CSRMatrix SliceRows(const CSRMatrix& csr, int64_t start, int64_t end);
/*! \brief Convert COO matrix to CSR matrix. */
CSRMatrix ToCSR(const COOMatrix);
/*! \brief Convert CSR matrix to COO matrix. */
COOMatrix ToCOO(const CSRMatrix);
} // namespace dgl
#endif // DGL_ARRAY_H_
......@@ -40,8 +40,7 @@ class Graph: public GraphInterface {
explicit Graph(bool multigraph = false) : is_multigraph_(multigraph) {}
/*! \brief construct a graph from the coo format. */
Graph(IdArray src_ids, IdArray dst_ids, IdArray edge_ids, size_t num_nodes,
bool multigraph = false);
Graph(IdArray src_ids, IdArray dst_ids, size_t num_nodes, bool multigraph = false);
/*! \brief default copy constructor */
Graph(const Graph& other) = default;
......
......@@ -9,16 +9,11 @@
#include <string>
#include <vector>
#include <utility>
#include "runtime/ndarray.h"
#include <algorithm>
namespace dgl {
#include "array.h"
typedef uint64_t dgl_id_t;
typedef dgl::runtime::NDArray IdArray;
typedef dgl::runtime::NDArray DegreeArray;
typedef dgl::runtime::NDArray BoolArray;
typedef dgl::runtime::NDArray IntArray;
typedef dgl::runtime::NDArray FloatArray;
namespace dgl {
struct Subgraph;
struct NodeFlow;
......@@ -32,8 +27,10 @@ const dgl_id_t DGL_INVALID_ID = static_cast<dgl_id_t>(-1);
* but it doesn't own data itself. instead, it only references data in std::vector.
*/
class DGLIdIters {
const dgl_id_t *begin_, *end_;
public:
/*! \brief default constructor to create an empty range */
DGLIdIters() {}
/*! \brief constructor with given begin and end */
DGLIdIters(const dgl_id_t *begin, const dgl_id_t *end) {
this->begin_ = begin;
this->end_ = end;
......@@ -50,6 +47,8 @@ class DGLIdIters {
size_t size() const {
return this->end_ - this->begin_;
}
private:
const dgl_id_t *begin_{nullptr}, *end_{nullptr};
};
class GraphInterface;
......@@ -118,13 +117,49 @@ class GraphInterface {
virtual bool HasVertex(dgl_id_t vid) const = 0;
/*! \return a 0-1 array indicating whether the given vertices are in the graph.*/
virtual BoolArray HasVertices(IdArray vids) const = 0;
/*!
 * \brief Default batch vertex-membership test.
 *
 * Allocates a result array with the same length, dtype and ctx as \p vids and
 * writes 1 where the id is smaller than NumVertices(), 0 otherwise.
 * Both buffers are accessed as dgl_id_t.
 * \param vids The vertex ids to test.
 * \return the 0-1 result array.
 */
virtual BoolArray HasVertices(IdArray vids) const {
  const auto count = vids->shape[0];
  BoolArray flags = BoolArray::Empty({count}, vids->dtype, vids->ctx);
  const dgl_id_t* ids = static_cast<dgl_id_t*>(vids->data);
  dgl_id_t* out = static_cast<dgl_id_t*>(flags->data);
  const uint64_t vertex_count = NumVertices();
  int64_t pos = 0;
  while (pos < count) {
    if (ids[pos] < vertex_count) {
      out[pos] = 1;
    } else {
      out[pos] = 0;
    }
    ++pos;
  }
  return flags;
}
/*! \return true if the given edge is in the graph.*/
virtual bool HasEdgeBetween(dgl_id_t src, dgl_id_t dst) const = 0;
/*! \return a 0-1 array indicating whether the given edges are in the graph.*/
virtual BoolArray HasEdgesBetween(IdArray src_ids, IdArray dst_ids) const = 0;
/*!
 * \brief Default batch edge-existence test built on HasEdgeBetween().
 *
 * Three input shapes are handled:
 *  - one source against many destinations,
 *  - many sources against one destination,
 *  - equal-length arrays compared pairwise (lengths are CHECKed).
 * The result has max(len(src_ids), len(dst_ids)) entries and reuses the
 * dtype and ctx of \p src_ids; all buffers are accessed as dgl_id_t.
 * \param src_ids The source vertex ids.
 * \param dst_ids The destination vertex ids.
 * \return the 0-1 result array.
 */
virtual BoolArray HasEdgesBetween(IdArray src_ids, IdArray dst_ids) const {
  const auto num_src = src_ids->shape[0];
  const auto num_dst = dst_ids->shape[0];
  const auto num_out = std::max(num_src, num_dst);
  BoolArray flags = BoolArray::Empty({num_out}, src_ids->dtype, src_ids->ctx);
  dgl_id_t* out = static_cast<dgl_id_t*>(flags->data);
  const dgl_id_t* srcs = static_cast<dgl_id_t*>(src_ids->data);
  const dgl_id_t* dsts = static_cast<dgl_id_t*>(dst_ids->data);

  if (num_src == 1) {
    // one-many: the single source is paired with every destination
    for (int64_t k = 0; k < num_dst; ++k) {
      out[k] = HasEdgeBetween(srcs[0], dsts[k]) ? 1 : 0;
    }
    return flags;
  }

  if (num_dst == 1) {
    // many-one: every source is paired with the single destination
    for (int64_t k = 0; k < num_src; ++k) {
      out[k] = HasEdgeBetween(srcs[k], dsts[0]) ? 1 : 0;
    }
    return flags;
  }

  // many-many: element-wise pairs, so both arrays must have equal length
  CHECK(num_src == num_dst) << "Invalid src and dst id array.";
  for (int64_t k = 0; k < num_src; ++k) {
    out[k] = HasEdgeBetween(srcs[k], dsts[k]) ? 1 : 0;
  }
  return flags;
}
/*!
* \brief Find the predecessors of a vertex.
......@@ -329,6 +364,13 @@ class GraphInterface {
*
* By default, a row of returned adjacency matrix represents the destination
* of an edge and the column represents the source.
*
* If the fmt is 'csr', the function should return three arrays, representing
* indptr, indices and edge ids
*
* If the fmt is 'coo', the function should return one array of shape (2, nnz),
* representing a horizontal stack of row and col indices.
*
* \param transpose A flag to transpose the returned adjacency matrix.
* \param fmt the format of the returned adjacency matrix.
* \return a vector of IdArrays.
......
......@@ -14,320 +14,452 @@
#include <algorithm>
#include "runtime/ndarray.h"
#include "graph_interface.h"
#include "lazy.h"
namespace dgl {
class CSR;
class COO;
typedef std::shared_ptr<CSR> CSRPtr;
typedef std::shared_ptr<COO> COOPtr;
/*!
* \brief DGL immutable graph index class.
*
* DGL's graph is directed. Vertices are integers enumerated from zero.
* \brief Graph class stored using CSR structure.
*/
class ImmutableGraph: public GraphInterface {
class CSR : public GraphInterface {
public:
typedef struct {
IdArray indptr, indices, id;
} CSRArray;
struct Edge {
dgl_id_t end_points[2];
dgl_id_t edge_id;
};
struct EdgeList;
struct CSR {
typedef std::shared_ptr<CSR> Ptr;
/*
* This vector provides interfaces similar to std::vector.
* The main difference is that the memory used by the vector can be allocated
* outside the vector. The main use case is that the vector can use the shared
* memory that is created by another process. In this way, we can access the
* graph structure loaded in another process.
*/
template<class T>
class vector {
public:
vector() {
this->arr = nullptr;
this->capacity = 0;
this->curr = 0;
this->own = false;
}
// Create a csr graph that has the given number of verts and edges.
CSR(int64_t num_vertices, int64_t num_edges, bool is_multigraph);
// Create a csr graph whose memory is stored in the shared memory
// that has the given number of verts and edges.
CSR(const std::string &shared_mem_name,
int64_t num_vertices, int64_t num_edges, bool is_multigraph);
// Create a csr graph that shares the given indptr and indices.
CSR(IdArray indptr, IdArray indices, IdArray edge_ids);
CSR(IdArray indptr, IdArray indices, IdArray edge_ids, bool is_multigraph);
// Create a csr graph whose memory is stored in the shared memory
// and the structure is given by the indptr and indices.
CSR(IdArray indptr, IdArray indices, IdArray edge_ids,
const std::string &shared_mem_name);
CSR(IdArray indptr, IdArray indices, IdArray edge_ids, bool is_multigraph,
const std::string &shared_mem_name);
/*! \brief Unsupported on CSR: always reports a fatal error via LOG(FATAL). */
void AddVertices(uint64_t num_vertices) override {
  LOG(FATAL) << "CSR graph does not allow mutation.";
}
/*
* Create a vector whose memory is allocated outside.
* Here there are no elements in the vector.
*/
vector(T *arr, size_t size) {
this->arr = arr;
this->capacity = size;
this->curr = 0;
this->own = false;
}
/*! \brief Unsupported on CSR: always reports a fatal error via LOG(FATAL). */
void AddEdge(dgl_id_t src, dgl_id_t dst) override {
  LOG(FATAL) << "CSR graph does not allow mutation.";
}
/*
* Create a vector whose memory is allocated by the vector.
* Here there are no elements in the vector.
*/
explicit vector(size_t size) {
this->arr = static_cast<T *>(malloc(size * sizeof(T)));
this->capacity = size;
this->curr = 0;
this->own = true;
}
/*! \brief Unsupported on CSR: always reports a fatal error via LOG(FATAL). */
void AddEdges(IdArray src_ids, IdArray dst_ids) override {
  LOG(FATAL) << "CSR graph does not allow mutation.";
}
~vector() {
// If the memory is allocated by the vector, it should be free'd.
if (this->own) {
free(this->arr);
}
}
/*! \brief Unsupported on CSR: always reports a fatal error via LOG(FATAL). */
void Clear() override {
  LOG(FATAL) << "CSR graph does not allow mutation.";
}
vector(const vector &other) = delete;
/*
* Initialize the vector whose memory is allocated outside.
* There are no elements in the vector.
*/
void init(T *arr, size_t size) {
CHECK(this->arr == nullptr);
this->arr = arr;
this->capacity = size;
this->curr = 0;
this->own = false;
}
bool IsMultigraph() const override;
/*
* Initialize the vector whose memory is allocated outside.
* There are elements in the vector.
*/
void init(T *arr, size_t capacity, size_t size) {
CHECK(this->arr == nullptr);
CHECK_LE(size, capacity);
this->arr = arr;
this->capacity = capacity;
this->curr = size;
this->own = false;
}
/*! \return always true; every mutator of this class reports a fatal error. */
bool IsReadonly() const override { return true; }
/* Similar to std::vector::push_back. */
void push_back(T val) {
// If the vector doesn't own the memory, it can't adjust its memory size.
if (!this->own) {
CHECK_LT(curr, capacity);
} else if (curr == capacity) {
this->capacity = this->capacity * 2;
this->arr = static_cast<T *>(realloc(this->arr, this->capacity * sizeof(T)));
CHECK(this->arr) << "can't allocate memory for a larger vector.";
}
this->arr[curr++] = val;
}
/*! \return the vertex count, computed as the length of indptr_ minus one. */
uint64_t NumVertices() const override {
  const auto indptr_len = indptr_->shape[0];
  return indptr_len - 1;
}
/*
* This inserts multiple elements to the back of the vector.
*/
void insert_back(const T* val, size_t len) {
if (!this->own) {
CHECK_LE(curr + len, capacity);
} else if (curr + len > capacity) {
this->capacity = curr + len;
this->arr = static_cast<T *>(realloc(this->arr, this->capacity * sizeof(T)));
CHECK(this->arr) << "can't allocate memory for a larger vector.";
}
std::copy(val, val + len, this->arr + curr);
curr += len;
}
uint64_t NumEdges() const override {
return indices_->shape[0];
}
/*
* Similar to std::vector::[].
* It checks the boundary of the vector.
*/
T &operator[](size_t idx) {
CHECK_LT(idx, curr);
return this->arr[idx];
}
/*!
 * \brief Test whether a vertex id is in range.
 * \param vid The vertex id.
 * \return true if vid is smaller than NumVertices().
 * \note dgl_id_t is an unsigned type, so no lower-bound check is needed.
 */
bool HasVertex(dgl_id_t vid) const override {
  return vid < NumVertices();
}
/*
* Similar to std::vector::[].
* It checks the boundary of the vector.
*/
const T &operator[](size_t idx) const {
CHECK_LT(idx, curr);
return this->arr[idx];
}
bool HasEdgeBetween(dgl_id_t src, dgl_id_t dst) const override;
/* Similar to std::vector::size. */
size_t size() const {
return this->curr;
}
/*!
 * \brief Unsupported on CSR: always reports a fatal error via LOG(FATAL).
 * \note The trailing return only satisfies the signature.
 */
IdArray Predecessors(dgl_id_t vid, uint64_t radius = 1) const override {
  LOG(FATAL) << "CSR graph does not support efficient predecessor query."
             << " Please use successors on the reverse CSR graph.";
  return {};
}
/* Similar to std::vector::resize. */
void resize(size_t new_size) {
if (!this->own) {
CHECK_LE(new_size, capacity);
} else if (new_size > capacity) {
this->capacity = new_size;
this->arr = static_cast<T *>(realloc(this->arr, this->capacity * sizeof(T)));
CHECK(this->arr) << "can't allocate memory for a larger vector.";
}
for (size_t i = this->curr; i < new_size; i++)
this->arr[i] = 0;
this->curr = new_size;
}
IdArray Successors(dgl_id_t vid, uint64_t radius = 1) const override;
/* Similar to std::vector::clear. */
void clear() {
this->curr = 0;
}
IdArray EdgeId(dgl_id_t src, dgl_id_t dst) const override;
/* Similar to std::vector::data. */
const T *data() const {
return this->arr;
}
EdgeArray EdgeIds(IdArray src, IdArray dst) const override;
/* Similar to std::vector::data. */
T *data() {
return this->arr;
}
/*!
 * \brief Unsupported on CSR: always reports a fatal error via LOG(FATAL).
 * \note The trailing return only satisfies the signature.
 */
std::pair<dgl_id_t, dgl_id_t> FindEdge(dgl_id_t eid) const override {
  LOG(FATAL) << "CSR graph does not support efficient FindEdge."
             << " Please use COO graph.";
  return {};
}
/*
* This is to simulate begin() of std::vector.
* However, it returns the raw pointer instead of iterator.
*/
const T *begin() const {
return this->arr;
}
/*!
 * \brief Unsupported on CSR: always reports a fatal error via LOG(FATAL).
 * \note The trailing return only satisfies the signature.
 */
EdgeArray FindEdges(IdArray eids) const override {
  LOG(FATAL) << "CSR graph does not support efficient FindEdges."
             << " Please use COO graph.";
  return {};
}
/*
* This is to simulate begin() of std::vector.
* However, it returns the raw pointer instead of iterator.
*/
T *begin() {
return this->arr;
}
/*!
 * \brief Unsupported on CSR (single vertex): always reports a fatal error.
 * \note The trailing return only satisfies the signature.
 */
EdgeArray InEdges(dgl_id_t vid) const override {
  LOG(FATAL) << "CSR graph does not support efficient inedges query."
             << " Please use outedges on the reverse CSR graph.";
  return {};
}
/*
* This is to simulate end() of std::vector.
* However, it returns the raw pointer instead of iterator.
*/
const T *end() const {
return this->arr + this->curr;
}
/*!
 * \brief Unsupported on CSR (vertex batch): always reports a fatal error.
 * \note The trailing return only satisfies the signature.
 */
EdgeArray InEdges(IdArray vids) const override {
  LOG(FATAL) << "CSR graph does not support efficient inedges query."
             << " Please use outedges on the reverse CSR graph.";
  return {};
}
/*
* This is to simulate end() of std::vector.
* However, it returns the raw pointer instead of iterator.
*/
T *end() {
return this->arr + this->curr;
}
EdgeArray OutEdges(dgl_id_t vid) const override;
private:
/*
* \brief the raw array that contains elements of type T.
*
* The vector may or may not own the memory of the raw array.
*/
T *arr;
/* \brief the memory size of the raw array. */
size_t capacity;
/* \brief the number of elements in the array. */
size_t curr;
/* \brief whether the vector owns the memory. */
bool own;
};
vector<int64_t> indptr;
vector<dgl_id_t> indices;
vector<dgl_id_t> edge_ids;
CSR(int64_t num_vertices, int64_t expected_num_edges);
CSR(IdArray indptr, IdArray indices, IdArray edge_ids);
CSR(IdArray indptr, IdArray indices, IdArray edge_ids,
const std::string &shared_mem_name);
CSR(const std::string &shared_mem_name, size_t num_vertices, size_t num_edges);
bool HasVertex(dgl_id_t vid) const {
return vid < NumVertices();
}
EdgeArray OutEdges(IdArray vids) const override;
uint64_t NumVertices() const {
return indptr.size() - 1;
}
EdgeArray Edges(const std::string &order = "") const override;
uint64_t NumEdges() const {
return indices.size();
}
/*!
 * \brief Unsupported on CSR: always reports a fatal error via LOG(FATAL).
 * \note The trailing return only satisfies the signature.
 */
uint64_t InDegree(dgl_id_t vid) const override {
  LOG(FATAL) << "CSR graph does not support efficient indegree query."
             << " Please use outdegree on the reverse CSR graph.";
  return 0;
}
/* This gets the sum of vertex degrees in the range. */
uint64_t GetDegree(dgl_id_t start, dgl_id_t end) const {
return indptr[end] - indptr[start];
}
/*!
 * \brief Unsupported on CSR: always reports a fatal error via LOG(FATAL).
 * \note The trailing return only satisfies the signature.
 */
DegreeArray InDegrees(IdArray vids) const override {
  LOG(FATAL) << "CSR graph does not support efficient indegree query."
             << " Please use outdegree on the reverse CSR graph.";
  return {};
}
uint64_t GetDegree(dgl_id_t vid) const {
return indptr[vid + 1] - indptr[vid];
}
DegreeArray GetDegrees(IdArray vids) const;
EdgeArray GetEdges(dgl_id_t vid) const;
EdgeArray GetEdges(IdArray vids) const;
/* \brief this returns the start and end position of the column indices corresponding v. */
DGLIdIters GetIndexRef(dgl_id_t v) const {
const int64_t start = indptr[v];
const int64_t end = indptr[v + 1];
return DGLIdIters(indices.begin() + start, indices.begin() + end);
}
/*
* Read all edges and store them in the vector.
*/
void ReadAllEdges(std::vector<Edge> *edges) const;
CSR::Ptr Transpose() const;
std::pair<CSR::Ptr, IdArray> VertexSubgraph(IdArray vids) const;
std::pair<CSR::Ptr, IdArray> EdgeSubgraph(IdArray eids,
std::shared_ptr<EdgeList> edge_list) const;
/*
* Construct a CSR from a list of edges.
*
* When constructing a CSR, we need to sort the edge list. To reduce the overhead,
* we simply sort on the input edge list. We allow sorting on both end points of an edge,
* which is specified by `sort_on`.
*/
static CSR::Ptr FromEdges(std::vector<Edge> *edges, int sort_on, uint64_t num_nodes);
private:
#ifndef _WIN32
std::shared_ptr<runtime::SharedMemory> mem;
#endif // _WIN32
};
// Edge list indexed by edge id;
struct EdgeList {
typedef std::shared_ptr<EdgeList> Ptr;
std::vector<dgl_id_t> src_points;
std::vector<dgl_id_t> dst_points;
EdgeList(int64_t len, dgl_id_t val) {
src_points.resize(len, val);
dst_points.resize(len, val);
}
/*!
 * \brief Out-degree of a vertex.
 * \param vid The vertex id; it is not range-checked here.
 * \return indptr_[vid + 1] - indptr_[vid], with indptr_ read as int64_t.
 */
uint64_t OutDegree(dgl_id_t vid) const override {
  const int64_t* offsets = static_cast<int64_t*>(indptr_->data);
  const int64_t row_begin = offsets[vid];
  const int64_t row_end = offsets[vid + 1];
  return row_end - row_begin;
}
DegreeArray OutDegrees(IdArray vids) const override;
Subgraph VertexSubgraph(IdArray vids) const override;
/*!
 * \brief Unsupported on CSR: always reports a fatal error via LOG(FATAL).
 * \note The trailing return only satisfies the signature.
 */
Subgraph EdgeSubgraph(IdArray eids) const override {
  LOG(FATAL) << "CSR graph does not support efficient EdgeSubgraph."
             << " Please use COO graph instead.";
  return {};
}
/*! \return the result of Transpose(), converted to a GraphPtr. */
GraphPtr Reverse() const override {
  GraphPtr reversed = Transpose();
  return reversed;
}
/*!
 * \brief Non-owning range over the successors of a vertex.
 * \param vid The vertex id; it is not range-checked here.
 * \return the slice of indices_ between indptr_[vid] and indptr_[vid + 1];
 *         both arrays are read as dgl_id_t.
 */
DGLIdIters SuccVec(dgl_id_t vid) const override {
  const dgl_id_t* offsets = static_cast<dgl_id_t*>(indptr_->data);
  const dgl_id_t* neighbors = static_cast<dgl_id_t*>(indices_->data);
  const dgl_id_t* first = neighbors + offsets[vid];
  const dgl_id_t* last = neighbors + offsets[vid + 1];
  return DGLIdIters(first, last);
}
/*!
 * \brief Non-owning range over the out-edge ids of a vertex.
 * \param vid The vertex id; it is not range-checked here.
 * \return the slice of edge_ids_ between indptr_[vid] and indptr_[vid + 1];
 *         both arrays are read as dgl_id_t.
 */
DGLIdIters OutEdgeVec(dgl_id_t vid) const override {
  const dgl_id_t* offsets = static_cast<dgl_id_t*>(indptr_->data);
  const dgl_id_t* eids = static_cast<dgl_id_t*>(edge_ids_->data);
  const dgl_id_t* first = eids + offsets[vid];
  const dgl_id_t* last = eids + offsets[vid + 1];
  return DGLIdIters(first, last);
}
/*!
 * \brief Unsupported on CSR: always reports a fatal error via LOG(FATAL).
 * \note The empty range returned afterwards only satisfies the signature.
 */
DGLIdIters PredVec(dgl_id_t vid) const override {
  LOG(FATAL) << "CSR graph does not support efficient PredVec."
             << " Please use SuccVec on the reverse CSR graph.";
  return DGLIdIters(nullptr, nullptr);
}

/*!
 * \brief Unsupported on CSR: always reports a fatal error via LOG(FATAL).
 * \note The empty range returned afterwards only satisfies the signature.
 */
DGLIdIters InEdgeVec(dgl_id_t vid) const override {
  LOG(FATAL) << "CSR graph does not support efficient InEdgeVec."
             << " Please use OutEdgeVec on the reverse CSR graph.";
  return DGLIdIters(nullptr, nullptr);
}
/*!
 * \brief Move this graph's contents into a newly allocated CSR.
 * \return a raw pointer to the new object; it is created with `new`, so the
 *         caller presumably takes ownership (confirm against callers).
 */
GraphInterface *Reset() override {
  CSR* moved = new CSR();
  *moved = std::move(*this);
  return moved;
}
std::vector<IdArray> GetAdj(bool transpose, const std::string &fmt) const override {
CHECK(!transpose && fmt == "csr") << "Not valid adj format request.";
return {indptr_, indices_, edge_ids_};
}
/*! \brief Return the reverse of this CSR graph (i.e., a CSC graph) */
CSRPtr Transpose() const;
/*! \brief Convert this CSR to COO */
COOPtr ToCOO() const;
/*!
 * \brief View this graph as a plain CSR matrix.
 * \return a CSRMatrix assembled from indptr_, indices_ and edge_ids_.
 * \note The matrix shares its storage with this graph, and its data field
 *       carries the edge ids.
 */
CSRMatrix ToCSRMatrix() const {
  CSRMatrix mat{indptr_, indices_, edge_ids_};
  return mat;
}
// Read-only accessors; each returns the stored IdArray handle by value.
IdArray indptr() const {
  return indptr_;
}
IdArray indices() const {
  return indices_;
}
IdArray edge_ids() const {
  return edge_ids_;
}
private:
/*! \brief private default constructor */
CSR() {}
// The CSR arrays.
// - The index is 0-based.
// - The out edges of vertex v is stored from `indices_[indptr_[v]]` to
// `indices_[indptr_[v+1]]` (exclusive).
// - The indices are *not* necessarily sorted.
// TODO(minjie): in the future, we should separate CSR and graph. A general CSR
// is not necessarily a graph, but graph operations could be implemented by
// CSR matrix operations. CSR matrix operations would be backed by different
// devices (CPU, CUDA, ...), while graph interface will not be aware of that.
IdArray indptr_, indices_, edge_ids_;
// whether the graph is a multi-graph
LazyObject<bool> is_multigraph_;
};
class COO : public GraphInterface {
public:
// Create a coo graph that shares the given src and dst
COO(int64_t num_vertices, IdArray src, IdArray dst);
COO(int64_t num_vertices, IdArray src, IdArray dst, bool is_multigraph);
// TODO(da): add constructor for creating COO from shared memory
/*! \brief Unsupported on COO: always reports a fatal error via LOG(FATAL). */
void AddVertices(uint64_t num_vertices) override {
  LOG(FATAL) << "COO graph does not allow mutation.";
}
/*! \brief Unsupported on COO: always reports a fatal error via LOG(FATAL). */
void AddEdge(dgl_id_t src, dgl_id_t dst) override {
  LOG(FATAL) << "COO graph does not allow mutation.";
}
/*! \brief Unsupported on COO: always reports a fatal error via LOG(FATAL). */
void AddEdges(IdArray src_ids, IdArray dst_ids) override {
  LOG(FATAL) << "COO graph does not allow mutation.";
}
/*! \brief Unsupported on COO: always reports a fatal error via LOG(FATAL). */
void Clear() override {
  LOG(FATAL) << "COO graph does not allow mutation.";
}
bool IsMultigraph() const override;
bool IsReadonly() const override {
return true;
}
uint64_t NumVertices() const override {
return num_vertices_;
}
uint64_t NumEdges() const override {
return src_->shape[0];
}
/*!
 * \brief Test whether a vertex id is in range.
 * \param vid The vertex id.
 * \return true if vid is smaller than NumVertices().
 * \note dgl_id_t is an unsigned type, so no lower-bound check is needed.
 */
bool HasVertex(dgl_id_t vid) const override {
  return vid < NumVertices();
}
// The four queries below are not supported on COO. Each reports a fatal
// error via LOG(FATAL); the trailing return only satisfies the signature.

/*! \brief Unsupported on COO. */
bool HasEdgeBetween(dgl_id_t src, dgl_id_t dst) const override {
  LOG(FATAL) << "COO graph does not support efficient HasEdgeBetween."
             << " Please use CSR graph or AdjList graph instead.";
  return false;
}

/*! \brief Unsupported on COO. */
IdArray Predecessors(dgl_id_t vid, uint64_t radius = 1) const override {
  LOG(FATAL) << "COO graph does not support efficient Predecessors."
             << " Please use CSR graph or AdjList graph instead.";
  return {};
}

/*! \brief Unsupported on COO. */
IdArray Successors(dgl_id_t vid, uint64_t radius = 1) const override {
  LOG(FATAL) << "COO graph does not support efficient Successors."
             << " Please use CSR graph or AdjList graph instead.";
  return {};
}

/*! \brief Unsupported on COO. */
IdArray EdgeId(dgl_id_t src, dgl_id_t dst) const override {
  LOG(FATAL) << "COO graph does not support efficient EdgeId."
             << " Please use CSR graph or AdjList graph instead.";
  return {};
}
/*!
 * \brief Unsupported on COO: always reports a fatal error via LOG(FATAL).
 * \note The message names this method (EdgeIds) so the failing call can be
 *       told apart from the single-pair EdgeId query.
 *       The trailing return only satisfies the signature.
 */
EdgeArray EdgeIds(IdArray src, IdArray dst) const override {
  LOG(FATAL) << "COO graph does not support efficient EdgeIds."
             << " Please use CSR graph or AdjList graph instead.";
  return {};
}
/*!
 * \brief Look up the endpoints of an edge.
 * \param eid The edge id; must be smaller than NumEdges() (CHECKed).
 * \return the pair (src_[eid], dst_[eid]), with both arrays read as dgl_id_t.
 * \note dgl_id_t is an unsigned type, so only the upper bound is checked.
 */
std::pair<dgl_id_t, dgl_id_t> FindEdge(dgl_id_t eid) const override {
  CHECK(eid < NumEdges()) << "Invalid edge id: " << eid;
  const dgl_id_t* src_data = static_cast<const dgl_id_t*>(src_->data);
  const dgl_id_t* dst_data = static_cast<const dgl_id_t*>(dst_->data);
  return std::make_pair(src_data[eid], dst_data[eid]);
}
EdgeArray FindEdges(IdArray eids) const override;
/*!
 * \brief Unsupported on COO (single vertex): always reports a fatal error.
 * \note The trailing return only satisfies the signature.
 */
EdgeArray InEdges(dgl_id_t vid) const override {
  LOG(FATAL) << "COO graph does not support efficient InEdges."
             << " Please use CSR graph or AdjList graph instead.";
  return {};
}
void register_edge(dgl_id_t eid, dgl_id_t src, dgl_id_t dst) {
CHECK_LT(eid, src_points.size()) << "Invalid edge id " << eid;
src_points[eid] = src;
dst_points[eid] = dst;
// The three queries below are not supported on COO. Each reports a fatal
// error via LOG(FATAL); the trailing return only satisfies the signature.

/*! \brief Unsupported on COO (vertex batch). */
EdgeArray InEdges(IdArray vids) const override {
  LOG(FATAL) << "COO graph does not support efficient InEdges."
             << " Please use CSR graph or AdjList graph instead.";
  return {};
}

/*! \brief Unsupported on COO (single vertex). */
EdgeArray OutEdges(dgl_id_t vid) const override {
  LOG(FATAL) << "COO graph does not support efficient OutEdges."
             << " Please use CSR graph or AdjList graph instead.";
  return {};
}

/*! \brief Unsupported on COO (vertex batch). */
EdgeArray OutEdges(IdArray vids) const override {
  LOG(FATAL) << "COO graph does not support efficient OutEdges."
             << " Please use CSR graph or AdjList graph instead.";
  return {};
}
EdgeArray Edges(const std::string &order = "") const override;
// Degree and vertex-subgraph queries are not supported on COO. Each reports
// a fatal error via LOG(FATAL); the trailing return only satisfies the
// signature.

/*! \brief Unsupported on COO. */
uint64_t InDegree(dgl_id_t vid) const override {
  LOG(FATAL) << "COO graph does not support efficient InDegree."
             << " Please use CSR graph or AdjList graph instead.";
  return 0;
}

/*! \brief Unsupported on COO. */
DegreeArray InDegrees(IdArray vids) const override {
  LOG(FATAL) << "COO graph does not support efficient InDegrees."
             << " Please use CSR graph or AdjList graph instead.";
  return {};
}

/*! \brief Unsupported on COO. */
uint64_t OutDegree(dgl_id_t vid) const override {
  LOG(FATAL) << "COO graph does not support efficient OutDegree."
             << " Please use CSR graph or AdjList graph instead.";
  return 0;
}

/*! \brief Unsupported on COO. */
DegreeArray OutDegrees(IdArray vids) const override {
  LOG(FATAL) << "COO graph does not support efficient OutDegrees."
             << " Please use CSR graph or AdjList graph instead.";
  return {};
}

/*! \brief Unsupported on COO. */
Subgraph VertexSubgraph(IdArray vids) const override {
  LOG(FATAL) << "COO graph does not support efficient VertexSubgraph."
             << " Please use CSR graph or AdjList graph instead.";
  return {};
}
Subgraph EdgeSubgraph(IdArray eids) const override;
/*! \return the result of Transpose(), converted to a GraphPtr. */
GraphPtr Reverse() const override {
  GraphPtr reversed = Transpose();
  return reversed;
}
// Per-vertex range accessors are not supported on COO. Each reports a fatal
// error via LOG(FATAL); the empty range returned afterwards only satisfies
// the signature.

/*! \brief Unsupported on COO. */
DGLIdIters SuccVec(dgl_id_t vid) const override {
  LOG(FATAL) << "COO graph does not support efficient SuccVec."
             << " Please use CSR graph or AdjList graph instead.";
  return DGLIdIters(nullptr, nullptr);
}

/*! \brief Unsupported on COO. */
DGLIdIters OutEdgeVec(dgl_id_t vid) const override {
  LOG(FATAL) << "COO graph does not support efficient OutEdgeVec."
             << " Please use CSR graph or AdjList graph instead.";
  return DGLIdIters(nullptr, nullptr);
}

/*! \brief Unsupported on COO. */
DGLIdIters PredVec(dgl_id_t vid) const override {
  LOG(FATAL) << "COO graph does not support efficient PredVec."
             << " Please use CSR graph or AdjList graph instead.";
  return DGLIdIters(nullptr, nullptr);
}

/*! \brief Unsupported on COO. */
DGLIdIters InEdgeVec(dgl_id_t vid) const override {
  LOG(FATAL) << "COO graph does not support efficient InEdgeVec."
             << " Please use CSR graph or AdjList graph instead.";
  return DGLIdIters(nullptr, nullptr);
}
/*!
 * \brief Move this graph's contents into a newly allocated COO.
 * \return a raw pointer to the new object; it is created with `new`, so the
 *         caller presumably takes ownership (confirm against callers).
 */
GraphInterface *Reset() override {
  COO* moved = new COO();
  *moved = std::move(*this);
  return moved;
}
/*!
 * \brief Return the adjacency of this COO graph as one stacked array.
 * \param transpose If true, dst_ is stacked before src_; otherwise src_
 *        comes first.
 * \param fmt Must be "coo"; anything else fails the CHECK.
 * \return a one-element vector holding the HStack of the two arrays.
 */
std::vector<IdArray> GetAdj(bool transpose, const std::string &fmt) const override {
  CHECK(fmt == "coo") << "Not valid adj format request.";
  if (!transpose) {
    return {HStack(src_, dst_)};
  }
  return {HStack(dst_, src_)};
}
static EdgeList::Ptr FromCSR(
const CSR::vector<int64_t>& indptr,
const CSR::vector<dgl_id_t>& indices,
const CSR::vector<dgl_id_t>& edge_ids,
bool in_csr);
};
/*!
 * \brief Return the transpose of this COO.
 * \return a new COO with the same vertex count whose src and dst arrays are
 *         this graph's dst_ and src_ respectively.
 */
COOPtr Transpose() const {
  COOPtr flipped(new COO(num_vertices_, dst_, src_));
  return flipped;
}
/*! \brief Construct an immutable graph from the COO format. */
ImmutableGraph(IdArray src_ids, IdArray dst_ids, IdArray edge_ids, size_t num_nodes,
bool multigraph = false);
/*! \brief Convert this COO to CSR */
CSRPtr ToCSR() const;
/*!
 * \brief View this graph as a plain COO matrix.
 * \return a COOMatrix whose row/col are src_/dst_ and whose data field is
 *         left default-constructed.
 * \note The matrix shares its storage with this graph.
 */
COOMatrix ToCOOMatrix() const {
  COOMatrix mat{src_, dst_, {}};
  return mat;
}
// Read-only accessors; each returns the stored IdArray handle by value.
IdArray src() const {
  return src_;
}
IdArray dst() const {
  return dst_;
}
private:
/* !\brief private default constructor */
COO() {}
/*! \brief number of vertices */
int64_t num_vertices_;
/*! \brief coordinate arrays */
IdArray src_, dst_;
/*! \brief whether the graph is a multi-graph */
LazyObject<bool> is_multigraph_;
};
/*!
* \brief DGL immutable graph index class.
*
* DGL's graph is directed. Vertices are integers enumerated from zero.
*/
class ImmutableGraph: public GraphInterface {
public:
/*!
 * \brief Construct an immutable graph from the COO format.
 * \param coo The COO graph to store; the CSR members are left empty.
 */
explicit ImmutableGraph(COOPtr coo) : coo_(coo) {}
/*!
* \brief Construct an immutable graph from the CSR format.
*
......@@ -341,16 +473,13 @@ class ImmutableGraph: public GraphInterface {
* construct one of the CSRs that runs fast for some operations we expect and construct
* the other CSR on demand.
*/
ImmutableGraph(CSR::Ptr in_csr, CSR::Ptr out_csr,
bool multigraph = false) : is_multigraph_(multigraph) {
this->in_csr_ = in_csr;
this->out_csr_ = out_csr;
CHECK(this->in_csr_ != nullptr || this->out_csr_ != nullptr)
<< "there must exist one of the CSRs";
/*!
 * \brief Store the given in/out CSR pair.
 * \param in_csr The in-CSR; may be null if \p out_csr is not.
 * \param out_csr The out-CSR; may be null if \p in_csr is not.
 * \note Fails the CHECK when both pointers are null.
 */
ImmutableGraph(CSRPtr in_csr, CSRPtr out_csr)
    : in_csr_(in_csr), out_csr_(out_csr) {
  CHECK(in_csr_ || out_csr_) << "Both CSR are missing.";
}
/*! \brief default constructor */
explicit ImmutableGraph(bool multigraph = false) : is_multigraph_(multigraph) {}
/*!
 * \brief Construct an immutable graph from one CSR.
 * \param csr Stored as the out-CSR; the other members are left empty.
 */
explicit ImmutableGraph(CSRPtr csr) : out_csr_(csr) {}
/*! \brief default copy constructor */
ImmutableGraph(const ImmutableGraph& other) = default;
......@@ -362,9 +491,10 @@ class ImmutableGraph: public GraphInterface {
ImmutableGraph(ImmutableGraph&& other) {
this->in_csr_ = other.in_csr_;
this->out_csr_ = other.out_csr_;
this->is_multigraph_ = other.is_multigraph_;
this->coo_ = other.coo_;
other.in_csr_ = nullptr;
other.out_csr_ = nullptr;
other.coo_ = nullptr;
}
#endif // _MSC_VER
......@@ -374,38 +504,19 @@ class ImmutableGraph: public GraphInterface {
/*! \brief default destructor */
~ImmutableGraph() = default;
/*!
* \brief Add vertices to the graph.
* \note Since vertices are integers enumerated from zero, only the number of
* vertices to be added needs to be specified.
* \param num_vertices The number of vertices to be added.
*/
void AddVertices(uint64_t num_vertices) {
/*! \brief Unsupported: always reports a fatal error via LOG(FATAL). */
void AddVertices(uint64_t num_vertices) override {
  LOG(FATAL) << "AddVertices isn't supported in ImmutableGraph";
}
/*!
* \brief Add one edge to the graph.
* \param src The source vertex.
* \param dst The destination vertex.
*/
void AddEdge(dgl_id_t src, dgl_id_t dst) {
/*! \brief Unsupported: always reports a fatal error via LOG(FATAL). */
void AddEdge(dgl_id_t src, dgl_id_t dst) override {
  LOG(FATAL) << "AddEdge isn't supported in ImmutableGraph";
}
/*!
* \brief Add edges to the graph.
* \param src_ids The source vertex id array.
* \param dst_ids The destination vertex id array.
*/
void AddEdges(IdArray src_ids, IdArray dst_ids) {
/*! \brief Unsupported: always reports a fatal error via LOG(FATAL). */
void AddEdges(IdArray src_ids, IdArray dst_ids) override {
  LOG(FATAL) << "AddEdges isn't supported in ImmutableGraph";
}
/*!
* \brief Clear the graph. Remove all vertices/edges.
*/
void Clear() {
/*! \brief Unsupported: always reports a fatal error via LOG(FATAL). */
void Clear() override {
  LOG(FATAL) << "Clear isn't supported in ImmutableGraph";
}
......@@ -413,46 +524,40 @@ class ImmutableGraph: public GraphInterface {
* \note not const since we have caches
* \return whether the graph is a multigraph
*/
bool IsMultigraph() const {
return is_multigraph_;
bool IsMultigraph() const override {
  // Delegate to whichever underlying graph AnyGraph() hands back.
  const bool multigraph = AnyGraph()->IsMultigraph();
  return multigraph;
}
/*!
* \return whether the graph is read-only
*/
virtual bool IsReadonly() const {
bool IsReadonly() const override { return true; }
/*! \return the number of vertices in the graph.*/
uint64_t NumVertices() const {
if (in_csr_)
return in_csr_->NumVertices();
else
return out_csr_->NumVertices();
uint64_t NumVertices() const override {
  // Delegate to whichever underlying graph AnyGraph() hands back.
  const uint64_t vertex_count = AnyGraph()->NumVertices();
  return vertex_count;
}
/*! \return the number of edges in the graph.*/
uint64_t NumEdges() const {
if (in_csr_)
return in_csr_->NumEdges();
else
return out_csr_->NumEdges();
uint64_t NumEdges() const override {
  // Delegate to whichever underlying graph AnyGraph() hands back.
  const uint64_t edge_count = AnyGraph()->NumEdges();
  return edge_count;
}
/*! \return true if the given vertex is in the graph.*/
bool HasVertex(dgl_id_t vid) const {
bool HasVertex(dgl_id_t vid) const override {
  // Ids are unsigned, so being below the vertex count is the whole test.
  const uint64_t vertex_count = NumVertices();
  return vid < vertex_count;
}
/*! \return a 0-1 array indicating whether the given vertices are in the graph.*/
BoolArray HasVertices(IdArray vids) const;
/*! \return true if the given edge is in the graph.*/
bool HasEdgeBetween(dgl_id_t src, dgl_id_t dst) const;
/*! \return a 0-1 array indicating whether the given edges are in the graph.*/
BoolArray HasEdgesBetween(IdArray src_ids, IdArray dst_ids) const;
bool HasEdgeBetween(dgl_id_t src, dgl_id_t dst) const override {
if (in_csr_) {
return in_csr_->HasEdgeBetween(dst, src);
} else {
return GetOutCSR()->HasEdgeBetween(src, dst);
}
}
/*!
* \brief Find the predecessors of a vertex.
......@@ -460,7 +565,9 @@ class ImmutableGraph: public GraphInterface {
* \param radius The radius of the neighborhood. Default is immediate neighbor (radius=1).
* \return the predecessor id array.
*/
IdArray Predecessors(dgl_id_t vid, uint64_t radius = 1) const;
IdArray Predecessors(dgl_id_t vid, uint64_t radius = 1) const override {
return GetInCSR()->Successors(vid, radius);
}
/*!
* \brief Find the successors of a vertex.
......@@ -468,7 +575,9 @@ class ImmutableGraph: public GraphInterface {
* \param radius The radius of the neighborhood. Default is immediate neighbor (radius=1).
* \return the successor id array.
*/
IdArray Successors(dgl_id_t vid, uint64_t radius = 1) const;
IdArray Successors(dgl_id_t vid, uint64_t radius = 1) const override {
return GetOutCSR()->Successors(vid, radius);
}
/*!
* \brief Get all edge ids between the two given endpoints
......@@ -478,7 +587,13 @@ class ImmutableGraph: public GraphInterface {
* \param dst The destination vertex.
* \return the edge id array.
*/
IdArray EdgeId(dgl_id_t src, dgl_id_t dst) const;
IdArray EdgeId(dgl_id_t src, dgl_id_t dst) const override {
if (in_csr_) {
return in_csr_->EdgeId(dst, src);
} else {
return GetOutCSR()->EdgeId(src, dst);
}
}
/*!
* \brief Get all edge ids between the given endpoint pairs.
......@@ -489,21 +604,31 @@ class ImmutableGraph: public GraphInterface {
* first, and ties are broken by the order of edge ID.
* \return EdgeArray containing all edges between all pairs.
*/
EdgeArray EdgeIds(IdArray src, IdArray dst) const;
EdgeArray EdgeIds(IdArray src, IdArray dst) const override {
if (in_csr_) {
return in_csr_->EdgeIds(dst, src);
} else {
return GetOutCSR()->EdgeIds(src, dst);
}
}
/*!
* \brief Find the edge ID and return the pair of endpoints
* \param eid The edge ID
* \return a pair whose first element is the source and the second the destination.
*/
std::pair<dgl_id_t, dgl_id_t> FindEdge(dgl_id_t eid) const;
std::pair<dgl_id_t, dgl_id_t> FindEdge(dgl_id_t eid) const override {
return GetCOO()->FindEdge(eid);
}
/*!
* \brief Find the edge IDs and return their source and target node IDs.
* \param eids The edge ID array.
* \return EdgeArray containing all edges with id in eid. The order is preserved.
*/
EdgeArray FindEdges(IdArray eids) const;
EdgeArray FindEdges(IdArray eids) const override {
return GetCOO()->FindEdges(eids);
}
/*!
* \brief Get the in edges of the vertex.
......@@ -511,8 +636,9 @@ class ImmutableGraph: public GraphInterface {
* \param vid The vertex id.
* \return the edges
*/
EdgeArray InEdges(dgl_id_t vid) const {
return this->GetInCSR()->GetEdges(vid);
EdgeArray InEdges(dgl_id_t vid) const override {
const EdgeArray& ret = GetInCSR()->OutEdges(vid);
return {ret.dst, ret.src, ret.id};
}
/*!
......@@ -520,8 +646,9 @@ class ImmutableGraph: public GraphInterface {
* \param vids The vertex id array.
* \return the id arrays of the two endpoints of the edges.
*/
EdgeArray InEdges(IdArray vids) const {
return this->GetInCSR()->GetEdges(vids);
EdgeArray InEdges(IdArray vids) const override {
const EdgeArray& ret = GetInCSR()->OutEdges(vids);
return {ret.dst, ret.src, ret.id};
}
/*!
......@@ -530,10 +657,8 @@ class ImmutableGraph: public GraphInterface {
* \param vid The vertex id.
* \return the id arrays of the two endpoints of the edges.
*/
EdgeArray OutEdges(dgl_id_t vid) const {
auto ret = this->GetOutCSR()->GetEdges(vid);
// We should reverse the source and destination in the edge array.
return ImmutableGraph::EdgeArray{ret.dst, ret.src, ret.id};
EdgeArray OutEdges(dgl_id_t vid) const override {
return GetOutCSR()->OutEdges(vid);
}
/*!
......@@ -541,9 +666,8 @@ class ImmutableGraph: public GraphInterface {
* \param vids The vertex id array.
* \return the id arrays of the two endpoints of the edges.
*/
EdgeArray OutEdges(IdArray vids) const {
auto ret = this->GetOutCSR()->GetEdges(vids);
return ImmutableGraph::EdgeArray{ret.dst, ret.src, ret.id};
EdgeArray OutEdges(IdArray vids) const override {
return GetOutCSR()->OutEdges(vids);
}
/*!
......@@ -553,16 +677,15 @@ class ImmutableGraph: public GraphInterface {
* \param sorted Whether the returned edge list is sorted by their src and dst ids
* \return the id arrays of the two endpoints of the edges.
*/
EdgeArray Edges(const std::string &order = "") const;
EdgeArray Edges(const std::string &order = "") const override;
/*!
* \brief Get the in degree of the given vertex.
* \param vid The vertex id.
* \return the in degree
*/
uint64_t InDegree(dgl_id_t vid) const {
CHECK(HasVertex(vid)) << "invalid vertex: " << vid;
return this->GetInCSR()->GetDegree(vid);
uint64_t InDegree(dgl_id_t vid) const override {
return GetInCSR()->OutDegree(vid);
}
/*!
......@@ -570,8 +693,8 @@ class ImmutableGraph: public GraphInterface {
* \param vid The vertex id array.
* \return the in degree array
*/
DegreeArray InDegrees(IdArray vids) const {
return this->GetInCSR()->GetDegrees(vids);
DegreeArray InDegrees(IdArray vids) const override {
return GetInCSR()->OutDegrees(vids);
}
/*!
......@@ -579,9 +702,8 @@ class ImmutableGraph: public GraphInterface {
* \param vid The vertex id.
* \return the out degree
*/
uint64_t OutDegree(dgl_id_t vid) const {
CHECK(HasVertex(vid)) << "invalid vertex: " << vid;
return this->GetOutCSR()->GetDegree(vid);
uint64_t OutDegree(dgl_id_t vid) const override {
return GetOutCSR()->OutDegree(vid);
}
/*!
......@@ -589,8 +711,8 @@ class ImmutableGraph: public GraphInterface {
* \param vid The vertex id array.
* \return the out degree array
*/
DegreeArray OutDegrees(IdArray vids) const {
return this->GetOutCSR()->GetDegrees(vids);
DegreeArray OutDegrees(IdArray vids) const override {
return GetOutCSR()->OutDegrees(vids);
}
/*!
......@@ -609,7 +731,7 @@ class ImmutableGraph: public GraphInterface {
* \param vids The vertices in the subgraph.
* \return the induced subgraph
*/
Subgraph VertexSubgraph(IdArray vids) const;
Subgraph VertexSubgraph(IdArray vids) const override;
/*!
* \brief Construct the induced edge subgraph of the given edges.
......@@ -627,7 +749,7 @@ class ImmutableGraph: public GraphInterface {
* \param eids The edges in the subgraph.
* \return the induced edge subgraph
*/
Subgraph EdgeSubgraph(IdArray eids) const;
Subgraph EdgeSubgraph(IdArray eids) const override;
/*!
* \brief Return a new graph with all the edges reversed.
......@@ -636,8 +758,12 @@ class ImmutableGraph: public GraphInterface {
*
* \return the reversed graph
*/
GraphPtr Reverse() const {
return GraphPtr(new ImmutableGraph(out_csr_, in_csr_, is_multigraph_));
GraphPtr Reverse() const override {
if (coo_) {
return GraphPtr(new ImmutableGraph(out_csr_, in_csr_, coo_->Transpose()));
} else {
return GraphPtr(new ImmutableGraph(out_csr_, in_csr_));
}
}
/*!
......@@ -645,9 +771,8 @@ class ImmutableGraph: public GraphInterface {
* \param vid The vertex id.
* \return the successor vector
*/
DGLIdIters SuccVec(dgl_id_t vid) const {
return DGLIdIters(out_csr_->indices.begin() + out_csr_->indptr[vid],
out_csr_->indices.begin() + out_csr_->indptr[vid + 1]);
DGLIdIters SuccVec(dgl_id_t vid) const override {
return GetOutCSR()->SuccVec(vid);
}
/*!
......@@ -655,9 +780,8 @@ class ImmutableGraph: public GraphInterface {
* \param vid The vertex id.
* \return the out edge id vector
*/
DGLIdIters OutEdgeVec(dgl_id_t vid) const {
return DGLIdIters(out_csr_->edge_ids.begin() + out_csr_->indptr[vid],
out_csr_->edge_ids.begin() + out_csr_->indptr[vid + 1]);
DGLIdIters OutEdgeVec(dgl_id_t vid) const override {
return GetOutCSR()->OutEdgeVec(vid);
}
/*!
......@@ -665,9 +789,8 @@ class ImmutableGraph: public GraphInterface {
* \param vid The vertex id.
* \return the predecessor vector
*/
DGLIdIters PredVec(dgl_id_t vid) const {
return DGLIdIters(in_csr_->indices.begin() + in_csr_->indptr[vid],
in_csr_->indices.begin() + in_csr_->indptr[vid + 1]);
DGLIdIters PredVec(dgl_id_t vid) const override {
return GetInCSR()->SuccVec(vid);
}
/*!
......@@ -675,16 +798,15 @@ class ImmutableGraph: public GraphInterface {
* \param vid The vertex id.
* \return the in edge id vector
*/
DGLIdIters InEdgeVec(dgl_id_t vid) const {
return DGLIdIters(in_csr_->edge_ids.begin() + in_csr_->indptr[vid],
in_csr_->edge_ids.begin() + in_csr_->indptr[vid + 1]);
DGLIdIters InEdgeVec(dgl_id_t vid) const override {
return GetInCSR()->OutEdgeVec(vid);
}
/*!
* \brief Reset the data in the graph and move its data to the returned graph object.
* \return a raw pointer to the graph object.
*/
virtual GraphInterface *Reset() {
GraphInterface *Reset() override {
ImmutableGraph* gptr = new ImmutableGraph();
*gptr = std::move(*this);
return gptr;
......@@ -699,95 +821,76 @@ class ImmutableGraph: public GraphInterface {
* \param fmt the format of the returned adjacency matrix.
* \return a vector of three IdArray.
*/
virtual std::vector<IdArray> GetAdj(bool transpose, const std::string &fmt) const;
std::vector<IdArray> GetAdj(bool transpose, const std::string &fmt) const override;
/*
* The immutable graph may only contain one of the CSRs (e.g., the sampled subgraphs).
* When we get in csr or out csr, we try to get the one cached in the structure.
* If not, we transpose the other one to get the one we need.
*/
CSR::Ptr GetInCSR() const {
if (in_csr_) {
return in_csr_;
} else {
CHECK(out_csr_ != nullptr) << "one of the CSRs must exist";
const_cast<ImmutableGraph *>(this)->in_csr_ = out_csr_->Transpose();
return in_csr_;
/*! \brief Return the in-CSR. If it does not exist, build it by transposing the out-CSR or, failing that, from the COO. */
CSRPtr GetInCSR() const {
if (!in_csr_) {
if (out_csr_) {
const_cast<ImmutableGraph*>(this)->in_csr_ = out_csr_->Transpose();
} else {
CHECK(coo_) << "None of CSR, COO exist";
const_cast<ImmutableGraph*>(this)->in_csr_ = coo_->Transpose()->ToCSR();
}
}
return in_csr_;
}
CSR::Ptr GetOutCSR() const {
if (out_csr_) {
return out_csr_;
} else {
CHECK(in_csr_ != nullptr) << "one of the CSRs must exist";
const_cast<ImmutableGraph *>(this)->out_csr_ = in_csr_->Transpose();
return out_csr_;
/*! \brief Return the out-CSR. If it does not exist, build it by transposing the in-CSR or, failing that, from the COO. */
CSRPtr GetOutCSR() const {
if (!out_csr_) {
if (in_csr_) {
const_cast<ImmutableGraph*>(this)->out_csr_ = in_csr_->Transpose();
} else {
CHECK(coo_) << "None of CSR, COO exist";
const_cast<ImmutableGraph*>(this)->out_csr_ = coo_->ToCSR();
}
}
return out_csr_;
}
/*
* The edge list is required for FindEdge/FindEdges/EdgeSubgraph, if no such function is called, we would not create edge list.
* if such function is called the first time, we create a edge list from one of the graph's csr representations,
* if we have called such function before, we get the one cached in the structure.
*/
EdgeList::Ptr GetEdgeList() const {
if (edge_list_)
return edge_list_;
if (in_csr_) {
const_cast<ImmutableGraph *>(this)->edge_list_ =\
EdgeList::FromCSR(in_csr_->indptr, in_csr_->indices, in_csr_->edge_ids, true);
} else {
CHECK(out_csr_ != nullptr) << "one of the CSRs must exist";
const_cast<ImmutableGraph *>(this)->edge_list_ =\
EdgeList::FromCSR(out_csr_->indptr, out_csr_->indices, out_csr_->edge_ids, false);
/*! \brief Return the COO. If it does not exist, create it from one of the CSRs. */
COOPtr GetCOO() const {
if (!coo_) {
if (in_csr_) {
const_cast<ImmutableGraph*>(this)->coo_ = in_csr_->ToCOO()->Transpose();
} else {
CHECK(out_csr_) << "Both CSR are missing.";
const_cast<ImmutableGraph*>(this)->coo_ = out_csr_->ToCOO();
}
}
return edge_list_;
return coo_;
}
/*!
* \brief Get the CSR array that represents the in-edges.
* This method copies data from std::vector to IdArray.
* \param start the first row to copy.
* \param end the last row to copy (exclusive).
* \return the CSR array.
*/
CSRArray GetInCSRArray(size_t start, size_t end) const;
protected:
/* !\brief internal default constructor */
ImmutableGraph() {}
/*!
* \brief Get the CSR array that represents the out-edges.
* This method copies data from std::vector to IdArray.
* \param start the first row to copy.
* \param end the last row to copy (exclusive).
* \return the CSR array.
*/
CSRArray GetOutCSRArray(size_t start, size_t end) const;
/* !\brief internal constructor for all the members */
ImmutableGraph(CSRPtr in_csr, CSRPtr out_csr, COOPtr coo)
: in_csr_(in_csr), out_csr_(out_csr), coo_(coo) {
CHECK(AnyGraph()) << "At least one graph structure should exist.";
}
protected:
DGLIdIters GetInEdgeIdRef(dgl_id_t src, dgl_id_t dst) const;
DGLIdIters GetOutEdgeIdRef(dgl_id_t src, dgl_id_t dst) const;
/* !\brief return pointer to any available graph structure */
GraphPtr AnyGraph() const {
if (in_csr_) {
return in_csr_;
} else if (out_csr_) {
return out_csr_;
} else {
return coo_;
}
}
/*!
* \brief Compact a subgraph.
* In a sampled subgraph, the vertex Id is still in the ones in the original graph.
* We want to convert them to the subgraph Ids.
*/
void CompactSubgraph(IdArray induced_vertices);
// Store the in-edges.
CSR::Ptr in_csr_;
// Store the out-edges.
CSR::Ptr out_csr_;
// Store the edge list indexed by edge id
EdgeList::Ptr edge_list_;
/*!
* \brief Whether if this is a multigraph.
*
* When a multiedge is added, this flag switches to true.
*/
bool is_multigraph_ = false;
// Store the in csr (i.e, the reverse csr)
CSRPtr in_csr_;
// Store the out csr (i.e, the normal csr)
CSRPtr out_csr_;
// Store the edge list indexed by edge id (COO)
COOPtr coo_;
};
} // namespace dgl
#endif // DGL_IMMUTABLE_GRAPH_H_
/*!
* Copyright (c) 2019 by Contributors
* \file dgl/lazy.h
* \brief Lazy object that will be materialized only when being queried.
*/
#ifndef DGL_LAZY_H_
#define DGL_LAZY_H_
#include <memory>
namespace dgl {
/*!
* \brief Lazy object that will be materialized only when being queried.
*
* The object should be immutable -- no mutation once materialized.
* The object is currently not thread safe.
*/
template <typename T>
class LazyObject {
 public:
  /*!\brief Construct an empty lazy object; the value is created on the first Get(). */
  LazyObject() {}

  /*!\brief Construct an already-materialized object holding a copy of the given value. */
  explicit LazyObject(const T& val): ptr_(new T(val)) {}

  /*!\brief destructor */
  ~LazyObject() = default;

  /*!
   * \brief Return the stored value, creating it on first use.
   *
   * The creator function is invoked only when no value is stored yet; once a
   * value exists, later calls return it and never invoke their creator.
   *
   * \param fn The creator function; called with no arguments, its result is
   *           used to construct the stored T.
   * \return a const reference to the stored value.
   */
  template <typename Fn>
  const T& Get(Fn fn) {
    // Fast path: the value has already been materialized.
    if (ptr_) {
      return *ptr_;
    }
    ptr_.reset(new T(fn()));
    return *ptr_;
  }

 private:
  /*!\brief Pointer to the materialized value; null until the value is created. */
  std::shared_ptr<T> ptr_{nullptr};
};
} // namespace dgl
#endif // DGL_LAZY_H_
......@@ -92,6 +92,8 @@ class NDArray {
inline int use_count() const;
/*! \return Pointer to content of DLTensor */
inline const DLTensor* operator->() const;
/*! \return True if the ndarray is contiguous. */
bool IsContiguous() const;
/*!
* \brief Copy data content from another array.
* \param other The source array to be copied from.
......@@ -129,10 +131,11 @@ class NDArray {
* \brief Create a NDArray that shares the data memory with the current one.
* \param shape The shape of the new array.
* \param dtype The data type of the new array.
* \param offset The offset (in bytes) of the starting pointer.
* \note The memory size of new array must be smaller than the current one.
*/
DGL_DLL NDArray CreateView(
std::vector<int64_t> shape, DLDataType dtype);
std::vector<int64_t> shape, DLDataType dtype, int64_t offset = 0);
/*!
* \brief Create a reference view of NDArray that
* represents as DLManagedTensor.
......
......@@ -898,6 +898,7 @@ class DGLGraph(DGLBaseGraph):
readonly=False):
# graph
super(DGLGraph, self).__init__(create_graph_index(graph_data, multigraph, readonly))
# node and edge frame
if node_frame is None:
self._node_frame = FrameRef(Frame(num_rows=self.number_of_nodes()))
......@@ -1977,7 +1978,7 @@ class DGLGraph(DGLBaseGraph):
assert func is not None
if is_all(edges):
u, v, _ = self._graph.edges()
u, v, _ = self._graph.edges('eid')
eid = utils.toindex(slice(0, self.number_of_edges()))
elif isinstance(edges, tuple):
u, v = edges
......
......@@ -51,21 +51,19 @@ class GraphIndex(object):
self._multigraph = multigraph
self._readonly = readonly
if readonly:
self._init(src, dst, utils.toindex(F.arange(0, len(src))), n_nodes)
self._init(src, dst, n_nodes)
else:
self._handle = _CAPI_DGLGraphCreateMutable(multigraph)
self.clear()
self.add_nodes(n_nodes)
self.add_edges(src, dst)
def _init(self, src_ids, dst_ids, edge_ids, num_nodes):
def _init(self, src_ids, dst_ids, num_nodes):
"""The actual init function"""
assert len(src_ids) == len(dst_ids)
assert len(src_ids) == len(edge_ids)
self._handle = _CAPI_DGLGraphCreate(
src_ids.todgltensor(),
dst_ids.todgltensor(),
edge_ids.todgltensor(),
self._multigraph,
int(num_nodes),
self._readonly)
......@@ -631,8 +629,9 @@ class GraphIndex(object):
indices = F.copy_to(utils.toindex(rst(1)).tousertensor(), ctx)
shuffle = utils.toindex(rst(2))
dat = F.ones(indices.shape, dtype=F.float32, ctx=ctx)
return F.sparse_matrix(dat, ('csr', indices, indptr),
(self.number_of_nodes(), self.number_of_nodes()))[0], shuffle
spmat = F.sparse_matrix(dat, ('csr', indices, indptr),
(self.number_of_nodes(), self.number_of_nodes()))[0]
return spmat, shuffle
elif fmt == "coo":
## FIXME(minjie): data type
idx = F.copy_to(utils.toindex(rst(0)).tousertensor(), ctx)
......@@ -786,13 +785,11 @@ class GraphIndex(object):
for e in nx_graph.edges:
src.append(e[0])
dst.append(e[1])
eid = np.arange(0, len(src), dtype=np.int64)
num_nodes = nx_graph.number_of_nodes()
# We store edge Ids as an edge attribute.
eid = utils.toindex(eid)
src = utils.toindex(src)
dst = utils.toindex(dst)
self._init(src, dst, eid, num_nodes)
self._init(src, dst, num_nodes)
def from_scipy_sparse_matrix(self, adj):
......@@ -808,9 +805,7 @@ class GraphIndex(object):
adj_coo = adj.tocoo()
src = utils.toindex(adj_coo.row)
dst = utils.toindex(adj_coo.col)
edge_ids = utils.toindex(F.arange(0, len(adj_coo.row)))
self._init(src, dst, edge_ids, num_nodes)
self._init(src, dst, num_nodes)
def from_csr_matrix(self, indptr, indices, edge_dir, shared_mem_name=""):
"""Load a graph from the CSR matrix.
......@@ -881,8 +876,7 @@ class GraphIndex(object):
min_nodes = min(src.min(), dst.min())
if min_nodes != 0:
raise DGLError('Invalid edge list. Nodes must start from 0.')
edge_ids = utils.toindex(F.arange(0, len(src)))
self._init(src_ids, dst_ids, edge_ids, num_nodes)
self._init(src_ids, dst_ids, num_nodes)
def line_graph(self, backtracking=True):
"""Return the line graph of this graph.
......
......@@ -191,7 +191,7 @@ def schedule_update_all(graph,
var_eid = var.IDX(eid)
# generate send + reduce
def uv_getter():
src, dst, _ = graph._graph.edges()
src, dst, _ = graph._graph.edges('eid')
return var.IDX(src), var.IDX(dst)
adj_creator = lambda: spmv.build_adj_matrix_graph(graph)
inc_creator = lambda: spmv.build_inc_matrix_graph(graph)
......
/*!
* Copyright (c) 2019 by Contributors
* \file array.cc
* \brief DGL array utilities implementation
*/
#include <dgl/array.h>
namespace dgl {
// TODO(minjie): currently these operators are only on CPU.
/*!
 * \brief Allocate an uninitialized 1-D id array on CPU.
 * \param length The number of elements.
 * \return the new array (64-bit integer dtype, CPU device 0).
 */
IdArray NewIdArray(int64_t length) {
  const DLDataType id_dtype{kDLInt, 64, 1};
  const DLContext cpu_ctx{kDLCPU, 0};
  return IdArray::Empty({length}, id_dtype, cpu_ctx);
}
/*!
 * \brief Copy the content of a vector into a freshly allocated id array.
 * \param vec The ids to copy.
 * \return a new id array with vec.size() elements, in the same order.
 */
IdArray VecToIdArray(const std::vector<dgl_id_t>& vec) {
  IdArray out = NewIdArray(vec.size());
  dgl_id_t* out_data = static_cast<dgl_id_t*>(out->data);
  for (size_t idx = 0; idx < vec.size(); ++idx) {
    out_data[idx] = vec[idx];
  }
  return out;
}
/*!
 * \brief Create a copy of the given array.
 *
 * A new id array with arr->shape[0] elements is allocated and filled through
 * CopyFrom.
 * NOTE(review): only the first dimension is used for sizing, so this presumably
 * expects a 1-D id array -- confirm against callers.
 *
 * \param arr The array to copy.
 * \return the copied array.
 */
IdArray Clone(IdArray arr) {
  const int64_t length = arr->shape[0];
  IdArray copy = NewIdArray(length);
  copy.CopyFrom(arr);
  return copy;
}
/*!
 * \brief Element-wise addition of two id arrays.
 *
 * The arithmetic is carried out on dgl_id_t values.
 *
 * \param lhs The left operand.
 * \param rhs The right operand; must have the same length as lhs.
 * \return a new id array where ret[i] = lhs[i] + rhs[i].
 */
IdArray Add(IdArray lhs, IdArray rhs) {
  // A shorter rhs would otherwise be read past its end in the loop below.
  CHECK_EQ(lhs->shape[0], rhs->shape[0])
    << "Add: the two arrays must have the same length";
  const int64_t len = lhs->shape[0];
  IdArray ret = NewIdArray(len);
  const dgl_id_t* lhs_data = static_cast<dgl_id_t*>(lhs->data);
  const dgl_id_t* rhs_data = static_cast<dgl_id_t*>(rhs->data);
  dgl_id_t* ret_data = static_cast<dgl_id_t*>(ret->data);
  for (int64_t i = 0; i < len; ++i) {
    ret_data[i] = lhs_data[i] + rhs_data[i];
  }
  return ret;
}
/*!
 * \brief Element-wise subtraction of two id arrays.
 *
 * The arithmetic is carried out on dgl_id_t values.
 *
 * \param lhs The left operand.
 * \param rhs The right operand; must have the same length as lhs.
 * \return a new id array where ret[i] = lhs[i] - rhs[i].
 */
IdArray Sub(IdArray lhs, IdArray rhs) {
  // A shorter rhs would otherwise be read past its end in the loop below.
  CHECK_EQ(lhs->shape[0], rhs->shape[0])
    << "Sub: the two arrays must have the same length";
  const int64_t len = lhs->shape[0];
  IdArray ret = NewIdArray(len);
  const dgl_id_t* lhs_data = static_cast<dgl_id_t*>(lhs->data);
  const dgl_id_t* rhs_data = static_cast<dgl_id_t*>(rhs->data);
  dgl_id_t* ret_data = static_cast<dgl_id_t*>(ret->data);
  for (int64_t i = 0; i < len; ++i) {
    ret_data[i] = lhs_data[i] - rhs_data[i];
  }
  return ret;
}
/*!
 * \brief Element-wise multiplication of two id arrays.
 *
 * The arithmetic is carried out on dgl_id_t values.
 *
 * \param lhs The left operand.
 * \param rhs The right operand; must have the same length as lhs.
 * \return a new id array where ret[i] = lhs[i] * rhs[i].
 */
IdArray Mul(IdArray lhs, IdArray rhs) {
  // A shorter rhs would otherwise be read past its end in the loop below.
  CHECK_EQ(lhs->shape[0], rhs->shape[0])
    << "Mul: the two arrays must have the same length";
  const int64_t len = lhs->shape[0];
  IdArray ret = NewIdArray(len);
  const dgl_id_t* lhs_data = static_cast<dgl_id_t*>(lhs->data);
  const dgl_id_t* rhs_data = static_cast<dgl_id_t*>(rhs->data);
  dgl_id_t* ret_data = static_cast<dgl_id_t*>(ret->data);
  for (int64_t i = 0; i < len; ++i) {
    ret_data[i] = lhs_data[i] * rhs_data[i];
  }
  return ret;
}
/*!
 * \brief Element-wise integer division of two id arrays.
 *
 * The arithmetic is carried out on dgl_id_t values. Individual divisors are
 * not checked, so rhs must not contain zeros.
 *
 * \param lhs The dividend array.
 * \param rhs The divisor array; must have the same length as lhs.
 * \return a new id array where ret[i] = lhs[i] / rhs[i].
 */
IdArray Div(IdArray lhs, IdArray rhs) {
  // A shorter rhs would otherwise be read past its end in the loop below.
  CHECK_EQ(lhs->shape[0], rhs->shape[0])
    << "Div: the two arrays must have the same length";
  const int64_t len = lhs->shape[0];
  IdArray ret = NewIdArray(len);
  const dgl_id_t* lhs_data = static_cast<dgl_id_t*>(lhs->data);
  const dgl_id_t* rhs_data = static_cast<dgl_id_t*>(rhs->data);
  dgl_id_t* ret_data = static_cast<dgl_id_t*>(ret->data);
  for (int64_t i = 0; i < len; ++i) {
    ret_data[i] = lhs_data[i] / rhs_data[i];
  }
  return ret;
}
/*!
 * \brief Add a scalar to every element of an id array.
 * \param lhs The input array.
 * \param rhs The scalar to add.
 * \return a new id array where ret[i] = lhs[i] + rhs.
 */
IdArray Add(IdArray lhs, dgl_id_t rhs) {
  const int64_t len = lhs->shape[0];
  IdArray out = NewIdArray(len);
  const dgl_id_t* in_data = static_cast<dgl_id_t*>(lhs->data);
  dgl_id_t* out_data = static_cast<dgl_id_t*>(out->data);
  for (int64_t idx = 0; idx != len; ++idx) {
    out_data[idx] = in_data[idx] + rhs;
  }
  return out;
}
/*!
 * \brief Subtract a scalar from every element of an id array.
 * \param lhs The input array.
 * \param rhs The scalar to subtract.
 * \return a new id array where ret[i] = lhs[i] - rhs.
 */
IdArray Sub(IdArray lhs, dgl_id_t rhs) {
  const int64_t len = lhs->shape[0];
  IdArray out = NewIdArray(len);
  const dgl_id_t* in_data = static_cast<dgl_id_t*>(lhs->data);
  dgl_id_t* out_data = static_cast<dgl_id_t*>(out->data);
  for (int64_t idx = 0; idx != len; ++idx) {
    out_data[idx] = in_data[idx] - rhs;
  }
  return out;
}
/*!
 * \brief Multiply every element of an id array by a scalar.
 * \param lhs The input array.
 * \param rhs The scalar factor.
 * \return a new id array where ret[i] = lhs[i] * rhs.
 */
IdArray Mul(IdArray lhs, dgl_id_t rhs) {
  const int64_t len = lhs->shape[0];
  IdArray out = NewIdArray(len);
  const dgl_id_t* in_data = static_cast<dgl_id_t*>(lhs->data);
  dgl_id_t* out_data = static_cast<dgl_id_t*>(out->data);
  for (int64_t idx = 0; idx != len; ++idx) {
    out_data[idx] = in_data[idx] * rhs;
  }
  return out;
}
/*!
 * \brief Divide every element of an id array by a scalar (integer division).
 * \param lhs The dividend array.
 * \param rhs The scalar divisor; must be non-zero.
 * \return a new id array where ret[i] = lhs[i] / rhs.
 */
IdArray Div(IdArray lhs, dgl_id_t rhs) {
  // Integer division by zero is undefined behaviour; fail with a clear message instead.
  CHECK(rhs != 0) << "Div: division by zero";
  const int64_t len = lhs->shape[0];
  IdArray ret = NewIdArray(len);
  const dgl_id_t* lhs_data = static_cast<dgl_id_t*>(lhs->data);
  dgl_id_t* ret_data = static_cast<dgl_id_t*>(ret->data);
  for (int64_t i = 0; i < len; ++i) {
    ret_data[i] = lhs_data[i] / rhs;
  }
  return ret;
}
/*!
 * \brief Add a scalar to every element of an id array (scalar on the left).
 * \param lhs The scalar.
 * \param rhs The array.
 * \return a new id array where ret[i] = lhs + rhs[i].
 */
IdArray Add(dgl_id_t lhs, IdArray rhs) {
  // Addition commutes, so delegate to the (array, scalar) overload.
  IdArray sum = Add(rhs, lhs);
  return sum;
}
/*!
 * \brief Subtract every element of an id array from a scalar.
 * \param lhs The scalar minuend.
 * \param rhs The array of subtrahends.
 * \return a new id array where ret[i] = lhs - rhs[i].
 */
IdArray Sub(dgl_id_t lhs, IdArray rhs) {
  const int64_t len = rhs->shape[0];
  IdArray out = NewIdArray(len);
  const dgl_id_t* in_data = static_cast<dgl_id_t*>(rhs->data);
  dgl_id_t* out_data = static_cast<dgl_id_t*>(out->data);
  for (int64_t idx = 0; idx != len; ++idx) {
    out_data[idx] = lhs - in_data[idx];
  }
  return out;
}
/*!
 * \brief Multiply every element of an id array by a scalar (scalar on the left).
 * \param lhs The scalar factor.
 * \param rhs The array.
 * \return a new id array where ret[i] = lhs * rhs[i].
 */
IdArray Mul(dgl_id_t lhs, IdArray rhs) {
  // Multiplication commutes, so delegate to the (array, scalar) overload.
  IdArray product = Mul(rhs, lhs);
  return product;
}
/*!
 * \brief Divide a scalar by every element of an id array (integer division).
 *
 * Divisors are not checked, so rhs must not contain zeros.
 *
 * \param lhs The scalar dividend.
 * \param rhs The array of divisors.
 * \return a new id array where ret[i] = lhs / rhs[i].
 */
IdArray Div(dgl_id_t lhs, IdArray rhs) {
  const int64_t len = rhs->shape[0];
  IdArray out = NewIdArray(len);
  const dgl_id_t* in_data = static_cast<dgl_id_t*>(rhs->data);
  dgl_id_t* out_data = static_cast<dgl_id_t*>(out->data);
  for (int64_t idx = 0; idx != len; ++idx) {
    out_data[idx] = lhs / in_data[idx];
  }
  return out;
}
/*!
 * \brief Stack two equally sized id arrays into one flat array.
 *
 * The result has 2 * L elements (L being the common length): the first L are
 * the elements of arr1 and the last L are the elements of arr2.
 *
 * \param arr1 The first array.
 * \param arr2 The second array; must have the same length as arr1.
 * \return the stacked array.
 */
IdArray HStack(IdArray arr1, IdArray arr2) {
  CHECK_EQ(arr1->shape[0], arr2->shape[0]);
  const int64_t half = arr1->shape[0];
  IdArray stacked = NewIdArray(2 * half);
  const dgl_id_t* first = static_cast<dgl_id_t*>(arr1->data);
  const dgl_id_t* second = static_cast<dgl_id_t*>(arr2->data);
  dgl_id_t* out = static_cast<dgl_id_t*>(stacked->data);
  // Fill the two halves one after the other.
  std::copy(first, first + half, out);
  std::copy(second, second + half, out + half);
  return stacked;
}
/*!
 * \brief Extract the rows [start, end) of a CSR matrix into a new CSR matrix.
 *
 * The returned indptr is rebased so that it starts at zero; indices and data
 * are copied for the selected rows only.
 *
 * \param csr The input matrix.
 * \param start The first row to copy.
 * \param end The last row to copy (exclusive).
 * \return a new CSR matrix with (end - start) rows.
 */
CSRMatrix SliceRows(const CSRMatrix& csr, int64_t start, int64_t end) {
  // Reject ranges that would read outside indptr or allocate a negative length.
  CHECK(start >= 0 && start <= end)
    << "SliceRows: invalid row range [" << start << ", " << end << ")";
  CHECK(end < csr.indptr->shape[0])
    << "SliceRows: row range end " << end << " exceeds the number of rows";
  const dgl_id_t* indptr = static_cast<dgl_id_t*>(csr.indptr->data);
  const dgl_id_t* indices = static_cast<dgl_id_t*>(csr.indices->data);
  const dgl_id_t* data = static_cast<dgl_id_t*>(csr.data->data);
  const int64_t num_rows = end - start;
  const int64_t nnz = indptr[end] - indptr[start];
  CSRMatrix ret;
  ret.indptr = NewIdArray(num_rows + 1);
  ret.indices = NewIdArray(nnz);
  ret.data = NewIdArray(nnz);
  dgl_id_t* r_indptr = static_cast<dgl_id_t*>(ret.indptr->data);
  dgl_id_t* r_indices = static_cast<dgl_id_t*>(ret.indices->data);
  dgl_id_t* r_data = static_cast<dgl_id_t*>(ret.data->data);
  // Rebase the row offsets so that the first selected row starts at 0.
  for (int64_t i = start; i < end + 1; ++i) {
    r_indptr[i - start] = indptr[i] - indptr[start];
  }
  std::copy(indices + indptr[start], indices + indptr[end], r_indices);
  std::copy(data + indptr[start], data + indptr[end], r_data);
  return ret;
}
} // namespace dgl
......@@ -13,15 +13,6 @@ using dgl::runtime::NDArray;
namespace dgl {
DLManagedTensor* CreateTmpDLManagedTensor(const DGLArgValue& arg) {
const DLTensor* dl_tensor = arg;
DLManagedTensor* ret = new DLManagedTensor();
ret->deleter = [] (DLManagedTensor* self) { delete self; };
ret->manager_ctx = nullptr;
ret->dl_tensor = *dl_tensor;
return ret;
}
PackedFunc ConvertNDArrayVectorToPackedFunc(const std::vector<NDArray>& vec) {
auto body = [vec](DGLArgs args, DGLRetValue* rv) {
const uint64_t which = args[0];
......
......@@ -19,13 +19,6 @@ typedef void* GraphHandle;
// Communicator handler type
typedef void* CommunicatorHandle;
/*!
* \brief Convert the given DLTensor to DLManagedTensor.
*
* Return a temporary DLManagedTensor that does not own memory.
*/
DLManagedTensor* CreateTmpDLManagedTensor(
const dgl::runtime::DGLArgValue& arg);
/*!
* \brief Convert a vector of NDArray to PackedFunc.
......
......@@ -14,33 +14,28 @@
namespace dgl {
Graph::Graph(IdArray src_ids, IdArray dst_ids, IdArray edge_ids, size_t num_nodes,
Graph::Graph(IdArray src_ids, IdArray dst_ids, size_t num_nodes,
bool multigraph): is_multigraph_(multigraph) {
CHECK(IsValidIdArray(src_ids));
CHECK(IsValidIdArray(dst_ids));
CHECK(IsValidIdArray(edge_ids));
this->AddVertices(num_nodes);
num_edges_ = src_ids->shape[0];
CHECK(static_cast<int64_t>(num_edges_) == dst_ids->shape[0])
<< "vectors in COO must have the same length";
CHECK(static_cast<int64_t>(num_edges_) == edge_ids->shape[0])
<< "vectors in COO must have the same length";
const dgl_id_t *src_data = static_cast<dgl_id_t*>(src_ids->data);
const dgl_id_t *dst_data = static_cast<dgl_id_t*>(dst_ids->data);
const dgl_id_t *edge_data = static_cast<dgl_id_t*>(edge_ids->data);
all_edges_src_.reserve(num_edges_);
all_edges_dst_.reserve(num_edges_);
for (uint64_t i = 0; i < num_edges_; i++) {
auto src = src_data[i];
auto dst = dst_data[i];
auto eid = edge_data[i];
CHECK(HasVertex(src) && HasVertex(dst))
<< "Invalid vertices: src=" << src << " dst=" << dst;
adjlist_[src].succ.push_back(dst);
adjlist_[src].edge_id.push_back(eid);
adjlist_[src].edge_id.push_back(i);
reverse_adjlist_[dst].succ.push_back(src);
reverse_adjlist_[dst].edge_id.push_back(eid);
reverse_adjlist_[dst].edge_id.push_back(i);
all_edges_src_.push_back(src);
all_edges_dst_.push_back(dst);
......@@ -104,7 +99,7 @@ BoolArray Graph::HasVertices(IdArray vids) const {
int64_t* rst_data = static_cast<int64_t*>(rst->data);
const int64_t nverts = NumVertices();
for (int64_t i = 0; i < len; ++i) {
rst_data[i] = (vid_data[i] < nverts)? 1 : 0;
rst_data[i] = (vid_data[i] < nverts && vid_data[i] >= 0)? 1 : 0;
}
return rst;
}
......
......@@ -77,14 +77,13 @@ namespace {
template<typename T>
void DGLDisjointPartitionByNum(const T *gptr, DGLArgs args, DGLRetValue *rv) {
int64_t num = args[1];
const std::vector<T> &&rst = GraphOp::DisjointPartitionByNum(gptr, num);
std::vector<T> &&rst = GraphOp::DisjointPartitionByNum(gptr, num);
// return the pointer array as an integer array
const int64_t len = rst.size();
NDArray ptr_array = NDArray::Empty({len}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
int64_t *ptr_array_data = static_cast<int64_t *>(ptr_array->data);
for (size_t i = 0; i < rst.size(); ++i) {
T *ptr = new T();
*ptr = std::move(rst[i]);
GraphInterface *ptr = rst[i].Reset();
ptr_array_data[i] = reinterpret_cast<std::intptr_t>(ptr);
}
*rv = ptr_array;
......@@ -100,9 +99,7 @@ void DGLDisjointUnion(GraphHandle *inhandles, int list_size, DGLRetValue *rv) {
graphs.push_back(gr);
}
T *gptr = new T();
*gptr = GraphOp::DisjointUnion(std::move(graphs));
GraphHandle ghandle = gptr;
GraphHandle ghandle = GraphOp::DisjointUnion(std::move(graphs)).Reset();
*rv = ghandle;
}
......@@ -114,8 +111,7 @@ void DGLDisjointPartitionBySizes(const T *gptr, const IdArray sizes, DGLRetValue
NDArray ptr_array = NDArray::Empty({len}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
int64_t *ptr_array_data = static_cast<int64_t *>(ptr_array->data);
for (size_t i = 0; i < rst.size(); ++i) {
T *ptr = new T();
*ptr = std::move(rst[i]);
GraphInterface *ptr = rst[i].Reset();
ptr_array_data[i] = reinterpret_cast<std::intptr_t>(ptr);
}
*rv = ptr_array;
......@@ -133,39 +129,48 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphCreateMutable")
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphCreate")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
const IdArray src_ids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[0]));
const IdArray dst_ids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
const IdArray edge_ids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[2]));
const bool multigraph = static_cast<bool>(args[3]);
const int64_t num_nodes = static_cast<int64_t>(args[4]);
const bool readonly = static_cast<bool>(args[5]);
const IdArray src_ids = args[0];
const IdArray dst_ids = args[1];
const bool multigraph = static_cast<bool>(args[2]);
const int64_t num_nodes = static_cast<int64_t>(args[3]);
const bool readonly = static_cast<bool>(args[4]);
GraphHandle ghandle;
if (readonly)
ghandle = new ImmutableGraph(src_ids, dst_ids, edge_ids, num_nodes, multigraph);
else
ghandle = new Graph(src_ids, dst_ids, edge_ids, num_nodes, multigraph);
if (readonly) {
// TODO(minjie): The array copy here is unnecessary and adds extra overhead.
// However, with MXNet backend, the memory would be corrupted if we directly
// save the passed-in ndarrays into DGL's graph object. We hope MXNet team
// could help look into this.
COOPtr coo(new COO(num_nodes, Clone(src_ids), Clone(dst_ids), multigraph));
ghandle = new ImmutableGraph(coo);
} else {
ghandle = new Graph(src_ids, dst_ids, num_nodes, multigraph);
}
*rv = ghandle;
});
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphCSRCreate")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
const IdArray indptr = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[0]));
const IdArray indices = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
const IdArray edge_ids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[2]));
const IdArray indptr = args[0];
const IdArray indices = args[1];
const IdArray edge_ids = args[2];
const std::string shared_mem_name = args[3];
const bool multigraph = static_cast<bool>(args[4]);
const std::string edge_dir = args[5];
ImmutableGraph::CSR::Ptr csr;
CSRPtr csr;
if (shared_mem_name.empty())
csr.reset(new ImmutableGraph::CSR(indptr, indices, edge_ids));
// TODO(minjie): The array copy here is unnecessary and adds extra overhead.
// However, with MXNet backend, the memory would be corrupted if we directly
// save the passed-in ndarrays into DGL's graph object. We hope MXNet team
// could help look into this.
csr.reset(new CSR(Clone(indptr), Clone(indices), Clone(edge_ids), multigraph));
else
csr.reset(new ImmutableGraph::CSR(indptr, indices, edge_ids, shared_mem_name));
csr.reset(new CSR(indptr, indices, edge_ids, multigraph, shared_mem_name));
GraphHandle ghandle;
if (edge_dir == "in")
ghandle = new ImmutableGraph(csr, nullptr, multigraph);
ghandle = new ImmutableGraph(csr, nullptr);
else
ghandle = new ImmutableGraph(nullptr, csr, multigraph);
ghandle = new ImmutableGraph(nullptr, csr);
*rv = ghandle;
});
......@@ -176,13 +181,13 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphCSRCreateMMap")
const int64_t num_edges = args[2];
const bool multigraph = static_cast<bool>(args[3]);
const std::string edge_dir = args[4];
ImmutableGraph::CSR::Ptr csr(new ImmutableGraph::CSR(shared_mem_name,
num_vertices, num_edges));
// TODO(minjie): how to know multigraph
CSRPtr csr(new CSR(shared_mem_name, num_vertices, num_edges, multigraph));
GraphHandle ghandle;
if (edge_dir == "in")
ghandle = new ImmutableGraph(csr, nullptr, multigraph);
ghandle = new ImmutableGraph(csr, nullptr);
else
ghandle = new ImmutableGraph(nullptr, csr, multigraph);
ghandle = new ImmutableGraph(nullptr, csr);
*rv = ghandle;
});
......@@ -214,8 +219,8 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphAddEdges")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphHandle ghandle = args[0];
GraphInterface* gptr = static_cast<GraphInterface*>(ghandle);
const IdArray src = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
const IdArray dst = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[2]));
const IdArray src = args[1];
const IdArray dst = args[2];
gptr->AddEdges(src, dst);
});
......@@ -268,14 +273,14 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphHasVertices")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphHandle ghandle = args[0];
const GraphInterface* gptr = static_cast<GraphInterface*>(ghandle);
const IdArray vids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
const IdArray vids = args[1];
*rv = gptr->HasVertices(vids);
});
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLMapSubgraphNID")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
const IdArray parent_vids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[0]));
const IdArray query = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
const IdArray parent_vids = args[0];
const IdArray query = args[1];
*rv = GraphOp::MapParentIdToSubgraphId(parent_vids, query);
});
......@@ -292,8 +297,8 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphHasEdgesBetween")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphHandle ghandle = args[0];
const GraphInterface* gptr = static_cast<GraphInterface*>(ghandle);
const IdArray src = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
const IdArray dst = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[2]));
const IdArray src = args[1];
const IdArray dst = args[2];
*rv = gptr->HasEdgesBetween(src, dst);
});
......@@ -328,8 +333,8 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphEdgeIds")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphHandle ghandle = args[0];
const GraphInterface* gptr = static_cast<GraphInterface*>(ghandle);
const IdArray src = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
const IdArray dst = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[2]));
const IdArray src = args[1];
const IdArray dst = args[2];
*rv = ConvertEdgeArrayToPackedFunc(gptr->EdgeIds(src, dst));
});
......@@ -337,7 +342,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphFindEdges")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphHandle ghandle = args[0];
const GraphInterface* gptr = static_cast<GraphInterface*>(ghandle);
const IdArray eids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
const IdArray eids = args[1];
*rv = ConvertEdgeArrayToPackedFunc(gptr->FindEdges(eids));
});
......@@ -353,7 +358,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphInEdges_2")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphHandle ghandle = args[0];
const GraphInterface* gptr = static_cast<GraphInterface*>(ghandle);
const IdArray vids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
const IdArray vids = args[1];
*rv = ConvertEdgeArrayToPackedFunc(gptr->InEdges(vids));
});
......@@ -369,7 +374,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphOutEdges_2")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphHandle ghandle = args[0];
const GraphInterface* gptr = static_cast<GraphInterface*>(ghandle);
const IdArray vids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
const IdArray vids = args[1];
*rv = ConvertEdgeArrayToPackedFunc(gptr->OutEdges(vids));
});
......@@ -393,7 +398,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphInDegrees")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphHandle ghandle = args[0];
const GraphInterface* gptr = static_cast<GraphInterface*>(ghandle);
const IdArray vids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
const IdArray vids = args[1];
*rv = gptr->InDegrees(vids);
});
......@@ -409,7 +414,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphOutDegrees")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphHandle ghandle = args[0];
const GraphInterface* gptr = static_cast<GraphInterface*>(ghandle);
const IdArray vids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
const IdArray vids = args[1];
*rv = gptr->OutDegrees(vids);
});
......@@ -417,7 +422,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphVertexSubgraph")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphHandle ghandle = args[0];
const GraphInterface* gptr = static_cast<GraphInterface*>(ghandle);
const IdArray vids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
const IdArray vids = args[1];
*rv = ConvertSubgraphToPackedFunc(gptr->VertexSubgraph(vids));
});
......@@ -425,7 +430,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphEdgeSubgraph")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphHandle ghandle = args[0];
const GraphInterface *gptr = static_cast<GraphInterface*>(ghandle);
const IdArray eids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
const IdArray eids = args[1];
*rv = ConvertSubgraphToPackedFunc(gptr->EdgeSubgraph(eids));
});
......@@ -462,7 +467,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLDisjointPartitionByNum")
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLDisjointPartitionBySizes")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphHandle ghandle = args[0];
const IdArray sizes = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
const IdArray sizes = args[1];
const GraphInterface *ptr = static_cast<const GraphInterface *>(ghandle);
const Graph* gptr = dynamic_cast<const Graph*>(ptr);
const ImmutableGraph* im_gptr = dynamic_cast<const ImmutableGraph*>(ptr);
......@@ -497,17 +502,4 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphGetAdj")
*rv = ConvertAdjToPackedFunc(res);
});
/*!
 * \brief C API: return a slice of a NodeFlow's adjacency as a packed function.
 *
 * Arguments: [0] graph handle, [1] adjacency format string, [2] layer0_size,
 * [3] start, [4] end. The handle must refer to an ImmutableGraph.
 */
DGL_REGISTER_GLOBAL("nodeflow._CAPI_NodeFlowGetBlockAdj")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
    GraphHandle ghandle = args[0];
    std::string format = args[1];
    int64_t layer0_size = args[2];
    int64_t start = args[3];
    int64_t end = args[4];
    const GraphInterface *ptr = static_cast<const GraphInterface *>(ghandle);
    const ImmutableGraph* gptr = dynamic_cast<const ImmutableGraph*>(ptr);
    // dynamic_cast yields nullptr for any other graph type; fail loudly
    // instead of dereferencing a null pointer below.
    CHECK(gptr != nullptr) << "NodeFlowGetBlockAdj requires an ImmutableGraph handle";
    // NOTE(review): the meaning of the trailing `true` flag is defined by
    // GetNodeFlowSlice, which is not visible here.
    auto res = GetNodeFlowSlice(*gptr, format, layer0_size, start, end, true);
    *rv = ConvertAdjToPackedFunc(res);
  });
} // namespace dgl
......@@ -108,40 +108,44 @@ std::vector<Graph> GraphOp::DisjointPartitionBySizes(const Graph* graph, IdArray
ImmutableGraph GraphOp::DisjointUnion(std::vector<const ImmutableGraph *> graphs) {
dgl_id_t num_nodes = 0;
dgl_id_t num_edges = 0;
int64_t num_nodes = 0;
int64_t num_edges = 0;
for (const ImmutableGraph *gr : graphs) {
num_nodes += gr->NumVertices();
num_edges += gr->NumEdges();
}
ImmutableGraph::CSR::Ptr batched_csr_ptr = std::make_shared<ImmutableGraph::CSR>(num_nodes,
num_edges);
batched_csr_ptr->indptr[0] = 0;
IdArray indptr_arr = NewIdArray(num_nodes + 1);
IdArray indices_arr = NewIdArray(num_edges);
IdArray edge_ids_arr = NewIdArray(num_edges);
dgl_id_t* indptr = static_cast<dgl_id_t*>(indptr_arr->data);
dgl_id_t* indices = static_cast<dgl_id_t*>(indices_arr->data);
dgl_id_t* edge_ids = static_cast<dgl_id_t*>(edge_ids_arr->data);
indptr[0] = 0;
dgl_id_t cum_num_nodes = 0;
dgl_id_t cum_num_edges = 0;
dgl_id_t indptr_idx = 1;
for (const ImmutableGraph *gr : graphs) {
const ImmutableGraph::CSR::Ptr &g_csrptr = gr->GetInCSR();
dgl_id_t g_num_nodes = g_csrptr->NumVertices();
dgl_id_t g_num_edges = g_csrptr->NumEdges();
ImmutableGraph::CSR::vector<dgl_id_t> &g_indices = g_csrptr->indices;
ImmutableGraph::CSR::vector<int64_t> &g_indptr = g_csrptr->indptr;
ImmutableGraph::CSR::vector<dgl_id_t> &g_edge_ids = g_csrptr->edge_ids;
for (dgl_id_t i = 1; i < g_indptr.size(); ++i) {
batched_csr_ptr->indptr[indptr_idx] = g_indptr[i] + cum_num_edges;
indptr_idx++;
const CSRPtr g_csrptr = gr->GetInCSR();
const int64_t g_num_nodes = g_csrptr->NumVertices();
const int64_t g_num_edges = g_csrptr->NumEdges();
dgl_id_t* g_indptr = static_cast<dgl_id_t*>(g_csrptr->indptr()->data);
dgl_id_t* g_indices = static_cast<dgl_id_t*>(g_csrptr->indices()->data);
dgl_id_t* g_edge_ids = static_cast<dgl_id_t*>(g_csrptr->edge_ids()->data);
for (dgl_id_t i = 1; i < g_num_nodes + 1; ++i) {
indptr[cum_num_nodes + i] = g_indptr[i] + cum_num_edges;
}
for (dgl_id_t i = 0; i < g_indices.size(); ++i) {
batched_csr_ptr->indices.push_back(g_indices[i] + cum_num_nodes);
for (dgl_id_t i = 0; i < g_num_edges; ++i) {
indices[cum_num_edges + i] = g_indices[i] + cum_num_nodes;
}
for (dgl_id_t i = 0; i < g_edge_ids.size(); ++i) {
batched_csr_ptr->edge_ids.push_back(g_edge_ids[i] + cum_num_edges);
for (dgl_id_t i = 0; i < g_num_edges; ++i) {
edge_ids[cum_num_edges + i] = g_edge_ids[i] + cum_num_edges;
}
cum_num_nodes += g_num_nodes;
cum_num_edges += g_num_edges;
}
CSRPtr batched_csr_ptr = CSRPtr(new CSR(indptr_arr, indices_arr, edge_ids_arr));
return ImmutableGraph(batched_csr_ptr, nullptr);
}
......@@ -157,9 +161,11 @@ std::vector<ImmutableGraph> GraphOp::DisjointPartitionByNum(const ImmutableGraph
std::vector<ImmutableGraph> GraphOp::DisjointPartitionBySizes(const ImmutableGraph *batched_graph,
IdArray sizes) {
// TODO(minjie): use array views to speedup this operation
const int64_t len = sizes->shape[0];
const int64_t *sizes_data = static_cast<int64_t *>(sizes->data);
std::vector<int64_t> cumsum;
cumsum.reserve(len + 1);
cumsum.push_back(0);
for (int64_t i = 0; i < len; ++i) {
cumsum.push_back(cumsum[i] + sizes_data[i]);
......@@ -167,35 +173,40 @@ std::vector<ImmutableGraph> GraphOp::DisjointPartitionBySizes(const ImmutableGra
CHECK_EQ(cumsum[len], batched_graph->NumVertices())
<< "Sum of the given sizes must equal to the number of nodes.";
std::vector<ImmutableGraph> rst;
const ImmutableGraph::CSR::Ptr &in_csr_ptr = batched_graph->GetInCSR();
ImmutableGraph::CSR::vector<int64_t> &bg_indptr = in_csr_ptr->indptr;
ImmutableGraph::CSR::vector<dgl_id_t> &bg_indices = in_csr_ptr->indices;
CSRPtr in_csr_ptr = batched_graph->GetInCSR();
const dgl_id_t* indptr = static_cast<dgl_id_t*>(in_csr_ptr->indptr()->data);
const dgl_id_t* indices = static_cast<dgl_id_t*>(in_csr_ptr->indices()->data);
const dgl_id_t* edge_ids = static_cast<dgl_id_t*>(in_csr_ptr->edge_ids()->data);
dgl_id_t cum_sum_edges = 0;
for (int64_t i = 0; i < len; ++i) {
int64_t start_pos = cumsum[i];
int64_t end_pos = cumsum[i + 1];
int64_t g_num_edges = bg_indptr[end_pos] - bg_indptr[start_pos];
ImmutableGraph::CSR::Ptr g_in_csr_ptr = std::make_shared<ImmutableGraph::CSR>(sizes_data[i],
g_num_edges);
ImmutableGraph::CSR::vector<int64_t> &g_indptr = g_in_csr_ptr->indptr;
ImmutableGraph::CSR::vector<dgl_id_t> &g_indices = g_in_csr_ptr->indices;
ImmutableGraph::CSR::vector<dgl_id_t> &g_edge_ids = g_in_csr_ptr->edge_ids;
const int64_t start_pos = cumsum[i];
const int64_t end_pos = cumsum[i + 1];
const int64_t g_num_nodes = sizes_data[i];
const int64_t g_num_edges = indptr[end_pos] - indptr[start_pos];
IdArray indptr_arr = NewIdArray(g_num_nodes + 1);
IdArray indices_arr = NewIdArray(g_num_edges);
IdArray edge_ids_arr = NewIdArray(g_num_edges);
dgl_id_t* g_indptr = static_cast<dgl_id_t*>(indptr_arr->data);
dgl_id_t* g_indices = static_cast<dgl_id_t*>(indices_arr->data);
dgl_id_t* g_edge_ids = static_cast<dgl_id_t*>(edge_ids_arr->data);
const dgl_id_t idoff = indptr[start_pos];
g_indptr[0] = 0;
for (int l = start_pos + 1; l < end_pos + 1; ++l) {
g_indptr[l - start_pos] = bg_indptr[l] - bg_indptr[start_pos];
g_indptr[l - start_pos] = indptr[l] - indptr[start_pos];
}
for (int j = bg_indptr[start_pos]; j < bg_indptr[end_pos]; ++j) {
g_indices.push_back(bg_indices[j] - cumsum[i]);
for (int j = indptr[start_pos]; j < indptr[end_pos]; ++j) {
g_indices[j - idoff] = indices[j] - cumsum[i];
}
for (int k = bg_indptr[start_pos]; k < bg_indptr[end_pos]; ++k) {
g_edge_ids.push_back(in_csr_ptr->edge_ids[k] - cum_sum_edges);
for (int k = indptr[start_pos]; k < indptr[end_pos]; ++k) {
g_edge_ids[k - idoff] = edge_ids[k] - cum_sum_edges;
}
cum_sum_edges += g_num_edges;
ImmutableGraph graph(g_in_csr_ptr, nullptr);
rst.push_back(graph);
CSRPtr g_in_csr_ptr = CSRPtr(new CSR(indptr_arr, indices_arr, edge_ids_arr));
rst.emplace_back(g_in_csr_ptr, nullptr);
}
return rst;
}
......
......@@ -4,150 +4,235 @@
* \brief DGL immutable graph index implementation
*/
#include <string.h>
#include <sys/types.h>
#include <dgl/immutable_graph.h>
#ifdef _MSC_VER
#define _CRT_RAND_S
#endif
#include <string.h>
#include <bitset>
#include <numeric>
#include <tuple>
#include "../c_api_common.h"
namespace dgl {
namespace {
/*!
* \brief A hashmap that maps each ids in the given array to new ids starting from zero.
*/
class IdHashMap {
public:
// Construct the hashmap using the given id arrays.
// The id array could contain duplicates.
explicit IdHashMap(IdArray ids): filter_(kFilterSize, false) {
const dgl_id_t* ids_data = static_cast<dgl_id_t*>(ids->data);
const int64_t len = ids->shape[0];
dgl_id_t newid = 0;
for (int64_t i = 0; i < len; ++i) {
const dgl_id_t id = ids_data[i];
if (!Contains(id)) {
oldv2newv_[id] = newid++;
filter_[id & kFilterMask] = true;
}
}
}
template<class ForwardIt, class T>
bool binary_search(ForwardIt first, ForwardIt last, const T& value) {
first = std::lower_bound(first, last, value);
return (!(first == last) && !(value < *first));
}
// Return true if the given id is contained in this hashmap.
bool Contains(dgl_id_t id) const {
return filter_[id & kFilterMask] && oldv2newv_.count(id);
}
ImmutableGraph::CSR::CSR(int64_t num_vertices, int64_t expected_num_edges):
indptr(num_vertices + 1), indices(expected_num_edges), edge_ids(expected_num_edges) {
indptr.resize(num_vertices + 1);
}
// Return the new id of the given id. If the given id is not contained
// in the hash map, returns the default_val instead.
dgl_id_t Map(dgl_id_t id, dgl_id_t default_val) const {
if (filter_[id & kFilterMask]) {
auto it = oldv2newv_.find(id);
return (it == oldv2newv_.end()) ? default_val : it->second;
} else {
return default_val;
}
}
ImmutableGraph::CSR::CSR(IdArray indptr_arr, IdArray index_arr, IdArray edge_id_arr):
indptr(indptr_arr->shape[0]), indices(index_arr->shape[0]), edge_ids(index_arr->shape[0]) {
size_t num_vertices = indptr_arr->shape[0] - 1;
size_t num_edges = index_arr->shape[0];
const int64_t *indptr_data = static_cast<int64_t*>(indptr_arr->data);
const dgl_id_t *indices_data = static_cast<dgl_id_t*>(index_arr->data);
const dgl_id_t *edge_id_data = static_cast<dgl_id_t*>(edge_id_arr->data);
CHECK_EQ(indptr_data[0], 0);
CHECK_EQ(indptr_data[num_vertices], num_edges);
indptr.insert_back(indptr_data, num_vertices + 1);
indices.insert_back(indices_data, num_edges);
edge_ids.insert_back(edge_id_data, num_edges);
}
private:
static constexpr int32_t kFilterMask = 0xFFFFFF;
static constexpr int32_t kFilterSize = kFilterMask + 1;
// This bitmap is used as a bloom filter to remove some lookups.
// Hashtable is very slow. Using bloom filter can significantly speed up lookups.
std::vector<bool> filter_;
// The hashmap from old vid to new vid
std::unordered_map<dgl_id_t, dgl_id_t> oldv2newv_;
};
ImmutableGraph::CSR::CSR(IdArray indptr_arr, IdArray index_arr, IdArray edge_id_arr,
const std::string &shared_mem_name) {
#ifndef _WIN32
size_t num_vertices = indptr_arr->shape[0] - 1;
size_t num_edges = index_arr->shape[0];
CHECK_EQ(num_edges, edge_id_arr->shape[0]);
size_t file_size = (num_vertices + 1) * sizeof(int64_t) + num_edges * sizeof(dgl_id_t) * 2;
auto mem = std::make_shared<runtime::SharedMemory>(shared_mem_name);
auto ptr = mem->create_new(file_size);
int64_t *addr1 = static_cast<int64_t *>(ptr);
indptr.init(addr1, num_vertices + 1);
void *addr = addr1 + num_vertices + 1;
dgl_id_t *addr2 = static_cast<dgl_id_t *>(addr);
indices.init(addr2, num_edges);
addr = addr2 + num_edges;
dgl_id_t *addr3 = static_cast<dgl_id_t *>(addr);
edge_ids.init(addr3, num_edges);
const int64_t *indptr_data = static_cast<int64_t*>(indptr_arr->data);
const dgl_id_t *indices_data = static_cast<dgl_id_t*>(index_arr->data);
const dgl_id_t *edge_id_data = static_cast<dgl_id_t*>(edge_id_arr->data);
CHECK_EQ(indptr_data[0], 0);
CHECK_EQ(indptr_data[num_vertices], num_edges);
indptr.insert_back(indptr_data, num_vertices + 1);
indices.insert_back(indices_data, num_edges);
edge_ids.insert_back(edge_id_data, num_edges);
this->mem = mem;
#else
LOG(FATAL) << "ImmutableGraph doesn't support shared memory in Windows yet";
#endif // _WIN32
}
/*!
 * \brief Hash functor for std::pair, usable as the Hash parameter of
 *        unordered containers keyed by pairs.
 */
struct PairHash {
  template <class T1, class T2>
  std::size_t operator() (const std::pair<T1, T2>& pair) const {
    // Order-sensitive combine (boost::hash_combine style). A plain XOR of the
    // two hashes maps (a, b) and (b, a) to the same value and every (a, a)
    // to zero, which degrades hash tables keyed by id pairs.
    std::size_t seed = std::hash<T1>()(pair.first);
    seed ^= std::hash<T2>()(pair.second) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
    return seed;
  }
};
ImmutableGraph::CSR::CSR(const std::string &shared_mem_name,
size_t num_vertices, size_t num_edges) {
std::tuple<IdArray, IdArray, IdArray> MapFromSharedMemory(
const std::string &shared_mem_name, int64_t num_verts, int64_t num_edges) {
#ifndef _WIN32
size_t file_size = (num_vertices + 1) * sizeof(int64_t) + num_edges * sizeof(dgl_id_t) * 2;
auto mem = std::make_shared<runtime::SharedMemory>(shared_mem_name);
auto ptr = mem->open(file_size);
int64_t *addr1 = static_cast<int64_t *>(ptr);
indptr.init(addr1, num_vertices + 1, num_vertices + 1);
void *addr = addr1 + num_vertices + 1;
dgl_id_t *addr2 = static_cast<dgl_id_t *>(addr);
indices.init(addr2, num_edges, num_edges);
addr = addr2 + num_edges;
dgl_id_t *addr3 = static_cast<dgl_id_t *>(addr);
edge_ids.init(addr3, num_edges, num_edges);
this->mem = mem;
const int64_t file_size = (num_verts + 1 + num_edges * 2) * sizeof(dgl_id_t);
IdArray sm_array = IdArray::EmptyShared(
shared_mem_name, {file_size}, DLDataType{kDLInt, 8, 1}, DLContext{kDLCPU, 0}, true);
// Create views from the shared memory array. Note that we don't need to save
// the sm_array because the refcount is maintained by the view arrays.
IdArray indptr = sm_array.CreateView({num_verts + 1}, DLDataType{kDLInt, 64, 1});
IdArray indices = sm_array.CreateView({num_edges}, DLDataType{kDLInt, 64, 1},
(num_verts + 1) * sizeof(dgl_id_t));
IdArray edge_ids = sm_array.CreateView({num_edges}, DLDataType{kDLInt, 64, 1},
(num_verts + 1 + num_edges) * sizeof(dgl_id_t));
return std::make_tuple(indptr, indices, edge_ids);
#else
LOG(FATAL) << "ImmutableGraph doesn't support shared memory in Windows yet";
LOG(FATAL) << "CSR graph doesn't support shared memory in Windows yet";
return {};
#endif // _WIN32
}
} // namespace
//////////////////////////////////////////////////////////
//
// CSR graph implementation
//
//////////////////////////////////////////////////////////
/*!
 * \brief Allocate the three CSR arrays for a graph of the given size.
 *
 * Only storage is created here; the array contents are not written by this
 * constructor.
 * \param num_vertices Number of vertices (indptr gets one extra slot).
 * \param num_edges Number of edges (length of indices and edge_ids).
 * \param is_multigraph Multigraph flag stored as given.
 */
CSR::CSR(int64_t num_vertices, int64_t num_edges, bool is_multigraph)
  : is_multigraph_(is_multigraph) {
  const int64_t indptr_len = num_vertices + 1;
  indptr_ = NewIdArray(indptr_len);
  indices_ = NewIdArray(num_edges);
  edge_ids_ = NewIdArray(num_edges);
}
/*!
 * \brief Construct a CSR from existing indptr / indices / edge id arrays.
 *
 * The arrays are stored as members and then validated: each must pass
 * IsValidIdArray, and indices and edge_ids must have the same length.
 * The multigraph flag is not given here; IsMultigraph() computes it on the
 * first query.
 */
CSR::CSR(IdArray indptr, IdArray indices, IdArray edge_ids)
: indptr_(indptr), indices_(indices), edge_ids_(edge_ids) {
CHECK(IsValidIdArray(indptr));
CHECK(IsValidIdArray(indices));
CHECK(IsValidIdArray(edge_ids));
// one edge id per column index entry
CHECK_EQ(indices->shape[0], edge_ids->shape[0]);
}
/*!
 * \brief Construct a CSR from existing arrays with a known multigraph flag.
 *
 * Same validation as the three-array constructor (valid id arrays, indices
 * and edge_ids of equal length); additionally stores is_multigraph so it
 * does not have to be computed later.
 */
CSR::CSR(IdArray indptr, IdArray indices, IdArray edge_ids, bool is_multigraph)
: indptr_(indptr), indices_(indices), edge_ids_(edge_ids), is_multigraph_(is_multigraph) {
CHECK(IsValidIdArray(indptr));
CHECK(IsValidIdArray(indices));
CHECK(IsValidIdArray(edge_ids));
// one edge id per column index entry
CHECK_EQ(indices->shape[0], edge_ids->shape[0]);
}
/*!
 * \brief Construct a CSR whose arrays live in named shared memory.
 *
 * The inputs are validated, shared-memory backed arrays of matching sizes
 * are obtained from MapFromSharedMemory, and the input data is copied into
 * them.
 * \param shared_mem_name Name of the shared memory region to use.
 */
CSR::CSR(IdArray indptr, IdArray indices, IdArray edge_ids,
         const std::string &shared_mem_name) {
  CHECK(IsValidIdArray(indptr));
  CHECK(IsValidIdArray(indices));
  CHECK(IsValidIdArray(edge_ids));
  CHECK_EQ(indices->shape[0], edge_ids->shape[0]);
  // indptr has one entry more than there are vertices.
  std::tie(indptr_, indices_, edge_ids_) = MapFromSharedMemory(
      shared_mem_name, indptr->shape[0] - 1, indices->shape[0]);
  // Fill the shared-memory arrays with the caller's data.
  indptr_.CopyFrom(indptr);
  indices_.CopyFrom(indices);
  edge_ids_.CopyFrom(edge_ids);
}
/*!
 * \brief Construct a CSR in named shared memory with a known multigraph flag.
 *
 * Validates the inputs, obtains shared-memory backed arrays of matching
 * sizes from MapFromSharedMemory, and copies the input data into them.
 * \param is_multigraph Multigraph flag stored as given.
 * \param shared_mem_name Name of the shared memory region to use.
 */
CSR::CSR(IdArray indptr, IdArray indices, IdArray edge_ids, bool is_multigraph,
         const std::string &shared_mem_name)
  : is_multigraph_(is_multigraph) {
  CHECK(IsValidIdArray(indptr));
  CHECK(IsValidIdArray(indices));
  CHECK(IsValidIdArray(edge_ids));
  CHECK_EQ(indices->shape[0], edge_ids->shape[0]);
  // indptr has one entry more than there are vertices.
  std::tie(indptr_, indices_, edge_ids_) = MapFromSharedMemory(
      shared_mem_name, indptr->shape[0] - 1, indices->shape[0]);
  // Fill the shared-memory arrays with the caller's data.
  indptr_.CopyFrom(indptr);
  indices_.CopyFrom(indices);
  edge_ids_.CopyFrom(edge_ids);
}
/*!
 * \brief Construct a CSR over the arrays returned by MapFromSharedMemory.
 *
 * No data is copied in; the three arrays are taken as returned for the given
 * name and sizes.
 * \param shared_mem_name Name of the shared memory region.
 * \param num_verts Number of vertices of the stored graph.
 * \param num_edges Number of edges of the stored graph.
 * \param is_multigraph Multigraph flag stored as given.
 */
CSR::CSR(const std::string &shared_mem_name,
         int64_t num_verts, int64_t num_edges, bool is_multigraph)
  : is_multigraph_(is_multigraph) {
  auto mapped = MapFromSharedMemory(shared_mem_name, num_verts, num_edges);
  indptr_ = std::get<0>(mapped);
  indices_ = std::get<1>(mapped);
  edge_ids_ = std::get<2>(mapped);
}
bool CSR::IsMultigraph() const {
// The lambda will be called the first time to initialize the is_multigraph flag.
return const_cast<CSR*>(this)->is_multigraph_.Get([this] () {
const dgl_id_t* indptr_data = static_cast<dgl_id_t*>(indptr_->data);
const dgl_id_t* indices_data = static_cast<dgl_id_t*>(indices_->data);
for (dgl_id_t src = 0; src < NumVertices(); ++src) {
std::unordered_set<dgl_id_t> hashmap;
for (dgl_id_t eid = indptr_data[src]; eid < indptr_data[src+1]; ++eid) {
const dgl_id_t dst = indices_data[eid];
if (hashmap.count(dst)) {
return true;
} else {
hashmap.insert(dst);
}
}
}
return false;
});
}
ImmutableGraph::EdgeArray ImmutableGraph::CSR::GetEdges(dgl_id_t vid) const {
CSR::EdgeArray CSR::OutEdges(dgl_id_t vid) const {
CHECK(HasVertex(vid)) << "invalid vertex: " << vid;
const int64_t off = this->indptr[vid];
const int64_t len = this->GetDegree(vid);
IdArray src = IdArray::Empty({len}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
IdArray dst = IdArray::Empty({len}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
IdArray eid = IdArray::Empty({len}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
const dgl_id_t* indptr_data = static_cast<dgl_id_t*>(indptr_->data);
const dgl_id_t* indices_data = static_cast<dgl_id_t*>(indices_->data);
const dgl_id_t* edge_ids_data = static_cast<dgl_id_t*>(edge_ids_->data);
const dgl_id_t off = indptr_data[vid];
const int64_t len = OutDegree(vid);
IdArray src = NewIdArray(len);
IdArray dst = NewIdArray(len);
IdArray eid = NewIdArray(len);
dgl_id_t* src_data = static_cast<dgl_id_t*>(src->data);
dgl_id_t* dst_data = static_cast<dgl_id_t*>(dst->data);
dgl_id_t* eid_data = static_cast<dgl_id_t*>(eid->data);
for (int64_t i = 0; i < len; ++i) {
src_data[i] = this->indices[off + i];
eid_data[i] = this->edge_ids[off + i];
}
std::fill(dst_data, dst_data + len, vid);
return ImmutableGraph::EdgeArray{src, dst, eid};
std::fill(src_data, src_data + len, vid);
std::copy(indices_data + off, indices_data + off + len, dst_data);
std::copy(edge_ids_data + off, edge_ids_data + off + len, eid_data);
return CSR::EdgeArray{src, dst, eid};
}
ImmutableGraph::EdgeArray ImmutableGraph::CSR::GetEdges(IdArray vids) const {
CSR::EdgeArray CSR::OutEdges(IdArray vids) const {
CHECK(IsValidIdArray(vids)) << "Invalid vertex id array.";
const dgl_id_t* indptr_data = static_cast<dgl_id_t*>(indptr_->data);
const dgl_id_t* indices_data = static_cast<dgl_id_t*>(indices_->data);
const dgl_id_t* edge_ids_data = static_cast<dgl_id_t*>(edge_ids_->data);
const auto len = vids->shape[0];
const dgl_id_t* vid_data = static_cast<dgl_id_t*>(vids->data);
int64_t rstlen = 0;
for (int64_t i = 0; i < len; ++i) {
dgl_id_t vid = vid_data[i];
CHECK(HasVertex(vid)) << "Invalid vertex: " << vid;
rstlen += this->GetDegree(vid);
rstlen += OutDegree(vid);
}
IdArray src = IdArray::Empty({rstlen}, vids->dtype, vids->ctx);
IdArray dst = IdArray::Empty({rstlen}, vids->dtype, vids->ctx);
IdArray eid = IdArray::Empty({rstlen}, vids->dtype, vids->ctx);
IdArray src = NewIdArray(rstlen);
IdArray dst = NewIdArray(rstlen);
IdArray eid = NewIdArray(rstlen);
dgl_id_t* src_ptr = static_cast<dgl_id_t*>(src->data);
dgl_id_t* dst_ptr = static_cast<dgl_id_t*>(dst->data);
dgl_id_t* eid_ptr = static_cast<dgl_id_t*>(eid->data);
for (int64_t i = 0; i < len; ++i) {
dgl_id_t vid = vid_data[i];
int64_t off = this->indptr[vid];
const int64_t deg = this->GetDegree(vid);
const dgl_id_t vid = vid_data[i];
const dgl_id_t off = indptr_data[vid];
const int64_t deg = OutDegree(vid);
if (deg == 0)
continue;
const auto *pred = &this->indices[off];
const auto *eids = &this->edge_ids[off];
const auto *succ = indices_data + off;
const auto *eids = edge_ids_data + off;
for (int64_t j = 0; j < deg; ++j) {
*(src_ptr++) = pred[j];
*(dst_ptr++) = vid;
*(src_ptr++) = vid;
*(dst_ptr++) = succ[j];
*(eid_ptr++) = eids[j];
}
}
return ImmutableGraph::EdgeArray{src, dst, eid};
return CSR::EdgeArray{src, dst, eid};
}
DegreeArray ImmutableGraph::CSR::GetDegrees(IdArray vids) const {
DegreeArray CSR::OutDegrees(IdArray vids) const {
CHECK(IsValidIdArray(vids)) << "Invalid vertex id array.";
const auto len = vids->shape[0];
const dgl_id_t* vid_data = static_cast<dgl_id_t*>(vids->data);
......@@ -156,392 +241,57 @@ DegreeArray ImmutableGraph::CSR::GetDegrees(IdArray vids) const {
for (int64_t i = 0; i < len; ++i) {
const auto vid = vid_data[i];
CHECK(HasVertex(vid)) << "Invalid vertex: " << vid;
rst_data[i] = this->GetDegree(vid);
}
return rst;
}
/*!
 * \brief Fixed-size bit filter over vertex ids.
 *
 * Each id is mapped to a slot by masking its low bits, so distinct ids may
 * share a slot: test() can report true for an id that was never inserted,
 * but never false for one that was.
 */
class Bitmap {
  // Class-level constants: keeping them as per-object const members would
  // enlarge every instance and implicitly delete copy assignment.
  static constexpr size_t kSize = 1024 * 1024 * 4;
  static constexpr size_t kMask = kSize - 1;
  std::vector<bool> map;

  // Slot index of an id (kSize is a power of two, so masking wraps the id).
  static size_t hash(dgl_id_t id) {
    return id & kMask;
  }

 public:
  /*! \brief Mark the slots of the first len ids in vid_data. */
  Bitmap(const dgl_id_t *vid_data, int64_t len): map(kSize) {
    for (int64_t i = 0; i < len; ++i) {
      map[hash(vid_data[i])] = true;
    }
  }

  /*! \brief Return true if the slot of the given id is marked. */
  bool test(dgl_id_t id) const {
    return map[hash(id)];
  }
};
/*
* This uses a hashtable to check if a node is in the given node list.
*/
class HashTableChecker {
std::unordered_map<dgl_id_t, dgl_id_t> oldv2newv;
// This bitmap is used as a bloom filter to remove some lookups.
// Hashtable is very slow. Using bloom filter can significantly speed up lookups.
Bitmap map;
/*
* This is to test if a vertex is in the induced subgraph.
* If it is, the edge on this vertex and the source vertex will be collected.
* `old_id` is the vertex we test, `old_eid` is the edge Id between the `old_id`
* and the source vertex. `col_idx` and `orig_eids` store the collected edges.
*/
void Collect(const dgl_id_t old_id, const dgl_id_t old_eid,
ImmutableGraph::CSR::vector<dgl_id_t> *col_idx,
std::vector<dgl_id_t> *orig_eids) {
if (!map.test(old_id))
return;
auto it = oldv2newv.find(old_id);
if (it != oldv2newv.end()) {
const dgl_id_t new_id = it->second;
col_idx->push_back(new_id);
if (orig_eids)
orig_eids->push_back(old_eid);
}
}
public:
HashTableChecker(const dgl_id_t *vid_data, int64_t len): map(vid_data, len) {
oldv2newv.reserve(len);
for (int64_t i = 0; i < len; ++i) {
oldv2newv[vid_data[i]] = i;
}
}
/*
* This is to collect edges from the neighborhood of a vertex.
* `neigh_idx`, `eids` and `row_len` indicates the neighbor list of the vertex.
* The collected edges are stored in `new_neigh_idx` and `orig_eids`.
*/
void CollectOnRow(const dgl_id_t neigh_idx[], const dgl_id_t eids[], size_t row_len,
ImmutableGraph::CSR::vector<dgl_id_t> *new_neigh_idx,
std::vector<dgl_id_t> *orig_eids) {
// TODO(zhengda) I need to make sure the column index in each row is sorted.
for (size_t j = 0; j < row_len; ++j) {
const dgl_id_t oldsucc = neigh_idx[j];
const dgl_id_t eid = eids[j];
Collect(oldsucc, eid, new_neigh_idx, orig_eids);
}
}
};
/*!
 * \brief Build an edge list from CSR arrays.
 *
 * Every CSR entry is registered under its edge id with its (row, column)
 * pair; when in_csr is true the pair is swapped before registering.
 */
ImmutableGraph::EdgeList::Ptr ImmutableGraph::EdgeList::FromCSR(
    const CSR::vector<int64_t>& indptr,
    const CSR::vector<dgl_id_t>& indices,
    const CSR::vector<dgl_id_t>& edge_ids,
    bool in_csr) {
  auto edge_list = std::make_shared<EdgeList>(edge_ids.size(), indptr.size() - 1);
  for (size_t v = 0; v < indptr.size() - 1; v++) {
    const int64_t row_end = indptr[v + 1];
    for (int64_t pos = indptr[v]; pos < row_end; pos++) {
      const dgl_id_t neighbor = indices[pos];
      const dgl_id_t first = in_csr ? neighbor : static_cast<dgl_id_t>(v);
      const dgl_id_t second = in_csr ? static_cast<dgl_id_t>(v) : neighbor;
      edge_list->register_edge(edge_ids[pos], first, second);
    }
  }
  return edge_list;
}
/*!
 * \brief Build the CSR of the subgraph induced by the given vertices.
 *
 * A vertex's new id is its position in vids. Only entries whose neighbor is
 * also in vids are kept, and the kept entries get fresh edge ids 0..k-1 in
 * the order they are collected.
 * \param vids Vertex ids to induce on; must be a valid id array, sorted, and
 *        every id must be smaller than NumVertices() (all CHECKed).
 * \return (sub-CSR, array holding the original edge id of each kept entry).
 */
std::pair<ImmutableGraph::CSR::Ptr, IdArray> ImmutableGraph::CSR::VertexSubgraph(
IdArray vids) const {
CHECK(IsValidIdArray(vids)) << "Invalid vertex id array.";
const dgl_id_t* vid_data = static_cast<dgl_id_t*>(vids->data);
const int64_t len = vids->shape[0];
// Lookup structure: old vertex id -> position in vids.
HashTableChecker def_check(vid_data, len);
// check if vid_data is sorted.
CHECK(std::is_sorted(vid_data, vid_data + len)) << "The input vertex list has to be sorted";
// Collect the non-zero entries in from the original graph.
std::vector<dgl_id_t> orig_edge_ids;
orig_edge_ids.reserve(len);
// NOTE(review): the second argument is named expected_num_edges by the CSR
// constructor; len is presumably only an initial estimate - confirm.
auto sub_csr = std::make_shared<CSR>(len, len);
sub_csr->indptr[0] = 0;
for (int64_t i = 0; i < len; ++i) {
const dgl_id_t oldvid = vid_data[i];
CHECK_LT(oldvid, NumVertices()) << "Vertex Id " << oldvid << " isn't in a graph of "
<< NumVertices() << " vertices";
size_t row_start = indptr[oldvid];
size_t row_len = indptr[oldvid + 1] - indptr[oldvid];
// Append the surviving neighbors of this row to sub_csr->indices.
def_check.CollectOnRow(&indices[row_start], &edge_ids[row_start], row_len,
&sub_csr->indices, &orig_edge_ids);
// Row i of the subgraph ends where the collected entries end so far.
sub_csr->indptr[i + 1] = sub_csr->indices.size();
}
// Store the non-zeros in a subgraph with edge attributes of new edge ids.
sub_csr->edge_ids.resize(sub_csr->indices.size());
for (size_t i = 0; i < sub_csr->edge_ids.size(); i++)
sub_csr->edge_ids[i] = i;
// Copy the original edge ids into an id array for the caller.
IdArray rst_eids = IdArray::Empty({static_cast<int64_t>(orig_edge_ids.size())},
DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
dgl_id_t* eid_data = static_cast<dgl_id_t*>(rst_eids->data);
std::copy(orig_edge_ids.begin(), orig_edge_ids.end(), eid_data);
return std::pair<ImmutableGraph::CSR::Ptr, IdArray>(sub_csr, rst_eids);
}
/*!
 * \brief Build the CSR of the subgraph formed by the given edges.
 *
 * Endpoints are looked up in edge_list. Vertices receive new ids in the
 * order they are first encountered (source before destination for each
 * edge), and the i-th requested edge gets edge id i in the result.
 * \param eids Edge ids to keep; must be a valid id array.
 * \param edge_list Edge list providing src_points / dst_points per edge id.
 * \return (sub-CSR built with FromEdges sorted on the first end point,
 *          array of original vertex ids indexed by new vertex id).
 */
std::pair<ImmutableGraph::CSR::Ptr, IdArray> ImmutableGraph::CSR::EdgeSubgraph(
IdArray eids, EdgeList::Ptr edge_list) const {
// Return sub_csr and vids array.
CHECK(IsValidIdArray(eids)) << "Invalid edge id array.";
const dgl_id_t* eid_data = static_cast<dgl_id_t*>(eids->data);
const int64_t len = eids->shape[0];
// nodes[new_id] == old vertex id; oldv2newv is the inverse mapping.
std::vector<dgl_id_t> nodes;
std::unordered_map<dgl_id_t, dgl_id_t> oldv2newv;
std::vector<Edge> edges;
for (int64_t i = 0; i < len; i++) {
dgl_id_t src_id = edge_list->src_points[eid_data[i]];
dgl_id_t dst_id = edge_list->dst_points[eid_data[i]];
// pair<iterator, bool>, the second indicates whether the insertion is successful or not.
// The candidate new id is the current map size, i.e. the next unused id;
// it is only used when the vertex has not been seen before.
auto src_pair = oldv2newv.insert(std::make_pair(src_id, oldv2newv.size()));
auto dst_pair = oldv2newv.insert(std::make_pair(dst_id, oldv2newv.size()));
if (src_pair.second)
nodes.push_back(src_id);
if (dst_pair.second)
nodes.push_back(dst_id);
// The edge id in the subgraph is the position of the edge in eids.
edges.push_back(Edge{src_pair.first->second, dst_pair.first->second, static_cast<dgl_id_t>(i)});
}
const size_t n = oldv2newv.size();
auto sub_csr = CSR::FromEdges(&edges, 0, n);
IdArray rst_vids = IdArray::Empty({static_cast<int64_t>(nodes.size())},
DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
dgl_id_t* vid_data = static_cast<dgl_id_t*>(rst_vids->data);
std::copy(nodes.begin(), nodes.end(), vid_data);
return std::make_pair(sub_csr, rst_vids);
}
/*!
 * \brief Build a CSR from an edge vector.
 *
 * The edges are sorted IN PLACE by end point `sort_on` (ties broken by the
 * other end point); the caller's vector is reordered. Rows of the result are
 * indexed by end point `sort_on`, and the column indices hold the other end
 * point.
 * \param edges Edges to convert; reordered by this call.
 * \param sort_on 0 or 1: which end point indexes the rows.
 * \param num_nodes Number of vertices; every end point must be smaller.
 * \return The new CSR with num_nodes + 1 indptr entries.
 */
ImmutableGraph::CSR::Ptr ImmutableGraph::CSR::FromEdges(std::vector<Edge> *edges,
int sort_on, uint64_t num_nodes) {
CHECK(sort_on == 0 || sort_on == 1) << "we must sort on the first or the second vector";
int other_end = sort_on == 1 ? 0 : 1;
// TODO(zhengda) we should sort in parallel.
std::sort(edges->begin(), edges->end(), [sort_on, other_end](const Edge &e1, const Edge &e2) {
if (e1.end_points[sort_on] == e2.end_points[sort_on]) {
return e1.end_points[other_end] < e2.end_points[other_end];
} else {
return e1.end_points[sort_on] < e2.end_points[sort_on];
}
});
// NOTE(review): presumably CSR(0, 0) starts with a single indptr entry for
// row 0; the size CHECKs below depend on that - confirm against the ctor.
auto t = std::make_shared<CSR>(0, 0);
t->indices.resize(edges->size());
t->edge_ids.resize(edges->size());
for (size_t i = 0; i < edges->size(); i++) {
t->indices[i] = edges->at(i).end_points[other_end];
CHECK(t->indices[i] < num_nodes);
t->edge_ids[i] = edges->at(i).edge_id;
dgl_id_t vid = edges->at(i).end_points[sort_on];
CHECK(vid < num_nodes);
// Open the rows up to and including vid: each newly opened row (this also
// covers skipped rows that have no edges) starts at the current position i.
while (vid > 0 && t->indptr.size() <= static_cast<size_t>(vid)) {
t->indptr.push_back(i);
}
CHECK(t->indptr.size() == vid + 1);
}
// Pad indptr with the edge count until it has num_nodes + 1 entries.
while (t->indptr.size() < num_nodes + 1) {
t->indptr.push_back(edges->size());
}
CHECK(t->indptr.size() == num_nodes + 1);
return t;
}
void ImmutableGraph::CSR::ReadAllEdges(std::vector<Edge> *edges) const {
edges->resize(NumEdges());
for (size_t i = 0; i < NumVertices(); i++) {
// If all the remaining nodes don't have edges.
if (indptr[i] == indptr[NumVertices()])
break;
const dgl_id_t *indices_begin = &indices[indptr[i]];
const dgl_id_t *eid_begin = &edge_ids[indptr[i]];
for (size_t j = 0; j < GetDegree(i); j++) {
Edge e;
e.end_points[0] = i;
e.end_points[1] = indices_begin[j];
e.edge_id = eid_begin[j];
(*edges)[indptr[i] + j] = e;
}
}
}
/*!
 * \brief Build the transposed CSR.
 *
 * All edges are read out and a new CSR is built from them, keyed on the
 * second endpoint instead of the first.
 *
 * \return A new CSR over the same number of vertices.
 */
ImmutableGraph::CSR::Ptr ImmutableGraph::CSR::Transpose() const {
  std::vector<Edge> all_edges;
  ReadAllEdges(&all_edges);
  // ReadAllEdges stores the row id in endpoint 0, so sorting on endpoint 1
  // swaps the roles of rows and columns.
  const int sort_on_second_endpoint = 1;
  return FromEdges(&all_edges, sort_on_second_endpoint, NumVertices());
}
/*!
 * \brief Construct an immutable graph from parallel source, destination and
 *        edge-id arrays.
 *
 * Both CSRs are built eagerly: the in-CSR is keyed on the destination
 * endpoint and the out-CSR on the source endpoint.
 *
 * \param src_ids Source vertex id of every edge.
 * \param dst_ids Destination vertex id of every edge; same length as src_ids.
 * \param edge_ids Id of every edge; same length as src_ids.
 * \param num_nodes Number of vertices in the graph.
 * \param multigraph Stored as the multigraph flag of the graph.
 */
ImmutableGraph::ImmutableGraph(IdArray src_ids, IdArray dst_ids, IdArray edge_ids, size_t num_nodes,
                               bool multigraph) : is_multigraph_(multigraph) {
  CHECK(IsValidIdArray(src_ids)) << "Invalid vertex id array.";
  CHECK(IsValidIdArray(dst_ids)) << "Invalid vertex id array.";
  // This array carries edge ids, so report it as such when it is malformed.
  CHECK(IsValidIdArray(edge_ids)) << "Invalid edge id array.";
  const int64_t len = src_ids->shape[0];
  CHECK(len == dst_ids->shape[0]);
  CHECK(len == edge_ids->shape[0]);
  const dgl_id_t *src_data = static_cast<dgl_id_t*>(src_ids->data);
  const dgl_id_t *dst_data = static_cast<dgl_id_t*>(dst_ids->data);
  const dgl_id_t *edge_data = static_cast<dgl_id_t*>(edge_ids->data);
  std::vector<Edge> edges(len);
  for (size_t i = 0; i < edges.size(); i++) {
    Edge e;
    e.end_points[0] = src_data[i];
    e.end_points[1] = dst_data[i];
    e.edge_id = edge_data[i];
    edges[i] = e;
  }
  // FromEdges sorts the vector in place, so the same buffer is reused (and
  // re-sorted) for the second CSR.
  in_csr_ = CSR::FromEdges(&edges, 1, num_nodes);
  out_csr_ = CSR::FromEdges(&edges, 0, num_nodes);
}
/*!
 * \brief Test, for every id in the array, whether it is a vertex of the graph.
 *
 * \param vids The vertex ids to test.
 * \return An array of the same length, dtype and context as vids, holding 1
 *         where the id is smaller than the number of vertices and 0 otherwise.
 */
BoolArray ImmutableGraph::HasVertices(IdArray vids) const {
  CHECK(IsValidIdArray(vids)) << "Invalid vertex id array.";
  const auto len = vids->shape[0];
  BoolArray rst = BoolArray::Empty({len}, vids->dtype, vids->ctx);
  const dgl_id_t* vid_data = static_cast<dgl_id_t*>(vids->data);
  dgl_id_t* rst_data = static_cast<dgl_id_t*>(rst->data);
  const uint64_t nverts = NumVertices();
  for (int64_t i = 0; i < len; ++i) {
    // Only the membership flag is stored; nothing else may overwrite it.
    rst_data[i] = (vid_data[i] < nverts)? 1 : 0;
  }
  return rst;
}
/*!
 * \brief Check whether at least one edge src -> dst exists.
 *
 * Uses the in-CSR when it is available and falls back to the out-CSR
 * otherwise; the neighbor list is probed with a binary search.
 *
 * \param src Source vertex id.
 * \param dst Destination vertex id.
 * \return false when either id is not a vertex or no such edge exists.
 */
bool ImmutableGraph::HasEdgeBetween(dgl_id_t src, dgl_id_t dst) const {
  if (!(HasVertex(src) && HasVertex(dst))) {
    return false;
  }
  if (!this->in_csr_) {
    // No in-CSR: search the successors of src for dst instead.
    CHECK(this->out_csr_) << "one of the CSRs must exist";
    auto successors = this->out_csr_->GetIndexRef(src);
    return dgl::binary_search(successors.begin(), successors.end(), dst);
  }
  // Search the predecessors of dst for src.
  auto predecessors = this->in_csr_->GetIndexRef(dst);
  return dgl::binary_search(predecessors.begin(), predecessors.end(), src);
}
BoolArray ImmutableGraph::HasEdgesBetween(IdArray src_ids, IdArray dst_ids) const {
CHECK(IsValidIdArray(src_ids)) << "Invalid src id array.";
CHECK(IsValidIdArray(dst_ids)) << "Invalid dst id array.";
const auto srclen = src_ids->shape[0];
const auto dstlen = dst_ids->shape[0];
const auto rstlen = std::max(srclen, dstlen);
BoolArray rst = BoolArray::Empty({rstlen}, src_ids->dtype, src_ids->ctx);
dgl_id_t* rst_data = static_cast<dgl_id_t*>(rst->data);
const dgl_id_t* src_data = static_cast<dgl_id_t*>(src_ids->data);
const dgl_id_t* dst_data = static_cast<dgl_id_t*>(dst_ids->data);
if (srclen == 1) {
// one-many
for (int64_t i = 0; i < dstlen; ++i) {
rst_data[i] = HasEdgeBetween(src_data[0], dst_data[i])? 1 : 0;
}
} else if (dstlen == 1) {
// many-one
for (int64_t i = 0; i < srclen; ++i) {
rst_data[i] = HasEdgeBetween(src_data[i], dst_data[0])? 1 : 0;
}
} else {
// many-many
CHECK(srclen == dstlen) << "Invalid src and dst id array.";
for (int64_t i = 0; i < srclen; ++i) {
rst_data[i] = HasEdgeBetween(src_data[i], dst_data[i])? 1 : 0;
bool CSR::HasEdgeBetween(dgl_id_t src, dgl_id_t dst) const {
CHECK(HasVertex(src)) << "Invalid vertex id: " << src;
CHECK(HasVertex(dst)) << "Invalid vertex id: " << dst;
const dgl_id_t* indptr_data = static_cast<dgl_id_t*>(indptr_->data);
const dgl_id_t* indices_data = static_cast<dgl_id_t*>(indices_->data);
for (dgl_id_t i = indptr_data[src]; i < indptr_data[src+1]; ++i) {
if (indices_data[i] == dst) {
return true;
}
}
return rst;
}
IdArray ImmutableGraph::Predecessors(dgl_id_t vid, uint64_t radius) const {
CHECK(HasVertex(vid)) << "invalid vertex: " << vid;
CHECK(radius >= 1) << "invalid radius: " << radius;
auto pred = this->GetInCSR()->GetIndexRef(vid);
const int64_t len = pred.size();
IdArray rst = IdArray::Empty({len}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
dgl_id_t* rst_data = static_cast<dgl_id_t*>(rst->data);
std::copy(pred.begin(), pred.end(), rst_data);
return rst;
return false;
}
/*!
 * \brief Return the successors of a vertex.
 *
 * \param vid The vertex id; must be a valid vertex.
 * \param radius Must be at least 1. It is only validated here; the result is
 *        always the direct successors.
 * \return A new int64 CPU array holding the out-neighbors of vid.
 */
IdArray ImmutableGraph::Successors(dgl_id_t vid, uint64_t radius) const {
  CHECK(HasVertex(vid)) << "invalid vertex: " << vid;
  CHECK(radius >= 1) << "invalid radius: " << radius;
  auto succ = this->GetOutCSR()->GetIndexRef(vid);
  const int64_t len = succ.size();
  IdArray rst = IdArray::Empty({len}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
  dgl_id_t* rst_data = static_cast<dgl_id_t*>(rst->data);
  std::copy(succ.begin(), succ.end(), rst_data);
  return rst;
}

/*!
 * \brief Return the column indices stored in the row of a vertex.
 *
 * \param vid The row vertex id; must be a valid vertex.
 * \param radius Only a radius of exactly 1 is accepted.
 * \return A new id array holding a copy of the row's column indices.
 */
IdArray CSR::Successors(dgl_id_t vid, uint64_t radius) const {
  CHECK(HasVertex(vid)) << "invalid vertex: " << vid;
  CHECK(radius == 1) << "invalid radius: " << radius;
  const dgl_id_t* indptr_data = static_cast<dgl_id_t*>(indptr_->data);
  const dgl_id_t* indices_data = static_cast<dgl_id_t*>(indices_->data);
  const int64_t len = indptr_data[vid + 1] - indptr_data[vid];
  IdArray rst = NewIdArray(len);
  dgl_id_t* rst_data = static_cast<dgl_id_t*>(rst->data);
  std::copy(indices_data + indptr_data[vid],
            indices_data + indptr_data[vid + 1],
            rst_data);
  return rst;
}
/*!
 * \brief Locate the ids of all edges src -> dst using the in-CSR.
 *
 * The in-neighbor list of dst is probed with a binary search, so it is
 * expected to be sorted. Parallel edges occupy consecutive positions.
 *
 * \param src Source vertex id.
 * \param dst Destination vertex id.
 * \return An iterator range over the matching edge ids; empty when no such
 *         edge exists.
 */
DGLIdIters ImmutableGraph::GetInEdgeIdRef(dgl_id_t src, dgl_id_t dst) const {
  CHECK(this->in_csr_);
  auto neighbors = this->in_csr_->GetIndexRef(dst);
  auto first = std::lower_bound(neighbors.begin(), neighbors.end(), src);
  const bool found = (first != neighbors.end()) && (*first == src);
  if (!found) {
    // If there doesn't exist edges between the two nodes.
    return DGLIdIters(first, first);
  }
  // Translate the position in the index array into the edge-id array.
  size_t off = first - in_csr_->indices.begin();
  CHECK(off < in_csr_->indices.size());
  auto start = in_csr_->edge_ids.begin() + off;
  // Count the run of entries equal to src (more than one for parallel edges).
  int64_t len = 0;
  auto cursor = first;
  while (cursor != neighbors.end() && *cursor == src) {
    cursor++;
    len++;
  }
  return DGLIdIters(start, start + len);
}
/*!
 * \brief Locate the ids of all edges src -> dst using the out-CSR.
 *
 * The out-neighbor list of src is probed with a binary search, so it is
 * expected to be sorted. Parallel edges occupy consecutive positions.
 *
 * \param src Source vertex id.
 * \param dst Destination vertex id.
 * \return An iterator range over the matching edge ids; empty when no such
 *         edge exists.
 */
DGLIdIters ImmutableGraph::GetOutEdgeIdRef(dgl_id_t src, dgl_id_t dst) const {
  CHECK(this->out_csr_);
  auto neighbors = this->out_csr_->GetIndexRef(src);
  auto first = std::lower_bound(neighbors.begin(), neighbors.end(), dst);
  const bool found = (first != neighbors.end()) && (*first == dst);
  if (!found) {
    // If there doesn't exist edges between the two nodes.
    return DGLIdIters(first, first);
  }
  // Translate the position in the index array into the edge-id array.
  size_t off = first - out_csr_->indices.begin();
  CHECK(off < out_csr_->indices.size());
  auto start = out_csr_->edge_ids.begin() + off;
  // Count the run of entries equal to dst (more than one for parallel edges).
  int64_t len = 0;
  auto cursor = first;
  while (cursor != neighbors.end() && *cursor == dst) {
    cursor++;
    len++;
  }
  return DGLIdIters(start, start + len);
}
/*!
 * \brief Return the ids of all edges src -> dst.
 *
 * Uses the in-CSR when it exists and the out-CSR otherwise.
 *
 * \param src Source vertex id; must be a valid vertex.
 * \param dst Destination vertex id; must be a valid vertex.
 * \return A new int64 CPU array with the matching edge ids (possibly empty).
 */
IdArray ImmutableGraph::EdgeId(dgl_id_t src, dgl_id_t dst) const {
  CHECK(HasVertex(src) && HasVertex(dst)) << "invalid edge: " << src << " -> " << dst;
  auto edge_ids = in_csr_ ? GetInEdgeIdRef(src, dst) : GetOutEdgeIdRef(src, dst);
  int64_t len = edge_ids.size();
  IdArray rst = IdArray::Empty({len}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
  dgl_id_t* rst_data = static_cast<dgl_id_t*>(rst->data);
  if (len > 0) {
    std::copy(edge_ids.begin(), edge_ids.end(), rst_data);
  }
  return rst;
}

/*!
 * \brief Return the ids of all edges src -> dst stored in this CSR.
 *
 * Scans the row of src linearly and collects the edge id of every entry
 * whose column index equals dst.
 *
 * \param src Row (source) vertex id; must be a valid vertex.
 * \param dst Column (destination) vertex id; must be a valid vertex.
 * \return A new id array with the matching edge ids (possibly empty).
 */
IdArray CSR::EdgeId(dgl_id_t src, dgl_id_t dst) const {
  // TODO(minjie): use more efficient binary search when the column indices
  //   are also sorted.
  CHECK(HasVertex(src)) << "invalid vertex: " << src;
  CHECK(HasVertex(dst)) << "invalid vertex: " << dst;
  std::vector<dgl_id_t> ids;
  const dgl_id_t* indptr_data = static_cast<dgl_id_t*>(indptr_->data);
  const dgl_id_t* indices_data = static_cast<dgl_id_t*>(indices_->data);
  const dgl_id_t* eid_data = static_cast<dgl_id_t*>(edge_ids_->data);
  for (dgl_id_t i = indptr_data[src]; i < indptr_data[src+1]; ++i) {
    if (indices_data[i] == dst) {
      ids.push_back(eid_data[i]);
    }
  }
  return VecToIdArray(ids);
}
ImmutableGraph::EdgeArray ImmutableGraph::EdgeIds(IdArray src_ids, IdArray dst_ids) const {
CSR::EdgeArray CSR::EdgeIds(IdArray src_ids, IdArray dst_ids) const {
// TODO(minjie): more efficient implementation for simple graph
CHECK(IsValidIdArray(src_ids)) << "Invalid src id array.";
CHECK(IsValidIdArray(dst_ids)) << "Invalid dst id array.";
const auto srclen = src_ids->shape[0];
......@@ -555,187 +305,364 @@ ImmutableGraph::EdgeArray ImmutableGraph::EdgeIds(IdArray src_ids, IdArray dst_i
const dgl_id_t* src_data = static_cast<dgl_id_t*>(src_ids->data);
const dgl_id_t* dst_data = static_cast<dgl_id_t*>(dst_ids->data);
const dgl_id_t* indptr_data = static_cast<dgl_id_t*>(indptr_->data);
const dgl_id_t* indices_data = static_cast<dgl_id_t*>(indices_->data);
const dgl_id_t* eid_data = static_cast<dgl_id_t*>(edge_ids_->data);
std::vector<dgl_id_t> src, dst, eid;
for (int64_t i = 0, j = 0; i < srclen && j < dstlen; i += src_stride, j += dst_stride) {
const dgl_id_t src_id = src_data[i], dst_id = dst_data[j];
CHECK(HasVertex(src_id) && HasVertex(dst_id)) <<
"invalid edge: " << src_id << " -> " << dst_id;
auto edges = this->in_csr_ ? GetInEdgeIdRef(src_id, dst_id) : GetOutEdgeIdRef(src_id, dst_id);
for (size_t k = 0; k < edges.size(); k++) {
src.push_back(src_id);
dst.push_back(dst_id);
eid.push_back(edges[k]);
for (dgl_id_t i = indptr_data[src_id]; i < indptr_data[src_id+1]; ++i) {
if (indices_data[i] == dst_id) {
src.push_back(src_id);
dst.push_back(dst_id);
eid.push_back(eid_data[i]);
}
}
}
return CSR::EdgeArray{VecToIdArray(src), VecToIdArray(dst), VecToIdArray(eid)};
}
const int64_t rstlen = src.size();
IdArray rst_src = IdArray::Empty({rstlen}, src_ids->dtype, src_ids->ctx);
IdArray rst_dst = IdArray::Empty({rstlen}, src_ids->dtype, src_ids->ctx);
IdArray rst_eid = IdArray::Empty({rstlen}, src_ids->dtype, src_ids->ctx);
/*!
 * \brief Return all edges of the CSR in storage order.
 *
 * The source array is materialized by expanding the index pointers; the
 * destination and edge-id arrays returned are the CSR's own indices_ and
 * edge_ids_ arrays.
 *
 * \param order Must be empty or "srcdst"; any other value fails a CHECK.
 * \return The (src, dst, edge id) arrays.
 */
CSR::EdgeArray CSR::Edges(const std::string &order) const {
  CHECK(order.empty() || order == std::string("srcdst"))
    << "CSR only support Edges of order \"srcdst\","
    << " but got \"" << order << "\".";
  const int64_t rstlen = NumEdges();
  const dgl_id_t* indptr_data = static_cast<dgl_id_t*>(indptr_->data);
  IdArray rst_src = NewIdArray(rstlen);
  dgl_id_t* rst_src_data = static_cast<dgl_id_t*>(rst_src->data);
  // If sorted, the returned edges are sorted by the source Id and dest Id.
  // Every position inside row `src` gets `src` as its source vertex.
  for (dgl_id_t src = 0; src < NumVertices(); ++src) {
    std::fill(rst_src_data + indptr_data[src],
              rst_src_data + indptr_data[src + 1],
              src);
  }
  return CSR::EdgeArray{rst_src, indices_, edge_ids_};
}
std::pair<dgl_id_t, dgl_id_t> ImmutableGraph::FindEdge(dgl_id_t eid) const {
dgl_id_t row = 0, col = 0;
auto edge_list = GetEdgeList();
CHECK(eid < NumEdges()) << "Invalid edge id " << eid;
row = edge_list->src_points[eid];
col = edge_list->dst_points[eid];
CHECK(row < NumVertices() && col < NumVertices()) << "Invalid edge id " << eid;
return std::pair<dgl_id_t, dgl_id_t>(row, col);
Subgraph CSR::VertexSubgraph(IdArray vids) const {
CHECK(IsValidIdArray(vids)) << "Invalid vertex id array.";
IdHashMap hashmap(vids);
const dgl_id_t* vid_data = static_cast<dgl_id_t*>(vids->data);
const int64_t len = vids->shape[0];
const dgl_id_t* indptr_data = static_cast<dgl_id_t*>(indptr_->data);
const dgl_id_t* indices_data = static_cast<dgl_id_t*>(indices_->data);
const dgl_id_t* eid_data = static_cast<dgl_id_t*>(edge_ids_->data);
std::vector<dgl_id_t> sub_indptr, sub_indices, sub_eids, induced_edges;
sub_indptr.resize(len + 1, 0);
const dgl_id_t kInvalidId = len + 1;
for (int64_t i = 0; i < len; ++i) {
// NOTE: newv == i
const dgl_id_t oldv = vid_data[i];
CHECK(HasVertex(oldv)) << "Invalid vertex: " << oldv;
for (dgl_id_t olde = indptr_data[oldv]; olde < indptr_data[oldv+1]; ++olde) {
const dgl_id_t oldu = indices_data[olde];
const dgl_id_t newu = hashmap.Map(oldu, kInvalidId);
if (newu != kInvalidId) {
++sub_indptr[i];
sub_indices.push_back(newu);
induced_edges.push_back(eid_data[olde]);
}
}
}
sub_eids.resize(sub_indices.size());
std::iota(sub_eids.begin(), sub_eids.end(), 0);
// cumsum sub_indptr
for (int64_t i = 0, cumsum = 0; i < len; ++i) {
const dgl_id_t temp = sub_indptr[i];
sub_indptr[i] = cumsum;
cumsum += temp;
}
sub_indptr[len] = sub_indices.size();
CSRPtr subcsr(new CSR(
VecToIdArray(sub_indptr), VecToIdArray(sub_indices), VecToIdArray(sub_eids)));
return Subgraph{subcsr, vids, VecToIdArray(induced_edges)};
}
// Transpose the CSR with a counting sort over the column indices.
// complexity: time O(E + V); no buffers are allocated beyond the three
// arrays that make up the returned CSR.
CSRPtr CSR::Transpose() const {
  const int64_t num_rows = NumVertices();
  const int64_t nnz = NumEdges();
  const dgl_id_t* in_indptr = static_cast<dgl_id_t*>(indptr_->data);
  const dgl_id_t* in_indices = static_cast<dgl_id_t*>(indices_->data);
  const dgl_id_t* in_eids = static_cast<dgl_id_t*>(edge_ids_->data);
  IdArray ret_indptr = NewIdArray(num_rows + 1);
  IdArray ret_indices = NewIdArray(nnz);
  IdArray ret_edge_ids = NewIdArray(nnz);
  dgl_id_t* out_indptr = static_cast<dgl_id_t*>(ret_indptr->data);
  dgl_id_t* out_indices = static_cast<dgl_id_t*>(ret_indices->data);
  dgl_id_t* out_eids = static_cast<dgl_id_t*>(ret_edge_ids->data);

  // Step 1: count how many entries fall into each output row.
  std::fill(out_indptr, out_indptr + num_rows, 0);
  for (int64_t pos = 0; pos < nnz; ++pos) {
    ++out_indptr[in_indices[pos]];
  }

  // Step 2: turn the counts into start offsets (exclusive prefix sum).
  int64_t running = 0;
  for (int64_t row = 0; row < num_rows; ++row) {
    const dgl_id_t count = out_indptr[row];
    out_indptr[row] = running;
    running += count;
  }
  out_indptr[num_rows] = nnz;

  // Step 3: scatter every entry into its output row. out_indptr[r] is used
  // as the write cursor of row r and therefore ends up at the start of r + 1.
  for (int64_t row = 0; row < num_rows; ++row) {
    for (dgl_id_t pos = in_indptr[row]; pos < in_indptr[row + 1]; ++pos) {
      const dgl_id_t target = in_indices[pos];
      const dgl_id_t slot = out_indptr[target];
      out_indices[slot] = row;
      out_eids[slot] = in_eids[pos];
      ++out_indptr[target];
    }
  }

  // Step 4: shift the cursors right by one slot to restore the row starts.
  int64_t previous = 0;
  for (int64_t row = 0; row <= num_rows; ++row) {
    const dgl_id_t cursor = out_indptr[row];
    out_indptr[row] = previous;
    previous = cursor;
  }
  return CSRPtr(new CSR(ret_indptr, ret_indices, ret_edge_ids));
}
// Convert the CSR to COO. The endpoints of each edge are written at the slot
// given by its edge id, so the COO arrays are ordered by edge id.
// complexity: time O(E + V); only the two returned arrays are allocated.
COOPtr CSR::ToCOO() const {
  const dgl_id_t* row_offsets = static_cast<dgl_id_t*>(indptr_->data);
  const dgl_id_t* col_indices = static_cast<dgl_id_t*>(indices_->data);
  const dgl_id_t* edge_id_of = static_cast<dgl_id_t*>(edge_ids_->data);
  IdArray ret_src = NewIdArray(NumEdges());
  IdArray ret_dst = NewIdArray(NumEdges());
  dgl_id_t* src_out = static_cast<dgl_id_t*>(ret_src->data);
  dgl_id_t* dst_out = static_cast<dgl_id_t*>(ret_dst->data);
  // scatter by edge id
  for (dgl_id_t row = 0; row < NumVertices(); ++row) {
    const dgl_id_t row_end = row_offsets[row + 1];
    for (dgl_id_t pos = row_offsets[row]; pos < row_end; ++pos) {
      const dgl_id_t slot = edge_id_of[pos];
      src_out[slot] = row;
      dst_out[slot] = col_indices[pos];
    }
  }
  return COOPtr(new COO(NumVertices(), ret_src, ret_dst));
}
//////////////////////////////////////////////////////////
//
// COO graph implementation
//
//////////////////////////////////////////////////////////
/*!
 * \brief Construct a COO graph from source and destination arrays.
 *
 * The multigraph flag is not set by this overload.
 *
 * \param num_vertices Number of vertices.
 * \param src Source vertex id of every edge.
 * \param dst Destination vertex id of every edge; same length as src.
 */
COO::COO(int64_t num_vertices, IdArray src, IdArray dst)
  : num_vertices_(num_vertices),
    src_(src),
    dst_(dst) {
  // Both endpoint arrays must be valid id arrays of equal length.
  CHECK(IsValidIdArray(src));
  CHECK(IsValidIdArray(dst));
  CHECK_EQ(src->shape[0], dst->shape[0]);
}
/*!
 * \brief Construct a COO graph with a known multigraph flag.
 *
 * \param num_vertices Number of vertices.
 * \param src Source vertex id of every edge.
 * \param dst Destination vertex id of every edge; same length as src.
 * \param is_multigraph Value used to initialize the multigraph flag.
 */
COO::COO(int64_t num_vertices, IdArray src, IdArray dst, bool is_multigraph)
  : num_vertices_(num_vertices),
    src_(src),
    dst_(dst),
    is_multigraph_(is_multigraph) {
  // Both endpoint arrays must be valid id arrays of equal length.
  CHECK(IsValidIdArray(src));
  CHECK(IsValidIdArray(dst));
  CHECK_EQ(src->shape[0], dst->shape[0]);
}
bool COO::IsMultigraph() const {
// The lambda will be called the first time to initialize the is_multigraph flag.
return const_cast<COO*>(this)->is_multigraph_.Get([this] () {
std::unordered_set<std::pair<dgl_id_t, dgl_id_t>, PairHash> hashmap;
const dgl_id_t* src_data = static_cast<dgl_id_t*>(src_->data);
const dgl_id_t* dst_data = static_cast<dgl_id_t*>(dst_->data);
for (dgl_id_t eid = 0; eid < NumEdges(); ++eid) {
const auto& p = std::make_pair(src_data[eid], dst_data[eid]);
if (hashmap.count(p)) {
return true;
} else {
hashmap.insert(p);
}
}
return false;
});
}
ImmutableGraph::EdgeArray ImmutableGraph::FindEdges(IdArray eids) const {
COO::EdgeArray COO::FindEdges(IdArray eids) const {
CHECK(IsValidIdArray(eids)) << "Invalid edge id array";
dgl_id_t* eid_data = static_cast<dgl_id_t*>(eids->data);
int64_t len = eids->shape[0];
IdArray rst_src = IdArray::Empty({len}, eids->dtype, eids->ctx);
IdArray rst_dst = IdArray::Empty({len}, eids->dtype, eids->ctx);
IdArray rst_src = NewIdArray(len);
IdArray rst_dst = NewIdArray(len);
dgl_id_t* rst_src_data = static_cast<dgl_id_t*>(rst_src->data);
dgl_id_t* rst_dst_data = static_cast<dgl_id_t*>(rst_dst->data);
for (int64_t i = 0; i < len; i++) {
auto edge = ImmutableGraph::FindEdge(eid_data[i]);
auto edge = COO::FindEdge(eid_data[i]);
rst_src_data[i] = edge.first;
rst_dst_data[i] = edge.second;
}
return ImmutableGraph::EdgeArray{rst_src, rst_dst, eids};
return COO::EdgeArray{rst_src, rst_dst, eids};
}
ImmutableGraph::EdgeArray ImmutableGraph::Edges(const std::string &order) const {
int64_t rstlen = NumEdges();
IdArray rst_src = IdArray::Empty({rstlen}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
IdArray rst_dst = IdArray::Empty({rstlen}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
IdArray rst_eid = IdArray::Empty({rstlen}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
dgl_id_t* rst_src_data = static_cast<dgl_id_t*>(rst_src->data);
dgl_id_t* rst_dst_data = static_cast<dgl_id_t*>(rst_dst->data);
COO::EdgeArray COO::Edges(const std::string &order) const {
const int64_t rstlen = NumEdges();
CHECK(order.empty() || order == std::string("eid"))
<< "COO only support Edges of order \"eid\", but got \""
<< order << "\".";
IdArray rst_eid = NewIdArray(rstlen);
dgl_id_t* rst_eid_data = static_cast<dgl_id_t*>(rst_eid->data);
std::iota(rst_eid_data, rst_eid_data + rstlen, 0);
return EdgeArray{src_, dst_, rst_eid};
}
Subgraph COO::EdgeSubgraph(IdArray eids) const {
CHECK(IsValidIdArray(eids));
const dgl_id_t* src_data = static_cast<dgl_id_t*>(src_->data);
const dgl_id_t* dst_data = static_cast<dgl_id_t*>(dst_->data);
const dgl_id_t* eids_data = static_cast<dgl_id_t*>(eids->data);
IdArray new_src = NewIdArray(eids->shape[0]);
IdArray new_dst = NewIdArray(eids->shape[0]);
dgl_id_t* new_src_data = static_cast<dgl_id_t*>(new_src->data);
dgl_id_t* new_dst_data = static_cast<dgl_id_t*>(new_dst->data);
dgl_id_t newid = 0;
std::unordered_map<dgl_id_t, dgl_id_t> oldv2newv;
if (order == "srcdst") {
auto out_csr = GetOutCSR();
// If sorted, the returned edges are sorted by the source Id and dest Id.
for (size_t i = 0; i < out_csr->indptr.size() - 1; i++) {
std::fill(rst_src_data + out_csr->indptr[i], rst_src_data + out_csr->indptr[i + 1],
static_cast<dgl_id_t>(i));
for (int64_t i = 0; i < eids->shape[0]; ++i) {
const dgl_id_t eid = eids_data[i];
const dgl_id_t src = src_data[eid];
const dgl_id_t dst = dst_data[eid];
if (!oldv2newv.count(src)) {
oldv2newv[src] = newid++;
}
std::copy(out_csr->indices.begin(), out_csr->indices.end(), rst_dst_data);
std::copy(out_csr->edge_ids.begin(), out_csr->edge_ids.end(), rst_eid_data);
} else if (order.empty() || order == "eid") {
std::vector<Edge> edges;
auto out_csr = GetOutCSR();
out_csr->ReadAllEdges(&edges);
std::sort(edges.begin(), edges.end(), [](const Edge &e1, const Edge &e2) {
return e1.edge_id < e2.edge_id;
});
for (size_t i = 0; i < edges.size(); i++) {
rst_src_data[i] = edges[i].end_points[0];
rst_dst_data[i] = edges[i].end_points[1];
rst_eid_data[i] = edges[i].edge_id;
if (!oldv2newv.count(dst)) {
oldv2newv[dst] = newid++;
}
} else {
LOG(FATAL) << "unsupported order " << order;
*(new_src_data++) = oldv2newv[src];
*(new_dst_data++) = oldv2newv[dst];
}
// induced nodes
IdArray induced_nodes = NewIdArray(newid);
dgl_id_t* induced_nodes_data = static_cast<dgl_id_t*>(induced_nodes->data);
for (const auto& kv : oldv2newv) {
induced_nodes_data[kv.second] = kv.first;
}
return ImmutableGraph::EdgeArray{rst_src, rst_dst, rst_eid};
COOPtr subcoo(new COO(newid, new_src, new_dst));
return Subgraph{subcoo, induced_nodes, eids};
}
Subgraph ImmutableGraph::VertexSubgraph(IdArray vids) const {
Subgraph subg;
std::pair<CSR::Ptr, IdArray> ret;
// We prefer to generate a subgraph for out-csr first.
if (out_csr_) {
ret = out_csr_->VertexSubgraph(vids);
subg.graph = GraphPtr(new ImmutableGraph(nullptr, ret.first, IsMultigraph()));
} else {
CHECK(in_csr_);
ret = in_csr_->VertexSubgraph(vids);
// When we generate a subgraph, it may be used by only accessing in-edges or out-edges.
// We don't need to generate both.
subg.graph = GraphPtr(new ImmutableGraph(ret.first, nullptr, IsMultigraph()));
// complexity: time O(E + V), space O(1)
CSRPtr COO::ToCSR() const {
const int64_t N = num_vertices_;
const int64_t M = src_->shape[0];
const dgl_id_t* src_data = static_cast<dgl_id_t*>(src_->data);
const dgl_id_t* dst_data = static_cast<dgl_id_t*>(dst_->data);
IdArray indptr = NewIdArray(N + 1);
IdArray indices = NewIdArray(M);
IdArray edge_ids = NewIdArray(M);
dgl_id_t* Bp = static_cast<dgl_id_t*>(indptr->data);
dgl_id_t* Bi = static_cast<dgl_id_t*>(indices->data);
dgl_id_t* Bx = static_cast<dgl_id_t*>(edge_ids->data);
std::fill(Bp, Bp + N, 0);
for (int64_t i = 0; i < M; ++i) {
Bp[src_data[i]]++;
}
subg.induced_vertices = vids;
subg.induced_edges = ret.second;
return subg;
}
Subgraph ImmutableGraph::EdgeSubgraph(IdArray eids) const {
Subgraph subg;
std::pair<CSR::Ptr, IdArray> ret;
auto edge_list = GetEdgeList();
if (out_csr_) {
ret = out_csr_->EdgeSubgraph(eids, edge_list);
subg.graph = GraphPtr(new ImmutableGraph(nullptr, ret.first, IsMultigraph()));
} else {
ret = in_csr_->EdgeSubgraph(eids, edge_list);
subg.graph = GraphPtr(new ImmutableGraph(ret.first, nullptr, IsMultigraph()));
// cumsum
for (int64_t i = 0, cumsum = 0; i < N; ++i) {
const dgl_id_t temp = Bp[i];
Bp[i] = cumsum;
cumsum += temp;
}
Bp[N] = M;
for (int64_t i = 0; i < M; ++i) {
const dgl_id_t src = src_data[i];
const dgl_id_t dst = dst_data[i];
Bi[Bp[src]] = dst;
Bx[Bp[src]] = i;
Bp[src]++;
}
subg.induced_edges = eids;
subg.induced_vertices = ret.second;
return subg;
// correct the indptr
for (int64_t i = 0, last = 0; i <= N; ++i) {
dgl_id_t temp = Bp[i];
Bp[i] = last;
last = temp;
}
return CSRPtr(new CSR(indptr, indices, edge_ids));
}
/*!
 * \brief Copy the rows [start, end) of a CSR into freshly allocated arrays.
 *
 * The returned index pointers are rebased so that the first copied row
 * starts at offset 0.
 *
 * \param csr The CSR to slice.
 * \param start First row to copy.
 * \param end One past the last row to copy.
 * \return New int64 CPU arrays (indptr, indices, edge ids) of the slice.
 */
ImmutableGraph::CSRArray GetCSRArray(ImmutableGraph::CSR::Ptr csr, size_t start, size_t end) {
  size_t num_rows = end - start;
  size_t nnz = csr->indptr[end] - csr->indptr[start];
  IdArray indptr = IdArray::Empty({static_cast<int64_t>(num_rows + 1)},
                                  DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
  IdArray indices = IdArray::Empty({static_cast<int64_t>(nnz)},
                                   DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
  IdArray eids = IdArray::Empty({static_cast<int64_t>(nnz)},
                                DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
  int64_t *indptr_data = static_cast<int64_t*>(indptr->data);
  dgl_id_t* indices_data = static_cast<dgl_id_t*>(indices->data);
  dgl_id_t* eid_data = static_cast<dgl_id_t*>(eids->data);
  for (size_t i = start; i < end + 1; i++)
    indptr_data[i - start] = csr->indptr[i] - csr->indptr[start];
  std::copy(csr->indices.begin() + csr->indptr[start],
            csr->indices.begin() + csr->indptr[end], indices_data);
  std::copy(csr->edge_ids.begin() + csr->indptr[start],
            csr->edge_ids.begin() + csr->indptr[end], eid_data);
  return ImmutableGraph::CSRArray{indptr, indices, eids};
}

//////////////////////////////////////////////////////////
//
// immutable graph implementation
//
//////////////////////////////////////////////////////////

/*!
 * \brief Return all edges of the graph in the requested order.
 *
 * \param order Empty for an arbitrary order, "srcdst" to use the out-CSR,
 *        or "eid" to use the COO. Any other value is a fatal error.
 * \return The (src, dst, edge id) arrays.
 */
ImmutableGraph::EdgeArray ImmutableGraph::Edges(const std::string &order) const {
  if (order.empty()) {
    // arbitrary order
    if (in_csr_) {
      // transpose: the in-CSR stores destinations as rows, so swap the
      // endpoint arrays it reports.
      const auto& edges = in_csr_->Edges(order);
      return EdgeArray{edges.dst, edges.src, edges.id};
    } else {
      return AnyGraph()->Edges(order);
    }
  } else if (order == std::string("srcdst")) {
    // TODO(minjie): CSR only guarantees "src" to be sorted.
    //   Maybe we should relax this requirement?
    return GetOutCSR()->Edges(order);
  } else if (order == std::string("eid")) {
    return GetCOO()->Edges(order);
  } else {
    LOG(FATAL) << "Unsupported order request: " << order;
  }
  return {};
}
ImmutableGraph::CSRArray ImmutableGraph::GetInCSRArray(size_t start, size_t end) const {
return GetCSRArray(GetInCSR(), start, end);
Subgraph ImmutableGraph::VertexSubgraph(IdArray vids) const {
// We prefer to generate a subgraph from out-csr.
auto sg = GetOutCSR()->VertexSubgraph(vids);
CSRPtr subcsr = std::dynamic_pointer_cast<CSR>(sg.graph);
return Subgraph{GraphPtr(new ImmutableGraph(subcsr)),
sg.induced_vertices, sg.induced_edges};
}
ImmutableGraph::CSRArray ImmutableGraph::GetOutCSRArray(size_t start, size_t end) const {
return GetCSRArray(GetOutCSR(), start, end);
Subgraph ImmutableGraph::EdgeSubgraph(IdArray eids) const {
// We prefer to generate a subgraph from out-csr.
auto sg = GetCOO()->EdgeSubgraph(eids);
COOPtr subcoo = std::dynamic_pointer_cast<COO>(sg.graph);
return Subgraph{GraphPtr(new ImmutableGraph(subcoo)),
sg.induced_vertices, sg.induced_edges};
}
/*!
 * \brief Return the adjacency matrix of the graph in the requested format.
 *
 * \param transpose Selects the orientation. For "csr", true uses the out-CSR
 *        and false the in-CSR; for "coo" the flag is inverted before being
 *        forwarded to the COO.
 * \param fmt Either "csr" or "coo"; any other value is a fatal error.
 * \return The arrays produced by the underlying CSR/COO GetAdj call.
 */
std::vector<IdArray> ImmutableGraph::GetAdj(bool transpose, const std::string &fmt) const {
  // TODO(minjie): Our current semantics of adjacency matrix is row for dst nodes and col for
  //   src nodes. Therefore, we need to flip the transpose flag. For example, transpose=False
  //   is equal to in edge CSR.
  //   We have this behavior because previously we use framework's SPMM and we don't cache
  //   reverse adj. This is not intuitive and also not consistent with networkx's
  //   to_scipy_sparse_matrix. With the upcoming custom kernel change, we should change the
  //   behavior and make row for src and col for dst.
  if (fmt == std::string("csr")) {
    return transpose? GetOutCSR()->GetAdj(false, "csr") : GetInCSR()->GetAdj(false, "csr");
  } else if (fmt == std::string("coo")) {
    return GetCOO()->GetAdj(!transpose, fmt);
  } else {
    LOG(FATAL) << "unsupported adjacency matrix format: " << fmt;
    return {};
  }
}
......
......@@ -85,10 +85,10 @@ DGL_REGISTER_GLOBAL("network._CAPI_SenderSendSubgraph")
CommunicatorHandle chandle = args[0];
int recv_id = args[1];
GraphHandle ghandle = args[2];
const IdArray node_mapping = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[3]));
const IdArray edge_mapping = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[4]));
const IdArray layer_offsets = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[5]));
const IdArray flow_offsets = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[6]));
const IdArray node_mapping = args[3];
const IdArray edge_mapping = args[4];
const IdArray layer_offsets = args[5];
const IdArray flow_offsets = args[6];
ImmutableGraph *ptr = static_cast<ImmutableGraph*>(ghandle);
network::Sender* sender = static_cast<network::Sender*>(chandle);
auto csr = ptr->GetInCSR();
......@@ -160,7 +160,7 @@ DGL_REGISTER_GLOBAL("network._CAPI_ReceiverRecvSubgraph")
int control = *buffer;
if (control == CONTROL_NODEFLOW) {
NodeFlow* nf = new NodeFlow();
ImmutableGraph::CSR::Ptr csr;
CSRPtr csr;
// Deserialize nodeflow from recv_data_buffer
network::DeserializeSampledSubgraph(buffer+sizeof(CONTROL_NODEFLOW),
&(csr),
......@@ -168,7 +168,7 @@ DGL_REGISTER_GLOBAL("network._CAPI_ReceiverRecvSubgraph")
&(nf->edge_mapping),
&(nf->layer_offsets),
&(nf->flow_offsets));
nf->graph = GraphPtr(new ImmutableGraph(csr, nullptr, false));
nf->graph = GraphPtr(new ImmutableGraph(csr, nullptr));
std::vector<NodeFlow*> subgs(1);
subgs[0] = nf;
*rv = WrapVectorReturn(subgs);
......
......@@ -18,7 +18,7 @@ namespace network {
const int kNumTensor = 7; // We need to serialize 7 conponents (tensor) here
int64_t SerializeSampledSubgraph(char* data,
const ImmutableGraph::CSR::Ptr csr,
const CSRPtr csr,
const IdArray& node_mapping,
const IdArray& edge_mapping,
const IdArray& layer_offsets,
......@@ -30,9 +30,9 @@ int64_t SerializeSampledSubgraph(char* data,
int64_t edge_mapping_size = edge_mapping->shape[0] * sizeof(dgl_id_t);
int64_t layer_offsets_size = layer_offsets->shape[0] * sizeof(dgl_id_t);
int64_t flow_offsets_size = flow_offsets->shape[0] * sizeof(dgl_id_t);
int64_t indptr_size = csr->indptr.size() * sizeof(int64_t);
int64_t indices_size = csr->indices.size() * sizeof(dgl_id_t);
int64_t edge_ids_size = csr->edge_ids.size() * sizeof(dgl_id_t);
int64_t indptr_size = csr->indptr().GetSize();
int64_t indices_size = csr->indices().GetSize();
int64_t edge_ids_size = csr->edge_ids().GetSize();
total_size += node_mapping_size;
total_size += edge_mapping_size;
total_size += layer_offsets_size;
......@@ -52,9 +52,9 @@ int64_t SerializeSampledSubgraph(char* data,
dgl_id_t* edge_map_data = static_cast<dgl_id_t*>(edge_mapping->data);
dgl_id_t* layer_off_data = static_cast<dgl_id_t*>(layer_offsets->data);
dgl_id_t* flow_off_data = static_cast<dgl_id_t*>(flow_offsets->data);
int64_t* indptr = static_cast<int64_t*>(csr->indptr.data());
dgl_id_t* indices = static_cast<dgl_id_t*>(csr->indices.data());
dgl_id_t* edge_ids = static_cast<dgl_id_t*>(csr->edge_ids.data());
dgl_id_t* indptr = static_cast<dgl_id_t*>(csr->indptr()->data);
dgl_id_t* indices = static_cast<dgl_id_t*>(csr->indices()->data);
dgl_id_t* edge_ids = static_cast<dgl_id_t*>(csr->edge_ids()->data);
// node_mapping
*(reinterpret_cast<int64_t*>(data_ptr)) = node_mapping_size;
data_ptr += sizeof(int64_t);
......@@ -94,7 +94,7 @@ int64_t SerializeSampledSubgraph(char* data,
}
void DeserializeSampledSubgraph(char* data,
ImmutableGraph::CSR::Ptr* csr,
CSRPtr* csr,
IdArray* node_mapping,
IdArray* edge_mapping,
IdArray* layer_offsets,
......@@ -139,25 +139,24 @@ void DeserializeSampledSubgraph(char* data,
memcpy(edge_mapping_data, data_ptr, tensor_size);
data_ptr += tensor_size;
// Construct sub_csr_graph
*csr = std::make_shared<ImmutableGraph::CSR>(num_vertices, num_edges);
(*csr)->indices.resize(num_edges);
(*csr)->edge_ids.resize(num_edges);
// TODO(minjie): multigraph flag
*csr = CSRPtr(new CSR(num_vertices, num_edges, false));
// indices (CSR)
tensor_size = *(reinterpret_cast<int64_t*>(data_ptr));
data_ptr += sizeof(int64_t);
dgl_id_t* col_list_out = (*csr)->indices.data();
dgl_id_t* col_list_out = static_cast<dgl_id_t*>((*csr)->indices()->data);
memcpy(col_list_out, data_ptr, tensor_size);
data_ptr += tensor_size;
// edge_ids (CSR)
tensor_size = *(reinterpret_cast<int64_t*>(data_ptr));
data_ptr += sizeof(int64_t);
dgl_id_t* edge_ids = (*csr)->edge_ids.data();
dgl_id_t* edge_ids = static_cast<dgl_id_t*>((*csr)->edge_ids()->data);
memcpy(edge_ids, data_ptr, tensor_size);
data_ptr += tensor_size;
// indptr (CSR)
tensor_size = *(reinterpret_cast<int64_t*>(data_ptr));
data_ptr += sizeof(int64_t);
int64_t* indptr_out = (*csr)->indptr.data();
dgl_id_t* indptr_out = static_cast<dgl_id_t*>((*csr)->indptr()->data);
memcpy(indptr_out, data_ptr, tensor_size);
data_ptr += tensor_size;
}
......
......@@ -23,7 +23,7 @@ namespace network {
* \return the total size of the serialized binary data
*/
int64_t SerializeSampledSubgraph(char* data,
const ImmutableGraph::CSR::Ptr csr,
const CSRPtr csr,
const IdArray& node_mapping,
const IdArray& edge_mapping,
const IdArray& layer_offsets,
......@@ -39,7 +39,7 @@ int64_t SerializeSampledSubgraph(char* data,
* \param flow_offsets flow offsets in NodeFlowIndex
*/
void DeserializeSampledSubgraph(char* data,
ImmutableGraph::CSR::Ptr* csr,
CSRPtr* csr,
IdArray* node_mapping,
IdArray* edge_mapping,
IdArray* layer_offsets,
......
......@@ -11,38 +11,42 @@
#include "../c_api_common.h"
using dgl::runtime::DGLArgs;
using dgl::runtime::DGLArgValue;
using dgl::runtime::DGLRetValue;
using dgl::runtime::PackedFunc;
namespace dgl {
/*!
 * \brief Slice the rows layer1_start..layer1_end out of the graph's in-CSR and
 *        return them in the requested sparse format.
 *
 * \param graph The immutable graph whose in-CSR is read.
 * \param fmt Either "csr" or "coo"; any other value reaches LOG(FATAL).
 * \param layer0_size Subtracted from layer1_start to obtain first_vid, the
 *        offset removed from column ids when remapping.
 *        NOTE(review): presumably the node count of the preceding layer --
 *        confirm against the caller.
 * \param layer1_start First row of the slice (the "coo" loop treats it as
 *        inclusive). Must be >= layer0_size (checked).
 * \param layer1_end End row of the slice (the "coo" loop treats it as
 *        exclusive).
 * \param remap If true, ids are shifted: rows by layer1_start ("coo" only),
 *        columns by first_vid, and edge ids by the first edge id of the slice.
 * \return For "csr": {indptr, indices, edge ids}.
 *         For "coo": {idx, eid}, where idx holds 2 * nnz entries (row ids in
 *         the first nnz slots, column ids in the last nnz slots).
 */
std::vector<IdArray> GetNodeFlowSlice(const ImmutableGraph &graph, const std::string &fmt,
size_t layer0_size, size_t layer1_start,
size_t layer1_end, bool remap) {
CHECK_GE(layer1_start, layer0_size);
if (fmt == "csr") {
if (fmt == std::string("csr")) {
// Offset that is subtracted from every column id when remap is set.
dgl_id_t first_vid = layer1_start - layer0_size;
ImmutableGraph::CSRArray arrs = graph.GetInCSRArray(layer1_start, layer1_end);
CSRMatrix csr = SliceRows(graph.GetInCSR()->ToCSRMatrix(), layer1_start, layer1_end);
if (remap) {
dgl_id_t *indices_data = static_cast<dgl_id_t*>(arrs.indices->data);
dgl_id_t *eid_data = static_cast<dgl_id_t*>(arrs.id->data);
const size_t len = arrs.indices->shape[0];
dgl_id_t first_eid = eid_data[0];
for (size_t i = 0; i < len; i++) {
CHECK_GE(indices_data[i], first_vid);
indices_data[i] -= first_vid;
CHECK_GE(eid_data[i], first_eid);
eid_data[i] -= first_eid;
}
// NOTE(review): eid_data[0] is read unconditionally below; confirm the
// slice can never be empty when remap is requested.
dgl_id_t *eid_data = static_cast<dgl_id_t*>(csr.data->data);
const dgl_id_t first_eid = eid_data[0];
IdArray new_indices = Sub(csr.indices, first_vid);
IdArray new_data = Sub(csr.data, first_eid);
return {csr.indptr, new_indices, new_data};
} else {
return {csr.indptr, csr.indices, csr.data};
}
return std::vector<IdArray>{arrs.indptr, arrs.indices, arrs.id};
} else if (fmt == "coo") {
ImmutableGraph::CSR::Ptr csr = graph.GetInCSR();
int64_t nnz = csr->indptr[layer1_end] - csr->indptr[layer1_start];
IdArray idx = IdArray::Empty({2 * nnz}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
IdArray eid = IdArray::Empty({nnz}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
} else if (fmt == std::string("coo")) {
CSRMatrix csr = graph.GetInCSR()->ToCSRMatrix();
const dgl_id_t* indptr = static_cast<dgl_id_t*>(csr.indptr->data);
const dgl_id_t* indices = static_cast<dgl_id_t*>(csr.indices->data);
const dgl_id_t* edge_ids = static_cast<dgl_id_t*>(csr.data->data);
// Number of entries stored between the two row offsets of the slice.
int64_t nnz = indptr[layer1_end] - indptr[layer1_start];
// idx packs both coordinates into one array, hence 2 * nnz entries.
IdArray idx = NewIdArray(2 * nnz);
IdArray eid = NewIdArray(nnz);
int64_t *idx_data = static_cast<int64_t*>(idx->data);
dgl_id_t *eid_data = static_cast<dgl_id_t*>(eid->data);
size_t num_edges = 0;
for (size_t i = layer1_start; i < layer1_end; i++) {
for (int64_t j = csr->indptr[i]; j < csr->indptr[i + 1]; j++) {
for (dgl_id_t j = indptr[i]; j < indptr[i + 1]; j++) {
// These nodes are all in a layer. We need to remap them to the node id
// local to the layer.
idx_data[num_edges] = remap ? i - layer1_start : i;
......@@ -51,25 +55,38 @@ std::vector<IdArray> GetNodeFlowSlice(const ImmutableGraph &graph, const std::st
}
CHECK_EQ(num_edges, nnz);
if (remap) {
size_t edge_start = csr->indptr[layer1_start];
dgl_id_t first_eid = csr->edge_ids[edge_start];
size_t edge_start = indptr[layer1_start];
dgl_id_t first_eid = edge_ids[edge_start];
dgl_id_t first_vid = layer1_start - layer0_size;
for (int64_t i = 0; i < nnz; i++) {
CHECK_GE(csr->indices[edge_start + i], first_vid);
idx_data[nnz + i] = csr->indices[edge_start + i] - first_vid;
eid_data[i] = csr->edge_ids[edge_start + i] - first_eid;
CHECK_GE(indices[edge_start + i], first_vid);
idx_data[nnz + i] = indices[edge_start + i] - first_vid;
eid_data[i] = edge_ids[edge_start + i] - first_eid;
}
} else {
std::copy(csr->indices.begin() + csr->indptr[layer1_start],
csr->indices.begin() + csr->indptr[layer1_end], idx_data + nnz);
std::copy(csr->edge_ids.begin() + csr->indptr[layer1_start],
csr->edge_ids.begin() + csr->indptr[layer1_end], eid_data);
// Without remapping, column ids and edge ids are copied over unchanged.
std::copy(indices + indptr[layer1_start],
indices + indptr[layer1_end], idx_data + nnz);
std::copy(edge_ids + indptr[layer1_start],
edge_ids + indptr[layer1_end], eid_data);
}
return std::vector<IdArray>{idx, eid};
} else {
LOG(FATAL) << "unsupported adjacency matrix format";
return std::vector<IdArray>();
return {};
}
}
/*!
 * \brief C API entry point returning the adjacency of one NodeFlow block.
 *
 * Arguments (in order): graph handle, format string, layer0 size, start row,
 * end row. The slice is always computed with remapping enabled, and the
 * resulting arrays are handed back as a packed function.
 */
DGL_REGISTER_GLOBAL("nodeflow._CAPI_NodeFlowGetBlockAdj")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
    GraphHandle ghandle = args[0];
    std::string format = args[1];
    int64_t layer0_size = args[2];
    int64_t start = args[3];
    int64_t end = args[4];
    // The values are converted to size_t below; a negative input would
    // silently wrap around to a huge offset, so reject it up front.
    CHECK_GE(layer0_size, 0) << "layer0_size must be non-negative";
    CHECK_GE(start, 0) << "start must be non-negative";
    CHECK_GE(end, 0) << "end must be non-negative";
    const GraphInterface *ptr = static_cast<const GraphInterface *>(ghandle);
    const ImmutableGraph* gptr = dynamic_cast<const ImmutableGraph*>(ptr);
    // dynamic_cast yields nullptr when the handle is not an ImmutableGraph;
    // dereferencing it would be undefined behaviour.
    CHECK(gptr != nullptr) << "NodeFlowGetBlockAdj requires an ImmutableGraph handle";
    auto res = GetNodeFlowSlice(*gptr, format, layer0_size, start, end, true);
    *rv = ConvertNDArrayVectorToPackedFunc(res);
  });
} // namespace dgl
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment