Unverified Commit 929742b5 authored by Da Zheng's avatar Da Zheng Committed by GitHub
Browse files

[Feature] Reimplement Immutable graph index in DGL (#342)

* init.

* it's compiled.

* add immutable graph constructor.

* add immutable graph API.

* fix.

* impl get adjacency matrix.

* fix.

* fix graph_index from scipy matrix.

* add neighbor sampling.

* remap vertex ids.

* fix.

* move sampler test.

* fix tests.

* add comments

* remove mxnet-specific immutable graph.

* fix.

* fix lint.

* fix.

* try to fix windows compile error.

* fix.

* fix.

* add test.

* unify Graph and ImmutableGraph.

* fix bugs.

* fix compile.

* move immutable graph.

* fix.

* remove print.

* fix lint.

* fix

* fix lint.

* fix lint.

* fix test.

* fix comments.

* merge GraphIndex and ImmutableGraphIndex.

* temp fix.

* impl GetAdj.

* fix lint

* fix.

* fix.

* fix.

* fix.

* fix.

* use csr only for readonly graph.

* Revert "use csr only for readonly graph."

This reverts commit 8e24bb033af8504531b22849de5b7567b168e0d5.

* remove code.

* fix.

* fix.

* fix.

* fix.

* fix.

* fix.

* address comments.

* fix for comments.

* fix comments.

* revert.

* move test_graph_index to compute.

* fix.

* fix.

* impl GetAdj for coo.

* fix.

* fix tests.

* address comments.

* address comments.

* fix comment.

* address comments.

* use lambda.

* other comments.

* address comments.

* modify the semantics of edges.

* fix order.

* use DGLIdIter

* fix.

* remove NotImplemented.

* revert some code.
parent ed1948b5
...@@ -10,26 +10,18 @@ ...@@ -10,26 +10,18 @@
#include <cstdint> #include <cstdint>
#include <utility> #include <utility>
#include <tuple> #include <tuple>
#include "runtime/ndarray.h"
namespace dgl { #include "graph_interface.h"
typedef uint64_t dgl_id_t; namespace dgl {
typedef dgl::runtime::NDArray IdArray;
typedef dgl::runtime::NDArray DegreeArray;
typedef dgl::runtime::NDArray BoolArray;
typedef dgl::runtime::NDArray IntArray;
class Graph; class Graph;
class GraphOp; class GraphOp;
struct Subgraph;
/*! /*!
* \brief Base dgl graph index class. * \brief Base dgl graph index class.
* *
* DGL's graph is directed. Vertices are integers enumerated from zero. Edges * DGL's graph is directed. Vertices are integers enumerated from zero.
* are uniquely identified by the two endpoints. Multi-edge is currently not
* supported.
* *
* Removal of vertices/edges is not allowed. Instead, the graph can only be "cleared" * Removal of vertices/edges is not allowed. Instead, the graph can only be "cleared"
* by removing all the vertices and edges. * by removing all the vertices and edges.
...@@ -40,17 +32,15 @@ struct Subgraph; ...@@ -40,17 +32,15 @@ struct Subgraph;
* If the length of src id array is one, it represents one-many connections. * If the length of src id array is one, it represents one-many connections.
* If the length of dst id array is one, it represents many-one connections. * If the length of dst id array is one, it represents many-one connections.
*/ */
class Graph { class Graph: public GraphInterface {
public: public:
/* \brief structure used to represent a list of edges */
typedef struct {
/* \brief the two endpoints and the id of the edge */
IdArray src, dst, id;
} EdgeArray;
/*! \brief default constructor */ /*! \brief default constructor */
explicit Graph(bool multigraph = false) : is_multigraph_(multigraph) {} explicit Graph(bool multigraph = false) : is_multigraph_(multigraph) {}
/*! \brief construct a graph from the coo format. */
Graph(IdArray src_ids, IdArray dst_ids, IdArray edge_ids, size_t num_nodes,
bool multigraph = false);
/*! \brief default copy constructor */ /*! \brief default copy constructor */
Graph(const Graph& other) = default; Graph(const Graph& other) = default;
...@@ -118,6 +108,13 @@ class Graph { ...@@ -118,6 +108,13 @@ class Graph {
return is_multigraph_; return is_multigraph_;
} }
/*!
* \return whether the graph is read-only
*/
virtual bool IsReadonly() const {
return false;
}
/*! \return the number of vertices in the graph.*/ /*! \return the number of vertices in the graph.*/
uint64_t NumVertices() const { uint64_t NumVertices() const {
return adjlist_.size(); return adjlist_.size();
...@@ -232,7 +229,7 @@ class Graph { ...@@ -232,7 +229,7 @@ class Graph {
* \param sorted Whether the returned edge list is sorted by their src and dst ids * \param sorted Whether the returned edge list is sorted by their src and dst ids
* \return the id arrays of the two endpoints of the edges. * \return the id arrays of the two endpoints of the edges.
*/ */
EdgeArray Edges(bool sorted = false) const; EdgeArray Edges(const std::string &order = "") const;
/*! /*!
* \brief Get the in degree of the given vertex. * \brief Get the in degree of the given vertex.
...@@ -311,15 +308,15 @@ class Graph { ...@@ -311,15 +308,15 @@ class Graph {
* *
* \return the reversed graph * \return the reversed graph
*/ */
Graph Reverse() const; GraphPtr Reverse() const;
/*! /*!
* \brief Return the successor vector * \brief Return the successor vector
* \param vid The vertex id. * \param vid The vertex id.
* \return the successor vector * \return the successor vector
*/ */
const std::vector<dgl_id_t>& SuccVec(dgl_id_t vid) const { DGLIdIters SuccVec(dgl_id_t vid) const {
return adjlist_[vid].succ; return DGLIdIters(adjlist_[vid].succ.begin(), adjlist_[vid].succ.end());
} }
/*! /*!
...@@ -327,8 +324,8 @@ class Graph { ...@@ -327,8 +324,8 @@ class Graph {
* \param vid The vertex id. * \param vid The vertex id.
* \return the out edge id vector * \return the out edge id vector
*/ */
const std::vector<dgl_id_t>& OutEdgeVec(dgl_id_t vid) const { DGLIdIters OutEdgeVec(dgl_id_t vid) const {
return adjlist_[vid].edge_id; return DGLIdIters(adjlist_[vid].edge_id.begin(), adjlist_[vid].edge_id.end());
} }
/*! /*!
...@@ -336,8 +333,8 @@ class Graph { ...@@ -336,8 +333,8 @@ class Graph {
* \param vid The vertex id. * \param vid The vertex id.
* \return the predecessor vector * \return the predecessor vector
*/ */
const std::vector<dgl_id_t>& PredVec(dgl_id_t vid) const { DGLIdIters PredVec(dgl_id_t vid) const {
return reverse_adjlist_[vid].succ; return DGLIdIters(reverse_adjlist_[vid].succ.begin(), reverse_adjlist_[vid].succ.end());
} }
/*! /*!
...@@ -345,8 +342,41 @@ class Graph { ...@@ -345,8 +342,41 @@ class Graph {
* \param vid The vertex id. * \param vid The vertex id.
* \return the in edge id vector * \return the in edge id vector
*/ */
const std::vector<dgl_id_t>& InEdgeVec(dgl_id_t vid) const { DGLIdIters InEdgeVec(dgl_id_t vid) const {
return reverse_adjlist_[vid].edge_id; return DGLIdIters(reverse_adjlist_[vid].edge_id.begin(),
reverse_adjlist_[vid].edge_id.end());
}
/*!
* \brief Reset the data in the graph and move its data to the returned graph object.
* \return a raw pointer to the graph object.
*/
virtual GraphInterface *Reset() {
Graph* gptr = new Graph();
*gptr = std::move(*this);
return gptr;
}
/*!
* \brief Get the adjacency matrix of the graph.
*
* By default, a row of returned adjacency matrix represents the destination
* of an edge and the column represents the source.
* \param transpose A flag to transpose the returned adjacency matrix.
* \param fmt the format of the returned adjacency matrix.
* \return a vector of three IdArray.
*/
virtual std::vector<IdArray> GetAdj(bool transpose, const std::string &fmt) const;
/*!
* \brief Sample a subgraph from the seed vertices with neighbor sampling.
* The neighbors are sampled with a uniform distribution.
* \return a subgraph
*/
virtual SampledSubgraph NeighborUniformSample(IdArray seeds, const std::string &neigh_type,
int num_hops, int expand_factor) const {
LOG(FATAL) << "NeighborUniformSample isn't supported in mutable graph";
return SampledSubgraph();
} }
protected: protected:
...@@ -382,22 +412,6 @@ class Graph { ...@@ -382,22 +412,6 @@ class Graph {
uint64_t num_edges_ = 0; uint64_t num_edges_ = 0;
}; };
/*! \brief Subgraph data structure */
struct Subgraph {
/*! \brief The graph. */
Graph graph;
/*!
* \brief The induced vertex ids.
* \note This is also a map from the new vertex id to the vertex id in the parent graph.
*/
IdArray induced_vertices;
/*!
* \brief The induced edge ids.
* \note This is also a map from the new edge id to the edge id in the parent graph.
*/
IdArray induced_edges;
};
} // namespace dgl } // namespace dgl
#endif // DGL_GRAPH_H_ #endif // DGL_GRAPH_H_
/*!
* Copyright (c) 2018 by Contributors
* \file dgl/graph_interface.h
* \brief DGL graph index class.
*/
#ifndef DGL_GRAPH_INTERFACE_H_
#define DGL_GRAPH_INTERFACE_H_
#include <cstdint>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "runtime/ndarray.h"
namespace dgl {
typedef uint64_t dgl_id_t;
typedef dgl::runtime::NDArray IdArray;
typedef dgl::runtime::NDArray DegreeArray;
typedef dgl::runtime::NDArray BoolArray;
typedef dgl::runtime::NDArray IntArray;
struct Subgraph;
struct SampledSubgraph;
/*!
* \brief This class references data in std::vector.
*
* This isn't a STL-style iterator. It provides a STL data container interface.
* but it doesn't own data itself. instead, it only references data in std::vector.
*/
class DGLIdIters {
std::vector<dgl_id_t>::const_iterator begin_, end_;
public:
DGLIdIters(std::vector<dgl_id_t>::const_iterator begin,
std::vector<dgl_id_t>::const_iterator end) {
this->begin_ = begin;
this->end_ = end;
}
std::vector<dgl_id_t>::const_iterator begin() const {
return this->begin_;
}
std::vector<dgl_id_t>::const_iterator end() const {
return this->end_;
}
dgl_id_t operator[](int64_t i) const {
return *(this->begin_ + i);
}
size_t size() const {
return this->end_ - this->begin_;
}
};
class GraphInterface;
typedef std::shared_ptr<GraphInterface> GraphPtr;
/*!
 * \brief dgl graph index interface.
 *
 * DGL's graph is directed. Vertices are integers enumerated from zero.
 *
 * This is an abstract class: every operation is pure virtual and is provided
 * by a concrete graph index (Graph and ImmutableGraph both derive from it).
 */
class GraphInterface {
public:
/*! \brief structure used to represent a list of edges */
typedef struct {
/*! \brief the two endpoints and the id of the edge */
IdArray src, dst, id;
} EdgeArray;
/*! \brief virtual destructor so that implementations can be destroyed through this interface */
virtual ~GraphInterface() = default;
/*!
 * \brief Add vertices to the graph.
 * \note Since vertices are integers enumerated from zero, only the number of
 * vertices to be added needs to be specified.
 * \param num_vertices The number of vertices to be added.
 */
virtual void AddVertices(uint64_t num_vertices) = 0;
/*!
 * \brief Add one edge to the graph.
 * \param src The source vertex.
 * \param dst The destination vertex.
 */
virtual void AddEdge(dgl_id_t src, dgl_id_t dst) = 0;
/*!
 * \brief Add edges to the graph.
 * \param src_ids The source vertex id array.
 * \param dst_ids The destination vertex id array.
 */
virtual void AddEdges(IdArray src_ids, IdArray dst_ids) = 0;
/*!
 * \brief Clear the graph. Remove all vertices/edges.
 */
virtual void Clear() = 0;
/*!
 * \return whether the graph is a multigraph
 */
virtual bool IsMultigraph() const = 0;
/*!
 * \return whether the graph is read-only
 */
virtual bool IsReadonly() const = 0;
/*! \return the number of vertices in the graph.*/
virtual uint64_t NumVertices() const = 0;
/*! \return the number of edges in the graph.*/
virtual uint64_t NumEdges() const = 0;
/*! \return true if the given vertex is in the graph.*/
virtual bool HasVertex(dgl_id_t vid) const = 0;
/*! \return a 0-1 array indicating whether the given vertices are in the graph.*/
virtual BoolArray HasVertices(IdArray vids) const = 0;
/*! \return true if the given edge is in the graph.*/
virtual bool HasEdgeBetween(dgl_id_t src, dgl_id_t dst) const = 0;
/*! \return a 0-1 array indicating whether the given edges are in the graph.*/
virtual BoolArray HasEdgesBetween(IdArray src_ids, IdArray dst_ids) const = 0;
/*!
 * \brief Find the predecessors of a vertex.
 * \param vid The vertex id.
 * \param radius The radius of the neighborhood. Default is immediate neighbor (radius=1).
 * \return the predecessor id array.
 */
virtual IdArray Predecessors(dgl_id_t vid, uint64_t radius = 1) const = 0;
/*!
 * \brief Find the successors of a vertex.
 * \param vid The vertex id.
 * \param radius The radius of the neighborhood. Default is immediate neighbor (radius=1).
 * \return the successor id array.
 */
virtual IdArray Successors(dgl_id_t vid, uint64_t radius = 1) const = 0;
/*!
 * \brief Get all edge ids between the two given endpoints
 * \note Edges are associated with an integer id start from zero.
 * The id is assigned when the edge is being added to the graph.
 * \param src The source vertex.
 * \param dst The destination vertex.
 * \return the edge id array.
 */
virtual IdArray EdgeId(dgl_id_t src, dgl_id_t dst) const = 0;
/*!
 * \brief Get all edge ids between the given endpoint pairs.
 * \note Edges are associated with an integer id start from zero.
 * The id is assigned when the edge is being added to the graph.
 * If duplicate pairs exist, the returned edge IDs will also duplicate.
 * The order of returned edge IDs will follow the order of src-dst pairs
 * first, and ties are broken by the order of edge ID.
 * \param src The source vertex id array.
 * \param dst The destination vertex id array.
 * \return EdgeArray containing all edges between all pairs.
 */
virtual EdgeArray EdgeIds(IdArray src, IdArray dst) const = 0;
/*!
 * \brief Find the edge ID and return the pair of endpoints
 * \param eid The edge ID
 * \return a pair whose first element is the source and the second the destination.
 */
virtual std::pair<dgl_id_t, dgl_id_t> FindEdge(dgl_id_t eid) const = 0;
/*!
 * \brief Find the edge IDs and return their source and target node IDs.
 * \param eids The edge ID array.
 * \return EdgeArray containing all edges with id in eid. The order is preserved.
 */
virtual EdgeArray FindEdges(IdArray eids) const = 0;
/*!
 * \brief Get the in edges of the vertex.
 * \note The returned dst id array is filled with vid.
 * \param vid The vertex id.
 * \return the edges
 */
virtual EdgeArray InEdges(dgl_id_t vid) const = 0;
/*!
 * \brief Get the in edges of the vertices.
 * \param vids The vertex id array.
 * \return the id arrays of the two endpoints of the edges.
 */
virtual EdgeArray InEdges(IdArray vids) const = 0;
/*!
 * \brief Get the out edges of the vertex.
 * \note The returned src id array is filled with vid.
 * \param vid The vertex id.
 * \return the id arrays of the two endpoints of the edges.
 */
virtual EdgeArray OutEdges(dgl_id_t vid) const = 0;
/*!
 * \brief Get the out edges of the vertices.
 * \param vids The vertex id array.
 * \return the id arrays of the two endpoints of the edges.
 */
virtual EdgeArray OutEdges(IdArray vids) const = 0;
/*!
 * \brief Get all the edges in the graph.
 * \note If order is "srcdst", the returned edges list is sorted by their src and
 * dst ids. If order is "eid", they are in their edge id order.
 * Otherwise, in the arbitrary order.
 * \param order The order of the returned edge list.
 * \return the id arrays of the two endpoints of the edges.
 */
virtual EdgeArray Edges(const std::string &order = "") const = 0;
/*!
 * \brief Get the in degree of the given vertex.
 * \param vid The vertex id.
 * \return the in degree
 */
virtual uint64_t InDegree(dgl_id_t vid) const = 0;
/*!
 * \brief Get the in degrees of the given vertices.
 * \param vids The vertex id array.
 * \return the in degree array
 */
virtual DegreeArray InDegrees(IdArray vids) const = 0;
/*!
 * \brief Get the out degree of the given vertex.
 * \param vid The vertex id.
 * \return the out degree
 */
virtual uint64_t OutDegree(dgl_id_t vid) const = 0;
/*!
 * \brief Get the out degrees of the given vertices.
 * \param vids The vertex id array.
 * \return the out degree array
 */
virtual DegreeArray OutDegrees(IdArray vids) const = 0;
/*!
 * \brief Construct the induced subgraph of the given vertices.
 *
 * The induced subgraph is a subgraph formed by specifying a set of vertices V' and then
 * selecting all of the edges from the original graph that connect two vertices in V'.
 *
 * Vertices and edges in the original graph will be "reindexed" to local index. The local
 * index of the vertices preserve the order of the given id array, while the local index
 * of the edges preserve the index order in the original graph. Vertices not in the
 * original graph are ignored.
 *
 * The result subgraph is read-only.
 *
 * \param vids The vertices in the subgraph.
 * \return the induced subgraph
 */
virtual Subgraph VertexSubgraph(IdArray vids) const = 0;
/*!
 * \brief Construct the induced edge subgraph of the given edges.
 *
 * The induced edges subgraph is a subgraph formed by specifying a set of edges E' and then
 * selecting all of the nodes from the original graph that are endpoints in E'.
 *
 * Vertices and edges in the original graph will be "reindexed" to local index. The local
 * index of the edges preserve the order of the given id array, while the local index
 * of the vertices preserve the index order in the original graph. Edges not in the
 * original graph are ignored.
 *
 * The result subgraph is read-only.
 *
 * \param eids The edges in the subgraph.
 * \return the induced edge subgraph
 */
virtual Subgraph EdgeSubgraph(IdArray eids) const = 0;
/*!
 * \brief Return a new graph with all the edges reversed.
 *
 * The returned graph preserves the vertex and edge index in the original graph.
 *
 * \return the reversed graph
 */
virtual GraphPtr Reverse() const = 0;
/*!
 * \brief Return the successor vector
 * \note The result only references data held by the graph; it does not own it.
 * \param vid The vertex id.
 * \return the successor vector iterator pair.
 */
virtual DGLIdIters SuccVec(dgl_id_t vid) const = 0;
/*!
 * \brief Return the out edge id vector
 * \note The result only references data held by the graph; it does not own it.
 * \param vid The vertex id.
 * \return the out edge id vector iterator pair.
 */
virtual DGLIdIters OutEdgeVec(dgl_id_t vid) const = 0;
/*!
 * \brief Return the predecessor vector
 * \note The result only references data held by the graph; it does not own it.
 * \param vid The vertex id.
 * \return the predecessor vector iterator pair.
 */
virtual DGLIdIters PredVec(dgl_id_t vid) const = 0;
/*!
 * \brief Return the in edge id vector
 * \note The result only references data held by the graph; it does not own it.
 * \param vid The vertex id.
 * \return the in edge id vector iterator pair.
 */
virtual DGLIdIters InEdgeVec(dgl_id_t vid) const = 0;
/*!
 * \brief Reset the data in the graph and move its data to the returned graph object.
 * \note NOTE(review): the result is a raw pointer to a newly created object; presumably
 * the caller becomes responsible for deleting it - confirm against the callers.
 * \return a raw pointer to the graph object.
 */
virtual GraphInterface *Reset() = 0;
/*!
 * \brief Get the adjacency matrix of the graph.
 *
 * By default, a row of returned adjacency matrix represents the destination
 * of an edge and the column represents the source.
 * \param transpose A flag to transpose the returned adjacency matrix.
 * \param fmt the format of the returned adjacency matrix.
 * (NOTE(review): the accepted format names are defined by the implementations - confirm there.)
 * \return a vector of IdArrays.
 */
virtual std::vector<IdArray> GetAdj(bool transpose, const std::string &fmt) const = 0;
/*!
 * \brief Sample a subgraph from the seed vertices with neighbor sampling.
 * The neighbors are sampled with a uniform distribution.
 * \param seeds The seed vertices to start sampling from.
 * \param neigh_type The kind of neighbors to sample
 * (NOTE(review): accepted values are defined by the implementation - confirm there).
 * \param num_hops The number of hops to sample (presumably counted from the seeds - confirm).
 * \param expand_factor Presumably the maximal number of neighbors sampled per vertex - confirm.
 * \return a subgraph
 */
virtual SampledSubgraph NeighborUniformSample(IdArray seeds, const std::string &neigh_type,
int num_hops, int expand_factor) const = 0;
};
/*!
 * \brief Subgraph data structure
 *
 * Bundles the graph index of a subgraph with the arrays that map its local
 * vertex/edge ids back to the ids in the parent graph.
 */
struct Subgraph {
/*! \brief The graph (a shared pointer to a GraphInterface implementation). */
GraphPtr graph;
/*!
 * \brief The induced vertex ids.
 * \note This is also a map from the new vertex id to the vertex id in the parent graph.
 */
IdArray induced_vertices;
/*!
 * \brief The induced edge ids.
 * \note This is also a map from the new edge id to the edge id in the parent graph.
 */
IdArray induced_edges;
};
/*!
 * \brief When we sample a subgraph, we need to store extra information,
 * such as the layer Ids of the vertices and the sampling probability.
 *
 * Extends Subgraph, so the graph and the induced vertex/edge id arrays are
 * available as well.
 */
struct SampledSubgraph: public Subgraph {
/*!
 * \brief the layer of a sampled vertex in the subgraph.
 */
IdArray layer_ids;
/*!
 * \brief the probability that a vertex is sampled.
 */
runtime::NDArray sample_prob;
};
} // namespace dgl
#endif // DGL_GRAPH_INTERFACE_H_
/*!
* Copyright (c) 2018 by Contributors
* \file dgl/immutable_graph.h
* \brief DGL immutable graph index class.
*/
#ifndef DGL_IMMUTABLE_GRAPH_H_
#define DGL_IMMUTABLE_GRAPH_H_
#include <vector>
#include <string>
#include <cstdint>
#include <utility>
#include <tuple>
#include "runtime/ndarray.h"
#include "graph_interface.h"
namespace dgl {
/*!
* \brief DGL immutable graph index class.
*
* DGL's graph is directed. Vertices are integers enumerated from zero.
*/
class ImmutableGraph: public GraphInterface {
public:
/*!
 * \brief A CSR structure expressed as IdArrays: the index pointer array, the
 * column index array and the edge id array.
 */
typedef struct {
IdArray indptr, indices, id;
} CSRArray;
/*! \brief A single edge: its two end points plus its edge id. */
struct Edge {
/*!
 * \brief the two end points of the edge.
 * NOTE(review): which slot holds the source and which the destination is not
 * visible here; presumably `sort_on` of CSR::FromEdges indexes this array - confirm.
 */
dgl_id_t end_points[2];
/*! \brief the id of the edge. */
dgl_id_t edge_id;
};
/*!
 * \brief Compressed sparse row storage of the edges of a graph.
 *
 * For a vertex v, the entries [indptr[v], indptr[v + 1]) of `indices` hold the
 * ids of its neighbors and the same entries of `edge_ids` hold the ids of the
 * corresponding edges.
 */
struct CSR {
  typedef std::shared_ptr<CSR> Ptr;

  /*! \brief row offsets; it has one more entry than there are vertices. */
  std::vector<int64_t> indptr;
  /*! \brief neighbor vertex ids, grouped by vertex. */
  std::vector<dgl_id_t> indices;
  /*! \brief edge ids, stored at the same positions as `indices`. */
  std::vector<dgl_id_t> edge_ids;

  /*!
   * \brief Create a CSR for the given number of vertices without any edge.
   * \param num_vertices The number of vertices; indptr gets num_vertices + 1 zero entries.
   * \param expected_num_edges Capacity to reserve for the edge arrays.
   */
  CSR(int64_t num_vertices, int64_t expected_num_edges)
      : indptr(num_vertices + 1) {
    indices.reserve(expected_num_edges);
    edge_ids.reserve(expected_num_edges);
  }

  /*! \return true if the vertex id is smaller than the number of vertices. */
  bool HasVertex(dgl_id_t vid) const {
    const uint64_t vertex_count = NumVertices();
    return vid < vertex_count;
  }

  /*! \return the number of vertices, i.e. the length of indptr minus one. */
  uint64_t NumVertices() const {
    return indptr.size() - 1;
  }

  /*! \return the number of stored edges, i.e. the length of `indices`. */
  uint64_t NumEdges() const {
    return indices.size();
  }

  /*!
   * \brief Get the number of entries stored for a vertex. No bounds checking is performed.
   * \param vid The vertex id.
   * \return the difference between the two consecutive row offsets of the vertex.
   */
  int64_t GetDegree(dgl_id_t vid) const {
    const int64_t row_begin = indptr[vid];
    const int64_t row_end = indptr[vid + 1];
    return row_end - row_begin;
  }

  /*! \brief Get the degrees of the given vertices. */
  DegreeArray GetDegrees(IdArray vids) const;
  /*! \brief Get the edges stored for the given vertex. */
  EdgeArray GetEdges(dgl_id_t vid) const;
  /*! \brief Get the edges stored for the given vertices. */
  EdgeArray GetEdges(IdArray vids) const;

  /*!
   * \brief Get the range of column indices that corresponds to vertex v.
   * \param v The vertex id.
   * \return a non-owning range that references `indices`.
   */
  DGLIdIters GetIndexRef(dgl_id_t v) const {
    return DGLIdIters(indices.begin() + indptr[v],
                      indices.begin() + indptr[v + 1]);
  }

  /*!
   * \brief Read all edges and store them in the vector.
   * \param edges The output vector.
   */
  void ReadAllEdges(std::vector<Edge> *edges) const;

  /*! \brief Transpose the CSR; ImmutableGraph uses it to derive the in-CSR from the out-CSR and vice versa. */
  CSR::Ptr Transpose() const;

  /*! \brief Construct the CSR of the subgraph induced by the given vertices. */
  std::pair<CSR::Ptr, IdArray> VertexSubgraph(IdArray vids) const;

  /*!
   * \brief Construct a CSR from a list of edges.
   *
   * When constructing a CSR, we need to sort the edge list. To reduce the overhead,
   * we simply sort on the input edge list. We allow sorting on both end points of an edge,
   * which is specified by `sort_on`.
   */
  static CSR::Ptr FromEdges(std::vector<Edge> *edges, int sort_on, int64_t num_nodes);
};
/*! \brief Construct an immutable graph from the COO format. */
ImmutableGraph(IdArray src_ids, IdArray dst_ids, IdArray edge_ids, size_t num_nodes,
bool multigraph = false);
/*!
 * \brief Construct an immutable graph from the CSR format.
 *
 * A graph is described by two CSRs: one stores the in-edges of the vertices and
 * the other stores the out-edges. Both hold the same edges; they are both kept
 * because some operators run faster on the in-edge CSR and others on the
 * out-edge CSR.
 *
 * Only one of the two is required, since a single CSR already contains all the
 * information. Temporary graphs (e.g., sampled subgraphs) can therefore be built
 * from the CSR that suits the expected operations, and the other CSR is
 * constructed on demand.
 *
 * \param in_csr The CSR of the in-edges; may be null if out_csr is given.
 * \param out_csr The CSR of the out-edges; may be null if in_csr is given.
 * \param multigraph Whether the graph is a multigraph.
 */
ImmutableGraph(CSR::Ptr in_csr, CSR::Ptr out_csr, bool multigraph = false)
    : is_multigraph_(multigraph) {
  in_csr_ = in_csr;
  out_csr_ = out_csr;
  // At least one CSR is needed: the missing one is derived from the other.
  CHECK(!(in_csr_ == nullptr && out_csr_ == nullptr))
    << "there must exist one of the CSRs";
}
/*! \brief default constructor */
explicit ImmutableGraph(bool multigraph = false) : is_multigraph_(multigraph) {}
/*! \brief default copy constructor */
ImmutableGraph(const ImmutableGraph& other) = default;
#ifndef _MSC_VER
/*! \brief default move constructor */
ImmutableGraph(ImmutableGraph&& other) = default;
#else
/*!
 * \brief hand-written move constructor used when compiling with MSVC.
 *
 * Takes over both CSR pointers of `other` and copies its multigraph flag.
 * A moved-from shared pointer is empty, so `other` no longer references any CSR.
 */
ImmutableGraph(ImmutableGraph&& other) {
  is_multigraph_ = other.is_multigraph_;
  in_csr_ = std::move(other.in_csr_);
  out_csr_ = std::move(other.out_csr_);
}
#endif  // _MSC_VER
/*! \brief default copy assignment operator */
ImmutableGraph& operator=(const ImmutableGraph& other) = default;
/*! \brief default destructor */
~ImmutableGraph() = default;
/*!
 * \brief Adding vertices is not supported by an immutable graph.
 * \note This implementation of GraphInterface::AddVertices only reports a fatal error.
 * \param num_vertices The number of vertices to be added (unused).
 */
void AddVertices(uint64_t num_vertices) {
LOG(FATAL) << "AddVertices isn't supported in ImmutableGraph";
}
/*!
 * \brief Adding an edge is not supported by an immutable graph.
 * \note This implementation of GraphInterface::AddEdge only reports a fatal error.
 * \param src The source vertex (unused).
 * \param dst The destination vertex (unused).
 */
void AddEdge(dgl_id_t src, dgl_id_t dst) {
LOG(FATAL) << "AddEdge isn't supported in ImmutableGraph";
}
/*!
 * \brief Adding edges is not supported by an immutable graph.
 * \note This implementation of GraphInterface::AddEdges only reports a fatal error.
 * \param src_ids The source vertex id array (unused).
 * \param dst_ids The destination vertex id array (unused).
 */
void AddEdges(IdArray src_ids, IdArray dst_ids) {
LOG(FATAL) << "AddEdges isn't supported in ImmutableGraph";
}
/*!
 * \brief Clearing is not supported by an immutable graph.
 * \note This implementation of GraphInterface::Clear only reports a fatal error.
 */
void Clear() {
LOG(FATAL) << "Clear isn't supported in ImmutableGraph";
}
/*!
 * \return whether the graph is a multigraph, as recorded in is_multigraph_
 */
bool IsMultigraph() const {
return is_multigraph_;
}
/*!
 * \return whether the graph is read-only; always true for an immutable graph
 */
virtual bool IsReadonly() const {
return true;
}
/*!
 * \brief Get the number of vertices from whichever CSR is available.
 * \return the number of vertices in the graph, or 0 if the graph holds no CSR at all
 *         (e.g., a default-constructed graph).
 */
uint64_t NumVertices() const {
  if (in_csr_)
    return in_csr_->NumVertices();
  if (out_csr_)
    return out_csr_->NumVertices();
  // Neither CSR exists: the graph is empty. Dereferencing out_csr_ here would
  // be a null pointer access.
  return 0;
}
/*!
 * \brief Get the number of edges from whichever CSR is available.
 * \return the number of edges in the graph, or 0 if the graph holds no CSR at all
 *         (e.g., a default-constructed graph).
 */
uint64_t NumEdges() const {
  if (in_csr_)
    return in_csr_->NumEdges();
  if (out_csr_)
    return out_csr_->NumEdges();
  // Neither CSR exists: the graph is empty. Dereferencing out_csr_ here would
  // be a null pointer access.
  return 0;
}
/*!
 * \brief Check a vertex id against the number of vertices.
 * \param vid The vertex id.
 * \return true if the given vertex is in the graph.
 */
bool HasVertex(dgl_id_t vid) const {
  const uint64_t vertex_count = NumVertices();
  return vid < vertex_count;
}
/*! \return a 0-1 array indicating whether the given vertices are in the graph.*/
BoolArray HasVertices(IdArray vids) const;
/*! \return true if the given edge is in the graph.*/
bool HasEdgeBetween(dgl_id_t src, dgl_id_t dst) const;
/*! \return a 0-1 array indicating whether the given edges are in the graph.*/
BoolArray HasEdgesBetween(IdArray src_ids, IdArray dst_ids) const;
/*!
* \brief Find the predecessors of a vertex.
* \param vid The vertex id.
* \param radius The radius of the neighborhood. Default is immediate neighbor (radius=1).
* \return the predecessor id array.
*/
IdArray Predecessors(dgl_id_t vid, uint64_t radius = 1) const;
/*!
* \brief Find the successors of a vertex.
* \param vid The vertex id.
* \param radius The radius of the neighborhood. Default is immediate neighbor (radius=1).
* \return the successor id array.
*/
IdArray Successors(dgl_id_t vid, uint64_t radius = 1) const;
/*!
* \brief Get all edge ids between the two given endpoints
* \note Edges are associated with an integer id start from zero.
* The id is assigned when the edge is being added to the graph.
* \param src The source vertex.
* \param dst The destination vertex.
* \return the edge id array.
*/
IdArray EdgeId(dgl_id_t src, dgl_id_t dst) const;
/*!
* \brief Get all edge ids between the given endpoint pairs.
* \note Edges are associated with an integer id start from zero.
* The id is assigned when the edge is being added to the graph.
* If duplicate pairs exist, the returned edge IDs will also duplicate.
* The order of returned edge IDs will follow the order of src-dst pairs
* first, and ties are broken by the order of edge ID.
* \return EdgeArray containing all edges between all pairs.
*/
EdgeArray EdgeIds(IdArray src, IdArray dst) const;
/*!
 * \brief Finding an edge by its id is not supported by an immutable graph.
 * \note This implementation of GraphInterface::FindEdge only reports a fatal error.
 * \param eid The edge ID (unused).
 * \return a value-initialized pair; it only exists to satisfy the return type.
 */
std::pair<dgl_id_t, dgl_id_t> FindEdge(dgl_id_t eid) const {
LOG(FATAL) << "FindEdge isn't supported in ImmutableGraph";
return std::pair<dgl_id_t, dgl_id_t>();
}
/*!
 * \brief Finding edges by their ids is not supported by an immutable graph.
 * \note This implementation of GraphInterface::FindEdges only reports a fatal error.
 * \param eids The edge ID array (unused).
 * \return a default-constructed EdgeArray; it only exists to satisfy the return type.
 */
EdgeArray FindEdges(IdArray eids) const {
LOG(FATAL) << "FindEdges isn't supported in ImmutableGraph";
return EdgeArray();
}
/*!
 * \brief Get the in edges of the vertex.
 *
 * The edges are read from the in-edge CSR, which is built on demand if the
 * graph only holds the out-edge CSR.
 * \note The returned dst id array is filled with vid.
 * \param vid The vertex id.
 * \return the edges
 */
EdgeArray InEdges(dgl_id_t vid) const {
  const CSR::Ptr in_edges = this->GetInCSR();
  return in_edges->GetEdges(vid);
}
/*!
 * \brief Get the in edges of the vertices.
 *
 * The edges are read from the in-edge CSR, which is built on demand if the
 * graph only holds the out-edge CSR.
 * \param vids The vertex id array.
 * \return the id arrays of the two endpoints of the edges.
 */
EdgeArray InEdges(IdArray vids) const {
  const CSR::Ptr in_edges = this->GetInCSR();
  return in_edges->GetEdges(vids);
}
/*!
 * \brief Get the out edges of the vertex.
 * \note The returned src id array is filled with vid.
 * \param vid The vertex id.
 * \return the id arrays of the two endpoints of the edges.
 */
EdgeArray OutEdges(dgl_id_t vid) const {
  const CSR::Ptr out_edges = this->GetOutCSR();
  const EdgeArray from_csr = out_edges->GetEdges(vid);
  // The source and the destination of the edge array read from the out-edge
  // CSR have to be swapped before it is returned.
  return ImmutableGraph::EdgeArray{from_csr.dst, from_csr.src, from_csr.id};
}
/*!
 * \brief Get the out edges of the vertices.
 * \param vids The vertex id array.
 * \return the id arrays of the two endpoints of the edges.
 */
EdgeArray OutEdges(IdArray vids) const {
  const CSR::Ptr out_edges = this->GetOutCSR();
  const EdgeArray from_csr = out_edges->GetEdges(vids);
  // Same as for a single vertex: the first two arrays are swapped on return.
  return ImmutableGraph::EdgeArray{from_csr.dst, from_csr.src, from_csr.id};
}
/*!
 * \brief Get all the edges in the graph.
 * \note If order is "srcdst", the returned edges list is sorted by their src and
 * dst ids. If order is "eid", they are in their edge id order.
 * Otherwise, in the arbitrary order.
 * \param order The order of the returned edge list.
 * \return the id arrays of the two endpoints of the edges.
 */
EdgeArray Edges(const std::string &order = "") const;
/*!
 * \brief Get the in degree of the given vertex.
 *
 * A fatal check fails if the vertex is not in the graph.
 * \param vid The vertex id.
 * \return the in degree
 */
uint64_t InDegree(dgl_id_t vid) const {
  CHECK(HasVertex(vid)) << "invalid vertex: " << vid;
  const CSR::Ptr in_edges = this->GetInCSR();
  return in_edges->GetDegree(vid);
}
/*!
 * \brief Get the in degrees of the given vertices.
 * \param vids The vertex id array.
 * \return the in degree array
 */
DegreeArray InDegrees(IdArray vids) const {
return this->GetInCSR()->GetDegrees(vids);
}
/*!
 * \brief Get the out degree of the given vertex.
 *
 * A fatal check fails if the vertex is not in the graph.
 * \param vid The vertex id.
 * \return the out degree
 */
uint64_t OutDegree(dgl_id_t vid) const {
  CHECK(HasVertex(vid)) << "invalid vertex: " << vid;
  const CSR::Ptr out_edges = this->GetOutCSR();
  return out_edges->GetDegree(vid);
}
/*!
 * \brief Get the out degrees of the given vertices.
 * \param vids The vertex id array.
 * \return the out degree array
 */
DegreeArray OutDegrees(IdArray vids) const {
return this->GetOutCSR()->GetDegrees(vids);
}
/*!
* \brief Construct the induced subgraph of the given vertices.
*
* The induced subgraph is a subgraph formed by specifying a set of vertices V' and then
* selecting all of the edges from the original graph that connect two vertices in V'.
*
* Vertices and edges in the original graph will be "reindexed" to local index. The local
* index of the vertices preserve the order of the given id array, while the local index
* of the edges preserve the index order in the original graph. Vertices not in the
* original graph are ignored.
*
* The result subgraph is read-only.
*
* \param vids The vertices in the subgraph.
* \return the induced subgraph
*/
Subgraph VertexSubgraph(IdArray vids) const;
/*!
* \brief Construct the induced edge subgraph of the given edges.
*
* The induced edges subgraph is a subgraph formed by specifying a set of edges E' and then
* selecting all of the nodes from the original graph that are endpoints in E'.
*
* Vertices and edges in the original graph will be "reindexed" to local index. The local
* index of the edges preserve the order of the given id array, while the local index
* of the vertices preserve the index order in the original graph. Edges not in the
* original graph are ignored.
*
* The result subgraph is read-only.
*
* \param eids The edges in the subgraph.
* \return the induced edge subgraph
*/
Subgraph EdgeSubgraph(IdArray eids) const;
/*!
 * \brief Return a new graph with all the edges reversed.
 *
 * The returned graph preserves the vertex and edge index in the original graph.
 * No edge data is copied: the new graph shares the CSRs of this graph with the
 * in-edge and out-edge roles exchanged.
 *
 * \return the reversed graph
 */
GraphPtr Reverse() const {
  // make_shared avoids the raw `new` and allocates the object together with
  // its reference count.
  return std::make_shared<ImmutableGraph>(out_csr_, in_csr_, is_multigraph_);
}
/*!
 * \brief Return the successor vector
 *
 * The out-edge CSR is obtained through GetOutCSR(), so it is built on demand
 * when the graph was constructed from the in-edge CSR only.
 * \note The result references data of the cached out-edge CSR and does not own it.
 * \param vid The vertex id.
 * \return the successor vector
 */
DGLIdIters SuccVec(dgl_id_t vid) const {
  return GetOutCSR()->GetIndexRef(vid);
}
/*!
 * \brief Return the out edge id vector
 *
 * The out-edge CSR is obtained through GetOutCSR(), so it is built on demand
 * when the graph was constructed from the in-edge CSR only.
 * \note The result references data of the cached out-edge CSR and does not own it.
 * \param vid The vertex id.
 * \return the out edge id vector
 */
DGLIdIters OutEdgeVec(dgl_id_t vid) const {
  const CSR::Ptr csr = GetOutCSR();
  return DGLIdIters(csr->edge_ids.begin() + csr->indptr[vid],
                    csr->edge_ids.begin() + csr->indptr[vid + 1]);
}
/*!
 * \brief Return the predecessor vector
 *
 * The in-edge CSR is obtained through GetInCSR(), so it is built on demand
 * when the graph was constructed from the out-edge CSR only.
 * \note The result references data of the cached in-edge CSR and does not own it.
 * \param vid The vertex id.
 * \return the predecessor vector
 */
DGLIdIters PredVec(dgl_id_t vid) const {
  return GetInCSR()->GetIndexRef(vid);
}
/*!
 * \brief Return the in edge id vector
 *
 * The in-edge CSR is obtained through GetInCSR(), so it is built on demand
 * when the graph was constructed from the out-edge CSR only.
 * \note The result references data of the cached in-edge CSR and does not own it.
 * \param vid The vertex id.
 * \return the in edge id vector
 */
DGLIdIters InEdgeVec(dgl_id_t vid) const {
  const CSR::Ptr csr = GetInCSR();
  return DGLIdIters(csr->edge_ids.begin() + csr->indptr[vid],
                    csr->edge_ids.begin() + csr->indptr[vid + 1]);
}
/*!
 * \brief Reset the data in the graph and move its data to the returned graph object.
 *
 * The new object is move-constructed from this graph. Move-assigning into a
 * default-constructed object would silently copy instead, because the class
 * only declares a copy assignment operator, and this graph would then keep
 * its data.
 * \return a raw pointer to the graph object.
 */
virtual GraphInterface *Reset() {
  return new ImmutableGraph(std::move(*this));
}
/*!
 * \brief Sample a subgraph from the seed vertices with neighbor sampling.
 * The neighbors are sampled with a uniform distribution.
 * \param seeds The seed vertices.
 * \param neigh_type The kind of neighbors to sample; presumably "in" or "out",
 *        mirroring the Python-side neighbor_type argument (TODO confirm).
 * \param num_hops The number of hops to sample (NOTE(review): exact semantics
 *        are defined in the implementation, not visible here).
 * \param expand_factor Presumably the maximum number of neighbors sampled per
 *        vertex (TODO confirm against the definition).
 * \return a subgraph
 */
SampledSubgraph NeighborUniformSample(IdArray seeds, const std::string &neigh_type,
int num_hops, int expand_factor) const;
/*!
 * \brief Get the adjacency matrix of the graph.
 *
 * By default, a row of returned adjacency matrix represents the destination
 * of an edge and the column represents the source.
 * \param transpose A flag to transpose the returned adjacency matrix.
 * \param fmt the format of the returned adjacency matrix
 *        (callers pass e.g. "csr" or "coo").
 * \return a vector of three IdArray.
 *         NOTE(review): the meaning of each array depends on fmt and is
 *         defined by the implementation, which is not visible here.
 */
virtual std::vector<IdArray> GetAdj(bool transpose, const std::string &fmt) const;
protected:
// NOTE(review): the two helpers below are only declared here. Presumably they
// return the ids of the edges going from src to dst, looked up in the in-edge
// CSR and in the out-edge CSR respectively -- confirm against the definitions.
DGLIdIters GetInEdgeIdRef(dgl_id_t src, dgl_id_t dst) const;
DGLIdIters GetOutEdgeIdRef(dgl_id_t src, dgl_id_t dst) const;
/*
 * An immutable graph is allowed to hold just one of its two CSRs (sampled
 * subgraphs are an example). The CSR accessors hand back the cached CSR when
 * it is present; otherwise they derive it by transposing the other CSR and
 * remember the result.
 */
CSR::Ptr GetInCSR() const {
  if (!in_csr_) {
    CHECK(out_csr_ != nullptr) << "one of the CSRs must exist";
    // The method is const, so the cache slot is filled through a const_cast.
    const_cast<ImmutableGraph *>(this)->in_csr_ = out_csr_->Transpose();
  }
  return in_csr_;
}
/*
 * Return the out-edge CSR. When it is not cached yet, it is derived by
 * transposing the in-edge CSR and the result is stored for later calls.
 */
CSR::Ptr GetOutCSR() const {
  if (!out_csr_) {
    CHECK(in_csr_ != nullptr) << "one of the CSRs must exist";
    // The method is const, so the cache slot is filled through a const_cast.
    const_cast<ImmutableGraph *>(this)->out_csr_ = in_csr_->Transpose();
  }
  return out_csr_;
}
/*!
 * \brief Get the CSR array that represents the in-edges.
 * This method copies data from std::vector to IdArray.
 * \return the CSR array.
 */
CSRArray GetInCSRArray() const;
/*!
 * \brief Get the CSR array that represents the out-edges.
 * This method copies data from std::vector to IdArray.
 * \return the CSR array.
 */
CSRArray GetOutCSRArray() const;
/*!
 * \brief Sample a subgraph starting from the seed vertices.
 *
 * NOTE(review): only the declaration is visible here. Presumably this is the
 * shared implementation behind NeighborUniformSample, with `probability`
 * holding per-vertex sampling weights (null meaning uniform) -- confirm
 * against the definition.
 * \param seed_arr The seed vertices.
 * \param probability See the note above.
 * \param neigh_type The kind of neighbors to sample.
 * \param num_hops The number of hops to sample.
 * \param num_neighbor Presumably the number of neighbors sampled per vertex (TODO confirm).
 * \return the sampled subgraph
 */
SampledSubgraph SampleSubgraph(IdArray seed_arr, const float* probability,
const std::string &neigh_type,
int num_hops, size_t num_neighbor) const;
/*!
 * \brief Compact a subgraph.
 * In a sampled subgraph, the vertex ids are still the ids used in the original graph.
 * We want to convert them to ids local to the subgraph.
 * \param induced_vertices Presumably the original-graph ids of the vertices
 *        in the subgraph (TODO confirm against the definition).
 */
void CompactSubgraph(IdArray induced_vertices);
// Store the in-edges. May be null when only the out-edge CSR is held;
// GetInCSR() fills it in on demand.
CSR::Ptr in_csr_;
// Store the out-edges. May be null when only the in-edge CSR is held;
// GetOutCSR() fills it in on demand.
CSR::Ptr out_csr_;
/*!
 * \brief Whether this is a multigraph.
 *
 * When a multiedge is added, this flag switches to true.
 */
bool is_multigraph_ = false;
};
} // namespace dgl
#endif // DGL_IMMUTABLE_GRAPH_H_
...@@ -74,6 +74,19 @@ def tensor(data, dtype=None): ...@@ -74,6 +74,19 @@ def tensor(data, dtype=None):
""" """
pass pass
def get_preferred_sparse_format():
    """Get the preferred sparse matrix format supported by the backend.

    Different backends prefer different sparse matrix formats. This info is
    useful when constructing a sparse matrix.

    Returns
    -------
    string
        the name of the preferred sparse matrix format.
    """
    pass
def sparse_matrix(data, index, shape, force_format=False): def sparse_matrix(data, index, shape, force_format=False):
"""Create a sparse matrix. """Create a sparse matrix.
...@@ -834,7 +847,3 @@ def zerocopy_from_numpy(np_array): ...@@ -834,7 +847,3 @@ def zerocopy_from_numpy(np_array):
# ---------------- # ----------------
# These are not related to tensors. Some of them are temporary workarounds that # These are not related to tensors. Some of them are temporary workarounds that
# should be included in DGL in the future. # should be included in DGL in the future.
def create_immutable_graph_index():
"""Create an immutable graph index object."""
pass
from .tensor import * from .tensor import *
from .immutable_graph_index import create_immutable_graph_index
from __future__ import absolute_import
import ctypes
import numpy as np
import networkx as nx
import scipy.sparse as sp
import mxnet as mx
class ImmutableGraphIndex(object):
    """Backend-specific graph index object on immutable graphs.

    We can use a CSR matrix to represent a graph structure. For functionality,
    one CSR matrix is sufficient. However, for efficient access
    to in-edges and out-edges of a directed graph, we need to use two CSR matrices.
    In these CSR matrices, both rows and columns represent vertices. In one CSR
    matrix, a row stores in-edges of a vertex (whose source vertex is a neighbor
    and destination vertex is the vertex itself). Thus, a non-zero entry is
    the neighbor Id and the value is the corresponding edge Id.
    The other CSR matrix stores the out-edges in the same fashion.

    Several methods of this class build indices with only one of the two
    matrices (the other is passed as None), so methods that read the missing
    matrix cannot be used on such an index.

    Parameters
    ----------
    in_csr : MXNet CSRArray
        A csr array that stores in-edges.
    out_csr : MXNet CSRArray
        A csr array that stores out-edges.
    """
    def __init__(self, in_csr, out_csr):
        self._in_csr = in_csr
        self._out_csr = out_csr
        self._cached_adj = {}

    def number_of_nodes(self):
        """Return the number of nodes.

        The count is read from the in-edge CSR.

        Returns
        -------
        int
            The number of nodes
        """
        return len(self._in_csr)

    def number_of_edges(self):
        """Return the number of edges.

        The count is read from the in-edge CSR.

        Returns
        -------
        int
            The number of edges
        """
        return self._in_csr.indices.shape[0]

    def has_edges(self, u, v):
        """Return true if the edge exists.

        Parameters
        ----------
        u : NDArray
            The src nodes.
        v : NDArray
            The dst nodes.

        Returns
        -------
        NDArray
            0-1 array indicating existence
        """
        # The in-edge CSR is indexed by (dst, src), hence the (v, u) order.
        ids = mx.nd.contrib.edge_id(self._in_csr, v, u)
        return ids >= 0

    def edge_ids(self, u, v):
        """Return the edge ids.

        Pairs for which the lookup yields a negative id are dropped from all
        three returned arrays.

        Parameters
        ----------
        u : NDArray
            The src nodes.
        v : NDArray
            The dst nodes.

        Returns
        -------
        array
            The src nodes of the edges that were found.
        array
            The dst nodes of the edges that were found.
        array
            The edge id array.
            When either input is empty, three empty lists are returned instead
            of numpy arrays.
        """
        if len(u) == 0 or len(v) == 0:
            return [], [], []
        # The in-edge CSR is indexed by (dst, src), hence the (v, u) order.
        ids = mx.nd.contrib.edge_id(self._in_csr, v, u)
        ids = ids.asnumpy()
        v = v.asnumpy()
        u = u.asnumpy()
        return u[ids >= 0], v[ids >= 0], ids[ids >= 0]

    def predecessors(self, v, radius=1):
        """Return the predecessors of the node.

        Parameters
        ----------
        v : int
            The node.
        radius : int, optional
            The radius of the neighborhood.

        Returns
        -------
        NDArray
            Array of predecessors

        Raises
        ------
        Exception
            If radius is larger than 1.
        """
        if radius > 1:
            raise Exception('Immutable graph doesn\'t support predecessors with radius > 1 for now.')
        return self._in_csr[v].indices

    def successors(self, v, radius=1):
        """Return the successors of the node.

        Parameters
        ----------
        v : int
            The node.
        radius : int, optional
            The radius of the neighborhood.

        Returns
        -------
        NDArray
            Array of successors

        Raises
        ------
        Exception
            If radius is larger than 1.
        """
        if radius > 1:
            raise Exception('Immutable graph doesn\'t support successors with radius > 1 for now.')
        return self._out_csr[v].indices

    def in_edges(self, v):
        """Return the in edges of the node(s).

        Parameters
        ----------
        v : NDArray
            The node(s).

        Returns
        -------
        NDArray
            index pointers
        NDArray
            The src nodes.
        NDArray
            The edge ids.
        """
        rows = mx.nd.take(self._in_csr, v)
        return rows.indptr, rows.indices, rows.data

    def out_edges(self, v):
        """Return the out edges of the node(s).

        Parameters
        ----------
        v : NDArray
            The node(s).

        Returns
        -------
        NDArray
            index pointers
        NDArray
            The dst nodes.
        NDArray
            The edge ids.
        """
        rows = mx.nd.take(self._out_csr, v)
        return rows.indptr, rows.indices, rows.data

    def edges(self, sorted=False):
        """Return all the edges

        Parameters
        ----------
        sorted : bool
            True if the returned edges are sorted by their src and dst ids.
            The flag is not consulted by this implementation.

        Returns
        -------
        array
            The src nodes.
        array
            The dst nodes.
        array
            The edge ids.
        """
        #TODO(zhengda) we need to return NDArray directly
        # We don't need to take care of the sorted flag because the vertex Ids
        # are already sorted.
        coo = self._in_csr.asscipy().tocoo()
        # Rows of the in-edge CSR are destinations and columns are sources.
        return coo.col, coo.row, coo.data

    def get_in_degree(self):
        """Return the in degrees of all nodes.

        Returns
        -------
        NDArray
            degrees
        """
        return mx.nd.contrib.getnnz(self._in_csr, axis=1)

    def get_out_degree(self):
        """Return the out degrees of all nodes.

        Returns
        -------
        NDArray
            degrees
        """
        return mx.nd.contrib.getnnz(self._out_csr, axis=1)

    def node_subgraph(self, v):
        """Return the induced node subgraph.

        Parameters
        ----------
        v : NDArray
            The nodes.

        Returns
        -------
        ImmutableGraphIndex
            The subgraph index. Only its in-edge CSR is set.
        NDArray
            Induced nodes (the input nodes, sorted).
        callable
            A zero-argument function returning the induced edges.
        """
        v = mx.nd.sort(v)
        # when return_mapping is turned on, dgl_subgraph returns another CSRArray that
        # stores the edge Ids of the original graph.
        csr = mx.nd.contrib.dgl_subgraph(self._in_csr, v, return_mapping=True)
        induced_nodes = v
        induced_edges = lambda: csr[1].data
        return ImmutableGraphIndex(csr[0], None), induced_nodes, induced_edges

    def node_subgraphs(self, vs_arr):
        """Return the induced node subgraphs.

        Parameters
        ----------
        vs_arr : a vector of NDArray
            The nodes.

        Returns
        -------
        a vector of ImmutableGraphIndex
            The subgraph index. Only the in-edge CSR of each index is set.
        a vector of NDArrays
            Induced nodes of subgraphs (each input array, sorted).
        a vector of callables
            One zero-argument function per subgraph returning its induced edges.
        """
        vs_arr = [mx.nd.sort(v) for v in vs_arr]
        res = mx.nd.contrib.dgl_subgraph(self._in_csr, *vs_arr, return_mapping=True)
        num_subgs = len(vs_arr)
        in_csrs = res[0:num_subgs]
        induced_nodes = list(vs_arr)
        # Bind each mapping as a default argument. A plain `lambda: e.data`
        # would look `e` up when called, so every callable would return the
        # data of the last subgraph.
        induced_edges = [lambda e=e: e.data for e in res[num_subgs:]]
        assert len(in_csrs) == len(induced_nodes)
        assert len(in_csrs) == len(induced_edges)
        gis = [ImmutableGraphIndex(in_csr, None) for in_csr in in_csrs]
        return gis, induced_nodes, induced_edges

    def neighbor_sampling(self, seed_ids, expand_factor, num_hops, neighbor_type,
                          node_prob, max_subgraph_size):
        """Sample one subgraph per seed array with neighbor sampling.

        Parameters
        ----------
        seed_ids : list of NDArray
            The seed nodes; one subgraph is sampled for each entry.
        expand_factor : int
            Passed to the sampling operator as ``num_neighbor``.
        num_hops : int
            Passed to the sampling operator as ``num_hops``.
        neighbor_type : str
            'in' samples on the in-edge CSR, 'out' on the out-edge CSR.
        node_prob : NDArray or None
            When None the uniform sampling operator is used, otherwise the
            non-uniform operator is used with this array.
        max_subgraph_size : int
            Passed to the sampling operator as ``max_num_vertices``.

        Returns
        -------
        list of ImmutableGraphIndex
            The compacted subgraphs; only the CSR matching neighbor_type is set.
        list of NDArray
            The parent-graph nodes of each subgraph.
        list of callables
            One zero-argument function per subgraph returning the ``data`` of
            the sampled (uncompacted) subgraph CSR.

        Raises
        ------
        NotImplementedError
            If neighbor_type is neither 'in' nor 'out'.
        """
        if neighbor_type == 'in':
            g = self._in_csr
        elif neighbor_type == 'out':
            g = self._out_csr
        else:
            raise NotImplementedError
        num_subgs = len(seed_ids)
        if node_prob is None:
            res = mx.nd.contrib.dgl_csr_neighbor_uniform_sample(g, *seed_ids, num_hops=num_hops,
                                                                num_neighbor=expand_factor,
                                                                max_num_vertices=max_subgraph_size)
        else:
            res = mx.nd.contrib.dgl_csr_neighbor_non_uniform_sample(g, node_prob, *seed_ids, num_hops=num_hops,
                                                                    num_neighbor=expand_factor,
                                                                    max_num_vertices=max_subgraph_size)

        vertices, subgraphs = res[0:num_subgs], res[num_subgs:(2*num_subgs)]
        # The last entry of each vertex array is used as the number of
        # sampled vertices of that subgraph.
        num_nodes = [subg_v[-1].asnumpy()[0] for subg_v in vertices]

        inputs = []
        inputs.extend(subgraphs)
        inputs.extend(vertices)
        compacts = mx.nd.contrib.dgl_graph_compact(*inputs, graph_sizes=num_nodes, return_mapping=False)
        # A single CSR is wrapped so the code below can always iterate.
        if isinstance(compacts, mx.nd.sparse.CSRNDArray):
            compacts = [compacts]
        if neighbor_type == 'in':
            gis = [ImmutableGraphIndex(csr, None) for csr in compacts]
        elif neighbor_type == 'out':
            gis = [ImmutableGraphIndex(None, csr) for csr in compacts]
        parent_nodes = [v[0:size] for v, size in zip(vertices, num_nodes)]
        # Bind each subgraph as a default argument. A plain `lambda: e.data`
        # would look `e` up when called, so every callable would return the
        # data of the last subgraph.
        parent_edges = [lambda e=e: e.data for e in subgraphs]
        return gis, parent_nodes, parent_edges

    def adjacency_matrix(self, transpose, ctx):
        """Return the adjacency matrix representation of this graph.

        By default, a row of returned adjacency matrix represents the destination
        of an edge and the column represents the source.

        When transpose is True, a row represents the source and a column represents
        a destination.

        Parameters
        ----------
        transpose : bool
            A flag to transpose the returned adjacency matrix.
        ctx : context
            The device context of the returned matrix.

        Returns
        -------
        NDArray
            An object that returns tensor given context.
        """
        if transpose:
            mat = self._out_csr
        else:
            mat = self._in_csr
        return mx.nd.contrib.dgl_adjacency(mat.as_in_context(ctx))

    def from_coo_matrix(self, out_coo):
        """construct the graph index from a SciPy coo matrix.

        Edge ids are assigned in the order of the non-zero entries. Both CSRs
        are square, sized by the larger dimension of the input.

        Parameters
        ----------
        out_coo : SciPy coo matrix
            The non-zero entries indicate out-edges of the graph.
        """
        edge_ids = mx.nd.arange(0, len(out_coo.data), step=1, repeat=1, dtype=np.int32)
        src = mx.nd.array(out_coo.row, dtype=np.int64)
        dst = mx.nd.array(out_coo.col, dtype=np.int64)
        # TODO we can't generate a csr_matrix with np.int64 directly.
        size = max(out_coo.shape)
        self.__init__(mx.nd.sparse.csr_matrix((edge_ids, (dst, src)), shape=(size, size)).astype(np.int64),
                      mx.nd.sparse.csr_matrix((edge_ids, (src, dst)), shape=(size, size)).astype(np.int64))

    def from_edge_list(self, elist):
        """Convert from an edge list.

        Edge ids are assigned in list order.

        Parameters
        ---------
        elist : list
            List of (u, v) edge tuple.
        """
        src, dst = zip(*elist)
        src = np.array(src)
        dst = np.array(dst)
        num_nodes = max(src.max(), dst.max()) + 1
        min_nodes = min(src.min(), dst.min())
        if min_nodes != 0:
            # NOTE(review): DGLError is not among the names imported at the top
            # of this file, so this line would fail with NameError as written;
            # confirm where it is meant to come from.
            raise DGLError('Invalid edge list. Nodes must start from 0.')
        edge_ids = mx.nd.arange(0, len(src), step=1, repeat=1, dtype=np.int32)
        src = mx.nd.array(src, dtype=np.int64)
        dst = mx.nd.array(dst, dtype=np.int64)
        # TODO we can't generate a csr_matrix with np.int64 directly.
        in_csr = mx.nd.sparse.csr_matrix((edge_ids, (dst, src)),
                                         shape=(num_nodes, num_nodes)).astype(np.int64)
        out_csr = mx.nd.sparse.csr_matrix((edge_ids, (src, dst)),
                                          shape=(num_nodes, num_nodes)).astype(np.int64)
        self.__init__(in_csr, out_csr)
def create_immutable_graph_index(in_csr=None, out_csr=None):
    """Build the backend-specific immutable graph index.

    With both arguments left as None the resulting index holds no CSR.

    Parameters
    ----------
    in_csr : MXNet CSRNDArray
        The in-edge CSR array.
    out_csr : MXNet CSRNDArray
        The out-edge CSR array.

    Returns
    -------
    ImmutableGraphIndex
        The backend-specific immutable graph index.

    Raises
    ------
    TypeError
        If a given array is not an MXNet CSRNDArray.
    """
    # in_csr is validated first, then out_csr; None is accepted for either.
    for csr in (in_csr, out_csr):
        if csr is None:
            continue
        if not isinstance(csr, mx.nd.sparse.CSRNDArray):
            raise TypeError()
    return ImmutableGraphIndex(in_csr, out_csr)
...@@ -28,6 +28,14 @@ def tensor(data, dtype=None): ...@@ -28,6 +28,14 @@ def tensor(data, dtype=None):
dtype = np.float32 dtype = np.float32
return nd.array(data, dtype=dtype) return nd.array(data, dtype=dtype)
def get_preferred_sparse_format():
    """Get the preferred sparse matrix format supported by the backend.

    Different backends prefer different sparse matrix formats. This info is
    useful when constructing a sparse matrix. This backend reports "csr".
    """
    return "csr"
def sparse_matrix(data, index, shape, force_format=False): def sparse_matrix(data, index, shape, force_format=False):
fmt = index[0] fmt = index[0]
if fmt == 'coo': if fmt == 'coo':
......
...@@ -22,6 +22,14 @@ def cpu(): ...@@ -22,6 +22,14 @@ def cpu():
def tensor(data, dtype=None): def tensor(data, dtype=None):
return np.array(data, dtype) return np.array(data, dtype)
def get_preferred_sparse_format():
    """Get the preferred sparse matrix format supported by the backend.

    Different backends prefer different sparse matrix formats. This info is
    useful when constructing a sparse matrix. This backend reports "csr".
    """
    return "csr"
def sparse_matrix(data, index, shape, force_format=False): def sparse_matrix(data, index, shape, force_format=False):
fmt = index[0] fmt = index[0]
if fmt == 'coo': if fmt == 'coo':
...@@ -142,5 +150,3 @@ def zerocopy_to_numpy(input): ...@@ -142,5 +150,3 @@ def zerocopy_to_numpy(input):
def zerocopy_from_numpy(np_array): def zerocopy_from_numpy(np_array):
return np_array return np_array
# create_immutable_graph_index not enabled
...@@ -23,6 +23,14 @@ def cpu(): ...@@ -23,6 +23,14 @@ def cpu():
def tensor(data, dtype=None): def tensor(data, dtype=None):
return th.tensor(data, dtype=dtype) return th.tensor(data, dtype=dtype)
def get_preferred_sparse_format():
    """Get the preferred sparse matrix format supported by the backend.

    Different backends prefer different sparse matrix formats. This info is
    useful when constructing a sparse matrix. This backend reports "coo".
    """
    return "coo"
if TH_VERSION.version[0] == 0: if TH_VERSION.version[0] == 0:
def sparse_matrix(data, index, shape, force_format=False): def sparse_matrix(data, index, shape, force_format=False):
fmt = index[0] fmt = index[0]
...@@ -64,7 +72,10 @@ def astype(input, ty): ...@@ -64,7 +72,10 @@ def astype(input, ty):
return input.type(ty) return input.type(ty)
def asnumpy(input): def asnumpy(input):
return input.cpu().numpy() if isinstance(input, th.sparse.FloatTensor):
return input.to_dense().cpu().numpy()
else:
return input.cpu().numpy()
def copy_to(input, ctx): def copy_to(input, ctx):
if ctx.type == 'cpu': if ctx.type == 'cpu':
...@@ -188,5 +199,3 @@ def zerocopy_to_numpy(input): ...@@ -188,5 +199,3 @@ def zerocopy_to_numpy(input):
def zerocopy_from_numpy(np_array): def zerocopy_from_numpy(np_array):
return th.from_numpy(np_array) return th.from_numpy(np_array)
# create_immutable_graph_index not enabled
# This file contains subgraph samplers. # This file contains subgraph samplers.
import sys
import numpy as np import numpy as np
import threading import threading
import random import random
...@@ -18,8 +19,7 @@ __all__ = ['NeighborSampler'] ...@@ -18,8 +19,7 @@ __all__ = ['NeighborSampler']
class NSSubgraphLoader(object): class NSSubgraphLoader(object):
def __init__(self, g, batch_size, expand_factor, num_hops=1, def __init__(self, g, batch_size, expand_factor, num_hops=1,
neighbor_type='in', node_prob=None, seed_nodes=None, neighbor_type='in', node_prob=None, seed_nodes=None,
shuffle=False, num_workers=1, max_subgraph_size=None, shuffle=False, num_workers=1, return_seed_id=False):
return_seed_id=False):
self._g = g self._g = g
if not g._graph.is_readonly(): if not g._graph.is_readonly():
raise NotImplementedError("subgraph loader only support read-only graphs.") raise NotImplementedError("subgraph loader only support read-only graphs.")
...@@ -38,11 +38,6 @@ class NSSubgraphLoader(object): ...@@ -38,11 +38,6 @@ class NSSubgraphLoader(object):
if shuffle: if shuffle:
self._seed_nodes = F.rand_shuffle(self._seed_nodes) self._seed_nodes = F.rand_shuffle(self._seed_nodes)
self._num_workers = num_workers self._num_workers = num_workers
if max_subgraph_size is None:
# This size is set temporarily.
self._max_subgraph_size = 1000000
else:
self._max_subgraph_size = max_subgraph_size
self._neighbor_type = neighbor_type self._neighbor_type = neighbor_type
self._subgraphs = [] self._subgraphs = []
self._seed_ids = [] self._seed_ids = []
...@@ -61,7 +56,7 @@ class NSSubgraphLoader(object): ...@@ -61,7 +56,7 @@ class NSSubgraphLoader(object):
self._subgraph_idx += 1 self._subgraph_idx += 1
sgi = self._g._graph.neighbor_sampling(seed_ids, self._expand_factor, sgi = self._g._graph.neighbor_sampling(seed_ids, self._expand_factor,
self._num_hops, self._neighbor_type, self._num_hops, self._neighbor_type,
self._node_prob, self._max_subgraph_size) self._node_prob)
subgraphs = [DGLSubGraph(self._g, i.induced_nodes, i.induced_edges, \ subgraphs = [DGLSubGraph(self._g, i.induced_nodes, i.induced_edges, \
i) for i in sgi] i) for i in sgi]
self._subgraphs.extend(subgraphs) self._subgraphs.extend(subgraphs)
...@@ -200,13 +195,10 @@ class _PrefetchingLoader(object): ...@@ -200,13 +195,10 @@ class _PrefetchingLoader(object):
def NeighborSampler(g, batch_size, expand_factor, num_hops=1, def NeighborSampler(g, batch_size, expand_factor, num_hops=1,
neighbor_type='in', node_prob=None, seed_nodes=None, neighbor_type='in', node_prob=None, seed_nodes=None,
shuffle=False, num_workers=1, max_subgraph_size=None, shuffle=False, num_workers=1,
return_seed_id=False, prefetch=False): return_seed_id=False, prefetch=False):
'''Create a sampler that samples neighborhood. '''Create a sampler that samples neighborhood.
.. note:: This method currently only supports MXNet backend. Set
"DGLBACKEND" environment variable to "mxnet".
This creates a subgraph data loader that samples subgraphs from the input graph This creates a subgraph data loader that samples subgraphs from the input graph
with neighbor sampling. This sampling method is implemented in C and can perform with neighbor sampling. This sampling method is implemented in C and can perform
sampling very efficiently. sampling very efficiently.
...@@ -246,8 +238,6 @@ def NeighborSampler(g, batch_size, expand_factor, num_hops=1, ...@@ -246,8 +238,6 @@ def NeighborSampler(g, batch_size, expand_factor, num_hops=1,
If it's None, the seed vertices are all vertices in the graph. If it's None, the seed vertices are all vertices in the graph.
shuffle: indicates the sampled subgraphs are shuffled. shuffle: indicates the sampled subgraphs are shuffled.
num_workers: the number of worker threads that sample subgraphs in parallel. num_workers: the number of worker threads that sample subgraphs in parallel.
max_subgraph_size: the maximal subgraph size in terms of the number of nodes.
GPU doesn't support very large subgraphs.
return_seed_id: indicates whether to return seed ids along with the subgraphs. return_seed_id: indicates whether to return seed ids along with the subgraphs.
The seed Ids are in the parent graph. The seed Ids are in the parent graph.
prefetch : bool, default False prefetch : bool, default False
...@@ -260,7 +250,7 @@ def NeighborSampler(g, batch_size, expand_factor, num_hops=1, ...@@ -260,7 +250,7 @@ def NeighborSampler(g, batch_size, expand_factor, num_hops=1,
information about the subgraphs. information about the subgraphs.
''' '''
loader = NSSubgraphLoader(g, batch_size, expand_factor, num_hops, neighbor_type, node_prob, loader = NSSubgraphLoader(g, batch_size, expand_factor, num_hops, neighbor_type, node_prob,
seed_nodes, shuffle, num_workers, max_subgraph_size, return_seed_id) seed_nodes, shuffle, num_workers, return_seed_id)
if not prefetch: if not prefetch:
return loader return loader
else: else:
......
...@@ -915,7 +915,7 @@ class DGLGraph(object): ...@@ -915,7 +915,7 @@ class DGLGraph(object):
else: else:
raise DGLError('Invalid form:', form) raise DGLError('Invalid form:', form)
def all_edges(self, form='uv', return_sorted=False): def all_edges(self, form='uv', order=None):
"""Return all the edges. """Return all the edges.
Parameters Parameters
...@@ -926,8 +926,12 @@ class DGLGraph(object): ...@@ -926,8 +926,12 @@ class DGLGraph(object):
- 'all' : a tuple (u, v, eid) - 'all' : a tuple (u, v, eid)
- 'uv' : a pair (u, v), default - 'uv' : a pair (u, v), default
- 'eid' : one eid tensor - 'eid' : one eid tensor
return_sorted : bool order : string
True if the returned edges are sorted by their src and dst ids. The order of the returned edges. Currently support:
- 'srcdst' : sorted by their src and dst ids.
- 'eid' : sorted by edge Ids.
- None : the arbitrary order.
Returns Returns
------- -------
...@@ -953,7 +957,7 @@ class DGLGraph(object): ...@@ -953,7 +957,7 @@ class DGLGraph(object):
>>> G.all_edges('all') >>> G.all_edges('all')
(tensor([0, 0, 1]), tensor([1, 2, 2]), tensor([0, 1, 2])) (tensor([0, 0, 1]), tensor([1, 2, 2]), tensor([0, 1, 2]))
""" """
src, dst, eid = self._graph.edges(return_sorted) src, dst, eid = self._graph.edges(order)
if form == 'all': if form == 'all':
return (src.tousertensor(), dst.tousertensor(), eid.tousertensor()) return (src.tousertensor(), dst.tousertensor(), eid.tousertensor())
elif form == 'uv': elif form == 'uv':
......
...@@ -11,7 +11,6 @@ from ._ffi.function import _init_api ...@@ -11,7 +11,6 @@ from ._ffi.function import _init_api
from .base import DGLError from .base import DGLError
from . import backend as F from . import backend as F
from . import utils from . import utils
from .immutable_graph_index import create_immutable_graph_index
GraphIndexHandle = ctypes.c_void_p GraphIndexHandle = ctypes.c_void_p
...@@ -23,8 +22,10 @@ class GraphIndex(object): ...@@ -23,8 +22,10 @@ class GraphIndex(object):
handle : GraphIndexHandle handle : GraphIndexHandle
Handler Handler
""" """
def __init__(self, handle): def __init__(self, handle=None, multigraph=None, readonly=None):
self._handle = handle self._handle = handle
self._multigraph = multigraph
self._readonly = readonly
self._cache = {} self._cache = {}
def __del__(self): def __del__(self):
...@@ -35,21 +36,35 @@ class GraphIndex(object): ...@@ -35,21 +36,35 @@ class GraphIndex(object):
src, dst, _ = self.edges() src, dst, _ = self.edges()
n_nodes = self.number_of_nodes() n_nodes = self.number_of_nodes()
multigraph = self.is_multigraph() multigraph = self.is_multigraph()
readonly = self.is_readonly()
return n_nodes, multigraph, src, dst return n_nodes, multigraph, readonly, src, dst
def __setstate__(self, state): def __setstate__(self, state):
"""The pickle state of GraphIndex is defined as a triplet """The pickle state of GraphIndex is defined as a triplet
(number_of_nodes, multigraph, src_nodes, dst_nodes) (number_of_nodes, multigraph, readonly, src_nodes, dst_nodes)
""" """
n_nodes, multigraph, src, dst = state n_nodes, multigraph, readonly, src, dst = state
self._handle = _CAPI_DGLGraphCreate(multigraph) if readonly:
self._cache = {} self._readonly = readonly
self._multigraph = multigraph
self.init(src, dst, F.arange(0, len(src)), n_nodes)
else:
self._handle = _CAPI_DGLGraphCreateMutable(multigraph)
self._cache = {}
self.clear()
self.add_nodes(n_nodes)
self.add_edges(src, dst)
self.clear() def init(self, src_ids, dst_ids, edge_ids, num_nodes):
self.add_nodes(n_nodes) """The actual init function"""
self.add_edges(src, dst) assert len(src_ids) == len(dst_ids)
assert len(src_ids) == len(edge_ids)
self._handle = _CAPI_DGLGraphCreate(src_ids.todgltensor(), dst_ids.todgltensor(),
edge_ids.todgltensor(), self._multigraph, num_nodes,
self._readonly)
def add_nodes(self, num): def add_nodes(self, num):
"""Add nodes. """Add nodes.
...@@ -107,7 +122,9 @@ class GraphIndex(object): ...@@ -107,7 +122,9 @@ class GraphIndex(object):
bool bool
True if it is a multigraph, False otherwise. True if it is a multigraph, False otherwise.
""" """
return bool(_CAPI_DGLGraphIsMultigraph(self._handle)) if self._multigraph is None:
self._multigraph = bool(_CAPI_DGLGraphIsMultigraph(self._handle))
return self._multigraph
def is_readonly(self): def is_readonly(self):
"""Indicate whether the graph index is read-only. """Indicate whether the graph index is read-only.
...@@ -117,7 +134,9 @@ class GraphIndex(object): ...@@ -117,7 +134,9 @@ class GraphIndex(object):
bool bool
True if it is a read-only graph, False otherwise. True if it is a read-only graph, False otherwise.
""" """
return False if self._readonly is None:
self._readonly = bool(_CAPI_DGLGraphIsReadonly(self._handle))
return self._readonly
def number_of_nodes(self): def number_of_nodes(self):
"""Return the number of nodes. """Return the number of nodes.
...@@ -367,13 +386,17 @@ class GraphIndex(object): ...@@ -367,13 +386,17 @@ class GraphIndex(object):
return src, dst, eid return src, dst, eid
@utils.cached_member(cache='_cache', prefix='edges') @utils.cached_member(cache='_cache', prefix='edges')
def edges(self, return_sorted=False): def edges(self, order=None):
"""Return all the edges """Return all the edges
Parameters Parameters
---------- ----------
return_sorted : bool order : string
True if the returned edges are sorted by their src and dst ids. The order of the returned edges. Currently support:
- 'srcdst' : sorted by their src and dst ids.
- 'eid' : sorted by edge Ids.
- None : the arbitrary order.
Returns Returns
------- -------
...@@ -384,9 +407,11 @@ class GraphIndex(object): ...@@ -384,9 +407,11 @@ class GraphIndex(object):
utils.Index utils.Index
The edge ids. The edge ids.
""" """
key = 'edges_s%d' % return_sorted key = 'edges_s%s' % order
if key not in self._cache: if key not in self._cache:
edge_array = _CAPI_DGLGraphEdges(self._handle, return_sorted) if order is None:
order = ""
edge_array = _CAPI_DGLGraphEdges(self._handle, order)
src = utils.toindex(edge_array(0)) src = utils.toindex(edge_array(0))
dst = utils.toindex(edge_array(1)) dst = utils.toindex(edge_array(1))
eid = utils.toindex(edge_array(2)) eid = utils.toindex(edge_array(2))
...@@ -537,22 +562,27 @@ class GraphIndex(object): ...@@ -537,22 +562,27 @@ class GraphIndex(object):
if not isinstance(transpose, bool): if not isinstance(transpose, bool):
raise DGLError('Expect bool value for "transpose" arg,' raise DGLError('Expect bool value for "transpose" arg,'
' but got %s.' % (type(transpose))) ' but got %s.' % (type(transpose)))
src, dst, _ = self.edges(False) fmt = F.get_preferred_sparse_format()
src = src.tousertensor(ctx) # the index of the ctx will be cached rst = _CAPI_DGLGraphGetAdj(self._handle, transpose, fmt)
dst = dst.tousertensor(ctx) # the index of the ctx will be cached if fmt == "csr":
src = F.unsqueeze(src, dim=0) indptr = F.copy_to(utils.toindex(rst(0)).tousertensor(), ctx)
dst = F.unsqueeze(dst, dim=0) indices = F.copy_to(utils.toindex(rst(1)).tousertensor(), ctx)
if transpose: shuffle = utils.toindex(rst(2))
idx = F.cat([src, dst], dim=0) dat = F.ones(indices.shape, dtype=F.float32, ctx=ctx)
return F.sparse_matrix(dat, ('csr', indices, indptr),
(self.number_of_nodes(), self.number_of_nodes()))[0], shuffle
elif fmt == "coo":
## FIXME(minjie): data type
idx = F.copy_to(utils.toindex(rst(0)).tousertensor(), ctx)
m = self.number_of_edges()
idx = F.reshape(idx, (2, m))
dat = F.ones((m,), dtype=F.float32, ctx=ctx)
n = self.number_of_nodes()
adj, shuffle_idx = F.sparse_matrix(dat, ('coo', idx), (n, n))
shuffle_idx = utils.toindex(shuffle_idx) if shuffle_idx is not None else None
return adj, shuffle_idx
else: else:
idx = F.cat([dst, src], dim=0) raise Exception("unknown format")
n = self.number_of_nodes()
m = self.number_of_edges()
# FIXME(minjie): data type
dat = F.ones((m,), dtype=F.float32, ctx=ctx)
adj, shuffle_idx = F.sparse_matrix(dat, ('coo', idx), (n, n))
shuffle_idx = utils.toindex(shuffle_idx) if shuffle_idx is not None else None
return adj, shuffle_idx
@utils.cached_member(cache='_cache', prefix='inc') @utils.cached_member(cache='_cache', prefix='inc')
def incidence_matrix(self, typestr, ctx): def incidence_matrix(self, typestr, ctx):
...@@ -590,7 +620,7 @@ class GraphIndex(object): ...@@ -590,7 +620,7 @@ class GraphIndex(object):
A index for data shuffling due to sparse format change. Return None A index for data shuffling due to sparse format change. Return None
if shuffle is not required. if shuffle is not required.
""" """
src, dst, eid = self.edges(False) src, dst, eid = self.edges()
src = src.tousertensor(ctx) # the index of the ctx will be cached src = src.tousertensor(ctx) # the index of the ctx will be cached
dst = dst.tousertensor(ctx) # the index of the ctx will be cached dst = dst.tousertensor(ctx) # the index of the ctx will be cached
eid = eid.tousertensor(ctx) # the index of the ctx will be cached eid = eid.tousertensor(ctx) # the index of the ctx will be cached
...@@ -631,6 +661,22 @@ class GraphIndex(object): ...@@ -631,6 +661,22 @@ class GraphIndex(object):
shuffle_idx = utils.toindex(shuffle_idx) if shuffle_idx is not None else None shuffle_idx = utils.toindex(shuffle_idx) if shuffle_idx is not None else None
return inc, shuffle_idx return inc, shuffle_idx
def neighbor_sampling(self, seed_ids, expand_factor, num_hops, neighbor_type, node_prob):
    """Sample one subgraph per seed array with neighbor sampling.

    An empty ``seed_ids`` yields an empty list without calling the sampler.
    When ``node_prob`` is None the uniform sampler is used, otherwise the
    non-uniform sampler is called with ``node_prob``.

    Returns a list of SubgraphIndex, one per entry of ``seed_ids``.
    """
    if not len(seed_ids):
        return []

    seed_tensors = [seed.todgltensor() for seed in seed_ids]
    count = len(seed_tensors)

    if node_prob is not None:
        packed = _nonuniform_sampling(self, node_prob, seed_tensors, neighbor_type, num_hops,
                                      expand_factor)
    else:
        packed = _uniform_sampling(self, seed_tensors, neighbor_type, num_hops, expand_factor)

    # The sampler result is read in three consecutive groups of `count`
    # entries: the subgraph handles first, then two index arrays per subgraph
    # (presumably induced nodes and induced edges -- confirm against
    # SubgraphIndex).
    subgraphs = []
    for i in range(count):
        handle = packed(i)
        second = utils.toindex(packed(count + i))
        third = utils.toindex(packed(count * 2 + i))
        subgraphs.append(SubgraphIndex(handle, self, second, third))
    return subgraphs
def to_networkx(self): def to_networkx(self):
"""Convert to networkx graph. """Convert to networkx graph.
...@@ -659,8 +705,6 @@ class GraphIndex(object): ...@@ -659,8 +705,6 @@ class GraphIndex(object):
nx_graph : networkx.DiGraph nx_graph : networkx.DiGraph
The nx graph The nx graph
""" """
self.clear()
if not isinstance(nx_graph, nx.Graph): if not isinstance(nx_graph, nx.Graph):
nx_graph = (nx.MultiDiGraph(nx_graph) if self.is_multigraph() nx_graph = (nx.MultiDiGraph(nx_graph) if self.is_multigraph()
else nx.DiGraph(nx_graph)) else nx.DiGraph(nx_graph))
...@@ -671,9 +715,13 @@ class GraphIndex(object): ...@@ -671,9 +715,13 @@ class GraphIndex(object):
nx_graph = nx_graph.to_directed() nx_graph = nx_graph.to_directed()
num_nodes = nx_graph.number_of_nodes() num_nodes = nx_graph.number_of_nodes()
self.add_nodes(num_nodes) if not self.is_readonly():
self.clear()
self.add_nodes(num_nodes)
if nx_graph.number_of_edges() == 0: if nx_graph.number_of_edges() == 0:
if self.is_readonly():
raise Exception("can't create an empty immutable graph")
return return
# nx_graph.edges(data=True) returns src, dst, attr_dict # nx_graph.edges(data=True) returns src, dst, attr_dict
...@@ -692,9 +740,14 @@ class GraphIndex(object): ...@@ -692,9 +740,14 @@ class GraphIndex(object):
for e in nx_graph.edges: for e in nx_graph.edges:
src.append(e[0]) src.append(e[0])
dst.append(e[1]) dst.append(e[1])
eid = np.arange(0, len(src), dtype=np.int64)
num_nodes = nx_graph.number_of_nodes()
# We store edge Ids as an edge attribute.
eid = utils.toindex(eid)
src = utils.toindex(src) src = utils.toindex(src)
dst = utils.toindex(dst) dst = utils.toindex(dst)
self.add_edges(src, dst) self.init(src, dst, eid, num_nodes)
def from_scipy_sparse_matrix(self, adj): def from_scipy_sparse_matrix(self, adj):
"""Convert from scipy sparse matrix. """Convert from scipy sparse matrix.
...@@ -703,12 +756,17 @@ class GraphIndex(object): ...@@ -703,12 +756,17 @@ class GraphIndex(object):
---------- ----------
adj : scipy sparse matrix adj : scipy sparse matrix
""" """
self.clear() assert isinstance(adj, (scipy.sparse.csr_matrix, scipy.sparse.coo_matrix)), \
self.add_nodes(adj.shape[0]) "The input matrix has to be a SciPy sparse matrix."
if not self.is_readonly():
self.clear()
num_nodes = max(adj.shape[0], adj.shape[1])
adj_coo = adj.tocoo() adj_coo = adj.tocoo()
src = utils.toindex(adj_coo.row) src = utils.toindex(adj_coo.row)
dst = utils.toindex(adj_coo.col) dst = utils.toindex(adj_coo.col)
self.add_edges(src, dst) edge_ids = utils.toindex(F.arange(0, len(adj_coo.row)))
self.init(src, dst, edge_ids, num_nodes)
def from_edge_list(self, elist): def from_edge_list(self, elist):
"""Convert from an edge list. """Convert from an edge list.
...@@ -718,16 +776,19 @@ class GraphIndex(object): ...@@ -718,16 +776,19 @@ class GraphIndex(object):
elist : list elist : list
List of (u, v) edge tuple. List of (u, v) edge tuple.
""" """
self.clear() if not self.is_readonly():
self.clear()
src, dst = zip(*elist) src, dst = zip(*elist)
src = np.array(src) src = np.array(src)
dst = np.array(dst) dst = np.array(dst)
src_ids = utils.toindex(src)
dst_ids = utils.toindex(dst)
num_nodes = max(src.max(), dst.max()) + 1 num_nodes = max(src.max(), dst.max()) + 1
min_nodes = min(src.min(), dst.min()) min_nodes = min(src.min(), dst.min())
if min_nodes != 0: if min_nodes != 0:
raise DGLError('Invalid edge list. Nodes must start from 0.') raise DGLError('Invalid edge list. Nodes must start from 0.')
self.add_nodes(num_nodes) edge_ids = utils.toindex(F.arange(0, len(src)))
self.add_edges(utils.toindex(src), utils.toindex(dst)) self.init(src_ids, dst_ids, edge_ids, num_nodes)
def line_graph(self, backtracking=True): def line_graph(self, backtracking=True):
"""Return the line graph of this graph. """Return the line graph of this graph.
...@@ -761,7 +822,8 @@ class SubgraphIndex(GraphIndex): ...@@ -761,7 +822,8 @@ class SubgraphIndex(GraphIndex):
The parent edge ids in this subgraph. The parent edge ids in this subgraph.
""" """
def __init__(self, handle, parent, induced_nodes, induced_edges): def __init__(self, handle, parent, induced_nodes, induced_edges):
super(SubgraphIndex, self).__init__(handle) super(SubgraphIndex, self).__init__(parent.is_multigraph(), parent.is_readonly())
self._handle = handle
self._parent = parent self._parent = parent
self._induced_nodes = induced_nodes self._induced_nodes = induced_nodes
self._induced_edges = induced_edges self._induced_edges = induced_edges
...@@ -813,7 +875,7 @@ def map_to_subgraph_nid(subgraph, parent_nids): ...@@ -813,7 +875,7 @@ def map_to_subgraph_nid(subgraph, parent_nids):
Parameters Parameters
---------- ----------
subgraph: SubgraphIndex or ImmutableSubgraphIndex subgraph: SubgraphIndex
the graph index of a subgraph the graph index of a subgraph
parent_nids: utils.Index parent_nids: utils.Index
...@@ -900,12 +962,15 @@ def create_graph_index(graph_data=None, multigraph=False, readonly=False): ...@@ -900,12 +962,15 @@ def create_graph_index(graph_data=None, multigraph=False, readonly=False):
return graph_data return graph_data
if readonly: if readonly:
return create_immutable_graph_index(graph_data) # FIXME(zhengda): we should construct a C graph index before constructing GraphIndex.
gidx = GraphIndex(None, multigraph, readonly)
handle = _CAPI_DGLGraphCreate(multigraph) else:
gidx = GraphIndex(handle) handle = _CAPI_DGLGraphCreateMutable(multigraph)
gidx = GraphIndex(handle, multigraph, readonly)
if graph_data is None: if graph_data is None and readonly:
raise Exception("can't create an empty immutable graph")
elif graph_data is None:
return gidx return gidx
# edge list # edge list
...@@ -933,4 +998,30 @@ def create_graph_index(graph_data=None, multigraph=False, readonly=False): ...@@ -933,4 +998,30 @@ def create_graph_index(graph_data=None, multigraph=False, readonly=False):
return gidx return gidx
_init_api("dgl.graph_index") _init_api("dgl.graph_index")
# TODO(zhengda): we'll support variable-length inputs.
# Dispatch table keyed by the number of seed arrays handed to the C API.
# Only power-of-two counts up to 128 have a registered entry point.
_NEIGHBOR_SAMPLING_APIS = {
    1: _CAPI_DGLGraphUniformSampling,
    2: _CAPI_DGLGraphUniformSampling2,
    4: _CAPI_DGLGraphUniformSampling4,
    8: _CAPI_DGLGraphUniformSampling8,
    16: _CAPI_DGLGraphUniformSampling16,
    32: _CAPI_DGLGraphUniformSampling32,
    64: _CAPI_DGLGraphUniformSampling64,
    128: _CAPI_DGLGraphUniformSampling128,
}

# One zero-length int64 CPU index. Note that `(0)` is just the integer 0, so the
# shape is spelled as the 1-tuple `(0,)` to really describe an empty 1-D array.
_EMPTY_ARRAYS = [utils.toindex(F.ones(shape=(0,), dtype=F.int64, ctx=F.cpu()))]
def _uniform_sampling(gidx, seed_ids, neigh_type, num_hops, expand_factor):
    """Run uniform neighbor sampling through the fixed-arity C API.

    The C API only exists for the seed-array counts listed in
    ``_NEIGHBOR_SAMPLING_APIS``. Any other count larger than one is padded
    with empty arrays up to the next power of two; the real number of seed
    arrays is passed as the last argument.

    Parameters
    ----------
    gidx : GraphIndex
        The graph index to sample from (its ``_handle`` is passed to C).
    seed_ids : list
        One DGL tensor of seed vertex ids per requested subgraph.
        The list is not modified.
    neigh_type : str
        Neighbor type forwarded to the C API.
    num_hops : int
        Number of hops forwarded to the C API.
    expand_factor : int
        Expand factor forwarded to the C API.

    Returns
    -------
    The value returned by the selected C API function.
    """
    num_seeds = len(seed_ids)
    # Work on a copy so the caller's list keeps its original length.
    padded_ids = list(seed_ids)
    if num_seeds > 1 and num_seeds not in _NEIGHBOR_SAMPLING_APIS:
        # Smallest power of two that is >= num_seeds.
        target = 1 << (num_seeds - 1).bit_length()
        empty = _EMPTY_ARRAYS[0]
        padded_ids.extend(empty.todgltensor() for _ in range(target - num_seeds))
    assert len(padded_ids) in _NEIGHBOR_SAMPLING_APIS, \
        "unsupported number of seed arrays: %d" % num_seeds
    sampler = _NEIGHBOR_SAMPLING_APIS[len(padded_ids)]
    call_args = padded_ids + [neigh_type, num_hops, expand_factor, num_seeds]
    return sampler(gidx._handle, *call_args)
"""Module for immutable graph index.
NOTE: this is currently a temporary solution.
"""
# pylint: disable=abstract-method,unused-argument
from __future__ import absolute_import
import numpy as np
import networkx as nx
import scipy.sparse as sp
from ._ffi.function import _init_api
from . import backend as F
from . import utils
from .base import DGLError
class ImmutableGraphIndex(object):
    """Graph index object on immutable graphs.

    All structural queries are delegated to a sparse graph object supplied by
    the backend framework. Node/edge counts, degree arrays and the full edge
    list are memoized on first use.

    Parameters
    ----------
    backend_sparse : backend sparse graph object
        The object provided by the backend framework that stores the graph.
    """
    def __init__(self, backend_sparse):
        self._sparse = backend_sparse
        self._num_nodes = None
        self._num_edges = None
        self._in_deg = None
        self._out_deg = None
        self._cache = {}

    def add_nodes(self, num):
        """Add nodes.

        Parameters
        ----------
        num : int
            Number of nodes to be added.

        Raises
        ------
        DGLError
            Always; the graph is immutable.
        """
        raise DGLError("Immutable graph doesn't support adding nodes")

    def add_edge(self, u, v):
        """Add one edge.

        Parameters
        ----------
        u : int
            The src node.
        v : int
            The dst node.

        Raises
        ------
        DGLError
            Always; the graph is immutable.
        """
        raise DGLError("Immutable graph doesn't support adding an edge")

    def add_edges(self, u, v):
        """Add many edges.

        Parameters
        ----------
        u : utils.Index
            The src nodes.
        v : utils.Index
            The dst nodes.

        Raises
        ------
        DGLError
            Always; the graph is immutable.
        """
        raise DGLError("Immutable graph doesn't support adding edges")

    def clear(self):
        """Clear the graph.

        Raises
        ------
        DGLError
            Always; the graph is immutable.
        """
        raise DGLError("Immutable graph doesn't support clearing up")

    def is_multigraph(self):
        """Return whether the graph is a multigraph

        Returns
        -------
        bool
            True if it is a multigraph, False otherwise.
        """
        # Immutable graph doesn't support multi-edge.
        return False

    def is_readonly(self):
        """Indicate whether the graph index is read-only.

        Returns
        -------
        bool
            True if it is a read-only graph, False otherwise.
        """
        return True

    def number_of_nodes(self):
        """Return the number of nodes.

        Returns
        -------
        int
            The number of nodes
        """
        if self._num_nodes is None:
            self._num_nodes = self._sparse.number_of_nodes()
        return self._num_nodes

    def number_of_edges(self):
        """Return the number of edges.

        Returns
        -------
        int
            The number of edges
        """
        if self._num_edges is None:
            self._num_edges = self._sparse.number_of_edges()
        return self._num_edges

    def has_node(self, vid):
        """Return true if the node exists.

        Parameters
        ----------
        vid : int
            The node id.

        Returns
        -------
        bool
            True if ``vid`` lies in ``[0, number_of_nodes())``.
        """
        # Negative ids are never valid node ids.
        return 0 <= vid < self.number_of_nodes()

    def has_nodes(self, vids):
        """Return true if the nodes exist.

        Parameters
        ----------
        vids : utils.Index
            The nodes

        Returns
        -------
        utils.Index
            0-1 array indicating existence
        """
        vid_array = vids.tousertensor()
        return utils.toindex(vid_array < self.number_of_nodes())

    def has_edge_between(self, u, v):
        """Return true if the edge exists.

        Parameters
        ----------
        u : int
            The src node.
        v : int
            The dst node.

        Returns
        -------
        bool
            True if the edge exists
        """
        u = F.tensor([u], dtype=F.int64)
        v = F.tensor([v], dtype=F.int64)
        return self._sparse.has_edges(u, v).asnumpy()[0]

    def has_edges_between(self, u, v):
        """Return true if the edge exists.

        Parameters
        ----------
        u : utils.Index
            The src nodes.
        v : utils.Index
            The dst nodes.

        Returns
        -------
        utils.Index
            0-1 array indicating existence
        """
        ret = self._sparse.has_edges(u.tousertensor(), v.tousertensor())
        return utils.toindex(ret)

    def predecessors(self, v, radius=1):
        """Return the predecessors of the node.

        Parameters
        ----------
        v : int
            The node.
        radius : int, optional
            The radius of the neighborhood.

        Returns
        -------
        utils.Index
            Array of predecessors
        """
        pred = self._sparse.predecessors(v, radius)
        return utils.toindex(pred)

    def successors(self, v, radius=1):
        """Return the successors of the node.

        Parameters
        ----------
        v : int
            The node.
        radius : int, optional
            The radius of the neighborhood.

        Returns
        -------
        utils.Index
            Array of successors
        """
        succ = self._sparse.successors(v, radius)
        return utils.toindex(succ)

    def edge_id(self, u, v):
        """Return the id of the edge.

        Parameters
        ----------
        u : int
            The src node.
        v : int
            The dst node.

        Returns
        -------
        utils.Index
            The edge id(s) reported by the backend for the pair (u, v).
        """
        u = F.tensor([u], dtype=F.int64)
        v = F.tensor([v], dtype=F.int64)
        _, _, eid = self._sparse.edge_ids(u, v)
        return utils.toindex(eid)

    def edge_ids(self, u, v):
        """Return the edge ids.

        Parameters
        ----------
        u : utils.Index
            The src nodes.
        v : utils.Index
            The dst nodes.

        Returns
        -------
        utils.Index
            The src nodes.
        utils.Index
            The dst nodes.
        utils.Index
            The edge ids.
        """
        u = u.tousertensor()
        v = v.tousertensor()
        u, v, ids = self._sparse.edge_ids(u, v)
        return utils.toindex(u), utils.toindex(v), utils.toindex(ids)

    def find_edges(self, eid):
        """Return a triplet of arrays that contains the edge IDs.

        Parameters
        ----------
        eid : utils.Index
            The edge ids.

        Returns
        -------
        utils.Index
            The src nodes.
        utils.Index
            The dst nodes.
        utils.Index
            The edge ids.

        Raises
        ------
        NotImplementedError
            Always; not implemented for immutable graphs.
        """
        raise NotImplementedError("immutable graph doesn't implement find_edges for now.")

    def in_edges(self, v):
        """Return the in edges of the node(s).

        Parameters
        ----------
        v : utils.Index
            The node(s).

        Returns
        -------
        utils.Index
            The src nodes.
        utils.Index
            The dst nodes.
        utils.Index
            The edge ids.
        """
        dst = v.tousertensor()
        indptr, src, edges = self._sparse.in_edges(dst)
        off = utils.toindex(indptr)
        # Expand the queried ids with the offset array returned by the backend
        # to obtain one dst entry per returned edge.
        dst = _CAPI_DGLExpandIds(v.todgltensor(), off.todgltensor())
        return utils.toindex(src), utils.toindex(dst), utils.toindex(edges)

    def out_edges(self, v):
        """Return the out edges of the node(s).

        Parameters
        ----------
        v : utils.Index
            The node(s).

        Returns
        -------
        utils.Index
            The src nodes.
        utils.Index
            The dst nodes.
        utils.Index
            The edge ids.
        """
        src = v.tousertensor()
        indptr, dst, edges = self._sparse.out_edges(src)
        off = utils.toindex(indptr)
        # Expand the queried ids with the offset array returned by the backend
        # to obtain one src entry per returned edge.
        src = _CAPI_DGLExpandIds(v.todgltensor(), off.todgltensor())
        return utils.toindex(src), utils.toindex(dst), utils.toindex(edges)

    def edges(self, return_sorted=False):
        """Return all the edges

        The result is cached separately for the sorted and the unsorted
        request, so one never shadows the other.

        Parameters
        ----------
        return_sorted : bool
            True if the returned edges are sorted by their src and dst ids.

        Returns
        -------
        utils.Index
            The src nodes.
        utils.Index
            The dst nodes.
        utils.Index
            The edge ids.
        """
        cache_key = "all_edges_sorted" if return_sorted else "all_edges"
        if cache_key not in self._cache:
            src, dst, edges = self._sparse.edges(return_sorted)
            self._cache[cache_key] = (utils.toindex(src), utils.toindex(dst),
                                      utils.toindex(edges))
        return self._cache[cache_key]

    def _get_in_degree(self):
        """Return the backend in-degree array, computing it on first use."""
        if 'in_deg' not in self._cache:
            self._cache['in_deg'] = self._sparse.get_in_degree()
        return self._cache['in_deg']

    def _get_out_degree(self):
        """Return the backend out-degree array, computing it on first use."""
        if 'out_deg' not in self._cache:
            self._cache['out_deg'] = self._sparse.get_out_degree()
        return self._cache['out_deg']

    def in_degree(self, v):
        """Return the in degree of the node.

        Parameters
        ----------
        v : int
            The node.

        Returns
        -------
        int
            The in degree.
        """
        deg = self._get_in_degree()
        return deg[v]

    def in_degrees(self, v):
        """Return the in degrees of the nodes.

        Parameters
        ----------
        v : utils.Index
            The nodes.

        Returns
        -------
        utils.Index
            The in degree array.
        """
        deg = self._get_in_degree()
        # A slice covering every node needs no gather.
        if v.is_slice(0, self.number_of_nodes()):
            return utils.toindex(deg)
        v_array = v.tousertensor()
        return utils.toindex(F.gather_row(deg, v_array))

    def out_degree(self, v):
        """Return the out degree of the node.

        Parameters
        ----------
        v : int
            The node.

        Returns
        -------
        int
            The out degree.
        """
        deg = self._get_out_degree()
        return deg[v]

    def out_degrees(self, v):
        """Return the out degrees of the nodes.

        Parameters
        ----------
        v : utils.Index
            The nodes.

        Returns
        -------
        utils.Index
            The out degree array.
        """
        deg = self._get_out_degree()
        # A slice covering every node needs no gather.
        if v.is_slice(0, self.number_of_nodes()):
            return utils.toindex(deg)
        v_array = v.tousertensor()
        return utils.toindex(F.gather_row(deg, v_array))

    def node_subgraph(self, v):
        """Return the induced node subgraph.

        Parameters
        ----------
        v : utils.Index
            The nodes.

        Returns
        -------
        ImmutableSubgraphIndex
            The subgraph index.
        """
        v = v.tousertensor()
        gidx, induced_n, induced_e = self._sparse.node_subgraph(v)
        return ImmutableSubgraphIndex(gidx, self, induced_n, induced_e)

    def node_subgraphs(self, vs_arr):
        """Return the induced node subgraphs.

        Parameters
        ----------
        vs_arr : a vector of utils.Index
            The nodes.

        Returns
        -------
        a vector of ImmutableSubgraphIndex
            The subgraph index.
        """
        vs_arr = [v.tousertensor() for v in vs_arr]
        gis, induced_nodes, induced_edges = self._sparse.node_subgraphs(vs_arr)
        return [ImmutableSubgraphIndex(gidx, self, induced_n, induced_e)
                for gidx, induced_n, induced_e in zip(gis, induced_nodes, induced_edges)]

    def edge_subgraph(self, e):
        """Return the induced edge subgraph.

        Parameters
        ----------
        e : utils.Index
            The edges.

        Returns
        -------
        SubgraphIndex
            The subgraph index.

        Raises
        ------
        NotImplementedError
            Always; not implemented for immutable graphs.
        """
        raise NotImplementedError("immutable graph doesn't implement edge_subgraph for now.")

    def neighbor_sampling(self, seed_ids, expand_factor, num_hops, neighbor_type,
                          node_prob, max_subgraph_size):
        """Neighborhood sampling

        Parameters
        ----------
        seed_ids : list of utils.Index
            One set of seed nodes per subgraph to sample.
        expand_factor, num_hops, neighbor_type, node_prob, max_subgraph_size
            Forwarded unchanged to the backend sampler.

        Returns
        -------
        list of ImmutableSubgraphIndex
            One sampled subgraph per seed set; empty if no seeds are given.
        """
        if len(seed_ids) == 0:
            return []
        seed_ids = [v.tousertensor() for v in seed_ids]
        gis, induced_nodes, induced_edges = self._sparse.neighbor_sampling(seed_ids, expand_factor,
                                                                           num_hops, neighbor_type,
                                                                           node_prob,
                                                                           max_subgraph_size)
        induced_nodes = [utils.toindex(v) for v in induced_nodes]
        return [ImmutableSubgraphIndex(gidx, self, induced_n, induced_e)
                for gidx, induced_n, induced_e in zip(gis, induced_nodes, induced_edges)]

    def adjacency_matrix(self, transpose=False, ctx=F.cpu()):
        """Return the adjacency matrix representation of this graph.

        By default, a row of returned adjacency matrix represents the destination
        of an edge and the column represents the source.

        When transpose is True, a row represents the source and a column represents
        a destination.

        Parameters
        ----------
        transpose : bool
            A flag to transpose the returned adjacency matrix.
        ctx : context
            The context of the returned matrix.

        Returns
        -------
        utils.CtxCachedObject
            An object that returns tensor given context.
        utils.Index
            A index for data shuffling due to sparse format change. Return None
            if shuffle is not required.
        """
        return self._sparse.adjacency_matrix(transpose, ctx), None

    def incidence_matrix(self, typestr, ctx):
        """Return the incidence matrix representation of this graph.

        An incidence matrix is an n x m sparse matrix, where n is
        the number of nodes and m is the number of edges. Each nnz
        value indicating whether the edge is incident to the node
        or not.

        There are three types of an incidence matrix `I`:
        * "in":
          - I[v, e] = 1 if e is the in-edge of v (or v is the dst node of e);
          - I[v, e] = 0 otherwise.
        * "out":
          - I[v, e] = 1 if e is the out-edge of v (or v is the src node of e);
          - I[v, e] = 0 otherwise.
        * "both":
          - I[v, e] = 1 if e is the in-edge of v;
          - I[v, e] = -1 if e is the out-edge of v;
          - I[v, e] = 0 otherwise (including self-loop).

        Parameters
        ----------
        typestr : str
            Can be either "in", "out" or "both"
        ctx : context
            The context of returned incidence matrix.

        Returns
        -------
        SparseTensor
            The incidence matrix.
        utils.Index
            A index for data shuffling due to sparse format change. Return None
            if shuffle is not required.

        Raises
        ------
        NotImplementedError
            Always; not implemented for immutable graphs.
        """
        raise NotImplementedError("immutable graph doesn't implement incidence_matrix for now.")

    def to_networkx(self):
        """Convert to networkx graph.

        The edge id will be saved as the 'id' edge attribute.

        Returns
        -------
        networkx.DiGraph
            The nx graph
        """
        src, dst, eid = self.edges()
        ret = nx.DiGraph()
        # Register every node first so nodes without edges are not lost.
        ret.add_nodes_from(range(self.number_of_nodes()))
        for u, v, e in zip(src, dst, eid):
            ret.add_edge(u, v, id=e)
        return ret

    def from_networkx(self, nx_graph):
        """Convert from networkx graph.

        If 'id' edge attribute exists, the edge will be added follows
        the edge id order. Otherwise, order is undefined.

        Parameters
        ----------
        nx_graph : networkx.DiGraph
            The nx graph
        """
        if not isinstance(nx_graph, nx.Graph):
            nx_graph = (nx.MultiDiGraph(nx_graph) if self.is_multigraph()
                        else nx.DiGraph(nx_graph))
        else:
            if not nx_graph.is_directed():
                # to_directed creates a deep copy of the networkx graph even if
                # the original graph is already directed and we do not want to do it.
                nx_graph = nx_graph.to_directed()

        assert nx_graph.number_of_edges() > 0, "can't create an empty immutable graph"

        # nx_graph.edges(data=True) returns src, dst, attr_dict
        has_edge_id = 'id' in next(iter(nx_graph.edges(data=True)))[-1]
        if has_edge_id:
            num_edges = nx_graph.number_of_edges()
            src = np.zeros((num_edges,), dtype=np.int64)
            dst = np.zeros((num_edges,), dtype=np.int64)
            for u, v, attr in nx_graph.edges(data=True):
                # Place each endpoint pair at the slot named by its edge id.
                slot = attr['id']
                src[slot] = u
                dst[slot] = v
        else:
            src = []
            dst = []
            for e in nx_graph.edges:
                src.append(e[0])
                dst.append(e[1])
        # In both branches position i of src/dst describes edge i.
        eid = np.arange(0, len(src), dtype=np.int64)

        num_nodes = nx_graph.number_of_nodes()
        # We store edge Ids as an edge attribute.
        eid = F.tensor(eid, dtype=np.int32)
        src = F.tensor(src, dtype=np.int64)
        dst = F.tensor(dst, dtype=np.int64)
        out_csr, _ = F.sparse_matrix(eid, ('coo', (src, dst)), (num_nodes, num_nodes))
        in_csr, _ = F.sparse_matrix(eid, ('coo', (dst, src)), (num_nodes, num_nodes))
        out_csr = out_csr.astype(np.int64)
        in_csr = in_csr.astype(np.int64)
        self._sparse = F.create_immutable_graph_index(in_csr, out_csr)

    def from_scipy_sparse_matrix(self, adj):
        """Convert from scipy sparse matrix.

        NOTE: we assume the row is src nodes and the col is dst nodes.

        Parameters
        ----------
        adj : scipy sparse matrix

        Raises
        ------
        DGLError
            If ``adj`` is neither a CSR nor a COO scipy matrix.
        """
        if not isinstance(adj, (sp.csr_matrix, sp.coo_matrix)):
            raise DGLError("The input matrix has to be a SciPy sparse matrix.")
        out_mat = adj.tocoo()
        self._sparse.from_coo_matrix(out_mat)

    def from_edge_list(self, elist):
        """Convert from an edge list.

        Parameters
        ----------
        elist : list
            List of (u, v) edge tuple.
        """
        self._sparse.from_edge_list(elist)

    def line_graph(self, backtracking=True):
        """Return the line graph of this graph.

        Parameters
        ----------
        backtracking : bool, optional (default=True)
            Whether (i, j) ~ (j, i) in L(G).
            (i, j) ~ (j, i) is the behavior of networkx.line_graph.

        Returns
        -------
        ImmutableGraphIndex
            The line graph of this graph.

        Raises
        ------
        NotImplementedError
            Always; not implemented for immutable graphs.
        """
        raise NotImplementedError("immutable graph doesn't implement line_graph")
class ImmutableSubgraphIndex(ImmutableGraphIndex):
    """Graph index for an immutable subgraph.

    Besides the subgraph structure it remembers the parent graph index and
    how subgraph nodes/edges map back to parent ids.

    Parameters
    ----------
    backend_sparse : a sparse matrix from the backend framework.
        The sparse matrix that represents a subgraph.
    parent : GraphIndex
        The parent graph index.
    induced_nodes : tensor
        The parent node ids in this subgraph.
    induced_edges : a lambda function that returns a tensor
        The parent edge ids in this subgraph.
    """
    def __init__(self, backend_sparse, parent, induced_nodes, induced_edges):
        super(ImmutableSubgraphIndex, self).__init__(backend_sparse)
        self._parent = parent
        self._induced_nodes = induced_nodes
        self._induced_edges = induced_edges

    @property
    def induced_edges(self):
        """Return parent edge ids.

        Returns
        -------
        A function that returns utils.Index
            Calling it evaluates the stored edge-id callable and wraps the
            result, so the edge ids are only materialized on demand.
        """
        def _parent_edge_ids():
            raw_ids = self._induced_edges()
            return utils.toindex(raw_ids)
        return _parent_edge_ids

    @property
    def induced_nodes(self):
        """Return parent node ids.

        Returns
        -------
        utils.Index
            The parent node ids.
        """
        parent_nids = self._induced_nodes
        return utils.toindex(parent_nids)
def disjoint_union(graphs):
    """Return a disjoint union of the input graphs.

    The resulting graph would contain every node and edge of the inputs.
    Ids are relabeled by offsetting them with the cumulative sizes of the
    graphs that come earlier in the sequence. For example, for [g1, g2, g3]
    with 5, 6 and 7 nodes, node#2 of g2 becomes node#7 in the result.
    Edge ids are re-assigned in the same way.

    Parameters
    ----------
    graphs : iterable of GraphIndex
        The input graphs

    Returns
    -------
    GraphIndex
        The disjoint union

    Raises
    ------
    NotImplementedError
        Always; the operation is not available for immutable graphs yet.
    """
    reason = "immutable graph doesn't implement disjoint_union for now."
    raise NotImplementedError(reason)
def disjoint_partition(graph, num_or_size_splits):
    """Partition the graph disjointly.

    This is the inverse of a disjoint union. With an integer argument the
    graph is cut into that many parts, which requires the number of nodes to
    be evenly divisible by it. With a list of sizes, the sizes must add up
    to the number of nodes.

    Parameters
    ----------
    graph : GraphIndex
        The graph to be partitioned
    num_or_size_splits : int or utils.Index
        The partition number of size splits

    Returns
    -------
    list of GraphIndex
        The partitioned graphs

    Raises
    ------
    NotImplementedError
        Always; the operation is not available for immutable graphs yet.
    """
    reason = "immutable graph doesn't implement disjoint_partition for now."
    raise NotImplementedError(reason)
def create_immutable_graph_index(graph_data=None):
    """Create a graph index object.

    An existing ImmutableGraphIndex is returned as is. Otherwise the backend
    is first asked to build the index directly from ``graph_data``; if that
    fails, an empty index is created and filled from an edge list, a scipy
    sparse matrix, or anything networkx can interpret, in that order.

    Parameters
    ----------
    graph_data : graph data, optional
        Data to initialize graph. Same as networkx's semantics.

    Returns
    -------
    ImmutableGraphIndex
        The created (or passed-through) graph index.

    Raises
    ------
    DGLError
        If the fallback conversion for the detected input kind fails.
    """
    if isinstance(graph_data, ImmutableGraphIndex):
        return graph_data
    assert F.create_immutable_graph_index is not None, \
        "The selected backend doesn't support read-only graph!"

    # Fast path: let the backend consume graph_data directly (this also covers
    # graph_data being None). Any failure is deliberately ignored and the
    # format-specific converters below take over.
    try:
        backend_index = F.create_immutable_graph_index(graph_data)
        return ImmutableGraphIndex(backend_index)
    except Exception:  # pylint: disable=broad-except
        pass

    # Start from an empty graph index and pick the converter for the input kind.
    gidx = ImmutableGraphIndex(F.create_immutable_graph_index())
    if isinstance(graph_data, (list, tuple)):
        # edge list
        convert = gidx.from_edge_list
        failure = 'Graph data is not a valid edge list.'
    elif isinstance(graph_data, sp.spmatrix):
        # scipy format
        convert = gidx.from_scipy_sparse_matrix
        failure = 'Graph data is not a valid scipy sparse matrix.'
    else:
        # networkx - any format
        convert = gidx.from_networkx
        failure = ('Error while creating graph from input of type "%s".'
                   % type(graph_data))

    try:
        convert(graph_data)
    except Exception:  # pylint: disable=broad-except
        raise DGLError(failure)
    return gidx
_init_api("dgl.immutable_graph_index")
...@@ -12,6 +12,38 @@ ...@@ -12,6 +12,38 @@
#include "../c_api_common.h" #include "../c_api_common.h"
namespace dgl { namespace dgl {
// Construct a graph from a COO edge list.
//
// src_ids, dst_ids and edge_ids must be valid id arrays of equal length;
// entry i describes an edge src_ids[i] -> dst_ids[i] with id edge_ids[i].
// Every endpoint must be smaller than num_nodes and every edge id must be
// smaller than the number of edges.
Graph::Graph(IdArray src_ids, IdArray dst_ids, IdArray edge_ids, size_t num_nodes,
    bool multigraph): is_multigraph_(multigraph) {
  CHECK(IsValidIdArray(src_ids));
  CHECK(IsValidIdArray(dst_ids));
  CHECK(IsValidIdArray(edge_ids));
  this->AddVertices(num_nodes);
  const int64_t len = src_ids->shape[0];
  CHECK(len == dst_ids->shape[0]) << "vectors in COO must have the same length";
  CHECK(len == edge_ids->shape[0]) << "vectors in COO must have the same length";
  num_edges_ = len;
  const dgl_id_t *src_data = static_cast<const dgl_id_t*>(src_ids->data);
  const dgl_id_t *dst_data = static_cast<const dgl_id_t*>(dst_ids->data);
  const dgl_id_t *edge_data = static_cast<const dgl_id_t*>(edge_ids->data);
  // The endpoint tables are looked up by edge id elsewhere in this class, so
  // they are sized up front and filled at position eid rather than appended
  // in input order.
  all_edges_src_.resize(len);
  all_edges_dst_.resize(len);
  for (int64_t i = 0; i < len; ++i) {
    const dgl_id_t src = src_data[i];
    const dgl_id_t dst = dst_data[i];
    const dgl_id_t eid = edge_data[i];
    CHECK(HasVertex(src) && HasVertex(dst))
      << "Invalid vertices: src=" << src << " dst=" << dst;
    CHECK(eid < static_cast<dgl_id_t>(len)) << "Invalid edge id: " << eid;
    adjlist_[src].succ.push_back(dst);
    adjlist_[src].edge_id.push_back(eid);
    reverse_adjlist_[dst].succ.push_back(src);
    reverse_adjlist_[dst].edge_id.push_back(eid);
    all_edges_src_[eid] = src;
    all_edges_dst_[eid] = dst;
  }
}
void Graph::AddVertices(uint64_t num_vertices) { void Graph::AddVertices(uint64_t num_vertices) {
CHECK(!read_only_) << "Graph is read-only. Mutations are not allowed."; CHECK(!read_only_) << "Graph is read-only. Mutations are not allowed.";
adjlist_.resize(adjlist_.size() + num_vertices); adjlist_.resize(adjlist_.size() + num_vertices);
...@@ -333,13 +365,13 @@ Graph::EdgeArray Graph::OutEdges(IdArray vids) const { ...@@ -333,13 +365,13 @@ Graph::EdgeArray Graph::OutEdges(IdArray vids) const {
} }
// O(E*log(E)) if sort is required; otherwise, O(E) // O(E*log(E)) if sort is required; otherwise, O(E)
Graph::EdgeArray Graph::Edges(bool sorted) const { Graph::EdgeArray Graph::Edges(const std::string &order) const {
const int64_t len = num_edges_; const int64_t len = num_edges_;
IdArray src = IdArray::Empty({len}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); IdArray src = IdArray::Empty({len}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
IdArray dst = IdArray::Empty({len}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); IdArray dst = IdArray::Empty({len}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
IdArray eid = IdArray::Empty({len}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); IdArray eid = IdArray::Empty({len}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
if (sorted) { if (order == "srcdst") {
typedef std::tuple<int64_t, int64_t, int64_t> Tuple; typedef std::tuple<int64_t, int64_t, int64_t> Tuple;
std::vector<Tuple> tuples; std::vector<Tuple> tuples;
tuples.reserve(len); tuples.reserve(len);
...@@ -416,8 +448,9 @@ Subgraph Graph::VertexSubgraph(IdArray vids) const { ...@@ -416,8 +448,9 @@ Subgraph Graph::VertexSubgraph(IdArray vids) const {
oldv2newv[vid_data[i]] = i; oldv2newv[vid_data[i]] = i;
} }
Subgraph rst; Subgraph rst;
rst.graph = std::make_shared<Graph>(IsMultigraph());
rst.induced_vertices = vids; rst.induced_vertices = vids;
rst.graph.AddVertices(len); rst.graph->AddVertices(len);
for (int64_t i = 0; i < len; ++i) { for (int64_t i = 0; i < len; ++i) {
const dgl_id_t oldvid = vid_data[i]; const dgl_id_t oldvid = vid_data[i];
const dgl_id_t newvid = i; const dgl_id_t newvid = i;
...@@ -426,7 +459,7 @@ Subgraph Graph::VertexSubgraph(IdArray vids) const { ...@@ -426,7 +459,7 @@ Subgraph Graph::VertexSubgraph(IdArray vids) const {
if (oldv2newv.count(oldsucc)) { if (oldv2newv.count(oldsucc)) {
const dgl_id_t newsucc = oldv2newv[oldsucc]; const dgl_id_t newsucc = oldv2newv[oldsucc];
edges.push_back(adjlist_[oldvid].edge_id[j]); edges.push_back(adjlist_[oldvid].edge_id[j]);
rst.graph.AddEdge(newvid, newsucc); rst.graph->AddEdge(newvid, newsucc);
} }
} }
} }
...@@ -453,13 +486,14 @@ Subgraph Graph::EdgeSubgraph(IdArray eids) const { ...@@ -453,13 +486,14 @@ Subgraph Graph::EdgeSubgraph(IdArray eids) const {
} }
Subgraph rst; Subgraph rst;
rst.graph = std::make_shared<Graph>(IsMultigraph());
rst.induced_edges = eids; rst.induced_edges = eids;
rst.graph.AddVertices(nodes.size()); rst.graph->AddVertices(nodes.size());
for (int64_t i = 0; i < len; ++i) { for (int64_t i = 0; i < len; ++i) {
dgl_id_t src_id = all_edges_src_[eid_data[i]]; dgl_id_t src_id = all_edges_src_[eid_data[i]];
dgl_id_t dst_id = all_edges_dst_[eid_data[i]]; dgl_id_t dst_id = all_edges_dst_[eid_data[i]];
rst.graph.AddEdge(oldv2newv[src_id], oldv2newv[dst_id]); rst.graph->AddEdge(oldv2newv[src_id], oldv2newv[dst_id]);
} }
rst.induced_vertices = IdArray::Empty( rst.induced_vertices = IdArray::Empty(
...@@ -469,9 +503,59 @@ Subgraph Graph::EdgeSubgraph(IdArray eids) const { ...@@ -469,9 +503,59 @@ Subgraph Graph::EdgeSubgraph(IdArray eids) const {
return rst; return rst;
} }
Graph Graph::Reverse() const { std::vector<IdArray> Graph::GetAdj(bool transpose, const std::string &fmt) const {
int64_t num_edges = NumEdges();
int64_t num_nodes = NumVertices();
if (fmt == "coo") {
IdArray idx = IdArray::Empty({2 * num_edges}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
int64_t *idx_data = static_cast<int64_t*>(idx->data);
if (transpose) {
std::copy(all_edges_src_.begin(), all_edges_src_.end(), idx_data);
std::copy(all_edges_dst_.begin(), all_edges_dst_.end(), idx_data + num_edges);
} else {
std::copy(all_edges_dst_.begin(), all_edges_dst_.end(), idx_data);
std::copy(all_edges_src_.begin(), all_edges_src_.end(), idx_data + num_edges);
}
IdArray eid = IdArray::Empty({num_edges}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
int64_t *eid_data = static_cast<int64_t*>(eid->data);
for (uint64_t eid = 0; eid < num_edges; ++eid) {
eid_data[eid] = eid;
}
return std::vector<IdArray>{idx, eid};
} else if (fmt == "csr") {
IdArray indptr = IdArray::Empty({num_nodes + 1}, DLDataType{kDLInt, 64, 1},
DLContext{kDLCPU, 0});
IdArray indices = IdArray::Empty({num_edges}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
IdArray eid = IdArray::Empty({num_edges}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
int64_t *indptr_data = static_cast<int64_t*>(indptr->data);
int64_t *indices_data = static_cast<int64_t*>(indices->data);
int64_t *eid_data = static_cast<int64_t*>(eid->data);
const AdjacencyList *adjlist;
if (transpose) {
// Out-edges.
adjlist = &adjlist_;
} else {
// In-edges.
adjlist = &reverse_adjlist_;
}
indptr_data[0] = 0;
for (size_t i = 0; i < adjlist->size(); i++) {
indptr_data[i + 1] = indptr_data[i] + adjlist->at(i).succ.size();
std::copy(adjlist->at(i).succ.begin(), adjlist->at(i).succ.end(),
indices_data + indptr_data[i]);
std::copy(adjlist->at(i).edge_id.begin(), adjlist->at(i).edge_id.end(),
eid_data + indptr_data[i]);
}
return std::vector<IdArray>{indptr, indices, eid};
} else {
LOG(FATAL) << "unsupported format";
return std::vector<IdArray>();
}
}
GraphPtr Graph::Reverse() const {
LOG(FATAL) << "not implemented"; LOG(FATAL) << "not implemented";
return *this; return nullptr;
} }
} // namespace dgl } // namespace dgl
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
* \brief DGL graph index APIs * \brief DGL graph index APIs
*/ */
#include <dgl/graph.h> #include <dgl/graph.h>
#include <dgl/immutable_graph.h>
#include <dgl/graph_op.h> #include <dgl/graph_op.h>
#include "../c_api_common.h" #include "../c_api_common.h"
...@@ -17,7 +18,8 @@ namespace dgl { ...@@ -17,7 +18,8 @@ namespace dgl {
namespace { namespace {
// Convert EdgeArray structure to PackedFunc. // Convert EdgeArray structure to PackedFunc.
PackedFunc ConvertEdgeArrayToPackedFunc(const Graph::EdgeArray& ea) { template<class EdgeArray>
PackedFunc ConvertEdgeArrayToPackedFunc(const EdgeArray& ea) {
auto body = [ea] (DGLArgs args, DGLRetValue* rv) { auto body = [ea] (DGLArgs args, DGLRetValue* rv) {
const int which = args[0]; const int which = args[0];
if (which == 0) { if (which == 0) {
...@@ -33,13 +35,25 @@ PackedFunc ConvertEdgeArrayToPackedFunc(const Graph::EdgeArray& ea) { ...@@ -33,13 +35,25 @@ PackedFunc ConvertEdgeArrayToPackedFunc(const Graph::EdgeArray& ea) {
return PackedFunc(body); return PackedFunc(body);
} }
// Convert the list of adjacency arrays to a PackedFunc.
// The returned function takes one integer argument and yields the array at
// that position; any index outside [0, ea.size()) is a fatal error.
PackedFunc ConvertAdjToPackedFunc(const std::vector<IdArray>& ea) {
  auto body = [ea] (DGLArgs args, DGLRetValue* rv) {
    const int which = args[0];
    if (which >= 0 && static_cast<size_t>(which) < ea.size()) {
      // The capture is const inside the lambda, so the array is handed out by
      // copy; wrapping it in std::move would not move anything.
      *rv = ea[which];
    } else {
      LOG(FATAL) << "invalid choice";
    }
  };
  return PackedFunc(body);
}
// Convert Subgraph structure to PackedFunc. // Convert Subgraph structure to PackedFunc.
PackedFunc ConvertSubgraphToPackedFunc(const Subgraph& sg) { PackedFunc ConvertSubgraphToPackedFunc(const Subgraph& sg) {
auto body = [sg] (DGLArgs args, DGLRetValue* rv) { auto body = [sg] (DGLArgs args, DGLRetValue* rv) {
const int which = args[0]; const int which = args[0];
if (which == 0) { if (which == 0) {
Graph* gptr = new Graph(); GraphInterface* gptr = sg.graph->Reset();
*gptr = std::move(sg.graph);
GraphHandle ghandle = gptr; GraphHandle ghandle = gptr;
*rv = ghandle; *rv = ghandle;
} else if (which == 1) { } else if (which == 1) {
...@@ -53,26 +67,68 @@ PackedFunc ConvertSubgraphToPackedFunc(const Subgraph& sg) { ...@@ -53,26 +67,68 @@ PackedFunc ConvertSubgraphToPackedFunc(const Subgraph& sg) {
return PackedFunc(body); return PackedFunc(body);
} }
/*!
 * \brief Wrap a batch of sampled subgraphs in a PackedFunc.
 *
 * With n = sg.size(), the returned function takes one integer `which` and
 * returns, for subgraph i = which % n:
 *   - which in [0, n):     a GraphHandle obtained from sg[i].graph->Reset()
 *   - which in [n, 2n):    sg[i].induced_vertices
 *   - which in [2n, 3n):   sg[i].induced_edges
 *   - which in [3n, 4n):   sg[i].layer_ids
 *   - which in [4n, 5n):   sg[i].sample_prob
 * Any other value (including every value when the batch is empty) is
 * reported through LOG(FATAL) with the message "invalid choice".
 *
 * NOTE(review): Reset() presumably releases ownership of the graph to the
 * caller, so each graph should be requested only once -- confirm.
 *
 * \param sg The sampled subgraphs. The returned function keeps its own copy.
 */
PackedFunc ConvertSubgraphToPackedFunc(const std::vector<SampledSubgraph>& sg) {
  // TODO(minjie): figure out a better way of returning a complex results.
  auto body = [sg] (DGLArgs args, DGLRetValue* rv) {
      const int which = args[0];
      const size_t num_subgs = sg.size();
      // Compare in the unsigned domain explicitly. A negative selector wraps
      // to a huge value and falls through to the error branch.
      const size_t idx = static_cast<size_t>(which);
      if (idx < num_subgs) {
        GraphInterface* gptr = sg[idx].graph->Reset();
        GraphHandle ghandle = gptr;
        *rv = ghandle;
      } else if (idx < num_subgs * 2) {
        // `sg` is const inside this non-mutable lambda, so the arrays are
        // copied; std::move would have no effect here.
        *rv = sg[idx - num_subgs].induced_vertices;
      } else if (idx < num_subgs * 3) {
        *rv = sg[idx - num_subgs * 2].induced_edges;
      } else if (idx < num_subgs * 4) {
        *rv = sg[idx - num_subgs * 3].layer_ids;
      } else if (idx < num_subgs * 5) {
        *rv = sg[idx - num_subgs * 4].sample_prob;
      } else {
        LOG(FATAL) << "invalid choice";
      }
    };
  return PackedFunc(body);
}
} // namespace } // namespace
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphCreate") ///////////////////////////// Graph API ///////////////////////////////////
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphCreateMutable")
.set_body([] (DGLArgs args, DGLRetValue* rv) { .set_body([] (DGLArgs args, DGLRetValue* rv) {
bool multigraph = static_cast<bool>(args[0]); bool multigraph = static_cast<bool>(args[0]);
GraphHandle ghandle = new Graph(multigraph); GraphHandle ghandle = new Graph(multigraph);
*rv = ghandle; *rv = ghandle;
}); });
// Create a graph from edge arrays and return its handle.
// Arguments, in order: source ids, destination ids, edge ids, multigraph
// flag, number of nodes, readonly flag. An ImmutableGraph is created when the
// readonly flag is set, a Graph otherwise.
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphCreate")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
    const IdArray src_ids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[0]));
    const IdArray dst_ids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
    const IdArray edge_ids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[2]));
    const bool multigraph = static_cast<bool>(args[3]);
    const int64_t num_nodes = static_cast<int64_t>(args[4]);
    const bool readonly = static_cast<bool>(args[5]);
    // The other entry points recover the object with
    // static_cast<GraphInterface*>(handle). Convert to GraphInterface* first
    // so the handle is erased from exactly the type it is later cast back to.
    GraphInterface* gptr = nullptr;
    if (readonly)
      gptr = new ImmutableGraph(src_ids, dst_ids, edge_ids, num_nodes, multigraph);
    else
      gptr = new Graph(src_ids, dst_ids, edge_ids, num_nodes, multigraph);
    GraphHandle ghandle = gptr;
    *rv = ghandle;
  });
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphFree") DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphFree")
.set_body([] (DGLArgs args, DGLRetValue* rv) { .set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphHandle ghandle = args[0]; GraphHandle ghandle = args[0];
Graph* gptr = static_cast<Graph*>(ghandle); GraphInterface* gptr = static_cast<GraphInterface*>(ghandle);
delete gptr; delete gptr;
}); });
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphAddVertices") DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphAddVertices")
.set_body([] (DGLArgs args, DGLRetValue* rv) { .set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphHandle ghandle = args[0]; GraphHandle ghandle = args[0];
Graph* gptr = static_cast<Graph*>(ghandle); GraphInterface* gptr = static_cast<GraphInterface*>(ghandle);
uint64_t num_vertices = args[1]; uint64_t num_vertices = args[1];
gptr->AddVertices(num_vertices); gptr->AddVertices(num_vertices);
}); });
...@@ -80,7 +136,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphAddVertices") ...@@ -80,7 +136,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphAddVertices")
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphAddEdge") DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphAddEdge")
.set_body([] (DGLArgs args, DGLRetValue* rv) { .set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphHandle ghandle = args[0]; GraphHandle ghandle = args[0];
Graph* gptr = static_cast<Graph*>(ghandle); GraphInterface* gptr = static_cast<GraphInterface*>(ghandle);
const dgl_id_t src = args[1]; const dgl_id_t src = args[1];
const dgl_id_t dst = args[2]; const dgl_id_t dst = args[2];
gptr->AddEdge(src, dst); gptr->AddEdge(src, dst);
...@@ -89,7 +145,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphAddEdge") ...@@ -89,7 +145,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphAddEdge")
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphAddEdges") DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphAddEdges")
.set_body([] (DGLArgs args, DGLRetValue* rv) { .set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphHandle ghandle = args[0]; GraphHandle ghandle = args[0];
Graph* gptr = static_cast<Graph*>(ghandle); GraphInterface* gptr = static_cast<GraphInterface*>(ghandle);
const IdArray src = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1])); const IdArray src = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
const IdArray dst = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[2])); const IdArray dst = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[2]));
gptr->AddEdges(src, dst); gptr->AddEdges(src, dst);
...@@ -98,7 +154,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphAddEdges") ...@@ -98,7 +154,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphAddEdges")
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphClear") DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphClear")
.set_body([] (DGLArgs args, DGLRetValue* rv) { .set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphHandle ghandle = args[0]; GraphHandle ghandle = args[0];
Graph* gptr = static_cast<Graph*>(ghandle); GraphInterface* gptr = static_cast<GraphInterface*>(ghandle);
gptr->Clear(); gptr->Clear();
}); });
...@@ -106,28 +162,36 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphIsMultigraph") ...@@ -106,28 +162,36 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphIsMultigraph")
.set_body([] (DGLArgs args, DGLRetValue *rv) { .set_body([] (DGLArgs args, DGLRetValue *rv) {
GraphHandle ghandle = args[0]; GraphHandle ghandle = args[0];
// NOTE: not const since we have caches // NOTE: not const since we have caches
const Graph* gptr = static_cast<Graph*>(ghandle); const GraphInterface* gptr = static_cast<GraphInterface*>(ghandle);
*rv = gptr->IsMultigraph(); *rv = gptr->IsMultigraph();
}); });
// Report whether the graph behind the handle is read-only.
// Takes the graph handle as its only argument and returns IsReadonly().
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphIsReadonly")
.set_body([] (DGLArgs args, DGLRetValue *rv) {
    GraphHandle handle = args[0];
    // The query is made through a pointer-to-const.
    const auto* graph = static_cast<const GraphInterface*>(handle);
    *rv = graph->IsReadonly();
  });
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphNumVertices") DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphNumVertices")
.set_body([] (DGLArgs args, DGLRetValue* rv) { .set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphHandle ghandle = args[0]; GraphHandle ghandle = args[0];
const Graph* gptr = static_cast<Graph*>(ghandle); const GraphInterface* gptr = static_cast<GraphInterface*>(ghandle);
*rv = static_cast<int64_t>(gptr->NumVertices()); *rv = static_cast<int64_t>(gptr->NumVertices());
}); });
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphNumEdges") DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphNumEdges")
.set_body([] (DGLArgs args, DGLRetValue* rv) { .set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphHandle ghandle = args[0]; GraphHandle ghandle = args[0];
const Graph* gptr = static_cast<Graph*>(ghandle); const GraphInterface* gptr = static_cast<GraphInterface*>(ghandle);
*rv = static_cast<int64_t>(gptr->NumEdges()); *rv = static_cast<int64_t>(gptr->NumEdges());
}); });
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphHasVertex") DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphHasVertex")
.set_body([] (DGLArgs args, DGLRetValue* rv) { .set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphHandle ghandle = args[0]; GraphHandle ghandle = args[0];
const Graph* gptr = static_cast<Graph*>(ghandle); const GraphInterface* gptr = static_cast<GraphInterface*>(ghandle);
const dgl_id_t vid = args[1]; const dgl_id_t vid = args[1];
*rv = gptr->HasVertex(vid); *rv = gptr->HasVertex(vid);
}); });
...@@ -135,7 +199,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphHasVertex") ...@@ -135,7 +199,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphHasVertex")
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphHasVertices") DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphHasVertices")
.set_body([] (DGLArgs args, DGLRetValue* rv) { .set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphHandle ghandle = args[0]; GraphHandle ghandle = args[0];
const Graph* gptr = static_cast<Graph*>(ghandle); const GraphInterface* gptr = static_cast<GraphInterface*>(ghandle);
const IdArray vids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1])); const IdArray vids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
*rv = gptr->HasVertices(vids); *rv = gptr->HasVertices(vids);
}); });
...@@ -147,17 +211,10 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLMapSubgraphNID") ...@@ -147,17 +211,10 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLMapSubgraphNID")
*rv = GraphOp::MapParentIdToSubgraphId(parent_vids, query); *rv = GraphOp::MapParentIdToSubgraphId(parent_vids, query);
}); });
DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLExpandIds")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
const IdArray ids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[0]));
const IdArray offsets = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
*rv = GraphOp::ExpandIds(ids, offsets);
});
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphHasEdgeBetween") DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphHasEdgeBetween")
.set_body([] (DGLArgs args, DGLRetValue* rv) { .set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphHandle ghandle = args[0]; GraphHandle ghandle = args[0];
const Graph* gptr = static_cast<Graph*>(ghandle); const GraphInterface* gptr = static_cast<GraphInterface*>(ghandle);
const dgl_id_t src = args[1]; const dgl_id_t src = args[1];
const dgl_id_t dst = args[2]; const dgl_id_t dst = args[2];
*rv = gptr->HasEdgeBetween(src, dst); *rv = gptr->HasEdgeBetween(src, dst);
...@@ -166,7 +223,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphHasEdgeBetween") ...@@ -166,7 +223,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphHasEdgeBetween")
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphHasEdgesBetween") DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphHasEdgesBetween")
.set_body([] (DGLArgs args, DGLRetValue* rv) { .set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphHandle ghandle = args[0]; GraphHandle ghandle = args[0];
const Graph* gptr = static_cast<Graph*>(ghandle); const GraphInterface* gptr = static_cast<GraphInterface*>(ghandle);
const IdArray src = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1])); const IdArray src = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
const IdArray dst = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[2])); const IdArray dst = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[2]));
*rv = gptr->HasEdgesBetween(src, dst); *rv = gptr->HasEdgesBetween(src, dst);
...@@ -175,7 +232,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphHasEdgesBetween") ...@@ -175,7 +232,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphHasEdgesBetween")
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphPredecessors") DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphPredecessors")
.set_body([] (DGLArgs args, DGLRetValue* rv) { .set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphHandle ghandle = args[0]; GraphHandle ghandle = args[0];
const Graph* gptr = static_cast<Graph*>(ghandle); const GraphInterface* gptr = static_cast<GraphInterface*>(ghandle);
const dgl_id_t vid = args[1]; const dgl_id_t vid = args[1];
const uint64_t radius = args[2]; const uint64_t radius = args[2];
*rv = gptr->Predecessors(vid, radius); *rv = gptr->Predecessors(vid, radius);
...@@ -184,7 +241,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphPredecessors") ...@@ -184,7 +241,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphPredecessors")
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphSuccessors") DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphSuccessors")
.set_body([] (DGLArgs args, DGLRetValue* rv) { .set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphHandle ghandle = args[0]; GraphHandle ghandle = args[0];
const Graph* gptr = static_cast<Graph*>(ghandle); const GraphInterface* gptr = static_cast<GraphInterface*>(ghandle);
const dgl_id_t vid = args[1]; const dgl_id_t vid = args[1];
const uint64_t radius = args[2]; const uint64_t radius = args[2];
*rv = gptr->Successors(vid, radius); *rv = gptr->Successors(vid, radius);
...@@ -193,7 +250,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphSuccessors") ...@@ -193,7 +250,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphSuccessors")
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphEdgeId") DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphEdgeId")
.set_body([] (DGLArgs args, DGLRetValue* rv) { .set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphHandle ghandle = args[0]; GraphHandle ghandle = args[0];
const Graph* gptr = static_cast<Graph*>(ghandle); const GraphInterface* gptr = static_cast<GraphInterface*>(ghandle);
const dgl_id_t src = args[1]; const dgl_id_t src = args[1];
const dgl_id_t dst = args[2]; const dgl_id_t dst = args[2];
*rv = gptr->EdgeId(src, dst); *rv = gptr->EdgeId(src, dst);
...@@ -202,7 +259,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphEdgeId") ...@@ -202,7 +259,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphEdgeId")
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphEdgeIds") DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphEdgeIds")
.set_body([] (DGLArgs args, DGLRetValue* rv) { .set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphHandle ghandle = args[0]; GraphHandle ghandle = args[0];
const Graph* gptr = static_cast<Graph*>(ghandle); const GraphInterface* gptr = static_cast<GraphInterface*>(ghandle);
const IdArray src = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1])); const IdArray src = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
const IdArray dst = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[2])); const IdArray dst = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[2]));
*rv = ConvertEdgeArrayToPackedFunc(gptr->EdgeIds(src, dst)); *rv = ConvertEdgeArrayToPackedFunc(gptr->EdgeIds(src, dst));
...@@ -211,7 +268,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphEdgeIds") ...@@ -211,7 +268,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphEdgeIds")
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphFindEdges") DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphFindEdges")
.set_body([] (DGLArgs args, DGLRetValue* rv) { .set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphHandle ghandle = args[0]; GraphHandle ghandle = args[0];
const Graph* gptr = static_cast<Graph*>(ghandle); const GraphInterface* gptr = static_cast<GraphInterface*>(ghandle);
const IdArray eids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1])); const IdArray eids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
*rv = ConvertEdgeArrayToPackedFunc(gptr->FindEdges(eids)); *rv = ConvertEdgeArrayToPackedFunc(gptr->FindEdges(eids));
}); });
...@@ -219,7 +276,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphFindEdges") ...@@ -219,7 +276,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphFindEdges")
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphInEdges_1") DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphInEdges_1")
.set_body([] (DGLArgs args, DGLRetValue* rv) { .set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphHandle ghandle = args[0]; GraphHandle ghandle = args[0];
const Graph* gptr = static_cast<Graph*>(ghandle); const GraphInterface* gptr = static_cast<GraphInterface*>(ghandle);
const dgl_id_t vid = args[1]; const dgl_id_t vid = args[1];
*rv = ConvertEdgeArrayToPackedFunc(gptr->InEdges(vid)); *rv = ConvertEdgeArrayToPackedFunc(gptr->InEdges(vid));
}); });
...@@ -227,7 +284,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphInEdges_1") ...@@ -227,7 +284,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphInEdges_1")
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphInEdges_2") DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphInEdges_2")
.set_body([] (DGLArgs args, DGLRetValue* rv) { .set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphHandle ghandle = args[0]; GraphHandle ghandle = args[0];
const Graph* gptr = static_cast<Graph*>(ghandle); const GraphInterface* gptr = static_cast<GraphInterface*>(ghandle);
const IdArray vids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1])); const IdArray vids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
*rv = ConvertEdgeArrayToPackedFunc(gptr->InEdges(vids)); *rv = ConvertEdgeArrayToPackedFunc(gptr->InEdges(vids));
}); });
...@@ -235,7 +292,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphInEdges_2") ...@@ -235,7 +292,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphInEdges_2")
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphOutEdges_1") DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphOutEdges_1")
.set_body([] (DGLArgs args, DGLRetValue* rv) { .set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphHandle ghandle = args[0]; GraphHandle ghandle = args[0];
const Graph* gptr = static_cast<Graph*>(ghandle); const GraphInterface* gptr = static_cast<GraphInterface*>(ghandle);
const dgl_id_t vid = args[1]; const dgl_id_t vid = args[1];
*rv = ConvertEdgeArrayToPackedFunc(gptr->OutEdges(vid)); *rv = ConvertEdgeArrayToPackedFunc(gptr->OutEdges(vid));
}); });
...@@ -243,7 +300,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphOutEdges_1") ...@@ -243,7 +300,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphOutEdges_1")
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphOutEdges_2") DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphOutEdges_2")
.set_body([] (DGLArgs args, DGLRetValue* rv) { .set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphHandle ghandle = args[0]; GraphHandle ghandle = args[0];
const Graph* gptr = static_cast<Graph*>(ghandle); const GraphInterface* gptr = static_cast<GraphInterface*>(ghandle);
const IdArray vids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1])); const IdArray vids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
*rv = ConvertEdgeArrayToPackedFunc(gptr->OutEdges(vids)); *rv = ConvertEdgeArrayToPackedFunc(gptr->OutEdges(vids));
}); });
...@@ -251,15 +308,15 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphOutEdges_2") ...@@ -251,15 +308,15 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphOutEdges_2")
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphEdges") DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphEdges")
.set_body([] (DGLArgs args, DGLRetValue* rv) { .set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphHandle ghandle = args[0]; GraphHandle ghandle = args[0];
const Graph* gptr = static_cast<Graph*>(ghandle); const GraphInterface* gptr = static_cast<GraphInterface*>(ghandle);
const bool sorted = args[1]; std::string order = args[1];
*rv = ConvertEdgeArrayToPackedFunc(gptr->Edges(sorted)); *rv = ConvertEdgeArrayToPackedFunc(gptr->Edges(order));
}); });
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphInDegree") DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphInDegree")
.set_body([] (DGLArgs args, DGLRetValue* rv) { .set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphHandle ghandle = args[0]; GraphHandle ghandle = args[0];
const Graph* gptr = static_cast<Graph*>(ghandle); const GraphInterface* gptr = static_cast<GraphInterface*>(ghandle);
const dgl_id_t vid = args[1]; const dgl_id_t vid = args[1];
*rv = static_cast<int64_t>(gptr->InDegree(vid)); *rv = static_cast<int64_t>(gptr->InDegree(vid));
}); });
...@@ -267,7 +324,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphInDegree") ...@@ -267,7 +324,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphInDegree")
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphInDegrees") DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphInDegrees")
.set_body([] (DGLArgs args, DGLRetValue* rv) { .set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphHandle ghandle = args[0]; GraphHandle ghandle = args[0];
const Graph* gptr = static_cast<Graph*>(ghandle); const GraphInterface* gptr = static_cast<GraphInterface*>(ghandle);
const IdArray vids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1])); const IdArray vids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
*rv = gptr->InDegrees(vids); *rv = gptr->InDegrees(vids);
}); });
...@@ -275,7 +332,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphInDegrees") ...@@ -275,7 +332,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphInDegrees")
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphOutDegree") DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphOutDegree")
.set_body([] (DGLArgs args, DGLRetValue* rv) { .set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphHandle ghandle = args[0]; GraphHandle ghandle = args[0];
const Graph* gptr = static_cast<Graph*>(ghandle); const GraphInterface* gptr = static_cast<GraphInterface*>(ghandle);
const dgl_id_t vid = args[1]; const dgl_id_t vid = args[1];
*rv = static_cast<int64_t>(gptr->OutDegree(vid)); *rv = static_cast<int64_t>(gptr->OutDegree(vid));
}); });
...@@ -283,7 +340,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphOutDegree") ...@@ -283,7 +340,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphOutDegree")
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphOutDegrees") DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphOutDegrees")
.set_body([] (DGLArgs args, DGLRetValue* rv) { .set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphHandle ghandle = args[0]; GraphHandle ghandle = args[0];
const Graph* gptr = static_cast<Graph*>(ghandle); const GraphInterface* gptr = static_cast<GraphInterface*>(ghandle);
const IdArray vids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1])); const IdArray vids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
*rv = gptr->OutDegrees(vids); *rv = gptr->OutDegrees(vids);
}); });
...@@ -291,7 +348,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphOutDegrees") ...@@ -291,7 +348,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphOutDegrees")
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphVertexSubgraph") DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphVertexSubgraph")
.set_body([] (DGLArgs args, DGLRetValue* rv) { .set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphHandle ghandle = args[0]; GraphHandle ghandle = args[0];
const Graph* gptr = static_cast<Graph*>(ghandle); const GraphInterface* gptr = static_cast<GraphInterface*>(ghandle);
const IdArray vids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1])); const IdArray vids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
*rv = ConvertSubgraphToPackedFunc(gptr->VertexSubgraph(vids)); *rv = ConvertSubgraphToPackedFunc(gptr->VertexSubgraph(vids));
}); });
...@@ -299,7 +356,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphVertexSubgraph") ...@@ -299,7 +356,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphVertexSubgraph")
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphEdgeSubgraph") DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphEdgeSubgraph")
.set_body([] (DGLArgs args, DGLRetValue* rv) { .set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphHandle ghandle = args[0]; GraphHandle ghandle = args[0];
const Graph *gptr = static_cast<Graph*>(ghandle); const GraphInterface *gptr = static_cast<GraphInterface*>(ghandle);
const IdArray eids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1])); const IdArray eids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
*rv = ConvertSubgraphToPackedFunc(gptr->EdgeSubgraph(eids)); *rv = ConvertSubgraphToPackedFunc(gptr->EdgeSubgraph(eids));
}); });
...@@ -311,7 +368,9 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLDisjointUnion") ...@@ -311,7 +368,9 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLDisjointUnion")
int list_size = args[1]; int list_size = args[1];
std::vector<const Graph*> graphs; std::vector<const Graph*> graphs;
for (int i = 0; i < list_size; ++i) { for (int i = 0; i < list_size; ++i) {
const Graph* gr = static_cast<const Graph*>(inhandles[i]); const GraphInterface *ptr = static_cast<const GraphInterface *>(inhandles[i]);
const Graph* gr = dynamic_cast<const Graph*>(ptr);
CHECK(gr) << "_CAPI_DGLDisjointUnion isn't implemented in immutable graph";
graphs.push_back(gr); graphs.push_back(gr);
} }
Graph* gptr = new Graph(); Graph* gptr = new Graph();
...@@ -323,7 +382,9 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLDisjointUnion") ...@@ -323,7 +382,9 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLDisjointUnion")
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLDisjointPartitionByNum") DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLDisjointPartitionByNum")
.set_body([] (DGLArgs args, DGLRetValue* rv) { .set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphHandle ghandle = args[0]; GraphHandle ghandle = args[0];
const Graph* gptr = static_cast<Graph*>(ghandle); const GraphInterface *ptr = static_cast<const GraphInterface *>(ghandle);
const Graph* gptr = dynamic_cast<const Graph*>(ptr);
CHECK(gptr) << "_CAPI_DGLDisjointPartitionByNum isn't implemented in immutable graph";
int64_t num = args[1]; int64_t num = args[1];
std::vector<Graph>&& rst = GraphOp::DisjointPartitionByNum(gptr, num); std::vector<Graph>&& rst = GraphOp::DisjointPartitionByNum(gptr, num);
// return the pointer array as an integer array // return the pointer array as an integer array
...@@ -341,7 +402,9 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLDisjointPartitionByNum") ...@@ -341,7 +402,9 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLDisjointPartitionByNum")
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLDisjointPartitionBySizes") DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLDisjointPartitionBySizes")
.set_body([] (DGLArgs args, DGLRetValue* rv) { .set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphHandle ghandle = args[0]; GraphHandle ghandle = args[0];
const Graph* gptr = static_cast<Graph*>(ghandle); const GraphInterface *ptr = static_cast<const GraphInterface *>(ghandle);
const Graph* gptr = dynamic_cast<const Graph*>(ptr);
CHECK(gptr) << "_CAPI_DGLDisjointPartitionBySizes isn't implemented in immutable graph";
const IdArray sizes = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1])); const IdArray sizes = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1]));
std::vector<Graph>&& rst = GraphOp::DisjointPartitionBySizes(gptr, sizes); std::vector<Graph>&& rst = GraphOp::DisjointPartitionBySizes(gptr, sizes);
// return the pointer array as an integer array // return the pointer array as an integer array
...@@ -360,11 +423,62 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphLineGraph") ...@@ -360,11 +423,62 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphLineGraph")
.set_body([] (DGLArgs args, DGLRetValue* rv) { .set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphHandle ghandle = args[0]; GraphHandle ghandle = args[0];
bool backtracking = args[1]; bool backtracking = args[1];
const Graph* gptr = static_cast<Graph*>(ghandle); const GraphInterface *ptr = static_cast<const GraphInterface *>(ghandle);
const Graph* gptr = dynamic_cast<const Graph*>(ptr);
CHECK(gptr) << "_CAPI_DGLGraphLineGraph isn't implemented in immutable graph";
Graph* lgptr = new Graph(); Graph* lgptr = new Graph();
*lgptr = GraphOp::LineGraph(gptr, backtracking); *lgptr = GraphOp::LineGraph(gptr, backtracking);
GraphHandle lghandle = lgptr; GraphHandle lghandle = lgptr;
*rv = lghandle; *rv = lghandle;
}); });
/*!
 * \brief C API body for uniform neighbor sampling over a batch of seed arrays.
 *
 * Argument layout: args[0] is the graph handle, args[1..num_seeds] are the
 * seed id arrays, followed by the neighbor type string, the number of hops,
 * the number of neighbors and the number of valid seed arrays.
 * Only the first `num_valid_seeds` seed arrays are sampled; the result always
 * carries `num_seeds` entries, the remaining ones default-constructed.
 *
 * \tparam num_seeds Number of seed-array slots in the argument list.
 */
template<int num_seeds>
void CAPI_NeighborUniformSample(DGLArgs args, DGLRetValue* rv) {
  GraphHandle ghandle = args[0];
  // Seed arrays occupy argument slots 1 .. num_seeds.
  std::vector<IdArray> seed_arrays(num_seeds);
  int slot = 1;
  for (IdArray& seed : seed_arrays) {
    seed = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[slot]));
    ++slot;
  }
  // Scalar options follow the seed arrays.
  const int first_option = num_seeds + 1;
  std::string neigh_type = args[first_option];
  const int num_hops = args[first_option + 1];
  const int num_neighbors = args[first_option + 2];
  const int num_valid_seeds = args[first_option + 3];
  const GraphInterface *base = static_cast<const GraphInterface *>(ghandle);
  const ImmutableGraph *graph = dynamic_cast<const ImmutableGraph*>(base);
  CHECK(graph) << "sampling isn't implemented in mutable graph";
  CHECK(num_valid_seeds <= num_seeds);
  std::vector<SampledSubgraph> sampled(seed_arrays.size());
  // Each seed array is sampled independently and writes its own result slot.
#pragma omp parallel for
  for (int k = 0; k < num_valid_seeds; k++) {
    sampled[k] = graph->NeighborUniformSample(seed_arrays[k], neigh_type,
                                              num_hops, num_neighbors);
  }
  *rv = ConvertSubgraphToPackedFunc(sampled);
}
// Register one sampling entry point per supported seed-array count.
// The numeric suffix of each name matches the template argument, i.e. the
// number of seed-array slots that entry point reads from its arguments
// (the unsuffixed name reads a single seed array).
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphUniformSampling")
.set_body(CAPI_NeighborUniformSample<1>);
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphUniformSampling2")
.set_body(CAPI_NeighborUniformSample<2>);
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphUniformSampling4")
.set_body(CAPI_NeighborUniformSample<4>);
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphUniformSampling8")
.set_body(CAPI_NeighborUniformSample<8>);
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphUniformSampling16")
.set_body(CAPI_NeighborUniformSample<16>);
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphUniformSampling32")
.set_body(CAPI_NeighborUniformSample<32>);
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphUniformSampling64")
.set_body(CAPI_NeighborUniformSample<64>);
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphUniformSampling128")
.set_body(CAPI_NeighborUniformSample<128>);
// Fetch the adjacency structure of a graph.
// Arguments, in order: graph handle, transpose flag, format string.
// Returns a PackedFunc that yields the arrays produced by GetAdj by index.
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphGetAdj")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
    GraphHandle handle = args[0];
    bool transpose = args[1];
    std::string format = args[2];
    const auto* graph = static_cast<const GraphInterface*>(handle);
    *rv = ConvertAdjToPackedFunc(graph->GetAdj(transpose, format));
  });
} // namespace dgl } // namespace dgl
/*!
* Copyright (c) 2018 by Contributors
* \file graph/immutable_graph.cc
* \brief DGL immutable graph index implementation
*/
#include <dgl/immutable_graph.h>
#include <cstdlib>
// MSVC-only shim: provides a rand_r() for code in this file that calls it.
#ifdef _MSC_VER
// rand in MS compiler works well in multi-threading.
// Substitute for rand_r(): the `seed` argument is ignored and the call is
// forwarded to rand(), so the caller-supplied seed has no influence on the
// returned sequence.
int rand_r(unsigned *seed) {
return rand();
}
// NOTE(review): this macro is defined after <cstdlib> has already been
// included above, so it likely has no effect on that header -- confirm
// whether it is still needed.
#define _CRT_RAND_S
#endif
#include "../c_api_common.h"
namespace dgl {
/*!
 * \brief Test whether `value` occurs in the sorted range [first, last).
 *
 * The range must be ordered with respect to operator<.
 * \return true if an element equivalent to `value` is present.
 */
template<class ForwardIt, class T>
bool binary_search(ForwardIt first, ForwardIt last, const T& value) {
  // Position of the first element that is not less than `value`.
  const ForwardIt pos = std::lower_bound(first, last, value);
  if (pos == last) {
    return false;
  }
  // `*pos >= value` holds here; it is a match unless `value < *pos`.
  return !(value < *pos);
}
/*!
 * \brief Collect the edges stored in row `vid` of this CSR.
 *
 * For every entry of the row, the first returned array holds the stored
 * column index, the second holds `vid`, and the third holds the edge id.
 * All three arrays are 64-bit integer CPU arrays whose length is the degree
 * of `vid`. Fails a CHECK if `vid` is not a vertex of this CSR.
 */
ImmutableGraph::EdgeArray ImmutableGraph::CSR::GetEdges(dgl_id_t vid) const {
  CHECK(HasVertex(vid)) << "invalid vertex: " << vid;
  const int64_t row_begin = this->indptr[vid];
  const int64_t degree = this->GetDegree(vid);
  const DLDataType id_type{kDLInt, 64, 1};
  const DLContext cpu_ctx{kDLCPU, 0};
  IdArray src = IdArray::Empty({degree}, id_type, cpu_ctx);
  IdArray dst = IdArray::Empty({degree}, id_type, cpu_ctx);
  IdArray eid = IdArray::Empty({degree}, id_type, cpu_ctx);
  auto* src_out = static_cast<dgl_id_t*>(src->data);
  auto* dst_out = static_cast<dgl_id_t*>(dst->data);
  auto* eid_out = static_cast<dgl_id_t*>(eid->data);
  // Column indices of the row.
  for (int64_t k = 0; k < degree; ++k) {
    src_out[k] = this->indices[row_begin + k];
  }
  // Every edge of the row shares the same endpoint `vid`.
  std::fill(dst_out, dst_out + degree, vid);
  // Edge ids of the row.
  for (int64_t k = 0; k < degree; ++k) {
    eid_out[k] = this->edge_ids[row_begin + k];
  }
  return ImmutableGraph::EdgeArray{src, dst, eid};
}
/*!
 * \brief Collect the edges stored in the CSR rows of several vertices.
 *
 * Rows are emitted in the order the vertices appear in `vids`. For every
 * entry of a row, the first returned array holds the stored column index,
 * the second holds the row's vertex id, and the third holds the edge id.
 * The result arrays use the dtype and context of `vids`.
 * Fails a CHECK if `vids` is not a valid id array or contains an id that is
 * not a vertex of this CSR.
 */
ImmutableGraph::EdgeArray ImmutableGraph::CSR::GetEdges(IdArray vids) const {
  CHECK(IsValidIdArray(vids)) << "Invalid vertex id array.";
  const auto len = vids->shape[0];
  const dgl_id_t* vid_data = static_cast<dgl_id_t*>(vids->data);
  // First pass: validate every vertex and size the output.
  int64_t rstlen = 0;
  for (int64_t i = 0; i < len; ++i) {
    dgl_id_t vid = vid_data[i];
    CHECK(HasVertex(vid)) << "Invalid vertex: " << vid;
    rstlen += this->GetDegree(vid);
  }
  IdArray src = IdArray::Empty({rstlen}, vids->dtype, vids->ctx);
  IdArray dst = IdArray::Empty({rstlen}, vids->dtype, vids->ctx);
  IdArray eid = IdArray::Empty({rstlen}, vids->dtype, vids->ctx);
  dgl_id_t* src_ptr = static_cast<dgl_id_t*>(src->data);
  dgl_id_t* dst_ptr = static_cast<dgl_id_t*>(dst->data);
  dgl_id_t* eid_ptr = static_cast<dgl_id_t*>(eid->data);
  // Second pass: copy each row out.
  for (int64_t i = 0; i < len; ++i) {
    const dgl_id_t vid = vid_data[i];
    const int64_t off = this->indptr[vid];
    // Named `degree` so it does not shadow the outer `len`.
    const int64_t degree = this->GetDegree(vid);
    // Index inside the loop rather than taking &indices[off] up front: for a
    // zero-degree row at the end of the arrays, `off` is one past the last
    // element and must not be indexed.
    for (int64_t j = 0; j < degree; ++j) {
      *(src_ptr++) = this->indices[off + j];
      *(dst_ptr++) = vid;
      *(eid_ptr++) = this->edge_ids[off + j];
    }
  }
  return ImmutableGraph::EdgeArray{src, dst, eid};
}
/*!
 * \brief Look up the degree of every vertex listed in `vids`.
 *
 * The result has the same length, dtype and context as `vids`; entry i is
 * GetDegree(vids[i]). Fails a CHECK if `vids` is not a valid id array or
 * contains an id that is not a vertex of this CSR.
 */
DegreeArray ImmutableGraph::CSR::GetDegrees(IdArray vids) const {
  CHECK(IsValidIdArray(vids)) << "Invalid vertex id array.";
  const auto count = vids->shape[0];
  const dgl_id_t* query = static_cast<dgl_id_t*>(vids->data);
  DegreeArray degrees = DegreeArray::Empty({count}, vids->dtype, vids->ctx);
  dgl_id_t* out = static_cast<dgl_id_t*>(degrees->data);
  int64_t pos = 0;
  while (pos < count) {
    const auto vertex = query[pos];
    CHECK(HasVertex(vertex)) << "Invalid vertex: " << vertex;
    out[pos] = this->GetDegree(vertex);
    ++pos;
  }
  return degrees;
}
/*
 * A fixed-capacity bit set over vertex Ids.
 * Every Id is folded onto one of `num_slots` slots by masking its low bits,
 * so two Ids that share those bits use the same slot: test() may report true
 * for an Id that was never added, but it never reports false for one that was.
 */
class Bitmap {
  // Number of slots. It is a power of two so that `num_slots - 1` is a bit mask.
  const size_t num_slots = 1024 * 1024 * 4;
  const size_t slot_mask = num_slots - 1;
  std::vector<bool> bits;

  // Slot that the given Id is folded onto.
  size_t SlotOf(dgl_id_t id) const {
    return id & slot_mask;
  }

 public:
  // Mark the slot of each of the `len` Ids stored at `vid_data`.
  Bitmap(const dgl_id_t *vid_data, int64_t len): bits(num_slots) {
    int64_t pos = 0;
    while (pos < len) {
      bits[SlotOf(vid_data[pos])] = true;
      ++pos;
    }
  }

  // True if the slot of `id` has been marked.
  bool test(dgl_id_t id) const {
    const size_t slot = SlotOf(id);
    return bits[slot];
  }
};
/*
* This uses a hashtable to check if a node is in the given node list.
*/
class HashTableChecker {
std::unordered_map<dgl_id_t, dgl_id_t> oldv2newv;
// This bitmap is used as a bloom filter to remove some lookups.
// Hashtable is very slow. Using bloom filter can significantly speed up lookups.
Bitmap map;
/*
* This is to test if a vertex is in the induced subgraph.
* If it is, the edge on this vertex and the source vertex will be collected.
* `old_id` is the vertex we test, `old_eid` is the edge Id between the `old_id`
* and the source vertex. `col_idx` and `orig_eids` store the collected edges.
*/
void Collect(const dgl_id_t old_id, const dgl_id_t old_eid,
std::vector<dgl_id_t> *col_idx,
std::vector<dgl_id_t> *orig_eids) {
if (!map.test(old_id))
return;
auto it = oldv2newv.find(old_id);
if (it != oldv2newv.end()) {
const dgl_id_t new_id = it->second;
col_idx->push_back(new_id);
if (orig_eids)
orig_eids->push_back(old_eid);
}
}
public:
HashTableChecker(const dgl_id_t *vid_data, int64_t len): map(vid_data, len) {
oldv2newv.reserve(len);
for (int64_t i = 0; i < len; ++i) {
oldv2newv[vid_data[i]] = i;
}
}
/*
* This is to collect edges from the neighborhood of a vertex.
* `neigh_idx`, `eids` and `row_len` indicates the neighbor list of the vertex.
* The collected edges are stored in `new_neigh_idx` and `orig_eids`.
*/
void CollectOnRow(const dgl_id_t neigh_idx[], const dgl_id_t eids[], size_t row_len,
std::vector<dgl_id_t> *new_neigh_idx,
std::vector<dgl_id_t> *orig_eids) {
// TODO(zhengda) I need to make sure the column index in each row is sorted.
for (size_t j = 0; j < row_len; ++j) {
const dgl_id_t oldsucc = neigh_idx[j];
const dgl_id_t eid = eids[j];
Collect(oldsucc, eid, new_neigh_idx, orig_eids);
}
}
};
/*
 * Build the CSR of the subgraph induced by `vids`.
 * `vids` has to be sorted. Row i of the result belongs to vids[i] and keeps
 * only the neighbors that are in `vids` as well, renumbered to their subgraph
 * Ids. Edges of the result are numbered 0, 1, 2, ... in storage order; the
 * returned array gives, for each of them, the edge Id in this CSR.
 */
std::pair<ImmutableGraph::CSR::Ptr, IdArray> ImmutableGraph::CSR::VertexSubgraph(
    IdArray vids) const {
  CHECK(IsValidIdArray(vids)) << "Invalid vertex id array.";
  const int64_t len = vids->shape[0];
  const dgl_id_t* vid_data = static_cast<dgl_id_t*>(vids->data);
  HashTableChecker def_check(vid_data, len);
  // check if varr is sorted.
  CHECK(std::is_sorted(vid_data, vid_data + len)) << "The input vertex list has to be sorted";

  // Original Ids of the edges that survive, in the order they are collected.
  std::vector<dgl_id_t> kept_eids;
  kept_eids.reserve(len);
  auto sub_csr = std::make_shared<CSR>(len, len);
  std::vector<dgl_id_t> &sub_indices = sub_csr->indices;
  sub_csr->indptr[0] = 0;
  for (int64_t row = 0; row < len; ++row) {
    const dgl_id_t oldvid = vid_data[row];
    CHECK_LT(oldvid, NumVertices()) << "Vertex Id " << oldvid << " isn't in a graph of "
                                    << NumVertices() << " vertices";
    const size_t row_start = indptr[oldvid];
    const size_t row_end = indptr[oldvid + 1];
    def_check.CollectOnRow(&indices[row_start], &edge_ids[row_start], row_end - row_start,
                           &sub_indices, &kept_eids);
    // Everything collected so far belongs to rows 0..row.
    sub_csr->indptr[row + 1] = sub_indices.size();
  }

  // The subgraph gets fresh, consecutive edge Ids.
  const size_t num_sub_edges = sub_indices.size();
  sub_csr->edge_ids.resize(num_sub_edges);
  for (size_t e = 0; e < num_sub_edges; ++e) {
    sub_csr->edge_ids[e] = e;
  }

  IdArray rst_eids = IdArray::Empty({static_cast<int64_t>(kept_eids.size())},
                                    DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
  std::copy(kept_eids.begin(), kept_eids.end(), static_cast<dgl_id_t*>(rst_eids->data));
  return std::pair<ImmutableGraph::CSR::Ptr, IdArray>(sub_csr, rst_eids);
}
/*
 * Build a CSR from an edge list.
 * `edges` is sorted in place, first on end_points[sort_on] and then on the
 * other end point. `sort_on` must be 0 or 1 and selects which end point
 * indexes the rows; the other end point is stored in `indices`.
 * Every end point must be smaller than `num_nodes`; the returned CSR has
 * `num_nodes` rows (indptr has num_nodes + 1 entries).
 */
ImmutableGraph::CSR::Ptr ImmutableGraph::CSR::FromEdges(std::vector<Edge> *edges,
int sort_on, int64_t num_nodes) {
CHECK(sort_on == 0 || sort_on == 1) << "we must sort on the first or the second vector";
int other_end = sort_on == 1 ? 0 : 1;
// TODO(zhengda) we should sort in parallel.
std::sort(edges->begin(), edges->end(), [sort_on, other_end](const Edge &e1, const Edge &e2) {
if (e1.end_points[sort_on] == e2.end_points[sort_on]) {
return e1.end_points[other_end] < e2.end_points[other_end];
} else {
return e1.end_points[sort_on] < e2.end_points[sort_on];
}
});
// NOTE(review): the size checks below only hold if a CSR created with (0, 0)
// starts with exactly one indptr entry -- presumably a leading 0; confirm
// against the CSR constructor.
auto t = std::make_shared<CSR>(0, 0);
t->indices.resize(edges->size());
t->edge_ids.resize(edges->size());
for (size_t i = 0; i < edges->size(); i++) {
t->indices[i] = edges->at(i).end_points[other_end];
CHECK(t->indices[i] < num_nodes);
t->edge_ids[i] = edges->at(i).edge_id;
dgl_id_t vid = edges->at(i).end_points[sort_on];
CHECK(vid < num_nodes);
// Edge i is the first edge of row `vid` if indptr doesn't reach `vid` yet.
// Every row between the previous row and `vid` (inclusive) starts at i,
// which leaves the skipped rows empty.
while (vid > 0 && t->indptr.size() <= static_cast<size_t>(vid)) {
t->indptr.push_back(i);
}
CHECK(t->indptr.size() == vid + 1);
}
// Rows after the last one that has edges are empty, and the final entry
// closes the last row: all of them point to the end of the edge list.
while (t->indptr.size() < num_nodes + 1) {
t->indptr.push_back(edges->size());
}
CHECK(t->indptr.size() == num_nodes + 1);
return t;
}
void ImmutableGraph::CSR::ReadAllEdges(std::vector<Edge> *edges) const {
edges->resize(NumEdges());
for (size_t i = 0; i < NumVertices(); i++) {
const dgl_id_t *indices_begin = &indices[indptr[i]];
const dgl_id_t *eid_begin = &edge_ids[indptr[i]];
for (size_t j = 0; j < GetDegree(i); j++) {
Edge e;
e.end_points[0] = i;
e.end_points[1] = indices_begin[j];
e.edge_id = eid_begin[j];
(*edges)[indptr[i] + j] = e;
}
}
}
/*
 * Build the transposed CSR: read out all edges and rebuild a CSR whose rows
 * are indexed by the second end point of every edge.
 */
ImmutableGraph::CSR::Ptr ImmutableGraph::CSR::Transpose() const {
  std::vector<Edge> all_edges;
  this->ReadAllEdges(&all_edges);
  // Rows of the transposed matrix are the columns (end point 1) of this one.
  const int row_end_point = 1;
  return FromEdges(&all_edges, row_end_point, NumVertices());
}
/*
 * Construct an immutable graph from an edge list.
 * Edge i goes from src_ids[i] to dst_ids[i] and has the Id edge_ids[i]; the
 * three arrays must be valid Id arrays of the same length. `num_nodes` is the
 * number of vertices and `multigraph` is stored as the multigraph flag.
 * Both the in-CSR (rows indexed by destination) and the out-CSR (rows indexed
 * by source) are built eagerly.
 */
ImmutableGraph::ImmutableGraph(IdArray src_ids, IdArray dst_ids, IdArray edge_ids, size_t num_nodes,
                               bool multigraph) : is_multigraph_(multigraph) {
  CHECK(IsValidIdArray(src_ids)) << "Invalid src id array.";
  CHECK(IsValidIdArray(dst_ids)) << "Invalid dst id array.";
  CHECK(IsValidIdArray(edge_ids)) << "Invalid edge id array.";
  const int64_t len = src_ids->shape[0];
  CHECK(len == dst_ids->shape[0]) << "The src and dst id arrays must have the same length.";
  CHECK(len == edge_ids->shape[0]) << "The src and edge id arrays must have the same length.";
  const dgl_id_t *src_data = static_cast<dgl_id_t*>(src_ids->data);
  const dgl_id_t *dst_data = static_cast<dgl_id_t*>(dst_ids->data);
  const dgl_id_t *edge_data = static_cast<dgl_id_t*>(edge_ids->data);
  std::vector<Edge> edges(len);
  for (size_t i = 0; i < edges.size(); i++) {
    Edge e;
    e.end_points[0] = src_data[i];
    e.end_points[1] = dst_data[i];
    e.edge_id = edge_data[i];
    edges[i] = e;
  }
  // FromEdges sorts `edges` in place, so the same vector can feed both CSRs.
  in_csr_ = CSR::FromEdges(&edges, 1, num_nodes);
  out_csr_ = CSR::FromEdges(&edges, 0, num_nodes);
}
/*
 * Test which of the given vertex Ids exist in this graph.
 * Entry i of the result is 1 if vids[i] is smaller than the number of
 * vertices and 0 otherwise. The result reuses the dtype and context of `vids`.
 */
BoolArray ImmutableGraph::HasVertices(IdArray vids) const {
  CHECK(IsValidIdArray(vids)) << "Invalid vertex id array.";
  const int64_t count = vids->shape[0];
  BoolArray flags = BoolArray::Empty({count}, vids->dtype, vids->ctx);
  const dgl_id_t* vid_data = static_cast<dgl_id_t*>(vids->data);
  dgl_id_t* flag_data = static_cast<dgl_id_t*>(flags->data);
  const int64_t nverts = NumVertices();
  for (int64_t k = 0; k < count; ++k) {
    if (vid_data[k] < nverts) {
      flag_data[k] = 1;
    } else {
      flag_data[k] = 0;
    }
  }
  return flags;
}
/*
 * Test whether there is an edge from `src` to `dst`.
 * Returns false if either vertex doesn't exist. The in-CSR is searched when it
 * is available; otherwise the out-CSR is searched, which must then exist.
 */
bool ImmutableGraph::HasEdgeBetween(dgl_id_t src, dgl_id_t dst) const {
  if (!(HasVertex(src) && HasVertex(dst))) {
    return false;
  }
  if (!this->in_csr_) {
    CHECK(this->out_csr_) << "one of the CSRs must exist";
    // Look for `dst` among the successors of `src`.
    auto successors = this->out_csr_->GetIndexRef(src);
    return dgl::binary_search(successors.begin(), successors.end(), dst);
  }
  // Look for `src` among the predecessors of `dst`.
  auto predecessors = this->in_csr_->GetIndexRef(dst);
  return dgl::binary_search(predecessors.begin(), predecessors.end(), src);
}
/*
 * Test the existence of several edges at once.
 * If `src_ids` has a single element it is paired with every destination
 * (one-many); otherwise, if `dst_ids` has a single element it is paired with
 * every source (many-one); otherwise the arrays must have the same length and
 * are paired element by element. Entry i of the result is 1 if pair i is
 * connected and 0 if not. The result reuses the dtype and context of `src_ids`.
 */
BoolArray ImmutableGraph::HasEdgesBetween(IdArray src_ids, IdArray dst_ids) const {
  CHECK(IsValidIdArray(src_ids)) << "Invalid src id array.";
  CHECK(IsValidIdArray(dst_ids)) << "Invalid dst id array.";
  const auto srclen = src_ids->shape[0];
  const auto dstlen = dst_ids->shape[0];
  BoolArray rst = BoolArray::Empty({std::max(srclen, dstlen)}, src_ids->dtype, src_ids->ctx);
  dgl_id_t* rst_data = static_cast<dgl_id_t*>(rst->data);
  const dgl_id_t* src_data = static_cast<dgl_id_t*>(src_ids->data);
  const dgl_id_t* dst_data = static_cast<dgl_id_t*>(dst_ids->data);

  // Exactly one of the three pairing modes applies; one-many takes precedence.
  const bool broadcast_src = (srclen == 1);
  const bool broadcast_dst = !broadcast_src && (dstlen == 1);
  if (!broadcast_src && !broadcast_dst) {
    // many-many
    CHECK(srclen == dstlen) << "Invalid src and dst id array.";
  }
  const int64_t num_pairs = broadcast_src ? dstlen : srclen;
  for (int64_t i = 0; i < num_pairs; ++i) {
    const dgl_id_t from = src_data[broadcast_src ? 0 : i];
    const dgl_id_t to = dst_data[broadcast_dst ? 0 : i];
    rst_data[i] = HasEdgeBetween(from, to) ? 1 : 0;
  }
  return rst;
}
/*
 * Return the Ids stored in the in-CSR row of `vid`, i.e. its direct
 * predecessors, as a new int64 CPU array.
 * `radius` is only validated to be at least 1; it is not used otherwise.
 */
IdArray ImmutableGraph::Predecessors(dgl_id_t vid, uint64_t radius) const {
  CHECK(HasVertex(vid)) << "invalid vertex: " << vid;
  CHECK(radius >= 1) << "invalid radius: " << radius;
  auto in_neighbors = this->GetInCSR()->GetIndexRef(vid);
  const int64_t count = in_neighbors.size();
  IdArray result = IdArray::Empty({count}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
  std::copy(in_neighbors.begin(), in_neighbors.end(), static_cast<dgl_id_t*>(result->data));
  return result;
}
/*
 * Return the Ids stored in the out-CSR row of `vid`, i.e. its direct
 * successors, as a new int64 CPU array.
 * `radius` is only validated to be at least 1; it is not used otherwise.
 */
IdArray ImmutableGraph::Successors(dgl_id_t vid, uint64_t radius) const {
  CHECK(HasVertex(vid)) << "invalid vertex: " << vid;
  CHECK(radius >= 1) << "invalid radius: " << radius;
  auto out_neighbors = this->GetOutCSR()->GetIndexRef(vid);
  const int64_t count = out_neighbors.size();
  IdArray result = IdArray::Empty({count}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
  std::copy(out_neighbors.begin(), out_neighbors.end(), static_cast<dgl_id_t*>(result->data));
  return result;
}
/*
 * Find the Ids of the edges from `src` to `dst` using the in-CSR, which must
 * exist. The result is a range over the stored edge Ids; it is empty if the
 * two vertices aren't connected. The lookup is a binary search, so it relies
 * on the predecessors of `dst` being sorted.
 */
DGLIdIters ImmutableGraph::GetInEdgeIdRef(dgl_id_t src, dgl_id_t dst) const {
  CHECK(this->in_csr_);
  auto pred = this->in_csr_->GetIndexRef(dst);
  const auto first = std::lower_bound(pred.begin(), pred.end(), src);
  const bool connected = (first != pred.end()) && (*first == src);
  if (!connected) {
    // An empty range signals that there is no such edge.
    return DGLIdIters(first, first);
  }
  // Position of the first match inside the whole index array; the edge Ids
  // are stored at the same positions.
  size_t off = first - in_csr_->indices.begin();
  CHECK(off < in_csr_->indices.size());
  auto start = in_csr_->edge_ids.begin() + off;
  // A multigraph may store `src` several times in a row: cover the whole run.
  auto last = first;
  while (last != pred.end() && *last == src) {
    ++last;
  }
  const int64_t run_len = last - first;
  return DGLIdIters(start, start + run_len);
}
/*
 * Find the Ids of the edges from `src` to `dst` using the out-CSR, which must
 * exist. The result is a range over the stored edge Ids; it is empty if the
 * two vertices aren't connected. The lookup is a binary search, so it relies
 * on the successors of `src` being sorted.
 */
DGLIdIters ImmutableGraph::GetOutEdgeIdRef(dgl_id_t src, dgl_id_t dst) const {
  CHECK(this->out_csr_);
  auto succ = this->out_csr_->GetIndexRef(src);
  const auto first = std::lower_bound(succ.begin(), succ.end(), dst);
  const bool connected = (first != succ.end()) && (*first == dst);
  if (!connected) {
    // An empty range signals that there is no such edge.
    return DGLIdIters(first, first);
  }
  // Position of the first match inside the whole index array; the edge Ids
  // are stored at the same positions.
  size_t off = first - out_csr_->indices.begin();
  CHECK(off < out_csr_->indices.size());
  auto start = out_csr_->edge_ids.begin() + off;
  // A multigraph may store `dst` several times in a row: cover the whole run.
  auto last = first;
  while (last != succ.end() && *last == dst) {
    ++last;
  }
  const int64_t run_len = last - first;
  return DGLIdIters(start, start + run_len);
}
/*
 * Return the Ids of all edges from `src` to `dst` as a new int64 CPU array
 * (empty if there is no such edge). Both vertices must exist.
 * The in-CSR is used for the lookup when it is available, the out-CSR otherwise.
 */
IdArray ImmutableGraph::EdgeId(dgl_id_t src, dgl_id_t dst) const {
  CHECK(HasVertex(src) && HasVertex(dst)) << "invalid edge: " << src << " -> " << dst;
  auto matches = in_csr_ ? GetInEdgeIdRef(src, dst) : GetOutEdgeIdRef(src, dst);
  const int64_t num_matches = matches.size();
  IdArray rst = IdArray::Empty({num_matches}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
  if (num_matches <= 0) {
    return rst;
  }
  std::copy(matches.begin(), matches.end(), static_cast<dgl_id_t*>(rst->data));
  return rst;
}
/*
 * Look up the edges between several (source, destination) pairs.
 * The two arrays must have the same length, or one of them must have a single
 * element which is then paired with every element of the other one.
 * For every edge found, its source, destination and edge Id are appended to
 * the three result arrays; pairs without an edge contribute nothing.
 * The result arrays reuse the dtype and context of `src_ids`.
 */
ImmutableGraph::EdgeArray ImmutableGraph::EdgeIds(IdArray src_ids, IdArray dst_ids) const {
  CHECK(IsValidIdArray(src_ids)) << "Invalid src id array.";
  CHECK(IsValidIdArray(dst_ids)) << "Invalid dst id array.";
  const auto srclen = src_ids->shape[0];
  const auto dstlen = dst_ids->shape[0];
  CHECK((srclen == dstlen) || (srclen == 1) || (dstlen == 1))
    << "Invalid src and dst id array.";
  // A stride of 0 keeps re-reading the single element of the broadcast side.
  const int src_stride = (srclen == 1 && dstlen != 1) ? 0 : 1;
  const int dst_stride = (dstlen == 1 && srclen != 1) ? 0 : 1;
  const dgl_id_t* src_data = static_cast<dgl_id_t*>(src_ids->data);
  const dgl_id_t* dst_data = static_cast<dgl_id_t*>(dst_ids->data);

  std::vector<dgl_id_t> src, dst, eid;
  int64_t i = 0;
  int64_t j = 0;
  while (i < srclen && j < dstlen) {
    const dgl_id_t src_id = src_data[i];
    const dgl_id_t dst_id = dst_data[j];
    CHECK(HasVertex(src_id) && HasVertex(dst_id)) <<
      "invalid edge: " << src_id << " -> " << dst_id;
    auto edges = this->in_csr_ ? GetInEdgeIdRef(src_id, dst_id) : GetOutEdgeIdRef(src_id, dst_id);
    const size_t num_found = edges.size();
    src.insert(src.end(), num_found, src_id);
    dst.insert(dst.end(), num_found, dst_id);
    for (size_t k = 0; k < num_found; k++) {
      eid.push_back(edges[k]);
    }
    i += src_stride;
    j += dst_stride;
  }

  const int64_t rstlen = src.size();
  IdArray rst_src = IdArray::Empty({rstlen}, src_ids->dtype, src_ids->ctx);
  IdArray rst_dst = IdArray::Empty({rstlen}, src_ids->dtype, src_ids->ctx);
  IdArray rst_eid = IdArray::Empty({rstlen}, src_ids->dtype, src_ids->ctx);
  std::copy(src.begin(), src.end(), static_cast<dgl_id_t*>(rst_src->data));
  std::copy(dst.begin(), dst.end(), static_cast<dgl_id_t*>(rst_dst->data));
  std::copy(eid.begin(), eid.end(), static_cast<dgl_id_t*>(rst_eid->data));
  return ImmutableGraph::EdgeArray{rst_src, rst_dst, rst_eid};
}
/*
 * Return all edges of the graph as three int64 CPU arrays (source,
 * destination, edge Id).
 * `order` selects the ordering: "" or "srcdst" returns the edges in the
 * storage order of the out-CSR (grouped by source vertex); "eid" returns them
 * sorted by edge Id. Any other value is a fatal error.
 */
ImmutableGraph::EdgeArray ImmutableGraph::Edges(const std::string &order) const {
  const int64_t rstlen = NumEdges();
  const DLDataType id_type = DLDataType{kDLInt, 64, 1};
  const DLContext cpu = DLContext{kDLCPU, 0};
  IdArray rst_src = IdArray::Empty({rstlen}, id_type, cpu);
  IdArray rst_dst = IdArray::Empty({rstlen}, id_type, cpu);
  IdArray rst_eid = IdArray::Empty({rstlen}, id_type, cpu);
  dgl_id_t* rst_src_data = static_cast<dgl_id_t*>(rst_src->data);
  dgl_id_t* rst_dst_data = static_cast<dgl_id_t*>(rst_dst->data);
  dgl_id_t* rst_eid_data = static_cast<dgl_id_t*>(rst_eid->data);

  const bool in_storage_order = order.empty() || order == "srcdst";
  if (in_storage_order) {
    auto out_csr = GetOutCSR();
    // Row `row` of the out-CSR holds the edges leaving vertex `row`, so the
    // source of every edge in that row is the row number itself.
    const size_t num_rows = out_csr->indptr.size() - 1;
    for (size_t row = 0; row < num_rows; ++row) {
      for (int64_t pos = out_csr->indptr[row]; pos < out_csr->indptr[row + 1]; ++pos) {
        rst_src_data[pos] = static_cast<dgl_id_t>(row);
      }
    }
    std::copy(out_csr->indices.begin(), out_csr->indices.end(), rst_dst_data);
    std::copy(out_csr->edge_ids.begin(), out_csr->edge_ids.end(), rst_eid_data);
  } else if (order != "eid") {
    LOG(FATAL) << "unsupported order " << order;
  } else {
    auto out_csr = GetOutCSR();
    std::vector<Edge> edges;
    out_csr->ReadAllEdges(&edges);
    std::sort(edges.begin(), edges.end(), [](const Edge &e1, const Edge &e2) {
      return e1.edge_id < e2.edge_id;
    });
    size_t pos = 0;
    for (const Edge &e : edges) {
      rst_src_data[pos] = e.end_points[0];
      rst_dst_data[pos] = e.end_points[1];
      rst_eid_data[pos] = e.edge_id;
      ++pos;
    }
  }
  return ImmutableGraph::EdgeArray{rst_src, rst_dst, rst_eid};
}
/*
 * Build the subgraph induced by `vids`.
 * Only one CSR of the subgraph is generated here: the out-CSR if this graph
 * has one, otherwise the in-CSR (which must then exist). The result records
 * `vids` as the induced vertices and the original edge Ids as induced edges.
 */
Subgraph ImmutableGraph::VertexSubgraph(IdArray vids) const {
  std::pair<CSR::Ptr, IdArray> extracted;
  GraphPtr induced;
  if (!out_csr_) {
    CHECK(in_csr_);
    extracted = in_csr_->VertexSubgraph(vids);
    // When we generate a subgraph, it may be used by only accessing in-edges or out-edges.
    // We don't need to generate both.
    induced = GraphPtr(new ImmutableGraph(extracted.first, nullptr, IsMultigraph()));
  } else {
    // We prefer to generate a subgraph for out-csr first.
    extracted = out_csr_->VertexSubgraph(vids);
    induced = GraphPtr(new ImmutableGraph(nullptr, extracted.first, IsMultigraph()));
  }
  Subgraph subg;
  subg.graph = induced;
  subg.induced_vertices = vids;
  subg.induced_edges = extracted.second;
  return subg;
}
/*
 * Edge-induced subgraphs aren't supported by the immutable graph.
 * Calling this logs a fatal error; `eids` is ignored. The empty subgraph is
 * returned only to satisfy the signature.
 */
Subgraph ImmutableGraph::EdgeSubgraph(IdArray eids) const {
  LOG(FATAL) << "EdgeSubgraph isn't implemented in immutable graph";
  Subgraph unsupported;
  return unsupported;
}
/*
 * Copy the in-CSR into three new int64 CPU arrays: the row offsets (indptr),
 * the stored neighbor Ids (indices) and the stored edge Ids.
 */
ImmutableGraph::CSRArray ImmutableGraph::GetInCSRArray() const {
  auto in_csr = GetInCSR();
  const DLDataType id_type = DLDataType{kDLInt, 64, 1};
  const DLContext cpu = DLContext{kDLCPU, 0};
  const int64_t indptr_len = static_cast<int64_t>(in_csr->indptr.size());
  const int64_t num_edges = static_cast<int64_t>(in_csr->NumEdges());

  IdArray indptr = IdArray::Empty({indptr_len}, id_type, cpu);
  IdArray indices = IdArray::Empty({num_edges}, id_type, cpu);
  IdArray eids = IdArray::Empty({num_edges}, id_type, cpu);

  std::copy(in_csr->indptr.begin(), in_csr->indptr.end(),
            static_cast<int64_t*>(indptr->data));
  std::copy(in_csr->indices.begin(), in_csr->indices.end(),
            static_cast<dgl_id_t*>(indices->data));
  std::copy(in_csr->edge_ids.begin(), in_csr->edge_ids.end(),
            static_cast<dgl_id_t*>(eids->data));
  return CSRArray{indptr, indices, eids};
}
/*
 * Copy the out-CSR into three new int64 CPU arrays: the row offsets (indptr),
 * the stored neighbor Ids (indices) and the stored edge Ids.
 */
ImmutableGraph::CSRArray ImmutableGraph::GetOutCSRArray() const {
  auto out_csr = GetOutCSR();
  const DLDataType id_type = DLDataType{kDLInt, 64, 1};
  const DLContext cpu = DLContext{kDLCPU, 0};
  const int64_t indptr_len = static_cast<int64_t>(out_csr->indptr.size());
  const int64_t num_edges = static_cast<int64_t>(out_csr->NumEdges());

  IdArray indptr = IdArray::Empty({indptr_len}, id_type, cpu);
  IdArray indices = IdArray::Empty({num_edges}, id_type, cpu);
  IdArray eids = IdArray::Empty({num_edges}, id_type, cpu);

  std::copy(out_csr->indptr.begin(), out_csr->indptr.end(),
            static_cast<int64_t*>(indptr->data));
  std::copy(out_csr->indices.begin(), out_csr->indices.end(),
            static_cast<dgl_id_t*>(indices->data));
  std::copy(out_csr->edge_ids.begin(), out_csr->edge_ids.end(),
            static_cast<dgl_id_t*>(eids->data));
  return CSRArray{indptr, indices, eids};
}
/*
 * Export the adjacency structure of the graph.
 * `transpose` selects the out-CSR (true) or the in-CSR (false) as the source.
 * With fmt == "csr" the result is {indptr, indices, edge Ids}.
 * With fmt == "coo" the result is {idx, edge Ids} where `idx` is one array of
 * 2 * #edges entries: the first half holds the row of every stored entry and
 * the second half holds its column.
 * Any other format is a fatal error.
 */
std::vector<IdArray> ImmutableGraph::GetAdj(bool transpose, const std::string &fmt) const {
  if (fmt == "csr") {
    CSRArray arrs = transpose ? this->GetOutCSRArray() : this->GetInCSRArray();
    return std::vector<IdArray>{arrs.indptr, arrs.indices, arrs.id};
  }
  if (fmt != "coo") {
    LOG(FATAL) << "unsupported adjacency matrix format";
    return std::vector<IdArray>();
  }

  const int64_t num_edges = this->NumEdges();
  IdArray idx = IdArray::Empty({2 * num_edges}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
  IdArray eid = IdArray::Empty({num_edges}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
  CSR::Ptr csr = transpose ? GetOutCSR() : GetInCSR();
  int64_t *row_out = static_cast<int64_t*>(idx->data);
  int64_t *col_out = row_out + num_edges;
  // Expand the row offsets: every entry stored in row `row` gets `row` as its row index.
  const size_t num_rows = csr->indptr.size() - 1;
  for (size_t row = 0; row < num_rows; ++row) {
    const int64_t row_end = csr->indptr[row + 1];
    for (int64_t pos = csr->indptr[row]; pos < row_end; ++pos) {
      row_out[pos] = row;
    }
  }
  std::copy(csr->indices.begin(), csr->indices.end(), col_out);
  std::copy(csr->edge_ids.begin(), csr->edge_ids.end(), static_cast<dgl_id_t*>(eid->data));
  return std::vector<IdArray>{idx, eid};
}
////////////////////////////// Graph Sampling ///////////////////////////////
/*
 * ArrayHeap is used to sample elements from a vector with a probability
 * proportional to their (non-negative) weights.
 *
 * The weights are the leaves of a complete binary tree stored in an array
 * (the root is at index 1, the children of node j are 2j and 2j+1) and every
 * inner node holds the sum of its two children. A sample walks from the root
 * to a leaf in O(log m) steps; an update touches the O(log m) ancestors of
 * one leaf.
 */
class ArrayHeap {
 public:
  explicit ArrayHeap(const std::vector<float>& prob) {
    vec_size_ = prob.size();
    // Depth of the smallest complete tree whose leaf level holds all weights.
    // Computed with integers so that an empty vector is handled (depth 0) and
    // no floating point rounding is involved.
    bit_len_ = 0;
    while ((1 << bit_len_) < vec_size_) {
      ++bit_len_;
    }
    limit_ = 1 << bit_len_;
    // allocate twice the size: inner nodes in [1, limit_), leaves in [limit_, 2 * limit_)
    heap_.resize(limit_ << 1, 0);
    // allocate the leaves; the padding leaves keep weight 0
    for (int i = 0; i < vec_size_; ++i) {
      heap_[limit_ + i] = prob[i];
    }
    // iterate up the tree (this is O(m)); children are always visited
    // before their parent because they have larger indices.
    for (int j = limit_ - 1; j >= 1; --j) {
      heap_[j] = heap_[j << 1] + heap_[(j << 1) + 1];
    }
  }
  ~ArrayHeap() {}
  /*
   * Remove term from index (this costs O(log m) steps)
   */
  void Delete(size_t index) {
    const size_t leaf = index + limit_;
    heap_[leaf] = 0;
    RecomputeAncestors(leaf);
  }
  /*
   * Add value w to index (this costs O(log m) steps)
   */
  void Add(size_t index, float w) {
    const size_t leaf = index + limit_;
    heap_[leaf] += w;
    RecomputeAncestors(leaf);
  }
  /*
   * Sample from arrayHeap.
   * Returns the index of an element chosen with a probability proportional
   * to its weight. As long as the total weight is positive, the returned
   * element has a positive weight (so it is neither deleted nor padding).
   * If the total weight is zero the returned index is meaningless.
   */
  size_t Sample(unsigned int* seed) {
    // Draw from [0, total weight) using the full resolution of rand_r.
    float xi = heap_[1] * (rand_r(seed) / (RAND_MAX + 1.0));
    int i = 1;
    while (i < limit_) {
      i = i << 1;
      // Move to the right child if the draw falls past the left subtree.
      // The right child must have weight left: rounding can push xi up to the
      // weight of the current node, and without this guard the walk could
      // end in a leaf of weight zero.
      if (xi >= heap_[i] && heap_[i + 1] > 0) {
        xi -= heap_[i];
        i += 1;
      }
    }
    return i - limit_;
  }
  /*
   * Sample a vector by given the size n.
   * `samples` must already have at least n elements; its first n elements
   * are overwritten. Every sampled element is deleted from the heap.
   */
  void SampleWithoutReplacement(size_t n, std::vector<size_t>* samples, unsigned int* seed) {
    // sample n elements
    for (size_t i = 0; i < n; ++i) {
      samples->at(i) = this->Sample(seed);
      this->Delete(samples->at(i));
    }
  }

 private:
  /*
   * Re-derive the sums of all ancestors of node i from their children.
   * Recomputing (instead of adding or subtracting a delta) keeps every inner
   * node exactly equal to the sum of its children, so a node is positive only
   * if one of its children is.
   */
  void RecomputeAncestors(size_t i) {
    for (i >>= 1; i >= 1; i >>= 1) {
      heap_[i] = heap_[i << 1] + heap_[(i << 1) + 1];
    }
  }

  int vec_size_;  // sample size
  int bit_len_;   // bit size
  int limit_;     // index of the first leaf, also the number of leaves
  std::vector<float> heap_;
};
/*
 * Uniformly sample integers from [0, set_size) without replacement.
 *
 * `out` is overwritten with the sampled integers in unspecified order.
 * At most `set_size` distinct integers exist, so if `num` is larger than
 * `set_size` only `set_size` integers are returned; in particular nothing is
 * sampled from an empty set.
 */
static void RandomSample(size_t set_size,
                         size_t num,
                         std::vector<size_t>* out,
                         unsigned int* seed) {
  // Asking for more distinct values than exist would never terminate, and
  // a zero set size would make the modulo below undefined.
  const size_t target = std::min(num, set_size);
  std::unordered_set<size_t> sampled_idxs;
  while (sampled_idxs.size() < target) {
    sampled_idxs.insert(rand_r(seed) % set_size);
  }
  out->assign(sampled_idxs.begin(), sampled_idxs.end());
}
/*
 * For a sparse array whose non-zeros are represented by nz_idxs,
 * negate the sparse array and outputs the non-zeros in the negated array.
 *
 * In other words, every index of [0, arr_size) that isn't listed in `nz_idxs`
 * is appended to `out` in increasing order. `nz_idxs` must be sorted and all
 * of its entries must be smaller than `arr_size`. An empty `nz_idxs` yields
 * all indices.
 */
static void NegateArray(const std::vector<size_t> &nz_idxs,
                        size_t arr_size,
                        std::vector<size_t>* out) {
  // nz_idxs must have been sorted, so its last entry is the largest one.
  // Only look at it if there is one: back() on an empty vector is undefined.
  if (!nz_idxs.empty()) {
    CHECK_GT(arr_size, nz_idxs.back());
  }
  auto it = nz_idxs.begin();
  for (size_t i = 0; i < arr_size; i++) {
    if (it != nz_idxs.end() && *it == i) {
      // i is listed: skip it and wait for the next listed index.
      it++;
      continue;
    }
    out->push_back(i);
  }
}
/*
* Uniform sample vertices from a list of vertices.
*/
static void GetUniformSample(const dgl_id_t* val_list,
const dgl_id_t* ver_list,
const size_t ver_len,
const size_t max_num_neighbor,
std::vector<dgl_id_t>* out_ver,
std::vector<dgl_id_t>* out_edge,
unsigned int* seed) {
// Copy ver_list to output
if (ver_len <= max_num_neighbor) {
for (size_t i = 0; i < ver_len; ++i) {
out_ver->push_back(ver_list[i]);
out_edge->push_back(val_list[i]);
}
return;
}
// If we just sample a small number of elements from a large neighbor list.
std::vector<size_t> sorted_idxs;
if (ver_len > max_num_neighbor * 2) {
sorted_idxs.reserve(max_num_neighbor);
RandomSample(ver_len, max_num_neighbor, &sorted_idxs, seed);
std::sort(sorted_idxs.begin(), sorted_idxs.end());
} else {
std::vector<size_t> negate;
negate.reserve(ver_len - max_num_neighbor);
RandomSample(ver_len, ver_len - max_num_neighbor,
&negate, seed);
std::sort(negate.begin(), negate.end());
NegateArray(negate, ver_len, &sorted_idxs);
}
// verify the result.
CHECK_EQ(sorted_idxs.size(), max_num_neighbor);
for (size_t i = 1; i < sorted_idxs.size(); i++) {
CHECK_GT(sorted_idxs[i], sorted_idxs[i - 1]);
}
for (auto idx : sorted_idxs) {
out_ver->push_back(ver_list[idx]);
out_edge->push_back(val_list[idx]);
}
}
/*
* Non-uniform sample via ArrayHeap
*/
static void GetNonUniformSample(const float* probability,
const dgl_id_t* val_list,
const dgl_id_t* ver_list,
const size_t ver_len,
const size_t max_num_neighbor,
std::vector<dgl_id_t>* out_ver,
std::vector<dgl_id_t>* out_edge,
unsigned int* seed) {
// Copy ver_list to output
if (ver_len <= max_num_neighbor) {
for (size_t i = 0; i < ver_len; ++i) {
out_ver->push_back(ver_list[i]);
out_edge->push_back(val_list[i]);
}
return;
}
// Make sample
std::vector<size_t> sp_index(max_num_neighbor);
std::vector<float> sp_prob(ver_len);
for (size_t i = 0; i < ver_len; ++i) {
sp_prob[i] = probability[ver_list[i]];
}
ArrayHeap arrayHeap(sp_prob);
arrayHeap.SampleWithoutReplacement(max_num_neighbor, &sp_index, seed);
out_ver->resize(max_num_neighbor);
out_edge->resize(max_num_neighbor);
for (size_t i = 0; i < max_num_neighbor; ++i) {
size_t idx = sp_index[i];
out_ver->at(i) = ver_list[idx];
out_edge->at(i) = val_list[idx];
}
sort(out_ver->begin(), out_ver->end());
sort(out_edge->begin(), out_edge->end());
}
/*
 * Used for subgraph sampling: a list of neighbor vertices together with the
 * list of edges, both copied from the vectors given to the constructor.
 */
struct neigh_list {
  std::vector<dgl_id_t> neighs;
  std::vector<dgl_id_t> edges;

  neigh_list(const std::vector<dgl_id_t> &neigh_ids,
             const std::vector<dgl_id_t> &edge_ids)
    : neighs(neigh_ids), edges(edge_ids) {
  }
};
/*
 * Sample a multi-hop neighborhood around the seed vertices.
 *
 * Starting from the vertices in `seed_arr`, the graph is traversed breadth
 * first. For every visited vertex whose level is below `num_hops`, at most
 * `num_neighbor` of its neighbors are sampled and added to the subgraph.
 *
 * seed_arr:     the seed vertices (level 0); duplicates are ignored.
 * probability:  sampling weight of every vertex, indexed by vertex Id.
 *               If it is null, neighbors are sampled uniformly.
 * neigh_type:   "in" samples from the in-CSR, any other value from the out-CSR.
 * num_hops:     vertices at this level (or above) aren't expanded any more.
 * num_neighbor: the maximal number of neighbors sampled per vertex.
 *
 * In the result, `induced_vertices` lists the sampled vertices sorted by Id,
 * `layer_ids` gives the level at which each of them was first reached and
 * `induced_edges` gives the original Ids of the sampled edges. `sample_prob`
 * is only written when `probability` isn't null. The graph of the result has
 * a single CSR (the in-CSR for "in", the out-CSR otherwise) whose rows follow
 * the order of `induced_vertices` and whose stored neighbor Ids are still the
 * vertex Ids of this graph.
 */
SampledSubgraph ImmutableGraph::SampleSubgraph(IdArray seed_arr,
const float* probability,
const std::string &neigh_type,
int num_hops,
size_t num_neighbor) const {
// The random number generator is seeded with the current time.
unsigned int time_seed = time(nullptr);
size_t num_seeds = seed_arr->shape[0];
auto orig_csr = neigh_type == "in" ? GetInCSR() : GetOutCSR();
const dgl_id_t* val_list = orig_csr->edge_ids.data();
const dgl_id_t* col_list = orig_csr->indices.data();
const int64_t* indptr = orig_csr->indptr.data();
const dgl_id_t* seed = static_cast<dgl_id_t*>(seed_arr->data);
// BFS traverse the graph and sample vertices
// <vertex_id, layer_id>
std::unordered_set<dgl_id_t> sub_ver_map;
std::vector<std::pair<dgl_id_t, dgl_id_t> > sub_vers;
sub_vers.reserve(num_seeds * 10);
// add seed vertices
for (size_t i = 0; i < num_seeds; ++i) {
auto ret = sub_ver_map.insert(seed[i]);
// If the vertex is inserted successfully.
if (ret.second) {
sub_vers.emplace_back(seed[i], 0);
}
}
std::vector<dgl_id_t> tmp_sampled_src_list;
std::vector<dgl_id_t> tmp_sampled_edge_list;
// ver_id, position
std::vector<std::pair<dgl_id_t, size_t> > neigh_pos;
neigh_pos.reserve(num_seeds);
// Flat storage for the sampled neighborhoods. The record of one vertex is
// [n, n neighbor Ids, n edge Ids]; neigh_pos remembers where each record starts.
std::vector<dgl_id_t> neighbor_list;
int64_t num_edges = 0;
// sub_vers is used both as a node collection and a queue.
// In the while loop, we iterate over sub_vers and new nodes are added to the vector.
// A vertex in the vector only needs to be accessed once. If there is a vertex behind idx
// isn't in the last level, we will sample its neighbors. If not, the while loop terminates.
size_t idx = 0;
while (idx < sub_vers.size()) {
dgl_id_t dst_id = sub_vers[idx].first;
int cur_node_level = sub_vers[idx].second;
idx++;
// If the node is in the last level, we don't need to sample neighbors
// from this node.
if (cur_node_level >= num_hops)
continue;
tmp_sampled_src_list.clear();
tmp_sampled_edge_list.clear();
// Length of the CSR row of dst_id.
dgl_id_t ver_len = *(indptr+dst_id+1) - *(indptr+dst_id);
if (probability == nullptr) { // uniform-sample
GetUniformSample(val_list + *(indptr + dst_id),
col_list + *(indptr + dst_id),
ver_len,
num_neighbor,
&tmp_sampled_src_list,
&tmp_sampled_edge_list,
&time_seed);
} else { // non-uniform-sample
GetNonUniformSample(probability,
val_list + *(indptr + dst_id),
col_list + *(indptr + dst_id),
ver_len,
num_neighbor,
&tmp_sampled_src_list,
&tmp_sampled_edge_list,
&time_seed);
}
CHECK_EQ(tmp_sampled_src_list.size(), tmp_sampled_edge_list.size());
size_t pos = neighbor_list.size();
neigh_pos.emplace_back(dst_id, pos);
// First we push the size of neighbor vector
neighbor_list.push_back(tmp_sampled_edge_list.size());
// Then push the vertices
for (size_t i = 0; i < tmp_sampled_src_list.size(); ++i) {
neighbor_list.push_back(tmp_sampled_src_list[i]);
}
// Finally we push the edge list
for (size_t i = 0; i < tmp_sampled_edge_list.size(); ++i) {
neighbor_list.push_back(tmp_sampled_edge_list[i]);
}
num_edges += tmp_sampled_src_list.size();
for (size_t i = 0; i < tmp_sampled_src_list.size(); ++i) {
// We need to add the neighbor in the hashtable here. This ensures that
// the vertex in the queue is unique. If we see a vertex before, we don't
// need to add it to the queue again.
auto ret = sub_ver_map.insert(tmp_sampled_src_list[i]);
// If the sampled neighbor is inserted to the map successfully.
if (ret.second)
sub_vers.emplace_back(tmp_sampled_src_list[i], cur_node_level + 1);
}
}
// Let's check if there is a vertex that we haven't sampled its neighbors.
// NOTE(review): the while loop above only ends once idx has reached
// sub_vers.size(), so the body of this loop looks unreachable -- confirm.
for (; idx < sub_vers.size(); idx++) {
if (sub_vers[idx].second < num_hops) {
LOG(WARNING)
<< "The sampling is truncated because we have reached the max number of vertices\n"
<< "Please use a smaller number of seeds or a small neighborhood";
break;
}
}
// Copy sub_ver_map to output[0]
// Copy layer
int64_t num_vertices = sub_ver_map.size();
// From here on the sampled vertices are ordered by vertex Id.
std::sort(sub_vers.begin(), sub_vers.end(),
[](const std::pair<dgl_id_t, dgl_id_t> &a1, const std::pair<dgl_id_t, dgl_id_t> &a2) {
return a1.first < a2.first;
});
SampledSubgraph subg;
subg.induced_vertices = IdArray::Empty({num_vertices},
DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
subg.induced_edges = IdArray::Empty({num_edges},
DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
subg.layer_ids = IdArray::Empty({num_vertices},
DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
subg.sample_prob = runtime::NDArray::Empty({num_vertices},
DLDataType{kDLFloat, 32, 1}, DLContext{kDLCPU, 0});
dgl_id_t *out = static_cast<dgl_id_t *>(subg.induced_vertices->data);
dgl_id_t *out_layer = static_cast<dgl_id_t *>(subg.layer_ids->data);
for (size_t i = 0; i < sub_vers.size(); i++) {
out[i] = sub_vers[i].first;
out_layer[i] = sub_vers[i].second;
}
// Copy sub_probability
// NOTE(review): sample_prob isn't written when probability is null, so its
// content is whatever NDArray::Empty provides in that case -- confirm that
// callers don't read it then.
float *sub_prob = static_cast<float *>(subg.sample_prob->data);
if (probability != nullptr) {
for (size_t i = 0; i < sub_ver_map.size(); ++i) {
dgl_id_t idx = out[i];
sub_prob[i] = probability[idx];
}
}
// Construct sub_csr_graph
auto subg_csr = std::make_shared<CSR>(num_vertices, num_edges);
subg_csr->indices.resize(num_edges);
subg_csr->edge_ids.resize(num_edges);
// The original edge Ids are written to induced_edges; the CSR itself gets
// new consecutive edge Ids further below.
dgl_id_t* val_list_out = static_cast<dgl_id_t *>(subg.induced_edges->data);
dgl_id_t* col_list_out = subg_csr->indices.data();
// NOTE(review): indptr_out[0] is read below but never written here;
// presumably the CSR constructor sizes indptr to num_vertices + 1 and
// initializes it to 0 -- confirm.
int64_t* indptr_out = subg_csr->indptr.data();
size_t collected_nedges = 0;
// Both the out array and neigh_pos are sorted. By scanning the two arrays, we can see
// which vertices have neighbors and which don't.
std::sort(neigh_pos.begin(), neigh_pos.end(),
[](const std::pair<dgl_id_t, size_t> &a1, const std::pair<dgl_id_t, size_t> &a2) {
return a1.first < a2.first;
});
size_t idx_with_neigh = 0;
for (size_t i = 0; i < num_vertices; i++) {
dgl_id_t dst_id = *(out + i);
// If a vertex is in sub_ver_map but not in neigh_pos, this vertex must not
// have edges.
size_t edge_size = 0;
if (idx_with_neigh < neigh_pos.size() && dst_id == neigh_pos[idx_with_neigh].first) {
size_t pos = neigh_pos[idx_with_neigh].second;
CHECK_LT(pos, neighbor_list.size());
edge_size = neighbor_list[pos];
CHECK_LE(pos + edge_size * 2 + 1, neighbor_list.size());
// The neighbor Ids follow the size entry of the record ...
std::copy_n(neighbor_list.begin() + pos + 1,
edge_size,
col_list_out + collected_nedges);
// ... and the edge Ids follow the neighbor Ids.
std::copy_n(neighbor_list.begin() + pos + edge_size + 1,
edge_size,
val_list_out + collected_nedges);
collected_nedges += edge_size;
idx_with_neigh++;
}
indptr_out[i+1] = indptr_out[i] + edge_size;
}
// Edges of the subgraph are numbered in storage order.
for (size_t i = 0; i < subg_csr->edge_ids.size(); i++)
subg_csr->edge_ids[i] = i;
if (neigh_type == "in")
subg.graph = GraphPtr(new ImmutableGraph(subg_csr, nullptr, IsMultigraph()));
else
subg.graph = GraphPtr(new ImmutableGraph(nullptr, subg_csr, IsMultigraph()));
return subg;
}
/*
 * Rewrite every stored neighbor Id of `subg` with the Id that `id_map`
 * assigns to it. Every stored Id must be a key of `id_map`.
 */
void CompactSubgraph(ImmutableGraph::CSR *subg,
                     const std::unordered_map<dgl_id_t, dgl_id_t> &id_map) {
  for (auto &vertex : subg->indices) {
    const auto it = id_map.find(vertex);
    CHECK(it != id_map.end());
    vertex = it->second;
  }
}
/*
 * Renumber the neighbor Ids stored in the CSRs of this graph.
 * A vertex Id found at position i of `induced_vertices` is replaced by i (the
 * first position counts if an Id is listed more than once). Whichever of the
 * in-CSR and the out-CSR exists is rewritten.
 */
void ImmutableGraph::CompactSubgraph(IdArray induced_vertices) {
  const dgl_id_t *vdata = static_cast<dgl_id_t *>(induced_vertices->data);
  const size_t num_induced = induced_vertices->shape[0];
  // The key is the old id, the value is the id in the subgraph.
  std::unordered_map<dgl_id_t, dgl_id_t> id_map;
  for (size_t new_id = 0; new_id < num_induced; ++new_id) {
    id_map.emplace(vdata[new_id], new_id);
  }
  if (in_csr_) {
    dgl::CompactSubgraph(in_csr_.get(), id_map);
  }
  if (out_csr_) {
    dgl::CompactSubgraph(out_csr_.get(), id_map);
  }
}
/*
 * Sample a neighborhood of `seeds` with uniform probability.
 * `expand_factor` is the maximal number of neighbors sampled per vertex.
 * After sampling, the neighbor Ids stored in the sampled graph are renumbered
 * to positions in the induced vertex list of the result.
 */
SampledSubgraph ImmutableGraph::NeighborUniformSample(IdArray seeds,
                                                      const std::string &neigh_type,
                                                      int num_hops, int expand_factor) const {
  // A null probability array selects uniform sampling.
  const float *uniform = nullptr;
  auto ret = SampleSubgraph(seeds, uniform, neigh_type, num_hops, expand_factor);
  auto sampled_graph = std::static_pointer_cast<ImmutableGraph>(ret.graph);
  sampled_graph->CompactSubgraph(ret.induced_vertices);
  return ret;
}
} // namespace dgl
...@@ -48,7 +48,7 @@ DGL_REGISTER_GLOBAL("runtime.degree_bucketing._CAPI_DGLDegreeBucketingForFullGra ...@@ -48,7 +48,7 @@ DGL_REGISTER_GLOBAL("runtime.degree_bucketing._CAPI_DGLDegreeBucketingForFullGra
.set_body([] (DGLArgs args, DGLRetValue* rv) { .set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphHandle ghandle = args[0]; GraphHandle ghandle = args[0];
const Graph* gptr = static_cast<Graph*>(ghandle); const Graph* gptr = static_cast<Graph*>(ghandle);
const auto& edges = gptr->Edges(false); const auto& edges = gptr->Edges("");
int64_t n_vertices = gptr->NumVertices(); int64_t n_vertices = gptr->NumVertices();
IdArray nids = IdArray::Empty({n_vertices}, edges.dst->dtype, edges.dst->ctx); IdArray nids = IdArray::Empty({n_vertices}, edges.dst->dtype, edges.dst->ctx);
int64_t* nid_data = static_cast<int64_t*>(nids->data); int64_t* nid_data = static_cast<int64_t*>(nids->data);
......
import os import backend as F
os.environ['DGLBACKEND'] = 'mxnet' import networkx as nx
import mxnet as mx
import numpy as np import numpy as np
import scipy as sp import scipy as sp
import dgl import dgl
from dgl.graph_index import map_to_subgraph_nid, GraphIndex, create_graph_index from dgl.graph_index import map_to_subgraph_nid, GraphIndex, create_graph_index
from dgl import utils from dgl import utils
def generate_from_networkx():
    """Build two graph indexes from the same small networkx DiGraph.

    Returns
    -------
    tuple
        ``(g, ig)``: the index from ``create_graph_index(nx_graph)`` and the
        one from ``create_graph_index(nx_graph, readonly=True)``.
    """
    # Six directed edges over node ids 0..5.
    nx_graph = nx.DiGraph()
    nx_graph.add_edges_from([[2, 3], [2, 5], [3, 0], [1, 0], [4, 3], [4, 5]])
    mutable_index = create_graph_index(nx_graph)
    readonly_index = create_graph_index(nx_graph, readonly=True)
    return mutable_index, readonly_index
def generate_from_edgelist():
    """Build two graph indexes from the same hard-coded edge list.

    Returns
    -------
    tuple
        ``(g, ig)``: the index from ``create_graph_index(edges)`` and the one
        from ``create_graph_index(edges, readonly=True)``.
    """
    # Node ids are deliberately sparse here: they reach 15 and skip many values.
    edge_list = [[2, 3], [2, 5], [3, 0], [6, 10], [10, 3], [10, 15]]
    mutable_index = create_graph_index(edge_list)
    readonly_index = create_graph_index(edge_list, readonly=True)
    return mutable_index, readonly_index
def generate_rand_graph(n): def generate_rand_graph(n):
arr = (sp.sparse.random(n, n, density=0.1, format='coo') != 0).astype(np.int64) arr = (sp.sparse.random(n, n, density=0.1, format='coo') != 0).astype(np.int64)
g = create_graph_index(arr) g = create_graph_index(arr)
...@@ -14,64 +27,87 @@ def generate_rand_graph(n): ...@@ -14,64 +27,87 @@ def generate_rand_graph(n):
return g, ig return g, ig
def check_graph_equal(g1, g2): def check_graph_equal(g1, g2):
adj1 = g1.adjacency_matrix(False, mx.cpu())[0] != 0 adj1 = g1.adjacency_matrix(False, F.cpu())[0]
adj2 = g2.adjacency_matrix(False, mx.cpu())[0] != 0 adj2 = g2.adjacency_matrix(False, F.cpu())[0]
assert mx.nd.sum(adj1 - adj2).asnumpy() == 0 assert np.all(F.asnumpy(adj1) == F.asnumpy(adj2))
def test_graph_gen(): def test_graph_gen():
g, ig = generate_from_edgelist()
check_graph_equal(g, ig)
g, ig = generate_rand_graph(10) g, ig = generate_rand_graph(10)
check_graph_equal(g, ig) check_graph_equal(g, ig)
def sort_edges(edges):
    """Convert an edge triple to user tensors and order it by its third entry.

    Parameters
    ----------
    edges : sequence
        At least three objects exposing ``tousertensor()``; callers in this
        file pass the ``(src, dst, eid)`` result of ``in_edges``/``out_edges``.

    Returns
    -------
    tuple
        The three tensors, all permuted by the ordering that sorts the third
        one. When the third tensor has no elements they are returned as-is.
    """
    tensors = [e.tousertensor() for e in edges]
    first, second, third = tensors[0], tensors[1], tensors[2]
    if np.prod(third.shape) > 0:
        # Only the index half of F.sort_1d's result is needed; it is used to
        # apply the same permutation to all three tensors.
        _, order = F.sort_1d(third)
        return (first[order], second[order], third[order])
    return (first, second, third)
def check_basics(g, ig): def check_basics(g, ig):
assert g.number_of_nodes() == ig.number_of_nodes() assert g.number_of_nodes() == ig.number_of_nodes()
assert g.number_of_edges() == ig.number_of_edges() assert g.number_of_edges() == ig.number_of_edges()
edges = g.edges() edges = g.edges("srcdst")
iedges = ig.edges() iedges = ig.edges("srcdst")
assert F.array_equal(edges[0].tousertensor(), iedges[0].tousertensor())
assert F.array_equal(edges[1].tousertensor(), iedges[1].tousertensor())
assert F.array_equal(edges[2].tousertensor(), iedges[2].tousertensor())
edges = g.edges("eid")
iedges = ig.edges("eid")
assert F.array_equal(edges[0].tousertensor(), iedges[0].tousertensor())
assert F.array_equal(edges[1].tousertensor(), iedges[1].tousertensor())
assert F.array_equal(edges[2].tousertensor(), iedges[2].tousertensor())
for i in range(g.number_of_nodes()): for i in range(g.number_of_nodes()):
assert g.has_node(i) == ig.has_node(i) assert g.has_node(i) == ig.has_node(i)
for i in range(g.number_of_nodes()): for i in range(g.number_of_nodes()):
assert mx.nd.sum(g.predecessors(i).tousertensor()).asnumpy() == mx.nd.sum(ig.predecessors(i).tousertensor()).asnumpy() assert F.array_equal(g.predecessors(i).tousertensor(), ig.predecessors(i).tousertensor())
assert mx.nd.sum(g.successors(i).tousertensor()).asnumpy() == mx.nd.sum(ig.successors(i).tousertensor()).asnumpy() assert F.array_equal(g.successors(i).tousertensor(), ig.successors(i).tousertensor())
randv = np.random.randint(0, g.number_of_nodes(), 10) randv = np.random.randint(0, g.number_of_nodes(), 10)
randv = utils.toindex(randv) randv = utils.toindex(randv)
in_src1, in_dst1, in_eids1 = g.in_edges(randv) in_src1, in_dst1, in_eids1 = sort_edges(g.in_edges(randv))
in_src2, in_dst2, in_eids2 = ig.in_edges(randv) in_src2, in_dst2, in_eids2 = sort_edges(ig.in_edges(randv))
nnz = in_src2.tousertensor().shape[0] nnz = in_src2.shape[0]
assert mx.nd.sum(in_src1.tousertensor() == in_src2.tousertensor()).asnumpy() == nnz assert F.array_equal(in_src1, in_src2)
assert mx.nd.sum(in_dst1.tousertensor() == in_dst2.tousertensor()).asnumpy() == nnz assert F.array_equal(in_dst1, in_dst2)
assert mx.nd.sum(in_eids1.tousertensor() == in_eids2.tousertensor()).asnumpy() == nnz assert F.array_equal(in_eids1, in_eids2)
out_src1, out_dst1, out_eids1 = g.out_edges(randv) out_src1, out_dst1, out_eids1 = sort_edges(g.out_edges(randv))
out_src2, out_dst2, out_eids2 = ig.out_edges(randv) out_src2, out_dst2, out_eids2 = sort_edges(ig.out_edges(randv))
nnz = out_dst2.tousertensor().shape[0] nnz = out_dst2.shape[0]
assert mx.nd.sum(out_dst1.tousertensor() == out_dst2.tousertensor()).asnumpy() == nnz assert F.array_equal(out_dst1, out_dst2)
assert mx.nd.sum(out_src1.tousertensor() == out_src2.tousertensor()).asnumpy() == nnz assert F.array_equal(out_src1, out_src2)
assert mx.nd.sum(out_eids1.tousertensor() == out_eids2.tousertensor()).asnumpy() == nnz assert F.array_equal(out_eids1, out_eids2)
num_v = len(randv) num_v = len(randv)
assert mx.nd.sum(g.in_degrees(randv).tousertensor() == ig.in_degrees(randv).tousertensor()).asnumpy() == num_v assert F.array_equal(g.in_degrees(randv).tousertensor(), ig.in_degrees(randv).tousertensor())
assert mx.nd.sum(g.out_degrees(randv).tousertensor() == ig.out_degrees(randv).tousertensor()).asnumpy() == num_v assert F.array_equal(g.out_degrees(randv).tousertensor(), ig.out_degrees(randv).tousertensor())
randv = randv.tousertensor() randv = randv.tousertensor()
for v in randv.asnumpy(): for v in F.asnumpy(randv):
assert g.in_degree(v) == ig.in_degree(v) assert g.in_degree(v) == ig.in_degree(v)
assert g.out_degree(v) == ig.out_degree(v) assert g.out_degree(v) == ig.out_degree(v)
for u in randv.asnumpy(): for u in F.asnumpy(randv):
for v in randv.asnumpy(): for v in F.asnumpy(randv):
if len(g.edge_id(u, v)) == 1: if len(g.edge_id(u, v)) == 1:
assert g.edge_id(u, v).tonumpy() == ig.edge_id(u, v).tonumpy() assert g.edge_id(u, v).tonumpy() == ig.edge_id(u, v).tonumpy()
assert g.has_edge_between(u, v) == ig.has_edge_between(u, v) assert g.has_edge_between(u, v) == ig.has_edge_between(u, v)
randv = utils.toindex(randv) randv = utils.toindex(randv)
ids = g.edge_ids(randv, randv)[2].tonumpy() ids = g.edge_ids(randv, randv)[2].tonumpy()
assert sum(ig.edge_ids(randv, randv)[2].tonumpy() == ids) == len(ids) assert sum(ig.edge_ids(randv, randv)[2].tonumpy() == ids, 0) == len(ids)
assert sum(g.has_edges_between(randv, randv).tonumpy() == ig.has_edges_between(randv, randv).tonumpy()) == len(randv) assert sum(g.has_edges_between(randv, randv).tonumpy() == ig.has_edges_between(randv, randv).tonumpy(), 0) == len(randv)
def test_basics(): def test_basics():
g, ig = generate_from_edgelist()
check_basics(g, ig)
g, ig = generate_from_networkx()
check_basics(g, ig)
g, ig = generate_rand_graph(100) g, ig = generate_rand_graph(100)
check_basics(g, ig) check_basics(g, ig)
...@@ -84,9 +120,10 @@ def test_node_subgraph(): ...@@ -84,9 +120,10 @@ def test_node_subgraph():
randv = np.unique(randv1) randv = np.unique(randv1)
subg = g.node_subgraph(utils.toindex(randv)) subg = g.node_subgraph(utils.toindex(randv))
subig = ig.node_subgraph(utils.toindex(randv)) subig = ig.node_subgraph(utils.toindex(randv))
check_basics(subg, subig)
check_graph_equal(subg, subig) check_graph_equal(subg, subig)
assert mx.nd.sum(map_to_subgraph_nid(subg, utils.toindex(randv1[0:10])).tousertensor() assert F.sum(map_to_subgraph_nid(subg, utils.toindex(randv1[0:10])).tousertensor()
== map_to_subgraph_nid(subig, utils.toindex(randv1[0:10])).tousertensor()) == 10 == map_to_subgraph_nid(subig, utils.toindex(randv1[0:10])).tousertensor(), 0) == 10
# node_subgraphs # node_subgraphs
randvs = [] randvs = []
...@@ -97,6 +134,7 @@ def test_node_subgraph(): ...@@ -97,6 +134,7 @@ def test_node_subgraph():
subgs.append(g.node_subgraph(utils.toindex(randv))) subgs.append(g.node_subgraph(utils.toindex(randv)))
subigs= ig.node_subgraphs(randvs) subigs= ig.node_subgraphs(randvs)
for i in range(4): for i in range(4):
check_basics(subg, subig)
check_graph_equal(subgs[i], subigs[i]) check_graph_equal(subgs[i], subigs[i])
def test_create_graph(): def test_create_graph():
...@@ -110,6 +148,7 @@ def test_create_graph(): ...@@ -110,6 +148,7 @@ def test_create_graph():
rows = [1, 0, 0] rows = [1, 0, 0]
cols = [2, 1, 2] cols = [2, 1, 2]
mat = sp.sparse.coo_matrix((data, (rows, cols))) mat = sp.sparse.coo_matrix((data, (rows, cols)))
g = dgl.DGLGraph(mat, readonly=False)
ig = dgl.DGLGraph(mat, readonly=True) ig = dgl.DGLGraph(mat, readonly=True)
for edge in elist: for edge in elist:
assert g.edge_id(edge[0], edge[1]) == ig.edge_id(edge[0], edge[1]) assert g.edge_id(edge[0], edge[1]) == ig.edge_id(edge[0], edge[1])
......
import os import backend as F
os.environ['DGLBACKEND'] = 'mxnet'
import mxnet as mx
import numpy as np import numpy as np
import scipy as sp import scipy as sp
import dgl import dgl
...@@ -19,7 +17,7 @@ def test_1neighbor_sampler_all(): ...@@ -19,7 +17,7 @@ def test_1neighbor_sampler_all():
assert len(seed_ids) == 1 assert len(seed_ids) == 1
src, dst, eid = g.in_edges(seed_ids, form='all') src, dst, eid = g.in_edges(seed_ids, form='all')
# Test if there is a self loop # Test if there is a self loop
self_loop = mx.nd.sum(src == dst).asnumpy() == 1 self_loop = F.asnumpy(F.sum(src == dst, 0)) == 1
if self_loop: if self_loop:
assert subg.number_of_nodes() == len(src) assert subg.number_of_nodes() == len(src)
else: else:
...@@ -30,22 +28,22 @@ def test_1neighbor_sampler_all(): ...@@ -30,22 +28,22 @@ def test_1neighbor_sampler_all():
child_src, child_dst, child_eid = subg.in_edges(child_ids, form='all') child_src, child_dst, child_eid = subg.in_edges(child_ids, form='all')
child_src1 = subg.map_to_subgraph_nid(src) child_src1 = subg.map_to_subgraph_nid(src)
assert mx.nd.sum(child_src1 == child_src).asnumpy() == len(src) assert F.asnumpy(F.sum(child_src1 == child_src, 0)) == len(src)
def is_sorted(arr): def is_sorted(arr):
return np.sum(np.sort(arr) == arr) == len(arr) return np.sum(np.sort(arr) == arr, 0) == len(arr)
def verify_subgraph(g, subg, seed_id): def verify_subgraph(g, subg, seed_id):
src, dst, eid = g.in_edges(seed_id, form='all') src, dst, eid = g.in_edges(seed_id, form='all')
child_id = subg.map_to_subgraph_nid(seed_id) child_id = subg.map_to_subgraph_nid(seed_id)
child_src, child_dst, child_eid = subg.in_edges(child_id, form='all') child_src, child_dst, child_eid = subg.in_edges(child_id, form='all')
child_src = child_src.asnumpy() child_src = F.asnumpy(child_src)
# We don't allow duplicate elements in the neighbor list. # We don't allow duplicate elements in the neighbor list.
assert(len(np.unique(child_src)) == len(child_src)) assert(len(np.unique(child_src)) == len(child_src))
# The neighbor list also needs to be sorted. # The neighbor list also needs to be sorted.
assert(is_sorted(child_src)) assert(is_sorted(child_src))
child_src1 = subg.map_to_subgraph_nid(src).asnumpy() child_src1 = F.asnumpy(subg.map_to_subgraph_nid(src))
child_src1 = child_src1[child_src1 >= 0] child_src1 = child_src1[child_src1 >= 0]
for i in child_src: for i in child_src:
assert i in child_src1 assert i in child_src1
...@@ -84,7 +82,7 @@ def test_10neighbor_sampler_all(): ...@@ -84,7 +82,7 @@ def test_10neighbor_sampler_all():
child_src, child_dst, child_eid = subg.in_edges(child_ids, form='all') child_src, child_dst, child_eid = subg.in_edges(child_ids, form='all')
child_src1 = subg.map_to_subgraph_nid(src) child_src1 = subg.map_to_subgraph_nid(src)
assert mx.nd.sum(child_src1 == child_src).asnumpy() == len(src) assert F.asnumpy(F.sum(child_src1 == child_src, 0)) == len(src)
def check_10neighbor_sampler(g, seeds): def check_10neighbor_sampler(g, seeds):
# In this case, NeighborSampling simply gets the neighborhood of a single vertex. # In this case, NeighborSampling simply gets the neighborhood of a single vertex.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment