"...git@developer.sourcefind.cn:renzhc/diffusers_dcu.git" did not exist on "0dd0528851bfa48d27ba68712d7df18ff619d22f"
Unverified Commit 929742b5 authored by Da Zheng's avatar Da Zheng Committed by GitHub
Browse files

[Feature] Reimplement Immutable graph index in DGL (#342)

* init.

* it's compiled.

* add immutable graph constructor.

* add immutable graph API.

* fix.

* impl get adjacency matrix.

* fix.

* fix graph_index from scipy matrix.

* add neighbor sampling.

* remap vertex ids.

* fix.

* move sampler test.

* fix tests.

* add comments

* remove mxnet-specific immutable graph.

* fix.

* fix lint.

* fix.

* try to fix windows compile error.

* fix.

* fix.

* add test.

* unify Graph and ImmutableGraph.

* fix bugs.

* fix compile.

* move immutable graph.

* fix.

* remove print.

* fix lint.

* fix

* fix lint.

* fix lint.

* fix test.

* fix comments.

* merge GraphIndex and ImmutableGraphIndex.

* temp fix.

* impl GetAdj.

* fix lint

* fix.

* fix.

* fix.

* fix.

* fix.

* use csr only for readonly graph.

* Revert "use csr only for readonly graph."

This reverts commit 8e24bb033af8504531b22849de5b7567b168e0d5.

* remove code.

* fix.

* fix.

* fix.

* fix.

* fix.

* fix.

* address comments.

* fix for comments.

* fix comments.

* revert.

* move test_graph_index to compute.

* fix.

* fix.

* impl GetAdj for coo.

* fix.

* fix tests.

* address comments.

* address comments.

* fix comment.

* address comments.

* use lambda.

* other comments.

* address comments.

* modify the semantics of edges.

* fix order.

* use DGLIdIter

* fix.

* remove NotImplemented.

* revert some code.
parent ed1948b5
...@@ -10,26 +10,18 @@ ...@@ -10,26 +10,18 @@
#include <cstdint> #include <cstdint>
#include <utility> #include <utility>
#include <tuple> #include <tuple>
#include "runtime/ndarray.h"
namespace dgl { #include "graph_interface.h"
typedef uint64_t dgl_id_t; namespace dgl {
typedef dgl::runtime::NDArray IdArray;
typedef dgl::runtime::NDArray DegreeArray;
typedef dgl::runtime::NDArray BoolArray;
typedef dgl::runtime::NDArray IntArray;
class Graph; class Graph;
class GraphOp; class GraphOp;
struct Subgraph;
/*! /*!
* \brief Base dgl graph index class. * \brief Base dgl graph index class.
* *
* DGL's graph is directed. Vertices are integers enumerated from zero. Edges * DGL's graph is directed. Vertices are integers enumerated from zero.
* are uniquely identified by the two endpoints. Multi-edge is currently not
* supported.
* *
* Removal of vertices/edges is not allowed. Instead, the graph can only be "cleared" * Removal of vertices/edges is not allowed. Instead, the graph can only be "cleared"
* by removing all the vertices and edges. * by removing all the vertices and edges.
...@@ -40,17 +32,15 @@ struct Subgraph; ...@@ -40,17 +32,15 @@ struct Subgraph;
* If the length of src id array is one, it represents one-many connections. * If the length of src id array is one, it represents one-many connections.
* If the length of dst id array is one, it represents many-one connections. * If the length of dst id array is one, it represents many-one connections.
*/ */
class Graph { class Graph: public GraphInterface {
public: public:
/* \brief structure used to represent a list of edges */
typedef struct {
/* \brief the two endpoints and the id of the edge */
IdArray src, dst, id;
} EdgeArray;
/*! \brief default constructor */ /*! \brief default constructor */
explicit Graph(bool multigraph = false) : is_multigraph_(multigraph) {} explicit Graph(bool multigraph = false) : is_multigraph_(multigraph) {}
/*! \brief construct a graph from the coo format. */
Graph(IdArray src_ids, IdArray dst_ids, IdArray edge_ids, size_t num_nodes,
bool multigraph = false);
/*! \brief default copy constructor */ /*! \brief default copy constructor */
Graph(const Graph& other) = default; Graph(const Graph& other) = default;
...@@ -118,6 +108,13 @@ class Graph { ...@@ -118,6 +108,13 @@ class Graph {
return is_multigraph_; return is_multigraph_;
} }
/*!
* \return whether the graph is read-only
*/
virtual bool IsReadonly() const {
return false;
}
/*! \return the number of vertices in the graph.*/ /*! \return the number of vertices in the graph.*/
uint64_t NumVertices() const { uint64_t NumVertices() const {
return adjlist_.size(); return adjlist_.size();
...@@ -232,7 +229,7 @@ class Graph { ...@@ -232,7 +229,7 @@ class Graph {
* \param sorted Whether the returned edge list is sorted by their src and dst ids * \param sorted Whether the returned edge list is sorted by their src and dst ids
* \return the id arrays of the two endpoints of the edges. * \return the id arrays of the two endpoints of the edges.
*/ */
EdgeArray Edges(bool sorted = false) const; EdgeArray Edges(const std::string &order = "") const;
/*! /*!
* \brief Get the in degree of the given vertex. * \brief Get the in degree of the given vertex.
...@@ -311,15 +308,15 @@ class Graph { ...@@ -311,15 +308,15 @@ class Graph {
* *
* \return the reversed graph * \return the reversed graph
*/ */
Graph Reverse() const; GraphPtr Reverse() const;
/*! /*!
* \brief Return the successor vector * \brief Return the successor vector
* \param vid The vertex id. * \param vid The vertex id.
* \return the successor vector * \return the successor vector
*/ */
const std::vector<dgl_id_t>& SuccVec(dgl_id_t vid) const { DGLIdIters SuccVec(dgl_id_t vid) const {
return adjlist_[vid].succ; return DGLIdIters(adjlist_[vid].succ.begin(), adjlist_[vid].succ.end());
} }
/*! /*!
...@@ -327,8 +324,8 @@ class Graph { ...@@ -327,8 +324,8 @@ class Graph {
* \param vid The vertex id. * \param vid The vertex id.
* \return the out edge id vector * \return the out edge id vector
*/ */
const std::vector<dgl_id_t>& OutEdgeVec(dgl_id_t vid) const { DGLIdIters OutEdgeVec(dgl_id_t vid) const {
return adjlist_[vid].edge_id; return DGLIdIters(adjlist_[vid].edge_id.begin(), adjlist_[vid].edge_id.end());
} }
/*! /*!
...@@ -336,8 +333,8 @@ class Graph { ...@@ -336,8 +333,8 @@ class Graph {
* \param vid The vertex id. * \param vid The vertex id.
* \return the predecessor vector * \return the predecessor vector
*/ */
const std::vector<dgl_id_t>& PredVec(dgl_id_t vid) const { DGLIdIters PredVec(dgl_id_t vid) const {
return reverse_adjlist_[vid].succ; return DGLIdIters(reverse_adjlist_[vid].succ.begin(), reverse_adjlist_[vid].succ.end());
} }
/*! /*!
...@@ -345,8 +342,41 @@ class Graph { ...@@ -345,8 +342,41 @@ class Graph {
* \param vid The vertex id. * \param vid The vertex id.
* \return the in edge id vector * \return the in edge id vector
*/ */
const std::vector<dgl_id_t>& InEdgeVec(dgl_id_t vid) const { DGLIdIters InEdgeVec(dgl_id_t vid) const {
return reverse_adjlist_[vid].edge_id; return DGLIdIters(reverse_adjlist_[vid].edge_id.begin(),
reverse_adjlist_[vid].edge_id.end());
}
/*!
* \brief Reset the data in the graph and move its data to the returned graph object.
* \return a raw pointer to the graph object.
*/
virtual GraphInterface *Reset() {
Graph* gptr = new Graph();
*gptr = std::move(*this);
return gptr;
}
/*!
* \brief Get the adjacency matrix of the graph.
*
* By default, a row of returned adjacency matrix represents the destination
* of an edge and the column represents the source.
* \param transpose A flag to transpose the returned adjacency matrix.
* \param fmt the format of the returned adjacency matrix.
* \return a vector of three IdArray.
*/
virtual std::vector<IdArray> GetAdj(bool transpose, const std::string &fmt) const;
/*!
* \brief Sample a subgraph from the seed vertices with neighbor sampling.
* The neighbors are sampled with a uniform distribution.
* \return a subgraph
*/
virtual SampledSubgraph NeighborUniformSample(IdArray seeds, const std::string &neigh_type,
int num_hops, int expand_factor) const {
LOG(FATAL) << "NeighborUniformSample isn't supported in mutable graph";
return SampledSubgraph();
} }
protected: protected:
...@@ -382,22 +412,6 @@ class Graph { ...@@ -382,22 +412,6 @@ class Graph {
uint64_t num_edges_ = 0; uint64_t num_edges_ = 0;
}; };
/*! \brief Subgraph data structure */
struct Subgraph {
/*! \brief The graph. */
Graph graph;
/*!
* \brief The induced vertex ids.
* \note This is also a map from the new vertex id to the vertex id in the parent graph.
*/
IdArray induced_vertices;
/*!
* \brief The induced edge ids.
* \note This is also a map from the new edge id to the edge id in the parent graph.
*/
IdArray induced_edges;
};
} // namespace dgl } // namespace dgl
#endif // DGL_GRAPH_H_ #endif // DGL_GRAPH_H_
/*!
* Copyright (c) 2018 by Contributors
* \file dgl/graph_interface.h
* \brief DGL graph index class.
*/
#ifndef DGL_GRAPH_INTERFACE_H_
#define DGL_GRAPH_INTERFACE_H_
#include <cstdint>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "runtime/ndarray.h"
namespace dgl {
typedef uint64_t dgl_id_t;
typedef dgl::runtime::NDArray IdArray;
typedef dgl::runtime::NDArray DegreeArray;
typedef dgl::runtime::NDArray BoolArray;
typedef dgl::runtime::NDArray IntArray;
struct Subgraph;
struct SampledSubgraph;
/*!
* \brief This class references data in std::vector.
*
* This isn't a STL-style iterator. It provides a STL data container interface.
* but it doesn't own data itself. instead, it only references data in std::vector.
*/
class DGLIdIters {
std::vector<dgl_id_t>::const_iterator begin_, end_;
public:
DGLIdIters(std::vector<dgl_id_t>::const_iterator begin,
std::vector<dgl_id_t>::const_iterator end) {
this->begin_ = begin;
this->end_ = end;
}
std::vector<dgl_id_t>::const_iterator begin() const {
return this->begin_;
}
std::vector<dgl_id_t>::const_iterator end() const {
return this->end_;
}
dgl_id_t operator[](int64_t i) const {
return *(this->begin_ + i);
}
size_t size() const {
return this->end_ - this->begin_;
}
};
class GraphInterface;
typedef std::shared_ptr<GraphInterface> GraphPtr;
/*!
* \brief dgl graph index interface.
*
* DGL's graph is directed. Vertices are integers enumerated from zero.
*
* Every member function declared here except the destructor is pure virtual,
* so a concrete graph class has to implement the complete API.
*/
class GraphInterface {
public:
/*! \brief structure used to represent a list of edges */
typedef struct {
/*! \brief the two endpoints and the id of the edge */
IdArray src, dst, id;
} EdgeArray;
/*! \brief virtual destructor so a graph can be destroyed through a GraphInterface pointer */
virtual ~GraphInterface() = default;
/*!
* \brief Add vertices to the graph.
* \note Since vertices are integers enumerated from zero, only the number of
* vertices to be added needs to be specified.
* \param num_vertices The number of vertices to be added.
*/
virtual void AddVertices(uint64_t num_vertices) = 0;
/*!
* \brief Add one edge to the graph.
* \param src The source vertex.
* \param dst The destination vertex.
*/
virtual void AddEdge(dgl_id_t src, dgl_id_t dst) = 0;
/*!
* \brief Add edges to the graph.
* \param src_ids The source vertex id array.
* \param dst_ids The destination vertex id array.
*/
virtual void AddEdges(IdArray src_ids, IdArray dst_ids) = 0;
/*!
* \brief Clear the graph. Remove all vertices/edges.
*/
virtual void Clear() = 0;
/*!
* \return whether the graph is a multigraph
*/
virtual bool IsMultigraph() const = 0;
/*!
* \return whether the graph is read-only
*/
virtual bool IsReadonly() const = 0;
/*! \return the number of vertices in the graph.*/
virtual uint64_t NumVertices() const = 0;
/*! \return the number of edges in the graph.*/
virtual uint64_t NumEdges() const = 0;
/*! \return true if the given vertex is in the graph.*/
virtual bool HasVertex(dgl_id_t vid) const = 0;
/*! \return a 0-1 array indicating whether the given vertices are in the graph.*/
virtual BoolArray HasVertices(IdArray vids) const = 0;
/*! \return true if the given edge is in the graph.*/
virtual bool HasEdgeBetween(dgl_id_t src, dgl_id_t dst) const = 0;
/*! \return a 0-1 array indicating whether the given edges are in the graph.*/
virtual BoolArray HasEdgesBetween(IdArray src_ids, IdArray dst_ids) const = 0;
/*!
* \brief Find the predecessors of a vertex.
* \param vid The vertex id.
* \param radius The radius of the neighborhood. Default is immediate neighbor (radius=1).
* \return the predecessor id array.
*/
virtual IdArray Predecessors(dgl_id_t vid, uint64_t radius = 1) const = 0;
/*!
* \brief Find the successors of a vertex.
* \param vid The vertex id.
* \param radius The radius of the neighborhood. Default is immediate neighbor (radius=1).
* \return the successor id array.
*/
virtual IdArray Successors(dgl_id_t vid, uint64_t radius = 1) const = 0;
/*!
* \brief Get all edge ids between the two given endpoints
* \note Edges are associated with an integer id start from zero.
* The id is assigned when the edge is being added to the graph.
* \param src The source vertex.
* \param dst The destination vertex.
* \return the edge id array.
*/
virtual IdArray EdgeId(dgl_id_t src, dgl_id_t dst) const = 0;
/*!
* \brief Get all edge ids between the given endpoint pairs.
* \note Edges are associated with an integer id start from zero.
* The id is assigned when the edge is being added to the graph.
* If duplicate pairs exist, the returned edge IDs will also duplicate.
* The order of returned edge IDs will follow the order of src-dst pairs
* first, and ties are broken by the order of edge ID.
* \param src The source vertex id array.
* \param dst The destination vertex id array.
* \return EdgeArray containing all edges between all pairs.
*/
virtual EdgeArray EdgeIds(IdArray src, IdArray dst) const = 0;
/*!
* \brief Find the edge ID and return the pair of endpoints
* \param eid The edge ID
* \return a pair whose first element is the source and the second the destination.
*/
virtual std::pair<dgl_id_t, dgl_id_t> FindEdge(dgl_id_t eid) const = 0;
/*!
* \brief Find the edge IDs and return their source and target node IDs.
* \param eids The edge ID array.
* \return EdgeArray containing all edges with id in eid. The order is preserved.
*/
virtual EdgeArray FindEdges(IdArray eids) const = 0;
/*!
* \brief Get the in edges of the vertex.
* \note The returned dst id array is filled with vid.
* \param vid The vertex id.
* \return the edges
*/
virtual EdgeArray InEdges(dgl_id_t vid) const = 0;
/*!
* \brief Get the in edges of the vertices.
* \param vids The vertex id array.
* \return the id arrays of the two endpoints of the edges.
*/
virtual EdgeArray InEdges(IdArray vids) const = 0;
/*!
* \brief Get the out edges of the vertex.
* \note The returned src id array is filled with vid.
* \param vid The vertex id.
* \return the id arrays of the two endpoints of the edges.
*/
virtual EdgeArray OutEdges(dgl_id_t vid) const = 0;
/*!
* \brief Get the out edges of the vertices.
* \param vids The vertex id array.
* \return the id arrays of the two endpoints of the edges.
*/
virtual EdgeArray OutEdges(IdArray vids) const = 0;
/*!
* \brief Get all the edges in the graph.
* \note If order is "srcdst", the returned edges list is sorted by their src and
* dst ids. If order is "eid", they are in their edge id order.
* Otherwise, they are returned in an arbitrary order.
* \param order The order of the returned edge list.
* \return the id arrays of the two endpoints of the edges.
*/
virtual EdgeArray Edges(const std::string &order = "") const = 0;
/*!
* \brief Get the in degree of the given vertex.
* \param vid The vertex id.
* \return the in degree
*/
virtual uint64_t InDegree(dgl_id_t vid) const = 0;
/*!
* \brief Get the in degrees of the given vertices.
* \param vids The vertex id array.
* \return the in degree array
*/
virtual DegreeArray InDegrees(IdArray vids) const = 0;
/*!
* \brief Get the out degree of the given vertex.
* \param vid The vertex id.
* \return the out degree
*/
virtual uint64_t OutDegree(dgl_id_t vid) const = 0;
/*!
* \brief Get the out degrees of the given vertices.
* \param vids The vertex id array.
* \return the out degree array
*/
virtual DegreeArray OutDegrees(IdArray vids) const = 0;
/*!
* \brief Construct the induced subgraph of the given vertices.
*
* The induced subgraph is a subgraph formed by specifying a set of vertices V' and then
* selecting all of the edges from the original graph that connect two vertices in V'.
*
* Vertices and edges in the original graph will be "reindexed" to local index. The local
* index of the vertices preserve the order of the given id array, while the local index
* of the edges preserve the index order in the original graph. Vertices not in the
* original graph are ignored.
*
* The result subgraph is read-only.
*
* \param vids The vertices in the subgraph.
* \return the induced subgraph
*/
virtual Subgraph VertexSubgraph(IdArray vids) const = 0;
/*!
* \brief Construct the induced edge subgraph of the given edges.
*
* The induced edges subgraph is a subgraph formed by specifying a set of edges E' and then
* selecting all of the nodes from the original graph that are endpoints in E'.
*
* Vertices and edges in the original graph will be "reindexed" to local index. The local
* index of the edges preserve the order of the given id array, while the local index
* of the vertices preserve the index order in the original graph. Edges not in the
* original graph are ignored.
*
* The result subgraph is read-only.
*
* \param eids The edges in the subgraph.
* \return the induced edge subgraph
*/
virtual Subgraph EdgeSubgraph(IdArray eids) const = 0;
/*!
* \brief Return a new graph with all the edges reversed.
*
* The returned graph preserves the vertex and edge index in the original graph.
*
* \return the reversed graph
*/
virtual GraphPtr Reverse() const = 0;
/*!
* \brief Return the successor vector
* \param vid The vertex id.
* \return the successor vector iterator pair.
*/
virtual DGLIdIters SuccVec(dgl_id_t vid) const = 0;
/*!
* \brief Return the out edge id vector
* \param vid The vertex id.
* \return the out edge id vector iterator pair.
*/
virtual DGLIdIters OutEdgeVec(dgl_id_t vid) const = 0;
/*!
* \brief Return the predecessor vector
* \param vid The vertex id.
* \return the predecessor vector iterator pair.
*/
virtual DGLIdIters PredVec(dgl_id_t vid) const = 0;
/*!
* \brief Return the in edge id vector
* \param vid The vertex id.
* \return the in edge id vector iterator pair.
*/
virtual DGLIdIters InEdgeVec(dgl_id_t vid) const = 0;
/*!
* \brief Reset the data in the graph and move its data to the returned graph object.
* \return a raw pointer to the graph object.
* \note NOTE(review): the interface does not say who owns the returned raw pointer;
* presumably the caller is responsible for deleting it - confirm against the callers.
*/
virtual GraphInterface *Reset() = 0;
/*!
* \brief Get the adjacency matrix of the graph.
*
* By default, a row of returned adjacency matrix represents the destination
* of an edge and the column represents the source.
* \param transpose A flag to transpose the returned adjacency matrix.
* \param fmt the format of the returned adjacency matrix.
* NOTE(review): the accepted format names are not visible in this header.
* \return a vector of IdArrays.
*/
virtual std::vector<IdArray> GetAdj(bool transpose, const std::string &fmt) const = 0;
/*!
* \brief Sample a subgraph from the seed vertices with neighbor sampling.
* The neighbors are sampled with a uniform distribution.
* \param seeds The seed vertices the sampling starts from.
* \param neigh_type NOTE(review): presumably selects whether in- or out-neighbors are
* sampled; the accepted values are not visible in this header.
* \param num_hops NOTE(review): presumably the number of hops expanded from the seeds - confirm.
* \param expand_factor NOTE(review): presumably the number of neighbors sampled per
* vertex - confirm.
* \return a subgraph
*/
virtual SampledSubgraph NeighborUniformSample(IdArray seeds, const std::string &neigh_type,
int num_hops, int expand_factor) const = 0;
};
/*!
* \brief Subgraph data structure.
*
* Bundles a graph with the arrays that map its vertex and edge ids back to
* the ids used by the parent graph.
*/
struct Subgraph {
/*! \brief The graph, held through the shared GraphPtr handle. */
GraphPtr graph;
/*!
* \brief The induced vertex ids.
* \note This is also a map from the new vertex id to the vertex id in the parent graph.
*/
IdArray induced_vertices;
/*!
* \brief The induced edge ids.
* \note This is also a map from the new edge id to the edge id in the parent graph.
*/
IdArray induced_edges;
};
/*!
* \brief When we sample a subgraph, we need to store extra information,
* such as the layer Ids of the vertices and the sampling probability.
*
* Extends Subgraph, so the graph and the induced vertex/edge id arrays are
* available here as well.
*/
struct SampledSubgraph: public Subgraph {
/*!
* \brief the layer of a sampled vertex in the subgraph.
*/
IdArray layer_ids;
/*!
* \brief the probability that a vertex is sampled.
*/
runtime::NDArray sample_prob;
};
} // namespace dgl
#endif // DGL_GRAPH_INTERFACE_H_
/*!
* Copyright (c) 2018 by Contributors
* \file dgl/immutable_graph.h
* \brief DGL immutable graph index class.
*/
#ifndef DGL_IMMUTABLE_GRAPH_H_
#define DGL_IMMUTABLE_GRAPH_H_
#include <vector>
#include <string>
#include <cstdint>
#include <utility>
#include <tuple>
#include "runtime/ndarray.h"
#include "graph_interface.h"
namespace dgl {
/*!
* \brief DGL immutable graph index class.
*
* DGL's graph is directed. Vertices are integers enumerated from zero.
*/
class ImmutableGraph: public GraphInterface {
public:
/*!
* \brief A CSR matrix expressed as three IdArrays.
* \note NOTE(review): presumably mirrors the indptr / indices / edge_ids vectors of
* the CSR struct - confirm against GetInCSRArray() and GetOutCSRArray().
*/
typedef struct {
IdArray indptr, indices, id;
} CSRArray;
/*!
* \brief A single edge: its two end points plus its edge id.
* \note NOTE(review): which slot of end_points holds the source and which the
* destination is not visible in this header - confirm in the implementation.
*/
struct Edge {
dgl_id_t end_points[2];
dgl_id_t edge_id;
};
/*!
 * \brief Compressed sparse row storage of the graph's edges.
 *
 * For a vertex v, the half-open range [indptr[v], indptr[v + 1]) selects the
 * entries of `indices` and `edge_ids` that belong to v.
 */
struct CSR {
  typedef std::shared_ptr<CSR> Ptr;
  std::vector<int64_t> indptr;
  std::vector<dgl_id_t> indices;
  std::vector<dgl_id_t> edge_ids;

  /*!
   * \brief Create a CSR with room for the given number of vertices.
   * \param num_vertices The number of vertices; indptr gets num_vertices + 1 entries.
   * \param expected_num_edges Capacity reserved up front for indices and edge_ids.
   */
  CSR(int64_t num_vertices, int64_t expected_num_edges)
      : indptr(num_vertices + 1) {
    indices.reserve(expected_num_edges);
    edge_ids.reserve(expected_num_edges);
  }

  /*! \return true if vid is smaller than the number of vertices. */
  bool HasVertex(dgl_id_t vid) const {
    const uint64_t vertex_count = NumVertices();
    return vid < vertex_count;
  }

  /*! \return the number of vertices, i.e. one less than the length of indptr. */
  uint64_t NumVertices() const {
    return indptr.size() - 1;
  }

  /*! \return the number of edges, i.e. the length of indices. */
  uint64_t NumEdges() const {
    return indices.size();
  }

  /*!
   * \brief Number of entries stored for a vertex; the id is not range-checked.
   * \param vid The vertex id.
   * \return the difference between the two indptr entries that delimit vid.
   */
  int64_t GetDegree(dgl_id_t vid) const {
    const int64_t row_begin = indptr[vid];
    const int64_t row_end = indptr[vid + 1];
    return row_end - row_begin;
  }

  DegreeArray GetDegrees(IdArray vids) const;
  EdgeArray GetEdges(dgl_id_t vid) const;
  EdgeArray GetEdges(IdArray vids) const;

  /*!
   * \brief Return the start and end position of the column indices that correspond to v.
   * \param v The vertex id; it is not range-checked.
   * \return a non-owning view into `indices`.
   */
  DGLIdIters GetIndexRef(dgl_id_t v) const {
    const auto first = indices.begin();
    return DGLIdIters(first + indptr[v], first + indptr[v + 1]);
  }

  /*!
   * \brief Read all edges and store them in the vector.
   */
  void ReadAllEdges(std::vector<Edge> *edges) const;
  CSR::Ptr Transpose() const;
  std::pair<CSR::Ptr, IdArray> VertexSubgraph(IdArray vids) const;

  /*!
   * \brief Construct a CSR from a list of edges.
   *
   * When constructing a CSR, we need to sort the edge list. To reduce the overhead,
   * we simply sort on the input edge list. We allow sorting on both end points of an edge,
   * which is specified by `sort_on`.
   */
  static CSR::Ptr FromEdges(std::vector<Edge> *edges, int sort_on, int64_t num_nodes);
};
/*! \brief Construct an immutable graph from the COO format. */
ImmutableGraph(IdArray src_ids, IdArray dst_ids, IdArray edge_ids, size_t num_nodes,
bool multigraph = false);
/*!
* \brief Construct an immutable graph from the CSR format.
*
* For a single graph, we need two CSRs: one stores the in-edges of vertices and
* the other stores the out-edges of vertices. These two CSRs store the same edges.
* The reason we need both is that some operators are faster on the in-edge CSR and
* other operators are faster on the out-edge CSR.
*
* However, not both CSRs are required. Technically, one CSR contains all the information.
* Thus, when we construct a temporary graph (e.g., a sampled subgraph), we only
* construct the CSR that is fast for the operations we expect and construct
* the other CSR on demand.
*
* \param in_csr The CSR storing the in-edges; may be null when out_csr is given.
* \param out_csr The CSR storing the out-edges; may be null when in_csr is given.
* \param multigraph Whether the graph is a multigraph.
*/
ImmutableGraph(CSR::Ptr in_csr, CSR::Ptr out_csr,
bool multigraph = false) : is_multigraph_(multigraph) {
this->in_csr_ = in_csr;
this->out_csr_ = out_csr;
// At least one CSR has to be supplied; GetInCSR()/GetOutCSR() derive the missing one.
CHECK(this->in_csr_ != nullptr || this->out_csr_ != nullptr)
<< "there must exist one of the CSRs";
}
/*! \brief default constructor */
explicit ImmutableGraph(bool multigraph = false) : is_multigraph_(multigraph) {}
/*! \brief default copy constructor */
ImmutableGraph(const ImmutableGraph& other) = default;
#ifndef _MSC_VER
/*! \brief default move constructor */
ImmutableGraph(ImmutableGraph&& other) = default;
#else
/*!
* \brief Hand-written move constructor used when building with MSVC.
* \param other The graph whose CSRs are taken over; both of its CSR pointers are
* null afterwards.
*/
ImmutableGraph(ImmutableGraph&& other) {
// Share both CSRs and copy the multigraph flag ...
this->in_csr_ = other.in_csr_;
this->out_csr_ = other.out_csr_;
this->is_multigraph_ = other.is_multigraph_;
// ... then detach the source so that it no longer references them.
other.in_csr_ = nullptr;
other.out_csr_ = nullptr;
}
#endif // _MSC_VER
/*! \brief default copy assignment operator */
ImmutableGraph& operator=(const ImmutableGraph& other) = default;
/*! \brief default destructor */
~ImmutableGraph() = default;
/*!
* \brief Unsupported on an immutable graph: vertices cannot be added.
* \note The body only reports a fatal error through LOG(FATAL).
* \param num_vertices The number of vertices to be added (unused).
*/
void AddVertices(uint64_t num_vertices) {
LOG(FATAL) << "AddVertices isn't supported in ImmutableGraph";
}
/*!
* \brief Unsupported on an immutable graph: an edge cannot be added.
* \note The body only reports a fatal error through LOG(FATAL).
* \param src The source vertex (unused).
* \param dst The destination vertex (unused).
*/
void AddEdge(dgl_id_t src, dgl_id_t dst) {
LOG(FATAL) << "AddEdge isn't supported in ImmutableGraph";
}
/*!
* \brief Unsupported on an immutable graph: edges cannot be added.
* \note The body only reports a fatal error through LOG(FATAL).
* \param src_ids The source vertex id array (unused).
* \param dst_ids The destination vertex id array (unused).
*/
void AddEdges(IdArray src_ids, IdArray dst_ids) {
LOG(FATAL) << "AddEdges isn't supported in ImmutableGraph";
}
/*!
* \brief Unsupported on an immutable graph: the graph cannot be cleared.
* \note The body only reports a fatal error through LOG(FATAL).
*/
void Clear() {
LOG(FATAL) << "Clear isn't supported in ImmutableGraph";
}
/*!
* \return whether the graph is a multigraph, as recorded in the is_multigraph_ flag
*/
bool IsMultigraph() const {
return is_multigraph_;
}
/*!
* \return whether the graph is read-only; always true for ImmutableGraph
*/
virtual bool IsReadonly() const {
return true;
}
/*!
 * \return the number of vertices in the graph, or 0 when the graph holds no CSR.
 */
uint64_t NumVertices() const {
  // Both CSRs store the same graph, so whichever one is present can answer.
  if (in_csr_) {
    return in_csr_->NumVertices();
  }
  if (out_csr_) {
    return out_csr_->NumVertices();
  }
  // A default-constructed graph owns neither CSR; treat it as empty instead of
  // dereferencing a null pointer.
  return 0;
}
/*!
 * \return the number of edges in the graph, or 0 when the graph holds no CSR.
 */
uint64_t NumEdges() const {
  // Both CSRs store the same edges, so whichever one is present can answer.
  if (in_csr_) {
    return in_csr_->NumEdges();
  }
  if (out_csr_) {
    return out_csr_->NumEdges();
  }
  // A default-constructed graph owns neither CSR; treat it as empty instead of
  // dereferencing a null pointer.
  return 0;
}
/*!
* \param vid The vertex id.
* \return true if the given vertex is in the graph, i.e. vid is smaller than NumVertices().
*/
bool HasVertex(dgl_id_t vid) const {
return vid < NumVertices();
}
/*! \return a 0-1 array indicating whether the given vertices are in the graph.*/
BoolArray HasVertices(IdArray vids) const;
/*! \return true if the given edge is in the graph.*/
bool HasEdgeBetween(dgl_id_t src, dgl_id_t dst) const;
/*! \return a 0-1 array indicating whether the given edges are in the graph.*/
BoolArray HasEdgesBetween(IdArray src_ids, IdArray dst_ids) const;
/*!
* \brief Find the predecessors of a vertex.
* \param vid The vertex id.
* \param radius The radius of the neighborhood. Default is immediate neighbor (radius=1).
* \return the predecessor id array.
*/
IdArray Predecessors(dgl_id_t vid, uint64_t radius = 1) const;
/*!
* \brief Find the successors of a vertex.
* \param vid The vertex id.
* \param radius The radius of the neighborhood. Default is immediate neighbor (radius=1).
* \return the successor id array.
*/
IdArray Successors(dgl_id_t vid, uint64_t radius = 1) const;
/*!
* \brief Get all edge ids between the two given endpoints
* \note Edges are associated with an integer id start from zero.
* The id is assigned when the edge is being added to the graph.
* \param src The source vertex.
* \param dst The destination vertex.
* \return the edge id array.
*/
IdArray EdgeId(dgl_id_t src, dgl_id_t dst) const;
/*!
* \brief Get all edge ids between the given endpoint pairs.
* \note Edges are associated with an integer id start from zero.
* The id is assigned when the edge is being added to the graph.
* If duplicate pairs exist, the returned edge IDs will also duplicate.
* The order of returned edge IDs will follow the order of src-dst pairs
* first, and ties are broken by the order of edge ID.
* \return EdgeArray containing all edges between all pairs.
*/
EdgeArray EdgeIds(IdArray src, IdArray dst) const;
/*!
* \brief Unsupported on an immutable graph: look up the endpoints of an edge ID.
* \note The body reports a fatal error through LOG(FATAL); the value-initialized pair
* after it only satisfies the return type.
* \param eid The edge ID (unused).
* \return a value-initialized pair.
*/
std::pair<dgl_id_t, dgl_id_t> FindEdge(dgl_id_t eid) const {
LOG(FATAL) << "FindEdge isn't supported in ImmutableGraph";
return std::pair<dgl_id_t, dgl_id_t>();
}
/*!
* \brief Unsupported on an immutable graph: look up the endpoints of edge IDs.
* \note The body reports a fatal error through LOG(FATAL); the value-initialized
* EdgeArray after it only satisfies the return type.
* \param eids The edge ID array (unused).
* \return a value-initialized EdgeArray.
*/
EdgeArray FindEdges(IdArray eids) const {
LOG(FATAL) << "FindEdges isn't supported in ImmutableGraph";
return EdgeArray();
}
/*!
 * \brief Get the in edges of the vertex.
 * \note The returned dst id array is filled with vid.
 * \param vid The vertex id.
 * \return the edges, looked up in the in-edge CSR
 */
EdgeArray InEdges(dgl_id_t vid) const {
  const CSR::Ptr in_csr = this->GetInCSR();
  return in_csr->GetEdges(vid);
}
/*!
 * \brief Get the in edges of the vertices.
 * \param vids The vertex id array.
 * \return the id arrays of the two endpoints of the edges, looked up in the in-edge CSR.
 */
EdgeArray InEdges(IdArray vids) const {
  const CSR::Ptr in_csr = this->GetInCSR();
  return in_csr->GetEdges(vids);
}
/*!
 * \brief Get the out edges of the vertex.
 * \note The returned src id array is filled with vid.
 * \param vid The vertex id.
 * \return the id arrays of the two endpoints of the edges.
 */
EdgeArray OutEdges(dgl_id_t vid) const {
  const CSR::Ptr out_csr = this->GetOutCSR();
  const auto csr_edges = out_csr->GetEdges(vid);
  // The source and destination of the CSR result have to be exchanged for out-edges.
  return ImmutableGraph::EdgeArray{csr_edges.dst, csr_edges.src, csr_edges.id};
}
/*!
 * \brief Get the out edges of the vertices.
 * \param vids The vertex id array.
 * \return the id arrays of the two endpoints of the edges.
 */
EdgeArray OutEdges(IdArray vids) const {
  const CSR::Ptr out_csr = this->GetOutCSR();
  const auto csr_edges = out_csr->GetEdges(vids);
  // The source and destination of the CSR result have to be exchanged for out-edges.
  return ImmutableGraph::EdgeArray{csr_edges.dst, csr_edges.src, csr_edges.id};
}
/*!
* \brief Get all the edges in the graph.
* \note If order is "srcdst", the returned edges list is sorted by their src and
* dst ids. If order is "eid", they are in their edge id order.
* Otherwise, they are returned in an arbitrary order.
* \param order The order of the returned edge list.
* \return the id arrays of the two endpoints of the edges.
*/
EdgeArray Edges(const std::string &order = "") const;
/*!
 * \brief Get the in degree of the given vertex.
 * \note Fails the CHECK when vid is not a vertex of the graph.
 * \param vid The vertex id.
 * \return the in degree
 */
uint64_t InDegree(dgl_id_t vid) const {
  CHECK(HasVertex(vid)) << "invalid vertex: " << vid;
  const CSR::Ptr in_csr = this->GetInCSR();
  return in_csr->GetDegree(vid);
}
/*!
 * \brief Get the in degrees of the given vertices.
 * \param vids The vertex id array.
 * \return the in degree array
 */
DegreeArray InDegrees(IdArray vids) const {
  const CSR::Ptr in_csr = this->GetInCSR();
  return in_csr->GetDegrees(vids);
}
/*!
 * \brief Get the out degree of the given vertex.
 * \note Fails the CHECK when vid is not a vertex of the graph.
 * \param vid The vertex id.
 * \return the out degree
 */
uint64_t OutDegree(dgl_id_t vid) const {
  CHECK(HasVertex(vid)) << "invalid vertex: " << vid;
  const CSR::Ptr out_csr = this->GetOutCSR();
  return out_csr->GetDegree(vid);
}
/*!
 * \brief Get the out degrees of the given vertices.
 * \param vids The vertex id array.
 * \return the out degree array
 */
DegreeArray OutDegrees(IdArray vids) const {
  const CSR::Ptr out_csr = this->GetOutCSR();
  return out_csr->GetDegrees(vids);
}
/*!
* \brief Construct the induced subgraph of the given vertices.
*
* The induced subgraph is a subgraph formed by specifying a set of vertices V' and then
* selecting all of the edges from the original graph that connect two vertices in V'.
*
* Vertices and edges in the original graph will be "reindexed" to local index. The local
* index of the vertices preserve the order of the given id array, while the local index
* of the edges preserve the index order in the original graph. Vertices not in the
* original graph are ignored.
*
* The result subgraph is read-only.
*
* \param vids The vertices in the subgraph.
* \return the induced subgraph
*/
Subgraph VertexSubgraph(IdArray vids) const;
/*!
* \brief Construct the induced edge subgraph of the given edges.
*
* The induced edges subgraph is a subgraph formed by specifying a set of edges E' and then
* selecting all of the nodes from the original graph that are endpoints in E'.
*
* Vertices and edges in the original graph will be "reindexed" to local index. The local
* index of the edges preserve the order of the given id array, while the local index
* of the vertices preserve the index order in the original graph. Edges not in the
* original graph are ignored.
*
* The result subgraph is read-only.
*
* \param eids The edges in the subgraph.
* \return the induced edge subgraph
*/
Subgraph EdgeSubgraph(IdArray eids) const;
/*!
 * \brief Return a new graph with all the edges reversed.
 *
 * The returned graph preserves the vertex and edge index in the original graph.
 * It is built by handing this graph's out-edge CSR over as the in-edge CSR and
 * vice versa, so both graphs share the same CSR objects.
 *
 * \return the reversed graph
 */
GraphPtr Reverse() const {
  ImmutableGraph *reversed = new ImmutableGraph(out_csr_, in_csr_, is_multigraph_);
  return GraphPtr(reversed);
}
/*!
 * \brief Return the successor vector
 * \param vid The vertex id; it is not range-checked.
 * \return a non-owning view over the successors of vid in the out-edge CSR.
 */
DGLIdIters SuccVec(dgl_id_t vid) const {
  // A graph may have been built with the in-edge CSR only. GetOutCSR() creates
  // and caches the out-edge CSR in that case, whereas reading out_csr_ directly
  // would dereference a null pointer. The cached CSR is owned by this graph, so
  // the returned view stays valid after the temporary handle is released.
  return this->GetOutCSR()->GetIndexRef(vid);
}
/*!
 * \brief Return the out edge id vector
 * \param vid The vertex id; it is not range-checked.
 * \return a non-owning view over the ids of the out-edges of vid.
 */
DGLIdIters OutEdgeVec(dgl_id_t vid) const {
  // A graph may have been built with the in-edge CSR only. GetOutCSR() creates
  // and caches the out-edge CSR in that case, whereas reading out_csr_ directly
  // would dereference a null pointer. The cached CSR is owned by this graph, so
  // the returned view stays valid after the local handle goes out of scope.
  const CSR::Ptr out_csr = this->GetOutCSR();
  const auto first = out_csr->edge_ids.begin();
  return DGLIdIters(first + out_csr->indptr[vid], first + out_csr->indptr[vid + 1]);
}
/*!
 * \brief Return the predecessor vector
 * \param vid The vertex id; it is not range-checked.
 * \return a non-owning view over the predecessors of vid in the in-edge CSR.
 */
DGLIdIters PredVec(dgl_id_t vid) const {
  // A graph may have been built with the out-edge CSR only. GetInCSR() creates
  // and caches the in-edge CSR in that case, whereas reading in_csr_ directly
  // would dereference a null pointer. The cached CSR is owned by this graph, so
  // the returned view stays valid after the temporary handle is released.
  return this->GetInCSR()->GetIndexRef(vid);
}
/*!
 * \brief Return the in edge id vector
 * \param vid The vertex id; it is not range-checked.
 * \return a non-owning view over the ids of the in-edges of vid.
 */
DGLIdIters InEdgeVec(dgl_id_t vid) const {
  // A graph may have been built with the out-edge CSR only. GetInCSR() creates
  // and caches the in-edge CSR in that case, whereas reading in_csr_ directly
  // would dereference a null pointer. The cached CSR is owned by this graph, so
  // the returned view stays valid after the local handle goes out of scope.
  const CSR::Ptr in_csr = this->GetInCSR();
  const auto first = in_csr->edge_ids.begin();
  return DGLIdIters(first + in_csr->indptr[vid], first + in_csr->indptr[vid + 1]);
}
/*!
 * \brief Reset the data in the graph and move its data to the returned graph object.
 *
 * After the call this graph no longer references any CSR; the returned graph
 * holds them instead, together with a copy of the multigraph flag.
 *
 * \return a raw pointer to the newly allocated graph object.
 */
virtual GraphInterface *Reset() {
  // The class declares a copy-assignment operator but no move-assignment
  // operator, so `*gptr = std::move(*this)` selected the copy assignment and
  // left this graph untouched. Hand the CSRs over explicitly instead: moving
  // from a shared pointer leaves the source empty, which is the documented reset.
  ImmutableGraph* gptr = new ImmutableGraph(this->is_multigraph_);
  gptr->in_csr_ = std::move(this->in_csr_);
  gptr->out_csr_ = std::move(this->out_csr_);
  return gptr;
}
/*!
 * \brief Sample a subgraph from the seed vertices with neighbor sampling.
 * The neighbors are sampled with a uniform distribution.
 * \param seeds The seed vertices.
 * \param neigh_type The type of neighbors to sample; presumably "in" or "out"
 *        (TODO confirm against the implementation).
 * \param num_hops Presumably the number of hops to expand from the seeds
 *        (TODO confirm).
 * \param expand_factor Presumably the maximal number of neighbors sampled per
 *        vertex (TODO confirm).
 * \return a subgraph
 */
SampledSubgraph NeighborUniformSample(IdArray seeds, const std::string &neigh_type,
int num_hops, int expand_factor) const;
/*!
 * \brief Get the adjacency matrix of the graph.
 *
 * By default, a row of returned adjacency matrix represents the destination
 * of an edge and the column represents the source.
 * \param transpose A flag to transpose the returned adjacency matrix.
 * \param fmt the format of the returned adjacency matrix; presumably "csr" or
 *        "coo" (TODO confirm the accepted values against the implementation).
 * \return a vector of three IdArray.
 *         NOTE(review): the meaning of each array likely depends on fmt;
 *         confirm that three arrays are returned for every format.
 */
virtual std::vector<IdArray> GetAdj(bool transpose, const std::string &fmt) const;
protected:
/*
 * Helpers that take a source and a destination vertex id and return a
 * DGLIdIters range.
 * NOTE(review): presumably the range covers the ids of the edges from src to
 * dst, looked up in the in-edge CSR and the out-edge CSR respectively --
 * confirm against the implementation.
 */
DGLIdIters GetInEdgeIdRef(dgl_id_t src, dgl_id_t dst) const;
DGLIdIters GetOutEdgeIdRef(dgl_id_t src, dgl_id_t dst) const;
/*
* The immutable graph may only contain one of the CSRs (e.g., the sampled subgraphs).
* When we get in csr or out csr, we try to get the one cached in the structure.
* If not, we transpose the other one to get the one we need.
*/
// Return the in-edge CSR, building it on first use from the transposed
// out-edge CSR and caching it in in_csr_.
CSR::Ptr GetInCSR() const {
  if (!in_csr_) {
    CHECK(out_csr_ != nullptr) << "one of the CSRs must exist";
    // The method is const, so the cache member is written through a
    // const_cast of `this`.
    ImmutableGraph *self = const_cast<ImmutableGraph *>(this);
    self->in_csr_ = out_csr_->Transpose();
  }
  return in_csr_;
}
// Return the out-edge CSR, building it on first use from the transposed
// in-edge CSR and caching it in out_csr_.
CSR::Ptr GetOutCSR() const {
  if (!out_csr_) {
    CHECK(in_csr_ != nullptr) << "one of the CSRs must exist";
    // The method is const, so the cache member is written through a
    // const_cast of `this`.
    ImmutableGraph *self = const_cast<ImmutableGraph *>(this);
    self->out_csr_ = in_csr_->Transpose();
  }
  return out_csr_;
}
/*!
 * \brief Get the CSR array that represents the in-edges.
 *
 * This method copies data from std::vector to IdArray, so each call pays for
 * a copy of the CSR data.
 * \return the CSR array.
 */
CSRArray GetInCSRArray() const;
/*!
 * \brief Get the CSR array that represents the out-edges.
 *
 * This method copies data from std::vector to IdArray, so each call pays for
 * a copy of the CSR data.
 * \return the CSR array.
 */
CSRArray GetOutCSRArray() const;
/*!
 * \brief Sample a subgraph starting from the given seed vertices.
 * \param seed_arr The seed vertices.
 * \param probability Pointer to float values; presumably per-vertex sampling
 *        probabilities, with a null pointer meaning uniform sampling
 *        (TODO confirm against the implementation).
 * \param neigh_type The type of neighbors to sample; presumably "in" or "out"
 *        (TODO confirm).
 * \param num_hops Presumably the number of hops to expand from the seeds
 *        (TODO confirm).
 * \param num_neighbor Presumably the maximal number of neighbors sampled per
 *        vertex (TODO confirm).
 * \return the sampled subgraph
 */
SampledSubgraph SampleSubgraph(IdArray seed_arr, const float* probability,
const std::string &neigh_type,
int num_hops, size_t num_neighbor) const;
/*!
 * \brief Compact a subgraph.
 *
 * In a sampled subgraph, the vertex ids are still the ids used in the original
 * graph. This method converts them to ids local to the subgraph.
 * \param induced_vertices Presumably the original-graph ids of the vertices in
 *        the subgraph, used to define the mapping (TODO confirm).
 */
void CompactSubgraph(IdArray induced_vertices);
// Store the in-edges.
CSR::Ptr in_csr_;
// Store the out-edges.
CSR::Ptr out_csr_;
/*!
* \brief Whether this is a multigraph.
*
* When a multiedge is added, this flag switches to true.
*/
bool is_multigraph_ = false;
};
} // namespace dgl
#endif // DGL_IMMUTABLE_GRAPH_H_
...@@ -74,6 +74,19 @@ def tensor(data, dtype=None): ...@@ -74,6 +74,19 @@ def tensor(data, dtype=None):
""" """
pass pass
def get_preferred_sparse_format():
    """Get the preferred sparse matrix format supported by the backend.

    Each backend has a sparse format it prefers. This information is useful
    when constructing a sparse matrix.

    This is the interface stub; it has no implementation here.

    Returns
    -------
    string
        the name of the preferred sparse matrix format.
    """
    pass
def sparse_matrix(data, index, shape, force_format=False): def sparse_matrix(data, index, shape, force_format=False):
"""Create a sparse matrix. """Create a sparse matrix.
...@@ -834,7 +847,3 @@ def zerocopy_from_numpy(np_array): ...@@ -834,7 +847,3 @@ def zerocopy_from_numpy(np_array):
# ---------------- # ----------------
# These are not related to tensors. Some of them are temporary workarounds that # These are not related to tensors. Some of them are temporary workarounds that
# should be included in DGL in the future. # should be included in DGL in the future.
def create_immutable_graph_index():
"""Create an immutable graph index object."""
pass
from .tensor import * from .tensor import *
from .immutable_graph_index import create_immutable_graph_index
from __future__ import absolute_import
import ctypes
import numpy as np
import networkx as nx
import scipy.sparse as sp
import mxnet as mx
class ImmutableGraphIndex(object):
    """Backend-specific graph index object on immutable graphs.

    We can use a CSR matrix to represent a graph structure. For functionality,
    one CSR matrix is sufficient. However, for efficient access
    to in-edges and out-edges of a directed graph, we need to use two CSR matrices.
    In these CSR matrices, both rows and columns represent vertices. In one CSR
    matrix, a row stores in-edges of a vertex (whose source vertex is a neighbor
    and destination vertex is the vertex itself). Thus, a non-zero entry is
    the neighbor Id and the value is the corresponding edge Id.
    The other CSR matrix stores the out-edges in the same fashion.

    Either CSR may be None: the subgraph and sampling methods of this class
    create indexes that hold only one of the two.

    Parameters
    ----------
    in_csr : a csr array that stores in-edges.
        MXNet CSRArray
    out_csr : a csr array that stores out-edges.
        MXNet CSRArray
    """
    def __init__(self, in_csr, out_csr):
        self._in_csr = in_csr
        self._out_csr = out_csr
        self._cached_adj = {}

    def _any_csr(self):
        """Return whichever CSR is present, preferring the in-edge one.

        Used by queries whose answer is the same for both CSRs, so that they
        also work on an index that only holds the out-edge CSR.
        """
        if self._in_csr is not None:
            return self._in_csr
        return self._out_csr

    def number_of_nodes(self):
        """Return the number of nodes.

        Returns
        -------
        int
            The number of nodes
        """
        return len(self._any_csr())

    def number_of_edges(self):
        """Return the number of edges.

        Returns
        -------
        int
            The number of edges
        """
        return self._any_csr().indices.shape[0]

    def has_edges(self, u, v):
        """Return true if the edge exists.

        Parameters
        ----------
        u : NDArray
            The src nodes.
        v : NDArray
            The dst nodes.

        Returns
        -------
        NDArray
            0-1 array indicating existence
        """
        ids = mx.nd.contrib.edge_id(self._in_csr, v, u)
        return ids >= 0

    def edge_ids(self, u, v):
        """Return the edge ids.

        Only the (u, v) pairs for which an edge id >= 0 is found are returned.

        Parameters
        ----------
        u : NDArray
            The src nodes.
        v : NDArray
            The dst nodes.

        Returns
        -------
        array
            The src nodes of the edges that exist.
        array
            The dst nodes of the edges that exist.
        array
            The edge id array.
        """
        if len(u) == 0 or len(v) == 0:
            return [], [], []
        ids = mx.nd.contrib.edge_id(self._in_csr, v, u)
        ids = ids.asnumpy()
        v = v.asnumpy()
        u = u.asnumpy()
        return u[ids >= 0], v[ids >= 0], ids[ids >= 0]

    def predecessors(self, v, radius=1):
        """Return the predecessors of the node.

        Parameters
        ----------
        v : int
            The node.
        radius : int, optional
            The radius of the neighborhood.

        Returns
        -------
        NDArray
            Array of predecessors
        """
        if radius > 1:
            raise Exception('Immutable graph doesn\'t support predecessors with radius > 1 for now.')
        return self._in_csr[v].indices

    def successors(self, v, radius=1):
        """Return the successors of the node.

        Parameters
        ----------
        v : int
            The node.
        radius : int, optional
            The radius of the neighborhood.

        Returns
        -------
        NDArray
            Array of successors
        """
        if radius > 1:
            raise Exception('Immutable graph doesn\'t support successors with radius > 1 for now.')
        return self._out_csr[v].indices

    def in_edges(self, v):
        """Return the in edges of the node(s).

        Parameters
        ----------
        v : NDArray
            The node(s).

        Returns
        -------
        NDArray
            index pointers
        NDArray
            The src nodes.
        NDArray
            The edge ids.
        """
        rows = mx.nd.take(self._in_csr, v)
        return rows.indptr, rows.indices, rows.data

    def out_edges(self, v):
        """Return the out edges of the node(s).

        Parameters
        ----------
        v : NDArray
            The node(s).

        Returns
        -------
        NDArray
            index pointers
        NDArray
            The dst nodes.
        NDArray
            The edge ids.
        """
        rows = mx.nd.take(self._out_csr, v)
        return rows.indptr, rows.indices, rows.data

    def edges(self, sorted=False):
        """Return all the edges

        Parameters
        ----------
        sorted : bool
            True if the returned edges are sorted by their src and dst ids.
            The flag is not consulted by this implementation.

        Returns
        -------
        NDArray
            The src nodes.
        NDArray
            The dst nodes.
        NDArray
            The edge ids.
        """
        #TODO(zhengda) we need to return NDArray directly
        # We don't need to take care of the sorted flag because the vertex Ids
        # are already sorted.
        coo = self._in_csr.asscipy().tocoo()
        return coo.col, coo.row, coo.data

    def get_in_degree(self):
        """Return the in degrees of all nodes.

        Returns
        -------
        NDArray
            degrees
        """
        return mx.nd.contrib.getnnz(self._in_csr, axis=1)

    def get_out_degree(self):
        """Return the out degrees of all nodes.

        Returns
        -------
        NDArray
            degrees
        """
        return mx.nd.contrib.getnnz(self._out_csr, axis=1)

    def node_subgraph(self, v):
        """Return the induced node subgraph.

        Parameters
        ----------
        v : NDArray
            The nodes.

        Returns
        -------
        ImmutableGraphIndex
            The subgraph index. It only holds the in-edge CSR.
        NDArray
            Induced nodes
        callable
            A zero-argument function that returns the induced edges.
        """
        v = mx.nd.sort(v)
        # when return_mapping is turned on, dgl_subgraph returns another CSRArray that
        # stores the edge Ids of the original graph.
        csr = mx.nd.contrib.dgl_subgraph(self._in_csr, v, return_mapping=True)
        induced_nodes = v
        induced_edges = lambda: csr[1].data
        return ImmutableGraphIndex(csr[0], None), induced_nodes, induced_edges

    def node_subgraphs(self, vs_arr):
        """Return the induced node subgraphs.

        Parameters
        ----------
        vs_arr : a vector of NDArray
            The nodes.

        Returns
        -------
        a vector of ImmutableGraphIndex
            The subgraph index. Each one only holds the in-edge CSR.
        a vector of NDArrays
            Induced nodes of subgraphs.
        a vector of callables
            One zero-argument function per subgraph that returns its induced edges.
        """
        vs_arr = [mx.nd.sort(v) for v in vs_arr]
        res = mx.nd.contrib.dgl_subgraph(self._in_csr, *vs_arr, return_mapping=True)
        in_csrs = res[0:len(vs_arr)]
        induced_nodes = vs_arr
        # Bind each mapping through a default argument. A plain `lambda: e.data`
        # looks `e` up when it is called, so every callable would return the
        # edges of the LAST subgraph.
        induced_edges = [lambda e=e: e.data for e in res[len(vs_arr):]]
        assert len(in_csrs) == len(induced_nodes)
        assert len(in_csrs) == len(induced_edges)
        gis = []
        induced_ns = []
        induced_es = []
        for in_csr, induced_n, induced_e in zip(in_csrs, induced_nodes, induced_edges):
            gis.append(ImmutableGraphIndex(in_csr, None))
            induced_ns.append(induced_n)
            induced_es.append(induced_e)
        return gis, induced_ns, induced_es

    def neighbor_sampling(self, seed_ids, expand_factor, num_hops, neighbor_type,
                          node_prob, max_subgraph_size):
        """Sample one subgraph per seed array with neighbor sampling.

        Parameters
        ----------
        seed_ids : a list of NDArray
            The seed nodes; one subgraph is sampled per entry.
        expand_factor : int
            Passed to the sampling operator as ``num_neighbor``.
        num_hops : int
            Passed to the sampling operator as ``num_hops``.
        neighbor_type : str
            'in' samples on the in-edge CSR, 'out' on the out-edge CSR.
        node_prob : NDArray or None
            If None, the uniform sampling operator is used; otherwise it is
            passed to the non-uniform sampling operator.
        max_subgraph_size : int
            Passed to the sampling operator as ``max_num_vertices``.

        Returns
        -------
        a list of ImmutableGraphIndex
            The sampled subgraphs. Each one only holds the CSR of the sampled
            direction.
        a list of NDArrays
            The nodes of each subgraph.
        a list of callables
            One zero-argument function per subgraph that returns the ``data``
            of the corresponding sampled CSR.

        Raises
        ------
        NotImplementedError
            If neighbor_type is neither 'in' nor 'out'.
        """
        if neighbor_type == 'in':
            g = self._in_csr
        elif neighbor_type == 'out':
            g = self._out_csr
        else:
            raise NotImplementedError
        num_subgs = len(seed_ids)
        if node_prob is None:
            res = mx.nd.contrib.dgl_csr_neighbor_uniform_sample(g, *seed_ids, num_hops=num_hops,
                                                                num_neighbor=expand_factor,
                                                                max_num_vertices=max_subgraph_size)
        else:
            res = mx.nd.contrib.dgl_csr_neighbor_non_uniform_sample(g, node_prob, *seed_ids, num_hops=num_hops,
                                                                    num_neighbor=expand_factor,
                                                                    max_num_vertices=max_subgraph_size)

        vertices, subgraphs = res[0:num_subgs], res[num_subgs:(2*num_subgs)]
        # The last element of each vertex array is read as the subgraph's node count.
        num_nodes = [subg_v[-1].asnumpy()[0] for subg_v in vertices]

        inputs = []
        inputs.extend(subgraphs)
        inputs.extend(vertices)
        compacts = mx.nd.contrib.dgl_graph_compact(*inputs, graph_sizes=num_nodes, return_mapping=False)
        # With a single subgraph the operator returns one array instead of a list.
        if isinstance(compacts, mx.nd.sparse.CSRNDArray):
            compacts = [compacts]
        if neighbor_type == 'in':
            gis = [ImmutableGraphIndex(csr, None) for csr in compacts]
        elif neighbor_type == 'out':
            gis = [ImmutableGraphIndex(None, csr) for csr in compacts]
        parent_nodes = [v[0:size] for v, size in zip(vertices, num_nodes)]
        # Bind each sampled CSR through a default argument. A plain
        # `lambda: e.data` would make every callable read the LAST subgraph.
        parent_edges = [lambda e=e: e.data for e in subgraphs]
        return gis, parent_nodes, parent_edges

    def adjacency_matrix(self, transpose, ctx):
        """Return the adjacency matrix representation of this graph.

        By default, a row of returned adjacency matrix represents the destination
        of an edge and the column represents the source.

        When transpose is True, a row represents the source and a column represents
        a destination.

        Parameters
        ----------
        transpose : bool
            A flag to transpose the returned adjacency matrix.
        ctx : context
            The device context of the returned matrix.

        Returns
        -------
        NDArray
            An object that returns tensor given context.
        """
        if transpose:
            mat = self._out_csr
        else:
            mat = self._in_csr
        return mx.nd.contrib.dgl_adjacency(mat.as_in_context(ctx))

    def from_coo_matrix(self, out_coo):
        """construct the graph index from a SciPy coo matrix.

        Edge ids are assigned in the order of the matrix's non-zero entries.
        The graph gets max(out_coo.shape) nodes.

        Parameters
        ----------
        out_coo : SciPy coo matrix
            The non-zero entries indicate out-edges of the graph.
        """
        edge_ids = mx.nd.arange(0, len(out_coo.data), step=1, repeat=1, dtype=np.int32)
        src = mx.nd.array(out_coo.row, dtype=np.int64)
        dst = mx.nd.array(out_coo.col, dtype=np.int64)
        # TODO we can't generate a csr_matrix with np.int64 directly.
        size = max(out_coo.shape)
        self.__init__(mx.nd.sparse.csr_matrix((edge_ids, (dst, src)), shape=(size, size)).astype(np.int64),
                      mx.nd.sparse.csr_matrix((edge_ids, (src, dst)), shape=(size, size)).astype(np.int64))

    def from_edge_list(self, elist):
        """Convert from an edge list.

        Edge ids are assigned in list order.

        Parameters
        ---------
        elist : list
            List of (u, v) edge tuple.
        """
        src, dst = zip(*elist)
        src = np.array(src)
        dst = np.array(dst)
        num_nodes = max(src.max(), dst.max()) + 1
        min_nodes = min(src.min(), dst.min())
        if min_nodes != 0:
            # NOTE(review): DGLError is not among the imports visible at the top
            # of this file; confirm it is in scope, otherwise this line raises
            # NameError instead.
            raise DGLError('Invalid edge list. Nodes must start from 0.')
        edge_ids = mx.nd.arange(0, len(src), step=1, repeat=1, dtype=np.int32)
        src = mx.nd.array(src, dtype=np.int64)
        dst = mx.nd.array(dst, dtype=np.int64)
        # TODO we can't generate a csr_matrix with np.int64 directly.
        in_csr = mx.nd.sparse.csr_matrix((edge_ids, (dst, src)),
                                         shape=(num_nodes, num_nodes)).astype(np.int64)
        out_csr = mx.nd.sparse.csr_matrix((edge_ids, (src, dst)),
                                          shape=(num_nodes, num_nodes)).astype(np.int64)
        self.__init__(in_csr, out_csr)
def create_immutable_graph_index(in_csr=None, out_csr=None):
    """Build a backend-specific immutable graph index from optional CSR arrays.

    Each argument that is not None is type-checked, in_csr first, before the
    index is constructed.

    Parameters
    ----------
    in_csr : MXNet CSRNDArray
        The in-edge CSR array.
    out_csr : MXNet CSRNDArray
        The out-edge CSR array.

    Returns
    -------
    ImmutableGraphIndex
        The backend-specific immutable graph index.

    Raises
    ------
    TypeError
        If an argument is given and is not an MXNet CSRNDArray.
    """
    for csr in (in_csr, out_csr):
        if csr is None:
            continue
        if not isinstance(csr, mx.nd.sparse.CSRNDArray):
            raise TypeError()
    return ImmutableGraphIndex(in_csr, out_csr)
...@@ -28,6 +28,14 @@ def tensor(data, dtype=None): ...@@ -28,6 +28,14 @@ def tensor(data, dtype=None):
dtype = np.float32 dtype = np.float32
return nd.array(data, dtype=dtype) return nd.array(data, dtype=dtype)
def get_preferred_sparse_format():
    """Get the preferred sparse matrix format supported by the backend.

    Each backend has a sparse format it prefers. This information is useful
    when constructing a sparse matrix.

    Returns
    -------
    string
        "csr" for this backend.
    """
    return "csr"
def sparse_matrix(data, index, shape, force_format=False): def sparse_matrix(data, index, shape, force_format=False):
fmt = index[0] fmt = index[0]
if fmt == 'coo': if fmt == 'coo':
......
...@@ -22,6 +22,14 @@ def cpu(): ...@@ -22,6 +22,14 @@ def cpu():
def tensor(data, dtype=None): def tensor(data, dtype=None):
return np.array(data, dtype) return np.array(data, dtype)
def get_preferred_sparse_format():
    """Get the preferred sparse matrix format supported by the backend.

    Each backend has a sparse format it prefers. This information is useful
    when constructing a sparse matrix.

    Returns
    -------
    string
        "csr" for this backend.
    """
    return "csr"
def sparse_matrix(data, index, shape, force_format=False): def sparse_matrix(data, index, shape, force_format=False):
fmt = index[0] fmt = index[0]
if fmt == 'coo': if fmt == 'coo':
...@@ -142,5 +150,3 @@ def zerocopy_to_numpy(input): ...@@ -142,5 +150,3 @@ def zerocopy_to_numpy(input):
def zerocopy_from_numpy(np_array): def zerocopy_from_numpy(np_array):
return np_array return np_array
# create_immutable_graph_index not enabled
...@@ -23,6 +23,14 @@ def cpu(): ...@@ -23,6 +23,14 @@ def cpu():
def tensor(data, dtype=None): def tensor(data, dtype=None):
return th.tensor(data, dtype=dtype) return th.tensor(data, dtype=dtype)
def get_preferred_sparse_format():
    """Get the preferred sparse matrix format supported by the backend.

    Each backend has a sparse format it prefers. This information is useful
    when constructing a sparse matrix.

    Returns
    -------
    string
        "coo" for this backend.
    """
    return "coo"
if TH_VERSION.version[0] == 0: if TH_VERSION.version[0] == 0:
def sparse_matrix(data, index, shape, force_format=False): def sparse_matrix(data, index, shape, force_format=False):
fmt = index[0] fmt = index[0]
...@@ -64,7 +72,10 @@ def astype(input, ty): ...@@ -64,7 +72,10 @@ def astype(input, ty):
return input.type(ty) return input.type(ty)
def asnumpy(input): def asnumpy(input):
return input.cpu().numpy() if isinstance(input, th.sparse.FloatTensor):
return input.to_dense().cpu().numpy()
else:
return input.cpu().numpy()
def copy_to(input, ctx): def copy_to(input, ctx):
if ctx.type == 'cpu': if ctx.type == 'cpu':
...@@ -188,5 +199,3 @@ def zerocopy_to_numpy(input): ...@@ -188,5 +199,3 @@ def zerocopy_to_numpy(input):
def zerocopy_from_numpy(np_array): def zerocopy_from_numpy(np_array):
return th.from_numpy(np_array) return th.from_numpy(np_array)
# create_immutable_graph_index not enabled
# This file contains subgraph samplers. # This file contains subgraph samplers.
import sys
import numpy as np import numpy as np
import threading import threading
import random import random
...@@ -18,8 +19,7 @@ __all__ = ['NeighborSampler'] ...@@ -18,8 +19,7 @@ __all__ = ['NeighborSampler']
class NSSubgraphLoader(object): class NSSubgraphLoader(object):
def __init__(self, g, batch_size, expand_factor, num_hops=1, def __init__(self, g, batch_size, expand_factor, num_hops=1,
neighbor_type='in', node_prob=None, seed_nodes=None, neighbor_type='in', node_prob=None, seed_nodes=None,
shuffle=False, num_workers=1, max_subgraph_size=None, shuffle=False, num_workers=1, return_seed_id=False):
return_seed_id=False):
self._g = g self._g = g
if not g._graph.is_readonly(): if not g._graph.is_readonly():
raise NotImplementedError("subgraph loader only support read-only graphs.") raise NotImplementedError("subgraph loader only support read-only graphs.")
...@@ -38,11 +38,6 @@ class NSSubgraphLoader(object): ...@@ -38,11 +38,6 @@ class NSSubgraphLoader(object):
if shuffle: if shuffle:
self._seed_nodes = F.rand_shuffle(self._seed_nodes) self._seed_nodes = F.rand_shuffle(self._seed_nodes)
self._num_workers = num_workers self._num_workers = num_workers
if max_subgraph_size is None:
# This size is set temporarily.
self._max_subgraph_size = 1000000
else:
self._max_subgraph_size = max_subgraph_size
self._neighbor_type = neighbor_type self._neighbor_type = neighbor_type
self._subgraphs = [] self._subgraphs = []
self._seed_ids = [] self._seed_ids = []
...@@ -61,7 +56,7 @@ class NSSubgraphLoader(object): ...@@ -61,7 +56,7 @@ class NSSubgraphLoader(object):
self._subgraph_idx += 1 self._subgraph_idx += 1
sgi = self._g._graph.neighbor_sampling(seed_ids, self._expand_factor, sgi = self._g._graph.neighbor_sampling(seed_ids, self._expand_factor,
self._num_hops, self._neighbor_type, self._num_hops, self._neighbor_type,
self._node_prob, self._max_subgraph_size) self._node_prob)
subgraphs = [DGLSubGraph(self._g, i.induced_nodes, i.induced_edges, \ subgraphs = [DGLSubGraph(self._g, i.induced_nodes, i.induced_edges, \
i) for i in sgi] i) for i in sgi]
self._subgraphs.extend(subgraphs) self._subgraphs.extend(subgraphs)
...@@ -200,13 +195,10 @@ class _PrefetchingLoader(object): ...@@ -200,13 +195,10 @@ class _PrefetchingLoader(object):
def NeighborSampler(g, batch_size, expand_factor, num_hops=1, def NeighborSampler(g, batch_size, expand_factor, num_hops=1,
neighbor_type='in', node_prob=None, seed_nodes=None, neighbor_type='in', node_prob=None, seed_nodes=None,
shuffle=False, num_workers=1, max_subgraph_size=None, shuffle=False, num_workers=1,
return_seed_id=False, prefetch=False): return_seed_id=False, prefetch=False):
'''Create a sampler that samples neighborhood. '''Create a sampler that samples neighborhood.
.. note:: This method currently only supports MXNet backend. Set
"DGLBACKEND" environment variable to "mxnet".
This creates a subgraph data loader that samples subgraphs from the input graph This creates a subgraph data loader that samples subgraphs from the input graph
with neighbor sampling. This sampling method is implemented in C and can perform with neighbor sampling. This sampling method is implemented in C and can perform
sampling very efficiently. sampling very efficiently.
...@@ -246,8 +238,6 @@ def NeighborSampler(g, batch_size, expand_factor, num_hops=1, ...@@ -246,8 +238,6 @@ def NeighborSampler(g, batch_size, expand_factor, num_hops=1,
If it's None, the seed vertices are all vertices in the graph. If it's None, the seed vertices are all vertices in the graph.
shuffle: indicates the sampled subgraphs are shuffled. shuffle: indicates the sampled subgraphs are shuffled.
num_workers: the number of worker threads that sample subgraphs in parallel. num_workers: the number of worker threads that sample subgraphs in parallel.
max_subgraph_size: the maximal subgraph size in terms of the number of nodes.
GPU doesn't support very large subgraphs.
return_seed_id: indicates whether to return seed ids along with the subgraphs. return_seed_id: indicates whether to return seed ids along with the subgraphs.
The seed Ids are in the parent graph. The seed Ids are in the parent graph.
prefetch : bool, default False prefetch : bool, default False
...@@ -260,7 +250,7 @@ def NeighborSampler(g, batch_size, expand_factor, num_hops=1, ...@@ -260,7 +250,7 @@ def NeighborSampler(g, batch_size, expand_factor, num_hops=1,
information about the subgraphs. information about the subgraphs.
''' '''
loader = NSSubgraphLoader(g, batch_size, expand_factor, num_hops, neighbor_type, node_prob, loader = NSSubgraphLoader(g, batch_size, expand_factor, num_hops, neighbor_type, node_prob,
seed_nodes, shuffle, num_workers, max_subgraph_size, return_seed_id) seed_nodes, shuffle, num_workers, return_seed_id)
if not prefetch: if not prefetch:
return loader return loader
else: else:
......
...@@ -915,7 +915,7 @@ class DGLGraph(object): ...@@ -915,7 +915,7 @@ class DGLGraph(object):
else: else:
raise DGLError('Invalid form:', form) raise DGLError('Invalid form:', form)
def all_edges(self, form='uv', return_sorted=False): def all_edges(self, form='uv', order=None):
"""Return all the edges. """Return all the edges.
Parameters Parameters
...@@ -926,8 +926,12 @@ class DGLGraph(object): ...@@ -926,8 +926,12 @@ class DGLGraph(object):
- 'all' : a tuple (u, v, eid) - 'all' : a tuple (u, v, eid)
- 'uv' : a pair (u, v), default - 'uv' : a pair (u, v), default
- 'eid' : one eid tensor - 'eid' : one eid tensor
return_sorted : bool order : string
True if the returned edges are sorted by their src and dst ids. The order of the returned edges. Currently support:
- 'srcdst' : sorted by their src and dst ids.
- 'eid' : sorted by edge Ids.
- None : the arbitrary order.
Returns Returns
------- -------
...@@ -953,7 +957,7 @@ class DGLGraph(object): ...@@ -953,7 +957,7 @@ class DGLGraph(object):
>>> G.all_edges('all') >>> G.all_edges('all')
(tensor([0, 0, 1]), tensor([1, 2, 2]), tensor([0, 1, 2])) (tensor([0, 0, 1]), tensor([1, 2, 2]), tensor([0, 1, 2]))
""" """
src, dst, eid = self._graph.edges(return_sorted) src, dst, eid = self._graph.edges(order)
if form == 'all': if form == 'all':
return (src.tousertensor(), dst.tousertensor(), eid.tousertensor()) return (src.tousertensor(), dst.tousertensor(), eid.tousertensor())
elif form == 'uv': elif form == 'uv':
......
...@@ -11,7 +11,6 @@ from ._ffi.function import _init_api ...@@ -11,7 +11,6 @@ from ._ffi.function import _init_api
from .base import DGLError from .base import DGLError
from . import backend as F from . import backend as F
from . import utils from . import utils
from .immutable_graph_index import create_immutable_graph_index
GraphIndexHandle = ctypes.c_void_p GraphIndexHandle = ctypes.c_void_p
...@@ -23,8 +22,10 @@ class GraphIndex(object): ...@@ -23,8 +22,10 @@ class GraphIndex(object):
handle : GraphIndexHandle handle : GraphIndexHandle
Handler Handler
""" """
def __init__(self, handle): def __init__(self, handle=None, multigraph=None, readonly=None):
self._handle = handle self._handle = handle
self._multigraph = multigraph
self._readonly = readonly
self._cache = {} self._cache = {}
def __del__(self): def __del__(self):
...@@ -35,21 +36,35 @@ class GraphIndex(object): ...@@ -35,21 +36,35 @@ class GraphIndex(object):
src, dst, _ = self.edges() src, dst, _ = self.edges()
n_nodes = self.number_of_nodes() n_nodes = self.number_of_nodes()
multigraph = self.is_multigraph() multigraph = self.is_multigraph()
readonly = self.is_readonly()
return n_nodes, multigraph, src, dst return n_nodes, multigraph, readonly, src, dst
def __setstate__(self, state): def __setstate__(self, state):
"""The pickle state of GraphIndex is defined as a triplet """The pickle state of GraphIndex is defined as a triplet
(number_of_nodes, multigraph, src_nodes, dst_nodes) (number_of_nodes, multigraph, readonly, src_nodes, dst_nodes)
""" """
n_nodes, multigraph, src, dst = state n_nodes, multigraph, readonly, src, dst = state
self._handle = _CAPI_DGLGraphCreate(multigraph) if readonly:
self._cache = {} self._readonly = readonly
self._multigraph = multigraph
self.init(src, dst, F.arange(0, len(src)), n_nodes)
else:
self._handle = _CAPI_DGLGraphCreateMutable(multigraph)
self._cache = {}
self.clear()
self.add_nodes(n_nodes)
self.add_edges(src, dst)
self.clear() def init(self, src_ids, dst_ids, edge_ids, num_nodes):
self.add_nodes(n_nodes) """The actual init function"""
self.add_edges(src, dst) assert len(src_ids) == len(dst_ids)
assert len(src_ids) == len(edge_ids)
self._handle = _CAPI_DGLGraphCreate(src_ids.todgltensor(), dst_ids.todgltensor(),
edge_ids.todgltensor(), self._multigraph, num_nodes,
self._readonly)
def add_nodes(self, num): def add_nodes(self, num):
"""Add nodes. """Add nodes.
...@@ -107,7 +122,9 @@ class GraphIndex(object): ...@@ -107,7 +122,9 @@ class GraphIndex(object):
bool bool
True if it is a multigraph, False otherwise. True if it is a multigraph, False otherwise.
""" """
return bool(_CAPI_DGLGraphIsMultigraph(self._handle)) if self._multigraph is None:
self._multigraph = bool(_CAPI_DGLGraphIsMultigraph(self._handle))
return self._multigraph
def is_readonly(self): def is_readonly(self):
"""Indicate whether the graph index is read-only. """Indicate whether the graph index is read-only.
...@@ -117,7 +134,9 @@ class GraphIndex(object): ...@@ -117,7 +134,9 @@ class GraphIndex(object):
bool bool
True if it is a read-only graph, False otherwise. True if it is a read-only graph, False otherwise.
""" """
return False if self._readonly is None:
self._readonly = bool(_CAPI_DGLGraphIsReadonly(self._handle))
return self._readonly
def number_of_nodes(self): def number_of_nodes(self):
"""Return the number of nodes. """Return the number of nodes.
...@@ -367,13 +386,17 @@ class GraphIndex(object): ...@@ -367,13 +386,17 @@ class GraphIndex(object):
return src, dst, eid return src, dst, eid
@utils.cached_member(cache='_cache', prefix='edges') @utils.cached_member(cache='_cache', prefix='edges')
def edges(self, return_sorted=False): def edges(self, order=None):
"""Return all the edges """Return all the edges
Parameters Parameters
---------- ----------
return_sorted : bool order : string
True if the returned edges are sorted by their src and dst ids. The order of the returned edges. Currently support:
- 'srcdst' : sorted by their src and dst ids.
- 'eid' : sorted by edge Ids.
- None : the arbitrary order.
Returns Returns
------- -------
...@@ -384,9 +407,11 @@ class GraphIndex(object): ...@@ -384,9 +407,11 @@ class GraphIndex(object):
utils.Index utils.Index
The edge ids. The edge ids.
""" """
key = 'edges_s%d' % return_sorted key = 'edges_s%s' % order
if key not in self._cache: if key not in self._cache:
edge_array = _CAPI_DGLGraphEdges(self._handle, return_sorted) if order is None:
order = ""
edge_array = _CAPI_DGLGraphEdges(self._handle, order)
src = utils.toindex(edge_array(0)) src = utils.toindex(edge_array(0))
dst = utils.toindex(edge_array(1)) dst = utils.toindex(edge_array(1))
eid = utils.toindex(edge_array(2)) eid = utils.toindex(edge_array(2))
...@@ -537,22 +562,27 @@ class GraphIndex(object): ...@@ -537,22 +562,27 @@ class GraphIndex(object):
if not isinstance(transpose, bool): if not isinstance(transpose, bool):
raise DGLError('Expect bool value for "transpose" arg,' raise DGLError('Expect bool value for "transpose" arg,'
' but got %s.' % (type(transpose))) ' but got %s.' % (type(transpose)))
src, dst, _ = self.edges(False) fmt = F.get_preferred_sparse_format()
src = src.tousertensor(ctx) # the index of the ctx will be cached rst = _CAPI_DGLGraphGetAdj(self._handle, transpose, fmt)
dst = dst.tousertensor(ctx) # the index of the ctx will be cached if fmt == "csr":
src = F.unsqueeze(src, dim=0) indptr = F.copy_to(utils.toindex(rst(0)).tousertensor(), ctx)
dst = F.unsqueeze(dst, dim=0) indices = F.copy_to(utils.toindex(rst(1)).tousertensor(), ctx)
if transpose: shuffle = utils.toindex(rst(2))
idx = F.cat([src, dst], dim=0) dat = F.ones(indices.shape, dtype=F.float32, ctx=ctx)
return F.sparse_matrix(dat, ('csr', indices, indptr),
(self.number_of_nodes(), self.number_of_nodes()))[0], shuffle
elif fmt == "coo":
## FIXME(minjie): data type
idx = F.copy_to(utils.toindex(rst(0)).tousertensor(), ctx)
m = self.number_of_edges()
idx = F.reshape(idx, (2, m))
dat = F.ones((m,), dtype=F.float32, ctx=ctx)
n = self.number_of_nodes()
adj, shuffle_idx = F.sparse_matrix(dat, ('coo', idx), (n, n))
shuffle_idx = utils.toindex(shuffle_idx) if shuffle_idx is not None else None
return adj, shuffle_idx
else: else:
idx = F.cat([dst, src], dim=0) raise Exception("unknown format")
n = self.number_of_nodes()
m = self.number_of_edges()
# FIXME(minjie): data type
dat = F.ones((m,), dtype=F.float32, ctx=ctx)
adj, shuffle_idx = F.sparse_matrix(dat, ('coo', idx), (n, n))
shuffle_idx = utils.toindex(shuffle_idx) if shuffle_idx is not None else None
return adj, shuffle_idx
@utils.cached_member(cache='_cache', prefix='inc') @utils.cached_member(cache='_cache', prefix='inc')
def incidence_matrix(self, typestr, ctx): def incidence_matrix(self, typestr, ctx):
...@@ -590,7 +620,7 @@ class GraphIndex(object): ...@@ -590,7 +620,7 @@ class GraphIndex(object):
A index for data shuffling due to sparse format change. Return None A index for data shuffling due to sparse format change. Return None
if shuffle is not required. if shuffle is not required.
""" """
src, dst, eid = self.edges(False) src, dst, eid = self.edges()
src = src.tousertensor(ctx) # the index of the ctx will be cached src = src.tousertensor(ctx) # the index of the ctx will be cached
dst = dst.tousertensor(ctx) # the index of the ctx will be cached dst = dst.tousertensor(ctx) # the index of the ctx will be cached
eid = eid.tousertensor(ctx) # the index of the ctx will be cached eid = eid.tousertensor(ctx) # the index of the ctx will be cached
...@@ -631,6 +661,22 @@ class GraphIndex(object): ...@@ -631,6 +661,22 @@ class GraphIndex(object):
shuffle_idx = utils.toindex(shuffle_idx) if shuffle_idx is not None else None shuffle_idx = utils.toindex(shuffle_idx) if shuffle_idx is not None else None
return inc, shuffle_idx return inc, shuffle_idx
def neighbor_sampling(self, seed_ids, expand_factor, num_hops, neighbor_type, node_prob):
    """Sample one subgraph per seed array with neighbor sampling.

    Uniform sampling is used when node_prob is None, non-uniform sampling
    otherwise. An empty seed_ids yields an empty list without sampling.

    Returns
    -------
    list of SubgraphIndex
        One entry per seed array. Entry i is built from results i,
        num_subgs + i and 2 * num_subgs + i of the sampling call.
    """
    if len(seed_ids) == 0:
        return []

    seed_ids = [seed.todgltensor() for seed in seed_ids]
    num_subgs = len(seed_ids)
    if node_prob is not None:
        rst = _nonuniform_sampling(self, node_prob, seed_ids, neighbor_type, num_hops,
                                   expand_factor)
    else:
        rst = _uniform_sampling(self, seed_ids, neighbor_type, num_hops, expand_factor)

    subgraphs = []
    for i in range(num_subgs):
        handle = rst(i)
        second = utils.toindex(rst(num_subgs + i))
        third = utils.toindex(rst(num_subgs * 2 + i))
        subgraphs.append(SubgraphIndex(handle, self, second, third))
    return subgraphs
def to_networkx(self): def to_networkx(self):
"""Convert to networkx graph. """Convert to networkx graph.
...@@ -659,8 +705,6 @@ class GraphIndex(object): ...@@ -659,8 +705,6 @@ class GraphIndex(object):
nx_graph : networkx.DiGraph nx_graph : networkx.DiGraph
The nx graph The nx graph
""" """
self.clear()
if not isinstance(nx_graph, nx.Graph): if not isinstance(nx_graph, nx.Graph):
nx_graph = (nx.MultiDiGraph(nx_graph) if self.is_multigraph() nx_graph = (nx.MultiDiGraph(nx_graph) if self.is_multigraph()
else nx.DiGraph(nx_graph)) else nx.DiGraph(nx_graph))
...@@ -671,9 +715,13 @@ class GraphIndex(object): ...@@ -671,9 +715,13 @@ class GraphIndex(object):
nx_graph = nx_graph.to_directed() nx_graph = nx_graph.to_directed()
num_nodes = nx_graph.number_of_nodes() num_nodes = nx_graph.number_of_nodes()
self.add_nodes(num_nodes) if not self.is_readonly():
self.clear()
self.add_nodes(num_nodes)
if nx_graph.number_of_edges() == 0: if nx_graph.number_of_edges() == 0:
if self.is_readonly():
raise Exception("can't create an empty immutable graph")
return return
# nx_graph.edges(data=True) returns src, dst, attr_dict # nx_graph.edges(data=True) returns src, dst, attr_dict
...@@ -692,9 +740,14 @@ class GraphIndex(object): ...@@ -692,9 +740,14 @@ class GraphIndex(object):
for e in nx_graph.edges: for e in nx_graph.edges:
src.append(e[0]) src.append(e[0])
dst.append(e[1]) dst.append(e[1])
eid = np.arange(0, len(src), dtype=np.int64)
num_nodes = nx_graph.number_of_nodes()
# We store edge Ids as an edge attribute.
eid = utils.toindex(eid)
src = utils.toindex(src) src = utils.toindex(src)
dst = utils.toindex(dst) dst = utils.toindex(dst)
self.add_edges(src, dst) self.init(src, dst, eid, num_nodes)
def from_scipy_sparse_matrix(self, adj): def from_scipy_sparse_matrix(self, adj):
"""Convert from scipy sparse matrix. """Convert from scipy sparse matrix.
...@@ -703,12 +756,17 @@ class GraphIndex(object): ...@@ -703,12 +756,17 @@ class GraphIndex(object):
---------- ----------
adj : scipy sparse matrix adj : scipy sparse matrix
""" """
self.clear() assert isinstance(adj, (scipy.sparse.csr_matrix, scipy.sparse.coo_matrix)), \
self.add_nodes(adj.shape[0]) "The input matrix has to be a SciPy sparse matrix."
if not self.is_readonly():
self.clear()
num_nodes = max(adj.shape[0], adj.shape[1])
adj_coo = adj.tocoo() adj_coo = adj.tocoo()
src = utils.toindex(adj_coo.row) src = utils.toindex(adj_coo.row)
dst = utils.toindex(adj_coo.col) dst = utils.toindex(adj_coo.col)
self.add_edges(src, dst) edge_ids = utils.toindex(F.arange(0, len(adj_coo.row)))
self.init(src, dst, edge_ids, num_nodes)
def from_edge_list(self, elist): def from_edge_list(self, elist):
"""Convert from an edge list. """Convert from an edge list.
...@@ -718,16 +776,19 @@ class GraphIndex(object): ...@@ -718,16 +776,19 @@ class GraphIndex(object):
elist : list elist : list
List of (u, v) edge tuple. List of (u, v) edge tuple.
""" """
self.clear() if not self.is_readonly():
self.clear()
src, dst = zip(*elist) src, dst = zip(*elist)
src = np.array(src) src = np.array(src)
dst = np.array(dst) dst = np.array(dst)
src_ids = utils.toindex(src)
dst_ids = utils.toindex(dst)
num_nodes = max(src.max(), dst.max()) + 1 num_nodes = max(src.max(), dst.max()) + 1
min_nodes = min(src.min(), dst.min()) min_nodes = min(src.min(), dst.min())
if min_nodes != 0: if min_nodes != 0:
raise DGLError('Invalid edge list. Nodes must start from 0.') raise DGLError('Invalid edge list. Nodes must start from 0.')
self.add_nodes(num_nodes) edge_ids = utils.toindex(F.arange(0, len(src)))
self.add_edges(utils.toindex(src), utils.toindex(dst)) self.init(src_ids, dst_ids, edge_ids, num_nodes)
def line_graph(self, backtracking=True): def line_graph(self, backtracking=True):
"""Return the line graph of this graph. """Return the line graph of this graph.
...@@ -761,7 +822,8 @@ class SubgraphIndex(GraphIndex): ...@@ -761,7 +822,8 @@ class SubgraphIndex(GraphIndex):
The parent edge ids in this subgraph. The parent edge ids in this subgraph.
""" """
def __init__(self, handle, parent, induced_nodes, induced_edges): def __init__(self, handle, parent, induced_nodes, induced_edges):
super(SubgraphIndex, self).__init__(handle) super(SubgraphIndex, self).__init__(parent.is_multigraph(), parent.is_readonly())
self._handle = handle
self._parent = parent self._parent = parent
self._induced_nodes = induced_nodes self._induced_nodes = induced_nodes
self._induced_edges = induced_edges self._induced_edges = induced_edges
...@@ -813,7 +875,7 @@ def map_to_subgraph_nid(subgraph, parent_nids): ...@@ -813,7 +875,7 @@ def map_to_subgraph_nid(subgraph, parent_nids):
Parameters Parameters
---------- ----------
subgraph: SubgraphIndex or ImmutableSubgraphIndex subgraph: SubgraphIndex
the graph index of a subgraph the graph index of a subgraph
parent_nids: utils.Index parent_nids: utils.Index
...@@ -900,12 +962,15 @@ def create_graph_index(graph_data=None, multigraph=False, readonly=False): ...@@ -900,12 +962,15 @@ def create_graph_index(graph_data=None, multigraph=False, readonly=False):
return graph_data return graph_data
if readonly: if readonly:
return create_immutable_graph_index(graph_data) # FIXME(zhengda): we should construct a C graph index before constructing GraphIndex.
gidx = GraphIndex(None, multigraph, readonly)
handle = _CAPI_DGLGraphCreate(multigraph) else:
gidx = GraphIndex(handle) handle = _CAPI_DGLGraphCreateMutable(multigraph)
gidx = GraphIndex(handle, multigraph, readonly)
if graph_data is None: if graph_data is None and readonly:
raise Exception("can't create an empty immutable graph")
elif graph_data is None:
return gidx return gidx
# edge list # edge list
...@@ -933,4 +998,30 @@ def create_graph_index(graph_data=None, multigraph=False, readonly=False): ...@@ -933,4 +998,30 @@ def create_graph_index(graph_data=None, multigraph=False, readonly=False):
return gidx return gidx
_init_api("dgl.graph_index") _init_api("dgl.graph_index")
# TODO(zhengda): we'll support variable-length inputs.
# Dispatch table for uniform neighbor sampling. The key is the number of
# seed-id arrays handed to the C API; the value is the C entry point that is
# called with exactly that many arrays (see the lookup by len(seed_ids) in
# _uniform_sampling). Only 1 and the powers of two up to 128 are registered.
_NEIGHBOR_SAMPLING_APIS = {
1: _CAPI_DGLGraphUniformSampling,
2: _CAPI_DGLGraphUniformSampling2,
4: _CAPI_DGLGraphUniformSampling4,
8: _CAPI_DGLGraphUniformSampling8,
16: _CAPI_DGLGraphUniformSampling16,
32: _CAPI_DGLGraphUniformSampling32,
64: _CAPI_DGLGraphUniformSampling64,
128: _CAPI_DGLGraphUniformSampling128,
}
# A one-element list holding a zero-length int64 index on CPU. It is the
# source of the padding arrays used when the number of seed arrays does not
# match one of the keys above.
_EMPTY_ARRAYS = [utils.toindex(F.ones(shape=(0), dtype=F.int64, ctx=F.cpu()))]
def _uniform_sampling(gidx, seed_ids, neigh_type, num_hops, expand_factor):
    """Run uniform neighbor sampling through the fixed-arity C API.

    The C API is only registered for 1, 2, 4, ..., 128 seed arrays (the keys
    of ``_NEIGHBOR_SAMPLING_APIS``). When the number of seed arrays is not one
    of those keys, the argument list is padded with empty arrays up to the
    next power of two; the real number of seed arrays is passed separately as
    the last argument.

    Parameters
    ----------
    gidx : graph index object
        Its ``_handle`` attribute is forwarded to the C API.
    seed_ids : list
        Seed-id arrays, one per requested subgraph. The list is not modified.
    neigh_type, num_hops, expand_factor
        Forwarded to the C API unchanged.

    Returns
    -------
    Whatever the selected C API function returns.

    Raises
    ------
    AssertionError
        If, after padding, the number of arrays is still not a supported
        arity (e.g. no seeds at all, or more than the largest registered key).
    """
    num_seeds = len(seed_ids)
    # Pad a private copy so the caller's list never picks up empty arrays.
    padded_ids = list(seed_ids)
    if num_seeds > 1 and num_seeds not in _NEIGHBOR_SAMPLING_APIS:
        # Smallest power of two >= num_seeds, computed with exact integer
        # arithmetic (no floating-point log rounding).
        target = 1 << (num_seeds - 1).bit_length()
        remain = target - num_seeds
        # _EMPTY_ARRAYS holds a single empty index; repeat it `remain` times.
        padded_ids.extend(_EMPTY_ARRAYS[0].todgltensor() for _ in range(remain))
    assert len(padded_ids) in _NEIGHBOR_SAMPLING_APIS, \
        "unsupported number of seed arrays: %d" % num_seeds
    return _NEIGHBOR_SAMPLING_APIS[len(padded_ids)](gidx._handle, *padded_ids, neigh_type,
                                                   num_hops, expand_factor, num_seeds)
This diff is collapsed.
...@@ -12,6 +12,38 @@ ...@@ -12,6 +12,38 @@
#include "../c_api_common.h" #include "../c_api_common.h"
namespace dgl { namespace dgl {
/*!
 * \brief Construct a graph from an edge list in COO form.
 *
 * \param src_ids    source vertex of every edge
 * \param dst_ids    destination vertex of every edge
 * \param edge_ids   id recorded for every edge in the adjacency lists
 * \param num_nodes  number of vertices to create before inserting edges
 * \param multigraph stored as the multigraph flag of this graph
 *
 * All three arrays must be valid id arrays of the same length, and every
 * endpoint must refer to one of the num_nodes vertices.
 */
Graph::Graph(IdArray src_ids, IdArray dst_ids, IdArray edge_ids, size_t num_nodes,
             bool multigraph): is_multigraph_(multigraph) {
  CHECK(IsValidIdArray(src_ids));
  CHECK(IsValidIdArray(dst_ids));
  CHECK(IsValidIdArray(edge_ids));
  this->AddVertices(num_nodes);

  num_edges_ = src_ids->shape[0];
  CHECK(num_edges_ == dst_ids->shape[0]) << "vectors in COO must have the same length";
  CHECK(num_edges_ == edge_ids->shape[0]) << "vectors in COO must have the same length";

  const dgl_id_t *srcs = static_cast<dgl_id_t*>(src_ids->data);
  const dgl_id_t *dsts = static_cast<dgl_id_t*>(dst_ids->data);
  const dgl_id_t *eids = static_cast<dgl_id_t*>(edge_ids->data);

  all_edges_src_.reserve(num_edges_);
  all_edges_dst_.reserve(num_edges_);
  for (int64_t k = 0; k < num_edges_; k++) {
    const dgl_id_t u = srcs[k];
    const dgl_id_t v = dsts[k];
    const dgl_id_t e = eids[k];
    CHECK(HasVertex(u) && HasVertex(v))
      << "Invalid vertices: src=" << u << " dst=" << v;

    // Forward adjacency: u -> v.
    adjlist_[u].succ.push_back(v);
    adjlist_[u].edge_id.push_back(e);
    // Reverse adjacency: v <- u.
    reverse_adjlist_[v].succ.push_back(u);
    reverse_adjlist_[v].edge_id.push_back(e);

    // NOTE(review): the flat endpoint lists are appended in input order, not
    // placed at position `e`; code that indexes them by edge id presumably
    // relies on edge_ids[k] == k -- confirm with callers.
    all_edges_src_.push_back(u);
    all_edges_dst_.push_back(v);
  }
}
void Graph::AddVertices(uint64_t num_vertices) { void Graph::AddVertices(uint64_t num_vertices) {
CHECK(!read_only_) << "Graph is read-only. Mutations are not allowed."; CHECK(!read_only_) << "Graph is read-only. Mutations are not allowed.";
adjlist_.resize(adjlist_.size() + num_vertices); adjlist_.resize(adjlist_.size() + num_vertices);
...@@ -333,13 +365,13 @@ Graph::EdgeArray Graph::OutEdges(IdArray vids) const { ...@@ -333,13 +365,13 @@ Graph::EdgeArray Graph::OutEdges(IdArray vids) const {
} }
// O(E*log(E)) if sort is required; otherwise, O(E) // O(E*log(E)) if sort is required; otherwise, O(E)
Graph::EdgeArray Graph::Edges(bool sorted) const { Graph::EdgeArray Graph::Edges(const std::string &order) const {
const int64_t len = num_edges_; const int64_t len = num_edges_;
IdArray src = IdArray::Empty({len}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); IdArray src = IdArray::Empty({len}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
IdArray dst = IdArray::Empty({len}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); IdArray dst = IdArray::Empty({len}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
IdArray eid = IdArray::Empty({len}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); IdArray eid = IdArray::Empty({len}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
if (sorted) { if (order == "srcdst") {
typedef std::tuple<int64_t, int64_t, int64_t> Tuple; typedef std::tuple<int64_t, int64_t, int64_t> Tuple;
std::vector<Tuple> tuples; std::vector<Tuple> tuples;
tuples.reserve(len); tuples.reserve(len);
...@@ -416,8 +448,9 @@ Subgraph Graph::VertexSubgraph(IdArray vids) const { ...@@ -416,8 +448,9 @@ Subgraph Graph::VertexSubgraph(IdArray vids) const {
oldv2newv[vid_data[i]] = i; oldv2newv[vid_data[i]] = i;
} }
Subgraph rst; Subgraph rst;
rst.graph = std::make_shared<Graph>(IsMultigraph());
rst.induced_vertices = vids; rst.induced_vertices = vids;
rst.graph.AddVertices(len); rst.graph->AddVertices(len);
for (int64_t i = 0; i < len; ++i) { for (int64_t i = 0; i < len; ++i) {
const dgl_id_t oldvid = vid_data[i]; const dgl_id_t oldvid = vid_data[i];
const dgl_id_t newvid = i; const dgl_id_t newvid = i;
...@@ -426,7 +459,7 @@ Subgraph Graph::VertexSubgraph(IdArray vids) const { ...@@ -426,7 +459,7 @@ Subgraph Graph::VertexSubgraph(IdArray vids) const {
if (oldv2newv.count(oldsucc)) { if (oldv2newv.count(oldsucc)) {
const dgl_id_t newsucc = oldv2newv[oldsucc]; const dgl_id_t newsucc = oldv2newv[oldsucc];
edges.push_back(adjlist_[oldvid].edge_id[j]); edges.push_back(adjlist_[oldvid].edge_id[j]);
rst.graph.AddEdge(newvid, newsucc); rst.graph->AddEdge(newvid, newsucc);
} }
} }
} }
...@@ -453,13 +486,14 @@ Subgraph Graph::EdgeSubgraph(IdArray eids) const { ...@@ -453,13 +486,14 @@ Subgraph Graph::EdgeSubgraph(IdArray eids) const {
} }
Subgraph rst; Subgraph rst;
rst.graph = std::make_shared<Graph>(IsMultigraph());
rst.induced_edges = eids; rst.induced_edges = eids;
rst.graph.AddVertices(nodes.size()); rst.graph->AddVertices(nodes.size());
for (int64_t i = 0; i < len; ++i) { for (int64_t i = 0; i < len; ++i) {
dgl_id_t src_id = all_edges_src_[eid_data[i]]; dgl_id_t src_id = all_edges_src_[eid_data[i]];
dgl_id_t dst_id = all_edges_dst_[eid_data[i]]; dgl_id_t dst_id = all_edges_dst_[eid_data[i]];
rst.graph.AddEdge(oldv2newv[src_id], oldv2newv[dst_id]); rst.graph->AddEdge(oldv2newv[src_id], oldv2newv[dst_id]);
} }
rst.induced_vertices = IdArray::Empty( rst.induced_vertices = IdArray::Empty(
...@@ -469,9 +503,59 @@ Subgraph Graph::EdgeSubgraph(IdArray eids) const { ...@@ -469,9 +503,59 @@ Subgraph Graph::EdgeSubgraph(IdArray eids) const {
return rst; return rst;
} }
Graph Graph::Reverse() const { std::vector<IdArray> Graph::GetAdj(bool transpose, const std::string &fmt) const {
int64_t num_edges = NumEdges();
int64_t num_nodes = NumVertices();
if (fmt == "coo") {
IdArray idx = IdArray::Empty({2 * num_edges}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
int64_t *idx_data = static_cast<int64_t*>(idx->data);
if (transpose) {
std::copy(all_edges_src_.begin(), all_edges_src_.end(), idx_data);
std::copy(all_edges_dst_.begin(), all_edges_dst_.end(), idx_data + num_edges);
} else {
std::copy(all_edges_dst_.begin(), all_edges_dst_.end(), idx_data);
std::copy(all_edges_src_.begin(), all_edges_src_.end(), idx_data + num_edges);
}
IdArray eid = IdArray::Empty({num_edges}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
int64_t *eid_data = static_cast<int64_t*>(eid->data);
for (uint64_t eid = 0; eid < num_edges; ++eid) {
eid_data[eid] = eid;
}
return std::vector<IdArray>{idx, eid};
} else if (fmt == "csr") {
IdArray indptr = IdArray::Empty({num_nodes + 1}, DLDataType{kDLInt, 64, 1},
DLContext{kDLCPU, 0});
IdArray indices = IdArray::Empty({num_edges}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
IdArray eid = IdArray::Empty({num_edges}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
int64_t *indptr_data = static_cast<int64_t*>(indptr->data);
int64_t *indices_data = static_cast<int64_t*>(indices->data);
int64_t *eid_data = static_cast<int64_t*>(eid->data);
const AdjacencyList *adjlist;
if (transpose) {
// Out-edges.
adjlist = &adjlist_;
} else {
// In-edges.
adjlist = &reverse_adjlist_;
}
indptr_data[0] = 0;
for (size_t i = 0; i < adjlist->size(); i++) {
indptr_data[i + 1] = indptr_data[i] + adjlist->at(i).succ.size();
std::copy(adjlist->at(i).succ.begin(), adjlist->at(i).succ.end(),
indices_data + indptr_data[i]);
std::copy(adjlist->at(i).edge_id.begin(), adjlist->at(i).edge_id.end(),
eid_data + indptr_data[i]);
}
return std::vector<IdArray>{indptr, indices, eid};
} else {
LOG(FATAL) << "unsupported format";
return std::vector<IdArray>();
}
}
GraphPtr Graph::Reverse() const {
LOG(FATAL) << "not implemented"; LOG(FATAL) << "not implemented";
return *this; return nullptr;
} }
} // namespace dgl } // namespace dgl
This diff is collapsed.
This diff is collapsed.
...@@ -48,7 +48,7 @@ DGL_REGISTER_GLOBAL("runtime.degree_bucketing._CAPI_DGLDegreeBucketingForFullGra ...@@ -48,7 +48,7 @@ DGL_REGISTER_GLOBAL("runtime.degree_bucketing._CAPI_DGLDegreeBucketingForFullGra
.set_body([] (DGLArgs args, DGLRetValue* rv) { .set_body([] (DGLArgs args, DGLRetValue* rv) {
GraphHandle ghandle = args[0]; GraphHandle ghandle = args[0];
const Graph* gptr = static_cast<Graph*>(ghandle); const Graph* gptr = static_cast<Graph*>(ghandle);
const auto& edges = gptr->Edges(false); const auto& edges = gptr->Edges("");
int64_t n_vertices = gptr->NumVertices(); int64_t n_vertices = gptr->NumVertices();
IdArray nids = IdArray::Empty({n_vertices}, edges.dst->dtype, edges.dst->ctx); IdArray nids = IdArray::Empty({n_vertices}, edges.dst->dtype, edges.dst->ctx);
int64_t* nid_data = static_cast<int64_t*>(nids->data); int64_t* nid_data = static_cast<int64_t*>(nids->data);
......
import os import backend as F
os.environ['DGLBACKEND'] = 'mxnet' import networkx as nx
import mxnet as mx
import numpy as np import numpy as np
import scipy as sp import scipy as sp
import dgl import dgl
from dgl.graph_index import map_to_subgraph_nid, GraphIndex, create_graph_index from dgl.graph_index import map_to_subgraph_nid, GraphIndex, create_graph_index
from dgl import utils from dgl import utils
def generate_from_networkx():
    """Create a mutable and a read-only graph index from the same networkx DiGraph.

    Returns
    -------
    tuple
        (graph index built with default arguments, graph index built with readonly=True)
    """
    nx_graph = nx.DiGraph()
    nx_graph.add_edges_from([[2, 3], [2, 5], [3, 0], [1, 0], [4, 3], [4, 5]])
    mutable_index = create_graph_index(nx_graph)
    readonly_index = create_graph_index(nx_graph, readonly=True)
    return mutable_index, readonly_index
def generate_from_edgelist():
    """Create a mutable and a read-only graph index from the same edge list.

    Returns
    -------
    tuple
        (graph index built with default arguments, graph index built with readonly=True)
    """
    edge_pairs = [[2, 3], [2, 5], [3, 0], [6, 10], [10, 3], [10, 15]]
    mutable_index = create_graph_index(edge_pairs)
    readonly_index = create_graph_index(edge_pairs, readonly=True)
    return mutable_index, readonly_index
def generate_rand_graph(n): def generate_rand_graph(n):
arr = (sp.sparse.random(n, n, density=0.1, format='coo') != 0).astype(np.int64) arr = (sp.sparse.random(n, n, density=0.1, format='coo') != 0).astype(np.int64)
g = create_graph_index(arr) g = create_graph_index(arr)
...@@ -14,64 +27,87 @@ def generate_rand_graph(n): ...@@ -14,64 +27,87 @@ def generate_rand_graph(n):
return g, ig return g, ig
def check_graph_equal(g1, g2): def check_graph_equal(g1, g2):
adj1 = g1.adjacency_matrix(False, mx.cpu())[0] != 0 adj1 = g1.adjacency_matrix(False, F.cpu())[0]
adj2 = g2.adjacency_matrix(False, mx.cpu())[0] != 0 adj2 = g2.adjacency_matrix(False, F.cpu())[0]
assert mx.nd.sum(adj1 - adj2).asnumpy() == 0 assert np.all(F.asnumpy(adj1) == F.asnumpy(adj2))
def test_graph_gen(): def test_graph_gen():
g, ig = generate_from_edgelist()
check_graph_equal(g, ig)
g, ig = generate_rand_graph(10) g, ig = generate_rand_graph(10)
check_graph_equal(g, ig) check_graph_equal(g, ig)
def sort_edges(edges):
    """Reorder a (src, dst, eid) triple by its third component.

    Every element of `edges` is first converted with ``tousertensor()``.
    If the third tensor is empty the converted triple is returned as is;
    otherwise all three tensors are indexed with the permutation returned
    by ``F.sort_1d`` on the third tensor.
    """
    tensors = [e.tousertensor() for e in edges]
    src, dst, eid = tensors[0], tensors[1], tensors[2]
    # Nothing to sort when there are no edges.
    if not np.prod(eid.shape) > 0:
        return (src, dst, eid)
    _, order = F.sort_1d(eid)
    return (src[order], dst[order], eid[order])
def check_basics(g, ig): def check_basics(g, ig):
assert g.number_of_nodes() == ig.number_of_nodes() assert g.number_of_nodes() == ig.number_of_nodes()
assert g.number_of_edges() == ig.number_of_edges() assert g.number_of_edges() == ig.number_of_edges()
edges = g.edges() edges = g.edges("srcdst")
iedges = ig.edges() iedges = ig.edges("srcdst")
assert F.array_equal(edges[0].tousertensor(), iedges[0].tousertensor())
assert F.array_equal(edges[1].tousertensor(), iedges[1].tousertensor())
assert F.array_equal(edges[2].tousertensor(), iedges[2].tousertensor())
edges = g.edges("eid")
iedges = ig.edges("eid")
assert F.array_equal(edges[0].tousertensor(), iedges[0].tousertensor())
assert F.array_equal(edges[1].tousertensor(), iedges[1].tousertensor())
assert F.array_equal(edges[2].tousertensor(), iedges[2].tousertensor())
for i in range(g.number_of_nodes()): for i in range(g.number_of_nodes()):
assert g.has_node(i) == ig.has_node(i) assert g.has_node(i) == ig.has_node(i)
for i in range(g.number_of_nodes()): for i in range(g.number_of_nodes()):
assert mx.nd.sum(g.predecessors(i).tousertensor()).asnumpy() == mx.nd.sum(ig.predecessors(i).tousertensor()).asnumpy() assert F.array_equal(g.predecessors(i).tousertensor(), ig.predecessors(i).tousertensor())
assert mx.nd.sum(g.successors(i).tousertensor()).asnumpy() == mx.nd.sum(ig.successors(i).tousertensor()).asnumpy() assert F.array_equal(g.successors(i).tousertensor(), ig.successors(i).tousertensor())
randv = np.random.randint(0, g.number_of_nodes(), 10) randv = np.random.randint(0, g.number_of_nodes(), 10)
randv = utils.toindex(randv) randv = utils.toindex(randv)
in_src1, in_dst1, in_eids1 = g.in_edges(randv) in_src1, in_dst1, in_eids1 = sort_edges(g.in_edges(randv))
in_src2, in_dst2, in_eids2 = ig.in_edges(randv) in_src2, in_dst2, in_eids2 = sort_edges(ig.in_edges(randv))
nnz = in_src2.tousertensor().shape[0] nnz = in_src2.shape[0]
assert mx.nd.sum(in_src1.tousertensor() == in_src2.tousertensor()).asnumpy() == nnz assert F.array_equal(in_src1, in_src2)
assert mx.nd.sum(in_dst1.tousertensor() == in_dst2.tousertensor()).asnumpy() == nnz assert F.array_equal(in_dst1, in_dst2)
assert mx.nd.sum(in_eids1.tousertensor() == in_eids2.tousertensor()).asnumpy() == nnz assert F.array_equal(in_eids1, in_eids2)
out_src1, out_dst1, out_eids1 = g.out_edges(randv) out_src1, out_dst1, out_eids1 = sort_edges(g.out_edges(randv))
out_src2, out_dst2, out_eids2 = ig.out_edges(randv) out_src2, out_dst2, out_eids2 = sort_edges(ig.out_edges(randv))
nnz = out_dst2.tousertensor().shape[0] nnz = out_dst2.shape[0]
assert mx.nd.sum(out_dst1.tousertensor() == out_dst2.tousertensor()).asnumpy() == nnz assert F.array_equal(out_dst1, out_dst2)
assert mx.nd.sum(out_src1.tousertensor() == out_src2.tousertensor()).asnumpy() == nnz assert F.array_equal(out_src1, out_src2)
assert mx.nd.sum(out_eids1.tousertensor() == out_eids2.tousertensor()).asnumpy() == nnz assert F.array_equal(out_eids1, out_eids2)
num_v = len(randv) num_v = len(randv)
assert mx.nd.sum(g.in_degrees(randv).tousertensor() == ig.in_degrees(randv).tousertensor()).asnumpy() == num_v assert F.array_equal(g.in_degrees(randv).tousertensor(), ig.in_degrees(randv).tousertensor())
assert mx.nd.sum(g.out_degrees(randv).tousertensor() == ig.out_degrees(randv).tousertensor()).asnumpy() == num_v assert F.array_equal(g.out_degrees(randv).tousertensor(), ig.out_degrees(randv).tousertensor())
randv = randv.tousertensor() randv = randv.tousertensor()
for v in randv.asnumpy(): for v in F.asnumpy(randv):
assert g.in_degree(v) == ig.in_degree(v) assert g.in_degree(v) == ig.in_degree(v)
assert g.out_degree(v) == ig.out_degree(v) assert g.out_degree(v) == ig.out_degree(v)
for u in randv.asnumpy(): for u in F.asnumpy(randv):
for v in randv.asnumpy(): for v in F.asnumpy(randv):
if len(g.edge_id(u, v)) == 1: if len(g.edge_id(u, v)) == 1:
assert g.edge_id(u, v).tonumpy() == ig.edge_id(u, v).tonumpy() assert g.edge_id(u, v).tonumpy() == ig.edge_id(u, v).tonumpy()
assert g.has_edge_between(u, v) == ig.has_edge_between(u, v) assert g.has_edge_between(u, v) == ig.has_edge_between(u, v)
randv = utils.toindex(randv) randv = utils.toindex(randv)
ids = g.edge_ids(randv, randv)[2].tonumpy() ids = g.edge_ids(randv, randv)[2].tonumpy()
assert sum(ig.edge_ids(randv, randv)[2].tonumpy() == ids) == len(ids) assert sum(ig.edge_ids(randv, randv)[2].tonumpy() == ids, 0) == len(ids)
assert sum(g.has_edges_between(randv, randv).tonumpy() == ig.has_edges_between(randv, randv).tonumpy()) == len(randv) assert sum(g.has_edges_between(randv, randv).tonumpy() == ig.has_edges_between(randv, randv).tonumpy(), 0) == len(randv)
def test_basics(): def test_basics():
g, ig = generate_from_edgelist()
check_basics(g, ig)
g, ig = generate_from_networkx()
check_basics(g, ig)
g, ig = generate_rand_graph(100) g, ig = generate_rand_graph(100)
check_basics(g, ig) check_basics(g, ig)
...@@ -84,9 +120,10 @@ def test_node_subgraph(): ...@@ -84,9 +120,10 @@ def test_node_subgraph():
randv = np.unique(randv1) randv = np.unique(randv1)
subg = g.node_subgraph(utils.toindex(randv)) subg = g.node_subgraph(utils.toindex(randv))
subig = ig.node_subgraph(utils.toindex(randv)) subig = ig.node_subgraph(utils.toindex(randv))
check_basics(subg, subig)
check_graph_equal(subg, subig) check_graph_equal(subg, subig)
assert mx.nd.sum(map_to_subgraph_nid(subg, utils.toindex(randv1[0:10])).tousertensor() assert F.sum(map_to_subgraph_nid(subg, utils.toindex(randv1[0:10])).tousertensor()
== map_to_subgraph_nid(subig, utils.toindex(randv1[0:10])).tousertensor()) == 10 == map_to_subgraph_nid(subig, utils.toindex(randv1[0:10])).tousertensor(), 0) == 10
# node_subgraphs # node_subgraphs
randvs = [] randvs = []
...@@ -97,6 +134,7 @@ def test_node_subgraph(): ...@@ -97,6 +134,7 @@ def test_node_subgraph():
subgs.append(g.node_subgraph(utils.toindex(randv))) subgs.append(g.node_subgraph(utils.toindex(randv)))
subigs= ig.node_subgraphs(randvs) subigs= ig.node_subgraphs(randvs)
for i in range(4): for i in range(4):
check_basics(subg, subig)
check_graph_equal(subgs[i], subigs[i]) check_graph_equal(subgs[i], subigs[i])
def test_create_graph(): def test_create_graph():
...@@ -110,6 +148,7 @@ def test_create_graph(): ...@@ -110,6 +148,7 @@ def test_create_graph():
rows = [1, 0, 0] rows = [1, 0, 0]
cols = [2, 1, 2] cols = [2, 1, 2]
mat = sp.sparse.coo_matrix((data, (rows, cols))) mat = sp.sparse.coo_matrix((data, (rows, cols)))
g = dgl.DGLGraph(mat, readonly=False)
ig = dgl.DGLGraph(mat, readonly=True) ig = dgl.DGLGraph(mat, readonly=True)
for edge in elist: for edge in elist:
assert g.edge_id(edge[0], edge[1]) == ig.edge_id(edge[0], edge[1]) assert g.edge_id(edge[0], edge[1]) == ig.edge_id(edge[0], edge[1])
......
import os import backend as F
os.environ['DGLBACKEND'] = 'mxnet'
import mxnet as mx
import numpy as np import numpy as np
import scipy as sp import scipy as sp
import dgl import dgl
...@@ -19,7 +17,7 @@ def test_1neighbor_sampler_all(): ...@@ -19,7 +17,7 @@ def test_1neighbor_sampler_all():
assert len(seed_ids) == 1 assert len(seed_ids) == 1
src, dst, eid = g.in_edges(seed_ids, form='all') src, dst, eid = g.in_edges(seed_ids, form='all')
# Test if there is a self loop # Test if there is a self loop
self_loop = mx.nd.sum(src == dst).asnumpy() == 1 self_loop = F.asnumpy(F.sum(src == dst, 0)) == 1
if self_loop: if self_loop:
assert subg.number_of_nodes() == len(src) assert subg.number_of_nodes() == len(src)
else: else:
...@@ -30,22 +28,22 @@ def test_1neighbor_sampler_all(): ...@@ -30,22 +28,22 @@ def test_1neighbor_sampler_all():
child_src, child_dst, child_eid = subg.in_edges(child_ids, form='all') child_src, child_dst, child_eid = subg.in_edges(child_ids, form='all')
child_src1 = subg.map_to_subgraph_nid(src) child_src1 = subg.map_to_subgraph_nid(src)
assert mx.nd.sum(child_src1 == child_src).asnumpy() == len(src) assert F.asnumpy(F.sum(child_src1 == child_src, 0)) == len(src)
def is_sorted(arr): def is_sorted(arr):
return np.sum(np.sort(arr) == arr) == len(arr) return np.sum(np.sort(arr) == arr, 0) == len(arr)
def verify_subgraph(g, subg, seed_id): def verify_subgraph(g, subg, seed_id):
src, dst, eid = g.in_edges(seed_id, form='all') src, dst, eid = g.in_edges(seed_id, form='all')
child_id = subg.map_to_subgraph_nid(seed_id) child_id = subg.map_to_subgraph_nid(seed_id)
child_src, child_dst, child_eid = subg.in_edges(child_id, form='all') child_src, child_dst, child_eid = subg.in_edges(child_id, form='all')
child_src = child_src.asnumpy() child_src = F.asnumpy(child_src)
# We don't allow duplicate elements in the neighbor list. # We don't allow duplicate elements in the neighbor list.
assert(len(np.unique(child_src)) == len(child_src)) assert(len(np.unique(child_src)) == len(child_src))
# The neighbor list also needs to be sorted. # The neighbor list also needs to be sorted.
assert(is_sorted(child_src)) assert(is_sorted(child_src))
child_src1 = subg.map_to_subgraph_nid(src).asnumpy() child_src1 = F.asnumpy(subg.map_to_subgraph_nid(src))
child_src1 = child_src1[child_src1 >= 0] child_src1 = child_src1[child_src1 >= 0]
for i in child_src: for i in child_src:
assert i in child_src1 assert i in child_src1
...@@ -84,7 +82,7 @@ def test_10neighbor_sampler_all(): ...@@ -84,7 +82,7 @@ def test_10neighbor_sampler_all():
child_src, child_dst, child_eid = subg.in_edges(child_ids, form='all') child_src, child_dst, child_eid = subg.in_edges(child_ids, form='all')
child_src1 = subg.map_to_subgraph_nid(src) child_src1 = subg.map_to_subgraph_nid(src)
assert mx.nd.sum(child_src1 == child_src).asnumpy() == len(src) assert F.asnumpy(F.sum(child_src1 == child_src, 0)) == len(src)
def check_10neighbor_sampler(g, seeds): def check_10neighbor_sampler(g, seeds):
# In this case, NeighborSampling simply gets the neighborhood of a single vertex. # In this case, NeighborSampling simply gets the neighborhood of a single vertex.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment