"vscode:/vscode.git/clone" did not exist on "8b0bc596de56bca578629f92b9e6cb8466cab2e2"
Unverified Commit eeeb52f4 authored by Quan (Andy) Gan, committed by GitHub

[Feature] Preference to COO for "hypersparse" unit graphs & graph compaction (#1238)

* unit graph that prefers coo queries

* auto detect coo preference

* forgot some functions

* disable lint on detect_prefer_coo

* reorg

* change comment

* lint

* fix

* move array_utils.h to src

* compact graph impl

* fix redundant copying in idhashmap

* docstring

* moving preference detection to C

* lint

* fix unit test & address comments

* hypersparse autorestrict

* docstring & fix

* revert copyto and asnumbits

* fix stupid bug

* lint

* leave a TODO for sorted COO

* fixing same node type mapping to different id in different graphs

* addresses comments

* made induced nodes a feature column

* lint?
parent 828a5e5b
......@@ -211,7 +211,7 @@ struct CSRMatrix {
runtime::NDArray indptr, indices;
/*! \brief data array, could be empty. */
runtime::NDArray data;
/*! \brief indicate that the edges are stored in the sorted order. */
/*! \brief whether the column indices per row are sorted */
bool sorted;
};
......@@ -229,7 +229,9 @@ struct COOMatrix {
int64_t num_rows, num_cols;
/*! \brief COO index arrays */
runtime::NDArray row, col;
/*! \brief data array, could be empty. */
/*!
* \brief data array, could be empty. When empty, assume it is from 0 to NNZ - 1.
*/
runtime::NDArray data;
};
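A minimal NumPy sketch (illustrative only, not part of this diff) of the convention stated above: when the COO data array is absent, the entry data defaults to the sequence 0 .. NNZ - 1.

import numpy as np

row = np.array([0, 0, 2])      # COO row indices
col = np.array([1, 3, 2])      # COO col indices
data = None                    # "empty" data array, as allowed above

nnz = row.shape[0]
# Assumed convention from the comment above: missing data means 0 .. NNZ - 1.
effective_data = data if data is not None else np.arange(nnz)
print(effective_data)          # [0 1 2]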
......@@ -253,6 +255,11 @@ runtime::NDArray CSRGetRowColumnIndices(CSRMatrix , int64_t row);
/*! \brief Return the data array of the given row */
runtime::NDArray CSRGetRowData(CSRMatrix , int64_t row);
/*! \brief Whether the CSR matrix contains data */
inline bool CSRHasData(CSRMatrix csr) {
return csr.data.defined();
}
/* \brief Get data. The return type is an ndarray due to possible duplicate entries. */
runtime::NDArray CSRGetData(CSRMatrix , int64_t row, int64_t col);
/*!
......@@ -326,8 +333,39 @@ void CSRSort(CSRMatrix csr);
///////////////////////// COO routines //////////////////////////
/*! \return True if the matrix has duplicate entries */
bool COOHasDuplicate(COOMatrix coo);
/*! \brief Return true if the value (row, col) is non-zero */
bool COOIsNonZero(COOMatrix , int64_t row, int64_t col);
/*!
* \brief Batched implementation of COOIsNonZero.
 * \note This operator allows broadcasting (i.e., either row or col can be of length 1).
*/
runtime::NDArray COOIsNonZero(COOMatrix, runtime::NDArray row, runtime::NDArray col);
/*! \brief Return the nnz of the given row */
int64_t COOGetRowNNZ(COOMatrix , int64_t row);
runtime::NDArray COOGetRowNNZ(COOMatrix , runtime::NDArray row);
/*! \brief Return the data array of the given row */
std::pair<runtime::NDArray, runtime::NDArray>
COOGetRowDataAndIndices(COOMatrix , int64_t row);
/*! \brief Whether the COO matrix contains data */
inline bool COOHasData(COOMatrix coo) {
return coo.data.defined();
}
/*! \brief Get data. The return type is an ndarray due to possible duplicate entries. */
runtime::NDArray COOGetData(COOMatrix , int64_t row, int64_t col);
/*!
 * \brief Get the data and the row, col indices for each returned entry.
 * \note This operator allows broadcasting (i.e., either row or col can be of length 1).
*/
std::vector<runtime::NDArray> COOGetDataAndIndices(
COOMatrix , runtime::NDArray rows, runtime::NDArray cols);
/*! \brief Return a transposed COO matrix */
COOMatrix COOTranspose(COOMatrix coo);
/*!
* \brief Convert COO matrix to CSR matrix.
......@@ -339,6 +377,32 @@ bool COOHasDuplicate(COOMatrix coo);
*/
CSRMatrix COOToCSR(COOMatrix coo);
/*!
* \brief Slice rows of the given matrix and return.
* \param coo COO matrix
* \param start Start row id (inclusive)
* \param end End row id (exclusive)
*/
COOMatrix COOSliceRows(COOMatrix coo, int64_t start, int64_t end);
COOMatrix COOSliceRows(COOMatrix coo, runtime::NDArray rows);
/*!
* \brief Get the submatrix specified by the row and col ids.
*
* In numpy notation, given matrix M, row index array I, col index array J
* This function returns the submatrix M[I, J].
*
* \param coo The input coo matrix
* \param rows The row index to select
* \param cols The col index to select
* \return submatrix
*/
COOMatrix COOSliceMatrix(COOMatrix coo, runtime::NDArray rows, runtime::NDArray cols);
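For intuition, a dense NumPy analogue (illustrative only, not part of this diff) of the submatrix selection that COOSliceMatrix performs on a sparse COO input; the "M[I, J]" in the comment above is meant in the submatrix sense, i.e. numpy.ix_:

import numpy as np

M = np.array([[1, 0, 2],
              [0, 0, 3],
              [4, 5, 0]])
I = np.array([0, 2])           # row ids to keep
J = np.array([1, 2])           # col ids to keep

sub = M[np.ix_(I, J)]          # the submatrix "M[I, J]" of the doxygen comment
print(sub)                     # [[0 2]
                               #  [5 0]]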
/*! \return True if the matrix has duplicate entries */
bool COOHasDuplicate(COOMatrix coo);
// inline implementations
template <typename T>
IdArray VecToIdArray(const std::vector<T>& vec,
......@@ -399,7 +463,7 @@ IdArray VecToIdArray(const std::vector<T>& vec,
/*
* Dispatch according to float type (either float32 or float64):
*
* ATEN_ID_TYPE_SWITCH(array->dtype, FloatType, {
* ATEN_FLOAT_TYPE_SWITCH(array->dtype, FloatType, {
* // Now FloatType is the type corresponding to data type in array.
* // For instance, one can do this for a CPU array:
* FloatType *data = static_cast<FloatType *>(array->data);
......@@ -422,7 +486,7 @@ IdArray VecToIdArray(const std::vector<T>& vec,
/*
* Dispatch according to data type (int32, int64, float32 or float64):
*
* ATEN_ID_TYPE_SWITCH(array->dtype, DType, {
* ATEN_DTYPE_SWITCH(array->dtype, DType, {
* // Now DType is the type corresponding to data type in array.
* // For instance, one can do this for a CPU array:
* DType *data = static_cast<DType *>(array->data);
......
......@@ -90,6 +90,11 @@ class BaseHeteroGraph : public runtime::Object {
*/
virtual void Clear() = 0;
/*!
* \brief Get the data type of node and edge IDs of this graph.
*/
virtual DLDataType DataType() const = 0;
/*!
* \brief Get the device context of this graph.
*/
......@@ -98,6 +103,7 @@ class BaseHeteroGraph : public runtime::Object {
/*!
* \brief Get the number of integer bits used to store node/edge ids (32 or 64).
*/
// TODO(BarclayII): replace NumBits() calls with DataType() calls
virtual uint8_t NumBits() const = 0;
/*!
......@@ -464,19 +470,44 @@ DGL_DEFINE_OBJECT_REF(HeteroSubgraphRef, HeteroSubgraph);
// creators
/*! \brief Create a bipartite graph from COO arrays */
HeteroGraphPtr CreateBipartiteFromCOO(
int64_t num_src, int64_t num_dst, IdArray row, IdArray col);
/*!
* \brief Sparse graph format.
*/
enum class SparseFormat {
ANY = 0,
COO = 1,
CSR = 2,
CSC = 3
};
/*! \brief Create a bipartite graph from (out) CSR arrays */
HeteroGraphPtr CreateBipartiteFromCSR(
int64_t num_src, int64_t num_dst,
IdArray indptr, IdArray indices, IdArray edge_ids);
inline SparseFormat ParseSparseFormat(const std::string& name) {
if (name == "coo")
return SparseFormat::COO;
else if (name == "csr")
return SparseFormat::CSR;
else if (name == "csc")
return SparseFormat::CSC;
else
return SparseFormat::ANY;
}
/*! \brief Create a heterograph from meta graph and a list of bipartite graph */
HeteroGraphPtr CreateHeteroGraph(
GraphPtr meta_graph, const std::vector<HeteroGraphPtr>& rel_graphs);
/*!
 * \brief Given a list of graphs, remove the nodes that have neither inbound nor
 * outbound edges in any of the given graphs.
*
* The graphs should have identical node ID space (i.e. should have the same set of nodes,
* including types and IDs) and metagraph.
*
 * \return A pair. The first element is the list of compacted graphs, and the second
 * element is the mapping from node IDs in the compacted graphs to node IDs in the
 * original graphs.
*/
std::pair<std::vector<HeteroGraphPtr>, std::vector<IdArray>>
CompactGraphs(const std::vector<HeteroGraphPtr> &graphs);
}; // namespace dgl
#endif // DGL_BASE_HETEROGRAPH_H_
"""Module for converting graph from/to other object."""
from collections import defaultdict
from collections.abc import Iterable
import numpy as np
import scipy as sp
import networkx as nx
......@@ -19,9 +20,11 @@ __all__ = [
'to_hetero',
'to_homo',
'to_networkx',
'compact_graphs',
]
def graph(data, ntype='_N', etype='_E', card=None, validate=True, **kwargs):
def graph(data, ntype='_N', etype='_E', card=None, validate=True, restrict_format='any',
**kwargs):
"""Create a graph with one type of nodes and edges.
In the sparse matrix perspective, :func:`dgl.graph` creates a graph
......@@ -49,6 +52,8 @@ def graph(data, ntype='_N', etype='_E', card=None, validate=True, **kwargs):
If True, check if node ids are within cardinality; the check process may take
some time. (Default: True)
If False and card is not None, the user will receive a warning.
restrict_format : 'any', 'coo', 'csr', 'csc', optional
Force the storage format. Default: 'any' (i.e. let DGL decide what to use).
kwargs : key-word arguments, optional
Other key word arguments. Only comes into effect when we are using a NetworkX
graph. It can consist of:
......@@ -122,17 +127,24 @@ def graph(data, ntype='_N', etype='_E', card=None, validate=True, **kwargs):
urange, vrange = None, None
if isinstance(data, tuple):
u, v = data
return create_from_edges(u, v, ntype, etype, ntype, urange, vrange, validate)
return create_from_edges(
u, v, ntype, etype, ntype, urange, vrange, validate,
restrict_format=restrict_format)
elif isinstance(data, list):
return create_from_edge_list(data, ntype, etype, ntype, urange, vrange, validate)
return create_from_edge_list(
data, ntype, etype, ntype, urange, vrange, validate,
restrict_format=restrict_format)
elif isinstance(data, sp.sparse.spmatrix):
return create_from_scipy(data, ntype, etype, ntype)
return create_from_scipy(
data, ntype, etype, ntype, restrict_format=restrict_format)
elif isinstance(data, nx.Graph):
return create_from_networkx(data, ntype, etype, **kwargs)
return create_from_networkx(
data, ntype, etype, restrict_format=restrict_format, **kwargs)
else:
raise DGLError('Unsupported graph data type:', type(data))
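A brief usage sketch of the ``restrict_format`` argument added above (hedged: the edge data below is arbitrary and only for illustration; the keyword and its accepted values come from this diff):

import dgl

# Let DGL pick the storage format (the default), e.g. COO for hypersparse graphs.
g_any = dgl.graph(([0, 1], [1, 2]))

# Force CSR storage for the same edges.
g_csr = dgl.graph(([0, 1], [1, 2]), restrict_format='csr')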
def bipartite(data, utype='_U', etype='_E', vtype='_V', card=None, validate=True, **kwargs):
def bipartite(data, utype='_U', etype='_E', vtype='_V', card=None, validate=True,
restrict_format='any', **kwargs):
"""Create a bipartite graph.
The result graph is directed and edges must be from ``utype`` nodes
......@@ -165,6 +177,8 @@ def bipartite(data, utype='_U', etype='_E', vtype='_V', card=None, validate=True
If True, check if node ids are within cardinality; the check process may take
some time. (Default: True)
If False and card is not None, the user will receive a warning.
restrict_format : 'any', 'coo', 'csr', 'csc', optional
Force the storage format. Default: 'any' (i.e. let DGL decide what to use).
kwargs : key-word arguments, optional
Other key word arguments. Only comes into effect when we are using a NetworkX
graph. It can consist of:
......@@ -253,13 +267,19 @@ def bipartite(data, utype='_U', etype='_E', vtype='_V', card=None, validate=True
urange, vrange = None, None
if isinstance(data, tuple):
u, v = data
return create_from_edges(u, v, utype, etype, vtype, urange, vrange, validate)
return create_from_edges(
u, v, utype, etype, vtype, urange, vrange, validate,
restrict_format=restrict_format)
elif isinstance(data, list):
return create_from_edge_list(data, utype, etype, vtype, urange, vrange, validate)
return create_from_edge_list(
data, utype, etype, vtype, urange, vrange, validate,
restrict_format=restrict_format)
elif isinstance(data, sp.sparse.spmatrix):
return create_from_scipy(data, utype, etype, vtype)
return create_from_scipy(
data, utype, etype, vtype, restrict_format=restrict_format)
elif isinstance(data, nx.Graph):
return create_from_networkx_bipartite(data, utype, etype, vtype, **kwargs)
return create_from_networkx_bipartite(
data, utype, etype, vtype, restrict_format=restrict_format, **kwargs)
else:
raise DGLError('Unsupported graph data type:', type(data))
......@@ -331,24 +351,29 @@ def hetero_from_relations(rel_graphs):
# TODO(minjie): this API can be generalized as a union operation of the input graphs
# TODO(minjie): handle node/edge data
# infer meta graph
ntype_dict = {} # ntype -> ntid
ntype_set = set()
meta_edges = []
ntypes = []
etypes = []
# TODO(BarclayII): I'm keeping the node type names sorted because otherwise the
# same node type name in different graphs may map to different node type IDs even
# if the metagraph is the same.
# In the future, we need to lower the type names into C++.
for rgrh in rel_graphs:
assert len(rgrh.etypes) == 1
stype, etype, dtype = rgrh.canonical_etypes[0]
if stype not in ntype_dict:
ntype_dict[stype] = len(ntypes)
ntypes.append(stype)
ntype_set.add(stype)
ntype_set.add(dtype)
ntypes = list(sorted(ntype_set))
ntype_dict = {ntype: i for i, ntype in enumerate(ntypes)}
for rgrh in rel_graphs:
stype, etype, dtype = rgrh.canonical_etypes[0]
stid = ntype_dict[stype]
if dtype not in ntype_dict:
ntype_dict[dtype] = len(ntypes)
ntypes.append(dtype)
dtid = ntype_dict[dtype]
meta_edges.append((stid, dtid))
etypes.append(etype)
metagraph = graph_index.from_edge_list(meta_edges, True, True)
# create graph index
hgidx = heterograph_index.create_heterograph_from_relations(
metagraph, [rgrh._graph for rgrh in rel_graphs])
......@@ -699,11 +724,104 @@ def to_homo(G):
return retg
def compact_graphs(graphs):
"""Given a list of graphs with the same set of nodes, find and eliminate the common
isolated nodes across all graphs.
This function requires the graphs to have the same set of nodes (i.e. the node types
must be the same, and the number of nodes of each node type must be the same). The
metagraph does not have to be the same.
It finds all the nodes that have zero in-degree and zero out-degree in all the given
graphs, and eliminates them from all the graphs.
Useful for graph sampling where we have a giant graph but we only wish to perform
message passing on a smaller graph with a (tiny) subset of nodes.
The node and edge features are not preserved.
Parameters
----------
graphs : DGLHeteroGraph or list[DGLHeteroGraph]
The graph, or list of graphs
Returns
-------
DGLHeteroGraph or list[DGLHeteroGraph]
The compacted graph or list of compacted graphs.
Each returned graph would have a feature ``dgl.NID`` containing the mapping
of node IDs for each type from the compacted graph(s) to the original graph(s).
Note that the mapping is the same for all the compacted graphs.
Examples
--------
The following code constructs a bipartite graph with 20 users and 10 games, but
only user #1 and #3, as well as game #3 and #5, have connections:
>>> g = dgl.bipartite([(1, 3), (3, 5)], 'user', 'plays', 'game', card=(20, 10))
The following would compact the graph above to another bipartite graph with only
two users and two games.
>>> new_g, induced_nodes = dgl.compact_graphs(g)
>>> induced_nodes
{'user': tensor([1, 3]), 'game': tensor([3, 5])}
The mapping tells us that only user #1 and #3 as well as game #3 and #5 are kept.
Furthermore, the first and second users in the compacted graph map to
user #1 and #3 in the original graph. Games are similar.
One can verify that the edge connections are kept the same in the compacted graph.
>>> new_g.edges(form='all', order='eid', etype='plays')
(tensor([0, 1]), tensor([0, 1]), tensor([0, 1]))
When compacting multiple graphs, nodes that do not have any connections in any
of the given graphs are removed. So if we compact ``g`` and the following ``g2``
graphs together:
>>> g2 = dgl.bipartite([(1, 6), (6, 8)], 'user', 'plays', 'game', card=(20, 10))
>>> (new_g, new_g2), induced_nodes = dgl.compact_graphs([g, g2])
>>> induced_nodes
{'user': tensor([1, 3, 6]), 'game': tensor([3, 5, 6, 8])}
Then one can see that user #1 from both graphs, user #3 from the first graph, as
well as user #6 from the second graph, are kept. Games are similar.
Similarly, one can also verify the connections:
>>> new_g.edges(form='all', order='eid', etype='plays')
(tensor([0, 1]), tensor([0, 1]), tensor([0, 1]))
>>> new_g2.edges(form='all', order='eid', etype='plays')
(tensor([0, 2]), tensor([2, 3]), tensor([0, 1]))
"""
return_single = False
if not isinstance(graphs, Iterable):
graphs = [graphs]
return_single = True
new_graph_indexes, induced_nodes = heterograph_index.compact_graph_indexes(
[g._graph for g in graphs])
new_graphs = [
DGLHeteroGraph(new_graph_index, graph.ntypes, graph.etypes)
for new_graph_index, graph in zip(new_graph_indexes, graphs)]
for g in new_graphs:
for i, ntype in enumerate(graphs[0].ntypes):
g.nodes[ntype].data[NID] = induced_nodes[i]
if return_single:
new_graphs = new_graphs[0]
return new_graphs
############################################################
# Internal APIs
############################################################
def create_from_edges(u, v, utype, etype, vtype, urange=None, vrange=None, validate=True):
def create_from_edges(u, v, utype, etype, vtype, urange=None, vrange=None, validate=True,
restrict_format="any"):
"""Internal function to create a graph from incident nodes with types.
utype could be equal to vtype
......@@ -728,6 +846,8 @@ def create_from_edges(u, v, utype, etype, vtype, urange=None, vrange=None, valid
maximum of the destination node IDs in the edge list plus 1. (Default: None)
validate : bool, optional
If True, checks if node IDs are within range.
restrict_format : 'any', 'coo', 'csr', 'csc', optional
Force the storage format. Default: 'any' (i.e. let DGL decide what to use).
Returns
-------
......@@ -755,13 +875,16 @@ def create_from_edges(u, v, utype, etype, vtype, urange=None, vrange=None, valid
num_ntypes = 1
else:
num_ntypes = 2
hgidx = heterograph_index.create_unitgraph_from_coo(num_ntypes, urange, vrange, u, v)
hgidx = heterograph_index.create_unitgraph_from_coo(
num_ntypes, urange, vrange, u, v, restrict_format)
if utype == vtype:
return DGLHeteroGraph(hgidx, [utype], [etype])
else:
return DGLHeteroGraph(hgidx, [utype, vtype], [etype])
def create_from_edge_list(elist, utype, etype, vtype, urange=None, vrange=None, validate=True):
def create_from_edge_list(elist, utype, etype, vtype, urange=None, vrange=None,
validate=True, restrict_format='any'):
"""Internal function to create a heterograph from a list of edge tuples with types.
utype could be equal to vtype
......@@ -784,7 +907,8 @@ def create_from_edge_list(elist, utype, etype, vtype, urange=None, vrange=None,
maximum of the destination node IDs in the edge list plus 1. (Default: None)
validate : bool, optional
If True, checks if node IDs are within range.
restrict_format : 'any', 'coo', 'csr', 'csc', optional
Force the storage format. Default: 'any' (i.e. let DGL decide what to use).
Returns
-------
......@@ -796,9 +920,11 @@ def create_from_edge_list(elist, utype, etype, vtype, urange=None, vrange=None,
u, v = zip(*elist)
u = list(u)
v = list(v)
return create_from_edges(u, v, utype, etype, vtype, urange, vrange, validate)
return create_from_edges(
u, v, utype, etype, vtype, urange, vrange, validate, restrict_format)
def create_from_scipy(spmat, utype, etype, vtype, with_edge_id=False):
def create_from_scipy(spmat, utype, etype, vtype, with_edge_id=False,
restrict_format='any'):
"""Internal function to create a heterograph from a scipy sparse matrix with types.
Parameters
......@@ -818,7 +944,8 @@ def create_from_scipy(spmat, utype, etype, vtype, with_edge_id=False):
(source, destination) order.
validate : bool, optional
If True, checks if node IDs are within range.
restrict_format : 'any', 'coo', 'csr', 'csc', optional
Force the storage format. Default: 'any' (i.e. let DGL decide what to use).
Returns
-------
......@@ -830,7 +957,7 @@ def create_from_scipy(spmat, utype, etype, vtype, with_edge_id=False):
row = utils.toindex(spmat.row)
col = utils.toindex(spmat.col)
hgidx = heterograph_index.create_unitgraph_from_coo(
num_ntypes, num_src, num_dst, row, col)
num_ntypes, num_src, num_dst, row, col, restrict_format)
else:
spmat = spmat.tocsr()
indptr = utils.toindex(spmat.indptr)
......@@ -838,7 +965,7 @@ def create_from_scipy(spmat, utype, etype, vtype, with_edge_id=False):
# TODO(minjie): with_edge_id is only reasonable for csr matrix. How to fix?
data = utils.toindex(spmat.data if with_edge_id else list(range(len(indices))))
hgidx = heterograph_index.create_unitgraph_from_csr(
num_ntypes, num_src, num_dst, indptr, indices, data)
num_ntypes, num_src, num_dst, indptr, indices, data, restrict_format)
if num_ntypes == 1:
return DGLHeteroGraph(hgidx, [utype], [etype])
else:
......@@ -848,7 +975,8 @@ def create_from_networkx(nx_graph,
ntype, etype,
edge_id_attr_name='id',
node_attrs=None,
edge_attrs=None):
edge_attrs=None,
restrict_format='any'):
"""Create a heterograph that has only one set of nodes and edges.
Parameters
......@@ -865,6 +993,8 @@ def create_from_networkx(nx_graph,
Names for node features to retrieve from the NetworkX graph (Default: None)
edge_attrs : list of str
Names for edge features to retrieve from the NetworkX graph (Default: None)
restrict_format : 'any', 'coo', 'csr', 'csc', optional
Force the storage format. Default: 'any' (i.e. let DGL decide what to use).
Returns
-------
......@@ -899,7 +1029,8 @@ def create_from_networkx(nx_graph,
src = utils.toindex(src)
dst = utils.toindex(dst)
num_nodes = nx_graph.number_of_nodes()
g = create_from_edges(src, dst, ntype, etype, ntype, num_nodes, num_nodes, validate=False)
g = create_from_edges(src, dst, ntype, etype, ntype, num_nodes, num_nodes,
validate=False, restrict_format=restrict_format)
# handle features
# copy attributes
......@@ -950,7 +1081,8 @@ def create_from_networkx_bipartite(nx_graph,
utype, etype, vtype,
edge_id_attr_name='id',
node_attrs=None,
edge_attrs=None):
edge_attrs=None,
restrict_format='any'):
"""Create a heterograph that has one set of source nodes, one set of
destination nodes and one set of edges.
......@@ -974,6 +1106,8 @@ def create_from_networkx_bipartite(nx_graph,
Names for node features to retrieve from the NetworkX graph (Default: None)
edge_attrs : list of str
Names for edge features to retrieve from the NetworkX graph (Default: None)
restrict_format : 'any', 'coo', 'csr', 'csc', optional
Force the storage format. Default: 'any' (i.e. let DGL decide what to use).
Returns
-------
......@@ -1013,7 +1147,7 @@ def create_from_networkx_bipartite(nx_graph,
dst = utils.toindex(dst)
g = create_from_edges(
src, dst, utype, etype, vtype,
len(top_nodes), len(bottom_nodes), validate=False)
len(top_nodes), len(bottom_nodes), validate=False, restrict_format=restrict_format)
# TODO attributes
assert node_attrs is None, 'Retrieval of node attributes are not supported yet.'
......
......@@ -43,7 +43,7 @@ class HeteroGraphIndex(ObjectBase):
num_dst = number_of_nodes[dst_ntype]
src_id, dst_id, _ = edges_per_type
rel_graphs.append(create_unitgraph_from_coo(
1 if src_ntype == dst_ntype else 2, num_src, num_dst, src_id, dst_id))
1 if src_ntype == dst_ntype else 2, num_src, num_dst, src_id, dst_id, 'any'))
self.__init_handle_by_constructor__(
_CAPI_DGLHeteroCreateHeteroGraph, metagraph, rel_graphs)
......@@ -957,7 +957,8 @@ class HeteroSubgraphIndex(ObjectBase):
# Creators
#################################################################
def create_unitgraph_from_coo(num_ntypes, num_src, num_dst, row, col):
def create_unitgraph_from_coo(num_ntypes, num_src, num_dst, row, col,
restrict_format):
"""Create a unitgraph graph index from COO format
Parameters
......@@ -972,15 +973,19 @@ def create_unitgraph_from_coo(num_ntypes, num_src, num_dst, row, col):
Row index.
col : utils.Index
Col index.
restrict_format : "any", "coo", "csr" or "csc"
Restrict the storage format of the unit graph.
Returns
-------
HeteroGraphIndex
"""
return _CAPI_DGLHeteroCreateUnitGraphFromCOO(
int(num_ntypes), int(num_src), int(num_dst), row.todgltensor(), col.todgltensor())
int(num_ntypes), int(num_src), int(num_dst), row.todgltensor(), col.todgltensor(),
restrict_format)
def create_unitgraph_from_csr(num_ntypes, num_src, num_dst, indptr, indices, edge_ids):
def create_unitgraph_from_csr(num_ntypes, num_src, num_dst, indptr, indices, edge_ids,
restrict_format):
"""Create a unitgraph graph index from CSR format
Parameters
......@@ -997,6 +1002,8 @@ def create_unitgraph_from_csr(num_ntypes, num_src, num_dst, indptr, indices, edg
CSR indices.
edge_ids : utils.Index
Edge shuffle id.
restrict_format : "any", "coo", "csr" or "csc"
Restrict the storage format of the unit graph.
Returns
-------
......@@ -1004,7 +1011,8 @@ def create_unitgraph_from_csr(num_ntypes, num_src, num_dst, indptr, indices, edg
"""
return _CAPI_DGLHeteroCreateUnitGraphFromCSR(
int(num_ntypes), int(num_src), int(num_dst),
indptr.todgltensor(), indices.todgltensor(), edge_ids.todgltensor())
indptr.todgltensor(), indices.todgltensor(), edge_ids.todgltensor(),
restrict_format)
def create_heterograph_from_relations(metagraph, rel_graphs):
"""Create a heterograph from metagraph and graphs of every relation.
......@@ -1061,6 +1069,31 @@ def disjoint_partition(graph, bnn_all_types, bne_all_types):
return _CAPI_DGLHeteroDisjointPartitionBySizes(
graph, bnn_all_types.todgltensor(), bne_all_types.todgltensor())
def compact_graph_indexes(graphs):
"""Given a list of graphs, remove the nodes that have neither inbound nor
outbound edges in any of the graphs.
The graphs should have identical node space (i.e. should have the same set of
nodes, including types and IDs) and metagraph.
Parameters
----------
graphs : list[HeteroGraphIndex]
List of heterographs.
Returns
-------
list[HeteroGraphIndex]
A list of compacted heterographs.
The returned heterographs share the same metagraph, which is identical to that
of the original heterographs.
The returned heterographs also have identical node space.
list[Tensor]
The induced node IDs of each node type.
"""
new_graphs, induced_nodes = _CAPI_DGLCompactGraphs(graphs)
return new_graphs, [F.zerocopy_from_dgl_ndarray(nodes.data) for nodes in induced_nodes]
@register_object("graph.FlattenedHeteroGraph")
class FlattenedHeteroGraph(ObjectBase):
"""FlattenedHeteroGraph object class in C++ backend."""
......
......@@ -168,7 +168,7 @@ def build_gidx_and_mapping_uv(edge_tuples, num_src, num_dst):
Number of ints needed to represent the graph
"""
u, v, eid = edge_tuples
gidx = create_unitgraph_from_coo(2, num_src, num_dst, u, v)
gidx = create_unitgraph_from_coo(2, num_src, num_dst, u, v, 'any')
forward, backward = gidx.get_csr_shuffle_order(0)
eid = eid.tousertensor()
nbits = gidx.bits_needed(0)
......
......@@ -411,6 +411,22 @@ void CSRSort(CSRMatrix csr) {
///////////////////////// COO routines //////////////////////////
bool COOIsNonZero(COOMatrix coo, int64_t row, int64_t col) {
bool ret = false;
ATEN_COO_IDX_SWITCH(coo, XPU, IdType, {
ret = impl::COOIsNonZero<XPU, IdType>(coo, row, col);
});
return ret;
}
NDArray COOIsNonZero(COOMatrix coo, NDArray row, NDArray col) {
NDArray ret;
ATEN_COO_IDX_SWITCH(coo, XPU, IdType, {
ret = impl::COOIsNonZero<XPU, IdType>(coo, row, col);
});
return ret;
}
bool COOHasDuplicate(COOMatrix coo) {
bool ret = false;
ATEN_COO_IDX_SWITCH(coo, XPU, IdType, {
......@@ -419,6 +435,55 @@ bool COOHasDuplicate(COOMatrix coo) {
return ret;
}
int64_t COOGetRowNNZ(COOMatrix coo, int64_t row) {
int64_t ret = 0;
ATEN_COO_IDX_SWITCH(coo, XPU, IdType, {
ret = impl::COOGetRowNNZ<XPU, IdType>(coo, row);
});
return ret;
}
NDArray COOGetRowNNZ(COOMatrix coo, NDArray row) {
NDArray ret;
ATEN_COO_IDX_SWITCH(coo, XPU, IdType, {
ret = impl::COOGetRowNNZ<XPU, IdType>(coo, row);
});
return ret;
}
std::pair<NDArray, NDArray> COOGetRowDataAndIndices(COOMatrix coo, int64_t row) {
std::pair<NDArray, NDArray> ret;
ATEN_COO_SWITCH(coo, XPU, IdType, DType, {
ret = impl::COOGetRowDataAndIndices<XPU, IdType, DType>(coo, row);
});
return ret;
}
NDArray COOGetData(COOMatrix coo, int64_t row, int64_t col) {
NDArray ret;
ATEN_COO_SWITCH(coo, XPU, IdType, DType, {
ret = impl::COOGetData<XPU, IdType, DType>(coo, row, col);
});
return ret;
}
std::vector<NDArray> COOGetDataAndIndices(
COOMatrix coo, NDArray rows, NDArray cols) {
std::vector<NDArray> ret;
ATEN_COO_SWITCH(coo, XPU, IdType, DType, {
ret = impl::COOGetDataAndIndices<XPU, IdType, DType>(coo, rows, cols);
});
return ret;
}
COOMatrix COOTranspose(COOMatrix coo) {
COOMatrix ret;
ATEN_COO_SWITCH(coo, XPU, IdType, DType, {
ret = impl::COOTranspose<XPU, IdType, DType>(coo);
});
return ret;
}
CSRMatrix COOToCSR(COOMatrix coo) {
CSRMatrix ret;
ATEN_COO_SWITCH(coo, XPU, IdType, DType, {
......@@ -427,5 +492,29 @@ CSRMatrix COOToCSR(COOMatrix coo) {
return ret;
}
COOMatrix COOSliceRows(COOMatrix coo, int64_t start, int64_t end) {
COOMatrix ret;
ATEN_COO_SWITCH(coo, XPU, IdType, DType, {
ret = impl::COOSliceRows<XPU, IdType, DType>(coo, start, end);
});
return ret;
}
COOMatrix COOSliceRows(COOMatrix coo, NDArray rows) {
COOMatrix ret;
ATEN_COO_SWITCH(coo, XPU, IdType, DType, {
ret = impl::COOSliceRows<XPU, IdType, DType>(coo, rows);
});
return ret;
}
COOMatrix COOSliceMatrix(COOMatrix coo, NDArray rows, NDArray cols) {
COOMatrix ret;
ATEN_COO_SWITCH(coo, XPU, IdType, DType, {
ret = impl::COOSliceMatrix<XPU, IdType, DType>(coo, rows, cols);
});
return ret;
}
} // namespace aten
} // namespace dgl
......@@ -107,12 +107,48 @@ CSRMatrix CSRSliceMatrix(CSRMatrix csr, runtime::NDArray rows, runtime::NDArray
template <DLDeviceType XPU, typename IdType, typename DType>
void CSRSort(CSRMatrix csr);
template <DLDeviceType XPU, typename IdType>
bool COOIsNonZero(COOMatrix coo, int64_t row, int64_t col);
template <DLDeviceType XPU, typename IdType>
runtime::NDArray COOIsNonZero(COOMatrix coo, runtime::NDArray row, runtime::NDArray col);
template <DLDeviceType XPU, typename IdType>
bool COOHasDuplicate(COOMatrix coo);
template <DLDeviceType XPU, typename IdType>
int64_t COOGetRowNNZ(COOMatrix coo, int64_t row);
template <DLDeviceType XPU, typename IdType>
runtime::NDArray COOGetRowNNZ(COOMatrix coo, runtime::NDArray row);
template <DLDeviceType XPU, typename IdType, typename DType>
std::pair<runtime::NDArray, runtime::NDArray>
COOGetRowDataAndIndices(COOMatrix coo, int64_t row);
template <DLDeviceType XPU, typename IdType, typename DType>
runtime::NDArray COOGetData(COOMatrix coo, int64_t row, int64_t col);
template <DLDeviceType XPU, typename IdType, typename DType>
std::vector<runtime::NDArray> COOGetDataAndIndices(
COOMatrix coo, runtime::NDArray rows, runtime::NDArray cols);
template <DLDeviceType XPU, typename IdType, typename DType>
COOMatrix COOTranspose(COOMatrix coo);
template <DLDeviceType XPU, typename IdType, typename DType>
CSRMatrix COOToCSR(COOMatrix coo);
template <DLDeviceType XPU, typename IdType, typename DType>
COOMatrix COOSliceRows(COOMatrix coo, int64_t start, int64_t end);
template <DLDeviceType XPU, typename IdType, typename DType>
COOMatrix COOSliceRows(COOMatrix coo, runtime::NDArray rows);
template <DLDeviceType XPU, typename IdType, typename DType>
COOMatrix COOSliceMatrix(COOMatrix coo, runtime::NDArray rows, runtime::NDArray cols);
} // namespace impl
} // namespace aten
} // namespace dgl
......
/*!
* Copyright (c) 2019 by Contributors
* \file dgl/array_utils.h
* \brief Utility classes and functions for DGL arrays.
*/
#ifndef DGL_ARRAY_CPU_ARRAY_UTILS_H_
#define DGL_ARRAY_CPU_ARRAY_UTILS_H_
#include <dgl/array.h>
#include <vector>
#include <unordered_map>
#include <utility>
namespace dgl {
namespace aten {
/*!
 * \brief A hashmap that maps each id in the given array to a new id starting from zero.
*
* Useful for relabeling integers and finding unique integers.
*
* Usually faster than std::unordered_map in existence checking.
*/
template <typename IdType>
class IdHashMap {
public:
// default ctor
IdHashMap(): filter_(kFilterSize, false) {}
// Construct the hashmap using the given id array.
// The id array could contain duplicates.
explicit IdHashMap(IdArray ids): filter_(kFilterSize, false) {
Update(ids);
}
// Update the hashmap with given id array.
// The id array could contain duplicates.
void Update(IdArray ids) {
const IdType* ids_data = static_cast<IdType*>(ids->data);
const int64_t len = ids->shape[0];
IdType newid = oldv2newv_.size();
for (int64_t i = 0; i < len; ++i) {
const IdType id = ids_data[i];
if (!Contains(id)) {
oldv2newv_[id] = newid++;
filter_[id & kFilterMask] = true;
}
}
}
// Return true if the given id is contained in this hashmap.
bool Contains(IdType id) const {
return filter_[id & kFilterMask] && oldv2newv_.count(id);
}
// Return the new id of the given id. If the given id is not contained
// in the hash map, returns the default_val instead.
IdType Map(IdType id, IdType default_val) const {
if (filter_[id & kFilterMask]) {
auto it = oldv2newv_.find(id);
return (it == oldv2newv_.end()) ? default_val : it->second;
} else {
return default_val;
}
}
// Return the new id of each id in the given array.
IdArray Map(IdArray ids, IdType default_val) const {
const IdType* ids_data = static_cast<IdType*>(ids->data);
const int64_t len = ids->shape[0];
IdArray values = NewIdArray(len, ids->ctx, ids->dtype.bits);
IdType* values_data = static_cast<IdType*>(values->data);
for (int64_t i = 0; i < len; ++i)
values_data[i] = Map(ids_data[i], default_val);
return values;
}
// Return all the old ids collected so far, ordered by new id.
IdArray Values() const {
IdArray values = NewIdArray(oldv2newv_.size(), DLContext{kDLCPU, 0}, sizeof(IdType) * 8);
IdType* values_data = static_cast<IdType*>(values->data);
for (auto pair : oldv2newv_)
values_data[pair.second] = pair.first;
return values;
}
private:
static constexpr int32_t kFilterMask = 0xFFFFFF;
static constexpr int32_t kFilterSize = kFilterMask + 1;
// This bitmap is used as a bloom filter to remove some lookups.
// Hashtable is very slow. Using bloom filter can significantly speed up lookups.
std::vector<bool> filter_;
// The hashmap from old vid to new vid
std::unordered_map<IdType, IdType> oldv2newv_;
};
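A conceptual Python sketch (illustrative only, not part of this diff; class and method names are hypothetical) of the IdHashMap semantics above: relabel the ids seen so far to consecutive new ids starting from zero, with a small bitmap acting as a pre-filter before the hash lookup.

FILTER_MASK = 0xFFFFFF

class PyIdHashMap:
    def __init__(self, ids=()):
        self.filter = bytearray(FILTER_MASK + 1)   # cheap membership pre-check
        self.old2new = {}                          # old id -> new id
        self.update(ids)

    def update(self, ids):
        # Ids may contain duplicates; only the first occurrence gets a new id.
        for i in ids:
            if not self.contains(i):
                self.old2new[i] = len(self.old2new)
                self.filter[i & FILTER_MASK] = 1

    def contains(self, i):
        return bool(self.filter[i & FILTER_MASK]) and i in self.old2new

    def map(self, i, default=-1):
        return self.old2new.get(i, default) if self.filter[i & FILTER_MASK] else default

# Example: relabel ids 100, 7, 100, 42 -> 0, 1, 0, 2
m = PyIdHashMap([100, 7, 100, 42])
print([m.map(i) for i in [100, 7, 42, 5]])  # [0, 1, 2, -1]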
/*
* \brief Hash type for building maps/sets with pairs as keys.
*/
struct PairHash {
template <class T1, class T2>
std::size_t operator() (const std::pair<T1, T2>& pair) const {
return std::hash<T1>()(pair.first) ^ std::hash<T2>()(pair.second);
}
};
}; // namespace aten
}; // namespace dgl
#endif // DGL_ARRAY_CPU_ARRAY_UTILS_H_
......@@ -6,6 +6,7 @@
#include <dgl/array.h>
#include <vector>
#include <unordered_set>
#include "array_utils.h"
namespace dgl {
......@@ -13,69 +14,6 @@ using runtime::NDArray;
namespace aten {
namespace impl {
namespace {
/*!
* \brief A hashmap that maps each ids in the given array to new ids starting from zero.
*/
template <typename IdType>
class IdHashMap {
public:
// Construct the hashmap using the given id arrays.
// The id array could contain duplicates.
explicit IdHashMap(IdArray ids): filter_(kFilterSize, false) {
const IdType* ids_data = static_cast<IdType*>(ids->data);
const int64_t len = ids->shape[0];
IdType newid = 0;
for (int64_t i = 0; i < len; ++i) {
const IdType id = ids_data[i];
if (!Contains(id)) {
oldv2newv_[id] = newid++;
filter_[id & kFilterMask] = true;
}
}
}
// Return true if the given id is contained in this hashmap.
bool Contains(IdType id) const {
return filter_[id & kFilterMask] && oldv2newv_.count(id);
}
// Return the new id of the given id. If the given id is not contained
// in the hash map, returns the default_val instead.
IdType Map(IdType id, IdType default_val) const {
if (filter_[id & kFilterMask]) {
auto it = oldv2newv_.find(id);
return (it == oldv2newv_.end()) ? default_val : it->second;
} else {
return default_val;
}
}
private:
static constexpr int32_t kFilterMask = 0xFFFFFF;
static constexpr int32_t kFilterSize = kFilterMask + 1;
// This bitmap is used as a bloom filter to remove some lookups.
// Hashtable is very slow. Using bloom filter can significantly speed up lookups.
std::vector<bool> filter_;
// The hashmap from old vid to new vid
std::unordered_map<IdType, IdType> oldv2newv_;
};
struct PairHash {
template <class T1, class T2>
std::size_t operator() (const std::pair<T1, T2>& pair) const {
return std::hash<T1>()(pair.first) ^ std::hash<T2>()(pair.second);
}
};
inline bool CSRHasData(CSRMatrix csr) {
return csr.data.defined();
}
inline bool COOHasData(COOMatrix csr) {
return csr.data.defined();
}
} // namespace
///////////////////////////// CSRIsNonZero /////////////////////////////
......@@ -649,91 +587,6 @@ void CSRSort(CSRMatrix csr) {
template void CSRSort<kDLCPU, int64_t, int64_t>(CSRMatrix csr);
template void CSRSort<kDLCPU, int32_t, int32_t>(CSRMatrix csr);
///////////////////////////// COOHasDuplicate /////////////////////////////
template <DLDeviceType XPU, typename IdType>
bool COOHasDuplicate(COOMatrix coo) {
std::unordered_set<std::pair<IdType, IdType>, PairHash> hashmap;
const IdType* src_data = static_cast<IdType*>(coo.row->data);
const IdType* dst_data = static_cast<IdType*>(coo.col->data);
const auto nnz = coo.row->shape[0];
for (IdType eid = 0; eid < nnz; ++eid) {
const auto& p = std::make_pair(src_data[eid], dst_data[eid]);
if (hashmap.count(p)) {
return true;
} else {
hashmap.insert(p);
}
}
return false;
}
template bool COOHasDuplicate<kDLCPU, int32_t>(COOMatrix coo);
template bool COOHasDuplicate<kDLCPU, int64_t>(COOMatrix coo);
///////////////////////////// COOToCSR /////////////////////////////
// complexity: time O(NNZ), space O(1)
template <DLDeviceType XPU, typename IdType, typename DType>
CSRMatrix COOToCSR(COOMatrix coo) {
const int64_t N = coo.num_rows;
const int64_t NNZ = coo.row->shape[0];
const IdType* row_data = static_cast<IdType*>(coo.row->data);
const IdType* col_data = static_cast<IdType*>(coo.col->data);
NDArray ret_indptr = NDArray::Empty({N + 1}, coo.row->dtype, coo.row->ctx);
NDArray ret_indices = NDArray::Empty({NNZ}, coo.row->dtype, coo.row->ctx);
NDArray ret_data;
if (COOHasData(coo)) {
ret_data = NDArray::Empty({NNZ}, coo.data->dtype, coo.data->ctx);
} else {
// if no data array in the input coo, the return data array is a shuffle index.
ret_data = NDArray::Empty({NNZ}, coo.row->dtype, coo.row->ctx);
}
IdType* Bp = static_cast<IdType*>(ret_indptr->data);
IdType* Bi = static_cast<IdType*>(ret_indices->data);
std::fill(Bp, Bp + N, 0);
for (int64_t i = 0; i < NNZ; ++i) {
Bp[row_data[i]]++;
}
// cumsum
for (int64_t i = 0, cumsum = 0; i < N; ++i) {
const IdType temp = Bp[i];
Bp[i] = cumsum;
cumsum += temp;
}
Bp[N] = NNZ;
for (int64_t i = 0; i < NNZ; ++i) {
const IdType r = row_data[i];
Bi[Bp[r]] = col_data[i];
if (COOHasData(coo)) {
const DType* data = static_cast<DType*>(coo.data->data);
DType* Bx = static_cast<DType*>(ret_data->data);
Bx[Bp[r]] = data[i];
} else {
IdType* Bx = static_cast<IdType*>(ret_data->data);
Bx[Bp[r]] = i;
}
Bp[r]++;
}
// correct the indptr
for (int64_t i = 0, last = 0; i <= N; ++i) {
IdType temp = Bp[i];
Bp[i] = last;
last = temp;
}
return CSRMatrix{coo.num_rows, coo.num_cols, ret_indptr, ret_indices, ret_data};
}
template CSRMatrix COOToCSR<kDLCPU, int32_t, int32_t>(COOMatrix coo);
template CSRMatrix COOToCSR<kDLCPU, int64_t, int64_t>(COOMatrix coo);
} // namespace impl
} // namespace aten
} // namespace dgl
/*!
* Copyright (c) 2019 by Contributors
* \file array/cpu/spmat_op_impl.cc
* \brief CPU implementation of COO sparse matrix operators
*/
#include <dgl/array.h>
#include <vector>
#include <unordered_set>
#include <unordered_map>
#include "array_utils.h"
namespace dgl {
using runtime::NDArray;
namespace aten {
namespace impl {
/*
* TODO(BarclayII):
 * For row-major sorted COOs, we have a faster implementation with binary search,
* sorted search, etc. Later we should benchmark how much we can gain with
* sorted COOs on hypersparse graphs.
*/
///////////////////////////// COOIsNonZero /////////////////////////////
template <DLDeviceType XPU, typename IdType>
bool COOIsNonZero(COOMatrix coo, int64_t row, int64_t col) {
CHECK(row >= 0 && row < coo.num_rows) << "Invalid row index: " << row;
CHECK(col >= 0 && col < coo.num_cols) << "Invalid col index: " << col;
const IdType* coo_row_data = static_cast<IdType*>(coo.row->data);
const IdType* coo_col_data = static_cast<IdType*>(coo.col->data);
for (int64_t i = 0; i < coo.row->shape[0]; ++i) {
if (coo_row_data[i] == row && coo_col_data[i] == col)
return true;
}
return false;
}
template bool COOIsNonZero<kDLCPU, int32_t>(COOMatrix, int64_t, int64_t);
template bool COOIsNonZero<kDLCPU, int64_t>(COOMatrix, int64_t, int64_t);
template <DLDeviceType XPU, typename IdType>
NDArray COOIsNonZero(COOMatrix coo, NDArray row, NDArray col) {
const auto rowlen = row->shape[0];
const auto collen = col->shape[0];
const auto rstlen = std::max(rowlen, collen);
NDArray rst = NDArray::Empty({rstlen}, row->dtype, row->ctx);
IdType* rst_data = static_cast<IdType*>(rst->data);
const IdType* row_data = static_cast<IdType*>(row->data);
const IdType* col_data = static_cast<IdType*>(col->data);
const int64_t row_stride = (rowlen == 1 && collen != 1) ? 0 : 1;
const int64_t col_stride = (collen == 1 && rowlen != 1) ? 0 : 1;
const int64_t kmax = std::max(rowlen, collen);
#pragma omp parallel for
for (int64_t k = 0; k < kmax; ++k) {
int64_t i = row_stride * k;
int64_t j = col_stride * k;
rst_data[k] = COOIsNonZero<XPU, IdType>(coo, row_data[i], col_data[j])? 1 : 0;
}
return rst;
}
template NDArray COOIsNonZero<kDLCPU, int32_t>(COOMatrix, NDArray, NDArray);
template NDArray COOIsNonZero<kDLCPU, int64_t>(COOMatrix, NDArray, NDArray);
///////////////////////////// COOHasDuplicate /////////////////////////////
template <DLDeviceType XPU, typename IdType>
bool COOHasDuplicate(COOMatrix coo) {
std::unordered_set<std::pair<IdType, IdType>, PairHash> hashmap;
const IdType* src_data = static_cast<IdType*>(coo.row->data);
const IdType* dst_data = static_cast<IdType*>(coo.col->data);
const auto nnz = coo.row->shape[0];
for (IdType eid = 0; eid < nnz; ++eid) {
const auto& p = std::make_pair(src_data[eid], dst_data[eid]);
if (hashmap.count(p)) {
return true;
} else {
hashmap.insert(p);
}
}
return false;
}
template bool COOHasDuplicate<kDLCPU, int32_t>(COOMatrix coo);
template bool COOHasDuplicate<kDLCPU, int64_t>(COOMatrix coo);
///////////////////////////// COOGetRowNNZ /////////////////////////////
template <DLDeviceType XPU, typename IdType>
int64_t COOGetRowNNZ(COOMatrix coo, int64_t row) {
CHECK(row >= 0 && row < coo.num_rows) << "Invalid row index: " << row;
const IdType* coo_row_data = static_cast<IdType*>(coo.row->data);
int64_t result = 0;
for (int64_t i = 0; i < coo.row->shape[0]; ++i) {
if (coo_row_data[i] == row)
++result;
}
return result;
}
template int64_t COOGetRowNNZ<kDLCPU, int32_t>(COOMatrix, int64_t);
template int64_t COOGetRowNNZ<kDLCPU, int64_t>(COOMatrix, int64_t);
template <DLDeviceType XPU, typename IdType>
NDArray COOGetRowNNZ(COOMatrix coo, NDArray rows) {
const auto len = rows->shape[0];
const IdType* vid_data = static_cast<IdType*>(rows->data);
NDArray rst = NDArray::Empty({len}, rows->dtype, rows->ctx);
IdType* rst_data = static_cast<IdType*>(rst->data);
#pragma omp parallel for
for (int64_t i = 0; i < len; ++i)
rst_data[i] = COOGetRowNNZ<XPU, IdType>(coo, vid_data[i]);
return rst;
}
template NDArray COOGetRowNNZ<kDLCPU, int32_t>(COOMatrix, NDArray);
template NDArray COOGetRowNNZ<kDLCPU, int64_t>(COOMatrix, NDArray);
///////////////////////////// COOGetRowDataAndIndices /////////////////////////////
template <DLDeviceType XPU, typename IdType, typename DType>
std::pair<NDArray, NDArray> COOGetRowDataAndIndices(
COOMatrix coo, int64_t row) {
CHECK(row >= 0 && row < coo.num_rows) << "Invalid row index: " << row;
const IdType* coo_row_data = static_cast<IdType*>(coo.row->data);
const IdType* coo_col_data = static_cast<IdType*>(coo.col->data);
const DType* coo_data = COOHasData(coo) ? static_cast<DType*>(coo.data->data) : nullptr;
std::vector<IdType> indices;
std::vector<DType> data;
for (int64_t i = 0; i < coo.row->shape[0]; ++i) {
if (coo_row_data[i] == row) {
indices.push_back(coo_col_data[i]);
data.push_back(coo_data ? coo_data[i] : i);
}
}
return std::make_pair(NDArray::FromVector(data), NDArray::FromVector(indices));
}
template std::pair<NDArray, NDArray>
COOGetRowDataAndIndices<kDLCPU, int32_t, int32_t>(COOMatrix, int64_t);
template std::pair<NDArray, NDArray>
COOGetRowDataAndIndices<kDLCPU, int64_t, int64_t>(COOMatrix, int64_t);
///////////////////////////// COOGetData /////////////////////////////
template <DLDeviceType XPU, typename IdType, typename DType>
NDArray COOGetData(COOMatrix coo, int64_t row, int64_t col) {
CHECK(row >= 0 && row < coo.num_rows) << "Invalid row index: " << row;
CHECK(col >= 0 && col < coo.num_cols) << "Invalid col index: " << col;
std::vector<DType> ret_vec;
const IdType* coo_row_data = static_cast<IdType*>(coo.row->data);
const IdType* coo_col_data = static_cast<IdType*>(coo.col->data);
const DType* data = COOHasData(coo) ? static_cast<DType*>(coo.data->data) : nullptr;
for (IdType i = 0; i < coo.row->shape[0]; ++i) {
if (coo_row_data[i] == row && coo_col_data[i] == col)
ret_vec.push_back(data ? data[i] : i);
}
return NDArray::FromVector(ret_vec);
}
template NDArray COOGetData<kDLCPU, int32_t, int32_t>(COOMatrix, int64_t, int64_t);
template NDArray COOGetData<kDLCPU, int64_t, int64_t>(COOMatrix, int64_t, int64_t);
///////////////////////////// COOGetDataAndIndices /////////////////////////////
template <DLDeviceType XPU, typename IdType, typename DType>
std::vector<NDArray> COOGetDataAndIndices(
COOMatrix coo, NDArray rows, NDArray cols) {
const int64_t rowlen = rows->shape[0];
const int64_t collen = cols->shape[0];
CHECK((rowlen == collen) || (rowlen == 1) || (collen == 1))
<< "Invalid row and col id array.";
const int64_t row_stride = (rowlen == 1 && collen != 1) ? 0 : 1;
const int64_t col_stride = (collen == 1 && rowlen != 1) ? 0 : 1;
const IdType* row_data = static_cast<IdType*>(rows->data);
const IdType* col_data = static_cast<IdType*>(cols->data);
const IdType* coo_row_data = static_cast<IdType*>(coo.row->data);
const IdType* coo_col_data = static_cast<IdType*>(coo.col->data);
const DType* data = COOHasData(coo) ? static_cast<DType*>(coo.data->data) : nullptr;
std::vector<IdType> ret_rows, ret_cols;
std::vector<DType> ret_data;
for (int64_t i = 0, j = 0; i < rowlen && j < collen; i += row_stride, j += col_stride) {
const IdType row_id = row_data[i], col_id = col_data[j];
CHECK(row_id >= 0 && row_id < coo.num_rows) << "Invalid row index: " << row_id;
CHECK(col_id >= 0 && col_id < coo.num_cols) << "Invalid col index: " << col_id;
for (int64_t k = 0; k < coo.row->shape[0]; ++k) {
if (coo_row_data[k] == row_id && coo_col_data[k] == col_id) {
ret_rows.push_back(row_id);
ret_cols.push_back(col_id);
ret_data.push_back(data ? data[k] : k);
}
}
}
return {NDArray::FromVector(ret_rows),
NDArray::FromVector(ret_cols),
NDArray::FromVector(ret_data)};
}
template std::vector<NDArray> COOGetDataAndIndices<kDLCPU, int32_t, int32_t>(
COOMatrix coo, NDArray rows, NDArray cols);
template std::vector<NDArray> COOGetDataAndIndices<kDLCPU, int64_t, int64_t>(
COOMatrix coo, NDArray rows, NDArray cols);
///////////////////////////// COOTranspose /////////////////////////////
template <DLDeviceType XPU, typename IdType, typename DType>
COOMatrix COOTranspose(COOMatrix coo) {
return COOMatrix{coo.num_cols, coo.num_rows, coo.col, coo.row, coo.data};
}
template COOMatrix COOTranspose<kDLCPU, int32_t, int32_t>(COOMatrix coo);
template COOMatrix COOTranspose<kDLCPU, int64_t, int64_t>(COOMatrix coo);
///////////////////////////// COOToCSR /////////////////////////////
// complexity: time O(NNZ), space O(1)
template <DLDeviceType XPU, typename IdType, typename DType>
CSRMatrix COOToCSR(COOMatrix coo) {
const int64_t N = coo.num_rows;
const int64_t NNZ = coo.row->shape[0];
const IdType* row_data = static_cast<IdType*>(coo.row->data);
const IdType* col_data = static_cast<IdType*>(coo.col->data);
NDArray ret_indptr = NDArray::Empty({N + 1}, coo.row->dtype, coo.row->ctx);
NDArray ret_indices = NDArray::Empty({NNZ}, coo.row->dtype, coo.row->ctx);
NDArray ret_data;
if (COOHasData(coo)) {
ret_data = NDArray::Empty({NNZ}, coo.data->dtype, coo.data->ctx);
} else {
// If the input COO has no data array, the returned data array is a shuffle index.
ret_data = NDArray::Empty({NNZ}, coo.row->dtype, coo.row->ctx);
}
IdType* Bp = static_cast<IdType*>(ret_indptr->data);
IdType* Bi = static_cast<IdType*>(ret_indices->data);
std::fill(Bp, Bp + N, 0);
for (int64_t i = 0; i < NNZ; ++i) {
Bp[row_data[i]]++;
}
// cumsum
for (int64_t i = 0, cumsum = 0; i < N; ++i) {
const IdType temp = Bp[i];
Bp[i] = cumsum;
cumsum += temp;
}
Bp[N] = NNZ;
for (int64_t i = 0; i < NNZ; ++i) {
const IdType r = row_data[i];
Bi[Bp[r]] = col_data[i];
if (COOHasData(coo)) {
const DType* data = static_cast<DType*>(coo.data->data);
DType* Bx = static_cast<DType*>(ret_data->data);
Bx[Bp[r]] = data[i];
} else {
IdType* Bx = static_cast<IdType*>(ret_data->data);
Bx[Bp[r]] = i;
}
Bp[r]++;
}
// correct the indptr
for (int64_t i = 0, last = 0; i <= N; ++i) {
IdType temp = Bp[i];
Bp[i] = last;
last = temp;
}
return CSRMatrix{coo.num_rows, coo.num_cols, ret_indptr, ret_indices, ret_data};
}
template CSRMatrix COOToCSR<kDLCPU, int32_t, int32_t>(COOMatrix coo);
template CSRMatrix COOToCSR<kDLCPU, int64_t, int64_t>(COOMatrix coo);
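A NumPy sketch (illustrative only, not part of this diff) of the counting-sort conversion implemented above, for a tiny COO matrix without a data array:

import numpy as np

num_rows = 3
row = np.array([2, 0, 2, 1])        # COO row indices
col = np.array([1, 0, 2, 2])        # COO col indices

indptr = np.zeros(num_rows + 1, dtype=np.int64)
np.add.at(indptr, row + 1, 1)       # count nnz per row (shifted by one)
indptr = np.cumsum(indptr)          # prefix sum gives the row pointers

indices = np.empty_like(col)
data = np.empty_like(col)           # shuffle index, since there is no data array
fill = indptr[:-1].copy()           # next free slot per row
for i in range(len(row)):
    r = row[i]
    indices[fill[r]] = col[i]
    data[fill[r]] = i
    fill[r] += 1

print(indptr)    # [0 1 2 4]
print(indices)   # [0 2 1 2]
print(data)      # [1 3 0 2]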
///////////////////////////// COOSliceRows /////////////////////////////
template <DLDeviceType XPU, typename IdType, typename DType>
COOMatrix COOSliceRows(COOMatrix coo, int64_t start, int64_t end) {
CHECK(start >= 0 && start < coo.num_rows) << "Invalid start row " << start;
CHECK(end > 0 && end <= coo.num_rows) << "Invalid end row " << end;
const IdType* coo_row_data = static_cast<IdType*>(coo.row->data);
const IdType* coo_col_data = static_cast<IdType*>(coo.col->data);
const DType* coo_data = COOHasData(coo) ? static_cast<DType*>(coo.data->data) : nullptr;
std::vector<IdType> ret_row, ret_col;
std::vector<DType> ret_data;
for (int64_t i = 0; i < coo.row->shape[0]; ++i) {
const IdType row_id = coo_row_data[i];
const IdType col_id = coo_col_data[i];
if (row_id < end && row_id >= start) {
ret_row.push_back(row_id - start);
ret_col.push_back(col_id);
ret_data.push_back(coo_data ? coo_data[i] : i);
}
}
return COOMatrix{
end - start,
coo.num_cols,
NDArray::FromVector(ret_row),
NDArray::FromVector(ret_col),
NDArray::FromVector(ret_data)};
}
template COOMatrix COOSliceRows<kDLCPU, int32_t, int32_t>(COOMatrix, int64_t, int64_t);
template COOMatrix COOSliceRows<kDLCPU, int64_t, int64_t>(COOMatrix, int64_t, int64_t);
template <DLDeviceType XPU, typename IdType, typename DType>
COOMatrix COOSliceRows(COOMatrix coo, NDArray rows) {
const IdType* coo_row_data = static_cast<IdType*>(coo.row->data);
const IdType* coo_col_data = static_cast<IdType*>(coo.col->data);
const DType* coo_data = COOHasData(coo) ? static_cast<DType*>(coo.data->data) : nullptr;
std::vector<IdType> ret_row, ret_col;
std::vector<DType> ret_data;
IdHashMap<IdType> hashmap(rows);
for (int64_t i = 0; i < coo.row->shape[0]; ++i) {
const IdType row_id = coo_row_data[i];
const IdType col_id = coo_col_data[i];
const IdType mapped_row_id = hashmap.Map(row_id, -1);
if (mapped_row_id != -1) {
ret_row.push_back(mapped_row_id);
ret_col.push_back(col_id);
ret_data.push_back(coo_data ? coo_data[i] : i);
}
}
return COOMatrix{
rows->shape[0],
coo.num_cols,
NDArray::FromVector(ret_row),
NDArray::FromVector(ret_col),
NDArray::FromVector(ret_data)};
}
template COOMatrix COOSliceRows<kDLCPU, int32_t, int32_t>(COOMatrix , NDArray);
template COOMatrix COOSliceRows<kDLCPU, int64_t, int64_t>(COOMatrix , NDArray);
///////////////////////////// COOSliceMatrix /////////////////////////////
template <DLDeviceType XPU, typename IdType, typename DType>
COOMatrix COOSliceMatrix(COOMatrix coo, runtime::NDArray rows, runtime::NDArray cols) {
const IdType* coo_row_data = static_cast<IdType*>(coo.row->data);
const IdType* coo_col_data = static_cast<IdType*>(coo.col->data);
const DType* coo_data = COOHasData(coo) ? static_cast<DType*>(coo.data->data) : nullptr;
IdHashMap<IdType> row_map(rows), col_map(cols);
std::vector<IdType> ret_row, ret_col;
std::vector<DType> ret_data;
for (int64_t i = 0; i < coo.row->shape[0]; ++i) {
const IdType row_id = coo_row_data[i];
const IdType col_id = coo_col_data[i];
const IdType mapped_row_id = row_map.Map(row_id, -1);
if (mapped_row_id != -1) {
const IdType mapped_col_id = col_map.Map(col_id, -1);
if (mapped_col_id != -1) {
ret_row.push_back(mapped_row_id);
ret_col.push_back(mapped_col_id);
ret_data.push_back(coo_data ? coo_data[i] : i);
}
}
}
return COOMatrix{
rows->shape[0],
cols->shape[0],
NDArray::FromVector(ret_row),
NDArray::FromVector(ret_col),
NDArray::FromVector(ret_data)};
}
template COOMatrix COOSliceMatrix<kDLCPU, int32_t, int32_t>(
COOMatrix coo, runtime::NDArray rows, runtime::NDArray cols);
template COOMatrix COOSliceMatrix<kDLCPU, int64_t, int64_t>(
COOMatrix coo, runtime::NDArray rows, runtime::NDArray cols);
} // namespace impl
} // namespace aten
} // namespace dgl
......@@ -7,8 +7,14 @@
#include <dgl/array.h>
#include <dgl/packed_func_ext.h>
#include <dgl/runtime/container.h>
#include <vector>
#include <tuple>
#include <utility>
#include "../c_api_common.h"
#include "./unit_graph.h"
// TODO(BarclayII): currently CompactGraphs depends on the IdHashMap implementation,
// which only works on CPU. Should fix later to make it device agnostic.
#include "../array/cpu/array_utils.h"
using namespace dgl::runtime;
......@@ -103,6 +109,66 @@ HeteroSubgraph EdgeSubgraphNoPreserveNodes(
return ret;
}
template<typename IdType>
std::pair<std::vector<HeteroGraphPtr>, std::vector<IdArray>>
CompactGraphs(const std::vector<HeteroGraphPtr> &graphs) {
// TODO(BarclayII): check whether the node space and metagraph of each graph are the same.
// Step 1: Collect the nodes that have connections for each type.
std::vector<aten::IdHashMap<IdType>> hashmaps(graphs[0]->NumVertexTypes());
std::vector<std::vector<EdgeArray>> all_edges(graphs.size()); // all_edges[i][etype]
for (size_t i = 0; i < graphs.size(); ++i) {
const HeteroGraphPtr curr_graph = graphs[i];
const int64_t num_etypes = curr_graph->NumEdgeTypes();
for (IdType etype = 0; etype < num_etypes; ++etype) {
IdType srctype, dsttype;
std::tie(srctype, dsttype) = curr_graph->GetEndpointTypes(etype);
const EdgeArray edges = curr_graph->Edges(etype, "eid");
hashmaps[srctype].Update(edges.src);
hashmaps[dsttype].Update(edges.dst);
all_edges[i].push_back(edges);
}
}
// Step 2: Relabel the nodes for each type to a smaller ID space and save the mapping.
std::vector<IdArray> induced_nodes;
for (auto &hashmap : hashmaps)
induced_nodes.push_back(hashmap.Values());
// Step 3: Remap the edges of each graph.
std::vector<HeteroGraphPtr> new_graphs;
for (size_t i = 0; i < graphs.size(); ++i) {
std::vector<HeteroGraphPtr> rel_graphs;
const HeteroGraphPtr curr_graph = graphs[i];
const auto meta_graph = curr_graph->meta_graph();
const int64_t num_etypes = curr_graph->NumEdgeTypes();
for (IdType etype = 0; etype < num_etypes; ++etype) {
IdType srctype, dsttype;
std::tie(srctype, dsttype) = curr_graph->GetEndpointTypes(etype);
const EdgeArray &edges = all_edges[i][etype];
const IdArray mapped_rows = hashmaps[srctype].Map(edges.src, -1);
const IdArray mapped_cols = hashmaps[dsttype].Map(edges.dst, -1);
rel_graphs.push_back(UnitGraph::CreateFromCOO(
srctype == dsttype ? 1 : 2,
induced_nodes[srctype]->shape[0],
induced_nodes[dsttype]->shape[0],
mapped_rows,
mapped_cols));
}
new_graphs.push_back(CreateHeteroGraph(meta_graph, rel_graphs));
}
return std::make_pair(new_graphs, induced_nodes);
}
} // namespace
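A minimal Python sketch (illustrative only; a single node type and plain edge lists rather than HeteroGraphPtr objects) of the three steps above:

def compact(edge_lists):
    # Step 1: collect every endpoint that appears in any graph, keeping first-seen order.
    old2new, induced = {}, []
    for src, dst in edge_lists:
        for v in list(src) + list(dst):
            if v not in old2new:
                old2new[v] = len(induced)
                induced.append(v)
    # Steps 2-3: induced is the mapping back to the original ids;
    # remap each edge list into the compacted id space.
    remapped = [([old2new[u] for u in src], [old2new[v] for v in dst])
                for src, dst in edge_lists]
    return remapped, induced

g1 = ([1, 3], [3, 5])          # edges 1->3, 3->5
g2 = ([1, 6], [6, 8])          # edges 1->6, 6->8
remapped, induced = compact([g1, g2])
print(induced)    # [1, 3, 5, 6, 8] -- nodes kept, shared across both graphs
print(remapped)   # [([0, 1], [1, 2]), ([0, 3], [3, 4])]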
HeteroGraph::HeteroGraph(GraphPtr meta_graph, const std::vector<HeteroGraphPtr>& rel_graphs)
......@@ -419,6 +485,15 @@ HeteroGraphPtr CreateHeteroGraph(
return HeteroGraphPtr(new HeteroGraph(meta_graph, rel_graphs));
}
std::pair<std::vector<HeteroGraphPtr>, std::vector<IdArray>>
CompactGraphs(const std::vector<HeteroGraphPtr> &graphs) {
std::pair<std::vector<HeteroGraphPtr>, std::vector<IdArray>> result;
ATEN_ID_TYPE_SWITCH(graphs[0]->DataType(), IdType, {
result = CompactGraphs<IdType>(graphs);
});
return result;
}
///////////////////////// C APIs /////////////////////////
DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroCreateUnitGraphFromCOO")
......@@ -428,7 +503,9 @@ DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroCreateUnitGraphFromCOO")
int64_t num_dst = args[2];
IdArray row = args[3];
IdArray col = args[4];
auto hgptr = UnitGraph::CreateFromCOO(nvtypes, num_src, num_dst, row, col);
SparseFormat restrict_format = ParseSparseFormat(args[5]);
auto hgptr = UnitGraph::CreateFromCOO(
nvtypes, num_src, num_dst, row, col, restrict_format);
*rv = HeteroGraphRef(hgptr);
});
......@@ -440,8 +517,9 @@ DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroCreateUnitGraphFromCSR")
IdArray indptr = args[3];
IdArray indices = args[4];
IdArray edge_ids = args[5];
SparseFormat restrict_format = ParseSparseFormat(args[6]);
auto hgptr = UnitGraph::CreateFromCSR(
nvtypes, num_src, num_dst, indptr, indices, edge_ids);
nvtypes, num_src, num_dst, indptr, indices, edge_ids, restrict_format);
*rv = HeteroGraphRef(hgptr);
});
......@@ -782,6 +860,31 @@ DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroEdgeSubgraph")
*rv = HeteroSubgraphRef(subg);
});
DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLCompactGraphs")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
List<HeteroGraphRef> graph_refs = args[0];
std::vector<HeteroGraphPtr> graphs;
for (HeteroGraphRef gref : graph_refs)
graphs.push_back(gref.sptr());
const auto &result_pair = CompactGraphs(graphs);
List<HeteroGraphRef> compacted_graph_refs;
List<Value> induced_nodes;
for (const HeteroGraphPtr g : result_pair.first)
compacted_graph_refs.push_back(HeteroGraphRef(g));
for (const IdArray &ids : result_pair.second)
induced_nodes.push_back(Value(MakeValue(ids)));
List<ObjectRef> result;
result.push_back(compacted_graph_refs);
result.push_back(induced_nodes);
*rv = result;
});
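// The return value is a two-element List: the compacted graph references first, then the
// per-node-type induced node arrays wrapped as Values; the frontend presumably unpacks
// them positionally in that order.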
// HeteroSubgraph C APIs
DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroSubgraphGetGraph")
......
......@@ -41,6 +41,10 @@ class HeteroGraph : public BaseHeteroGraph {
LOG(FATAL) << "Bipartite graph is not mutable.";
}
DLDataType DataType() const override {
return relation_graphs_[0]->DataType();
}
DLContext Context() const override {
return relation_graphs_[0]->Context();
}
......
......@@ -4,8 +4,8 @@
* \brief DGL sampler - templated implementation definition of random walks on CPU
*/
#ifndef DGL_GRAPH_SAMPLING_METAPATH_RANDOMWALK_H_
#define DGL_GRAPH_SAMPLING_METAPATH_RANDOMWALK_H_
#ifndef DGL_GRAPH_SAMPLING_RANDOMWALKS_METAPATH_RANDOMWALK_H_
#define DGL_GRAPH_SAMPLING_RANDOMWALKS_METAPATH_RANDOMWALK_H_
#include <dgl/array.h>
#include <dgl/base_heterograph.h>
......@@ -145,4 +145,4 @@ IdArray MetapathBasedRandomWalk(
}; // namespace dgl
#endif // DGL_GRAPH_SAMPLING_METAPATH_RANDOMWALK_H_
#endif // DGL_GRAPH_SAMPLING_RANDOMWALKS_METAPATH_RANDOMWALK_H_
......@@ -11,7 +11,7 @@
#include <utility>
#include <tuple>
#include <vector>
#include "../../c_api_common.h"
#include "../../../c_api_common.h"
#include "randomwalks_impl.h"
using namespace dgl::runtime;
......
......@@ -4,8 +4,8 @@
* \brief DGL sampler - templated implementation definition of random walks on CPU
*/
#ifndef DGL_GRAPH_SAMPLING_RANDOMWALKS_CPU_H_
#define DGL_GRAPH_SAMPLING_RANDOMWALKS_CPU_H_
#ifndef DGL_GRAPH_SAMPLING_RANDOMWALKS_RANDOMWALKS_CPU_H_
#define DGL_GRAPH_SAMPLING_RANDOMWALKS_RANDOMWALKS_CPU_H_
#include <dgl/base_heterograph.h>
#include <dgl/array.h>
......@@ -71,4 +71,4 @@ IdArray GenericRandomWalk(
}; // namespace dgl
#endif // DGL_GRAPH_SAMPLING_RANDOMWALKS_CPU_H_
#endif // DGL_GRAPH_SAMPLING_RANDOMWALKS_RANDOMWALKS_CPU_H_
......@@ -4,8 +4,8 @@
* \brief DGL sampler - templated implementation definition of random walks
*/
#ifndef DGL_GRAPH_SAMPLING_RANDOMWALKS_IMPL_H_
#define DGL_GRAPH_SAMPLING_RANDOMWALKS_IMPL_H_
#ifndef DGL_GRAPH_SAMPLING_RANDOMWALKS_RANDOMWALKS_IMPL_H_
#define DGL_GRAPH_SAMPLING_RANDOMWALKS_RANDOMWALKS_IMPL_H_
#include <dgl/base_heterograph.h>
#include <dgl/array.h>
......@@ -114,4 +114,4 @@ IdArray RandomWalkWithStepwiseRestart(
}; // namespace dgl
#endif // DGL_GRAPH_SAMPLING_RANDOMWALKS_IMPL_H_
#endif // DGL_GRAPH_SAMPLING_RANDOMWALKS_RANDOMWALKS_IMPL_H_
......@@ -113,6 +113,10 @@ class UnitGraph::COO : public BaseHeteroGraph {
LOG(FATAL) << "UnitGraph graph is not mutable.";
}
DLDataType DataType() const override {
return adj_.row->dtype;
}
DLContext Context() const override {
return adj_.row->ctx;
}
......@@ -121,6 +125,32 @@ class UnitGraph::COO : public BaseHeteroGraph {
return adj_.row->dtype.bits;
}
COO AsNumBits(uint8_t bits) const {
if (NumBits() == bits)
return *this;
COO ret(
meta_graph_,
adj_.num_rows, adj_.num_cols,
aten::AsNumBits(adj_.row, bits),
aten::AsNumBits(adj_.col, bits));
ret.is_multigraph_ = is_multigraph_;
return ret;
}
COO CopyTo(const DLContext& ctx) const {
if (Context() == ctx)
return *this;
COO ret(
meta_graph_,
adj_.num_rows, adj_.num_cols,
adj_.row.CopyTo(ctx),
adj_.col.CopyTo(ctx));
ret.is_multigraph_ = is_multigraph_;
return ret;
}
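// Both AsNumBits and CopyTo above construct a fresh COO and carry over the cached
// is_multigraph_ flag, so the (potentially expensive) COOHasDuplicate check in
// IsMultigraph does not need to be recomputed after a dtype or device change.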
bool IsMultigraph() const override {
return const_cast<COO*>(this)->is_multigraph_.Get([this] () {
return aten::COOHasDuplicate(adj_);
......@@ -156,33 +186,38 @@ class UnitGraph::COO : public BaseHeteroGraph {
}
bool HasEdgeBetween(dgl_type_t etype, dgl_id_t src, dgl_id_t dst) const override {
LOG(INFO) << "Not enabled for COO graph.";
return {};
CHECK(HasVertex(SrcType(), src)) << "Invalid src vertex id: " << src;
CHECK(HasVertex(DstType(), dst)) << "Invalid dst vertex id: " << dst;
return aten::COOIsNonZero(adj_, src, dst);
}
BoolArray HasEdgesBetween(dgl_type_t etype, IdArray src_ids, IdArray dst_ids) const override {
LOG(INFO) << "Not enabled for COO graph.";
return {};
CHECK(aten::IsValidIdArray(src_ids)) << "Invalid vertex id array.";
CHECK(aten::IsValidIdArray(dst_ids)) << "Invalid vertex id array.";
return aten::COOIsNonZero(adj_, src_ids, dst_ids);
}
IdArray Predecessors(dgl_type_t etype, dgl_id_t dst) const override {
LOG(INFO) << "Not enabled for COO graph.";
return {};
CHECK(HasVertex(DstType(), dst)) << "Invalid dst vertex id: " << dst;
return aten::COOGetRowDataAndIndices(aten::COOTranspose(adj_), dst).second;
}
IdArray Successors(dgl_type_t etype, dgl_id_t src) const override {
LOG(INFO) << "Not enabled for COO graph.";
return {};
CHECK(HasVertex(SrcType(), src)) << "Invalid src vertex id: " << src;
return aten::COOGetRowDataAndIndices(adj_, src).second;
}
IdArray EdgeId(dgl_type_t etype, dgl_id_t src, dgl_id_t dst) const override {
LOG(INFO) << "Not enabled for COO graph.";
return {};
CHECK(HasVertex(SrcType(), src)) << "Invalid src vertex id: " << src;
CHECK(HasVertex(DstType(), dst)) << "Invalid dst vertex id: " << dst;
return aten::COOGetData(adj_, src, dst);
}
EdgeArray EdgeIds(dgl_type_t etype, IdArray src, IdArray dst) const override {
LOG(INFO) << "Not enabled for COO graph.";
return {};
CHECK(aten::IsValidIdArray(src)) << "Invalid vertex id array.";
CHECK(aten::IsValidIdArray(dst)) << "Invalid vertex id array.";
const auto& arrs = aten::COOGetDataAndIndices(adj_, src, dst);
return EdgeArray{arrs[0], arrs[1], arrs[2]};
}
std::pair<dgl_id_t, dgl_id_t> FindEdge(dgl_type_t etype, dgl_id_t eid) const override {
......@@ -200,23 +235,32 @@ class UnitGraph::COO : public BaseHeteroGraph {
}
EdgeArray InEdges(dgl_type_t etype, dgl_id_t vid) const override {
LOG(INFO) << "Not enabled for COO graph.";
return {};
IdArray ret_src, ret_eid;
std::tie(ret_eid, ret_src) = aten::COOGetRowDataAndIndices(
aten::COOTranspose(adj_), vid);
IdArray ret_dst = aten::Full(vid, ret_src->shape[0], NumBits(), ret_src->ctx);
return EdgeArray{ret_src, ret_dst, ret_eid};
}
EdgeArray InEdges(dgl_type_t etype, IdArray vids) const override {
LOG(INFO) << "Not enabled for COO graph.";
return {};
CHECK(aten::IsValidIdArray(vids)) << "Invalid vertex id array.";
auto coosubmat = aten::COOSliceRows(aten::COOTranspose(adj_), vids);
auto row = aten::IndexSelect(vids, coosubmat.row);
return EdgeArray{coosubmat.col, row, coosubmat.data};
}
EdgeArray OutEdges(dgl_type_t etype, dgl_id_t vid) const override {
LOG(INFO) << "Not enabled for COO graph.";
return {};
IdArray ret_dst, ret_eid;
std::tie(ret_eid, ret_dst) = aten::COOGetRowDataAndIndices(adj_, vid);
IdArray ret_src = aten::Full(vid, ret_dst->shape[0], NumBits(), ret_dst->ctx);
return EdgeArray{ret_src, ret_dst, ret_eid};
}
EdgeArray OutEdges(dgl_type_t etype, IdArray vids) const override {
LOG(INFO) << "Not enabled for COO graph.";
return {};
CHECK(aten::IsValidIdArray(vids)) << "Invalid vertex id array.";
auto coosubmat = aten::COOSliceRows(adj_, vids);
auto row = aten::IndexSelect(vids, coosubmat.row);
return EdgeArray{row, coosubmat.col, coosubmat.data};
}
EdgeArray Edges(dgl_type_t etype, const std::string &order = "") const override {
......@@ -228,23 +272,23 @@ class UnitGraph::COO : public BaseHeteroGraph {
}
uint64_t InDegree(dgl_type_t etype, dgl_id_t vid) const override {
LOG(INFO) << "Not enabled for COO graph.";
return {};
CHECK(HasVertex(DstType(), vid)) << "Invalid dst vertex id: " << vid;
return aten::COOGetRowNNZ(aten::COOTranspose(adj_), vid);
}
DegreeArray InDegrees(dgl_type_t etype, IdArray vids) const override {
LOG(INFO) << "Not enabled for COO graph.";
return {};
CHECK(aten::IsValidIdArray(vids)) << "Invalid vertex id array.";
return aten::COOGetRowNNZ(aten::COOTranspose(adj_), vids);
}
uint64_t OutDegree(dgl_type_t etype, dgl_id_t vid) const override {
LOG(INFO) << "Not enabled for COO graph.";
return {};
CHECK(HasVertex(SrcType(), vid)) << "Invalid src vertex id: " << vid;
return aten::COOGetRowNNZ(adj_, vid);
}
DegreeArray OutDegrees(dgl_type_t etype, IdArray vids) const override {
LOG(INFO) << "Not enabled for COO graph.";
return {};
CHECK(aten::IsValidIdArray(vids)) << "Invalid vertex id array.";
return aten::COOGetRowNNZ(adj_, vids);
}
DGLIdIters SuccVec(dgl_type_t etype, dgl_id_t vid) const override {
......@@ -278,8 +322,18 @@ class UnitGraph::COO : public BaseHeteroGraph {
}
HeteroSubgraph VertexSubgraph(const std::vector<IdArray>& vids) const override {
LOG(INFO) << "Not enabled for COO graph.";
return {};
CHECK_EQ(vids.size(), NumVertexTypes()) << "Number of vertex types mismatch";
auto srcvids = vids[SrcType()], dstvids = vids[DstType()];
CHECK(aten::IsValidIdArray(srcvids)) << "Invalid vertex id array.";
CHECK(aten::IsValidIdArray(dstvids)) << "Invalid vertex id array.";
HeteroSubgraph subg;
const auto& submat = aten::COOSliceMatrix(adj_, srcvids, dstvids);
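// submat.data holds the parent-graph edge IDs of the entries that survive the slice;
// they become the induced edge mapping of the subgraph below.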
IdArray sub_eids = aten::Range(0, submat.data->shape[0], NumBits(), Context());
subg.graph = std::make_shared<COO>(meta_graph(), submat.num_rows, submat.num_cols,
submat.row, submat.col);
subg.induced_vertices = vids;
subg.induced_edges.emplace_back(submat.data);
return subg;
}
HeteroSubgraph EdgeSubgraph(
......@@ -312,6 +366,15 @@ class UnitGraph::COO : public BaseHeteroGraph {
return adj_;
}
/*!
* \brief Determines whether the graph is "hypersparse", i.e. the number of source nodes
* is significantly larger than the number of edges (more than 8x) and there are more
* than one million source nodes.
*/
bool IsHypersparse() const {
return (NumVertices(SrcType()) / 8 > NumEdges(EdgeType())) &&
(NumVertices(SrcType()) > 1000000);
}
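// For example (hypothetical sizes): 10,000,000 source nodes with 100,000 edges satisfies
// both conditions (10,000,000 / 8 > 100,000 and 10,000,000 > 1,000,000) and is treated as
// hypersparse, whereas 10,000 nodes with 100 edges is not, because the absolute node
// count falls below the one-million cutoff.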
private:
/*! \brief internal adjacency matrix. Data array is empty */
aten::COOMatrix adj_;
......@@ -392,6 +455,10 @@ class UnitGraph::CSR : public BaseHeteroGraph {
LOG(FATAL) << "UnitGraph graph is not mutable.";
}
DLDataType DataType() const override {
return adj_.indices->dtype;
}
DLContext Context() const override {
return adj_.indices->ctx;
}
......@@ -643,6 +710,10 @@ class UnitGraph::CSR : public BaseHeteroGraph {
//
//////////////////////////////////////////////////////////
DLDataType UnitGraph::DataType() const {
return GetAny()->DataType();
}
DLContext UnitGraph::Context() const {
return GetAny()->Context();
}
......@@ -656,14 +727,13 @@ bool UnitGraph::IsMultigraph() const {
}
uint64_t UnitGraph::NumVertices(dgl_type_t vtype) const {
if (in_csr_) {
const SparseFormat fmt = SelectFormat(SparseFormat::ANY);
const auto ptr = GetFormat(fmt);
// TODO(BarclayII): we have a lot of special handling for CSC.
// Need to have a UnitGraph::CSC backend instead.
if (fmt == SparseFormat::CSC)
vtype = (vtype == SrcType()) ? DstType() : SrcType();
return in_csr_->NumVertices(vtype);
} else if (out_csr_) {
return out_csr_->NumVertices(vtype);
} else {
return GetCOO()->NumVertices(vtype);
}
return ptr->NumVertices(vtype);
}
uint64_t UnitGraph::NumEdges(dgl_type_t etype) const {
......@@ -671,14 +741,11 @@ uint64_t UnitGraph::NumEdges(dgl_type_t etype) const {
}
bool UnitGraph::HasVertex(dgl_type_t vtype, dgl_id_t vid) const {
if (in_csr_) {
const SparseFormat fmt = SelectFormat(SparseFormat::ANY);
const auto ptr = GetFormat(fmt);
if (fmt == SparseFormat::CSC)
vtype = (vtype == SrcType()) ? DstType() : SrcType();
return in_csr_->HasVertex(vtype, vid);
} else if (out_csr_) {
return out_csr_->HasVertex(vtype, vid);
} else {
return GetCOO()->HasVertex(vtype, vid);
}
return ptr->HasVertex(vtype, vid);
}
BoolArray UnitGraph::HasVertices(dgl_type_t vtype, IdArray vids) const {
......@@ -687,125 +754,184 @@ BoolArray UnitGraph::HasVertices(dgl_type_t vtype, IdArray vids) const {
}
bool UnitGraph::HasEdgeBetween(dgl_type_t etype, dgl_id_t src, dgl_id_t dst) const {
if (in_csr_) {
return in_csr_->HasEdgeBetween(etype, dst, src);
} else {
return GetOutCSR()->HasEdgeBetween(etype, src, dst);
}
const SparseFormat fmt = SelectFormat(SparseFormat::ANY);
const auto ptr = GetFormat(fmt);
if (fmt == SparseFormat::CSC)
return ptr->HasEdgeBetween(etype, dst, src);
else
return ptr->HasEdgeBetween(etype, src, dst);
}
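// When SelectFormat picks CSC, the backing structure is the in-CSR, i.e. the transposed
// adjacency, so (src, dst) queries are issued as (dst, src), and routines that return
// endpoint arrays swap them back; the functions below follow the same convention.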
BoolArray UnitGraph::HasEdgesBetween(
dgl_type_t etype, IdArray src, IdArray dst) const {
if (in_csr_) {
return in_csr_->HasEdgesBetween(etype, dst, src);
} else {
return GetOutCSR()->HasEdgesBetween(etype, src, dst);
}
const SparseFormat fmt = SelectFormat(SparseFormat::ANY);
const auto ptr = GetFormat(fmt);
if (fmt == SparseFormat::CSC)
return ptr->HasEdgesBetween(etype, dst, src);
else
return ptr->HasEdgesBetween(etype, src, dst);
}
IdArray UnitGraph::Predecessors(dgl_type_t etype, dgl_id_t dst) const {
return GetInCSR()->Successors(etype, dst);
const SparseFormat fmt = SelectFormat(SparseFormat::CSC);
const auto ptr = GetFormat(fmt);
if (fmt == SparseFormat::CSC)
return ptr->Successors(etype, dst);
else
return ptr->Predecessors(etype, dst);
}
IdArray UnitGraph::Successors(dgl_type_t etype, dgl_id_t src) const {
return GetOutCSR()->Successors(etype, src);
const SparseFormat fmt = SelectFormat(SparseFormat::CSR);
const auto ptr = GetFormat(fmt);
return ptr->Successors(etype, src);
}
IdArray UnitGraph::EdgeId(dgl_type_t etype, dgl_id_t src, dgl_id_t dst) const {
if (in_csr_) {
return in_csr_->EdgeId(etype, dst, src);
} else {
return GetOutCSR()->EdgeId(etype, src, dst);
}
const SparseFormat fmt = SelectFormat(SparseFormat::ANY);
const auto ptr = GetFormat(fmt);
if (fmt == SparseFormat::CSC)
return ptr->EdgeId(etype, dst, src);
else
return ptr->EdgeId(etype, src, dst);
}
EdgeArray UnitGraph::EdgeIds(dgl_type_t etype, IdArray src, IdArray dst) const {
if (in_csr_) {
EdgeArray edges = in_csr_->EdgeIds(etype, dst, src);
const SparseFormat fmt = SelectFormat(SparseFormat::ANY);
const auto ptr = GetFormat(fmt);
if (fmt == SparseFormat::CSC) {
EdgeArray edges = ptr->EdgeIds(etype, dst, src);
return EdgeArray{edges.dst, edges.src, edges.id};
} else {
return GetOutCSR()->EdgeIds(etype, src, dst);
return ptr->EdgeIds(etype, src, dst);
}
}
std::pair<dgl_id_t, dgl_id_t> UnitGraph::FindEdge(dgl_type_t etype, dgl_id_t eid) const {
return GetCOO()->FindEdge(etype, eid);
const SparseFormat fmt = SelectFormat(SparseFormat::COO);
const auto ptr = GetFormat(fmt);
return ptr->FindEdge(etype, eid);
}
EdgeArray UnitGraph::FindEdges(dgl_type_t etype, IdArray eids) const {
return GetCOO()->FindEdges(etype, eids);
const SparseFormat fmt = SelectFormat(SparseFormat::COO);
const auto ptr = GetFormat(fmt);
return ptr->FindEdges(etype, eids);
}
EdgeArray UnitGraph::InEdges(dgl_type_t etype, dgl_id_t vid) const {
const EdgeArray& ret = GetInCSR()->OutEdges(etype, vid);
const SparseFormat fmt = SelectFormat(SparseFormat::CSC);
const auto ptr = GetFormat(fmt);
if (fmt == SparseFormat::CSC) {
const EdgeArray& ret = ptr->OutEdges(etype, vid);
return {ret.dst, ret.src, ret.id};
} else {
return ptr->InEdges(etype, vid);
}
}
EdgeArray UnitGraph::InEdges(dgl_type_t etype, IdArray vids) const {
const EdgeArray& ret = GetInCSR()->OutEdges(etype, vids);
const SparseFormat fmt = SelectFormat(SparseFormat::CSC);
const auto ptr = GetFormat(fmt);
if (fmt == SparseFormat::CSC) {
const EdgeArray& ret = ptr->OutEdges(etype, vids);
return {ret.dst, ret.src, ret.id};
} else {
return ptr->InEdges(etype, vids);
}
}
EdgeArray UnitGraph::OutEdges(dgl_type_t etype, dgl_id_t vid) const {
return GetOutCSR()->OutEdges(etype, vid);
const SparseFormat fmt = SelectFormat(SparseFormat::CSR);
const auto ptr = GetFormat(fmt);
return ptr->OutEdges(etype, vid);
}
EdgeArray UnitGraph::OutEdges(dgl_type_t etype, IdArray vids) const {
return GetOutCSR()->OutEdges(etype, vids);
const SparseFormat fmt = SelectFormat(SparseFormat::CSR);
const auto ptr = GetFormat(fmt);
return ptr->OutEdges(etype, vids);
}
EdgeArray UnitGraph::Edges(dgl_type_t etype, const std::string &order) const {
if (order.empty()) {
SparseFormat fmt;
if (order == std::string("eid")) {
fmt = SelectFormat(SparseFormat::COO);
} else if (order.empty()) {
// arbitrary order
if (in_csr_) {
// transpose
const auto& edges = in_csr_->Edges(etype, order);
return EdgeArray{edges.dst, edges.src, edges.id};
} else {
return GetAny()->Edges(etype, order);
}
fmt = SelectFormat(SparseFormat::ANY);
} else if (order == std::string("srcdst")) {
// TODO(minjie): CSR only guarantees "src" to be sorted.
// Maybe we should relax this requirement?
return GetOutCSR()->Edges(etype, order);
} else if (order == std::string("eid")) {
return GetCOO()->Edges(etype, order);
fmt = SelectFormat(SparseFormat::CSR);
} else {
LOG(FATAL) << "Unsupported order request: " << order;
}
return {};
}
const auto& edges = GetFormat(fmt)->Edges(etype, order);
if (fmt == SparseFormat::CSC)
return EdgeArray{edges.dst, edges.src, edges.id};
else
return edges;
}
uint64_t UnitGraph::InDegree(dgl_type_t etype, dgl_id_t vid) const {
return GetInCSR()->OutDegree(etype, vid);
SparseFormat fmt = SelectFormat(SparseFormat::CSC);
const auto ptr = GetFormat(fmt);
if (fmt == SparseFormat::CSC)
return ptr->OutDegree(etype, vid);
else
return ptr->InDegree(etype, vid);
}
DegreeArray UnitGraph::InDegrees(dgl_type_t etype, IdArray vids) const {
return GetInCSR()->OutDegrees(etype, vids);
SparseFormat fmt = SelectFormat(SparseFormat::CSC);
const auto ptr = GetFormat(fmt);
if (fmt == SparseFormat::CSC)
return ptr->OutDegrees(etype, vids);
else
return ptr->InDegrees(etype, vids);
}
uint64_t UnitGraph::OutDegree(dgl_type_t etype, dgl_id_t vid) const {
return GetOutCSR()->OutDegree(etype, vid);
SparseFormat fmt = SelectFormat(SparseFormat::CSR);
const auto ptr = GetFormat(fmt);
return ptr->OutDegree(etype, vid);
}
DegreeArray UnitGraph::OutDegrees(dgl_type_t etype, IdArray vids) const {
return GetOutCSR()->OutDegrees(etype, vids);
SparseFormat fmt = SelectFormat(SparseFormat::CSR);
const auto ptr = GetFormat(fmt);
return ptr->OutDegrees(etype, vids);
}
DGLIdIters UnitGraph::SuccVec(dgl_type_t etype, dgl_id_t vid) const {
return GetOutCSR()->SuccVec(etype, vid);
SparseFormat fmt = SelectFormat(SparseFormat::CSR);
const auto ptr = GetFormat(fmt);
return ptr->SuccVec(etype, vid);
}
DGLIdIters UnitGraph::OutEdgeVec(dgl_type_t etype, dgl_id_t vid) const {
return GetOutCSR()->OutEdgeVec(etype, vid);
SparseFormat fmt = SelectFormat(SparseFormat::CSR);
const auto ptr = GetFormat(fmt);
return ptr->OutEdgeVec(etype, vid);
}
DGLIdIters UnitGraph::PredVec(dgl_type_t etype, dgl_id_t vid) const {
return GetInCSR()->SuccVec(etype, vid);
SparseFormat fmt = SelectFormat(SparseFormat::CSC);
const auto ptr = GetFormat(fmt);
if (fmt == SparseFormat::CSC)
return ptr->SuccVec(etype, vid);
else
return ptr->PredVec(etype, vid);
}
DGLIdIters UnitGraph::InEdgeVec(dgl_type_t etype, dgl_id_t vid) const {
return GetInCSR()->OutEdgeVec(etype, vid);
SparseFormat fmt = SelectFormat(SparseFormat::CSC);
const auto ptr = GetFormat(fmt);
if (fmt == SparseFormat::CSC)
return ptr->OutEdgeVec(etype, vid);
else
return ptr->InEdgeVec(etype, vid);
}
std::vector<IdArray> UnitGraph::GetAdj(
......@@ -830,7 +956,8 @@ std::vector<IdArray> UnitGraph::GetAdj(
HeteroSubgraph UnitGraph::VertexSubgraph(const std::vector<IdArray>& vids) const {
// We prefer to generate a subgraph from out-csr.
auto sg = GetOutCSR()->VertexSubgraph(vids);
SparseFormat fmt = SelectFormat(SparseFormat::CSR);
HeteroSubgraph sg = GetFormat(fmt)->VertexSubgraph(vids);
CSRPtr subcsr = std::dynamic_pointer_cast<CSR>(sg.graph);
HeteroSubgraph ret;
ret.graph = HeteroGraphPtr(new UnitGraph(meta_graph(), nullptr, subcsr, nullptr));
......@@ -841,7 +968,8 @@ HeteroSubgraph UnitGraph::VertexSubgraph(const std::vector<IdArray>& vids) const
HeteroSubgraph UnitGraph::EdgeSubgraph(
const std::vector<IdArray>& eids, bool preserve_nodes) const {
auto sg = GetCOO()->EdgeSubgraph(eids, preserve_nodes);
SparseFormat fmt = SelectFormat(SparseFormat::COO);
auto sg = GetFormat(fmt)->EdgeSubgraph(eids, preserve_nodes);
COOPtr subcoo = std::dynamic_pointer_cast<COO>(sg.graph);
HeteroSubgraph ret;
ret.graph = HeteroGraphPtr(new UnitGraph(meta_graph(), nullptr, nullptr, subcoo));
......@@ -851,24 +979,27 @@ HeteroSubgraph UnitGraph::EdgeSubgraph(
}
HeteroGraphPtr UnitGraph::CreateFromCOO(
int64_t num_vtypes, int64_t num_src, int64_t num_dst, IdArray row, IdArray col) {
int64_t num_vtypes, int64_t num_src, int64_t num_dst, IdArray row, IdArray col,
SparseFormat restrict_format) {
CHECK(num_vtypes == 1 || num_vtypes == 2);
if (num_vtypes == 1)
CHECK_EQ(num_src, num_dst);
auto mg = CreateUnitGraphMetaGraph(num_vtypes);
COOPtr coo(new COO(mg, num_src, num_dst, row, col));
return HeteroGraphPtr(new UnitGraph(mg, nullptr, nullptr, coo));
return HeteroGraphPtr(
new UnitGraph(mg, nullptr, nullptr, coo, restrict_format));
}
HeteroGraphPtr UnitGraph::CreateFromCSR(
int64_t num_vtypes, int64_t num_src, int64_t num_dst,
IdArray indptr, IdArray indices, IdArray edge_ids) {
IdArray indptr, IdArray indices, IdArray edge_ids, SparseFormat restrict_format) {
CHECK(num_vtypes == 1 || num_vtypes == 2);
if (num_vtypes == 1)
CHECK_EQ(num_src, num_dst);
auto mg = CreateUnitGraphMetaGraph(num_vtypes);
CSRPtr csr(new CSR(mg, num_src, num_dst, indptr, indices, edge_ids));
return HeteroGraphPtr(new UnitGraph(mg, nullptr, csr, nullptr));
return HeteroGraphPtr(new UnitGraph(mg, nullptr, csr, nullptr, restrict_format));
}
HeteroGraphPtr UnitGraph::AsNumBits(HeteroGraphPtr g, uint8_t bits) {
......@@ -881,9 +1012,11 @@ HeteroGraphPtr UnitGraph::AsNumBits(HeteroGraphPtr g, uint8_t bits) {
// be fixed later.
auto bg = std::dynamic_pointer_cast<UnitGraph>(g);
CHECK_NOTNULL(bg);
CSRPtr new_incsr = CSRPtr(new CSR(bg->GetInCSR()->AsNumBits(bits)));
CSRPtr new_outcsr = CSRPtr(new CSR(bg->GetOutCSR()->AsNumBits(bits)));
return HeteroGraphPtr(new UnitGraph(g->meta_graph(), new_incsr, new_outcsr, nullptr));
return HeteroGraphPtr(
new UnitGraph(g->meta_graph(), new_incsr, new_outcsr, nullptr, bg->restrict_format_));
}
}
......@@ -897,13 +1030,26 @@ HeteroGraphPtr UnitGraph::CopyTo(HeteroGraphPtr g, const DLContext& ctx) {
// be fixed later.
auto bg = std::dynamic_pointer_cast<UnitGraph>(g);
CHECK_NOTNULL(bg);
CSRPtr new_incsr = CSRPtr(new CSR(bg->GetInCSR()->CopyTo(ctx)));
CSRPtr new_outcsr = CSRPtr(new CSR(bg->GetOutCSR()->CopyTo(ctx)));
return HeteroGraphPtr(new UnitGraph(g->meta_graph(), new_incsr, new_outcsr, nullptr));
return HeteroGraphPtr(
new UnitGraph(g->meta_graph(), new_incsr, new_outcsr, nullptr, bg->restrict_format_));
}
UnitGraph::UnitGraph(GraphPtr metagraph, CSRPtr in_csr, CSRPtr out_csr, COOPtr coo)
UnitGraph::UnitGraph(GraphPtr metagraph, CSRPtr in_csr, CSRPtr out_csr, COOPtr coo,
SparseFormat restrict_format)
: BaseHeteroGraph(metagraph), in_csr_(in_csr), out_csr_(out_csr), coo_(coo) {
restrict_format_ = restrict_format;
// If the graph is hypersparse and was given as COO, restrict the format to COO so that
// the CSR/CSC representations (whose indptr arrays have one entry per node) are never
// materialized.  If the graph was given as CSR, the indptr array already exists, so
// there is no point in restricting conversions (even if the graph is hypersparse).
if (restrict_format == SparseFormat::ANY) {
if (coo && coo->IsHypersparse())
restrict_format_ = SparseFormat::COO;
}
CHECK(GetAny()) << "At least one graph structure should exist.";
}
......@@ -977,4 +1123,33 @@ HeteroGraphPtr UnitGraph::GetAny() const {
}
}
HeteroGraphPtr UnitGraph::GetFormat(SparseFormat format) const {
switch (format) {
case SparseFormat::CSR:
return GetOutCSR();
case SparseFormat::CSC:
return GetInCSR();
case SparseFormat::COO:
return GetCOO();
case SparseFormat::ANY:
return GetAny();
default:
LOG(FATAL) << "unsupported format code";
return nullptr;
}
}
SparseFormat UnitGraph::SelectFormat(SparseFormat preferred_format) const {
if (restrict_format_ != SparseFormat::ANY)
return restrict_format_;
else if (preferred_format != SparseFormat::ANY)
return preferred_format;
else if (in_csr_)
return SparseFormat::CSC;
else if (out_csr_)
return SparseFormat::CSR;
else
return SparseFormat::COO;
}
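// Illustrative routing under the COO restriction (the hypersparse case): SelectFormat
// ignores the preferred format and always returns COO, so e.g. InDegree, which would
// otherwise prefer CSC, falls through to UnitGraph::COO::InDegree and counts the
// non-zeros of the transposed COO instead of materializing an indptr-based CSC.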
} // namespace dgl