"src/git@developer.sourcefind.cn:renzhc/diffusers_dcu.git" did not exist on "dd3cae33279076d813000f03c63e4ee84ce0fa77"
Unverified Commit bbebde46 authored by nv-dlasalle's avatar nv-dlasalle Committed by GitHub
Browse files

[Performance] Track sorted status of COO from creation (#2645)



* Add row/col sorted flags

* improve sorting paths

* Remove print statement

* Keep track of sorted matrices

* Remove sort check in to_block

* Improve CPU sorted COO->CSR

* Handle the zero edge case

* Remove omp default clause to work with MSVC

* Update comments on sorted COO->CSR cpu implementation

* Expose sorted to python interface

* Make check_sorted default to false for dgl.graph()

* remove check sorted; add utests

* remove check_sorted flag
Co-authored-by: Minjie Wang <wmjlyjemaine@gmail.com>
parent 302125ef
......@@ -604,12 +604,17 @@ HeteroGraphPtr CreateHeteroGraph(
* \param num_dst Number of nodes in the destination type.
* \param row Src node ids of the edges.
* \param col Dst node ids of the edges.
* \param row_sorted Whether the `row` array is in sorted ascending order.
* \param col_sorted When `row_sorted` is true, whether the columns within each
* row are also sorted. When `row_sorted` is false, this flag must also be
* false.
* \param formats Sparse formats used for storing this graph.
* \return A heterograph pointer.
*/
HeteroGraphPtr CreateFromCOO(
int64_t num_vtypes, int64_t num_src, int64_t num_dst,
IdArray row, IdArray col, dgl_format_code_t formats = ALL_CODE);
IdArray row, IdArray col, bool row_sorted = false, bool col_sorted = false,
dgl_format_code_t formats = ALL_CODE);
/*!
* \brief Create a heterograph from COO input.
......
......@@ -267,7 +267,8 @@ class CSR : public GraphInterface {
class COO : public GraphInterface {
public:
// Create a coo graph that shares the given src and dst.
// The optional row_sorted / col_sorted flags (both default to false) let the
// caller declare that the edge arrays are already sorted; the definition
// forwards them to the underlying aten::COOMatrix.
// NOTE(review): presumably, as for the heterograph CreateFromCOO flags,
// col_sorted is only meaningful when row_sorted is true -- confirm.
COO(int64_t num_vertices, IdArray src, IdArray dst);
COO(int64_t num_vertices, IdArray src, IdArray dst,
bool row_sorted = false, bool col_sorted = false);
// TODO(da): add constructor for creating COO from shared memory
......@@ -891,7 +892,8 @@ class ImmutableGraph: public GraphInterface {
/*!
 * \brief Create an immutable graph from COO.
 * \param num_vertices Number of vertices in the graph.
 * \param src Source node ids of the edges.
 * \param dst Destination node ids of the edges.
 * \param row_sorted Caller's declaration that `src` is already sorted
 *        (defaults to false).
 * \param col_sorted Caller's declaration that `dst` is sorted within each
 *        group of equal `src` values (defaults to false).
 * \return A pointer to the newly created immutable graph.
 */
static ImmutableGraphPtr CreateFromCOO(
int64_t num_vertices, IdArray src, IdArray dst);
int64_t num_vertices, IdArray src, IdArray dst,
bool row_sorted = false, bool col_sorted = false);
/*!
* \brief Convert the given graph to an immutable graph.
......
......@@ -37,6 +37,8 @@ def graph(data,
num_nodes=None,
idtype=None,
device=None,
row_sorted=False,
col_sorted=False,
**deprecated_kwargs):
"""Create a graph and return.
......@@ -72,6 +74,11 @@ def graph(data,
the :attr:`data` argument. If :attr:`data` is not a tuple of node-tensors, the
returned graph is on CPU. If the specified :attr:`device` differs from that of the
provided tensors, it casts the given tensors to the specified device first.
row_sorted : bool, optional
Whether or not the rows of the COO are in ascending order.
col_sorted : bool, optional
Whether or not the columns of the COO are in ascending order within
each row. This only has an effect when ``row_sorted`` is True.
Returns
-------
......@@ -158,7 +165,9 @@ def graph(data,
' but got {} and {}.'.format(num_nodes, max(urange, vrange) - 1))
urange, vrange = num_nodes, num_nodes
g = create_from_edges(u, v, '_N', '_E', '_N', urange, vrange, validate=False)
g = create_from_edges(u, v, '_N', '_E', '_N', urange, vrange,
row_sorted=row_sorted, col_sorted=col_sorted,
validate=False)
return g.to(device)
......@@ -1590,7 +1599,9 @@ DGLHeteroGraph.to_networkx = to_networkx
def create_from_edges(u, v,
utype, etype, vtype,
urange, vrange,
validate=True):
validate=True,
row_sorted=False,
col_sorted=False):
"""Internal function to create a graph from incident nodes with types.
utype could be equal to vtype
......@@ -1615,6 +1626,12 @@ def create_from_edges(u, v,
maximum of the destination node IDs in the edge list plus 1. (Default: None)
validate : bool, optional
If True, checks if node IDs are within range.
row_sorted : bool, optional
Whether or not the rows of the COO are in ascending order.
col_sorted : bool, optional
Whether or not the columns of the COO are in ascending order within
each row. This only has an effect when ``row_sorted`` is True.
Returns
-------
......@@ -1636,7 +1653,8 @@ def create_from_edges(u, v,
num_ntypes = 2
hgidx = heterograph_index.create_unitgraph_from_coo(
num_ntypes, urange, vrange, u, v, ['coo', 'csr', 'csc'])
num_ntypes, urange, vrange, u, v, ['coo', 'csr', 'csc'],
row_sorted, col_sorted)
if utype == vtype:
return DGLHeteroGraph(hgidx, [utype], [etype])
else:
......
......@@ -970,7 +970,7 @@ class HeteroSubgraphIndex(ObjectBase):
#################################################################
def create_unitgraph_from_coo(num_ntypes, num_src, num_dst, row, col,
formats):
formats, row_sorted=False, col_sorted=False):
"""Create a unitgraph graph index from COO format
Parameters
......@@ -987,6 +987,11 @@ def create_unitgraph_from_coo(num_ntypes, num_src, num_dst, row, col,
Col index.
formats : list of str.
Restrict the storage formats allowed for the unit graph.
row_sorted : bool, optional
Whether or not the rows of the COO are in ascending order.
col_sorted : bool, optional
Whether or not the columns of the COO are in ascending order within
each row. This only has an effect when ``row_sorted`` is True.
Returns
-------
......@@ -997,7 +1002,7 @@ def create_unitgraph_from_coo(num_ntypes, num_src, num_dst, row, col,
return _CAPI_DGLHeteroCreateUnitGraphFromCOO(
int(num_ntypes), int(num_src), int(num_dst),
F.to_dgl_nd(row), F.to_dgl_nd(col),
formats)
formats, row_sorted, col_sorted)
def create_unitgraph_from_csr(num_ntypes, num_src, num_dst, indptr, indices, edge_ids,
formats):
......
......@@ -8,6 +8,7 @@
#include <unordered_set>
#include <unordered_map>
#include <tuple>
#include <numeric>
#include "array_utils.h"
namespace dgl {
......@@ -312,26 +313,85 @@ CSRMatrix COOToCSR(COOMatrix coo) {
NDArray ret_indices;
NDArray ret_data;
bool row_sorted = coo.row_sorted;
bool col_sorted = coo.col_sorted;
const bool row_sorted = coo.row_sorted;
const bool col_sorted = coo.col_sorted;
if (row_sorted) {
// compute indptr
IdType* Bp = static_cast<IdType*>(ret_indptr->data);
IdType* const Bp = static_cast<IdType*>(ret_indptr->data);
Bp[0] = 0;
int64_t j = 0;
for (int64_t i = 0; i < N; ++i) {
const int64_t k = j;
for (; j < NNZ && row_data[j] == i; ++j) {}
Bp[i + 1] = Bp[i] + j - k;
if (!data) {
// Leave empty, and populate from inside of parallel block
coo.data = NDArray::Empty({NNZ}, coo.row->dtype, coo.row->ctx);
}
IdType * const fill_data = data ? nullptr : static_cast<IdType*>(coo.data->data);
// TODO(minjie): Many of our current implementation assumes that CSR must have
// a data array. This is a temporary workaround. Remove this after:
// - The old immutable graph implementation is deprecated.
// - The old binary reduce kernel is deprecated.
if (!COOHasData(coo))
coo.data = aten::Range(0, NNZ, coo.row->dtype.bits, coo.row->ctx);
if (NNZ > 0) {
#pragma omp parallel
{
const int num_threads = omp_get_num_threads();
const int thread_id = omp_get_thread_num();
// We partition the set of non-zeros among the threads
const int64_t nz_chunk = (NNZ+num_threads-1)/num_threads;
const int64_t nz_start = thread_id*nz_chunk;
const int64_t nz_end = std::min(NNZ, nz_start+nz_chunk);
// Each thread searches the row array for a change, and marks its
// location in Bp. Threads, other than the first, start at the last
// index covered by the previous, in order to detect changes in the row
// array between thread partitions. This means that each thread after
// the first, searches the range [nz_start-1, nz_end). That is,
// if we had 10 non-zeros, and 4 threads, the indexes searched by each
// thread would be:
// 0: [0, 1, 2]
// 1: [2, 3, 4, 5]
// 2: [5, 6, 7, 8]
// 3: [8, 9]
//
// That way, if the row array were [0, 0, 1, 2, 2, 2, 4, 5, 5, 6], each
// change in row would be captured by one thread:
//
// 0: [0, 0, 1] - row 0
// 1: [1, 2, 2, 2] - row 1
// 2: [2, 4, 5, 5] - rows 2, 3, and 4
// 3: [5, 6] - rows 5 and 6
//
int64_t row = 0;
if (nz_start < nz_end) {
row = nz_start == 0 ? 0 : row_data[nz_start-1];
for (int64_t i = nz_start; i < nz_end; ++i) {
while (row != row_data[i]) {
++row;
Bp[row] = i;
}
}
// We will not detect the row change for the last row, nor any empty
// rows at the end of the matrix, so the last active thread needs to
// mark all remaining rows in Bp with NNZ.
if (nz_end == NNZ) {
while (row < N) {
++row;
Bp[row] = NNZ;
}
}
if (fill_data) {
// TODO(minjie): Many of our current implementation assumes that CSR must have
// a data array. This is a temporary workaround. Remove this after:
// - The old immutable graph implementation is deprecated.
// - The old binary reduce kernel is deprecated.
std::iota(fill_data+nz_start,
fill_data+nz_end,
nz_start);
}
}
}
} else {
std::fill(Bp, Bp+N+1, 0);
}
// compute indices and data
ret_indices = coo.col;
......
......@@ -18,9 +18,10 @@ HeteroGraphPtr CreateHeteroGraph(
// Create a heterograph from COO input.
//
// Delegates to UnitGraph::CreateFromCOO to build a single unit graph from the
// row/col arrays, forwarding the row_sorted / col_sorted flags and the format
// code unchanged, then wraps that unit graph in a new HeteroGraph.
HeteroGraphPtr CreateFromCOO(
int64_t num_vtypes, int64_t num_src, int64_t num_dst,
IdArray row, IdArray col, dgl_format_code_t formats) {
IdArray row, IdArray col,
bool row_sorted, bool col_sorted, dgl_format_code_t formats) {
auto unit_g = UnitGraph::CreateFromCOO(
num_vtypes, num_src, num_dst, row, col, formats);
num_vtypes, num_src, num_dst, row, col, row_sorted, col_sorted, formats);
// The new HeteroGraph reuses the unit graph's meta graph and holds the unit
// graph as its only relation graph.
return HeteroGraphPtr(new HeteroGraph(unit_g->meta_graph(), {unit_g}));
}
......
......@@ -4,6 +4,7 @@
* \brief Heterograph CAPI bindings.
*/
#include <dgl/array.h>
#include <dgl/aten/coo.h>
#include <dgl/packed_func_ext.h>
#include <dgl/immutable_graph.h>
#include <dgl/runtime/container.h>
......@@ -29,13 +30,16 @@ DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroCreateUnitGraphFromCOO")
IdArray row = args[3];
IdArray col = args[4];
List<Value> formats = args[5];
bool row_sorted = args[6];
bool col_sorted = args[7];
std::vector<SparseFormat> formats_vec;
for (Value val : formats) {
std::string fmt = val->data;
formats_vec.push_back(ParseSparseFormat(fmt));
}
auto code = SparseFormatsToCode(formats_vec);
auto hgptr = CreateFromCOO(nvtypes, num_src, num_dst, row, col, code);
const auto code = SparseFormatsToCode(formats_vec);
auto hgptr = CreateFromCOO(nvtypes, num_src, num_dst, row, col,
row_sorted, col_sorted, code);
*rv = HeteroGraphRef(hgptr);
});
......@@ -53,7 +57,7 @@ DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroCreateUnitGraphFromCSR")
std::string fmt = val->data;
formats_vec.push_back(ParseSparseFormat(fmt));
}
auto code = SparseFormatsToCode(formats_vec);
const auto code = SparseFormatsToCode(formats_vec);
auto hgptr = CreateFromCSR(nvtypes, num_src, num_dst, indptr, indices, edge_ids, code);
*rv = HeteroGraphRef(hgptr);
});
......
......@@ -305,11 +305,13 @@ void CSR::Save(dmlc::Stream *fs) const {
// COO graph implementation
//
//////////////////////////////////////////////////////////
// Construct a COO graph over `num_vertices` vertices from src/dst id arrays.
//
// Both arrays must pass aten::IsValidIdArray and have the same length,
// otherwise the CHECKs fail. The adjacency is stored as a square
// num_vertices x num_vertices aten::COOMatrix.
// The row_sorted / col_sorted flags are stored exactly as given; nothing in
// this constructor verifies that the arrays are actually sorted.
COO::COO(int64_t num_vertices, IdArray src, IdArray dst) {
COO::COO(int64_t num_vertices, IdArray src, IdArray dst,
bool row_sorted, bool col_sorted) {
CHECK(aten::IsValidIdArray(src));
CHECK(aten::IsValidIdArray(dst));
CHECK_EQ(src->shape[0], dst->shape[0]);
adj_ = aten::COOMatrix{num_vertices, num_vertices, src, dst};
// aten::NullArray() fills the slot between dst and the sorted flags
// (presumably the edge-id/data array -- confirm against aten::COOMatrix).
adj_ = aten::COOMatrix{num_vertices, num_vertices, src, dst,
aten::NullArray(), row_sorted, col_sorted};
}
bool COO::IsMultigraph() const {
......@@ -537,8 +539,9 @@ ImmutableGraphPtr ImmutableGraph::CreateFromCSR(const std::string &name) {
}
// Create an immutable graph from COO arrays.
//
// Allocates a COO object from the given vertex count and src/dst arrays,
// passing the caller-supplied row_sorted / col_sorted flags through to the COO
// constructor, and returns a shared ImmutableGraph built from that COO.
ImmutableGraphPtr ImmutableGraph::CreateFromCOO(
int64_t num_vertices, IdArray src, IdArray dst) {
COOPtr coo(new COO(num_vertices, src, dst));
int64_t num_vertices, IdArray src, IdArray dst,
bool row_sorted, bool col_sorted) {
COOPtr coo(new COO(num_vertices, src, dst, row_sorted, col_sorted));
return std::make_shared<ImmutableGraph>(coo);
}
......
......@@ -31,6 +31,7 @@
* }
*
*/
#include <dgl/aten/coo.h>
#include <dgl/graph_op.h>
#include <dgl/immutable_graph.h>
#include <dgl/runtime/container.h>
......@@ -236,8 +237,15 @@ ImmutableGraphPtr ToImmutableGraph(GraphPtr g) {
EdgeArray earray = mgr->Edges("eid");
IdArray srcs_array = earray.src;
IdArray dsts_array = earray.dst;
bool row_sorted, col_sorted;
std::tie(row_sorted, col_sorted) = COOIsSorted(
aten::COOMatrix(mgr->NumVertices(), mgr->NumVertices(), srcs_array,
dsts_array));
ImmutableGraphPtr imgptr =
ImmutableGraph::CreateFromCOO(mgr->NumVertices(), srcs_array, dsts_array);
ImmutableGraph::CreateFromCOO(mgr->NumVertices(), srcs_array, dsts_array,
row_sorted, col_sorted);
return imgptr;
}
}
......
......@@ -61,12 +61,15 @@ inline GraphPtr CreateUnitGraphMetaGraph(int num_vtypes) {
class UnitGraph::COO : public BaseHeteroGraph {
public:
// Construct a COO unit graph with `num_src` source nodes and `num_dst`
// destination nodes from the src/dst id arrays.
//
// Both arrays must pass aten::IsValidIdArray and have the same length,
// otherwise the CHECKs fail. The optional row_sorted / col_sorted flags
// (default false) are stored in the adjacency matrix exactly as given; this
// constructor does not verify that the arrays are actually sorted.
COO(GraphPtr metagraph, int64_t num_src, int64_t num_dst, IdArray src, IdArray dst)
COO(GraphPtr metagraph, int64_t num_src, int64_t num_dst, IdArray src,
IdArray dst, bool row_sorted = false, bool col_sorted = false)
: BaseHeteroGraph(metagraph) {
CHECK(aten::IsValidIdArray(src));
CHECK(aten::IsValidIdArray(dst));
CHECK_EQ(src->shape[0], dst->shape[0]) << "Input arrays should have the same length.";
adj_ = aten::COOMatrix{num_src, num_dst, src, dst};
// NullArray() fills the slot between dst and the sorted flags (presumably
// the edge-id/data array -- confirm against aten::COOMatrix).
adj_ = aten::COOMatrix{num_src, num_dst, src, dst,
NullArray(),
row_sorted, col_sorted};
}
COO(GraphPtr metagraph, const aten::COOMatrix& coo)
......@@ -1140,12 +1143,14 @@ HeteroSubgraph UnitGraph::EdgeSubgraph(
HeteroGraphPtr UnitGraph::CreateFromCOO(
int64_t num_vtypes, int64_t num_src, int64_t num_dst,
IdArray row, IdArray col,
bool row_sorted, bool col_sorted,
dgl_format_code_t formats) {
CHECK(num_vtypes == 1 || num_vtypes == 2);
if (num_vtypes == 1)
CHECK_EQ(num_src, num_dst);
auto mg = CreateUnitGraphMetaGraph(num_vtypes);
COOPtr coo(new COO(mg, num_src, num_dst, row, col));
COOPtr coo(new COO(mg, num_src, num_dst, row, col,
row_sorted, col_sorted));
return HeteroGraphPtr(
new UnitGraph(mg, nullptr, nullptr, coo, formats));
......
......@@ -174,7 +174,8 @@ class UnitGraph : public BaseHeteroGraph {
/*!
 * \brief Create a graph from COO arrays
 * \param num_vtypes Number of vertex types.
 * \param num_src Number of nodes in the source type.
 * \param num_dst Number of nodes in the destination type.
 * \param row Src node ids of the edges.
 * \param col Dst node ids of the edges.
 * \param row_sorted Whether the `row` array is in sorted ascending order
 *        (defaults to false).
 * \param col_sorted When `row_sorted` is true, whether the columns within each
 *        row are also sorted (defaults to false).
 * \param formats Sparse formats used for storing this graph (defaults to
 *        ALL_CODE).
 * \return A heterograph pointer.
 */
static HeteroGraphPtr CreateFromCOO(
int64_t num_vtypes, int64_t num_src, int64_t num_dst,
IdArray row, IdArray col, dgl_format_code_t formats = ALL_CODE);
IdArray row, IdArray col, bool row_sorted = false,
bool col_sorted = false, dgl_format_code_t formats = ALL_CODE);
static HeteroGraphPtr CreateFromCOO(
int64_t num_vtypes, const aten::COOMatrix& mat,
......
......@@ -64,6 +64,18 @@ def graph1():
g.edata['scalar_w'] = F.copy_to(F.abs(F.randn((g.number_of_edges(),))), F.cpu())
return g
@register_case(['homo', 'row_sorted'])
def graph2():
    """Homogeneous graph whose source ids are ascending but whose destination
    ids are not sorted within each source; built with ``row_sorted=True``."""
    src_ids = [0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 4, 6, 6, 7, 8, 9]
    dst_ids = [4, 5, 1, 2, 4, 7, 9, 8, 6, 4, 1, 0, 1, 0, 2, 3, 5]
    edges = (src_ids, dst_ids)
    return dgl.graph(edges, row_sorted=True)
@register_case(['homo', 'row_sorted', 'col_sorted'])
def graph3():
    """Homogeneous graph whose source ids are ascending and whose destination
    ids are ascending within each source; built with both sorted flags set."""
    src_ids = [0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 4, 6, 6, 7, 8, 9]
    dst_ids = [1, 4, 5, 2, 4, 7, 8, 9, 1, 4, 6, 0, 0, 1, 2, 3, 5]
    edges = (src_ids, dst_ids)
    return dgl.graph(edges, row_sorted=True, col_sorted=True)
@register_case(['hetero', 'has_feature'])
def heterograph0():
g = dgl.heterograph({
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment