"git@developer.sourcefind.cn:OpenDAS/vision.git" did not exist on "7bb5e41ba34cb51c00ae9ca6bc98017054584e5f"
Unverified Commit 5a1ef70f authored by xiang song(charlie.song), committed by GitHub
Browse files

[Optimization] Optimize performance of COOSort (#1349)



* Fix coo_sort

* Add COOSort test

* Fix

* omp

* coo2csr can benefit from sorted coo

* Add more test code

* Add LIBCXX_ENABLE_PARALLEL_ALGORITHMS to Makefile

* upd

* Turn off LIBCXX_ENABLE_PARALLEL_ALGORITHMS by default
Co-authored-by: Ubuntu <ubuntu@ip-172-31-51-214.ec2.internal>
parent f3b866f4
...@@ -25,9 +25,10 @@ endif() ...@@ -25,9 +25,10 @@ endif()
dgl_option(USE_CUDA "Build with CUDA" OFF) dgl_option(USE_CUDA "Build with CUDA" OFF)
dgl_option(USE_OPENMP "Build with OpenMP" ON) dgl_option(USE_OPENMP "Build with OpenMP" ON)
dgl_option(BUILD_CPP_TEST "Build cpp unittest executables" OFF) dgl_option(BUILD_CPP_TEST "Build cpp unittest executables" OFF)
dgl_option(LIBCXX_ENABLE_PARALLEL_ALGORITHMS "Enable the parallel algorithms library. This requires the PSTL to be available." OFF)
# Set debug compile option for gdb, only happens when -DCMAKE_BUILD_TYPE=DEBUG # Set debug compile option for gdb, only happens when -DCMAKE_BUILD_TYPE=DEBUG
if (NOT MSVC) if (NOT MSVC)
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 -g3 -ggdb") set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 -g3 -ggdb")
endif(NOT MSVC) endif(NOT MSVC)
if(USE_CUDA) if(USE_CUDA)
...@@ -136,6 +137,11 @@ include(${GKLIB_PATH}/GKlibSystem.cmake) ...@@ -136,6 +137,11 @@ include(${GKLIB_PATH}/GKlibSystem.cmake)
include_directories(${GKLIB_PATH}) include_directories(${GKLIB_PATH})
add_subdirectory("third_party/METIS/libmetis/") add_subdirectory("third_party/METIS/libmetis/")
list(APPEND DGL_LINKER_LIBS metis) list(APPEND DGL_LINKER_LIBS metis)
# support PARALLEL_ALGORITHMS
if (LIBCXX_ENABLE_PARALLEL_ALGORITHMS)
add_definitions(-DPARALLEL_ALGORITHMS)
endif(LIBCXX_ENABLE_PARALLEL_ALGORITHMS)
endif(NOT MSVC) endif(NOT MSVC)
target_link_libraries(dgl ${DGL_LINKER_LIBS} ${DGL_RUNTIME_LINKER_LIBS}) target_link_libraries(dgl ${DGL_LINKER_LIBS} ${DGL_RUNTIME_LINKER_LIBS})
......
...@@ -4,6 +4,9 @@ ...@@ -4,6 +4,9 @@
* \brief COO sorting * \brief COO sorting
*/ */
#include <dgl/array.h> #include <dgl/array.h>
#ifdef PARALLEL_ALGORITHMS
#include <parallel/algorithm>
#endif
#include <numeric> #include <numeric>
#include <algorithm> #include <algorithm>
#include <vector> #include <vector>
...@@ -21,38 +24,59 @@ COOMatrix COOSort(COOMatrix coo, bool sort_column) { ...@@ -21,38 +24,59 @@ COOMatrix COOSort(COOMatrix coo, bool sort_column) {
// Argsort // Argsort
IdArray new_row = IdArray::Empty({nnz}, coo.row->dtype, coo.row->ctx); IdArray new_row = IdArray::Empty({nnz}, coo.row->dtype, coo.row->ctx);
IdArray new_col = IdArray::Empty({nnz}, coo.col->dtype, coo.col->ctx); IdArray new_col = IdArray::Empty({nnz}, coo.col->dtype, coo.col->ctx);
IdArray new_data = IdArray::Empty({nnz}, coo.row->dtype, coo.row->ctx); IdArray new_idx = IdArray::Empty({nnz}, coo.row->dtype, coo.row->ctx);
IdType* new_row_data = static_cast<IdType*>(new_row->data); IdType* new_row_data = static_cast<IdType*>(new_row->data);
IdType* new_col_data = static_cast<IdType*>(new_col->data); IdType* new_col_data = static_cast<IdType*>(new_col->data);
IdType* new_data_data = static_cast<IdType*>(new_data->data); IdType* new_idx_data = static_cast<IdType*>(new_idx->data);
std::iota(new_data_data, new_data_data + nnz, 0); std::iota(new_idx_data, new_idx_data + nnz, 0);
if (sort_column) { if (sort_column) {
#ifdef PARALLEL_ALGORITHMS
__gnu_parallel::sort(
#else
std::sort( std::sort(
new_data_data, #endif
new_data_data + nnz, new_idx_data,
[coo_row_data, coo_col_data](IdType a, IdType b) { new_idx_data + nnz,
[coo_row_data, coo_col_data](const IdType a, const IdType b) {
return (coo_row_data[a] != coo_row_data[b]) ? return (coo_row_data[a] != coo_row_data[b]) ?
(coo_row_data[a] < coo_row_data[b]) : (coo_row_data[a] < coo_row_data[b]) :
(coo_col_data[a] < coo_col_data[b]); (coo_col_data[a] < coo_col_data[b]);
}); });
} else { } else {
#ifdef PARALLEL_ALGORITHMS
__gnu_parallel::sort(
#else
std::sort( std::sort(
new_data_data, #endif
new_data_data + nnz, new_idx_data,
[coo_row_data](IdType a, IdType b) { new_idx_data + nnz,
return coo_row_data[a] <= coo_row_data[b]; [coo_row_data](const IdType a, const IdType b) {
return coo_row_data[a] < coo_row_data[b];
}); });
} }
// Reorder according to shuffle // Reorder according to shuffle
#pragma omp parallel for
for (IdType i = 0; i < nnz; ++i) { for (IdType i = 0; i < nnz; ++i) {
new_row_data[i] = coo_row_data[new_data_data[i]]; new_row_data[i] = coo_row_data[new_idx_data[i]];
new_col_data[i] = coo_col_data[new_data_data[i]]; new_col_data[i] = coo_col_data[new_idx_data[i]];
}
if (COOHasData(coo)) {
const IdType* coo_data_data = static_cast<IdType*>(coo.data->data);
IdArray new_data = IdArray::Empty({nnz}, coo.row->dtype, coo.row->ctx);
IdType* new_data_data = static_cast<IdType*>(new_data->data);
#pragma omp parallel for
for (IdType i = 0; i < nnz; ++i) {
new_data_data[i] = coo_data_data[new_idx_data[i]];
}
new_idx = new_data;
} }
return COOMatrix{ return COOMatrix{
coo.num_rows, coo.num_cols, std::move(new_row), std::move(new_col), coo.num_rows, coo.num_cols, std::move(new_row), std::move(new_col),
std::move(new_data), true, sort_column}; std::move(new_idx), true, sort_column};
} }
template COOMatrix COOSort<kDLCPU, int32_t>(COOMatrix, bool); template COOMatrix COOSort<kDLCPU, int32_t>(COOMatrix, bool);
......
...@@ -235,15 +235,12 @@ CSRMatrix COOToCSR(COOMatrix coo) { ...@@ -235,15 +235,12 @@ CSRMatrix COOToCSR(COOMatrix coo) {
const IdType* col_data = static_cast<IdType*>(coo.col->data); const IdType* col_data = static_cast<IdType*>(coo.col->data);
const IdType* data = COOHasData(coo)? static_cast<IdType*>(coo.data->data) : nullptr; const IdType* data = COOHasData(coo)? static_cast<IdType*>(coo.data->data) : nullptr;
NDArray ret_indptr = NDArray::Empty({N + 1}, coo.row->dtype, coo.row->ctx); NDArray ret_indptr = NDArray::Empty({N + 1}, coo.row->dtype, coo.row->ctx);
NDArray ret_indices = NDArray::Empty({NNZ}, coo.row->dtype, coo.row->ctx); NDArray ret_indices;
NDArray ret_data = NDArray::Empty({NNZ}, coo.row->dtype, coo.row->ctx); NDArray ret_data;
IdType* Bp = static_cast<IdType*>(ret_indptr->data); IdType* Bp = static_cast<IdType*>(ret_indptr->data);
IdType* Bi = static_cast<IdType*>(ret_indices->data);
IdType* Bx = static_cast<IdType*>(ret_data->data);
std::fill(Bp, Bp + N, 0); std::fill(Bp, Bp + N, 0);
for (int64_t i = 0; i < NNZ; ++i) { for (int64_t i = 0; i < NNZ; ++i) {
Bp[row_data[i]]++; Bp[row_data[i]]++;
} }
...@@ -256,18 +253,28 @@ CSRMatrix COOToCSR(COOMatrix coo) { ...@@ -256,18 +253,28 @@ CSRMatrix COOToCSR(COOMatrix coo) {
} }
Bp[N] = NNZ; Bp[N] = NNZ;
for (int64_t i = 0; i < NNZ; ++i) { if (coo.row_sorted == true) {
const IdType r = row_data[i]; ret_indices = coo.col;
Bi[Bp[r]] = col_data[i]; ret_data = coo.data;
Bx[Bp[r]] = data? data[i] : i; } else {
Bp[r]++; ret_indices = NDArray::Empty({NNZ}, coo.row->dtype, coo.row->ctx);
} ret_data = NDArray::Empty({NNZ}, coo.row->dtype, coo.row->ctx);
IdType* Bi = static_cast<IdType*>(ret_indices->data);
IdType* Bx = static_cast<IdType*>(ret_data->data);
for (int64_t i = 0; i < NNZ; ++i) {
const IdType r = row_data[i];
Bi[Bp[r]] = col_data[i];
Bx[Bp[r]] = data? data[i] : i;
Bp[r]++;
}
// correct the indptr // correct the indptr
for (int64_t i = 0, last = 0; i <= N; ++i) { for (int64_t i = 0, last = 0; i <= N; ++i) {
IdType temp = Bp[i]; IdType temp = Bp[i];
Bp[i] = last; Bp[i] = last;
last = temp; last = temp;
}
} }
return CSRMatrix(coo.num_rows, coo.num_cols, return CSRMatrix(coo.num_rows, coo.num_cols,
......
...@@ -69,6 +69,49 @@ aten::COOMatrix COO2() { ...@@ -69,6 +69,49 @@ aten::COOMatrix COO2() {
aten::VecToIdArray(std::vector<IDX>({1, 2, 2, 0, 3, 2}), sizeof(IDX)*8, CTX)); aten::VecToIdArray(std::vector<IDX>({1, 2, 2, 0, 3, 2}), sizeof(IDX)*8, CTX));
} }
template <typename IDX>
aten::CSRMatrix SR_CSR3() {
  // Expected CSR for the COO3 fixture after a row-only sort.
  // Dense view (each cell counts the stored entries at that position):
  // [[0, 1, 2, 0, 0],
  //  [1, 0, 0, 0, 0],
  //  [0, 0, 1, 1, 0],
  //  [0, 0, 0, 0, 0]]
  // Within a row the column indices stay in COO3's input order, and `data`
  // holds each entry's position in the original COO arrays.
  constexpr auto kBits = sizeof(IDX) * 8;
  const std::vector<IDX> indptr{0, 3, 4, 6, 6};
  const std::vector<IDX> indices{2, 1, 2, 0, 2, 3};
  const std::vector<IDX> data{0, 2, 5, 3, 1, 4};
  // NOTE(review): the trailing `false` is presumably the "sorted" flag --
  // confirm against the CSRMatrix constructor.
  return aten::CSRMatrix(
      4, 5,
      aten::VecToIdArray(indptr, kBits, CTX),
      aten::VecToIdArray(indices, kBits, CTX),
      aten::VecToIdArray(data, kBits, CTX),
      false);
}
template <typename IDX>
aten::CSRMatrix SRC_CSR3() {
  // Expected CSR for the COO3 fixture after a row-then-column sort.
  // Dense view (each cell counts the stored entries at that position):
  // [[0, 1, 2, 0, 0],
  //  [1, 0, 0, 0, 0],
  //  [0, 0, 1, 1, 0],
  //  [0, 0, 0, 0, 0]]
  // Column indices are ascending inside every row, and `data` holds each
  // entry's position in the original COO arrays.
  constexpr auto kBits = sizeof(IDX) * 8;
  const std::vector<IDX> indptr{0, 3, 4, 6, 6};
  const std::vector<IDX> indices{1, 2, 2, 0, 2, 3};
  const std::vector<IDX> data{2, 0, 5, 3, 1, 4};
  // NOTE(review): the trailing `false` is presumably the "sorted" flag even
  // though these columns are ordered -- confirm against the CSRMatrix
  // constructor.
  return aten::CSRMatrix(
      4, 5,
      aten::VecToIdArray(indptr, kBits, CTX),
      aten::VecToIdArray(indices, kBits, CTX),
      aten::VecToIdArray(data, kBits, CTX),
      false);
}
template <typename IDX>
aten::COOMatrix COO3() {
  // Unsorted 4x5 COO fixture that contains a duplicate entry at (0, 2).
  // Dense view (each cell counts the stored entries at that position):
  // [[0, 1, 2, 0, 0],
  //  [1, 0, 0, 0, 0],
  //  [0, 0, 1, 1, 0],
  //  [0, 0, 0, 0, 0]]
  // No data array is supplied; only the row and column index arrays.
  constexpr auto kBits = sizeof(IDX) * 8;
  const std::vector<IDX> rows{0, 2, 0, 1, 2, 0};
  const std::vector<IDX> cols{2, 2, 1, 0, 3, 2};
  return aten::COOMatrix(
      4, 5,
      aten::VecToIdArray(rows, kBits, CTX),
      aten::VecToIdArray(cols, kBits, CTX));
}
} }
template <typename IDX> template <typename IDX>
...@@ -330,6 +373,46 @@ void _TestCOOToCSR() { ...@@ -330,6 +373,46 @@ void _TestCOOToCSR() {
ASSERT_TRUE(ArrayEQ<IDX>(csr.indptr, tcsr.indptr)); ASSERT_TRUE(ArrayEQ<IDX>(csr.indptr, tcsr.indptr));
ASSERT_TRUE(ArrayEQ<IDX>(csr.indices, tcsr.indices)); ASSERT_TRUE(ArrayEQ<IDX>(csr.indices, tcsr.indices));
ASSERT_TRUE(ArrayEQ<IDX>(csr.data, tcsr.data)); ASSERT_TRUE(ArrayEQ<IDX>(csr.data, tcsr.data));
coo = COO1<IDX>();
auto rs_coo = aten::COOSort(coo, false);
auto rs_csr = CSR1<IDX>();
auto rs_tcsr = aten::COOToCSR(rs_coo);
ASSERT_EQ(coo.num_rows, rs_tcsr.num_rows);
ASSERT_EQ(coo.num_cols, rs_tcsr.num_cols);
ASSERT_TRUE(ArrayEQ<IDX>(rs_csr.indptr, rs_tcsr.indptr));
ASSERT_TRUE(ArrayEQ<IDX>(rs_csr.indices, rs_tcsr.indices));
ASSERT_TRUE(ArrayEQ<IDX>(rs_csr.data, rs_tcsr.data));
coo = COO3<IDX>();
rs_coo = aten::COOSort(coo, false);
rs_csr = SR_CSR3<IDX>();
rs_tcsr = aten::COOToCSR(rs_coo);
ASSERT_EQ(coo.num_rows, rs_tcsr.num_rows);
ASSERT_EQ(coo.num_cols, rs_tcsr.num_cols);
ASSERT_TRUE(ArrayEQ<IDX>(rs_csr.indptr, rs_tcsr.indptr));
ASSERT_TRUE(ArrayEQ<IDX>(rs_csr.indices, rs_tcsr.indices));
ASSERT_TRUE(ArrayEQ<IDX>(rs_csr.data, rs_tcsr.data));
coo = COO1<IDX>();
auto src_coo = aten::COOSort(coo, true);
auto src_csr = CSR1<IDX>();
auto src_tcsr = aten::COOToCSR(src_coo);
ASSERT_EQ(coo.num_rows, src_tcsr.num_rows);
ASSERT_EQ(coo.num_cols, src_tcsr.num_cols);
ASSERT_TRUE(ArrayEQ<IDX>(src_csr.indptr, src_tcsr.indptr));
ASSERT_TRUE(ArrayEQ<IDX>(src_csr.indices, src_tcsr.indices));
ASSERT_TRUE(ArrayEQ<IDX>(src_csr.data, src_tcsr.data));
coo = COO3<IDX>();
src_coo = aten::COOSort(coo, true);
src_csr = SRC_CSR3<IDX>();
src_tcsr = aten::COOToCSR(src_coo);
ASSERT_EQ(coo.num_rows, src_tcsr.num_rows);
ASSERT_EQ(coo.num_cols, src_tcsr.num_cols);
ASSERT_TRUE(ArrayEQ<IDX>(src_csr.indptr, src_tcsr.indptr));
ASSERT_TRUE(ArrayEQ<IDX>(src_csr.indices, src_tcsr.indices));
ASSERT_TRUE(ArrayEQ<IDX>(src_csr.data, src_tcsr.data));
} }
TEST(SpmatTest, TestCOOToCSR) { TEST(SpmatTest, TestCOOToCSR) {
...@@ -349,3 +432,53 @@ TEST(SpmatTest, TestCOOHasDuplicate) { ...@@ -349,3 +432,53 @@ TEST(SpmatTest, TestCOOHasDuplicate) {
_TestCOOHasDuplicate<int32_t>(); _TestCOOHasDuplicate<int32_t>();
_TestCOOHasDuplicate<int64_t>(); _TestCOOHasDuplicate<int64_t>();
} }
// Checks COOSort on the COO3 fixture in both modes: row-only ordering
// (sort_column == false) and row-then-column ordering (sort_column == true).
// For each mode the shape must be preserved and the row / col / data arrays
// must equal the hand-computed expectations listed below.
template <typename IDX>
void _TestCOOSort() {
auto coo = COO3<IDX>();
// Row-only sort: the shape must be unchanged.
auto sr_coo = COOSort(coo, false);
ASSERT_EQ(coo.num_rows, sr_coo.num_rows);
ASSERT_EQ(coo.num_cols, sr_coo.num_cols);
// Row-then-column sort: the shape must be unchanged.
auto src_coo = COOSort(coo, true);
ASSERT_EQ(coo.num_rows, src_coo.num_rows);
ASSERT_EQ(coo.num_cols, src_coo.num_cols);
// COO3 (dense view, each cell counts the stored entries at that position)
// [[0, 1, 2, 0, 0],
// [1, 0, 0, 0, 0],
// [0, 0, 1, 1, 0],
// [0, 0, 0, 0, 0]]
// Input arrays; COO3 supplies no data array, so `data` here is simply each
// entry's position in the input.
// data: [0, 1, 2, 3, 4, 5]
// row : [0, 2, 0, 1, 2, 0]
// col : [2, 2, 1, 0, 3, 2]
// Row Sorted (expected output for sort_column == false)
// data: [0, 2, 5, 3, 1, 4]
// row : [0, 0, 0, 1, 2, 2]
// col : [2, 1, 2, 0, 2, 3]
// Row Col Sorted (expected output for sort_column == true)
// data: [2, 0, 5, 3, 1, 4]
// row : [0, 0, 0, 1, 2, 2]
// col : [1, 2, 2, 0, 2, 3]
// NOTE(review): both expected `data` orders keep tied entries in input order
// (e.g. 0 before 5 for the duplicate (0, 2)). The CPU COOSort in this change
// uses std::sort / __gnu_parallel::sort, which do not promise a stable order
// for ties -- confirm these expectations hold for every build configuration.
// The sorted row array is identical for both modes.
auto sort_row = aten::VecToIdArray(
std::vector<IDX>({0, 0, 0, 1, 2, 2}), sizeof(IDX)*8, CTX);
// Columns and data after the row-only sort (columns not ordered within a row).
auto unsort_col = aten::VecToIdArray(
std::vector<IDX>({2, 1, 2, 0, 2, 3}), sizeof(IDX)*8, CTX);
auto unsort_col_data = aten::VecToIdArray(
std::vector<IDX>({0, 2, 5, 3, 1, 4}), sizeof(IDX)*8, CTX);
// Columns and data after the row-then-column sort.
auto sort_col = aten::VecToIdArray(
std::vector<IDX>({1, 2, 2, 0, 2, 3}), sizeof(IDX)*8, CTX);
auto sort_col_data = aten::VecToIdArray(
std::vector<IDX>({2, 0, 5, 3, 1, 4}), sizeof(IDX)*8, CTX);
// Compare the row-only result.
ASSERT_TRUE(ArrayEQ<IDX>(sr_coo.row, sort_row));
ASSERT_TRUE(ArrayEQ<IDX>(sr_coo.col, unsort_col));
ASSERT_TRUE(ArrayEQ<IDX>(sr_coo.data, unsort_col_data));
// Compare the row-then-column result.
ASSERT_TRUE(ArrayEQ<IDX>(src_coo.row, sort_row));
ASSERT_TRUE(ArrayEQ<IDX>(src_coo.col, sort_col));
ASSERT_TRUE(ArrayEQ<IDX>(src_coo.data, sort_col_data));
}
// Runs the COOSort checks once for each supported index width
// (32-bit and 64-bit ids).
TEST(SpmatTest, TestCOOSort) {
_TestCOOSort<int32_t>();
_TestCOOSort<int64_t>();
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment