Unverified commit 9dff5419, authored by Quan (Andy) Gan, committed by GitHub
Browse files

[Performance][Hotfix] Disable openmp in arithmetic operation (#2412)



* disable openmp in arithmetic operation

* lint

* Update array_op_impl.cc
Co-authored-by: Minjie Wang <wmjlyjemaine@gmail.com>
parent dc072dca
......@@ -48,8 +48,9 @@ IdArray BinaryElewise(IdArray lhs, IdArray rhs) {
const IdType* lhs_data = static_cast<IdType*>(lhs->data);
const IdType* rhs_data = static_cast<IdType*>(rhs->data);
IdType* ret_data = static_cast<IdType*>(ret->data);
- // TODO(minjie): we should split the loop into segments for better cache locality.
- #pragma omp parallel for
+ // TODO(BarclayII): this usually incurs lots of overhead in thread spawning, scheduling,
+ // etc., especially since the workload is very light. Need to replace with parallel_for.
+ // #pragma omp parallel for
for (int64_t i = 0; i < lhs->shape[0]; ++i) {
ret_data[i] = Op::Call(lhs_data[i], rhs_data[i]);
}
......@@ -84,8 +85,9 @@ IdArray BinaryElewise(IdArray lhs, IdType rhs) {
IdArray ret = NewIdArray(lhs->shape[0], lhs->ctx, lhs->dtype.bits);
const IdType* lhs_data = static_cast<IdType*>(lhs->data);
IdType* ret_data = static_cast<IdType*>(ret->data);
- // TODO(minjie): we should split the loop into segments for better cache locality.
- #pragma omp parallel for
+ // TODO(BarclayII): this usually incurs lots of overhead in thread spawning, scheduling,
+ // etc., especially since the workload is very light. Need to replace with parallel_for.
+ // #pragma omp parallel for
for (int64_t i = 0; i < lhs->shape[0]; ++i) {
ret_data[i] = Op::Call(lhs_data[i], rhs);
}
......@@ -120,8 +122,9 @@ IdArray BinaryElewise(IdType lhs, IdArray rhs) {
IdArray ret = NewIdArray(rhs->shape[0], rhs->ctx, rhs->dtype.bits);
const IdType* rhs_data = static_cast<IdType*>(rhs->data);
IdType* ret_data = static_cast<IdType*>(ret->data);
- // TODO(minjie): we should split the loop into segments for better cache locality.
- #pragma omp parallel for
+ // TODO(BarclayII): this usually incurs lots of overhead in thread spawning, scheduling,
+ // etc., especially since the workload is very light. Need to replace with parallel_for.
+ // #pragma omp parallel for
for (int64_t i = 0; i < rhs->shape[0]; ++i) {
ret_data[i] = Op::Call(lhs, rhs_data[i]);
}
......@@ -156,8 +159,9 @@ IdArray UnaryElewise(IdArray lhs) {
IdArray ret = NewIdArray(lhs->shape[0], lhs->ctx, lhs->dtype.bits);
const IdType* lhs_data = static_cast<IdType*>(lhs->data);
IdType* ret_data = static_cast<IdType*>(ret->data);
- // TODO(minjie): we should split the loop into segments for better cache locality.
- #pragma omp parallel for
+ // TODO(BarclayII): this usually incurs lots of overhead in thread spawning, scheduling,
+ // etc., especially since the workload is very light. Need to replace with parallel_for.
+ // #pragma omp parallel for
for (int64_t i = 0; i < lhs->shape[0]; ++i) {
ret_data[i] = Op::Call(lhs_data[i]);
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment