Commit 2c8e3006 authored by xgqdut2016
Browse files

issue/130: add omp cpu

parent 65df17f7
...@@ -42,10 +42,14 @@ void calculate( ...@@ -42,10 +42,14 @@ void calculate(
if (info.is_transed) { if (info.is_transed) {
std::swap(a, b); std::swap(a, b);
} }
#pragma omp parallel for
for (size_t i = 0; i < info.batch; ++i) { for(ptrdiff_t index = 0; index < ptrdiff_t(info.batch * info.m * info.n); ++index){
for (size_t m_ = 0; m_ < info.m; ++m_) { size_t ind = index;
for (size_t n_ = 0; n_ < info.n; ++n_) { size_t n_ = ind % info.n;
ind /= info.n;
size_t m_ = ind % info.m;
ind /= info.m;
size_t i = ind;
auto c_ = reinterpret_cast<Tdata *>(c) + i * info.c_matrix.stride + m_ * info.c_matrix.row_stride + n_ * info.c_matrix.col_stride; auto c_ = reinterpret_cast<Tdata *>(c) + i * info.c_matrix.stride + m_ * info.c_matrix.row_stride + n_ * info.c_matrix.col_stride;
float sum = 0; float sum = 0;
for (size_t k_ = 0; k_ < info.k; ++k_) { for (size_t k_ = 0; k_ < info.k; ++k_) {
...@@ -67,8 +71,6 @@ void calculate( ...@@ -67,8 +71,6 @@ void calculate(
*c_ = beta * (*c_) + alpha * sum; *c_ = beta * (*c_) + alpha * sum;
} }
} }
}
}
} }
infiniStatus_t Descriptor::calculate( infiniStatus_t Descriptor::calculate(
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment