"src/include/blockwise_2d_tensor_op.cuh" did not exist on "e7b8705b913c1bb7d216255f1f233ea03c096f1e"
Commit c9c08eb0 authored by Shucai Xiao's avatar Shucai Xiao
Browse files

clang format

parent b16ab01d
...@@ -82,7 +82,7 @@ void argmax(hipStream_t stream, const argument& result, const argument& arg, int ...@@ -82,7 +82,7 @@ void argmax(hipStream_t stream, const argument& result, const argument& arg, int
// load data to lds_data // load data to lds_data
size_t round_item_num = (batch_item_num + block_size - 1) / block_size * block_size; size_t round_item_num = (batch_item_num + block_size - 1) / block_size * block_size;
size_t remaining_item_num = batch_item_num; size_t remaining_item_num = batch_item_num;
data_idx[axis] = 0; data_idx[axis] = 0;
lds_data[max_block_size] = input[arg_s.index(data_idx)]; lds_data[max_block_size] = input[arg_s.index(data_idx)];
lds_index[max_block_size] = 0; lds_index[max_block_size] = 0;
for(size_t i = thr_idx; i < round_item_num; i += block_size) for(size_t i = thr_idx; i < round_item_num; i += block_size)
......
...@@ -82,7 +82,7 @@ void argmin(hipStream_t stream, const argument& result, const argument& arg, int ...@@ -82,7 +82,7 @@ void argmin(hipStream_t stream, const argument& result, const argument& arg, int
// load data to lds_data // load data to lds_data
size_t round_item_num = (batch_item_num + block_size - 1) / block_size * block_size; size_t round_item_num = (batch_item_num + block_size - 1) / block_size * block_size;
size_t remaining_item_num = batch_item_num; size_t remaining_item_num = batch_item_num;
data_idx[axis] = 0; data_idx[axis] = 0;
lds_data[max_block_size] = input[arg_s.index(data_idx)]; lds_data[max_block_size] = input[arg_s.index(data_idx)];
lds_index[max_block_size] = 0; lds_index[max_block_size] = 0;
for(size_t i = thr_idx; i < round_item_num; i += block_size) for(size_t i = thr_idx; i < round_item_num; i += block_size)
......
...@@ -76,12 +76,12 @@ struct pair_min_op ...@@ -76,12 +76,12 @@ struct pair_min_op
template <class T, class Op> template <class T, class Op>
inline __device__ void block_reduce_pair(T* data_ptr, inline __device__ void block_reduce_pair(T* data_ptr,
int64_t* index_ptr, int64_t* index_ptr,
Op op, Op op,
std::size_t block_size, std::size_t block_size,
std::size_t thr_idx, std::size_t thr_idx,
std::size_t item_num, std::size_t item_num,
std::size_t output_index) std::size_t output_index)
{ {
while(true) while(true)
{ {
...@@ -89,8 +89,9 @@ inline __device__ void block_reduce_pair(T* data_ptr, ...@@ -89,8 +89,9 @@ inline __device__ void block_reduce_pair(T* data_ptr,
auto size = item_num / 2; auto size = item_num / 2;
for(std::size_t i = thr_idx; i < size; i += block_size) for(std::size_t i = thr_idx; i < size; i += block_size)
{ {
auto output = op({data_ptr[i], index_ptr[i]}, {data_ptr[i + stride], index_ptr[i + stride]}); auto output =
data_ptr[i] = output.first; op({data_ptr[i], index_ptr[i]}, {data_ptr[i + stride], index_ptr[i + stride]});
data_ptr[i] = output.first;
index_ptr[i] = output.second; index_ptr[i] = output.second;
} }
__syncthreads(); __syncthreads();
...@@ -102,7 +103,8 @@ inline __device__ void block_reduce_pair(T* data_ptr, ...@@ -102,7 +103,8 @@ inline __device__ void block_reduce_pair(T* data_ptr,
if(thr_idx == 0) if(thr_idx == 0)
{ {
auto output = op({data_ptr[output_index], index_ptr[output_index]}, {data_ptr[0], index_ptr[0]}); auto output =
op({data_ptr[output_index], index_ptr[output_index]}, {data_ptr[0], index_ptr[0]});
data_ptr[output_index] = output.first; data_ptr[output_index] = output.first;
index_ptr[output_index] = output.second; index_ptr[output_index] = output.second;
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment