Commit 25d7fde8 authored by gaoqiong's avatar gaoqiong
Browse files

lite

parent 8439d29f
@@ -440,7 +440,7 @@ Status call_reduce_matrix_rows(hipStream_t stream, const TIn* input, TOut* outpu
 HIP_RETURN_IF_ERROR(hipMemsetAsync(output, 0, n * sizeof(TOut), stream));
 }
-constexpr int max_num_threads_in_block = 512;
+constexpr int max_num_threads_in_block = 256;
 constexpr int max_num_blocks_in_grid = 512;
 constexpr int load_count_per_thread = 4;
......
@@ -61,6 +61,8 @@ class ReduceKernel : public RocmKernel, public ReduceKernelBase<allow_multi_axes
 template <typename T, miopenReduceTensorIndices_t ReduceTensorIndices = MIOPEN_REDUCE_TENSOR_NO_INDICES>
 Status ComputeImplEx(OpKernelContext* ctx, miopenReduceTensorOp_t miopen_reduce_op) const;
 template <typename T, typename OutT, miopenReduceTensorIndices_t ReduceTensorIndices>
 Status ReduceKernelShared(
 const T* X,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment