Commit ed4912f2 authored by rocking
Browse files

Calculate gridSize according to the number of compute units (CUs).

Remove unused headers.
parent 38962b98
...@@ -8,11 +8,11 @@ ...@@ -8,11 +8,11 @@
#include "ck/tensor_description/tensor_descriptor.hpp" #include "ck/tensor_description/tensor_descriptor.hpp"
#include "ck/tensor_description/tensor_descriptor_helper.hpp" #include "ck/tensor_description/tensor_descriptor_helper.hpp"
#include "ck/tensor_operation/gpu/device/reduction_operator_mapping.hpp"
#include "ck/tensor_operation/gpu/device/device_put_element.hpp" #include "ck/tensor_operation/gpu/device/device_put_element.hpp"
#include "ck/tensor_operation/gpu/grid/gridwise_put_element_1d.hpp" #include "ck/tensor_operation/gpu/grid/gridwise_put_element_1d.hpp"
#include "ck/host_utility/device_prop.hpp" #include "ck/host_utility/device_prop.hpp"
#include "ck/host_utility/kernel_launch.hpp" #include "ck/host_utility/kernel_launch.hpp"
#include "ck/host_utility/stream_utility.hpp"
namespace ck { namespace ck {
namespace tensor_operation { namespace tensor_operation {
...@@ -70,27 +70,28 @@ struct DevicePutElementImpl ...@@ -70,27 +70,28 @@ struct DevicePutElementImpl
: p_input_{p_input}, : p_input_{p_input},
p_indices_{p_indices}, p_indices_{p_indices},
p_output_{p_output}, p_output_{p_output},
input_length_raw_{input_length},
elementwise_op_{elementwise_op}, elementwise_op_{elementwise_op},
blockSize_{256}, blockSize_{256}
gridSize_{104} // FIXME - Calculate the grid size by number of CU in the future
{ {
in_grid_desc_ = MakeDescriptor_M(input_length, gridSize_, blockSize_);
} }
const InDataType* p_input_; const InDataType* p_input_;
const IndexDataType* p_indices_; const IndexDataType* p_indices_;
OutDataType* p_output_; OutDataType* p_output_;
index_t input_length_raw_;
ElementwiseOperation elementwise_op_; ElementwiseOperation elementwise_op_;
index_t blockSize_; index_t blockSize_;
index_t gridSize_;
InGrid1dDesc in_grid_desc_;
}; };
struct Invoker : public BaseInvoker struct Invoker : public BaseInvoker
{ {
float Run(const Argument& arg, const StreamConfig& stream_config = StreamConfig{}) float Run(const Argument& arg, const StreamConfig& stream_config = StreamConfig{})
{ {
index_t gridSize = getAvailableComputeUnitCount(stream_config);
InGrid1dDesc in_grid_desc =
MakeDescriptor_M(arg.input_length_raw_, gridSize, arg.blockSize_);
const auto kernel = kernel_put_element_1d<GridwisePutElement, const auto kernel = kernel_put_element_1d<GridwisePutElement,
InGrid1dDesc, InGrid1dDesc,
InDataType, InDataType,
...@@ -100,10 +101,10 @@ struct DevicePutElementImpl ...@@ -100,10 +101,10 @@ struct DevicePutElementImpl
float elapsed_time = launch_and_time_kernel(stream_config, float elapsed_time = launch_and_time_kernel(stream_config,
kernel, kernel,
dim3(arg.gridSize_), dim3(gridSize),
dim3(arg.blockSize_), dim3(arg.blockSize_),
0, 0,
arg.in_grid_desc_, in_grid_desc,
arg.p_input_, arg.p_input_,
arg.p_indices_, arg.p_indices_,
arg.p_output_, arg.p_output_,
...@@ -121,9 +122,8 @@ struct DevicePutElementImpl ...@@ -121,9 +122,8 @@ struct DevicePutElementImpl
bool IsSupportedArgument(const BaseArgument* p_arg) override bool IsSupportedArgument(const BaseArgument* p_arg) override
{ {
const Argument* pArg = dynamic_cast<const Argument*>(p_arg); const Argument* pArg = dynamic_cast<const Argument*>(p_arg);
index_t input_length = pArg->in_grid_desc_.GetTransforms()[I0].GetUpperLengths()[I0];
if(input_length % InVectorSize != 0) if(pArg->input_length_raw_ % InVectorSize != 0)
{ {
return false; return false;
} }
......
...@@ -6,11 +6,8 @@ ...@@ -6,11 +6,8 @@
#include <iostream> #include <iostream>
#include <sstream> #include <sstream>
#include <vector> #include <vector>
#include <algorithm>
#include "ck/tensor_operation/gpu/device/device_base.hpp" #include "ck/tensor_operation/gpu/device/device_base.hpp"
#include "ck/tensor_operation/gpu/device/reduction_operator_mapping.hpp"
#include "ck/utility/reduction_functions_accumulate.hpp"
#include "ck/library/utility/host_tensor.hpp" #include "ck/library/utility/host_tensor.hpp"
#include "ck/library/utility/host_tensor_generator.hpp" #include "ck/library/utility/host_tensor_generator.hpp"
...@@ -50,7 +47,7 @@ struct ReferenceMaxPoolBwd : public device::BaseOperator ...@@ -50,7 +47,7 @@ struct ReferenceMaxPoolBwd : public device::BaseOperator
{ {
int din_length = arg.din_.GetElementSpaceSize(); int din_length = arg.din_.GetElementSpaceSize();
int dout_length = arg.dout_.GetElementSpaceSize(); int dout_length = arg.dout_.GetElementSpaceSize();
std::vector<ConputeDataType> buf(din_length); std::vector<ConputeDataType> buf(din_length, 0);
for(int i = 0; i < dout_length; ++i) for(int i = 0; i < dout_length; ++i)
{ {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment