"mmdet3d/models/vscode:/vscode.git/clone" did not exist on "e63e0473248c57a403b6f1e080f4f57b7f0ead15"
Commit ed4912f2 authored by rocking's avatar rocking
Browse files

Calculate gridSize from the number of available compute units (CUs).

Remove useless header
parent 38962b98
......@@ -8,11 +8,11 @@
#include "ck/tensor_description/tensor_descriptor.hpp"
#include "ck/tensor_description/tensor_descriptor_helper.hpp"
#include "ck/tensor_operation/gpu/device/reduction_operator_mapping.hpp"
#include "ck/tensor_operation/gpu/device/device_put_element.hpp"
#include "ck/tensor_operation/gpu/grid/gridwise_put_element_1d.hpp"
#include "ck/host_utility/device_prop.hpp"
#include "ck/host_utility/kernel_launch.hpp"
#include "ck/host_utility/stream_utility.hpp"
namespace ck {
namespace tensor_operation {
......@@ -70,27 +70,28 @@ struct DevicePutElementImpl
: p_input_{p_input},
p_indices_{p_indices},
p_output_{p_output},
input_length_raw_{input_length},
elementwise_op_{elementwise_op},
blockSize_{256},
gridSize_{104} // FIXME - Calculate the grid size by number of CU in the future
blockSize_{256}
{
in_grid_desc_ = MakeDescriptor_M(input_length, gridSize_, blockSize_);
}
const InDataType* p_input_;
const IndexDataType* p_indices_;
OutDataType* p_output_;
index_t input_length_raw_;
ElementwiseOperation elementwise_op_;
index_t blockSize_;
index_t gridSize_;
InGrid1dDesc in_grid_desc_;
};
struct Invoker : public BaseInvoker
{
float Run(const Argument& arg, const StreamConfig& stream_config = StreamConfig{})
{
index_t gridSize = getAvailableComputeUnitCount(stream_config);
InGrid1dDesc in_grid_desc =
MakeDescriptor_M(arg.input_length_raw_, gridSize, arg.blockSize_);
const auto kernel = kernel_put_element_1d<GridwisePutElement,
InGrid1dDesc,
InDataType,
......@@ -100,10 +101,10 @@ struct DevicePutElementImpl
float elapsed_time = launch_and_time_kernel(stream_config,
kernel,
dim3(arg.gridSize_),
dim3(gridSize),
dim3(arg.blockSize_),
0,
arg.in_grid_desc_,
in_grid_desc,
arg.p_input_,
arg.p_indices_,
arg.p_output_,
......@@ -121,9 +122,8 @@ struct DevicePutElementImpl
bool IsSupportedArgument(const BaseArgument* p_arg) override
{
const Argument* pArg = dynamic_cast<const Argument*>(p_arg);
index_t input_length = pArg->in_grid_desc_.GetTransforms()[I0].GetUpperLengths()[I0];
if(input_length % InVectorSize != 0)
if(pArg->input_length_raw_ % InVectorSize != 0)
{
return false;
}
......
......@@ -6,11 +6,8 @@
#include <iostream>
#include <sstream>
#include <vector>
#include <algorithm>
#include "ck/tensor_operation/gpu/device/device_base.hpp"
#include "ck/tensor_operation/gpu/device/reduction_operator_mapping.hpp"
#include "ck/utility/reduction_functions_accumulate.hpp"
#include "ck/library/utility/host_tensor.hpp"
#include "ck/library/utility/host_tensor_generator.hpp"
......@@ -50,7 +47,7 @@ struct ReferenceMaxPoolBwd : public device::BaseOperator
{
int din_length = arg.din_.GetElementSpaceSize();
int dout_length = arg.dout_.GetElementSpaceSize();
std::vector<ConputeDataType> buf(din_length);
std::vector<ConputeDataType> buf(din_length, 0);
for(int i = 0; i < dout_length; ++i)
{
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment