"git@developer.sourcefind.cn:wangsen/mineru.git" did not exist on "902dcd2cee1df8394506b058d8912e6dcbfdd61f"
Commit 36f6966a authored by Astha Rai
Browse files

cleaned up comments, rearranged/renamed files

parent 995c6b1c
add_example_executable(example_elementwise_permute_4D_fp16 elementwise_permute_4D_fp16.cpp)
add_example_executable(example_elementwise_permute_4D_fp16_2d elementwise_permute_4D_fp16_2d.cpp)
add_example_executable(example_elementwise_permute_4D_fp32_row elementwise_permute_4D_fp32_row.cpp)
add_example_executable(example_elementwise_permute_4D_fp16_row elementwise_permute_4D_fp16_row.cpp)
add_example_executable(example_elementwise_permute_4D_fp32_col elementwise_permute_4D_fp32_col.cpp)
add_example_executable(example_elementwise_permute_4D_fp16_col elementwise_permute_4D_fp16_col.cpp)
......@@ -3,7 +3,7 @@
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/element/binary_element_wise_operation.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_elementwise_impl_ht.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_elementwise_scale_impl.hpp"
#include "ck/library/utility/algorithm.hpp"
#include "ck/library/utility/check_err.hpp"
......
......@@ -3,7 +3,7 @@
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/element/binary_element_wise_operation.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_elementwise_impl_ht.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_elementwise_scale_impl.hpp"
#include "ck/library/utility/algorithm.hpp"
#include "ck/library/utility/check_err.hpp"
......
......@@ -3,7 +3,7 @@
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/element/binary_element_wise_operation.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_elementwise_impl_ht.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_elementwise_scale_impl.hpp"
#include "ck/library/utility/algorithm.hpp"
#include "ck/library/utility/check_err.hpp"
......@@ -27,7 +27,7 @@ using DeviceElementwisePermuteInstance =
UnaryOp, // UnaryOp
Scale, // Scalar
4, // NumDim
8, // MPerThread
1, // MPerThread
ck::Sequence<1>, // InScalarPerVectorSeq
ck::Sequence<1>>; // OutScalarPerVectorSeq
......@@ -60,10 +60,11 @@ int main()
bool do_verification = true;
bool time_kernel = true;
std::vector<std::size_t> nchw = {4, 2, 1, 8};
std::vector<std::size_t> nhwc = {4, 1, 8, 2};
std::vector<std::size_t> nchw = {5, 4, 2, 3};
std::vector<std::size_t> nhwc = {5, 2, 3, 4};
Tensor<ADataType> a(nchw);
Tensor<BDataType> b(nhwc);
float scale = 1.f;
auto i = 0;
for(std::size_t w = 0; w < a.mDesc.GetLengths()[3]; ++w)
......
......@@ -3,7 +3,7 @@
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/element/binary_element_wise_operation.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_elementwise_impl_ht.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_elementwise_scale_impl.hpp"
#include "ck/library/utility/algorithm.hpp"
#include "ck/library/utility/check_err.hpp"
......
add_example_executable(example_elementwise_permute_4D_fp32_ht elementwise_permute_4D_fp32_ht.cpp)
add_example_executable(example_elementwise_permute_4D_fp16_ht elementwise_permute_4D_fp16_ht.cpp)
add_example_executable(example_elementwise_permute_4D_fp32_col elementwise_permute_4D_fp32_col.cpp)
add_example_executable(example_elementwise_permute_4D_fp16_col elementwise_permute_4D_fp16_col.cpp)
......@@ -17,7 +17,7 @@ template <typename InDataTypeTuple,
typename OutDataTypeTuple,
typename ElementwiseOperation,
typename UnaryOperation,
typename Scale,
typename Scale,
index_t NumDim>
struct DeviceElementwise : public BaseOperator
{
......@@ -32,7 +32,7 @@ struct DeviceElementwise : public BaseOperator
const std::array<void*, NumOutput> out_dev_buffers,
ElementwiseOperation elementwise_op,
UnaryOperation unary_op,
Scale scale_op) = 0;
Scale scale_op) = 0;
virtual std::unique_ptr<BaseInvoker> MakeInvokerPointer() = 0;
}; // struct DeviceElementwise
......@@ -41,13 +41,13 @@ template <typename InDataTypeTuple,
typename OutDataTypeTuple,
typename ElementwiseOperation,
typename UnaryOperation,
typename Scale,
typename Scale,
index_t NumDim>
using DeviceElementwisePtr = std::unique_ptr<DeviceElementwise<InDataTypeTuple,
OutDataTypeTuple,
ElementwiseOperation,
UnaryOperation,
Scale,
Scale,
NumDim>>;
} // namespace device
......
......@@ -8,8 +8,8 @@
#include "ck/utility/math.hpp"
#include "ck/utility/sequence.hpp"
#include "ck/tensor_operation/gpu/device/device_elementwise_ht.hpp"
#include "ck/tensor_operation/gpu/grid/gridwise_elementwise_1d_ht.hpp"
#include "ck/tensor_operation/gpu/device/device_elementwise_scale.hpp"
#include "ck/tensor_operation/gpu/grid/gridwise_elementwise_1d_scale.hpp"
#include "ck/tensor_description/tensor_descriptor_helper.hpp"
#include "ck/host_utility/kernel_launch.hpp"
......@@ -23,7 +23,7 @@ template <typename InDataTypeTuple,
typename OutDataTypeTuple,
typename ElementwiseOperation,
typename UnaryOperation,
typename Scale,
typename Scale,
index_t NumDim,
index_t MPerThread,
typename InScalarPerVectorSeq,
......@@ -32,7 +32,7 @@ struct DeviceElementwiseImpl : public DeviceElementwise<InDataTypeTuple,
OutDataTypeTuple,
ElementwiseOperation,
UnaryOperation,
Scale,
Scale,
NumDim>
{
static constexpr int NumInput = InDataTypeTuple::Size();
......@@ -135,7 +135,7 @@ struct DeviceElementwiseImpl : public DeviceElementwise<InDataTypeTuple,
OutDataTypePointerTuple,
ElementwiseOperation,
UnaryOperation,
Scale,
Scale,
MPerThread,
InScalarPerVectorSeq,
OutScalarPerVectorSeq>;
......@@ -149,14 +149,14 @@ struct DeviceElementwiseImpl : public DeviceElementwise<InDataTypeTuple,
const std::array<void*, NumOutput> out_dev_buffers,
ElementwiseOperation elementwise_op,
UnaryOperation unary_op,
Scale scale_op)
Scale scale_op)
: lengths_(lengths),
inStridesArray_(inStridesArray),
outStridesArray_(outStridesArray),
elementwise_op_(elementwise_op),
unary_op_(unary_op),
scale_op_(scale_op),
scale_op_(scale_op),
blockSize_(256)
{
in_dev_buffers_ = generate_tuple(
......@@ -183,7 +183,7 @@ struct DeviceElementwiseImpl : public DeviceElementwise<InDataTypeTuple,
ElementwiseOperation elementwise_op_;
UnaryOperation unary_op_;
Scale scale_op_;
Scale scale_op_;
index_t blockSize_;
};
......@@ -214,7 +214,7 @@ struct DeviceElementwiseImpl : public DeviceElementwise<InDataTypeTuple,
OutDataTypePointerTuple,
ElementwiseOperation,
UnaryOperation,
Scale>;
Scale>;
float elapsed_time = launch_and_time_kernel(stream_config,
kernel,
......@@ -227,7 +227,7 @@ struct DeviceElementwiseImpl : public DeviceElementwise<InDataTypeTuple,
arg.out_dev_buffers_,
arg.elementwise_op_,
arg.unary_op_,
arg.scale_op_);
arg.scale_op_);
return elapsed_time;
}
......@@ -285,7 +285,7 @@ struct DeviceElementwiseImpl : public DeviceElementwise<InDataTypeTuple,
const std::array<void*, NumOutput> out_dev_buffers,
ElementwiseOperation elementwise_op,
UnaryOperation unary_op,
Scale scale_op)
Scale scale_op)
{
return Argument{lengths,
inStridesArray,
......@@ -294,7 +294,7 @@ struct DeviceElementwiseImpl : public DeviceElementwise<InDataTypeTuple,
out_dev_buffers,
elementwise_op,
unary_op,
scale_op};
scale_op};
}
std::unique_ptr<BaseArgument>
......@@ -305,7 +305,7 @@ struct DeviceElementwiseImpl : public DeviceElementwise<InDataTypeTuple,
const std::array<void*, NumOutput> out_dev_buffers,
ElementwiseOperation elementwise_op,
UnaryOperation unary_op,
Scale scale_op) override
Scale scale_op) override
{
return std::make_unique<Argument>(lengths,
inStridesArray,
......@@ -314,7 +314,7 @@ struct DeviceElementwiseImpl : public DeviceElementwise<InDataTypeTuple,
out_dev_buffers,
elementwise_op,
unary_op,
scale_op);
scale_op);
}
static auto MakeInvoker() { return Invoker{}; }
......
......@@ -7,11 +7,6 @@
#include "ck/utility/data_type.hpp"
#include "ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#define UNUSED(expr) \
do \
{ \
(void)(expr); \
} while(0)
namespace ck {
......@@ -163,7 +158,6 @@ struct GridwiseElementwise_1D
},
Number<NumOutput>{});
// const auto& scalar = ScalarMult;
index_t num_iter = M / (loop_step);
do
{
......@@ -211,7 +205,6 @@ struct GridwiseElementwise_1D
Number<NumInput>{});
unpack2(elementwise_op, out_data_refs, in_data_refs);
UNUSED(scale_op);
});
static_for<0, NumOutput, 1>{}([&](auto I) {
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment