Commit 36f6966a authored by Astha Rai
Browse files

cleaned up comments, rearranged/renamed files

parent 995c6b1c
add_example_executable(example_elementwise_permute_4D_fp16 elementwise_permute_4D_fp16.cpp) add_example_executable(example_elementwise_permute_4D_fp16 elementwise_permute_4D_fp16.cpp)
add_example_executable(example_elementwise_permute_4D_fp16_2d elementwise_permute_4D_fp16_2d.cpp) add_example_executable(example_elementwise_permute_4D_fp16_2d elementwise_permute_4D_fp16_2d.cpp)
add_example_executable(example_elementwise_permute_4D_fp32_row elementwise_permute_4D_fp32_row.cpp)
add_example_executable(example_elementwise_permute_4D_fp16_row elementwise_permute_4D_fp16_row.cpp)
add_example_executable(example_elementwise_permute_4D_fp32_col elementwise_permute_4D_fp32_col.cpp)
add_example_executable(example_elementwise_permute_4D_fp16_col elementwise_permute_4D_fp16_col.cpp)
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
#include "ck/ck.hpp" #include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/element/binary_element_wise_operation.hpp" #include "ck/tensor_operation/gpu/element/binary_element_wise_operation.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_elementwise_impl_ht.hpp" #include "ck/tensor_operation/gpu/device/impl/device_elementwise_scale_impl.hpp"
#include "ck/library/utility/algorithm.hpp" #include "ck/library/utility/algorithm.hpp"
#include "ck/library/utility/check_err.hpp" #include "ck/library/utility/check_err.hpp"
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
#include "ck/ck.hpp" #include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/element/binary_element_wise_operation.hpp" #include "ck/tensor_operation/gpu/element/binary_element_wise_operation.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_elementwise_impl_ht.hpp" #include "ck/tensor_operation/gpu/device/impl/device_elementwise_scale_impl.hpp"
#include "ck/library/utility/algorithm.hpp" #include "ck/library/utility/algorithm.hpp"
#include "ck/library/utility/check_err.hpp" #include "ck/library/utility/check_err.hpp"
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
#include "ck/ck.hpp" #include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/element/binary_element_wise_operation.hpp" #include "ck/tensor_operation/gpu/element/binary_element_wise_operation.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_elementwise_impl_ht.hpp" #include "ck/tensor_operation/gpu/device/impl/device_elementwise_scale_impl.hpp"
#include "ck/library/utility/algorithm.hpp" #include "ck/library/utility/algorithm.hpp"
#include "ck/library/utility/check_err.hpp" #include "ck/library/utility/check_err.hpp"
...@@ -27,7 +27,7 @@ using DeviceElementwisePermuteInstance = ...@@ -27,7 +27,7 @@ using DeviceElementwisePermuteInstance =
UnaryOp, // UnaryOp UnaryOp, // UnaryOp
Scale, // Scalar Scale, // Scalar
4, // NumDim 4, // NumDim
8, // MPerThread 1, // MPerThread
ck::Sequence<1>, // InScalarPerVectorSeq ck::Sequence<1>, // InScalarPerVectorSeq
ck::Sequence<1>>; // OutScalarPerVectorSeq ck::Sequence<1>>; // OutScalarPerVectorSeq
...@@ -60,10 +60,11 @@ int main() ...@@ -60,10 +60,11 @@ int main()
bool do_verification = true; bool do_verification = true;
bool time_kernel = true; bool time_kernel = true;
std::vector<std::size_t> nchw = {4, 2, 1, 8}; std::vector<std::size_t> nchw = {5, 4, 2, 3};
std::vector<std::size_t> nhwc = {4, 1, 8, 2}; std::vector<std::size_t> nhwc = {5, 2, 3, 4};
Tensor<ADataType> a(nchw); Tensor<ADataType> a(nchw);
Tensor<BDataType> b(nhwc); Tensor<BDataType> b(nhwc);
float scale = 1.f; float scale = 1.f;
auto i = 0; auto i = 0;
for(std::size_t w = 0; w < a.mDesc.GetLengths()[3]; ++w) for(std::size_t w = 0; w < a.mDesc.GetLengths()[3]; ++w)
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
#include "ck/ck.hpp" #include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/element/binary_element_wise_operation.hpp" #include "ck/tensor_operation/gpu/element/binary_element_wise_operation.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_elementwise_impl_ht.hpp" #include "ck/tensor_operation/gpu/device/impl/device_elementwise_scale_impl.hpp"
#include "ck/library/utility/algorithm.hpp" #include "ck/library/utility/algorithm.hpp"
#include "ck/library/utility/check_err.hpp" #include "ck/library/utility/check_err.hpp"
......
add_example_executable(example_elementwise_permute_4D_fp32_ht elementwise_permute_4D_fp32_ht.cpp)
add_example_executable(example_elementwise_permute_4D_fp16_ht elementwise_permute_4D_fp16_ht.cpp)
add_example_executable(example_elementwise_permute_4D_fp32_col elementwise_permute_4D_fp32_col.cpp)
add_example_executable(example_elementwise_permute_4D_fp16_col elementwise_permute_4D_fp16_col.cpp)
...@@ -17,7 +17,7 @@ template <typename InDataTypeTuple, ...@@ -17,7 +17,7 @@ template <typename InDataTypeTuple,
typename OutDataTypeTuple, typename OutDataTypeTuple,
typename ElementwiseOperation, typename ElementwiseOperation,
typename UnaryOperation, typename UnaryOperation,
typename Scale, typename Scale,
index_t NumDim> index_t NumDim>
struct DeviceElementwise : public BaseOperator struct DeviceElementwise : public BaseOperator
{ {
...@@ -32,7 +32,7 @@ struct DeviceElementwise : public BaseOperator ...@@ -32,7 +32,7 @@ struct DeviceElementwise : public BaseOperator
const std::array<void*, NumOutput> out_dev_buffers, const std::array<void*, NumOutput> out_dev_buffers,
ElementwiseOperation elementwise_op, ElementwiseOperation elementwise_op,
UnaryOperation unary_op, UnaryOperation unary_op,
Scale scale_op) = 0; Scale scale_op) = 0;
virtual std::unique_ptr<BaseInvoker> MakeInvokerPointer() = 0; virtual std::unique_ptr<BaseInvoker> MakeInvokerPointer() = 0;
}; // namespace device }; // namespace device
...@@ -41,13 +41,13 @@ template <typename InDataTypeTuple, ...@@ -41,13 +41,13 @@ template <typename InDataTypeTuple,
typename OutDataTypeTuple, typename OutDataTypeTuple,
typename ElementwiseOperation, typename ElementwiseOperation,
typename UnaryOperation, typename UnaryOperation,
typename Scale, typename Scale,
index_t NumDim> index_t NumDim>
using DeviceElementwisePtr = std::unique_ptr<DeviceElementwise<InDataTypeTuple, using DeviceElementwisePtr = std::unique_ptr<DeviceElementwise<InDataTypeTuple,
OutDataTypeTuple, OutDataTypeTuple,
ElementwiseOperation, ElementwiseOperation,
UnaryOperation, UnaryOperation,
Scale, Scale,
NumDim>>; NumDim>>;
} // namespace device } // namespace device
......
...@@ -8,8 +8,8 @@ ...@@ -8,8 +8,8 @@
#include "ck/utility/math.hpp" #include "ck/utility/math.hpp"
#include "ck/utility/sequence.hpp" #include "ck/utility/sequence.hpp"
#include "ck/tensor_operation/gpu/device/device_elementwise_ht.hpp" #include "ck/tensor_operation/gpu/device/device_elementwise_scale.hpp"
#include "ck/tensor_operation/gpu/grid/gridwise_elementwise_1d_ht.hpp" #include "ck/tensor_operation/gpu/grid/gridwise_elementwise_1d_scale.hpp"
#include "ck/tensor_description/tensor_descriptor_helper.hpp" #include "ck/tensor_description/tensor_descriptor_helper.hpp"
#include "ck/host_utility/kernel_launch.hpp" #include "ck/host_utility/kernel_launch.hpp"
...@@ -23,7 +23,7 @@ template <typename InDataTypeTuple, ...@@ -23,7 +23,7 @@ template <typename InDataTypeTuple,
typename OutDataTypeTuple, typename OutDataTypeTuple,
typename ElementwiseOperation, typename ElementwiseOperation,
typename UnaryOperation, typename UnaryOperation,
typename Scale, typename Scale,
index_t NumDim, index_t NumDim,
index_t MPerThread, index_t MPerThread,
typename InScalarPerVectorSeq, typename InScalarPerVectorSeq,
...@@ -32,7 +32,7 @@ struct DeviceElementwiseImpl : public DeviceElementwise<InDataTypeTuple, ...@@ -32,7 +32,7 @@ struct DeviceElementwiseImpl : public DeviceElementwise<InDataTypeTuple,
OutDataTypeTuple, OutDataTypeTuple,
ElementwiseOperation, ElementwiseOperation,
UnaryOperation, UnaryOperation,
Scale, Scale,
NumDim> NumDim>
{ {
static constexpr int NumInput = InDataTypeTuple::Size(); static constexpr int NumInput = InDataTypeTuple::Size();
...@@ -135,7 +135,7 @@ struct DeviceElementwiseImpl : public DeviceElementwise<InDataTypeTuple, ...@@ -135,7 +135,7 @@ struct DeviceElementwiseImpl : public DeviceElementwise<InDataTypeTuple,
OutDataTypePointerTuple, OutDataTypePointerTuple,
ElementwiseOperation, ElementwiseOperation,
UnaryOperation, UnaryOperation,
Scale, Scale,
MPerThread, MPerThread,
InScalarPerVectorSeq, InScalarPerVectorSeq,
OutScalarPerVectorSeq>; OutScalarPerVectorSeq>;
...@@ -149,14 +149,14 @@ struct DeviceElementwiseImpl : public DeviceElementwise<InDataTypeTuple, ...@@ -149,14 +149,14 @@ struct DeviceElementwiseImpl : public DeviceElementwise<InDataTypeTuple,
const std::array<void*, NumOutput> out_dev_buffers, const std::array<void*, NumOutput> out_dev_buffers,
ElementwiseOperation elementwise_op, ElementwiseOperation elementwise_op,
UnaryOperation unary_op, UnaryOperation unary_op,
Scale scale_op) Scale scale_op)
: lengths_(lengths), : lengths_(lengths),
inStridesArray_(inStridesArray), inStridesArray_(inStridesArray),
outStridesArray_(outStridesArray), outStridesArray_(outStridesArray),
elementwise_op_(elementwise_op), elementwise_op_(elementwise_op),
unary_op_(unary_op), unary_op_(unary_op),
scale_op_(scale_op), scale_op_(scale_op),
blockSize_(256) blockSize_(256)
{ {
in_dev_buffers_ = generate_tuple( in_dev_buffers_ = generate_tuple(
...@@ -183,7 +183,7 @@ struct DeviceElementwiseImpl : public DeviceElementwise<InDataTypeTuple, ...@@ -183,7 +183,7 @@ struct DeviceElementwiseImpl : public DeviceElementwise<InDataTypeTuple,
ElementwiseOperation elementwise_op_; ElementwiseOperation elementwise_op_;
UnaryOperation unary_op_; UnaryOperation unary_op_;
Scale scale_op_; Scale scale_op_;
index_t blockSize_; index_t blockSize_;
}; };
...@@ -214,7 +214,7 @@ struct DeviceElementwiseImpl : public DeviceElementwise<InDataTypeTuple, ...@@ -214,7 +214,7 @@ struct DeviceElementwiseImpl : public DeviceElementwise<InDataTypeTuple,
OutDataTypePointerTuple, OutDataTypePointerTuple,
ElementwiseOperation, ElementwiseOperation,
UnaryOperation, UnaryOperation,
Scale>; Scale>;
float elapsed_time = launch_and_time_kernel(stream_config, float elapsed_time = launch_and_time_kernel(stream_config,
kernel, kernel,
...@@ -227,7 +227,7 @@ struct DeviceElementwiseImpl : public DeviceElementwise<InDataTypeTuple, ...@@ -227,7 +227,7 @@ struct DeviceElementwiseImpl : public DeviceElementwise<InDataTypeTuple,
arg.out_dev_buffers_, arg.out_dev_buffers_,
arg.elementwise_op_, arg.elementwise_op_,
arg.unary_op_, arg.unary_op_,
arg.scale_op_); arg.scale_op_);
return elapsed_time; return elapsed_time;
} }
...@@ -285,7 +285,7 @@ struct DeviceElementwiseImpl : public DeviceElementwise<InDataTypeTuple, ...@@ -285,7 +285,7 @@ struct DeviceElementwiseImpl : public DeviceElementwise<InDataTypeTuple,
const std::array<void*, NumOutput> out_dev_buffers, const std::array<void*, NumOutput> out_dev_buffers,
ElementwiseOperation elementwise_op, ElementwiseOperation elementwise_op,
UnaryOperation unary_op, UnaryOperation unary_op,
Scale scale_op) Scale scale_op)
{ {
return Argument{lengths, return Argument{lengths,
inStridesArray, inStridesArray,
...@@ -294,7 +294,7 @@ struct DeviceElementwiseImpl : public DeviceElementwise<InDataTypeTuple, ...@@ -294,7 +294,7 @@ struct DeviceElementwiseImpl : public DeviceElementwise<InDataTypeTuple,
out_dev_buffers, out_dev_buffers,
elementwise_op, elementwise_op,
unary_op, unary_op,
scale_op}; scale_op};
} }
std::unique_ptr<BaseArgument> std::unique_ptr<BaseArgument>
...@@ -305,7 +305,7 @@ struct DeviceElementwiseImpl : public DeviceElementwise<InDataTypeTuple, ...@@ -305,7 +305,7 @@ struct DeviceElementwiseImpl : public DeviceElementwise<InDataTypeTuple,
const std::array<void*, NumOutput> out_dev_buffers, const std::array<void*, NumOutput> out_dev_buffers,
ElementwiseOperation elementwise_op, ElementwiseOperation elementwise_op,
UnaryOperation unary_op, UnaryOperation unary_op,
Scale scale_op) override Scale scale_op) override
{ {
return std::make_unique<Argument>(lengths, return std::make_unique<Argument>(lengths,
inStridesArray, inStridesArray,
...@@ -314,7 +314,7 @@ struct DeviceElementwiseImpl : public DeviceElementwise<InDataTypeTuple, ...@@ -314,7 +314,7 @@ struct DeviceElementwiseImpl : public DeviceElementwise<InDataTypeTuple,
out_dev_buffers, out_dev_buffers,
elementwise_op, elementwise_op,
unary_op, unary_op,
scale_op); scale_op);
} }
static auto MakeInvoker() { return Invoker{}; } static auto MakeInvoker() { return Invoker{}; }
......
...@@ -7,11 +7,6 @@ ...@@ -7,11 +7,6 @@
#include "ck/utility/data_type.hpp" #include "ck/utility/data_type.hpp"
#include "ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer.hpp" #include "ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#define UNUSED(expr) \
do \
{ \
(void)(expr); \
} while(0)
namespace ck { namespace ck {
...@@ -163,7 +158,6 @@ struct GridwiseElementwise_1D ...@@ -163,7 +158,6 @@ struct GridwiseElementwise_1D
}, },
Number<NumOutput>{}); Number<NumOutput>{});
// const auto& scalar = ScalarMult;
index_t num_iter = M / (loop_step); index_t num_iter = M / (loop_step);
do do
{ {
...@@ -211,7 +205,6 @@ struct GridwiseElementwise_1D ...@@ -211,7 +205,6 @@ struct GridwiseElementwise_1D
Number<NumInput>{}); Number<NumInput>{});
unpack2(elementwise_op, out_data_refs, in_data_refs); unpack2(elementwise_op, out_data_refs, in_data_refs);
UNUSED(scale_op);
}); });
static_for<0, NumOutput, 1>{}([&](auto I) { static_for<0, NumOutput, 1>{}([&](auto I) {
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment