Commit 36f6966a authored by Astha Rai
Browse files

cleaned up comments, rearranged/renamed files

parent 995c6b1c
add_example_executable(example_elementwise_permute_4D_fp16 elementwise_permute_4D_fp16.cpp)
add_example_executable(example_elementwise_permute_4D_fp16_2d elementwise_permute_4D_fp16_2d.cpp)
add_example_executable(example_elementwise_permute_4D_fp32_row elementwise_permute_4D_fp32_row.cpp)
add_example_executable(example_elementwise_permute_4D_fp16_row elementwise_permute_4D_fp16_row.cpp)
add_example_executable(example_elementwise_permute_4D_fp32_col elementwise_permute_4D_fp32_col.cpp)
add_example_executable(example_elementwise_permute_4D_fp16_col elementwise_permute_4D_fp16_col.cpp)
......@@ -3,7 +3,7 @@
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/element/binary_element_wise_operation.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_elementwise_impl_ht.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_elementwise_scale_impl.hpp"
#include "ck/library/utility/algorithm.hpp"
#include "ck/library/utility/check_err.hpp"
......
......@@ -3,7 +3,7 @@
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/element/binary_element_wise_operation.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_elementwise_impl_ht.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_elementwise_scale_impl.hpp"
#include "ck/library/utility/algorithm.hpp"
#include "ck/library/utility/check_err.hpp"
......
......@@ -3,7 +3,7 @@
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/element/binary_element_wise_operation.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_elementwise_impl_ht.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_elementwise_scale_impl.hpp"
#include "ck/library/utility/algorithm.hpp"
#include "ck/library/utility/check_err.hpp"
......@@ -27,7 +27,7 @@ using DeviceElementwisePermuteInstance =
UnaryOp, // UnaryOp
Scale, // Scalar
4, // NumDim
8, // MPerThread
1, // MPerThread
ck::Sequence<1>, // InScalarPerVectorSeq
ck::Sequence<1>>; // OutScalarPerVectorSeq
......@@ -60,10 +60,11 @@ int main()
bool do_verification = true;
bool time_kernel = true;
std::vector<std::size_t> nchw = {4, 2, 1, 8};
std::vector<std::size_t> nhwc = {4, 1, 8, 2};
std::vector<std::size_t> nchw = {5, 4, 2, 3};
std::vector<std::size_t> nhwc = {5, 2, 3, 4};
Tensor<ADataType> a(nchw);
Tensor<BDataType> b(nhwc);
float scale = 1.f;
auto i = 0;
for(std::size_t w = 0; w < a.mDesc.GetLengths()[3]; ++w)
......
......@@ -3,7 +3,7 @@
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/element/binary_element_wise_operation.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_elementwise_impl_ht.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_elementwise_scale_impl.hpp"
#include "ck/library/utility/algorithm.hpp"
#include "ck/library/utility/check_err.hpp"
......
add_example_executable(example_elementwise_permute_4D_fp32_ht elementwise_permute_4D_fp32_ht.cpp)
add_example_executable(example_elementwise_permute_4D_fp16_ht elementwise_permute_4D_fp16_ht.cpp)
add_example_executable(example_elementwise_permute_4D_fp32_col elementwise_permute_4D_fp32_col.cpp)
add_example_executable(example_elementwise_permute_4D_fp16_col elementwise_permute_4D_fp16_col.cpp)
......@@ -8,8 +8,8 @@
#include "ck/utility/math.hpp"
#include "ck/utility/sequence.hpp"
#include "ck/tensor_operation/gpu/device/device_elementwise_ht.hpp"
#include "ck/tensor_operation/gpu/grid/gridwise_elementwise_1d_ht.hpp"
#include "ck/tensor_operation/gpu/device/device_elementwise_scale.hpp"
#include "ck/tensor_operation/gpu/grid/gridwise_elementwise_1d_scale.hpp"
#include "ck/tensor_description/tensor_descriptor_helper.hpp"
#include "ck/host_utility/kernel_launch.hpp"
......
......@@ -7,11 +7,6 @@
#include "ck/utility/data_type.hpp"
#include "ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
// UNUSED(expr): evaluates `expr` and discards the result through a cast to void.
// The do { ... } while(0) wrapper makes the expansion a single statement, so a
// call site must end with a semicolon and the macro stays safe inside an
// unbraced if/else.
// NOTE(review): presumably used to silence unused-variable/parameter warnings
// (e.g. `UNUSED(scale_op);`) — confirm against the call sites.
#define UNUSED(expr) \
do \
{ \
(void)(expr); \
} while(0)
namespace ck {
......@@ -163,7 +158,6 @@ struct GridwiseElementwise_1D
},
Number<NumOutput>{});
// const auto& scalar = ScalarMult;
index_t num_iter = M / (loop_step);
do
{
......@@ -211,7 +205,6 @@ struct GridwiseElementwise_1D
Number<NumInput>{});
unpack2(elementwise_op, out_data_refs, in_data_refs);
UNUSED(scale_op);
});
static_for<0, NumOutput, 1>{}([&](auto I) {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment