Create half_t test

9a7bb6d2 · turneram · f7838bc8 · 9a7bb6d2 · 9a7bb6d2 · 9a7bb6d2
Commit 9a7bb6d2 authored Sep 12, 2022 by turneram
3 changed files
--- a/src/targets/gpu/jit/ck_elementwise.cpp
+++ b/src/targets/gpu/jit/ck_elementwise.cpp
@@ -43,135 +43,135 @@ namespace gpu {
 using namespace migraphx::gpu::gen; // NOLINT
-static const char* const ck_elementwise_kernel = R"__migraphx__(
+// static const char* const ck_elementwise_kernel = R"__migraphx__(
-//#include <migraphx/kernels/ck_elementwise.hpp>
+// //#include <migraphx/kernels/ck_elementwise.hpp>
-#include <migraphx/kernels/ops.hpp>
+// #include <migraphx/kernels/ops.hpp>
-#include <migraphx/kernels/integral_constant.hpp>
+// #include <migraphx/kernels/integral_constant.hpp>
-#include <migraphx/kernels/generic_constant.hpp>
+// #include <migraphx/kernels/generic_constant.hpp>
-#include <args.hpp>
+// #include <args.hpp>
-#include <migraphx/kernels/index.hpp>
+// #include <migraphx/kernels/index.hpp>
-#include <migraphx/kernels/algorithm.hpp>
+// #include <migraphx/kernels/algorithm.hpp>
-#include <migraphx/kernels/integral_constant.hpp>
+// #include <migraphx/kernels/integral_constant.hpp>
-#include <migraphx/kernels/tensor_view.hpp>
+// #include <migraphx/kernels/tensor_view.hpp>
-#include "ck/device_utility/device_prop.hpp"
+// #include "ck/device_utility/device_prop.hpp"
-#include "ck/device_utility/kernel_launch.hpp"
+// #include "ck/device_utility/kernel_launch.hpp"
-#include "ck/tensor_operation/gpu/device/device_base.hpp"
+// #include "ck/tensor_operation/gpu/device/device_base.hpp"
-#include "ck/tensor_operation/gpu/device/device_elementwise.hpp"
+// #include "ck/tensor_operation/gpu/device/device_elementwise.hpp"
-#include "ck/tensor_operation/gpu/grid/gridwise_binary_elementwise_1d.hpp"
+// #include "ck/tensor_operation/gpu/grid/gridwise_binary_elementwise_1d.hpp"
-namespace migraphx {
+// namespace migraphx {
-using ADataType          = float;
+// using ADataType          = float;
-using BDataType          = float;
+// using BDataType          = float;
-using CDataType          = float;
+// using CDataType          = float;
-using ElementwiseFunctor = float;
+// using ElementwiseFunctor = float;
-static constexpr auto I0 = ck::Number<0>{};
+// static constexpr auto I0 = ck::Number<0>{};
-template <class L, class S, class N>
+// template <class L, class S, class N>
-constexpr auto MakeDescriptor_M(const L& lengths, const S& strides, const N& ndim)
+// constexpr auto MakeDescriptor_M(const L& lengths, const S& strides, const N& ndim)
-{
+// {
-    auto gridSize       = 72;
+//     auto gridSize       = 72;
-    auto blockSize      = 1024;
+//     auto blockSize      = 1024;
-    //constexpr auto ndim = 1;
+//     //constexpr auto ndim = 1;
-    // auto idx          = make_index();
+//     // auto idx          = make_index();
-    auto tupleOfShape = generate_tuple([&](auto I) { return static_cast<ck::index_t>(lengths[I]); },
+//     auto tupleOfShape = generate_tuple([&](auto I) { return static_cast<ck::index_t>(lengths[I]); },
-                                       ck::Number<ndim>{});
+//                                        ck::Number<ndim>{});
-    auto tupleOfStride = generate_tuple(
+//     auto tupleOfStride = generate_tuple(
-        [&](auto I) { return static_cast<ck::index_t>(strides[I]); }, ck::Number<1>{});
+//         [&](auto I) { return static_cast<ck::index_t>(strides[I]); }, ck::Number<1>{});
-    const auto desc = make_naive_tensor_descriptor(tupleOfShape, tupleOfStride);
+//     const auto desc = make_naive_tensor_descriptor(tupleOfShape, tupleOfStride);
-    auto desc_m     = desc;
+//     auto desc_m     = desc;
-    // merge nd to 1d desc - [s0 * s1 * ...]
+//     // merge nd to 1d desc - [s0 * s1 * ...]
-    if constexpr(ndim > 1)
+//     if constexpr(ndim > 1)
-    {
+//     {
-        desc_m = transform_tensor_descriptor(
+//         desc_m = transform_tensor_descriptor(
-            desc,
+//             desc,
-            make_tuple(make_merge_transform(tupleOfShape)),
+//             make_tuple(make_merge_transform(tupleOfShape)),
-            make_tuple(generate_sequence_v2([&](auto I) { return I; }, ck::Number<ndim>{})),
+//             make_tuple(generate_sequence_v2([&](auto I) { return I; }, ck::Number<ndim>{})),
-            make_tuple(ck::Sequence<0>{}));
+//             make_tuple(ck::Sequence<0>{}));
-    }
+//     }
-    const auto M                = desc_m.GetLength(I0);
+//     const auto M                = desc_m.GetLength(I0);
-    const ck::index_t loop_step = /* idx.nglobal(); // */ gridSize * blockSize /*  * MPerThread */;
+//     const ck::index_t loop_step = /* idx.nglobal(); // */ gridSize * blockSize /*  * MPerThread */;
-    const auto pad              = ck::math::integer_least_multiple(M, loop_step) - M;
+//     const auto pad              = ck::math::integer_least_multiple(M, loop_step) - M;
-    const auto desc_m_pad =
+//     const auto desc_m_pad =
-        transform_tensor_descriptor(desc_m,
+//         transform_tensor_descriptor(desc_m,
-                                    make_tuple(ck::make_right_pad_transform(M, pad)),
+//                                     make_tuple(ck::make_right_pad_transform(M, pad)),
-                                    make_tuple(ck::Sequence<0>{}),
+//                                     make_tuple(ck::Sequence<0>{}),
-                                    make_tuple(ck::Sequence<0>{}));
+//                                     make_tuple(ck::Sequence<0>{}));
-    return desc_m_pad;
+//     return desc_m_pad;
-}
+// }
-struct Add
+// struct Add
-{
+// {
-    template <typename Y, typename X0, typename X1>
+//     template <typename Y, typename X0, typename X1>
-    __device__ constexpr void operator()(Y& y, const X0& x0, const X1& x1) const
+//     __device__ constexpr void operator()(Y& y, const X0& x0, const X1& x1) const
-    {
+//     {
-        y = x0 + x1;
+//         y = x0 + x1;
-    };
+//     };
-};
+// };
-extern "C" {
+// extern "C" {
-__global__ void ck_elementwise_kernel(void* a_p, void* b_p, void* c_p) 
+// __global__ void ck_elementwise_kernel(void* a_p, void* b_p, void* c_p) 
-{
+// {
-    make_tensors()(a_p, b_p, c_p)([](auto a_t, auto b_t, auto c_t) { 
+//     make_tensors()(a_p, b_p, c_p)([](auto a_t, auto b_t, auto c_t) { 
-        constexpr auto lengths = get_shape_c<decltype(a_t)>{}.lens;
+//         constexpr auto lengths = get_shape_c<decltype(a_t)>{}.lens;
-        constexpr auto strides = get_shape_c<decltype(a_t)>{}.strides;
+//         constexpr auto strides = get_shape_c<decltype(a_t)>{}.strides;
-        constexpr auto ndim = _c<decltype(lengths.size()){}>[1];
+//         constexpr auto ndim = _c<decltype(lengths.size()){}>[1];
-        constexpr auto a_desc  = MakeDescriptor_M(lengths, strides, ndim);
+//         constexpr auto a_desc  = MakeDescriptor_M(lengths, strides, ndim);
-        using AGridDesc_M        = decltype(a_desc);
+//         using AGridDesc_M        = decltype(a_desc);
-        using GridwiseBinEltwise = ck::GridwiseBinaryElementwise_1D<ADataType,
+//         using GridwiseBinEltwise = ck::GridwiseBinaryElementwise_1D<ADataType,
-                                                                    BDataType,
+//                                                                     BDataType,
-                                                                    CDataType,
+//                                                                     CDataType,
-                                                                    CDataType,
+//                                                                     CDataType,
-                                                                    AGridDesc_M,
+//                                                                     AGridDesc_M,
-                                                                    AGridDesc_M,
+//                                                                     AGridDesc_M,
-                                                                    AGridDesc_M,
+//                                                                     AGridDesc_M,
-                                                                    Add,
+//                                                                     Add,
-                                                                    1,
+//                                                                     1,
-                                                                    1,
+//                                                                     1,
-                                                                    1,
+//                                                                     1,
-                                                                    1>;
+//                                                                     1>;
-        auto op                  = Add{};
+//         auto op                  = Add{};
-        GridwiseBinEltwise::Run(a_t.data(), b_t.data(), c_t.data(), a_desc, a_desc, a_desc, op);
+//         GridwiseBinEltwise::Run(a_t.data(), b_t.data(), c_t.data(), a_desc, a_desc, a_desc, op);
-    });
+//     });
-}
+// }
-}
+// }
-} // namespace migraphx
+// } // namespace migraphx
-)__migraphx__";
+// )__migraphx__";
 // NOLINTNEXTLINE
-// static const char* const ck_elementwise_kernel = R"__migraphx__(
+static const char* const ck_elementwise_kernel = R"__migraphx__(
-// #include <migraphx/kernels/ck_elementwise.hpp>
+#include <migraphx/kernels/ck_elementwise.hpp>
-// #include <migraphx/kernels/ops.hpp>
+#include <migraphx/kernels/ops.hpp>
-// #include <migraphx/kernels/integral_constant.hpp>
+#include <migraphx/kernels/integral_constant.hpp>
-// #include <migraphx/kernels/generic_constant.hpp>
+#include <migraphx/kernels/generic_constant.hpp>
-// #include <args.hpp>
+#include <args.hpp>
-// namespace migraphx {
+namespace migraphx {
-// extern "C" {
+extern "C" {
-// __global__ void ck_elementwise_kernel(void* a_p, void* b_p, void* c_p)
+__global__ void ck_elementwise_kernel(void* a_p, void* b_p, void* c_p)
-// {
+{
-//     make_tensors()(a_p, b_p, c_p)([](auto&&... xs) {
+    make_tensors()(a_p, b_p, c_p)([](auto&&... xs) {
-//         ck_elementwise(xs...);
+        ck_elementwise(xs...);
-//     });
+    });
-// }
+}
-// }
+}
-// } // namespace migraphx
+} // namespace migraphx
-// )__migraphx__";
+)__migraphx__";
 struct ck_elementwise_compiler : compiler<ck_elementwise_compiler>
 {

--- a/src/targets/gpu/kernels/include/migraphx/kernels/ck_elementwise.hpp
+++ b/src/targets/gpu/kernels/include/migraphx/kernels/ck_elementwise.hpp
@@ -37,9 +37,9 @@
 namespace migraphx {
-using ADataType          = float;
+using ADataType          = ck::half_t; //float;
-using BDataType          = float;
+using BDataType          = ck::half_t; //float;
-using CDataType          = float;
+using CDataType          = ck::half_t; //float;
 using ElementwiseFunctor = float;
 static constexpr auto I0 = ck::Number<0>{};
@@ -90,7 +90,7 @@ struct Add
 template <class T, class U, class V>
 __device__ void ck_elementwise(const T& a_t, const U& b_t, const V& c_t)
 {
-    auto idx               = make_index();
+    //auto idx               = make_index();
    constexpr auto lengths = get_shape_c<T>{}.lens;
    constexpr auto strides = get_shape_c<T>{}.strides;
    constexpr auto a_desc  = MakeDescriptor_M(lengths, strides, 1);
@@ -104,10 +104,10 @@ __device__ void ck_elementwise(const T& a_t, const U& b_t, const V& c_t)
                                                                AGridDesc_M,
                                                                AGridDesc_M,
                                                                Add,
-                                                                1,
+                                                                4,
-                                                                1,
+                                                                4,
-                                                                1,
+                                                                4,
-                                                                1>;
+                                                                4>;
    auto op                  = Add{};
    GridwiseBinEltwise::Run(a_t.data(), b_t.data(), c_t.data(), a_desc, a_desc, a_desc, op);
 }

--- a/test/verify/0ck_elementwise_half_test.cpp
+++ b/test/verify/0ck_elementwise_half_test.cpp
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "verify_program.hpp"
+#include <migraphx/program.hpp>
+#include <migraphx/generate.hpp>
+#include <migraphx/make_op.hpp>
+struct ck_elementwise_half : verify_program<ck_elementwise_half>
+{
+    migraphx::program create_program() const
+    {
+        migraphx::program p;
+        auto* mm = p.get_main_module();
+        migraphx::shape m1_shape{migraphx::shape::half_type, {2000}};
+        // migraphx::shape m2_shape{migraphx::shape::float_type, {20, 10}};
+        auto l1 = mm->add_parameter("1", m1_shape);
+        auto l2 = mm->add_parameter("2", m1_shape);
+        mm->add_instruction(migraphx::make_op("ck_elementwise"), l1, l2);
+        return p;
+    }
+};