Commit 985fb0dd authored by turneram's avatar turneram
Browse files

Add n-dimensional inputs

parent 953da942
......@@ -187,15 +187,6 @@ struct ck_elementwise_compiler : compiler<ck_elementwise_compiler>
operation compile_op(context& ctx, const std::vector<shape>& inputs, const value& v) const
{
// hip_compile_options options;
// auto out_s = inputs.back();
// options.set_launch_params(v, compute_global_for(ctx, out_s.elements()));
// options.inputs = inputs;
// options.output = out_s;
// options.kernel_name = "ck_elementwise_kernel";
// options.virtual_inputs = inputs;
// return compile_hip_code_object(ck_elementwise_kernel, options);
hip_compile_options options;
options.inputs = inputs;
options.output = inputs.back();
......@@ -208,7 +199,7 @@ struct ck_elementwise_compiler : compiler<ck_elementwise_compiler>
options.set_launch_params(
v,
compute_global_for(ctx,
options.output.elements() / vec.size,
options.output.elements() / (vec.size * 4),
oversubscribe_if(not preloads.is_preloading())));
return compile_hip_code_object(ck_elementwise_kernel, options);
}
......
......@@ -37,46 +37,57 @@
namespace migraphx {
using ADataType = ck::half_t; // float;
using BDataType = ck::half_t; // float;
using CDataType = ck::half_t; // float;
using ElementwiseFunctor = float;
using ADataType = ck::half_t;
using BDataType = ck::half_t;
using CDataType = ck::half_t;
using ElementwiseFunctor = ck::half_t;
static constexpr auto I0 = ck::Number<0>{};
template <class L, class S, class N>
constexpr auto MakeDescriptor_M(const L& lengths, const S& strides, const N& /* ndim */)
template <ck::index_t ndim>
struct CKBinaryElementwise
{
auto gridSize = 72;
auto blockSize = 1024;
constexpr auto ndim = 1;
// auto idx = make_index();
auto tupleOfShape = generate_tuple([&](auto I) { return static_cast<ck::index_t>(lengths[I]); },
ck::Number<ndim>{});
auto tupleOfStride = generate_tuple(
[&](auto I) { return static_cast<ck::index_t>(strides[I]); }, ck::Number<1>{});
const auto desc = make_naive_tensor_descriptor(tupleOfShape, tupleOfStride);
auto desc_m = desc;
// merge nd to 1d desc - [s0 * s1 * ...]
if constexpr(ndim > 1)
template <class Desc_M>
constexpr auto PadDescriptor_M_1d(Desc_M desc_m)
{
desc_m = transform_tensor_descriptor(
desc,
make_tuple(make_merge_transform(tupleOfShape)),
make_tuple(generate_sequence_v2([&](auto I) { return I; }, ck::Number<ndim>{})),
make_tuple(ck::Sequence<0>{}));
auto gridSize = 72;
auto blockSize = 1024;
auto MPerThread = 8;
const auto M = desc_m.GetLength(I0);
const ck::index_t loop_step = gridSize * blockSize * MPerThread;
const auto pad = ck::math::integer_least_multiple(M, loop_step) - M;
const auto desc_m_pad =
transform_tensor_descriptor(desc_m,
make_tuple(ck::make_right_pad_transform(M, pad)),
make_tuple(ck::Sequence<0>{}),
make_tuple(ck::Sequence<0>{}));
return desc_m_pad;
}
const auto M = desc_m.GetLength(I0);
const ck::index_t loop_step = /* idx.nglobal(); // */ gridSize * blockSize /* * MPerThread */;
const auto pad = ck::math::integer_least_multiple(M, loop_step) - M;
const auto desc_m_pad =
transform_tensor_descriptor(desc_m,
make_tuple(ck::make_right_pad_transform(M, pad)),
make_tuple(ck::Sequence<0>{}),
make_tuple(ck::Sequence<0>{}));
return desc_m_pad;
}
template <class L, class S>
constexpr auto MakeDescriptor_M(const L& lengths, const S& strides)
{
auto tupleOfShape = generate_tuple([&](auto I) { return static_cast<ck::index_t>(lengths[I]); },
ck::Number<ndim>{});
auto tupleOfStride = generate_tuple(
[&](auto I) { return static_cast<ck::index_t>(strides[I]); }, ck::Number<ndim>{});
const auto desc = make_naive_tensor_descriptor(tupleOfShape, tupleOfStride);
// merge nd to 1d desc - [s0 * s1 * ...]
if constexpr(ndim > 1)
{
const auto desc_m = transform_tensor_descriptor(
desc,
make_tuple(make_merge_transform(tupleOfShape)),
make_tuple(generate_sequence_v2([&](auto I) { return I; }, ck::Number<ndim>{})),
make_tuple(ck::Sequence<0>{}));
return PadDescriptor_M_1d(desc_m);
}
else
{
return PadDescriptor_M_1d(desc);
}
}
};
struct Add
{
......@@ -90,26 +101,41 @@ struct Add
template <class T, class U, class V>
__device__ void ck_elementwise(const T& a_t, const U& b_t, const V& c_t)
{
// auto idx = make_index();
constexpr auto lengths = get_shape_c<T>{}.lens;
constexpr auto strides = get_shape_c<T>{}.strides;
constexpr auto a_desc = MakeDescriptor_M(lengths, strides, 1);
constexpr auto a_lens = get_shape_c<T>{}.lens;
constexpr auto a_strides = get_shape_c<T>{}.strides;
constexpr ck::index_t a_ndim = decltype(a_lens.size()){};
auto a_bin_op = CKBinaryElementwise<a_ndim>{};
constexpr auto a_desc = a_bin_op.MakeDescriptor_M(a_lens, a_strides);
constexpr auto b_lens = get_shape_c<U>{}.lens;
constexpr auto b_strides = get_shape_c<U>{}.strides;
constexpr ck::index_t b_ndim = decltype(b_lens.size()){};
auto b_bin_op = CKBinaryElementwise<b_ndim>{};
constexpr auto b_desc = b_bin_op.MakeDescriptor_M(b_lens, b_strides);
constexpr auto c_lens = get_shape_c<V>{}.lens;
constexpr auto c_strides = get_shape_c<V>{}.strides;
constexpr ck::index_t c_ndim = decltype(c_lens.size()){};
auto c_bin_op = CKBinaryElementwise<c_ndim>{};
constexpr auto c_desc = c_bin_op.MakeDescriptor_M(c_lens, c_strides);
using AGridDesc_M = decltype(a_desc);
using BGridDesc_M = decltype(b_desc);
using CGridDesc_M = decltype(c_desc);
using GridwiseBinEltwise = ck::GridwiseBinaryElementwise_1D<ADataType,
BDataType,
CDataType,
CDataType,
AGridDesc_M,
AGridDesc_M,
AGridDesc_M,
BGridDesc_M,
CGridDesc_M,
Add,
4,
4,
4,
4>;
8,
8,
8,
8>;
auto op = Add{};
GridwiseBinEltwise::Run(a_t.data(), b_t.data(), c_t.data(), a_desc, a_desc, a_desc, op);
GridwiseBinEltwise::Run(a_t.data(), b_t.data(), c_t.data(), a_desc, b_desc, c_desc, op);
}
} // namespace migraphx
......
......@@ -33,10 +33,11 @@ struct ck_elementwise_half : verify_program<ck_elementwise_half>
{
migraphx::program p;
auto* mm = p.get_main_module();
migraphx::shape m1_shape{migraphx::shape::half_type, {2000}};
// migraphx::shape m2_shape{migraphx::shape::float_type, {20, 10}};
migraphx::shape m1_shape{migraphx::shape::half_type, {2, 384, 3072}};
migraphx::shape m2_shape{migraphx::shape::half_type, {3072}};
auto l1 = mm->add_parameter("1", m1_shape);
auto l2 = mm->add_parameter("2", m1_shape);
auto l2 = mm->add_parameter("2", m2_shape);
l2 = mm->add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", {2, 384, 3072}}}), l2);
mm->add_instruction(migraphx::make_op("ck_elementwise"), l1, l2);
......
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "verify_program.hpp"
#include <migraphx/program.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/make_op.hpp>
struct ck_elementwise : verify_program<ck_elementwise>
{
migraphx::program create_program() const
{
migraphx::program p;
auto* mm = p.get_main_module();
migraphx::shape m1_shape{migraphx::shape::float_type, {20}};
// migraphx::shape m2_shape{migraphx::shape::float_type, {20, 10}};
auto l1 = mm->add_parameter("1", m1_shape);
auto l2 = mm->add_parameter("2", m1_shape);
mm->add_instruction(migraphx::make_op("ck_elementwise"), l1, l2);
return p;
}
};
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment