Commit 961cf059 authored by turneram's avatar turneram
Browse files

Remove ck from cmakelists

parent 2593dd60
...@@ -25,7 +25,6 @@ ...@@ -25,7 +25,6 @@
list(APPEND CMAKE_PREFIX_PATH /opt/rocm /opt/rocm/hip /opt/rocm/hcc /opt/rocm/ck) list(APPEND CMAKE_PREFIX_PATH /opt/rocm /opt/rocm/hip /opt/rocm/hcc /opt/rocm/ck)
find_package(miopen) find_package(miopen)
find_package(composable_kernel 1.0.0 COMPONENTS device_operations)
find_package(hip REQUIRED PATHS /opt/rocm) find_package(hip REQUIRED PATHS /opt/rocm)
# rocblas # rocblas
...@@ -393,8 +392,8 @@ endif() ...@@ -393,8 +392,8 @@ endif()
# Workaround broken rocblas headers # Workaround broken rocblas headers
target_compile_definitions(migraphx_gpu PUBLIC -D__HIP_PLATFORM_HCC__=1) target_compile_definitions(migraphx_gpu PUBLIC -D__HIP_PLATFORM_HCC__=1)
target_link_libraries(migraphx_gpu PUBLIC migraphx MIOpen roc::rocblas) target_link_libraries(migraphx_gpu PUBLIC migraphx MIOpen roc::rocblas)
target_link_libraries(migraphx_gpu PRIVATE migraphx_device migraphx_kernels composable_kernel::device_operations) target_link_libraries(migraphx_gpu PRIVATE migraphx_device migraphx_kernels)
target_include_directories(migraphx_gpu PRIVATE /opt/rocm/ck/include) #target_include_directories(migraphx_gpu PRIVATE /opt/rocm/ck/include)
add_subdirectory(driver) add_subdirectory(driver)
......
...@@ -43,111 +43,6 @@ namespace gpu { ...@@ -43,111 +43,6 @@ namespace gpu {
using namespace migraphx::gpu::gen; // NOLINT using namespace migraphx::gpu::gen; // NOLINT
// static const char* const ck_elementwise_kernel = R"__migraphx__(
// //#include <migraphx/kernels/ck_elementwise.hpp>
// #include <migraphx/kernels/ops.hpp>
// #include <migraphx/kernels/integral_constant.hpp>
// #include <migraphx/kernels/generic_constant.hpp>
// #include <args.hpp>
// #include <migraphx/kernels/index.hpp>
// #include <migraphx/kernels/algorithm.hpp>
// #include <migraphx/kernels/integral_constant.hpp>
// #include <migraphx/kernels/tensor_view.hpp>
// #include "ck/device_utility/device_prop.hpp"
// #include "ck/device_utility/kernel_launch.hpp"
// #include "ck/tensor_operation/gpu/device/device_base.hpp"
// #include "ck/tensor_operation/gpu/device/device_elementwise.hpp"
// #include "ck/tensor_operation/gpu/grid/gridwise_binary_elementwise_1d.hpp"
// namespace migraphx {
// using ADataType = float;
// using BDataType = float;
// using CDataType = float;
// using ElementwiseFunctor = float;
// static constexpr auto I0 = ck::Number<0>{};
// template <class L, class S, class N>
// constexpr auto MakeDescriptor_M(const L& lengths, const S& strides, const N& ndim)
// {
// auto gridSize = 72;
// auto blockSize = 1024;
// //constexpr auto ndim = 1;
// // auto idx = make_index();
// auto tupleOfShape = generate_tuple([&](auto I) { return static_cast<ck::index_t>(lengths[I]);
// },
// ck::Number<ndim>{});
// auto tupleOfStride = generate_tuple(
// [&](auto I) { return static_cast<ck::index_t>(strides[I]); }, ck::Number<1>{});
// const auto desc = make_naive_tensor_descriptor(tupleOfShape, tupleOfStride);
// auto desc_m = desc;
// // merge nd to 1d desc - [s0 * s1 * ...]
// if constexpr(ndim > 1)
// {
// desc_m = transform_tensor_descriptor(
// desc,
// make_tuple(make_merge_transform(tupleOfShape)),
// make_tuple(generate_sequence_v2([&](auto I) { return I; }, ck::Number<ndim>{})),
// make_tuple(ck::Sequence<0>{}));
// }
// const auto M = desc_m.GetLength(I0);
// const ck::index_t loop_step = /* idx.nglobal(); // */ gridSize * blockSize /* * MPerThread
// */; const auto pad = ck::math::integer_least_multiple(M, loop_step) - M; const
// auto desc_m_pad =
// transform_tensor_descriptor(desc_m,
// make_tuple(ck::make_right_pad_transform(M, pad)),
// make_tuple(ck::Sequence<0>{}),
// make_tuple(ck::Sequence<0>{}));
// return desc_m_pad;
// }
// struct Add
// {
// template <typename Y, typename X0, typename X1>
// __device__ constexpr void operator()(Y& y, const X0& x0, const X1& x1) const
// {
// y = x0 + x1;
// };
// };
// extern "C" {
// __global__ void ck_elementwise_kernel(void* a_p, void* b_p, void* c_p)
// {
// make_tensors()(a_p, b_p, c_p)([](auto a_t, auto b_t, auto c_t) {
// constexpr auto lengths = get_shape_c<decltype(a_t)>{}.lens;
// constexpr auto strides = get_shape_c<decltype(a_t)>{}.strides;
// constexpr auto ndim = _c<decltype(lengths.size()){}>[1];
// constexpr auto a_desc = MakeDescriptor_M(lengths, strides, ndim);
// using AGridDesc_M = decltype(a_desc);
// using GridwiseBinEltwise = ck::GridwiseBinaryElementwise_1D<ADataType,
// BDataType,
// CDataType,
// CDataType,
// AGridDesc_M,
// AGridDesc_M,
// AGridDesc_M,
// Add,
// 1,
// 1,
// 1,
// 1>;
// auto op = Add{};
// GridwiseBinEltwise::Run(a_t.data(), b_t.data(), c_t.data(), a_desc, a_desc, a_desc, op);
// });
// }
// }
// } // namespace migraphx
// )__migraphx__";
// NOLINTNEXTLINE // NOLINTNEXTLINE
static const char* const ck_elementwise_kernel = R"__migraphx__( static const char* const ck_elementwise_kernel = R"__migraphx__(
#include <migraphx/kernels/ck_elementwise.hpp> #include <migraphx/kernels/ck_elementwise.hpp>
...@@ -190,11 +85,15 @@ struct ck_elementwise_compiler : compiler<ck_elementwise_compiler> ...@@ -190,11 +85,15 @@ struct ck_elementwise_compiler : compiler<ck_elementwise_compiler>
hip_compile_options options; hip_compile_options options;
options.inputs = inputs; options.inputs = inputs;
options.output = inputs.back(); options.output = inputs.back();
options.virtual_inputs = reduce_dims(inputs); //options.virtual_inputs = reduce_dims(inputs);
//std::cout << options.virtual_inputs << std::endl;
options.params = "-Wno-float-equal"; options.params = "-Wno-float-equal";
auto axis = find_fast_axis(options.virtual_inputs); // auto axis = find_fast_axis(options.virtual_inputs);
auto vec = vectorize::elements(axis, options.virtual_inputs); // auto vec = vectorize::elements(axis, options.virtual_inputs);
auto preloads = preload::broadcasts(axis, options.virtual_inputs); // auto preloads = preload::broadcasts(axis, options.virtual_inputs);
auto axis = find_fast_axis(inputs);
auto vec = vectorize::elements(axis, inputs);
auto preloads = preload::broadcasts(axis, inputs);
options.kernel_name = "ck_elementwise_kernel"; options.kernel_name = "ck_elementwise_kernel";
options.set_launch_params( options.set_launch_params(
v, v,
......
...@@ -167,6 +167,32 @@ __global__ void ck_gemm_kernel(void* a_p, void* b_p, void* c_p) ...@@ -167,6 +167,32 @@ __global__ void ck_gemm_kernel(void* a_p, void* b_p, void* c_p)
block_2_ctile_map, block_2_ctile_map,
ck::integral_constant<bool, HasMainKBlockLoop>{}, ck::integral_constant<bool, HasMainKBlockLoop>{},
ck::integral_constant<bool, HasDoubleTailKBlockLoop>{}); ck::integral_constant<bool, HasDoubleTailKBlockLoop>{});
// using AGridDesc_K0_M0_M1_K1 =
// decltype(GridwiseGemm::MakeAGridDescriptor_K0_M0_M1_K1(AGridDesc_K0_M_K1{}));
// using BGridDesc_K0_N0_N1_K1 =
// decltype(GridwiseGemm::MakeBGridDescriptor_K0_N0_N1_K1(BGridDesc_K0_N_K1{}));
// using CGridDesc_M0_M10_M11_N0_N10_N11 =
// decltype(GridwiseGemm::MakeCGridDescriptor_M0_M10_M11_N0_N10_N11(CGridDesc_M_N{}));
// using DefaultBlock2CTileMap =
// decltype(GridwiseGemm::MakeDefaultBlock2CTileMap(CGridDesc_M_N{}));
// const auto kernel = ck::kernel_gemm_dl_v1r3<GridwiseGemm,
// ADataType,
// CDataType,
// remove_reference_t<AGridDesc_K0_M0_M1_K1>,
// remove_reference_t<BGridDesc_K0_N0_N1_K1>,
// remove_reference_t<CGridDesc_M0_M10_M11_N0_N10_N11>,
// remove_reference_t<DefaultBlock2CTileMap>,
// true,
// true>;
// kernel(a_t.data(),
// b_t.data(),
// c_t.data(),
// a_grid_desc_k0_m0_m1_k1,
// b_grid_desc_k0_n0_n1_k1,
// c_grid_desc_m0_m10_m11_n0_n10_n11,
// block_2_ctile_map);
}); });
} }
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include <iostream>
#include <vector>
#include "ck/tensor_operation/gpu/device/device_base.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
template <typename ALayout,
typename BLayout,
typename CLayout,
typename ADataType,
typename BDataType,
typename CDataType,
typename AElementwiseOperation,
typename BElementwiseOperation,
typename CElementwiseOperation>
struct DeviceGemm : public BaseOperator
{
virtual std::unique_ptr<BaseArgument>
MakeArgumentPointer(const void* p_a,
const void* p_b,
void* p_c,
ck::index_t M,
ck::index_t N,
ck::index_t K,
ck::index_t StrideA,
ck::index_t StrideB,
ck::index_t StrideC,
AElementwiseOperation a_element_op,
BElementwiseOperation b_element_op,
CElementwiseOperation c_element_op) = 0;
virtual std::unique_ptr<BaseInvoker> MakeInvokerPointer() = 0;
};
} // namespace device
} // namespace tensor_operation
} // namespace ck
\ No newline at end of file
...@@ -24,6 +24,8 @@ ...@@ -24,6 +24,8 @@
#ifndef MIGRAPHX_GUARD_KERNELS_CK_ELEMENTWISE_HPP #ifndef MIGRAPHX_GUARD_KERNELS_CK_ELEMENTWISE_HPP
#define MIGRAPHX_GUARD_KERNELS_CK_ELEMENTWISE_HPP #define MIGRAPHX_GUARD_KERNELS_CK_ELEMENTWISE_HPP
#include <stdio.h>
#include <migraphx/kernels/index.hpp> #include <migraphx/kernels/index.hpp>
#include <migraphx/kernels/algorithm.hpp> #include <migraphx/kernels/algorithm.hpp>
#include <migraphx/kernels/integral_constant.hpp> #include <migraphx/kernels/integral_constant.hpp>
...@@ -48,7 +50,7 @@ template <ck::index_t ndim> ...@@ -48,7 +50,7 @@ template <ck::index_t ndim>
struct CKBinaryElementwise struct CKBinaryElementwise
{ {
template <class Desc_M> template <class Desc_M>
constexpr auto PadDescriptor_M_1d(Desc_M desc_m) __device__ constexpr auto PadDescriptor_M_1d(Desc_M desc_m)
{ {
auto gridSize = 72; auto gridSize = 72;
auto blockSize = 1024; auto blockSize = 1024;
...@@ -65,7 +67,7 @@ struct CKBinaryElementwise ...@@ -65,7 +67,7 @@ struct CKBinaryElementwise
} }
template <class L, class S> template <class L, class S>
constexpr auto MakeDescriptor_M(const L& lengths, const S& strides) __device__ constexpr auto MakeDescriptor_M(const L& lengths, const S& strides)
{ {
auto tupleOfShape = generate_tuple( auto tupleOfShape = generate_tuple(
[&](auto I) { return static_cast<ck::index_t>(lengths[I]); }, ck::Number<ndim>{}); [&](auto I) { return static_cast<ck::index_t>(lengths[I]); }, ck::Number<ndim>{});
...@@ -89,6 +91,51 @@ struct CKBinaryElementwise ...@@ -89,6 +91,51 @@ struct CKBinaryElementwise
} }
}; };
template <ck::index_t ndim>
struct CKBinaryElementwise2
{
template <class Desc_M>
/* constexpr */__device__ auto PadDescriptor_M_1d(Desc_M desc_m)
{
auto gridSize = 72;
auto blockSize = 1024;
auto MPerThread = 8;
const auto M = desc_m.GetLength(I0);
const ck::index_t loop_step = gridSize * blockSize * MPerThread;
const auto pad = ck::math::integer_least_multiple(M, loop_step) - M;
const auto desc_m_pad =
transform_tensor_descriptor(desc_m,
make_tuple(ck::make_right_pad_transform(M, pad)),
make_tuple(ck::Sequence<0>{}),
make_tuple(ck::Sequence<0>{}));
return desc_m_pad;
}
template <class L, class S>
/* constexpr */__device__ auto MakeDescriptor_M(const L& lengths, const S& strides)
{
auto tupleOfShape = generate_tuple(
[&](auto I) { return static_cast<ck::index_t>(lengths[I]); }, ck::Number<ndim>{});
auto tupleOfStride = generate_tuple(
[&](auto I) { printf ("Stride %i: %i\n", int(I), int(strides[I])); return static_cast<ck::index_t>(strides[I]); }, ck::Number<ndim>{});
const auto desc = make_naive_tensor_descriptor(tupleOfShape, tupleOfStride);
// merge nd to 1d desc - [s0 * s1 * ...]
if constexpr(ndim > 1)
{
const auto desc_m = transform_tensor_descriptor(
desc,
make_tuple(make_merge_transform(tupleOfShape)),
make_tuple(generate_sequence_v2([&](auto I) { return I; }, ck::Number<ndim>{})),
make_tuple(ck::Sequence<0>{}));
return PadDescriptor_M_1d(desc_m);
}
else
{
return PadDescriptor_M_1d(desc);
}
}
};
struct Add struct Add
{ {
template <typename Y, typename X0, typename X1> template <typename Y, typename X0, typename X1>
...@@ -98,30 +145,57 @@ struct Add ...@@ -98,30 +145,57 @@ struct Add
}; };
}; };
struct Mul
{
template <typename Y, typename X0, typename X1>
__device__ constexpr void operator()(Y& y, const X0& x0, const X1& x1) const
{
y = x0 * x1;
};
};
struct Div
{
template <typename Y, typename X0, typename X1>
__device__ constexpr void operator()(Y& y, const X0& x0, const X1& x1) const
{
y = x0 / x1;
};
};
template <class T, class U, class V> template <class T, class U, class V>
__device__ void ck_elementwise(const T& a_t, const U& b_t, const V& c_t) __device__ void ck_elementwise(const T& a_t, const U& b_t, const V& c_t)
{ {
//auto idx = make_index();
constexpr auto a_lens = get_shape_c<T>{}.lens; constexpr auto a_lens = get_shape_c<T>{}.lens;
constexpr auto a_strides = get_shape_c<T>{}.strides; constexpr auto a_strides = get_shape_c<T>{}.strides;
constexpr ck::index_t a_ndim = decltype(a_lens.size()){}; constexpr ck::index_t a_ndim = a_lens.size(); //decltype(a_lens.size()){};
// if (idx.global == 0)
// printf("a_ndim: %i\n", int(a_ndim));
auto a_bin_op = CKBinaryElementwise<a_ndim>{}; auto a_bin_op = CKBinaryElementwise<a_ndim>{};
constexpr auto a_desc = a_bin_op.MakeDescriptor_M(a_lens, a_strides); constexpr auto a_desc = a_bin_op.MakeDescriptor_M(a_lens, a_strides);
constexpr auto b_lens = get_shape_c<U>{}.lens; constexpr auto b_lens = get_shape_c<U>{}.lens;
constexpr auto b_strides = get_shape_c<U>{}.strides; constexpr auto b_strides = get_shape_c<U>{}.strides;
constexpr ck::index_t b_ndim = decltype(b_lens.size()){}; constexpr ck::index_t b_ndim = b_lens.size(); //decltype(b_lens.size()){};
// if (idx.global == 0)
// printf("b_ndim: %i\n", int(b_ndim));
auto b_bin_op = CKBinaryElementwise<b_ndim>{}; auto b_bin_op = CKBinaryElementwise<b_ndim>{};
constexpr auto b_desc = b_bin_op.MakeDescriptor_M(b_lens, b_strides); constexpr auto b_desc = b_bin_op.MakeDescriptor_M(b_lens, b_strides);
constexpr auto c_lens = get_shape_c<V>{}.lens; constexpr auto c_lens = get_shape_c<V>{}.lens;
constexpr auto c_strides = get_shape_c<V>{}.strides; constexpr auto c_strides = get_shape_c<V>{}.strides;
constexpr ck::index_t c_ndim = decltype(c_lens.size()){}; constexpr ck::index_t c_ndim = c_lens.size(); //decltype(c_lens.size()){};
auto c_bin_op = CKBinaryElementwise<c_ndim>{}; auto c_bin_op = CKBinaryElementwise<c_ndim>{};
constexpr auto c_desc = c_bin_op.MakeDescriptor_M(c_lens, c_strides); constexpr auto c_desc = c_bin_op.MakeDescriptor_M(c_lens, c_strides);
using AGridDesc_M = decltype(a_desc); using AGridDesc_M = decltype(a_desc);
using BGridDesc_M = decltype(b_desc); using BGridDesc_M = decltype(b_desc);
using CGridDesc_M = decltype(c_desc); using CGridDesc_M = decltype(c_desc);
constexpr ck::index_t MPerThread = 8;
constexpr ck::index_t AScalarPerVector = 8;
constexpr ck::index_t BScalarPerVector = 8;
constexpr ck::index_t CScalarPerVector = 8;
using GridwiseBinEltwise = ck::GridwiseBinaryElementwise_1D<ADataType, using GridwiseBinEltwise = ck::GridwiseBinaryElementwise_1D<ADataType,
BDataType, BDataType,
CDataType, CDataType,
...@@ -130,10 +204,10 @@ __device__ void ck_elementwise(const T& a_t, const U& b_t, const V& c_t) ...@@ -130,10 +204,10 @@ __device__ void ck_elementwise(const T& a_t, const U& b_t, const V& c_t)
BGridDesc_M, BGridDesc_M,
CGridDesc_M, CGridDesc_M,
Add, Add,
8, MPerThread,
8, AScalarPerVector,
8, BScalarPerVector,
8>; CScalarPerVector>;
auto op = Add{}; auto op = Add{};
GridwiseBinEltwise::Run(a_t.data(), b_t.data(), c_t.data(), a_desc, b_desc, c_desc, op); GridwiseBinEltwise::Run(a_t.data(), b_t.data(), c_t.data(), a_desc, b_desc, c_desc, op);
} }
......
...@@ -27,6 +27,53 @@ ...@@ -27,6 +27,53 @@
#include <migraphx/generate.hpp> #include <migraphx/generate.hpp>
#include <migraphx/make_op.hpp> #include <migraphx/make_op.hpp>
// struct ck_elementwise_half : verify_program<ck_elementwise_half>
// {
// migraphx::program create_program() const
// {
// migraphx::program p;
// auto* mm = p.get_main_module();
// migraphx::shape m1_shape{migraphx::shape::half_type, {1, 1, 2, 2, 2}};
// migraphx::shape m2_shape{migraphx::shape::half_type, {1, 1, 1, 2, 1}};
// std::vector<float> v1(8, 1);
// std::vector<float> v2(2);
// std::iota(v2.begin(), v2.end(), 1);
// auto l1 = mm->add_literal(migraphx::literal{m1_shape, v1});
// auto l2 = mm->add_literal(migraphx::literal{m2_shape, v2});
// l2 = mm->add_instruction(
// migraphx::make_op("multibroadcast", {{"out_lens", {1, 1, 2, 2, 2}}}), l2);
// //l2 = mm->add_instruction(migraphx::make_op("contiguous"), l2);
// mm->add_instruction(migraphx::make_op("ck_elementwise"), l1, l2);
// mm->debug_print();
// return p;
// }
// };
// struct ck_elementwise_half : verify_program<ck_elementwise_half>
// {
// migraphx::program create_program() const
// {
// migraphx::program p;
// auto* mm = p.get_main_module();
// migraphx::shape m1_shape{migraphx::shape::half_type, {2, 384, 3072}};
// migraphx::shape m2_shape{migraphx::shape::half_type, {1, 384, 1}};
// std::vector<float> v1(2*384*3072, 1);
// std::vector<float> v2(384, 2.54);
// auto l1 = mm->add_literal(migraphx::literal{m1_shape, v1});
// auto l2 = mm->add_literal(migraphx::literal{m2_shape, v2});
// l2 = mm->add_instruction(
// migraphx::make_op("multibroadcast", {{"out_lens", {2, 384, 3072}}}), l2);
// //l2 = mm->add_instruction(migraphx::make_op("contiguous"), l2);
// mm->add_instruction(migraphx::make_op("ck_elementwise"), l1, l2);
// mm->debug_print();
// return p;
// }
// };
struct ck_elementwise_half : verify_program<ck_elementwise_half> struct ck_elementwise_half : verify_program<ck_elementwise_half>
{ {
migraphx::program create_program() const migraphx::program create_program() const
...@@ -34,7 +81,7 @@ struct ck_elementwise_half : verify_program<ck_elementwise_half> ...@@ -34,7 +81,7 @@ struct ck_elementwise_half : verify_program<ck_elementwise_half>
migraphx::program p; migraphx::program p;
auto* mm = p.get_main_module(); auto* mm = p.get_main_module();
migraphx::shape m1_shape{migraphx::shape::half_type, {2, 384, 3072}}; migraphx::shape m1_shape{migraphx::shape::half_type, {2, 384, 3072}};
migraphx::shape m2_shape{migraphx::shape::half_type, {3072}}; migraphx::shape m2_shape{migraphx::shape::half_type, {1, 384, 1}};
auto l1 = mm->add_parameter("1", m1_shape); auto l1 = mm->add_parameter("1", m1_shape);
auto l2 = mm->add_parameter("2", m2_shape); auto l2 = mm->add_parameter("2", m2_shape);
l2 = mm->add_instruction( l2 = mm->add_instruction(
...@@ -45,3 +92,24 @@ struct ck_elementwise_half : verify_program<ck_elementwise_half> ...@@ -45,3 +92,24 @@ struct ck_elementwise_half : verify_program<ck_elementwise_half>
return p; return p;
} }
}; };
// struct ck_elementwise_half : verify_program<ck_elementwise_half>
// {
// migraphx::program create_program() const
// {
// migraphx::program p;
// auto* mm = p.get_main_module();
// migraphx::shape m1_shape{migraphx::shape::half_type, {3072}};
// migraphx::shape m2_shape{migraphx::shape::half_type, {1}};
// auto l1 = mm->add_parameter("1", m1_shape);
// auto l2 = mm->add_parameter("2", m2_shape);
// l2 = mm->add_instruction(
// migraphx::make_op("multibroadcast", {{"out_lens", {3072}}}), l2);
// //l2 = mm->add_instruction(migraphx::make_op("contiguous"), l2);
// mm->add_instruction(migraphx::make_op("ck_elementwise"), l1, l2);
// //mm->debug_print();
// return p;
// }
// };
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment