Unverified Commit 4698993d authored by Po Yen Chen's avatar Po Yen Chen Committed by GitHub
Browse files

Merge branch 'develop' into wmma_op

parents ab663329 7038723a
...@@ -61,7 +61,7 @@ bool run_conv_bwd_data_bias_relu(const ExecutionConfig& config, ...@@ -61,7 +61,7 @@ bool run_conv_bwd_data_bias_relu(const ExecutionConfig& config,
std::array<ck::index_t, NDimSpatial> input_left_pads{}; std::array<ck::index_t, NDimSpatial> input_left_pads{};
std::array<ck::index_t, NDimSpatial> input_right_pads{}; std::array<ck::index_t, NDimSpatial> input_right_pads{};
auto copy = [](auto& x, auto& y) { std::copy(x.begin(), x.end(), y.begin()); }; auto copy = [](const auto& x, auto& y) { ck::ranges::copy(x, y.begin()); };
copy(out_g_n_k_wos_desc.GetLengths(), a_g_n_k_wos_lengths); copy(out_g_n_k_wos_desc.GetLengths(), a_g_n_k_wos_lengths);
copy(out_g_n_k_wos_desc.GetStrides(), a_g_n_k_wos_strides); copy(out_g_n_k_wos_desc.GetStrides(), a_g_n_k_wos_strides);
...@@ -157,7 +157,7 @@ bool run_conv_bwd_data_bias_relu(const ExecutionConfig& config, ...@@ -157,7 +157,7 @@ bool run_conv_bwd_data_bias_relu(const ExecutionConfig& config,
in_device_buf.FromDevice(in_device.mData.data()); in_device_buf.FromDevice(in_device.mData.data());
return ck::utils::check_err(in_device.mData, in_host.mData); return ck::utils::check_err(in_device, in_host);
} }
return true; return true;
......
...@@ -52,7 +52,7 @@ bool run_conv_bwd_data(const ExecutionConfig& config, ...@@ -52,7 +52,7 @@ bool run_conv_bwd_data(const ExecutionConfig& config,
std::array<ck::index_t, NDimSpatial> input_left_pads{}; std::array<ck::index_t, NDimSpatial> input_left_pads{};
std::array<ck::index_t, NDimSpatial> input_right_pads{}; std::array<ck::index_t, NDimSpatial> input_right_pads{};
auto copy = [](auto& x, auto& y) { std::copy(x.begin(), x.end(), y.begin()); }; auto copy = [](auto& x, auto& y) { ck::ranges::copy(x, y.begin()); };
copy(out_g_n_k_wos_desc.GetLengths(), a_g_n_k_wos_lengths); copy(out_g_n_k_wos_desc.GetLengths(), a_g_n_k_wos_lengths);
copy(out_g_n_k_wos_desc.GetStrides(), a_g_n_k_wos_strides); copy(out_g_n_k_wos_desc.GetStrides(), a_g_n_k_wos_strides);
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#include "ck/tensor_operation/gpu/element/binary_element_wise_operation.hpp" #include "ck/tensor_operation/gpu/element/binary_element_wise_operation.hpp"
#include "ck/utility/type.hpp" #include "ck/utility/type.hpp"
#include "ck/library/utility/algorithm.hpp"
#include "ck/library/utility/check_err.hpp" #include "ck/library/utility/check_err.hpp"
#include "ck/library/utility/device_memory.hpp" #include "ck/library/utility/device_memory.hpp"
#include "ck/library/utility/fill.hpp" #include "ck/library/utility/fill.hpp"
...@@ -247,19 +248,6 @@ inline auto to_array(Range& range) noexcept ...@@ -247,19 +248,6 @@ inline auto to_array(Range& range) noexcept
return detail::to_array_proxy<ck::remove_cvref_t<Range>>{range}; return detail::to_array_proxy<ck::remove_cvref_t<Range>>{range};
} }
namespace ranges {
template <typename InputRange, typename OutputIterator>
inline auto copy(InputRange&& range, OutputIterator iter)
-> decltype(std::copy(std::begin(std::forward<InputRange>(range)),
std::end(std::forward<InputRange>(range)),
iter))
{
return std::copy(std::begin(std::forward<InputRange>(range)),
std::end(std::forward<InputRange>(range)),
iter);
}
} // namespace ranges
template <typename Axes> template <typename Axes>
inline auto is_valid_axes(const Axes& axes) inline auto is_valid_axes(const Axes& axes)
-> std::enable_if_t<detail::is_random_access_range_v<Axes>, bool> -> std::enable_if_t<detail::is_random_access_range_v<Axes>, bool>
...@@ -350,7 +338,7 @@ auto extend_shape(const Problem::Shape& shape, std::size_t new_dim) ...@@ -350,7 +338,7 @@ auto extend_shape(const Problem::Shape& shape, std::size_t new_dim)
using std::begin, std::end; using std::begin, std::end;
std::copy(begin(shape), end(shape), begin(extended_shape)); ck::ranges::copy(shape, begin(extended_shape));
extended_shape.back() = new_dim; extended_shape.back() = new_dim;
return extended_shape; return extended_shape;
...@@ -362,7 +350,7 @@ auto extend_axes(const Problem::Axes& axes) ...@@ -362,7 +350,7 @@ auto extend_axes(const Problem::Axes& axes)
using std::begin, std::end; using std::begin, std::end;
std::copy(begin(axes), end(axes), begin(extended_axes)); ck::ranges::copy(axes, begin(extended_axes));
extended_axes.back() = detail::get_array_size_v<Problem::Axes>; extended_axes.back() = detail::get_array_size_v<Problem::Axes>;
return extended_axes; return extended_axes;
......
...@@ -57,7 +57,7 @@ bool run_permute_bundle(const Problem& problem) ...@@ -57,7 +57,7 @@ bool run_permute_bundle(const Problem& problem)
using std::begin; using std::begin;
Tensor<DataType> input_tensor(input_shape); Tensor<DataType> input_tensor(input_shape);
ranges::copy(input_bundle_tensor.AsSpan<const DataType>(), begin(input_tensor)); ck::ranges::copy(input_bundle_tensor.AsSpan<const DataType>(), begin(input_tensor));
Tensor<DataType> output_tensor(transpose(input_shape, input_axes)); Tensor<DataType> output_tensor(transpose(input_shape, input_axes));
if(!host_permute(input_tensor, input_axes, PassThrough{}, output_tensor)) if(!host_permute(input_tensor, input_axes, PassThrough{}, output_tensor))
......
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
#include "ck/tensor_operation/gpu/device/impl/device_batched_gemm_gemm_xdl_cshuffle.hpp" #include "ck/tensor_operation/gpu/device/impl/device_batched_gemm_gemm_xdl_cshuffle.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/library/utility/algorithm.hpp"
#include "ck/library/utility/check_err.hpp" #include "ck/library/utility/check_err.hpp"
#include "ck/library/utility/device_memory.hpp" #include "ck/library/utility/device_memory.hpp"
#include "ck/library/utility/host_tensor.hpp" #include "ck/library/utility/host_tensor.hpp"
......
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
#include "ck/tensor_operation/gpu/device/impl/device_batched_gemm_gemm_xdl_cshuffle.hpp" #include "ck/tensor_operation/gpu/device/impl/device_batched_gemm_gemm_xdl_cshuffle.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/library/utility/algorithm.hpp"
#include "ck/library/utility/check_err.hpp" #include "ck/library/utility/check_err.hpp"
#include "ck/library/utility/device_memory.hpp" #include "ck/library/utility/device_memory.hpp"
#include "ck/library/utility/host_tensor.hpp" #include "ck/library/utility/host_tensor.hpp"
......
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
#include "ck/tensor_operation/gpu/device/impl/device_batched_gemm_gemm_xdl_cshuffle.hpp" #include "ck/tensor_operation/gpu/device/impl/device_batched_gemm_gemm_xdl_cshuffle.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/library/utility/algorithm.hpp"
#include "ck/library/utility/check_err.hpp" #include "ck/library/utility/check_err.hpp"
#include "ck/library/utility/device_memory.hpp" #include "ck/library/utility/device_memory.hpp"
#include "ck/library/utility/host_tensor.hpp" #include "ck/library/utility/host_tensor.hpp"
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
#include "ck/tensor_operation/gpu/device/impl/device_batched_gemm_gemm_xdl_cshuffle.hpp" #include "ck/tensor_operation/gpu/device/impl/device_batched_gemm_gemm_xdl_cshuffle.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/library/utility/algorithm.hpp"
#include "ck/library/utility/check_err.hpp" #include "ck/library/utility/check_err.hpp"
#include "ck/library/utility/device_memory.hpp" #include "ck/library/utility/device_memory.hpp"
#include "ck/library/utility/host_tensor.hpp" #include "ck/library/utility/host_tensor.hpp"
......
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
#include "ck/tensor_operation/gpu/device/impl/device_batched_gemm_gemm_xdl_cshuffle.hpp" #include "ck/tensor_operation/gpu/device/impl/device_batched_gemm_gemm_xdl_cshuffle.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/library/utility/algorithm.hpp"
#include "ck/library/utility/check_err.hpp" #include "ck/library/utility/check_err.hpp"
#include "ck/library/utility/device_memory.hpp" #include "ck/library/utility/device_memory.hpp"
#include "ck/library/utility/host_tensor.hpp" #include "ck/library/utility/host_tensor.hpp"
......
...@@ -97,7 +97,7 @@ bool run_grouped_conv_conv_fwd(bool do_verification, ...@@ -97,7 +97,7 @@ bool run_grouped_conv_conv_fwd(bool do_verification,
std::array<ck::index_t, NDimSpatial> input1_left_pads{}; std::array<ck::index_t, NDimSpatial> input1_left_pads{};
std::array<ck::index_t, NDimSpatial> input1_right_pads{}; std::array<ck::index_t, NDimSpatial> input1_right_pads{};
auto copy = [](auto& x, auto& y) { std::copy(x.begin(), x.end(), y.begin()); }; auto copy = [](const auto& x, auto& y) { ck::ranges::copy(x, y.begin()); };
copy(in0_g_n_c_wis_desc.GetLengths(), a0_g_n_c_wis_lengths); copy(in0_g_n_c_wis_desc.GetLengths(), a0_g_n_c_wis_lengths);
copy(in0_g_n_c_wis_desc.GetStrides(), a0_g_n_c_wis_strides); copy(in0_g_n_c_wis_desc.GetStrides(), a0_g_n_c_wis_strides);
...@@ -120,18 +120,14 @@ bool run_grouped_conv_conv_fwd(bool do_verification, ...@@ -120,18 +120,14 @@ bool run_grouped_conv_conv_fwd(bool do_verification,
const ck::index_t gemm_batch = a0_g_n_c_wis_lengths[0]; const ck::index_t gemm_batch = a0_g_n_c_wis_lengths[0];
const ck::index_t gemm0_m_length = const ck::index_t gemm0_m_length =
e1_g_n_k_wos_lengths[1] * std::accumulate(e1_g_n_k_wos_lengths.begin() + 3, e1_g_n_k_wos_lengths[1] *
e1_g_n_k_wos_lengths.begin() + 3 + NDimSpatial, ck::accumulate_n<ck::index_t>(
ck::index_t{1}, e1_g_n_k_wos_lengths.begin() + 3, NDimSpatial, 1, std::multiplies<>{});
std::multiplies<ck::index_t>{});
const ck::index_t gemm0_n_length = b0_g_k_c_xs_lengths[1]; const ck::index_t gemm0_n_length = b0_g_k_c_xs_lengths[1];
const ck::index_t gemm0_k_length = const ck::index_t gemm0_k_length = ck::accumulate_n<ck::index_t>(
std::accumulate(b0_g_k_c_xs_lengths.begin() + 2, b0_g_k_c_xs_lengths.begin() + 2, NDimSpatial + 1, 1, std::multiplies<>{});
b0_g_k_c_xs_lengths.begin() + 2 + NDimSpatial + 1,
ck::index_t{1},
std::multiplies<ck::index_t>{});
const ck::index_t gemm1_n_length = b1_g_k_c_xs_lengths[1]; const ck::index_t gemm1_n_length = b1_g_k_c_xs_lengths[1];
...@@ -261,7 +257,7 @@ bool run_grouped_conv_conv_fwd(bool do_verification, ...@@ -261,7 +257,7 @@ bool run_grouped_conv_conv_fwd(bool do_verification,
#endif #endif
return ck::utils::check_err( return ck::utils::check_err(
out1_device.mData, out1_host.mData, "Error: incorrect results!", 1e-5f, 1e-4f); out1_device, out1_host, "Error: incorrect results!", 1e-5f, 1e-4f);
} }
return true; return true;
......
...@@ -100,9 +100,9 @@ int main(int argc, char* argv[]) ...@@ -100,9 +100,9 @@ int main(int argc, char* argv[])
Tensor<GammaDataType> gamma({G, C}); Tensor<GammaDataType> gamma({G, C});
Tensor<BetaDataType> beta({G, C}); Tensor<BetaDataType> beta({G, C});
ck::utils::FillUniformDistribution<XDataType>{0.f, 1.f}(x.begin(), x.end()); ck::utils::FillUniformDistribution<XDataType>{0.f, 1.f}(x);
ck::utils::FillUniformDistribution<GammaDataType>{0.f, 1.f}(gamma.begin(), gamma.end()); ck::utils::FillUniformDistribution<GammaDataType>{0.f, 1.f}(gamma);
ck::utils::FillUniformDistribution<BetaDataType>{0.f, 1.f}(beta.begin(), beta.end()); ck::utils::FillUniformDistribution<BetaDataType>{0.f, 1.f}(beta);
DeviceMem x_dev(sizeof(XDataType) * x.mDesc.GetElementSpaceSize()); DeviceMem x_dev(sizeof(XDataType) * x.mDesc.GetElementSpaceSize());
DeviceMem gamma_dev(sizeof(GammaDataType) * gamma.mDesc.GetElementSpaceSize()); DeviceMem gamma_dev(sizeof(GammaDataType) * gamma.mDesc.GetElementSpaceSize());
...@@ -167,7 +167,7 @@ int main(int argc, char* argv[]) ...@@ -167,7 +167,7 @@ int main(int argc, char* argv[])
ref_invoker.Run(ref_argument); ref_invoker.Run(ref_argument);
y_dev.FromDevice(y.mData.data()); y_dev.FromDevice(y.mData.data());
pass &= ck::utils::check_err(y.mData, host_y.mData, "Error: Incorrect results", 1e-3, 1e-3); pass &= ck::utils::check_err(y, host_y, "Error: Incorrect results", 1e-3, 1e-3);
} }
return (pass ? 0 : 1); return (pass ? 0 : 1);
......
...@@ -6,6 +6,7 @@ ...@@ -6,6 +6,7 @@
#include "ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_xdl_cshuffle.hpp" #include "ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_xdl_cshuffle.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/library/utility/algorithm.hpp"
#include "ck/library/utility/check_err.hpp" #include "ck/library/utility/check_err.hpp"
#include "ck/library/utility/device_memory.hpp" #include "ck/library/utility/device_memory.hpp"
#include "ck/library/utility/host_tensor.hpp" #include "ck/library/utility/host_tensor.hpp"
...@@ -144,7 +145,7 @@ bool run_grouped_conv_fwd(bool do_verification, ...@@ -144,7 +145,7 @@ bool run_grouped_conv_fwd(bool do_verification,
std::array<ck::index_t, NDimSpatial> input_left_pads{}; std::array<ck::index_t, NDimSpatial> input_left_pads{};
std::array<ck::index_t, NDimSpatial> input_right_pads{}; std::array<ck::index_t, NDimSpatial> input_right_pads{};
auto copy = [](auto& x, auto& y) { std::copy(x.begin(), x.end(), y.begin()); }; auto copy = [](auto& x, auto& y) { ck::ranges::copy(x, y.begin()); };
copy(in_g_n_c_wis_desc.GetLengths(), a_g_n_c_wis_lengths); copy(in_g_n_c_wis_desc.GetLengths(), a_g_n_c_wis_lengths);
copy(in_g_n_c_wis_desc.GetStrides(), a_g_n_c_wis_strides); copy(in_g_n_c_wis_desc.GetStrides(), a_g_n_c_wis_strides);
......
...@@ -6,6 +6,7 @@ ...@@ -6,6 +6,7 @@
#include "ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_xdl_cshuffle.hpp" #include "ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_xdl_cshuffle.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/library/utility/algorithm.hpp"
#include "ck/library/utility/check_err.hpp" #include "ck/library/utility/check_err.hpp"
#include "ck/library/utility/device_memory.hpp" #include "ck/library/utility/device_memory.hpp"
#include "ck/library/utility/host_tensor.hpp" #include "ck/library/utility/host_tensor.hpp"
...@@ -131,7 +132,7 @@ bool run_grouped_conv_fwd(bool do_verification, ...@@ -131,7 +132,7 @@ bool run_grouped_conv_fwd(bool do_verification,
std::array<ck::index_t, NDimSpatial> input_left_pads{}; std::array<ck::index_t, NDimSpatial> input_left_pads{};
std::array<ck::index_t, NDimSpatial> input_right_pads{}; std::array<ck::index_t, NDimSpatial> input_right_pads{};
auto copy = [](auto& x, auto& y) { std::copy(x.begin(), x.end(), y.begin()); }; auto copy = [](auto& x, auto& y) { ck::ranges::copy(x, y.begin()); };
copy(in_g_n_c_wis_desc.GetLengths(), a_g_n_c_wis_lengths); copy(in_g_n_c_wis_desc.GetLengths(), a_g_n_c_wis_lengths);
copy(in_g_n_c_wis_desc.GetStrides(), a_g_n_c_wis_strides); copy(in_g_n_c_wis_desc.GetStrides(), a_g_n_c_wis_strides);
......
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
#include "ck/tensor_operation/gpu/element/binary_element_wise_operation.hpp" #include "ck/tensor_operation/gpu/element/binary_element_wise_operation.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_elementwise.hpp" #include "ck/tensor_operation/gpu/device/impl/device_elementwise.hpp"
#include "ck/library/utility/algorithm.hpp"
#include "ck/library/utility/check_err.hpp" #include "ck/library/utility/check_err.hpp"
#include "ck/library/utility/device_memory.hpp" #include "ck/library/utility/device_memory.hpp"
#include "ck/library/utility/host_tensor.hpp" #include "ck/library/utility/host_tensor.hpp"
...@@ -69,7 +70,7 @@ int main() ...@@ -69,7 +70,7 @@ int main()
static_cast<int>(nhwc[2] * nhwc[3]), static_cast<int>(nhwc[2] * nhwc[3]),
static_cast<int>(nhwc[3])}; static_cast<int>(nhwc[3])};
std::copy(nchw.begin(), nchw.end(), ab_lengths.begin()); ck::ranges::copy(nchw, ab_lengths.begin());
auto broadcastPermute = DeviceElementwisePermuteInstance{}; auto broadcastPermute = DeviceElementwisePermuteInstance{};
auto argument = broadcastPermute.MakeArgumentPointer( auto argument = broadcastPermute.MakeArgumentPointer(
......
...@@ -12,6 +12,7 @@ function(add_example_executable EXAMPLE_NAME FILE_NAME) ...@@ -12,6 +12,7 @@ function(add_example_executable EXAMPLE_NAME FILE_NAME)
add_test(NAME ${EXAMPLE_NAME} COMMAND $<TARGET_FILE:${EXAMPLE_NAME}> ${ARGN}) add_test(NAME ${EXAMPLE_NAME} COMMAND $<TARGET_FILE:${EXAMPLE_NAME}> ${ARGN})
add_dependencies(examples ${EXAMPLE_NAME}) add_dependencies(examples ${EXAMPLE_NAME})
add_dependencies(check ${EXAMPLE_NAME}) add_dependencies(check ${EXAMPLE_NAME})
rocm_install(TARGETS ${EXAMPLE_NAME} COMPONENT examples)
endfunction(add_example_executable EXAMPLE_NAME) endfunction(add_example_executable EXAMPLE_NAME)
function(add_example_executable_no_testing EXAMPLE_NAME FILE_NAME) function(add_example_executable_no_testing EXAMPLE_NAME FILE_NAME)
...@@ -19,6 +20,7 @@ function(add_example_executable_no_testing EXAMPLE_NAME FILE_NAME) ...@@ -19,6 +20,7 @@ function(add_example_executable_no_testing EXAMPLE_NAME FILE_NAME)
add_executable(${EXAMPLE_NAME} ${FILE_NAME}) add_executable(${EXAMPLE_NAME} ${FILE_NAME})
target_link_libraries(${EXAMPLE_NAME} PRIVATE utility) target_link_libraries(${EXAMPLE_NAME} PRIVATE utility)
add_dependencies(examples ${EXAMPLE_NAME}) add_dependencies(examples ${EXAMPLE_NAME})
rocm_install(TARGETS ${EXAMPLE_NAME} COMPONENT examples)
endfunction(add_example_executable_no_testing EXAMPLE_NAME) endfunction(add_example_executable_no_testing EXAMPLE_NAME)
# add all example subdir # add all example subdir
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include <array>
#include "ck/tensor_operation/gpu/device/device_base.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
template <ck::index_t NDimSpatial,
typename InputLayout,
typename WeightLayout,
typename OutputLayout,
typename InputDataType,
typename WeightDataType,
typename OutputDataType,
typename InputElementwiseOperation,
typename WeightElementwiseOperation,
typename OutputElementwiseOperation>
struct DeviceGroupedConvBwdData : public BaseOperator
{
virtual std::unique_ptr<BaseArgument>
MakeArgumentPointer(void* p_input,
const void* p_weight,
const void* p_output,
const std::array<index_t, NDimSpatial + 3>& input_g_n_c_wis_lengths,
const std::array<index_t, NDimSpatial + 3>& input_g_n_c_wis_strides,
const std::array<index_t, NDimSpatial + 3>& weight_g_k_c_xs_lengths,
const std::array<index_t, NDimSpatial + 3>& weight_g_k_c_xs_strides,
const std::array<index_t, NDimSpatial + 3>& output_g_n_k_wos_lengths,
const std::array<index_t, NDimSpatial + 3>& output_g_n_k_wos_strides,
const std::array<index_t, NDimSpatial>& conv_filter_strides,
const std::array<index_t, NDimSpatial>& conv_filter_dilations,
const std::array<index_t, NDimSpatial>& input_left_pads,
const std::array<index_t, NDimSpatial>& input_right_pads,
const InputElementwiseOperation& input_element_op,
const WeightElementwiseOperation& weight_element_op,
const OutputElementwiseOperation& output_element_op) = 0;
virtual std::unique_ptr<BaseInvoker> MakeInvokerPointer() = 0;
};
} // namespace device
} // namespace tensor_operation
} // namespace ck
...@@ -3,10 +3,9 @@ ...@@ -3,10 +3,9 @@
#pragma once #pragma once
#include <vector> #include <array>
#include "ck/tensor_operation/gpu/device/device_base.hpp" #include "ck/tensor_operation/gpu/device/device_base.hpp"
#include "ck/tensor_operation/gpu/device/device_grouped_conv_bwd_data.hpp"
namespace ck { namespace ck {
namespace tensor_operation { namespace tensor_operation {
...@@ -63,100 +62,6 @@ struct DeviceGroupedConvBwdDataMultipleD : public BaseOperator ...@@ -63,100 +62,6 @@ struct DeviceGroupedConvBwdDataMultipleD : public BaseOperator
virtual std::unique_ptr<BaseInvoker> MakeInvokerPointer() = 0; virtual std::unique_ptr<BaseInvoker> MakeInvokerPointer() = 0;
}; };
template <ck::index_t NDimSpatial,
typename ALayout,
typename BLayout,
typename ELayout,
typename ADataType,
typename BDataType,
typename EDataType,
typename AElementwiseOperation,
typename BElementwiseOperation,
typename CDEElementwiseOperation>
struct DeviceGroupedConvBwdDataMultipleD<NDimSpatial,
ALayout,
BLayout,
Tuple<>,
ELayout,
ADataType,
BDataType,
Tuple<>,
EDataType,
AElementwiseOperation,
BElementwiseOperation,
CDEElementwiseOperation>
: public DeviceGroupedConvBwdData<NDimSpatial,
ELayout,
BLayout,
ALayout,
EDataType,
BDataType,
ADataType,
CDEElementwiseOperation,
BElementwiseOperation,
AElementwiseOperation>
{
virtual std::unique_ptr<BaseArgument> MakeArgumentPointer(
const void* p_a, // output image
const void* p_b, // weight
const std::array<const void*, 0>&, // bias
void* p_e, // input image
const std::array<index_t, NDimSpatial + 3>& a_g_n_k_wos_lengths, // output image
const std::array<index_t, NDimSpatial + 3>& a_g_n_k_wos_strides, // output image
const std::array<index_t, NDimSpatial + 3>& b_g_k_c_xs_lengths, // weight
const std::array<index_t, NDimSpatial + 3>& b_g_k_c_xs_strides, // weight
const std::array<std::array<index_t, NDimSpatial + 3>, 0>&, // bias
const std::array<std::array<index_t, NDimSpatial + 3>, 0>&, // bias
const std::array<index_t, NDimSpatial + 3>& e_g_n_c_wis_lengths, // input image
const std::array<index_t, NDimSpatial + 3>& e_g_n_c_wis_strides, // input image
const std::array<index_t, NDimSpatial>& conv_filter_strides,
const std::array<index_t, NDimSpatial>& conv_filter_dilations,
const std::array<index_t, NDimSpatial>& input_left_pads,
const std::array<index_t, NDimSpatial>& input_right_pads,
const AElementwiseOperation& a_element_op,
const BElementwiseOperation& b_element_op,
const CDEElementwiseOperation& cde_element_op) = 0;
std::unique_ptr<BaseArgument>
MakeArgumentPointer(void* p_input,
const void* p_weight,
const void* p_output,
const std::array<index_t, NDimSpatial + 3>& input_g_n_c_wis_lengths,
const std::array<index_t, NDimSpatial + 3>& input_g_n_c_wis_strides,
const std::array<index_t, NDimSpatial + 3>& weight_g_k_c_xs_lengths,
const std::array<index_t, NDimSpatial + 3>& weight_g_k_c_xs_strides,
const std::array<index_t, NDimSpatial + 3>& output_g_n_k_wos_lengths,
const std::array<index_t, NDimSpatial + 3>& output_g_n_k_wos_strides,
const std::array<index_t, NDimSpatial>& conv_filter_strides,
const std::array<index_t, NDimSpatial>& conv_filter_dilations,
const std::array<index_t, NDimSpatial>& input_left_pads,
const std::array<index_t, NDimSpatial>& input_right_pads,
const CDEElementwiseOperation& input_element_op,
const BElementwiseOperation& weight_element_op,
const AElementwiseOperation& output_element_op) override final
{
return MakeArgumentPointer(p_output,
p_weight,
std::array<const void*, 0>{},
p_input,
output_g_n_k_wos_lengths,
output_g_n_k_wos_strides,
weight_g_k_c_xs_lengths,
weight_g_k_c_xs_strides,
std::array<std::array<index_t, NDimSpatial + 3>, 0>{},
std::array<std::array<index_t, NDimSpatial + 3>, 0>{},
input_g_n_c_wis_lengths,
input_g_n_c_wis_strides,
conv_filter_strides,
conv_filter_dilations,
input_left_pads,
input_right_pads,
output_element_op,
weight_element_op,
input_element_op);
}
};
} // namespace device } // namespace device
} // namespace tensor_operation } // namespace tensor_operation
} // namespace ck } // namespace ck
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
#pragma once #pragma once
#include <vector> #include <array>
#include "ck/tensor_operation/gpu/device/device_base.hpp" #include "ck/tensor_operation/gpu/device/device_base.hpp"
...@@ -11,7 +11,7 @@ namespace ck { ...@@ -11,7 +11,7 @@ namespace ck {
namespace tensor_operation { namespace tensor_operation {
namespace device { namespace device {
template <ck::index_t NumDimSpatial, template <ck::index_t NDimSpatial,
typename InLayout, typename InLayout,
typename WeiLayout, typename WeiLayout,
typename OutLayout, typename OutLayout,
...@@ -21,22 +21,23 @@ template <ck::index_t NumDimSpatial, ...@@ -21,22 +21,23 @@ template <ck::index_t NumDimSpatial,
typename InElementwiseOperation, typename InElementwiseOperation,
typename WeiElementwiseOperation, typename WeiElementwiseOperation,
typename OutElementwiseOperation> typename OutElementwiseOperation>
struct DeviceConvBwdWeight : public BaseOperator struct DeviceGroupedConvBwdWeight : public BaseOperator
{ {
virtual std::unique_ptr<BaseArgument> virtual std::unique_ptr<BaseArgument>
MakeArgumentPointer(const void* p_in, MakeArgumentPointer(const void* p_in,
void* p_wei, void* p_wei,
const void* p_out, const void* p_out,
ck::index_t G,
ck::index_t N, ck::index_t N,
ck::index_t K, ck::index_t K,
ck::index_t C, ck::index_t C,
std::vector<ck::index_t> input_spatial_lengths, std::array<ck::index_t, NDimSpatial> input_spatial_lengths,
std::vector<ck::index_t> filter_spatial_lengths, std::array<ck::index_t, NDimSpatial> filter_spatial_lengths,
std::vector<ck::index_t> output_spatial_lengths, std::array<ck::index_t, NDimSpatial> output_spatial_lengths,
std::vector<ck::index_t> conv_filter_strides, std::array<ck::index_t, NDimSpatial> conv_filter_strides,
std::vector<ck::index_t> conv_filter_dilations, std::array<ck::index_t, NDimSpatial> conv_filter_dilations,
std::vector<ck::index_t> input_left_pads, std::array<ck::index_t, NDimSpatial> input_left_pads,
std::vector<ck::index_t> input_right_pads, std::array<ck::index_t, NDimSpatial> input_right_pads,
InElementwiseOperation in_element_op, InElementwiseOperation in_element_op,
WeiElementwiseOperation wei_element_op, WeiElementwiseOperation wei_element_op,
OutElementwiseOperation out_element_op, OutElementwiseOperation out_element_op,
......
...@@ -14,39 +14,38 @@ namespace device { ...@@ -14,39 +14,38 @@ namespace device {
// Convolution Forward: // Convolution Forward:
// input : input image A[G, N, C, Hi, Wi], // input : input image A[G, N, C, Hi, Wi],
// input : weight B[G, K, C, Y, X], // input : weight B[G, K, C, Y, X],
// input : D0[G, N, K, Ho, Wo], D1[G, N, K, Ho, Wo], ...
// output : output image E[G, N, K, Ho, Wo] // output : output image E[G, N, K, Ho, Wo]
// C = a_op(A) * b_op(B) // C = a_op(A) * b_op(B)
// E = cde_op(C, D0, D1, ...) // E = cde_op(C, D0, D1, ...)
template <index_t NDimSpatial, template <index_t NDimSpatial,
typename ALayout, typename InLayout,
typename BLayout, typename WeiLayout,
typename CLayout, typename OutLayout,
typename ADataType, typename InDataType,
typename BDataType, typename WeiDataType,
typename CDataType, typename OutDataType,
typename AElementwiseOperation, typename InElementwiseOperation,
typename BElementwiseOperation, typename WeiElementwiseOperation,
typename CElementwiseOperation> typename OutElementwiseOperation>
struct DeviceGroupedConvFwd : public BaseOperator struct DeviceGroupedConvFwd : public BaseOperator
{ {
virtual std::unique_ptr<BaseArgument> virtual std::unique_ptr<BaseArgument>
MakeArgumentPointer(const void* p_a, // input image MakeArgumentPointer(const void* p_in, // input image
const void* p_b, // weight const void* p_wei, // weight
void* p_c, // output image void* p_out, // output image
const std::array<index_t, NDimSpatial + 3>& a_g_n_c_wis_lengths, const std::array<index_t, NDimSpatial + 3>& in_g_n_c_wis_lengths,
const std::array<index_t, NDimSpatial + 3>& a_g_n_c_wis_strides, const std::array<index_t, NDimSpatial + 3>& in_g_n_c_wis_strides,
const std::array<index_t, NDimSpatial + 3>& b_g_k_c_xs_lengths, const std::array<index_t, NDimSpatial + 3>& wei_g_k_c_xs_lengths,
const std::array<index_t, NDimSpatial + 3>& b_g_k_c_xs_strides, const std::array<index_t, NDimSpatial + 3>& wei_g_k_c_xs_strides,
const std::array<index_t, NDimSpatial + 3>& c_g_n_k_wos_lengths, const std::array<index_t, NDimSpatial + 3>& out_g_n_k_wos_lengths,
const std::array<index_t, NDimSpatial + 3>& c_g_n_k_wos_strides, const std::array<index_t, NDimSpatial + 3>& out_g_n_k_wos_strides,
const std::array<index_t, NDimSpatial>& conv_filter_strides, const std::array<index_t, NDimSpatial>& conv_filter_strides,
const std::array<index_t, NDimSpatial>& conv_filter_dilations, const std::array<index_t, NDimSpatial>& conv_filter_dilations,
const std::array<index_t, NDimSpatial>& input_left_pads, const std::array<index_t, NDimSpatial>& input_left_pads,
const std::array<index_t, NDimSpatial>& input_right_pads, const std::array<index_t, NDimSpatial>& input_right_pads,
const AElementwiseOperation& a_element_op, const InElementwiseOperation& in_element_op,
const BElementwiseOperation& b_element_op, const WeiElementwiseOperation& wei_element_op,
const CElementwiseOperation& c_element_op) = 0; const OutElementwiseOperation& out_element_op) = 0;
virtual std::unique_ptr<BaseInvoker> MakeInvokerPointer() = 0; virtual std::unique_ptr<BaseInvoker> MakeInvokerPointer() = 0;
}; };
......
...@@ -67,6 +67,8 @@ struct DeviceConv2dBwdWeightXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_ ...@@ -67,6 +67,8 @@ struct DeviceConv2dBwdWeightXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_
WeiElementwiseOperation, WeiElementwiseOperation,
OutElementwiseOperation> OutElementwiseOperation>
{ {
static constexpr ck::index_t NDimSpatial = 2;
using DeviceOp = using DeviceOp =
DeviceConv2dBwdWeightXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K; DeviceConv2dBwdWeightXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K;
...@@ -107,18 +109,18 @@ struct DeviceConv2dBwdWeightXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_ ...@@ -107,18 +109,18 @@ struct DeviceConv2dBwdWeightXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_
static constexpr auto BBlockLdsN0PerBlock = NPerBlock / BBlockLdsN1PerBlock; static constexpr auto BBlockLdsN0PerBlock = NPerBlock / BBlockLdsN1PerBlock;
static constexpr auto BBlockLdsN1Padding = 4; static constexpr auto BBlockLdsN1Padding = 4;
static auto static auto MakeABCGridDescriptor_A_K0_M_K1_B_K0_N_K1_C_M_N(
MakeABCGridDescriptor_A_K0_M_K1_B_K0_N_K1_C_M_N(ck::index_t N, ck::index_t N,
ck::index_t K, ck::index_t K,
ck::index_t C, ck::index_t C,
std::vector<ck::index_t> input_spatial_lengths, std::array<ck::index_t, NDimSpatial> input_spatial_lengths,
std::vector<ck::index_t> filter_spatial_lengths, std::array<ck::index_t, NDimSpatial> filter_spatial_lengths,
std::vector<ck::index_t> output_spatial_lengths, std::array<ck::index_t, NDimSpatial> output_spatial_lengths,
std::vector<ck::index_t> conv_filter_strides, std::array<ck::index_t, NDimSpatial> conv_filter_strides,
std::vector<ck::index_t> conv_filter_dilations, std::array<ck::index_t, NDimSpatial> conv_filter_dilations,
std::vector<ck::index_t> input_left_pads, std::array<ck::index_t, NDimSpatial> input_left_pads,
std::vector<ck::index_t> input_right_pads, std::array<ck::index_t, NDimSpatial> input_right_pads,
ck::index_t batch_k) ck::index_t batch_k)
{ {
using namespace ck; using namespace ck;
...@@ -390,13 +392,13 @@ struct DeviceConv2dBwdWeightXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_ ...@@ -390,13 +392,13 @@ struct DeviceConv2dBwdWeightXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_
ck::index_t N, ck::index_t N,
ck::index_t K, ck::index_t K,
ck::index_t C, ck::index_t C,
std::vector<ck::index_t> input_spatial_lengths, std::array<ck::index_t, NDimSpatial> input_spatial_lengths,
std::vector<ck::index_t> filter_spatial_lengths, std::array<ck::index_t, NDimSpatial> filter_spatial_lengths,
std::vector<ck::index_t> output_spatial_lengths, std::array<ck::index_t, NDimSpatial> output_spatial_lengths,
std::vector<ck::index_t> conv_filter_strides, std::array<ck::index_t, NDimSpatial> conv_filter_strides,
std::vector<ck::index_t> conv_filter_dilations, std::array<ck::index_t, NDimSpatial> conv_filter_dilations,
std::vector<ck::index_t> input_left_pads, std::array<ck::index_t, NDimSpatial> input_left_pads,
std::vector<ck::index_t> input_right_pads, std::array<ck::index_t, NDimSpatial> input_right_pads,
ck::index_t M01, ck::index_t M01,
ck::index_t N01, ck::index_t N01,
InElementwiseOperation in_element_op, InElementwiseOperation in_element_op,
...@@ -473,11 +475,11 @@ struct DeviceConv2dBwdWeightXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_ ...@@ -473,11 +475,11 @@ struct DeviceConv2dBwdWeightXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_
index_t Conv_N_; index_t Conv_N_;
index_t Conv_K_; index_t Conv_K_;
index_t Conv_C_; index_t Conv_C_;
std::vector<index_t> output_spatial_lengths_; std::array<index_t, NDimSpatial> output_spatial_lengths_;
std::vector<index_t> filter_spatial_lengths_; std::array<index_t, NDimSpatial> filter_spatial_lengths_;
std::vector<index_t> conv_filter_strides_; std::array<index_t, NDimSpatial> conv_filter_strides_;
std::vector<index_t> input_left_pads_; std::array<index_t, NDimSpatial> input_left_pads_;
std::vector<index_t> input_right_pads_; std::array<index_t, NDimSpatial> input_right_pads_;
index_t k_batch_; index_t k_batch_;
}; };
...@@ -682,13 +684,13 @@ struct DeviceConv2dBwdWeightXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_ ...@@ -682,13 +684,13 @@ struct DeviceConv2dBwdWeightXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_
ck::index_t N, ck::index_t N,
ck::index_t K, ck::index_t K,
ck::index_t C, ck::index_t C,
std::vector<ck::index_t> input_spatial_lengths, std::array<ck::index_t, NDimSpatial> input_spatial_lengths,
std::vector<ck::index_t> filter_spatial_lengths, std::array<ck::index_t, NDimSpatial> filter_spatial_lengths,
std::vector<ck::index_t> output_spatial_lengths, std::array<ck::index_t, NDimSpatial> output_spatial_lengths,
std::vector<ck::index_t> conv_filter_strides, std::array<ck::index_t, NDimSpatial> conv_filter_strides,
std::vector<ck::index_t> conv_filter_dilations, std::array<ck::index_t, NDimSpatial> conv_filter_dilations,
std::vector<ck::index_t> input_left_pads, std::array<ck::index_t, NDimSpatial> input_left_pads,
std::vector<ck::index_t> input_right_pads, std::array<ck::index_t, NDimSpatial> input_right_pads,
InElementwiseOperation in_element_op, InElementwiseOperation in_element_op,
WeiElementwiseOperation wei_element_op, WeiElementwiseOperation wei_element_op,
OutElementwiseOperation out_element_op, OutElementwiseOperation out_element_op,
...@@ -724,13 +726,13 @@ struct DeviceConv2dBwdWeightXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_ ...@@ -724,13 +726,13 @@ struct DeviceConv2dBwdWeightXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_
ck::index_t N, ck::index_t N,
ck::index_t K, ck::index_t K,
ck::index_t C, ck::index_t C,
std::vector<ck::index_t> input_spatial_lengths, std::array<ck::index_t, NDimSpatial> input_spatial_lengths,
std::vector<ck::index_t> filter_spatial_lengths, std::array<ck::index_t, NDimSpatial> filter_spatial_lengths,
std::vector<ck::index_t> output_spatial_lengths, std::array<ck::index_t, NDimSpatial> output_spatial_lengths,
std::vector<ck::index_t> conv_filter_strides, std::array<ck::index_t, NDimSpatial> conv_filter_strides,
std::vector<ck::index_t> conv_filter_dilations, std::array<ck::index_t, NDimSpatial> conv_filter_dilations,
std::vector<ck::index_t> input_left_pads, std::array<ck::index_t, NDimSpatial> input_left_pads,
std::vector<ck::index_t> input_right_pads, std::array<ck::index_t, NDimSpatial> input_right_pads,
InElementwiseOperation in_element_op, InElementwiseOperation in_element_op,
WeiElementwiseOperation wei_element_op, WeiElementwiseOperation wei_element_op,
OutElementwiseOperation out_element_op, OutElementwiseOperation out_element_op,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment