"src/graph/vscode:/vscode.git/clone" did not exist on "e34a507292bf743c20a1b7af2bcf28457673eda3"
Commit d1842890 authored by Chao Liu

tidy

parent f885c131
@@ -38,6 +38,10 @@ link_libraries(${OpenMP_pthread_LIBRARY})
 find_package(HIP REQUIRED)
 message(STATUS "Build with HIP ${hip_VERSION}")
+## half
+#find_path(HALF_INCLUDE_DIR half.hpp)
+#message("HALF_INCLUDE_DIR: ${HALF_INCLUDE_DIR}")
 ## tidy
 include(EnableCompilerWarnings)
 set(MIOPEN_TIDY_ERRORS ERRORS * -readability-inconsistent-declaration-parameter-name)
...
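The rest of this commit mostly deletes locals that are declared but never read, which is exactly what the warning/tidy setup above flags. A minimal illustration (not taken from the repository) of the kind of dead local that EnableCompilerWarnings or a clang-tidy unused-variable check reports:

```cpp
// Illustration only: with -Wunused-variable (or an equivalent clang-tidy check),
// the locals M and N below are flagged, so the later hunks simply remove such lines.
int unused_local_example(int rows, int cols)
{
    const int M = rows; // warning: unused variable 'M'
    const int N = cols; // warning: unused variable 'N'
    return rows * cols;
}

int main() { return unused_local_example(2, 3) == 6 ? 0 : 1; }
```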
@@ -203,9 +203,6 @@ struct GridwiseDynamicGemm_k0mk1_k0nk1_mn_xdlops_v2r3
 __host__ __device__ static constexpr auto
 MakeCM0M1M2NGridDescriptor(const CMNGridDesc& c_m_n_grid_desc)
 {
-    const auto M = c_m_n_grid_desc.GetLength(I0);
-    const auto N = c_m_n_grid_desc.GetLength(I1);
     constexpr auto xdlops_gemm = XdlopsGemm<FloatAB, MPerWave, NPerWave, K1>{};
     constexpr auto CLayout = xdlops_gemm.GetCLayout();
@@ -217,7 +214,6 @@ struct GridwiseDynamicGemm_k0mk1_k0nk1_mn_xdlops_v2r3
     constexpr index_t MWaves = MPerBlock / (MPerWave * MRepeat);
     constexpr index_t NWaves = NPerBlock / (NPerWave * NRepeat);
-    constexpr auto N0 = Number<CLayout.N1()>{};
     constexpr auto N1 = Number<CLayout.N0()>{};
     const auto c_m0_m1_m2_n_grid_desc = transform_dynamic_tensor_descriptor(
@@ -277,8 +273,6 @@ struct GridwiseDynamicGemm_k0mk1_k0nk1_mn_xdlops_v2r3
         p_c_grid, c_m0_m1_m2_n_grid_desc.GetElementSpaceSize());
     const auto K0 = a_k0_m_k1_grid_desc.GetLength(I0);
-    const auto M = a_k0_m_k1_grid_desc.GetLength(I1);
-    const auto N = b_k0_n_k1_grid_desc.GetLength(I1);
     // divide block work by [M, N]
     const auto block_work_idx =
...
@@ -35,11 +35,6 @@ void device_dynamic_convolution_backward_data_implicit_gemm_v4r1_xdlops_nhwc_kyx
     constexpr auto I1 = Number<1>{};
     constexpr auto I2 = Number<2>{};
     constexpr auto I3 = Number<3>{};
-    constexpr auto I4 = Number<4>{};
-    constexpr auto I5 = Number<5>{};
-    constexpr auto I6 = Number<6>{};
-    constexpr auto I7 = Number<7>{};
-    constexpr auto I8 = Number<8>{};
     DeviceMem in_n_hi_wi_c_device_buf(sizeof(TInWei) * in_n_hi_wi_c.mDesc.GetElementSpace());
     DeviceMem wei_k_y_x_c_device_buf(sizeof(TInWei) * wei_k_y_x_c.mDesc.GetElementSpace());
@@ -319,16 +314,13 @@ void device_dynamic_convolution_backward_data_implicit_gemm_v4r1_xdlops_nhwc_kyx
     const auto K = out_n_ho_wo_k_lengths[I3];
     const auto C = wei_k_y_x_c_lengths[I3];
-    const auto Hi = in_n_hi_wi_c_lengths[I1];
-    const auto Wi = in_n_hi_wi_c_lengths[I2];
     const auto Ho = out_n_ho_wo_k_lengths[I1];
     const auto Wo = out_n_ho_wo_k_lengths[I2];
     const auto Y = wei_k_y_x_c_lengths[I1];
     const auto X = wei_k_y_x_c_lengths[I2];
-    float perf = (float)(std::size_t(2) * N * K * Ho * Wo * C * Y * X) /
+    float perf = static_cast<float>((std::size_t(2) * N * K * Ho * Wo * C * Y * X)) /
                  (std::size_t(1000) * 1000 * 1000) / ave_time;
     std::cout << "Average time : " << ave_time << " ms, " << perf << " TFlop/s"
...
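The perf line above converts a FLOP count into TFlop/s: the convolution does roughly 2 * N * K * Ho * Wo * C * Y * X FLOPs (a multiply and an add per MAC), dividing by 10^9 gives GFLOPs, and GFLOPs divided by a time in milliseconds equals TFLOP/s, matching the printed label. A standalone units check with assumed problem sizes (the dimensions and kernel time below are made up, not taken from the commit):

```cpp
#include <cstddef>
#include <iostream>

int main()
{
    // Assumed problem size and kernel time, for illustration only.
    const std::size_t N = 128, K = 256, Ho = 28, Wo = 28, C = 256, Y = 3, X = 3;
    const float ave_time = 5.0f; // milliseconds

    // Same arithmetic as the driver: FLOPs / 1e9 / ms == GFLOP per ms == TFlop/s.
    float perf = static_cast<float>(std::size_t(2) * N * K * Ho * Wo * C * Y * X) /
                 (std::size_t(1000) * 1000 * 1000) / ave_time;

    std::cout << "Average time : " << ave_time << " ms, " << perf << " TFlop/s\n";
    return 0;
}
```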
@@ -35,11 +35,6 @@ void device_dynamic_convolution_backward_data_implicit_gemm_v4r1r2_xdlops_nhwc_k
     constexpr auto I1 = Number<1>{};
     constexpr auto I2 = Number<2>{};
     constexpr auto I3 = Number<3>{};
-    constexpr auto I4 = Number<4>{};
-    constexpr auto I5 = Number<5>{};
-    constexpr auto I6 = Number<6>{};
-    constexpr auto I7 = Number<7>{};
-    constexpr auto I8 = Number<8>{};
     DeviceMem in_n_hi_wi_c_device_buf(sizeof(TInWei) * in_n_hi_wi_c.mDesc.GetElementSpace());
     DeviceMem wei_k_y_x_c_device_buf(sizeof(TInWei) * wei_k_y_x_c.mDesc.GetElementSpace());
@@ -304,7 +299,7 @@ void device_dynamic_convolution_backward_data_implicit_gemm_v4r1r2_xdlops_nhwc_k
     const auto Y = wei_k_y_x_c_lengths[I1];
     const auto X = wei_k_y_x_c_lengths[I2];
-    float perf = (float)(std::size_t(2) * N * K * Ho * Wo * C * Y * X) /
+    float perf = static_cast<float>((std::size_t(2) * N * K * Ho * Wo * C * Y * X)) /
                  (std::size_t(1000) * 1000 * 1000) / ave_time;
     std::cout << "Average time : " << ave_time << " ms, " << perf << " TFlop/s"
...
@@ -277,8 +277,6 @@ int main(int argc, char* argv[])
         in_right_pads_dev);
     };
-    const auto nhwc_desc = f_make_for_device_nhwc();
 #if USE_CONV_BWD_V4R1_XDL_NHWC
     if(algo == ConvBackwardDataAlgo::V4R1XDLNHWC)
     {
...
@@ -20,12 +20,12 @@
 #include "device_dynamic_convolution_forward_implicit_gemm_v4r4r4_xdlops_nhwc_kyxc_nhwk.hpp"
 #define USE_DYNAMIC_MODE 1
-#define USE_CONV_FWD_V4R4_NCHW 1
-#define USE_CONV_FWD_V4R4R2_NHWC 1
-#define USE_CONV_FWD_V6R1_NCHW 1
+#define USE_CONV_FWD_V4R4_NCHW 0
+#define USE_CONV_FWD_V4R4R2_NHWC 0
+#define USE_CONV_FWD_V6R1_NCHW 0
 #define USE_CONV_FWD_V5R1_NCHW 0
 #define USE_CONV_FWD_V4R4R2_XDL_NCHW 0
-#define USE_CONV_FWD_V4R4R4_XDL_NHWC 0
+#define USE_CONV_FWD_V4R4R4_XDL_NHWC 1
 enum ConvForwardAlgo
 {
...
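These USE_CONV_FWD_* macros gate which forward-convolution branches get compiled into the driver, and the runtime algo value then selects among the compiled-in branches, the same pattern as the "#if USE_CONV_BWD_V4R1_XDL_NHWC / if(algo == ...)" pair earlier in this diff. A minimal sketch of that gating (the enumerator names and branch body below are hypothetical, not the driver's code):

```cpp
#include <iostream>

// Compile-time switch: set to 0 to drop the branch from the binary entirely.
#define USE_CONV_FWD_V4R4R4_XDL_NHWC 1

enum ConvForwardAlgo
{
    V4R4NCHW,      // hypothetical enumerator
    V4R4R4XDLNHWC  // hypothetical enumerator
};

int main()
{
    const ConvForwardAlgo algo = ConvForwardAlgo::V4R4R4XDLNHWC; // runtime selection

#if USE_CONV_FWD_V4R4R4_XDL_NHWC
    if(algo == ConvForwardAlgo::V4R4R4XDLNHWC)
    {
        std::cout << "would launch the v4r4r4 XDLOPS NHWC forward kernel" << std::endl;
    }
#endif
    return 0;
}
```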
@@ -10,6 +10,8 @@ set(HOST_TENSOR_SOURCE
 ## the library target
 add_library(host_tensor SHARED ${HOST_TENSOR_SOURCE})
+#target_include_directories(host_tensor SYSTEM PUBLIC $<BUILD_INTERFACE:${HALF_INCLUDE_DIR}>)
 target_link_libraries(host_tensor PRIVATE hip::device)
 target_link_libraries(host_tensor INTERFACE hip::host)
...
@@ -14,7 +14,7 @@ void host_direct_convolution_backward_data(Tensor<TIn>& in,
                                            const ConvStrides& conv_strides,
                                            const ConvDilations& conv_dilations,
                                            const InLeftPads& in_left_pads,
-                                           const InRightPads& in_right_pads,
+                                           const InRightPads& /* in_right_pads */,
                                            const ConvTensorLayout layout = ConvTensorLayout::NCHW)
 {
     using namespace ck;
@@ -25,11 +25,6 @@ void host_direct_convolution_backward_data(Tensor<TIn>& in,
     constexpr auto I3 = Number<3>{};
     auto f_nchw = [&](auto n, auto c, auto hi, auto wi) {
-        std::size_t N = in.mDesc.GetLengths()[I0];
-        std::size_t C = in.mDesc.GetLengths()[I1];
-        std::size_t Hi = in.mDesc.GetLengths()[I2];
-        std::size_t Wi = in.mDesc.GetLengths()[I3];
         std::size_t K = wei.mDesc.GetLengths()[I0];
         std::size_t Y = wei.mDesc.GetLengths()[I2];
         std::size_t X = wei.mDesc.GetLengths()[I3];
@@ -74,11 +69,6 @@ void host_direct_convolution_backward_data(Tensor<TIn>& in,
     };
     auto f_nhwc = [&](auto n, auto hi, auto wi, auto c) {
-        std::size_t N = in.mDesc.GetLengths()[I0];
-        std::size_t Hi = in.mDesc.GetLengths()[I1];
-        std::size_t Wi = in.mDesc.GetLengths()[I2];
-        std::size_t C = in.mDesc.GetLengths()[I3];
         std::size_t K = wei.mDesc.GetLengths()[I0];
         std::size_t Y = wei.mDesc.GetLengths()[I1];
         std::size_t X = wei.mDesc.GetLengths()[I2];
@@ -122,22 +112,24 @@ void host_direct_convolution_backward_data(Tensor<TIn>& in,
         in(n, hi, wi, c) = v;
     };
-    switch(layout)
+    if(layout == ConvTensorLayout::NCHW)
     {
-    case ConvTensorLayout::NCHW:
         make_ParallelTensorFunctor(f_nchw,
                                    in.mDesc.GetLengths()[0],
                                    in.mDesc.GetLengths()[1],
                                    in.mDesc.GetLengths()[2],
                                    in.mDesc.GetLengths()[3])(std::thread::hardware_concurrency());
-        break;
+    }
-    case ConvTensorLayout::NHWC:
+    else if(layout == ConvTensorLayout::NHWC)
+    {
         make_ParallelTensorFunctor(f_nhwc,
                                    in.mDesc.GetLengths()[0],
                                    in.mDesc.GetLengths()[1],
                                    in.mDesc.GetLengths()[2],
                                    in.mDesc.GetLengths()[3])(std::thread::hardware_concurrency());
-        break;
+    }
-    default: throw std::runtime_error("wrong! not supported layout");
+    else
+    {
+        throw std::runtime_error("wrong! not supported layout");
     }
 }
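make_ParallelTensorFunctor itself is not shown in this diff. Assuming it simply runs a per-element functor such as f_nchw or f_nhwc over the four tensor dimensions on std::thread::hardware_concurrency() threads, a self-contained sketch of that idea follows (the function name, signature, and striding scheme here are assumptions, not the library's API):

```cpp
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <thread>
#include <vector>

// Hypothetical stand-in for make_ParallelTensorFunctor: apply f(i0, i1, i2, i3) to every
// index of a d0 x d1 x d2 x d3 space, striding the flattened index across worker threads.
template <typename F>
void parallel_tensor_for(
    F f, std::size_t d0, std::size_t d1, std::size_t d2, std::size_t d3, unsigned num_threads)
{
    const std::size_t total = d0 * d1 * d2 * d3;
    num_threads = std::max(1u, num_threads);

    std::vector<std::thread> workers;
    for(unsigned t = 0; t < num_threads; ++t)
    {
        workers.emplace_back([=] {
            for(std::size_t i = t; i < total; i += num_threads)
            {
                const std::size_t i3 = i % d3;
                const std::size_t i2 = (i / d3) % d2;
                const std::size_t i1 = (i / (d3 * d2)) % d1;
                const std::size_t i0 = i / (d3 * d2 * d1);
                f(i0, i1, i2, i3); // e.g. f_nchw(n, c, hi, wi) or f_nhwc(n, hi, wi, c)
            }
        });
    }
    for(auto& w : workers)
        w.join();
}

int main()
{
    // Smoke test: mark every index of a tiny 2 x 3 x 4 x 5 "tensor" exactly once.
    std::vector<int> visits(2 * 3 * 4 * 5, 0);
    parallel_tensor_for(
        [&](std::size_t n, std::size_t c, std::size_t h, std::size_t w) {
            visits[((n * 3 + c) * 4 + h) * 5 + w] = 1; // each index owned by one thread
        },
        2, 3, 4, 5, std::thread::hardware_concurrency());

    std::cout << "visited " << std::count(visits.begin(), visits.end(), 1) << " of "
              << visits.size() << " indices\n";
    return 0;
}
```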