Commit 4ec493ec authored by ltqin's avatar ltqin
Browse files

change some variable name

parent 1343569e
...@@ -43,7 +43,7 @@ message(STATUS "Build with HIP ${hip_VERSION}") ...@@ -43,7 +43,7 @@ message(STATUS "Build with HIP ${hip_VERSION}")
message("HALF_INCLUDE_DIR: ${HALF_INCLUDE_DIR}") message("HALF_INCLUDE_DIR: ${HALF_INCLUDE_DIR}")
# CMAKE_CXX_FLAGS # CMAKE_CXX_FLAGS
SET(BUILD_DEV ON CACHE BOOL "BUILD_DEV") SET(BUILD_DEV OFF CACHE BOOL "BUILD_DEV")
if(BUILD_DEV) if(BUILD_DEV)
string(APPEND CMAKE_CXX_FLAGS " -Werror -Weverything") string(APPEND CMAKE_CXX_FLAGS " -Werror -Weverything")
endif() endif()
......
...@@ -15,16 +15,16 @@ template <typename TInWei, ...@@ -15,16 +15,16 @@ template <typename TInWei,
typename InLeftPads, typename InLeftPads,
typename InRightPads> typename InRightPads>
void device_convolution_forward_implicit_gemm_v4r4r4_xdlops_nhwgc_gkyxc_nhwgk( void device_convolution_forward_implicit_gemm_v4r4r4_xdlops_nhwgc_gkyxc_nhwgk(
const InLengths& in_n_hi_wi_c_lengths, const InLengths& in_n_hi_wi_g_c_lengths,
const WeiLengths& wei_k_y_x_c_lengths, const WeiLengths& wei_g_k_y_x_c_lengths,
const OutLengths& out_n_ho_wo_k_lengths, const OutLengths& out_n_ho_wo_g_k_lengths,
const ConvStrides& conv_strides, const ConvStrides& conv_strides,
const ConvDilations& conv_dilations, const ConvDilations& conv_dilations,
const InLeftPads& in_left_pads, const InLeftPads& in_left_pads,
const InRightPads& in_right_pads, const InRightPads& in_right_pads,
const Tensor<TInWei>& in_n_hi_wi_c, const Tensor<TInWei>& in_n_hi_wi_g_c,
const Tensor<TInWei>& wei_k_y_x_c, const Tensor<TInWei>& wei_g_k_y_x_c,
Tensor<TOut>& out_n_ho_wo_k, Tensor<TOut>& out_n_ho_wo_g_k,
ck::index_t nrepeat) ck::index_t nrepeat)
{ {
using namespace ck; using namespace ck;
...@@ -35,19 +35,19 @@ void device_convolution_forward_implicit_gemm_v4r4r4_xdlops_nhwgc_gkyxc_nhwgk( ...@@ -35,19 +35,19 @@ void device_convolution_forward_implicit_gemm_v4r4r4_xdlops_nhwgc_gkyxc_nhwgk(
constexpr auto I1 = Number<1>{}; constexpr auto I1 = Number<1>{};
constexpr auto I2 = Number<2>{}; constexpr auto I2 = Number<2>{};
constexpr auto I3 = Number<3>{}; constexpr auto I3 = Number<3>{};
//constexpr auto I4 = Number<4>{}; constexpr auto I4 = Number<4>{};
DeviceMem in_n_hi_wi_c_device_buf(sizeof(TInWei) * in_n_hi_wi_c.mDesc.GetElementSpace()); DeviceMem in_n_hi_wi_g_c_device_buf(sizeof(TInWei) * in_n_hi_wi_g_c.mDesc.GetElementSpace());
DeviceMem wei_k_y_x_c_device_buf(sizeof(TInWei) * wei_k_y_x_c.mDesc.GetElementSpace()); DeviceMem wei_g_k_y_x_c_device_buf(sizeof(TInWei) * wei_g_k_y_x_c.mDesc.GetElementSpace());
DeviceMem out_n_ho_wo_k_device_buf(sizeof(TOut) * out_n_ho_wo_k.mDesc.GetElementSpace()); DeviceMem out_n_ho_wo_g_k_device_buf(sizeof(TOut) * out_n_ho_wo_g_k.mDesc.GetElementSpace());
in_n_hi_wi_c_device_buf.ToDevice(in_n_hi_wi_c.mData.data()); in_n_hi_wi_g_c_device_buf.ToDevice(in_n_hi_wi_g_c.mData.data());
wei_k_y_x_c_device_buf.ToDevice(wei_k_y_x_c.mData.data()); wei_g_k_y_x_c_device_buf.ToDevice(wei_g_k_y_x_c.mData.data());
out_n_ho_wo_k_device_buf.ToDevice(out_n_ho_wo_k.mData.data()); out_n_ho_wo_g_k_device_buf.ToDevice(out_n_ho_wo_g_k.mData.data());
const auto in_n_hi_wi_c_desc = make_naive_tensor_descriptor_packed(in_n_hi_wi_c_lengths); const auto in_n_hi_wi_g_c_desc = make_naive_tensor_descriptor_packed(in_n_hi_wi_g_c_lengths);
const auto wei_k_y_x_c_desc = make_naive_tensor_descriptor_packed(wei_k_y_x_c_lengths); const auto wei_g_k_y_x_c_desc = make_naive_tensor_descriptor_packed(wei_g_k_y_x_c_lengths);
const auto out_n_ho_wo_k_desc = make_naive_tensor_descriptor_packed(out_n_ho_wo_k_lengths); const auto out_n_ho_wo_g_k_desc = make_naive_tensor_descriptor_packed(out_n_ho_wo_g_k_lengths);
#if 0 #if 0
// [M, N, K0, K1] = [256, 128, 4, 4] for fp32 // [M, N, K0, K1] = [256, 128, 4, 4] for fp32
...@@ -176,14 +176,14 @@ void device_convolution_forward_implicit_gemm_v4r4r4_xdlops_nhwgc_gkyxc_nhwgk( ...@@ -176,14 +176,14 @@ void device_convolution_forward_implicit_gemm_v4r4r4_xdlops_nhwgc_gkyxc_nhwgk(
constexpr index_t MRepeat = 2; constexpr index_t MRepeat = 2;
constexpr index_t NRepeat = 4; constexpr index_t NRepeat = 4;
using GemmABlockTransferThreadSliceLengths_GemmK0_GemmM_GemmK1 = Sequence<1, 2, 8>; using GemmABlockTransferThreadSliceLengths_GemmK0_GemmM_GemmK1 = Sequence<1, 1, 2, 8>;
using GemmABlockTransferThreadClusterLengths_GemmK0_GemmM_GemmK1 = Sequence<4, 64, 1>; using GemmABlockTransferThreadClusterLengths_GemmK0_GemmM_GemmK1 = Sequence<1, 4, 64, 1>;
constexpr index_t GemmABlockTransferSrcScalarPerVector_GemmK1 = 8; constexpr index_t GemmABlockTransferSrcScalarPerVector_GemmK1 = 8;
constexpr index_t GemmABlockTransferDstScalarPerVector_GemmK1 = 8; constexpr index_t GemmABlockTransferDstScalarPerVector_GemmK1 = 8;
using GemmBBlockTransferThreadSliceLengths_GemmK0_GemmN_GemmK1 = Sequence<1, 4, 8>; using GemmBBlockTransferThreadSliceLengths_GemmK0_GemmN_GemmK1 = Sequence<1, 1, 4, 8>;
using GemmBBlockTransferThreadClusterLengths_GemmK0_GemmN_GemmK1 = Sequence<4, 64, 1>; using GemmBBlockTransferThreadClusterLengths_GemmK0_GemmN_GemmK1 = Sequence<1, 4, 64, 1>;
constexpr index_t GemmBBlockTransferSrcScalarPerVector_GemmK1 = 8; constexpr index_t GemmBBlockTransferSrcScalarPerVector_GemmK1 = 8;
constexpr index_t GemmBBlockTransferDstScalarPerVector_GemmK1 = 8; constexpr index_t GemmBBlockTransferDstScalarPerVector_GemmK1 = 8;
...@@ -220,9 +220,9 @@ void device_convolution_forward_implicit_gemm_v4r4r4_xdlops_nhwgc_gkyxc_nhwgk( ...@@ -220,9 +220,9 @@ void device_convolution_forward_implicit_gemm_v4r4r4_xdlops_nhwgc_gkyxc_nhwgk(
#endif #endif
const auto descs = const auto descs =
transform_forward_convolution_into_gemm_v4r4r4_nhwgc_gkyxc_nhwgk_pad(in_n_hi_wi_c_desc, transform_forward_convolution_into_gemm_v4r4r4_nhwgc_gkyxc_nhwgk_pad(in_n_hi_wi_g_c_desc,
wei_k_y_x_c_desc, wei_g_k_y_x_c_desc,
out_n_ho_wo_k_desc, out_n_ho_wo_g_k_desc,
conv_strides, conv_strides,
conv_dilations, conv_dilations,
in_left_pads, in_left_pads,
...@@ -351,5 +351,5 @@ void device_convolution_forward_implicit_gemm_v4r4r4_xdlops_nhwgc_gkyxc_nhwgk( ...@@ -351,5 +351,5 @@ void device_convolution_forward_implicit_gemm_v4r4r4_xdlops_nhwgc_gkyxc_nhwgk(
} }
*/ */
// copy result back to host // copy result back to host
out_n_ho_wo_k_device_buf.FromDevice(out_n_ho_wo_k.mData.data()); out_n_ho_wo_g_k_device_buf.FromDevice(out_n_ho_wo_g_k.mData.data());
} }
...@@ -9,7 +9,7 @@ MY_PROJECT_INSTALL=../install.dir ...@@ -9,7 +9,7 @@ MY_PROJECT_INSTALL=../install.dir
cmake \ cmake \
-D CMAKE_INSTALL_PREFIX=${MY_PROJECT_INSTALL} \ -D CMAKE_INSTALL_PREFIX=${MY_PROJECT_INSTALL} \
-D HALF_INCLUDE_DIR="/root/workspace/external/half/include" \ -D HALF_INCLUDE_DIR="/root/workspace/external/half/include" \
-D BUILD_DEV=ON \ -D BUILD_DEV=OFF \
-D CMAKE_BUILD_TYPE=Release \ -D CMAKE_BUILD_TYPE=Release \
-D CMAKE_CXX_FLAGS="-DCK_AMD_GPU_GFX908 -O3 --amdgpu-target=gfx908 -mllvm --amdgpu-spill-vgpr-to-agpr=0 -gline-tables-only -save-temps=$PWD" \ -D CMAKE_CXX_FLAGS="-DCK_AMD_GPU_GFX908 -O3 --amdgpu-target=gfx908 -mllvm --amdgpu-spill-vgpr-to-agpr=0 -gline-tables-only -save-temps=$PWD" \
-D CMAKE_CXX_COMPILER=/opt/rocm/bin/hipcc \ -D CMAKE_CXX_COMPILER=/opt/rocm/bin/hipcc \
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment