Commit e62bae7a authored by Chao Liu's avatar Chao Liu
Browse files

tidy

parent 24c87289
...@@ -169,28 +169,30 @@ enable_cppcheck( ...@@ -169,28 +169,30 @@ enable_cppcheck(
unusedPrivateFunction unusedPrivateFunction
unusedStructMember unusedStructMember
# Ignore initializer lists in the tests # Ignore initializer lists in the tests
useInitializationList:*test/*.cpp #useInitializationList:*test/*.cpp
*:*src/sqlite/*.cpp #*:*src/sqlite/*.cpp
*:*.cl #*:*.cl
*:*src/kernels/*.h #*:*src/kernels/*.h
knownConditionTrueFalse:*src/kernels/composable_kernel/*/* #knownConditionTrueFalse:*src/kernels/composable_kernel/*/*
redundantAssignment:*src/kernels/composable_kernel/*/* #redundantAssignment:*src/kernels/composable_kernel/*/*
unreadVariable:*src/kernels/composable_kernel/*/* #unreadVariable:*src/kernels/composable_kernel/*/*
unusedScopedObject:*src/kernels/composable_kernel/*/* #unusedScopedObject:*src/kernels/composable_kernel/*/*
wrongPrintfScanfArgNum:*src/kernels/composable_kernel/*/* #wrongPrintfScanfArgNum:*src/kernels/composable_kernel/*/*
unmatchedSuppression unmatchedSuppression
FORCE FORCE
SOURCES SOURCES
host/host_tensor/src host/host_tensor/src
host/driver_offline host/driver_offline/src
composable_kernel/src composable_kernel/src/kernel_wrapper
INCLUDE INCLUDE
host/host_tensor/include
host/solver/include
host/driver_offline/include
composable_kernel/include/*
${CMAKE_CURRENT_SOURCE_DIR}/include ${CMAKE_CURRENT_SOURCE_DIR}/include
${CMAKE_CURRENT_BINARY_DIR}/include ${CMAKE_CURRENT_BINARY_DIR}/include
${CMAKE_CURRENT_SOURCE_DIR}/src/include
DEFINE DEFINE
CPPCHECK=1 CPPCHECK=1
MIOPEN_USE_MIOPENGEMM=1
__linux__=1 __linux__=1
) )
......
...@@ -12,8 +12,8 @@ include_directories(BEFORE ...@@ -12,8 +12,8 @@ include_directories(BEFORE
${PROJECT_SOURCE_DIR}/external/half/include ${PROJECT_SOURCE_DIR}/external/half/include
) )
set(CONV_FWD_DRIVER_OFFLINE_SOURCE conv_fwd_driver_offline.cpp) set(CONV_FWD_DRIVER_OFFLINE_SOURCE src/conv_fwd_driver_offline.cpp)
set(CONV_BWD_DRIVER_OFFLINE_SOURCE conv_bwd_driver_offline.cpp) set(CONV_BWD_DRIVER_OFFLINE_SOURCE src/conv_bwd_driver_offline.cpp)
add_executable(conv_fwd_driver_offline ${CONV_FWD_DRIVER_OFFLINE_SOURCE}) add_executable(conv_fwd_driver_offline ${CONV_FWD_DRIVER_OFFLINE_SOURCE})
add_executable(conv_bwd_driver_offline ${CONV_BWD_DRIVER_OFFLINE_SOURCE}) add_executable(conv_bwd_driver_offline ${CONV_BWD_DRIVER_OFFLINE_SOURCE})
......
...@@ -8,41 +8,6 @@ namespace driver { ...@@ -8,41 +8,6 @@ namespace driver {
struct CompileParameterConvIgemmFwdV6r1DlopsNchwKcyxNkhw struct CompileParameterConvIgemmFwdV6r1DlopsNchwKcyxNkhw
{ {
ck::DataTypeEnum_t ABDataTypeEnum;
ck::DataTypeEnum_t AccDataTypeEnum;
ck::DataTypeEnum_t CDataTypeEnum;
int BlockSize;
int GN0;
int GK1;
int GM1PerBlockGM11;
int GN1PerBlockGN11;
int GK0PerBlock;
int BM1PerThreadBM11;
int BN1PerThreadBN11;
int BK0PerThread;
std::array<int, 2> BM10BN10ThreadClusterBM10Xs;
std::array<int, 2> BM10BN10ThreadClusterBN10Xs;
std::array<int, 5> ABlockTransferThreadSliceLengths_GK0_GM0_GM10_GM11_GK1;
std::array<int, 5> ABlockTransferThreadClusterLengths_GK0_GM0_GM10_GM11_GK1;
std::array<int, 5> ABlockTransferSrcVectorTensorLengths_GK0_GM0_GM10_GM11_GK1;
std::array<int, 5> ABlockTransferDstVectorTensorLengths_GK0_GM0_GM10_GM11_GK1;
std::array<int, 5> BBlockTransferThreadSliceLengths_GK0_GN0_GN10_GN11_GK1;
std::array<int, 5> BBlockTransferThreadClusterLengths_GK0_GN0_GN10_GN11_GK1;
std::array<int, 5> BBlockTransferSrcVectorTensorLengths_GK0_GN0_GN10_GN11_GK1;
std::array<int, 5> BBlockTransferDstVectorTensorLengths_GK0_GN0_GN10_GN11_GK1;
int CThreadTransferDstScalarPerVector;
bool HasMainKBlockLoop;
bool HasDoubleTailKBlockLoop;
auto GetCompileParameterString() const auto GetCompileParameterString() const
{ {
// clang-format off // clang-format off
...@@ -128,11 +93,46 @@ struct CompileParameterConvIgemmFwdV6r1DlopsNchwKcyxNkhw ...@@ -128,11 +93,46 @@ struct CompileParameterConvIgemmFwdV6r1DlopsNchwKcyxNkhw
" -DCK_PARAM_CThreadTransferDstScalarPerVector=" + " -DCK_PARAM_CThreadTransferDstScalarPerVector=" +
std::to_string(CThreadTransferDstScalarPerVector) + std::to_string(CThreadTransferDstScalarPerVector) +
" -DCK_PARAM_HasMainKBlockLoop=" + " -DCK_PARAM_HasMainKBlockLoop=" +
std::to_string(HasMainKBlockLoop) + std::to_string(static_cast<int>(HasMainKBlockLoop)) +
" -DCK_PARAM_HasDoubleTailKBlockLoop=" + " -DCK_PARAM_HasDoubleTailKBlockLoop=" +
std::to_string(HasDoubleTailKBlockLoop); std::to_string(static_cast<int>(HasDoubleTailKBlockLoop));
// clang-format on // clang-format on
} }
ck::DataTypeEnum_t ABDataTypeEnum;
ck::DataTypeEnum_t AccDataTypeEnum;
ck::DataTypeEnum_t CDataTypeEnum;
int BlockSize;
int GN0;
int GK1;
int GM1PerBlockGM11;
int GN1PerBlockGN11;
int GK0PerBlock;
int BM1PerThreadBM11;
int BN1PerThreadBN11;
int BK0PerThread;
std::array<int, 2> BM10BN10ThreadClusterBM10Xs;
std::array<int, 2> BM10BN10ThreadClusterBN10Xs;
std::array<int, 5> ABlockTransferThreadSliceLengths_GK0_GM0_GM10_GM11_GK1;
std::array<int, 5> ABlockTransferThreadClusterLengths_GK0_GM0_GM10_GM11_GK1;
std::array<int, 5> ABlockTransferSrcVectorTensorLengths_GK0_GM0_GM10_GM11_GK1;
std::array<int, 5> ABlockTransferDstVectorTensorLengths_GK0_GM0_GM10_GM11_GK1;
std::array<int, 5> BBlockTransferThreadSliceLengths_GK0_GN0_GN10_GN11_GK1;
std::array<int, 5> BBlockTransferThreadClusterLengths_GK0_GN0_GN10_GN11_GK1;
std::array<int, 5> BBlockTransferSrcVectorTensorLengths_GK0_GN0_GN10_GN11_GK1;
std::array<int, 5> BBlockTransferDstVectorTensorLengths_GK0_GN0_GN10_GN11_GK1;
int CThreadTransferDstScalarPerVector;
bool HasMainKBlockLoop;
bool HasDoubleTailKBlockLoop;
}; };
struct TunableConvIgemmFwdV6r1DlopsNchwKcyxNkhw struct TunableConvIgemmFwdV6r1DlopsNchwKcyxNkhw
...@@ -230,8 +230,6 @@ struct ConvIgemmFwdV6r1DlopsNchwKcyxNkhw ...@@ -230,8 +230,6 @@ struct ConvIgemmFwdV6r1DlopsNchwKcyxNkhw
CalculateCompileParameterBasedOnTunable(const ConvolutionProblemDescriptor& conv_problem_desc, CalculateCompileParameterBasedOnTunable(const ConvolutionProblemDescriptor& conv_problem_desc,
const TunableConvIgemmFwdV6r1DlopsNchwKcyxNkhw& tunable) const TunableConvIgemmFwdV6r1DlopsNchwKcyxNkhw& tunable)
{ {
using namespace ck;
const int C = conv_problem_desc.C; const int C = conv_problem_desc.C;
const int Y = conv_problem_desc.Y; const int Y = conv_problem_desc.Y;
const int X = conv_problem_desc.X; const int X = conv_problem_desc.X;
...@@ -248,12 +246,17 @@ struct ConvIgemmFwdV6r1DlopsNchwKcyxNkhw ...@@ -248,12 +246,17 @@ struct ConvIgemmFwdV6r1DlopsNchwKcyxNkhw
DataTypeEnum_t AccDataTypeEnum; DataTypeEnum_t AccDataTypeEnum;
switch(ABDataTypeEnum) if(ABDataTypeEnum == DataTypeEnum_t::Float || ABDataTypeEnum == DataTypeEnum_t::Half)
{
AccDataTypeEnum = DataTypeEnum_t::Float;
}
else if(ABDataTypeEnum == DataTypeEnum_t::Int8)
{ {
case DataTypeEnum_t::Float: AccDataTypeEnum = DataTypeEnum_t::Int32;
case DataTypeEnum_t::Half: AccDataTypeEnum = DataTypeEnum_t::Float; break; }
case DataTypeEnum_t::Int8: AccDataTypeEnum = DataTypeEnum_t::Int32; break; else
default: return std::make_tuple(CompileParameterConvIgemmFwdV6r1DlopsNchwKcyxNkhw{}, false); {
return std::make_tuple(CompileParameterConvIgemmFwdV6r1DlopsNchwKcyxNkhw{}, false);
} }
const int BlockSize = tunable.BlockSize; const int BlockSize = tunable.BlockSize;
...@@ -343,7 +346,7 @@ struct ConvIgemmFwdV6r1DlopsNchwKcyxNkhw ...@@ -343,7 +346,7 @@ struct ConvIgemmFwdV6r1DlopsNchwKcyxNkhw
{ {
for(const auto& tunable : generate_tunable_list_conv_igemm_fwd_v6r1_dlops_nchw_kcyx_nkhw()) for(const auto& tunable : generate_tunable_list_conv_igemm_fwd_v6r1_dlops_nchw_kcyx_nkhw())
{ {
CompileParameterConvIgemmFwdV6r1DlopsNchwKcyxNkhw compile_param; CompileParameterConvIgemmFwdV6r1DlopsNchwKcyxNkhw compile_param{};
bool found = false; bool found = false;
std::tie(compile_param, found) = std::tie(compile_param, found) =
...@@ -369,8 +372,6 @@ struct ConvIgemmFwdV6r1DlopsNchwKcyxNkhw ...@@ -369,8 +372,6 @@ struct ConvIgemmFwdV6r1DlopsNchwKcyxNkhw
IsValidCompileParameter(const ConvolutionProblemDescriptor& conv_problem_desc, IsValidCompileParameter(const ConvolutionProblemDescriptor& conv_problem_desc,
const CompileParameterConvIgemmFwdV6r1DlopsNchwKcyxNkhw& compile_param) const CompileParameterConvIgemmFwdV6r1DlopsNchwKcyxNkhw& compile_param)
{ {
using namespace ck;
const int N = conv_problem_desc.N; const int N = conv_problem_desc.N;
const int K = conv_problem_desc.K; const int K = conv_problem_desc.K;
const int C = conv_problem_desc.C; const int C = conv_problem_desc.C;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment