Unverified Commit 31b40352 authored by Chao Liu's avatar Chao Liu Committed by GitHub
Browse files

Merge pull request #16 from ROCmSoftwarePlatform/develop

Merge develop into master
parents 5781adf5 b62bf8c3
#include "${KERNEL_SRC_HPP_FILENAME}"
......@@ -2,136 +2,150 @@
#define CONV_IGEMM_FWD_V6R1_DLOPS_NCHW_KCYX_NKHW_HPP
#include <numeric>
#include <sstream>
namespace ck_driver {
namespace ck {
namespace driver {
struct CompileParameterConvIgemmFwdV6r1DlopsNchwKcyxNkhw
{
ck::DataTypeEnum_t ABDataTypeEnum;
ck::DataTypeEnum_t AccDataTypeEnum;
ck::DataTypeEnum_t CDataTypeEnum;
auto GetCompileParameterString() const
{
auto param = std::stringstream();
int BlockSize;
// clang-format off
param <<
" -DCK_PARAM_ABDataTypeEnum=" <<
ABDataTypeEnum <<
" -DCK_PARAM_AccDataTypeEnum=" <<
AccDataTypeEnum <<
" -DCK_PARAM_CDataTypeEnum=" <<
CDataTypeEnum <<
" -DCK_PARAM_BlockSize=" <<
BlockSize <<
" -DCK_PARAM_GN0=" <<
GN0 <<
" -DCK_PARAM_GK1=" <<
GK1 <<
" -DCK_PARAM_GM1PerBlockGM11="
<< GM1PerBlockGM11 <<
" -DCK_PARAM_GN1PerBlockGN11=" <<
GN1PerBlockGN11 <<
" -DCK_PARAM_GK0PerBlock=" <<
GK0PerBlock <<
" -DCK_PARAM_BM1PerThreadBM11=" <<
BM1PerThreadBM11 <<
" -DCK_PARAM_BN1PerThreadBN11=" <<
BN1PerThreadBN11 <<
" -DCK_PARAM_BK0PerThread=" <<
BK0PerThread <<
" -DCK_PARAM_BM10BN10ThreadClusterBM10Xs=" <<
BM10BN10ThreadClusterBM10Xs[0] << "," <<
BM10BN10ThreadClusterBM10Xs[1] <<
" -DCK_PARAM_BM10BN10ThreadClusterBN10Xs=" <<
BM10BN10ThreadClusterBN10Xs[0] << "," <<
BM10BN10ThreadClusterBN10Xs[1] <<
" -DCK_PARAM_ABlockTransferThreadSliceLengths_GK0_GM0_GM10_GM11_GK1=" <<
ABlockTransferThreadSliceLengths_GK0_GM0_GM10_GM11_GK1[0] << "," <<
ABlockTransferThreadSliceLengths_GK0_GM0_GM10_GM11_GK1[1] << "," <<
ABlockTransferThreadSliceLengths_GK0_GM0_GM10_GM11_GK1[2] << "," <<
ABlockTransferThreadSliceLengths_GK0_GM0_GM10_GM11_GK1[3] << "," <<
ABlockTransferThreadSliceLengths_GK0_GM0_GM10_GM11_GK1[4] <<
" -DCK_PARAM_ABlockTransferThreadClusterLengths_GK0_GM0_GM10_GM11_GK1=" <<
ABlockTransferThreadClusterLengths_GK0_GM0_GM10_GM11_GK1[0] << "," <<
ABlockTransferThreadClusterLengths_GK0_GM0_GM10_GM11_GK1[1] << "," <<
ABlockTransferThreadClusterLengths_GK0_GM0_GM10_GM11_GK1[2] << "," <<
ABlockTransferThreadClusterLengths_GK0_GM0_GM10_GM11_GK1[3] << "," <<
ABlockTransferThreadClusterLengths_GK0_GM0_GM10_GM11_GK1[4] <<
" -DCK_PARAM_ABlockTransferSrcVectorTensorLengths_GK0_GM0_GM10_GM11_GK1=" <<
ABlockTransferSrcVectorTensorLengths_GK0_GM0_GM10_GM11_GK1[0] << "," <<
ABlockTransferSrcVectorTensorLengths_GK0_GM0_GM10_GM11_GK1[1] << "," <<
ABlockTransferSrcVectorTensorLengths_GK0_GM0_GM10_GM11_GK1[2] << "," <<
ABlockTransferSrcVectorTensorLengths_GK0_GM0_GM10_GM11_GK1[3] << "," <<
ABlockTransferSrcVectorTensorLengths_GK0_GM0_GM10_GM11_GK1[4] <<
" -DCK_PARAM_ABlockTransferDstVectorTensorLengths_GK0_GM0_GM10_GM11_GK1=" <<
ABlockTransferDstVectorTensorLengths_GK0_GM0_GM10_GM11_GK1[0] << "," <<
ABlockTransferDstVectorTensorLengths_GK0_GM0_GM10_GM11_GK1[1] << "," <<
ABlockTransferDstVectorTensorLengths_GK0_GM0_GM10_GM11_GK1[2] << "," <<
ABlockTransferDstVectorTensorLengths_GK0_GM0_GM10_GM11_GK1[3] << "," <<
ABlockTransferDstVectorTensorLengths_GK0_GM0_GM10_GM11_GK1[4] <<
" -DCK_PARAM_BBlockTransferThreadSliceLengths_GK0_GN0_GN10_GN11_GK1=" <<
BBlockTransferThreadSliceLengths_GK0_GN0_GN10_GN11_GK1[0] << "," <<
BBlockTransferThreadSliceLengths_GK0_GN0_GN10_GN11_GK1[1] << "," <<
BBlockTransferThreadSliceLengths_GK0_GN0_GN10_GN11_GK1[2] << "," <<
BBlockTransferThreadSliceLengths_GK0_GN0_GN10_GN11_GK1[3] << "," <<
BBlockTransferThreadSliceLengths_GK0_GN0_GN10_GN11_GK1[4] <<
" -DCK_PARAM_BBlockTransferThreadClusterLengths_GK0_GN0_GN10_GN11_GK1=" <<
BBlockTransferThreadClusterLengths_GK0_GN0_GN10_GN11_GK1[0] << "," <<
BBlockTransferThreadClusterLengths_GK0_GN0_GN10_GN11_GK1[1] << "," <<
BBlockTransferThreadClusterLengths_GK0_GN0_GN10_GN11_GK1[2] << "," <<
BBlockTransferThreadClusterLengths_GK0_GN0_GN10_GN11_GK1[3] << "," <<
BBlockTransferThreadClusterLengths_GK0_GN0_GN10_GN11_GK1[4] <<
" -DCK_PARAM_BBlockTransferSrcVectorTensorLengths_GK0_GN0_GN10_GN11_GK1=" <<
BBlockTransferSrcVectorTensorLengths_GK0_GN0_GN10_GN11_GK1[0] << "," <<
BBlockTransferSrcVectorTensorLengths_GK0_GN0_GN10_GN11_GK1[1] << "," <<
BBlockTransferSrcVectorTensorLengths_GK0_GN0_GN10_GN11_GK1[2] << "," <<
BBlockTransferSrcVectorTensorLengths_GK0_GN0_GN10_GN11_GK1[3] << "," <<
BBlockTransferSrcVectorTensorLengths_GK0_GN0_GN10_GN11_GK1[4] <<
" -DCK_PARAM_BBlockTransferDstVectorTensorLengths_GK0_GN0_GN10_GN11_GK1=" <<
BBlockTransferDstVectorTensorLengths_GK0_GN0_GN10_GN11_GK1[0] << "," <<
BBlockTransferDstVectorTensorLengths_GK0_GN0_GN10_GN11_GK1[1] << "," <<
BBlockTransferDstVectorTensorLengths_GK0_GN0_GN10_GN11_GK1[2] << "," <<
BBlockTransferDstVectorTensorLengths_GK0_GN0_GN10_GN11_GK1[3] << "," <<
BBlockTransferDstVectorTensorLengths_GK0_GN0_GN10_GN11_GK1[4] <<
" -DCK_PARAM_CThreadTransferDstScalarPerVector=" <<
CThreadTransferDstScalarPerVector <<
" -DCK_PARAM_HasMainKBlockLoop=" <<
static_cast<int>(HasMainKBlockLoop) <<
" -DCK_PARAM_HasDoubleTailKBlockLoop=" <<
static_cast<int>(HasDoubleTailKBlockLoop);
// clang-format on
int GN0;
int GK1;
return param.str();
}
int GM1PerBlockGM11;
int GN1PerBlockGN11;
int GK0PerBlock;
ck::DataTypeEnum_t ABDataTypeEnum = ck::DataTypeEnum_t::Unknown;
ck::DataTypeEnum_t AccDataTypeEnum = ck::DataTypeEnum_t::Unknown;
ck::DataTypeEnum_t CDataTypeEnum = ck::DataTypeEnum_t::Unknown;
int BM1PerThreadBM11;
int BN1PerThreadBN11;
int BK0PerThread;
int BlockSize = -1;
std::array<int, 2> BM10BN10ThreadClusterBM10Xs;
std::array<int, 2> BM10BN10ThreadClusterBN10Xs;
int GN0 = -1;
int GK1 = -1;
std::array<int, 5> ABlockTransferThreadSliceLengths_GK0_GM0_GM10_GM11_GK1;
std::array<int, 5> ABlockTransferThreadClusterLengths_GK0_GM0_GM10_GM11_GK1;
std::array<int, 5> ABlockTransferSrcVectorTensorLengths_GK0_GM0_GM10_GM11_GK1;
std::array<int, 5> ABlockTransferDstVectorTensorLengths_GK0_GM0_GM10_GM11_GK1;
int GM1PerBlockGM11 = -1;
int GN1PerBlockGN11 = -1;
int GK0PerBlock = -1;
std::array<int, 5> BBlockTransferThreadSliceLengths_GK0_GN0_GN10_GN11_GK1;
std::array<int, 5> BBlockTransferThreadClusterLengths_GK0_GN0_GN10_GN11_GK1;
std::array<int, 5> BBlockTransferSrcVectorTensorLengths_GK0_GN0_GN10_GN11_GK1;
std::array<int, 5> BBlockTransferDstVectorTensorLengths_GK0_GN0_GN10_GN11_GK1;
int BM1PerThreadBM11 = -1;
int BN1PerThreadBN11 = -1;
int BK0PerThread = -1;
int CThreadTransferDstScalarPerVector;
std::array<int, 2> BM10BN10ThreadClusterBM10Xs = {-1, -1};
std::array<int, 2> BM10BN10ThreadClusterBN10Xs = {-1, -1};
bool HasMainKBlockLoop;
bool HasDoubleTailKBlockLoop;
std::array<int, 5> ABlockTransferThreadSliceLengths_GK0_GM0_GM10_GM11_GK1 = {
-1, -1, -1, -1, -1};
std::array<int, 5> ABlockTransferThreadClusterLengths_GK0_GM0_GM10_GM11_GK1 = {
-1, -1, -1, -1, -1};
std::array<int, 5> ABlockTransferSrcVectorTensorLengths_GK0_GM0_GM10_GM11_GK1 = {
-1, -1, -1, -1, -1};
std::array<int, 5> ABlockTransferDstVectorTensorLengths_GK0_GM0_GM10_GM11_GK1 = {
-1, -1, -1, -1, -1};
auto GetCompileParameterString() const
{
// clang-format off
return
" -DCK_PARAM_ABDataTypeEnum=" +
std::to_string(ABDataTypeEnum) +
" -DCK_PARAM_AccDataTypeEnum=" +
std::to_string(AccDataTypeEnum) +
" -DCK_PARAM_CDataTypeEnum=" +
std::to_string(CDataTypeEnum) +
" -DCK_PARAM_BlockSize=" +
std::to_string(BlockSize) +
" -DCK_PARAM_GN0=" +
std::to_string(GN0) +
" -DCK_PARAM_GK1=" +
std::to_string(GK1) +
" -DCK_PARAM_GM1PerBlockGM11=" +
std::to_string(GM1PerBlockGM11) +
" -DCK_PARAM_GN1PerBlockGN11=" +
std::to_string(GN1PerBlockGN11) +
" -DCK_PARAM_GK0PerBlock=" +
std::to_string(GK0PerBlock) +
" -DCK_PARAM_BM1PerThreadBM11=" +
std::to_string(BM1PerThreadBM11) +
" -DCK_PARAM_BN1PerThreadBN11=" +
std::to_string(BN1PerThreadBN11) +
" -DCK_PARAM_BK0PerThread=" +
std::to_string(BK0PerThread) +
" -DCK_PARAM_BM10BN10ThreadClusterBM10Xs=" +
std::to_string(BM10BN10ThreadClusterBM10Xs[0]) + "," +
std::to_string(BM10BN10ThreadClusterBM10Xs[1]) +
" -DCK_PARAM_BM10BN10ThreadClusterBN10Xs=" +
std::to_string(BM10BN10ThreadClusterBN10Xs[0]) + "," +
std::to_string(BM10BN10ThreadClusterBN10Xs[1]) +
" -DCK_PARAM_ABlockTransferThreadSliceLengths_GK0_GM0_GM10_GM11_GK1=" +
std::to_string(ABlockTransferThreadSliceLengths_GK0_GM0_GM10_GM11_GK1[0]) + "," +
std::to_string(ABlockTransferThreadSliceLengths_GK0_GM0_GM10_GM11_GK1[1]) + "," +
std::to_string(ABlockTransferThreadSliceLengths_GK0_GM0_GM10_GM11_GK1[2]) + "," +
std::to_string(ABlockTransferThreadSliceLengths_GK0_GM0_GM10_GM11_GK1[3]) + "," +
std::to_string(ABlockTransferThreadSliceLengths_GK0_GM0_GM10_GM11_GK1[4]) +
" -DCK_PARAM_ABlockTransferThreadClusterLengths_GK0_GM0_GM10_GM11_GK1=" +
std::to_string(ABlockTransferThreadClusterLengths_GK0_GM0_GM10_GM11_GK1[0]) + "," +
std::to_string(ABlockTransferThreadClusterLengths_GK0_GM0_GM10_GM11_GK1[1]) + "," +
std::to_string(ABlockTransferThreadClusterLengths_GK0_GM0_GM10_GM11_GK1[2]) + "," +
std::to_string(ABlockTransferThreadClusterLengths_GK0_GM0_GM10_GM11_GK1[3]) + "," +
std::to_string(ABlockTransferThreadClusterLengths_GK0_GM0_GM10_GM11_GK1[4]) +
" -DCK_PARAM_ABlockTransferSrcVectorTensorLengths_GK0_GM0_GM10_GM11_GK1=" +
std::to_string(ABlockTransferSrcVectorTensorLengths_GK0_GM0_GM10_GM11_GK1[0]) + "," +
std::to_string(ABlockTransferSrcVectorTensorLengths_GK0_GM0_GM10_GM11_GK1[1]) + "," +
std::to_string(ABlockTransferSrcVectorTensorLengths_GK0_GM0_GM10_GM11_GK1[2]) + "," +
std::to_string(ABlockTransferSrcVectorTensorLengths_GK0_GM0_GM10_GM11_GK1[3]) + "," +
std::to_string(ABlockTransferSrcVectorTensorLengths_GK0_GM0_GM10_GM11_GK1[4]) +
" -DCK_PARAM_ABlockTransferDstVectorTensorLengths_GK0_GM0_GM10_GM11_GK1=" +
std::to_string(ABlockTransferDstVectorTensorLengths_GK0_GM0_GM10_GM11_GK1[0]) + "," +
std::to_string(ABlockTransferDstVectorTensorLengths_GK0_GM0_GM10_GM11_GK1[1]) + "," +
std::to_string(ABlockTransferDstVectorTensorLengths_GK0_GM0_GM10_GM11_GK1[2]) + "," +
std::to_string(ABlockTransferDstVectorTensorLengths_GK0_GM0_GM10_GM11_GK1[3]) + "," +
std::to_string(ABlockTransferDstVectorTensorLengths_GK0_GM0_GM10_GM11_GK1[4]) +
" -DCK_PARAM_BBlockTransferThreadSliceLengths_GK0_GN0_GN10_GN11_GK1=" +
std::to_string(BBlockTransferThreadSliceLengths_GK0_GN0_GN10_GN11_GK1[0]) + "," +
std::to_string(BBlockTransferThreadSliceLengths_GK0_GN0_GN10_GN11_GK1[1]) + "," +
std::to_string(BBlockTransferThreadSliceLengths_GK0_GN0_GN10_GN11_GK1[2]) + "," +
std::to_string(BBlockTransferThreadSliceLengths_GK0_GN0_GN10_GN11_GK1[3]) + "," +
std::to_string(BBlockTransferThreadSliceLengths_GK0_GN0_GN10_GN11_GK1[4]) +
" -DCK_PARAM_BBlockTransferThreadClusterLengths_GK0_GN0_GN10_GN11_GK1=" +
std::to_string(BBlockTransferThreadClusterLengths_GK0_GN0_GN10_GN11_GK1[0]) + "," +
std::to_string(BBlockTransferThreadClusterLengths_GK0_GN0_GN10_GN11_GK1[1]) + "," +
std::to_string(BBlockTransferThreadClusterLengths_GK0_GN0_GN10_GN11_GK1[2]) + "," +
std::to_string(BBlockTransferThreadClusterLengths_GK0_GN0_GN10_GN11_GK1[3]) + "," +
std::to_string(BBlockTransferThreadClusterLengths_GK0_GN0_GN10_GN11_GK1[4]) +
" -DCK_PARAM_BBlockTransferSrcVectorTensorLengths_GK0_GN0_GN10_GN11_GK1=" +
std::to_string(BBlockTransferSrcVectorTensorLengths_GK0_GN0_GN10_GN11_GK1[0]) + "," +
std::to_string(BBlockTransferSrcVectorTensorLengths_GK0_GN0_GN10_GN11_GK1[1]) + "," +
std::to_string(BBlockTransferSrcVectorTensorLengths_GK0_GN0_GN10_GN11_GK1[2]) + "," +
std::to_string(BBlockTransferSrcVectorTensorLengths_GK0_GN0_GN10_GN11_GK1[3]) + "," +
std::to_string(BBlockTransferSrcVectorTensorLengths_GK0_GN0_GN10_GN11_GK1[4]) +
" -DCK_PARAM_BBlockTransferDstVectorTensorLengths_GK0_GN0_GN10_GN11_GK1=" +
std::to_string(BBlockTransferDstVectorTensorLengths_GK0_GN0_GN10_GN11_GK1[0]) + "," +
std::to_string(BBlockTransferDstVectorTensorLengths_GK0_GN0_GN10_GN11_GK1[1]) + "," +
std::to_string(BBlockTransferDstVectorTensorLengths_GK0_GN0_GN10_GN11_GK1[2]) + "," +
std::to_string(BBlockTransferDstVectorTensorLengths_GK0_GN0_GN10_GN11_GK1[3]) + "," +
std::to_string(BBlockTransferDstVectorTensorLengths_GK0_GN0_GN10_GN11_GK1[4]) +
" -DCK_PARAM_CThreadTransferDstScalarPerVector=" +
std::to_string(CThreadTransferDstScalarPerVector) +
" -DCK_PARAM_HasMainKBlockLoop=" +
std::to_string(HasMainKBlockLoop) +
" -DCK_PARAM_HasDoubleTailKBlockLoop=" +
std::to_string(HasDoubleTailKBlockLoop);
// clang-format on
}
std::array<int, 5> BBlockTransferThreadSliceLengths_GK0_GN0_GN10_GN11_GK1 = {
-1, -1, -1, -1, -1};
std::array<int, 5> BBlockTransferThreadClusterLengths_GK0_GN0_GN10_GN11_GK1 = {
-1, -1, -1, -1, -1};
std::array<int, 5> BBlockTransferSrcVectorTensorLengths_GK0_GN0_GN10_GN11_GK1 = {
-1, -1, -1, -1, -1};
std::array<int, 5> BBlockTransferDstVectorTensorLengths_GK0_GN0_GN10_GN11_GK1 = {
-1, -1, -1, -1, -1};
int CThreadTransferDstScalarPerVector = -1;
bool HasMainKBlockLoop = false;
bool HasDoubleTailKBlockLoop = false;
};
struct TunableConvIgemmFwdV6r1DlopsNchwKcyxNkhw
......@@ -229,8 +243,6 @@ struct ConvIgemmFwdV6r1DlopsNchwKcyxNkhw
CalculateCompileParameterBasedOnTunable(const ConvolutionProblemDescriptor& conv_problem_desc,
const TunableConvIgemmFwdV6r1DlopsNchwKcyxNkhw& tunable)
{
using namespace ck;
const int C = conv_problem_desc.C;
const int Y = conv_problem_desc.Y;
const int X = conv_problem_desc.X;
......@@ -247,12 +259,17 @@ struct ConvIgemmFwdV6r1DlopsNchwKcyxNkhw
DataTypeEnum_t AccDataTypeEnum;
switch(ABDataTypeEnum)
if(ABDataTypeEnum == DataTypeEnum_t::Float || ABDataTypeEnum == DataTypeEnum_t::Half)
{
AccDataTypeEnum = DataTypeEnum_t::Float;
}
else if(ABDataTypeEnum == DataTypeEnum_t::Int8)
{
case DataTypeEnum_t::Float:
case DataTypeEnum_t::Half: AccDataTypeEnum = DataTypeEnum_t::Float; break;
case DataTypeEnum_t::Int8: AccDataTypeEnum = DataTypeEnum_t::Int32; break;
default: return std::make_tuple(CompileParameterConvIgemmFwdV6r1DlopsNchwKcyxNkhw{}, false);
AccDataTypeEnum = DataTypeEnum_t::Int32;
}
else
{
return std::make_tuple(CompileParameterConvIgemmFwdV6r1DlopsNchwKcyxNkhw{}, false);
}
const int BlockSize = tunable.BlockSize;
......@@ -342,7 +359,7 @@ struct ConvIgemmFwdV6r1DlopsNchwKcyxNkhw
{
for(const auto& tunable : generate_tunable_list_conv_igemm_fwd_v6r1_dlops_nchw_kcyx_nkhw())
{
CompileParameterConvIgemmFwdV6r1DlopsNchwKcyxNkhw compile_param;
CompileParameterConvIgemmFwdV6r1DlopsNchwKcyxNkhw compile_param{};
bool found = false;
std::tie(compile_param, found) =
......@@ -368,8 +385,6 @@ struct ConvIgemmFwdV6r1DlopsNchwKcyxNkhw
IsValidCompileParameter(const ConvolutionProblemDescriptor& conv_problem_desc,
const CompileParameterConvIgemmFwdV6r1DlopsNchwKcyxNkhw& compile_param)
{
using namespace ck;
const int N = conv_problem_desc.N;
const int K = conv_problem_desc.K;
const int C = conv_problem_desc.C;
......@@ -669,5 +684,6 @@ struct ConvIgemmFwdV6r1DlopsNchwKcyxNkhw
}
};
} // namespace ck_driver
} // namespace driver
} // namespace ck
#endif
#ifndef CONVOLUTION_PROBLEM_DESCRIPTOR
#define CONVOLUTION_PROBLEM_DESCRIPTOR
namespace ck_driver {
namespace ck {
namespace driver {
struct ConvolutionProblemDescriptor
{
......@@ -75,5 +76,6 @@ struct ConvolutionProblemDescriptor
std::size_t CalculateFlop() const { return 2L * N * K * C * Y * X * Ho * Wo; }
};
} // namespace ck_driver
} // namespace driver
} // namespace ck
#endif
#ifndef ONLINE_DRIVER_COMMON_HPP
#define ONLINE_DRIVER_COMMON_HPP
#ifndef CK_SOLVER_COMMON_HPP
#define CK_SOLVER_COMMON_HPP
namespace ck_driver {
inline auto get_ck_hip_online_compile_common_flag()
{
std::string param = " -std=c++17";
return param;
}
namespace ck {
namespace driver {
// greatest common divisor, aka highest common factor
inline int gcd(int x, int y)
......@@ -47,5 +41,6 @@ auto gcd(X x, Ys... ys)
return gcd(x, gcd(ys...));
}
} // namespace ck_driver
} // namespace driver
} // namespace ck
#endif
......@@ -3,40 +3,16 @@ rm -f CMakeCache.txt
rm -f *.cmake
rm -rf CMakeFiles
MY_PROJECT_SOURCE=../../../
MY_PROJECT_SOURCE=../../..
MY_PROJECT_INSTALL=../install.dir
cmake \
-D CMAKE_INSTALL_PREFIX=${MY_PROJECT_INSTALL} \
-D HALF_INCLUDE_DIR="/root/workspace/external/half/include" \
-D BUILD_DEV=ON \
-D CMAKE_BUILD_TYPE=Release \
-D CMAKE_CXX_FLAGS="-DCK_AMD_GPU_GFX906 -O3 --amdgpu-target=gfx906 -mllvm --amdgpu-spill-vgpr-to-agpr=0 -gline-tables-only -save-temps=$PWD" \
-D HIP_ONLINE_COMPILER_FLAGS="-DCK_AMD_GPU_GFX906" \
-D CMAKE_CXX_FLAGS="-DCK_AMD_GPU_GFX908 -O3 --amdgpu-target=gfx908 -mllvm --amdgpu-spill-vgpr-to-agpr=0 -gline-tables-only -save-temps=$PWD" \
-D CMAKE_CXX_COMPILER=/opt/rocm/bin/hipcc \
-D CMAKE_PREFIX_PATH=/opt/rocm \
-D CMAKE_VERBOSE_MAKEFILE:BOOL=ON \
${MY_PROJECT_SOURCE}
#CXX_FLAG_TMP=-Weverything
# -Wno-c++98-compat \
# -Wno-c++98-compat-pedantic \
# -Wno-conversion \
# -Wno-double-promotion \
# -Wno-exit-time-destructors \
# -Wno-extra-semi \
# -Wno-float-conversion \
# -Wno-gnu-anonymous-struct \
# -Wno-gnu-zero-variadic-macro-arguments \
# -Wno-missing-noreturn \
# -Wno-missing-prototypes \
# -Wno-nested-anon-types \
# -Wno-padded \
# -Wno-return-std-move-in-c++11 \
# -Wno-shorten-64-to-32 \
# -Wno-sign-conversion \
# -Wno-unknown-warning-option \
# -Wno-unused-command-line-argument \
# -Wno-weak-vtables \
# -Wno-covered-switch-default \
# -Wno-disabled-macro-expansion \
# -Wno-undefined-reinterpret-cast
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment