"git@developer.sourcefind.cn:modelzoo/resnet50_tensorflow.git" did not exist on "9dcd43dd244aa2e7c922b649e7daaf7298a2580c"
Unverified Commit ca47a6cf authored by rocking5566's avatar rocking5566 Committed by GitHub
Browse files

Do not hardcode the function parameter, use template instead. (#72)

* Do not hardcode the function parameter, use template instead.

* [What] Remove AThreadTransferSrcResetCoordinateAfterRun and BThreadTransferSrcResetCoordinateAfterRun in host API
[Why] "C_Shuffle" version is supposed to be similar to the vanilla one

* Fix typo
Let DeviceGemmXdl_C_Shuffle use kernel_gemm_xdlops_v3r1
parent 4d40b197
......@@ -156,20 +156,20 @@ struct DeviceGemmXdl_C_Shuffle
MXdlPerWave,
NXdlPerWave,
ABlockTransferThreadClusterLengths_K0_M_K1,
Sequence<1, 0, 2>, // ABlockTransferThreadClusterArrangeOrder,
Sequence<1, 0, 2>, // ABlockTransferSrcAccessOrder,
2, // ABlockTransferSrcVectorDim,
ABlockTransferThreadClusterArrangeOrder,
ABlockTransferSrcAccessOrder,
ABlockTransferSrcVectorDim,
ABlockTransferSrcScalarPerVector,
ABlockTransferDstScalarPerVector_K1,
false, // AThreadTransferSrcResetCoordinateAfterRun,
false,
ABlockLdsAddExtraM,
BBlockTransferThreadClusterLengths_K0_N_K1,
Sequence<1, 0, 2>, // BBlockTransferThreadClusterArrangeOrder,
Sequence<1, 0, 2>, // BBlockTransferSrcAccessOrder,
2, // BBlockTransferSrcVectorDim,
BBlockTransferThreadClusterArrangeOrder,
BBlockTransferSrcAccessOrder,
BBlockTransferSrcVectorDim,
BBlockTransferSrcScalarPerVector,
BBlockTransferDstScalarPerVector_K1,
false, // BThreadTransferSrcResetCoordinateAfterRun,
false,
BBlockLdsAddExtraN,
CShuffleMXdlPerWavePerShuffle,
CShuffleNXdlPerWavePerShuffle,
......@@ -317,7 +317,7 @@ struct DeviceGemmXdl_C_Shuffle
}
else
{
const auto kernel = kernel_gemm_xdlops_v2r3<
const auto kernel = kernel_gemm_xdlops_v3r1<
GridwiseGemm,
ADataType, // TODO: distiguish A/B datatype
CDataType,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment