"external/vscode:/vscode.git/clone" did not exist on "82fae390fb3f8ebaaff5dfeae439b4a1a703d363"
Unverified Commit ca47a6cf authored by rocking5566's avatar rocking5566 Committed by GitHub
Browse files

Do not hardcode the function parameter, use template instead. (#72)

* Do not hardcode the function parameter, use template instead.

* [What] Remove AThreadTransferSrcResetCoordinateAfterRun and BThreadTransferSrcResetCoordinateAfterRun in host API
[Why] "C_Shuffle" version is supposed to be similar to the vanilla one

* Fix typo
Let DeviceGemmXdl_C_Shuffle use kernel_gemm_xdlops_v3r1
parent 4d40b197
...@@ -156,20 +156,20 @@ struct DeviceGemmXdl_C_Shuffle ...@@ -156,20 +156,20 @@ struct DeviceGemmXdl_C_Shuffle
MXdlPerWave, MXdlPerWave,
NXdlPerWave, NXdlPerWave,
ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterLengths_K0_M_K1,
Sequence<1, 0, 2>, // ABlockTransferThreadClusterArrangeOrder, ABlockTransferThreadClusterArrangeOrder,
Sequence<1, 0, 2>, // ABlockTransferSrcAccessOrder, ABlockTransferSrcAccessOrder,
2, // ABlockTransferSrcVectorDim, ABlockTransferSrcVectorDim,
ABlockTransferSrcScalarPerVector, ABlockTransferSrcScalarPerVector,
ABlockTransferDstScalarPerVector_K1, ABlockTransferDstScalarPerVector_K1,
false, // AThreadTransferSrcResetCoordinateAfterRun, false,
ABlockLdsAddExtraM, ABlockLdsAddExtraM,
BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterLengths_K0_N_K1,
Sequence<1, 0, 2>, // BBlockTransferThreadClusterArrangeOrder, BBlockTransferThreadClusterArrangeOrder,
Sequence<1, 0, 2>, // BBlockTransferSrcAccessOrder, BBlockTransferSrcAccessOrder,
2, // BBlockTransferSrcVectorDim, BBlockTransferSrcVectorDim,
BBlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector,
BBlockTransferDstScalarPerVector_K1, BBlockTransferDstScalarPerVector_K1,
false, // BThreadTransferSrcResetCoordinateAfterRun, false,
BBlockLdsAddExtraN, BBlockLdsAddExtraN,
CShuffleMXdlPerWavePerShuffle, CShuffleMXdlPerWavePerShuffle,
CShuffleNXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle,
...@@ -317,7 +317,7 @@ struct DeviceGemmXdl_C_Shuffle ...@@ -317,7 +317,7 @@ struct DeviceGemmXdl_C_Shuffle
} }
else else
{ {
const auto kernel = kernel_gemm_xdlops_v2r3< const auto kernel = kernel_gemm_xdlops_v3r1<
GridwiseGemm, GridwiseGemm,
ADataType, // TODO: distiguish A/B datatype ADataType, // TODO: distiguish A/B datatype
CDataType, CDataType,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment