Commit 162359b6 authored by ltqin's avatar ltqin
Browse files

run simple example

parent 4d2172a9
......@@ -465,7 +465,7 @@ struct DeviceConv2dWrWXdl_C_Shuffle_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_W
static bool IsSupportedArgument(const Argument& arg)
{
// vector load A/B matrix from global memory
if(!(ABlockTransferSrcVectorDim == 1 && BBlockTransferSrcVectorDim == 2 &&
if(!(ABlockTransferSrcVectorDim == 1 && BBlockTransferSrcVectorDim == 1 &&
arg.Conv_K_ % ABlockTransferSrcScalarPerVector == 0 &&
arg.Conv_C_ % BBlockTransferSrcScalarPerVector == 0))
{
......
......@@ -51,18 +51,18 @@ using DeviceConvWrWInstance = ck::tensor_operation::device::
2, // MXdlPerWave
2, // NXdlPerWave
S<4, 16, 4>, // ABlockTransferThreadClusterLengths_K0_M_K1
S<0, 2, 1>, // ABlockTransferThreadClusterArrangeOrder
S<2, 0, 1>, // ABlockTransferThreadClusterArrangeOrder
S<1, 0, 2>, // ABlockTransferSrcAccessOrder
1, // ABlockTransferSrcVectorDim
8, // ABlockTransferSrcScalarPerVector
8, // ABlockTransferDstScalarPerVector_K1
2, // ABlockTransferDstScalarPerVector_K1
true, // ABlockLdsAddExtraM
S<4, 16, 4>, // BBlockTransferThreadClusterLengths_K0_N_K1
S<1, 0, 2>, // BBlockTransferThreadClusterArrangeOrder
S<2, 0, 1>, // BBlockTransferThreadClusterArrangeOrder
S<1, 0, 2>, // BBlockTransferSrcAccessOrder
2, // BBlockTransferSrcVectorDim
1, // BBlockTransferSrcVectorDim
8, // BBlockTransferSrcScalarPerVector
8, // BBlockTransferDstScalarPerVector_K1
2, // BBlockTransferDstScalarPerVector_K1
true, // BBlockLdsAddExtraN
1, // CShuffleMXdlPerWavePerShuffle
1, // CShuffleNXdlPerWavePerShuffle
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment