"library/vscode:/vscode.git/clone" did not exist on "a49115b95edde18cacc8921c9a3ab9388dd907fa"
Commit 438138c0 authored by wangshaojie6
Browse files

using 256x256x32 tile size

parent ebf3d70b
......@@ -45,13 +45,13 @@ using DeviceConvBwdWeightInstance = ck::tensor_operation::device::
OutElementOp, // OutElementwiseOperation
256, // BlockSize
256, // MPerBlock
128, // NPerBlock
256, // NPerBlock
4, // K0PerBlock
8, // K1
32, // MPerXdl
32, // NPerXdl
4, // MXdlPerWave
2, // NXdlPerWave
4, // NXdlPerWave
S<1, 4, 32, 2>, // ABlockTransferThreadClusterLengths_K0_M_K1
S<0, 3, 1, 2>, // ABlockTransferThreadClusterArrangeOrder
S<0, 2, 1, 3>, // ABlockTransferSrcAccessOrder
......@@ -59,12 +59,12 @@ using DeviceConvBwdWeightInstance = ck::tensor_operation::device::
8, // ABlockTransferSrcScalarPerVector
4, // ABlockTransferDstScalarPerVector_K1
true, // ABlockLdsAddExtraM
S<1, 4, 16, 4>, // BBlockTransferThreadClusterLengths_K0_N_K1
S<1, 4, 32, 2>, // BBlockTransferThreadClusterLengths_K0_N_K1
S<0, 3, 1, 2>, // BBlockTransferThreadClusterArrangeOrder
S<0, 2, 1, 3>, // BBlockTransferSrcAccessOrder
2, // BBlockTransferSrcVectorDim
8, // BBlockTransferSrcScalarPerVector
2, // BBlockTransferDstScalarPerVector_K1
4, // BBlockTransferDstScalarPerVector_K1
true, // BBlockLdsAddExtraN
1, // CShuffleMXdlPerWavePerShuffle
1, // CShuffleNXdlPerWavePerShuffle
......
......@@ -15,7 +15,7 @@
#ifdef CK_USE_LAUNCH_BOUNDS
#define CK_MAX_THREAD_PER_BLOCK 256
#define CK_MIN_BLOCK_PER_CU 2
#define CK_MIN_BLOCK_PER_CU 1
#endif
// check GPU target
......@@ -107,7 +107,7 @@
// experimental feature: use __builtin_memcpy instead of pointer cast to access a vector from
// pointer of scalar
#define CK_EXPERIMENTAL_USE_MEMCPY_FOR_VECTOR_ACCESS 1
#define CK_EXPERIMENTAL_USE_MEMCPY_FOR_VECTOR_ACCESS 0
// experimental feature: use __builtin_memcpy instead of union to do bit_cast
#define CK_EXPERIMENTAL_USE_MEMCPY_FOR_BIT_CAST 1
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment