Commit 5af722b4 authored by Chao Liu's avatar Chao Liu
Browse files

update buffer addressing

parent c68d9147
......@@ -29,8 +29,8 @@
#endif
// only gfx908 support native floating point atomic add
#ifndef CK_USE_AMD_BUFFER_ATOMIC_ADD
#define CK_USE_AMD_BUFFER_ATOMIC_ADD 0
#ifndef CK_USE_AMD_BUFFER_ATOMIC_FADD
#define CK_USE_AMD_BUFFER_ATOMIC_FADD 0
#endif
// AMD XDLOPS
......@@ -52,8 +52,8 @@
#endif
// experimental implementation
#ifndef CK_EXPERIMENTAL_AMD_BUFFER_ADDRESSING_USE_OFFSET_TRICK
#define CK_EXPERIMENTAL_AMD_BUFFER_ADDRESSING_USE_OFFSET_TRICK 1
#ifndef CK_EXPERIMENTAL_USE_BUFFER_ADDRESS_OOB_CHECK
#define CK_EXPERIMENTAL_USE_BUFFER_ADDRESS_OOB_CHECK 1
#endif
#ifndef CK_EXPERIMENTAL_BLOCKWISE_GEMM_USE_PIPELINE
......@@ -73,10 +73,6 @@
#ifndef CK_WORKAROUND_SWDEV_229564
#define CK_WORKAROUND_SWDEV_229564 1
#endif
// workaround for buffer load/store fp16/bfp16 intrinsic bug
#ifndef CK_WORKAROUND_SWDEV_231101
#define CK_WORKAROUND_SWDEV_231101 1
#endif
namespace ck {
......
......@@ -187,7 +187,7 @@ void device_convolution_implicit_gemm_v4r4_nchw_kcyx_nkhw(InDesc,
constexpr index_t GemmBBlockCopyDstDataPerWrite_GemmN = 4;
constexpr index_t GemmCThreadCopyDstDataPerWrite_GemmN1 = 4;
#elif 0
#elif 1
// cdata = 64, BlockSize = 256, 128x128x16
constexpr index_t BlockSize = 256;
......
......@@ -51,10 +51,10 @@ int main(int argc, char* argv[])
#elif 1
// 3x3, 28x28
constexpr index_t N = 128;
constexpr index_t C = 256;
constexpr index_t C = 128;
constexpr index_t HI = 28;
constexpr index_t WI = 28;
constexpr index_t K = 1024;
constexpr index_t K = 128;
constexpr index_t Y = 3;
constexpr index_t X = 3;
......@@ -245,7 +245,7 @@ int main(int argc, char* argv[])
device_convolution_backward_data_implicit_gemm_v1r1_nchw_kcyx_nkhw
#elif 0
device_convolution_backward_data_implicit_gemm_v1r2_nchw_kcyx_nkhw
#elif 0
#elif 1
device_convolution_backward_data_implicit_gemm_v4r1_nchw_kcyx_nkhw
#elif 1
device_convolution_backward_data_implicit_gemm_v5r1_nhwc_kyxc_nhwk
......
......@@ -201,7 +201,7 @@ int main(int argc, char* argv[])
using LeftPads = Sequence<0, 0>;
using RightPads = Sequence<0, 0>;
#elif 1
#elif 0
// 3x3, 35x35, stride 2
constexpr index_t N = 128;
constexpr index_t C = 288;
......@@ -339,7 +339,7 @@ int main(int argc, char* argv[])
#elif 1
// 3x3, 28x28
constexpr index_t N = 128;
constexpr index_t C = 192;
constexpr index_t C = 128;
constexpr index_t HI = 28;
constexpr index_t WI = 28;
constexpr index_t K = 128;
......@@ -561,7 +561,7 @@ int main(int argc, char* argv[])
LeftPads{},
RightPads{},
nrepeat);
#elif 0
#elif 1
device_convolution_implicit_gemm_v4r4_nchw_kcyx_nkhw(in_nchw_desc,
in_nchw,
wei_kcyx_desc,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment