Commit 5af722b4 authored by Chao Liu
Browse files

update buffer addressing

parent c68d9147
...@@ -29,8 +29,8 @@ ...@@ -29,8 +29,8 @@
#endif #endif
// only gfx908 support native floating point atomic add // only gfx908 support native floating point atomic add
#ifndef CK_USE_AMD_BUFFER_ATOMIC_ADD #ifndef CK_USE_AMD_BUFFER_ATOMIC_FADD
#define CK_USE_AMD_BUFFER_ATOMIC_ADD 0 #define CK_USE_AMD_BUFFER_ATOMIC_FADD 0
#endif #endif
// AMD XDLOPS // AMD XDLOPS
...@@ -52,8 +52,8 @@ ...@@ -52,8 +52,8 @@
#endif #endif
// experimental implementation // experimental implementation
#ifndef CK_EXPERIMENTAL_AMD_BUFFER_ADDRESSING_USE_OFFSET_TRICK #ifndef CK_EXPERIMENTAL_USE_BUFFER_ADDRESS_OOB_CHECK
#define CK_EXPERIMENTAL_AMD_BUFFER_ADDRESSING_USE_OFFSET_TRICK 1 #define CK_EXPERIMENTAL_USE_BUFFER_ADDRESS_OOB_CHECK 1
#endif #endif
#ifndef CK_EXPERIMENTAL_BLOCKWISE_GEMM_USE_PIPELINE #ifndef CK_EXPERIMENTAL_BLOCKWISE_GEMM_USE_PIPELINE
...@@ -73,10 +73,6 @@ ...@@ -73,10 +73,6 @@
#ifndef CK_WORKAROUND_SWDEV_229564 #ifndef CK_WORKAROUND_SWDEV_229564
#define CK_WORKAROUND_SWDEV_229564 1 #define CK_WORKAROUND_SWDEV_229564 1
#endif #endif
// workaround for buffer load/store fp16/bfp16 intrinsic bug
#ifndef CK_WORKAROUND_SWDEV_231101
#define CK_WORKAROUND_SWDEV_231101 1
#endif
namespace ck { namespace ck {
......
...@@ -187,7 +187,7 @@ void device_convolution_implicit_gemm_v4r4_nchw_kcyx_nkhw(InDesc, ...@@ -187,7 +187,7 @@ void device_convolution_implicit_gemm_v4r4_nchw_kcyx_nkhw(InDesc,
constexpr index_t GemmBBlockCopyDstDataPerWrite_GemmN = 4; constexpr index_t GemmBBlockCopyDstDataPerWrite_GemmN = 4;
constexpr index_t GemmCThreadCopyDstDataPerWrite_GemmN1 = 4; constexpr index_t GemmCThreadCopyDstDataPerWrite_GemmN1 = 4;
#elif 0 #elif 1
// cdata = 64, BlockSize = 256, 128x128x16 // cdata = 64, BlockSize = 256, 128x128x16
constexpr index_t BlockSize = 256; constexpr index_t BlockSize = 256;
......
...@@ -51,10 +51,10 @@ int main(int argc, char* argv[]) ...@@ -51,10 +51,10 @@ int main(int argc, char* argv[])
#elif 1 #elif 1
// 3x3, 28x28 // 3x3, 28x28
constexpr index_t N = 128; constexpr index_t N = 128;
constexpr index_t C = 256; constexpr index_t C = 128;
constexpr index_t HI = 28; constexpr index_t HI = 28;
constexpr index_t WI = 28; constexpr index_t WI = 28;
constexpr index_t K = 1024; constexpr index_t K = 128;
constexpr index_t Y = 3; constexpr index_t Y = 3;
constexpr index_t X = 3; constexpr index_t X = 3;
...@@ -245,7 +245,7 @@ int main(int argc, char* argv[]) ...@@ -245,7 +245,7 @@ int main(int argc, char* argv[])
device_convolution_backward_data_implicit_gemm_v1r1_nchw_kcyx_nkhw device_convolution_backward_data_implicit_gemm_v1r1_nchw_kcyx_nkhw
#elif 0 #elif 0
device_convolution_backward_data_implicit_gemm_v1r2_nchw_kcyx_nkhw device_convolution_backward_data_implicit_gemm_v1r2_nchw_kcyx_nkhw
#elif 0 #elif 1
device_convolution_backward_data_implicit_gemm_v4r1_nchw_kcyx_nkhw device_convolution_backward_data_implicit_gemm_v4r1_nchw_kcyx_nkhw
#elif 1 #elif 1
device_convolution_backward_data_implicit_gemm_v5r1_nhwc_kyxc_nhwk device_convolution_backward_data_implicit_gemm_v5r1_nhwc_kyxc_nhwk
......
...@@ -201,7 +201,7 @@ int main(int argc, char* argv[]) ...@@ -201,7 +201,7 @@ int main(int argc, char* argv[])
using LeftPads = Sequence<0, 0>; using LeftPads = Sequence<0, 0>;
using RightPads = Sequence<0, 0>; using RightPads = Sequence<0, 0>;
#elif 1 #elif 0
// 3x3, 35x35, stride 2 // 3x3, 35x35, stride 2
constexpr index_t N = 128; constexpr index_t N = 128;
constexpr index_t C = 288; constexpr index_t C = 288;
...@@ -339,7 +339,7 @@ int main(int argc, char* argv[]) ...@@ -339,7 +339,7 @@ int main(int argc, char* argv[])
#elif 1 #elif 1
// 3x3, 28x28 // 3x3, 28x28
constexpr index_t N = 128; constexpr index_t N = 128;
constexpr index_t C = 192; constexpr index_t C = 128;
constexpr index_t HI = 28; constexpr index_t HI = 28;
constexpr index_t WI = 28; constexpr index_t WI = 28;
constexpr index_t K = 128; constexpr index_t K = 128;
...@@ -561,7 +561,7 @@ int main(int argc, char* argv[]) ...@@ -561,7 +561,7 @@ int main(int argc, char* argv[])
LeftPads{}, LeftPads{},
RightPads{}, RightPads{},
nrepeat); nrepeat);
#elif 0 #elif 1
device_convolution_implicit_gemm_v4r4_nchw_kcyx_nkhw(in_nchw_desc, device_convolution_implicit_gemm_v4r4_nchw_kcyx_nkhw(in_nchw_desc,
in_nchw, in_nchw,
wei_kcyx_desc, wei_kcyx_desc,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment