Commit 129cc2e3 authored by Chao Liu
Browse files

Merge branch 'dynamic_tensor_descriptor' of...

Merge branch 'dynamic_tensor_descriptor' of github.com:asroy/modular_convolution into dynamic_tensor_descriptor
parents 245723a3 1762eb16
......@@ -7,17 +7,17 @@
#endif
#include "bfloat16_dev.hpp"
#if 1
#if 0
#define CK_AMD_GPU_GFX906 1
#elif 0
#define CK_AMD_GPU_GFX908 1
#else
#define CK_AMD_GPU_GFX906 1
#define CK_AMD_GPU_GFX1030 1
#endif
#if defined(CK_AMD_GPU_GFX906) || defined(CK_AMD_GPU_GFX908)
#define CK_BUFFER_RESOURCE_3RD_DWORD 0x00020000
#elif defined(CK_AMD_GPU_GFX_1030)
#elif defined(CK_AMD_GPU_GFX1030)
#define CK_BUFFER_RESOURCE_3RD_DWORD 0x31014000
#endif
......
......@@ -175,7 +175,7 @@ void device_dynamic_convolution_forward_implicit_gemm_v4r4_nhwc_kyxc_nhwk(InDesc
constexpr index_t GemmBBlockTransferDstScalarPerVector_GemmN = 1;
constexpr index_t GemmCThreadTransferDstScalarPerVector_GemmM1 = 2;
#elif 1
#elif 0
// cdata = 64, BlockSize = 64, 16x256x2
constexpr index_t BlockSize = 64;
......
......@@ -77,7 +77,7 @@ int main(int argc, char* argv[])
using LeftPads = Sequence<1, 1>;
using RightPads = Sequence<1, 1>;
#elif 0
#elif 1
constexpr index_t N = 1;
constexpr index_t C = 4;
constexpr index_t HI = 1080;
......@@ -701,7 +701,7 @@ int main(int argc, char* argv[])
LeftPads{},
RightPads{},
nrepeat);
#elif 0
#elif 1
device_convolution_forward_implicit_gemm_v4r4_nhwc_kyxc_nhwk(in_nchw_desc,
in_nchw,
wei_kcyx_desc,
......@@ -713,7 +713,7 @@ int main(int argc, char* argv[])
LeftPads{},
RightPads{},
nrepeat);
#elif 1
#elif 0
device_dynamic_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw(in_nchw_desc,
in_nchw,
wei_kcyx_desc,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment