"git@developer.sourcefind.cn:OpenDAS/opencompass.git" did not exist on "97c2068bd9b21ac2b30177db6531554f4695bc51"
Commit 80901f59 authored by Chao Liu's avatar Chao Liu
Browse files

nvidia build

parent b628bf28
...@@ -50,7 +50,6 @@ include_directories(BEFORE ...@@ -50,7 +50,6 @@ include_directories(BEFORE
${PROJECT_SOURCE_DIR}/external/include ${PROJECT_SOURCE_DIR}/external/include
${PROJECT_SOURCE_DIR}/driver/include ${PROJECT_SOURCE_DIR}/driver/include
${PROJECT_BINARY_DIR}/composable_kernel/include/utility ${PROJECT_BINARY_DIR}/composable_kernel/include/utility
${HALF_INCLUDE_DIR}
) )
if(DEVICE_BACKEND STREQUAL "AMD") if(DEVICE_BACKEND STREQUAL "AMD")
......
...@@ -27,7 +27,9 @@ ...@@ -27,7 +27,9 @@
#if CK_USE_AMD_XDLOPS #if CK_USE_AMD_XDLOPS
#include "amd_xdlops.hpp" #include "amd_xdlops.hpp"
#else #endif
#if CK_USE_AMD_XDLOPS_EMULATE
#include "amd_xdlops_emulate.hpp" #include "amd_xdlops_emulate.hpp"
#endif #endif
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#define CK_USE_AMD_BUFFER_ADDRESSING_INTRINSIC 0 #define CK_USE_AMD_BUFFER_ADDRESSING_INTRINSIC 0
#define CK_USE_AMD_XDLOPS 0 #define CK_USE_AMD_XDLOPS 0
#define CK_USE_AMD_XDLOPS_INLINE_ASM 0 #define CK_USE_AMD_XDLOPS_INLINE_ASM 0
#define CK_USE_AMD_XDLOPS_EMULATE 0
// experimental implementation // experimental implementation
#define CK_EXPERIMENTAL_BLOCKWISE_GEMM_USE_PIPELINE 0 #define CK_EXPERIMENTAL_BLOCKWISE_GEMM_USE_PIPELINE 0
......
...@@ -133,7 +133,7 @@ void device_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw(InDesc, ...@@ -133,7 +133,7 @@ void device_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw(InDesc,
constexpr index_t WeiBlockCopySrcDataPerRead_E = 2; constexpr index_t WeiBlockCopySrcDataPerRead_E = 2;
constexpr index_t WeiBlockCopyDstDataPerWrite_K = 1; constexpr index_t WeiBlockCopyDstDataPerWrite_K = 1;
#elif 0 #elif 1
// cdata = 64, BlockSize = 256, 128x128x8 // cdata = 64, BlockSize = 256, 128x128x8
constexpr index_t BlockSize = 256; constexpr index_t BlockSize = 256;
...@@ -563,7 +563,7 @@ void device_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw(InDesc, ...@@ -563,7 +563,7 @@ void device_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw(InDesc,
constexpr index_t WeiBlockCopySrcDataPerRead_E = 1; constexpr index_t WeiBlockCopySrcDataPerRead_E = 1;
constexpr index_t WeiBlockCopyDstDataPerWrite_K = 1; constexpr index_t WeiBlockCopyDstDataPerWrite_K = 1;
#elif 1 #elif 0
// cdata = 64, BlockSize = 64, 32x128x3 // cdata = 64, BlockSize = 64, 32x128x3
constexpr index_t BlockSize = 64; constexpr index_t BlockSize = 64;
...@@ -641,7 +641,7 @@ void device_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw(InDesc, ...@@ -641,7 +641,7 @@ void device_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw(InDesc,
constexpr index_t WeiBlockCopySrcDataPerRead_E = 1; constexpr index_t WeiBlockCopySrcDataPerRead_E = 1;
constexpr index_t WeiBlockCopyDstDataPerWrite_K = 1; constexpr index_t WeiBlockCopyDstDataPerWrite_K = 1;
#elif 1 #elif 0
// cdata = 64, BlockSize = 64, 32x128x4 // cdata = 64, BlockSize = 64, 32x128x4
constexpr index_t BlockSize = 64; constexpr index_t BlockSize = 64;
......
...@@ -23,7 +23,7 @@ int main(int argc, char* argv[]) ...@@ -23,7 +23,7 @@ int main(int argc, char* argv[])
{ {
using namespace ck; using namespace ck;
#if 0 #if 1
// 1x1, 17x17 // 1x1, 17x17
constexpr index_t N = 128; constexpr index_t N = 128;
constexpr index_t C = 1024; constexpr index_t C = 1024;
...@@ -594,7 +594,7 @@ int main(int argc, char* argv[]) ...@@ -594,7 +594,7 @@ int main(int argc, char* argv[])
#elif 0 #elif 0
device_convolution_implicit_gemm_v3_nchw_cyxk_nkhw( device_convolution_implicit_gemm_v3_nchw_cyxk_nkhw(
(in_nchw_desc, in_nchw, wei_kcyx_desc, wei_kcyx, out_nkhw_desc, out_nkhw_device, nrepeat); (in_nchw_desc, in_nchw, wei_kcyx_desc, wei_kcyx, out_nkhw_desc, out_nkhw_device, nrepeat);
#elif 0 #elif 1
device_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw(in_nchw_desc, device_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw(in_nchw_desc,
in_nchw, in_nchw,
wei_kcyx_desc, wei_kcyx_desc,
......
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment