Commit 80901f59 authored by Chao Liu
Browse files

nvidia build

parent b628bf28
......@@ -50,7 +50,6 @@ include_directories(BEFORE
${PROJECT_SOURCE_DIR}/external/include
${PROJECT_SOURCE_DIR}/driver/include
${PROJECT_BINARY_DIR}/composable_kernel/include/utility
${HALF_INCLUDE_DIR}
)
if(DEVICE_BACKEND STREQUAL "AMD")
......
......@@ -27,7 +27,9 @@
#if CK_USE_AMD_XDLOPS
#include "amd_xdlops.hpp"
#else
#endif
#if CK_USE_AMD_XDLOPS_EMULATE
#include "amd_xdlops_emulate.hpp"
#endif
......
......@@ -19,6 +19,7 @@
#define CK_USE_AMD_BUFFER_ADDRESSING_INTRINSIC 0
#define CK_USE_AMD_XDLOPS 0
#define CK_USE_AMD_XDLOPS_INLINE_ASM 0
#define CK_USE_AMD_XDLOPS_EMULATE 0
// experimental implementation
#define CK_EXPERIMENTAL_BLOCKWISE_GEMM_USE_PIPELINE 0
......
......@@ -133,7 +133,7 @@ void device_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw(InDesc,
constexpr index_t WeiBlockCopySrcDataPerRead_E = 2;
constexpr index_t WeiBlockCopyDstDataPerWrite_K = 1;
#elif 0
#elif 1
// cdata = 64, BlockSize = 256, 128x128x8
constexpr index_t BlockSize = 256;
......@@ -563,7 +563,7 @@ void device_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw(InDesc,
constexpr index_t WeiBlockCopySrcDataPerRead_E = 1;
constexpr index_t WeiBlockCopyDstDataPerWrite_K = 1;
#elif 1
#elif 0
// cdata = 64, BlockSize = 64, 32x128x3
constexpr index_t BlockSize = 64;
......@@ -641,7 +641,7 @@ void device_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw(InDesc,
constexpr index_t WeiBlockCopySrcDataPerRead_E = 1;
constexpr index_t WeiBlockCopyDstDataPerWrite_K = 1;
#elif 1
#elif 0
// cdata = 64, BlockSize = 64, 32x128x4
constexpr index_t BlockSize = 64;
......
......@@ -23,7 +23,7 @@ int main(int argc, char* argv[])
{
using namespace ck;
#if 0
#if 1
// 1x1, 17x17
constexpr index_t N = 128;
constexpr index_t C = 1024;
......@@ -594,7 +594,7 @@ int main(int argc, char* argv[])
#elif 0
device_convolution_implicit_gemm_v3_nchw_cyxk_nkhw(
(in_nchw_desc, in_nchw, wei_kcyx_desc, wei_kcyx, out_nkhw_desc, out_nkhw_device, nrepeat);
#elif 0
#elif 1
device_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw(in_nchw_desc,
in_nchw,
wei_kcyx_desc,
......
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment