Unverified commit 12649254, authored by Chao Liu, committed by GitHub
Browse files

reorganize files to prepare for MIOpen integration (#51)

* change olc cmake

* adding online compile to fwd-v4r5r2

* update scripts

* rename fwd-v4r5r2 to fwd-v6r1

* clean up
parent fbdf4332
# Header search paths for the targets defined in this CMakeLists file.
# BEFORE prepends these directories to the include path instead of appending
# them, so they are searched ahead of include directories added earlier.
# Order within the list is preserved, so do not reorder entries casually.
include_directories(BEFORE
# Relative path: resolved against the current source directory.
include
# Build-tree counterpart of the online_compilation include directory
# (presumably holds generated/configured headers - confirm).
${PROJECT_BINARY_DIR}/host/online_compilation/include
${PROJECT_SOURCE_DIR}/host/online_compilation/include
${PROJECT_SOURCE_DIR}/host/host_tensor/include
# composable_kernel headers: the root plus each subdirectory, so headers can
# be included without a subdirectory prefix.
${PROJECT_SOURCE_DIR}/composable_kernel/include
${PROJECT_SOURCE_DIR}/composable_kernel/include/utility
${PROJECT_SOURCE_DIR}/composable_kernel/include/tensor_description
${PROJECT_SOURCE_DIR}/composable_kernel/include/tensor_operation
${PROJECT_SOURCE_DIR}/composable_kernel/include/problem_transform
${PROJECT_SOURCE_DIR}/composable_kernel/include/driver
# Headers vendored under external/.
${PROJECT_SOURCE_DIR}/external/rocm/include
${PROJECT_SOURCE_DIR}/external/half/include
)
# conv_fwd_driver_online: executable built from the source list below.
set(CONV_FWD_DRIVER_ONLINE_SOURCE conv_fwd_driver_online.cpp)
add_executable(conv_fwd_driver_online ${CONV_FWD_DRIVER_ONLINE_SOURCE})

# Both dependencies are linked PRIVATE, in the same order as before.
target_link_libraries(conv_fwd_driver_online
    PRIVATE
        host_tensor
        online_compilation
)
#ifndef CONV_TUNABLE_FWD_V4R4_NCHW_KCYX_NKHW_HPP
#define CONV_TUNABLE_FWD_V4R4_NCHW_KCYX_NKHW_HPP
// Plain aggregate of tunable parameters for the kernel variant named in the
// struct identifier (v4r4, nchw/kcyx/nkhw).
//
// Member order is significant: the default instance defined after this struct
// is built with a positional aggregate initializer, so adding, removing or
// reordering members requires updating that initializer in lockstep.
//
// NOTE(review): the per-group descriptions below are inferred from the member
// names only - confirm against the kernel code that consumes this struct.
struct tunable_dyn_conv_fwd_v4r4_nchw_kcyx_nkhw
{
// Block-level sizes (presumably block size and per-block M/N/K extents - confirm).
int32_t BlockSize;
int32_t MPerBlock;
int32_t NPerBlock;
int32_t KPerBlock;
// Per-thread sizes.
int32_t M1PerThread;
int32_t N1PerThread;
int32_t KPerThread;
// Thread-cluster shape along the M1/N1 dimensions.
int32_t M1N1ThreadClusterM10;
int32_t M1N1ThreadClusterN10;
int32_t M1N1ThreadClusterM11;
int32_t M1N1ThreadClusterN11;
// "A" block-transfer settings; the 3-element arrays are indexed in
// K, M0, M1 order according to the member-name suffix.
std::array<int32_t, 3> ABlockTransferThreadSliceLengths_K_M0_M1;
std::array<int32_t, 3> ABlockTransferThreadClusterLengths_K_M0_M1;
std::array<int32_t, 3> ABlockTransferThreadClusterArrangeOrder;
std::array<int32_t, 3> ABlockTransferSrcAccessOrder;
int32_t ABlockTransferSrcVectorDim;
int32_t ABlockTransferSrcScalarPerVector;
int32_t ABlockTransferDstScalarPerVector_M1;
bool AThreadTransferSrcResetCoordinateAfterRun;
// "B" block-transfer settings; mirrors the "A" group with K, N0, N1 ordering.
std::array<int32_t, 3> BBlockTransferThreadSliceLengths_K_N0_N1;
std::array<int32_t, 3> BBlockTransferThreadClusterLengths_K_N0_N1;
std::array<int32_t, 3> BBlockTransferThreadClusterArrangeOrder;
std::array<int32_t, 3> BBlockTransferSrcAccessOrder;
int32_t BBlockTransferSrcVectorDim;
int32_t BBlockTransferSrcScalarPerVector;
int32_t BBlockTransferDstScalarPerVector_N1;
bool BThreadTransferSrcResetCoordinateAfterRun;
// "C" thread-transfer settings; the access order has 6 entries.
std::array<int32_t, 6> CThreadTransferSrcDstAccessOrder;
int32_t CThreadTransferSrcDstVectorDim;
int32_t CThreadTransferDstScalarPerVector;
};
// Default parameter set for tunable_dyn_conv_fwd_v4r4_nchw_kcyx_nkhw.
//
// This is a positional aggregate initializer: each value is listed on its own
// line, in member declaration order, with the member it initializes named in
// the trailing comment. Keep the two in sync when the struct changes.
//
// Being a non-const `static` object, every translation unit that includes this
// header gets its own independent, mutable copy.
static tunable_dyn_conv_fwd_v4r4_nchw_kcyx_nkhw default_tunable_dyn_conv_fwd_v4r4_nchw_kcyx_nkhw = {
    256,                // BlockSize
    128,                // MPerBlock
    128,                // NPerBlock
    8,                  // KPerBlock
    4,                  // M1PerThread
    4,                  // N1PerThread
    1,                  // KPerThread
    8,                  // M1N1ThreadClusterM10
    8,                  // M1N1ThreadClusterN10
    2,                  // M1N1ThreadClusterM11
    2,                  // M1N1ThreadClusterN11
    {4, 1, 1},          // ABlockTransferThreadSliceLengths_K_M0_M1
    {2, 1, 128},        // ABlockTransferThreadClusterLengths_K_M0_M1
    {2, 1, 0},          // ABlockTransferThreadClusterArrangeOrder
    {2, 1, 0},          // ABlockTransferSrcAccessOrder
    0,                  // ABlockTransferSrcVectorDim
    4,                  // ABlockTransferSrcScalarPerVector
    1,                  // ABlockTransferDstScalarPerVector_M1
    false,              // AThreadTransferSrcResetCoordinateAfterRun
    {4, 1, 1},          // BBlockTransferThreadSliceLengths_K_N0_N1
    {2, 1, 128},        // BBlockTransferThreadClusterLengths_K_N0_N1
    {0, 1, 2},          // BBlockTransferThreadClusterArrangeOrder
    {0, 1, 2},          // BBlockTransferSrcAccessOrder
    2,                  // BBlockTransferSrcVectorDim
    1,                  // BBlockTransferSrcScalarPerVector
    1,                  // BBlockTransferDstScalarPerVector_N1
    false,              // BThreadTransferSrcResetCoordinateAfterRun
    {3, 4, 5, 0, 1, 2}, // CThreadTransferSrcDstAccessOrder
    5,                  // CThreadTransferSrcDstVectorDim
    1                   // CThreadTransferDstScalarPerVector
};
#endif
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment