Unverified Commit 12649254 authored by Chao Liu's avatar Chao Liu Committed by GitHub
Browse files

reorganize files to prepare for MIOpen integration (#51)

* change olc cmake

* adding online compile to fwd-v4r5r2

* update scripts

* rename fwd-v4r5r2 to fwd-v6r1

* clean up
parent fbdf4332
#include "config.hpp"
#include "device.hpp"
// Records the requested size in mMemSize and asks the backend selected at
// compile time to allocate that many bytes, storing the resulting pointer in
// mpDeviceBuf. The status returned by the allocation call is not acted upon,
// so a failed allocation is not reported to the caller. When neither backend
// macro is set, nothing is allocated.
DeviceMem::DeviceMem(std::size_t mem_size) : mMemSize(mem_size)
{
#if CK_DEVICE_BACKEND_AMD
    const auto alloc_status = hipMalloc(static_cast<void**>(&mpDeviceBuf), mMemSize);
    hipGetErrorString(alloc_status); // string is discarded, as before
#elif CK_DEVICE_BACKEND_NVIDIA
    cudaMalloc(static_cast<void**>(&mpDeviceBuf), mMemSize);
#endif
}
// Returns the raw pointer held in mpDeviceBuf; ownership stays with this object.
void* DeviceMem::GetDeviceBuffer()
{
    return mpDeviceBuf;
}
// Copies mMemSize bytes from the host buffer `p` into mpDeviceBuf using the
// backend selected at compile time. The copy status is not acted upon, so a
// failed transfer is not reported. `p` is only const_cast to satisfy the
// backend signature used here.
void DeviceMem::ToDevice(const void* p)
{
#if CK_DEVICE_BACKEND_AMD
    const auto copy_status =
        hipMemcpy(mpDeviceBuf, const_cast<void*>(p), mMemSize, hipMemcpyHostToDevice);
    hipGetErrorString(copy_status); // string is discarded, as before
#elif CK_DEVICE_BACKEND_NVIDIA
    cudaMemcpy(mpDeviceBuf, const_cast<void*>(p), mMemSize, cudaMemcpyHostToDevice);
#endif
}
// Copies mMemSize bytes from mpDeviceBuf into the host buffer `p` using the
// backend selected at compile time. The copy status is not acted upon, so a
// failed transfer is not reported.
void DeviceMem::FromDevice(void* p)
{
#if CK_DEVICE_BACKEND_AMD
    const auto copy_status = hipMemcpy(p, mpDeviceBuf, mMemSize, hipMemcpyDeviceToHost);
    hipGetErrorString(copy_status); // string is discarded, as before
#elif CK_DEVICE_BACKEND_NVIDIA
    cudaMemcpy(p, mpDeviceBuf, mMemSize, cudaMemcpyDeviceToHost);
#endif
}
// Frees mpDeviceBuf through the backend selected at compile time.
//
// The destructor used to be defined twice: once per-backend and once as an
// unconditional HIP-only one-liner. Two definitions of the same member are a
// redefinition error, and the HIP-only form cannot build for the NVIDIA
// backend, so only the backend-conditional definition is kept.
// The status returned by the free call is not acted upon.
DeviceMem::~DeviceMem()
{
#if CK_DEVICE_BACKEND_AMD
    hipGetErrorString(hipFree(mpDeviceBuf));
#elif CK_DEVICE_BACKEND_NVIDIA
    cudaFree(mpDeviceBuf);
#endif
}
// Backend-specific timer built on a pair of device events (mStart / mEnd).
// Which API is used (HIP or CUDA) is chosen at compile time through
// CK_DEVICE_BACKEND_AMD / CK_DEVICE_BACKEND_NVIDIA; with neither set, every
// member is a no-op and the elapsed time reads as 0.
// NOTE(review): the struct owns event handles but is still copyable; copying
// it would destroy the same events twice — confirm no caller copies it.
struct KernelTimerImpl
{
    // Creates the start and end events.
    KernelTimerImpl()
    {
#if CK_DEVICE_BACKEND_AMD
        hipEventCreate(&mStart);
        hipEventCreate(&mEnd);
#elif CK_DEVICE_BACKEND_NVIDIA
        cudaEventCreate(&mStart);
        cudaEventCreate(&mEnd);
#endif
    }

    // Destroys both events.
    ~KernelTimerImpl()
    {
#if CK_DEVICE_BACKEND_AMD
        hipEventDestroy(mStart);
        hipEventDestroy(mEnd);
#elif CK_DEVICE_BACKEND_NVIDIA
        cudaEventDestroy(mStart);
        cudaEventDestroy(mEnd);
#endif
    }

    // Synchronizes the device first so previously queued work is not counted,
    // then records the start event on stream 0.
    void Start()
    {
#if CK_DEVICE_BACKEND_AMD
        hipDeviceSynchronize();
        hipEventRecord(mStart, 0);
#elif CK_DEVICE_BACKEND_NVIDIA
        cudaDeviceSynchronize();
        cudaEventRecord(mStart, 0);
#endif
    }

    // Records the end event on stream 0 and waits until it has completed.
    void End()
    {
#if CK_DEVICE_BACKEND_AMD
        hipEventRecord(mEnd, 0);
        hipEventSynchronize(mEnd);
#elif CK_DEVICE_BACKEND_NVIDIA
        cudaEventRecord(mEnd, 0);
        cudaEventSynchronize(mEnd);
#endif
    }

    // Returns the time between the start and end events as reported by the
    // backend's elapsed-time call.
    // The result starts at 0 so that a build without a backend, or a call
    // that fails without writing its output, never returns an indeterminate
    // value.
    float GetElapsedTime() const
    {
        float time = 0.0f;
#if CK_DEVICE_BACKEND_AMD
        hipEventElapsedTime(&time, mStart, mEnd);
#elif CK_DEVICE_BACKEND_NVIDIA
        cudaEventElapsedTime(&time, mStart, mEnd);
#endif
        return time;
    }

#if CK_DEVICE_BACKEND_AMD
    hipEvent_t mStart, mEnd;
#elif CK_DEVICE_BACKEND_NVIDIA
    cudaEvent_t mStart, mEnd;
#endif
};
// Builds the timer by heap-allocating its backend implementation and handing
// the pointer to `impl`.
KernelTimer::KernelTimer()
    : impl(new KernelTimerImpl())
{
}
......
set(CMAKE_CXX_COMPILER /opt/rocm/llvm/bin/clang++)
## for online-compiling of HIP kernels
......@@ -17,6 +16,7 @@ if(OLC_HIP_COMPILER MATCHES ".*clang\\+\\+$")
${CMAKE_INSTALL_PREFIX}/llvm
)
endif()
if(OLC_OFFLOADBUNDLER_BIN)
message(STATUS "clang-offload-bundler found: ${OLC_OFFLOADBUNDLER_BIN}")
set(OLC_OFFLOADBUNDLER_BIN "${OLC_OFFLOADBUNDLER_BIN}")
......@@ -67,92 +67,58 @@ else()
set(OLC_DEBUG 0)
endif()
# Generate config.h from its config.h.in template.
# NOTE(review): the two configure_file calls look like the before/after pair of
# a diff (olCompiling written into the source tree vs. host/online_compilation
# written into the binary tree) — confirm which one is current.
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/olCompiling/include/config.h.in" "${CMAKE_CURRENT_SOURCE_DIR}/olCompiling/include/config.h")
configure_file("${PROJECT_SOURCE_DIR}/host/online_compilation/include/config.h.in" "${PROJECT_BINARY_DIR}/host/online_compilation/include/config.h")
# Put the binary-tree directory that receives config.h at the front of the include path.
include_directories(BEFORE
${PROJECT_BINARY_DIR}/host/online_compilation/include
)
message(STATUS "Hip compiler flags: ${HIP_COMPILER_FLAGS}")
## HIP_COMPILER_FLAGS will be used for on-line compiling of the HIP kernels
add_definitions("-DHIP_COMPILER_FLAGS=${HIP_COMPILER_FLAGS}")
# Collect the kernel headers and store them in MCONV_KERNEL_INCLUDES.
# NOTE(review): COMPOSABLE_KERNEL_INCLUDE_1 and _2 are each assigned twice
# below (per-directory GLOBs first, then a GLOB_RECURSE plus the bfloat16
# header); this looks like old and new diff lines shown together — the later
# assignment wins if both are executed. Confirm which set is intended.
file(GLOB COMPOSABLE_KERNEL_INCLUDE_1 "${PROJECT_SOURCE_DIR}/composable_kernel/include/kernel_algorithm/*.hpp")
file(GLOB COMPOSABLE_KERNEL_INCLUDE_2 "${PROJECT_SOURCE_DIR}/composable_kernel/include/tensor_description/*.hpp")
file(GLOB COMPOSABLE_KERNEL_INCLUDE_3 "${PROJECT_SOURCE_DIR}/composable_kernel/include/tensor_operation/*.hpp")
file(GLOB COMPOSABLE_KERNEL_INCLUDE_4 "${PROJECT_SOURCE_DIR}/composable_kernel/include/utility/*.hpp")
file(GLOB COMPOSABLE_KERNEL_INCLUDE_5 "${PROJECT_BINARY_DIR}/composable_kernel/include/utility/*.hpp")
file(GLOB COMPOSABLE_KERNEL_INCLUDE_6 "${PROJECT_SOURCE_DIR}/external/rocm/include/bfloat16_dev.hpp")
file(GLOB_RECURSE COMPOSABLE_KERNEL_INCLUDE_1 "${PROJECT_SOURCE_DIR}/composable_kernel/include/*/*.hpp")
file(GLOB COMPOSABLE_KERNEL_INCLUDE_2 "${PROJECT_SOURCE_DIR}/external/rocm/include/bfloat16_dev.hpp")
# Concatenate every collected header list into one variable.
set(MCONV_KERNEL_INCLUDES
${COMPOSABLE_KERNEL_INCLUDE_1}
${COMPOSABLE_KERNEL_INCLUDE_2}
${COMPOSABLE_KERNEL_INCLUDE_3}
${COMPOSABLE_KERNEL_INCLUDE_4}
${COMPOSABLE_KERNEL_INCLUDE_5}
${COMPOSABLE_KERNEL_INCLUDE_6}
)
# Kernel wrapper sources that are handed to add_kernels.
# NOTE(review): MCONV_KERNELS is first set to an explicit file list and then
# overwritten by a GLOB_RECURSE over kernel_wrapper/*.cpp; likewise add_kernels
# and add_kernel_includes each appear with "olCompiling/" and with
# ${CMAKE_CURRENT_SOURCE_DIR}. These look like before/after diff pairs — confirm.
set(MCONV_KERNELS
../composable_kernel/src/kernel_wrapper/dynamic_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw.cpp
../composable_kernel/src/kernel_wrapper/dynamic_convolution_forward_implicit_gemm_v4r5_nchw_kcyx_nkhw.cpp
../composable_kernel/src/kernel_wrapper/dynamic_convolution_forward_implicit_gemm_v4r4_xdlops_nchw_kcyx_nkhw.cpp
../composable_kernel/src/kernel_wrapper/dynamic_convolution_forward_implicit_gemm_v4r4_xdlops_nhwc_kyxc_nhwk.cpp
)
file(GLOB_RECURSE MCONV_KERNELS "${PROJECT_SOURCE_DIR}/composable_kernel/src/kernel_wrapper/*.cpp")
add_kernels("olCompiling/" "${MCONV_KERNELS}")
add_kernel_includes("olCompiling/" "${MCONV_KERNEL_INCLUDES}")
add_kernels(${CMAKE_CURRENT_SOURCE_DIR} "${MCONV_KERNELS}")
add_kernel_includes(${CMAKE_CURRENT_SOURCE_DIR} "${MCONV_KERNEL_INCLUDES}")
# Base source list for the library target.
# NOTE(review): as written, set(MCONV_SOURCES ...) is never closed before
# set(ONLINE_COMPILATION_SOURCE ...) starts; this looks like removed and added
# diff lines sharing one closing parenthesis — confirm against the real file.
set(MCONV_SOURCES
src/host_tensor.cpp;
src/device.cpp;
set(ONLINE_COMPILATION_SOURCE
${PROJECT_BINARY_DIR}/kernel.cpp
${PROJECT_BINARY_DIR}/kernel_includes.cpp
)
# Headers of the online-compilation HIP utilities, listed under olCompiling/include.
# The variable is later appended to OLC_SOURCES together with the .cpp list.
set(OLC_HIP_UTILITY_HEADERS
olCompiling/include/config.h
olCompiling/include/logger.hpp
olCompiling/include/stringutils.hpp
olCompiling/include/tmp_dir.hpp
olCompiling/include/write_file.hpp
olCompiling/include/env.hpp
olCompiling/include/manage_ptr.hpp
olCompiling/include/md5.hpp
olCompiling/include/simple_hash.hpp
olCompiling/include/exec_utils.hpp
olCompiling/include/hipCheck.hpp
olCompiling/include/target_properties.hpp
olCompiling/include/handle.hpp
olCompiling/include/op_kernel_args.hpp
olCompiling/include/kernel.hpp
olCompiling/include/kernel_build_params.hpp
olCompiling/include/hip_build_utils.hpp
olCompiling/include/hipoc_program.hpp
olCompiling/include/hipoc_program_impl.hpp
olCompiling/include/hipoc_kernel.hpp
olCompiling/include/kernel_cache.hpp
olCompiling/include/binary_cache.hpp
)
# Prepend the binary-tree online_compilation include directory and the local
# "include" directory to the include search path.
include_directories(BEFORE
${PROJECT_BINARY_DIR}/host/online_compilation/include
include
)
# Implementation files of the online-compilation HIP utilities.
# NOTE(review): every file is listed twice, once under olCompiling/hip_utility/
# and once under hip_utility/; this looks like the removed and added sides of a
# diff shown together — confirm which prefix is current.
set(OLC_HIP_UTILITY_CPPS
olCompiling/hip_utility/logger.cpp
olCompiling/hip_utility/tmp_dir.cpp
olCompiling/hip_utility/md5.cpp
olCompiling/hip_utility/exec_utils.cpp
olCompiling/hip_utility/target_properties.cpp
olCompiling/hip_utility/handlehip.cpp
olCompiling/hip_utility/kernel_build_params.cpp
olCompiling/hip_utility/hip_build_utils.cpp
olCompiling/hip_utility/hipoc_program.cpp
olCompiling/hip_utility/hipoc_kernel.cpp
olCompiling/hip_utility/kernel_cache.cpp
olCompiling/hip_utility/binary_cache.cpp
hip_utility/logger.cpp
hip_utility/tmp_dir.cpp
hip_utility/md5.cpp
hip_utility/exec_utils.cpp
hip_utility/target_properties.cpp
hip_utility/handlehip.cpp
hip_utility/kernel_build_params.cpp
hip_utility/hip_build_utils.cpp
hip_utility/hipoc_program.cpp
hip_utility/hipoc_kernel.cpp
hip_utility/kernel_cache.cpp
hip_utility/binary_cache.cpp
)
# Add the HIP utility sources and headers to OLC_SOURCES.
list(APPEND OLC_SOURCES ${OLC_HIP_UTILITY_CPPS} ${OLC_HIP_UTILITY_HEADERS})
# Put the two kernel .cpp files from the binary directory at the front of MCONV_SOURCES.
list(INSERT MCONV_SOURCES 0
${PROJECT_BINARY_DIR}/kernel.cpp
${PROJECT_BINARY_DIR}/kernel_includes.cpp
)
## addkernels provide the tool to create inlined kernels in one header
# NOTE(review): the two add_subdirectory calls (olCompiling/addkernels vs.
# addkernels) look like a before/after diff pair — confirm which path exists.
add_subdirectory(olCompiling/addkernels)
add_subdirectory(addkernels)
function(inline_kernels_src KERNELS KERNEL_INCLUDES)
set(KERNEL_SRC_HPP_FILENAME batch_all.cpp.hpp)
......@@ -166,7 +132,7 @@ function(inline_kernels_src KERNELS KERNEL_INCLUDES)
COMMAND $<TARGET_FILE:addkernels> -target ${KERNEL_SRC_HPP_PATH} -extern -source ${KERNELS}
COMMENT "Inlining All kernels"
)
configure_file(olCompiling/kernels_batch.cpp.in ${KERNEL_SRC_CPP_PATH})
configure_file(kernels_batch.cpp.in ${KERNEL_SRC_CPP_PATH})
list(APPEND OLC_SOURCES ${KERNEL_SRC_CPP_PATH} ${KERNEL_SRC_HPP_PATH})
set(OLC_SOURCES ${OLC_SOURCES} PARENT_SCOPE)
......@@ -174,7 +140,7 @@ endfunction()
# Run inline_kernels_src over the kernel sources and headers, then append
# OLC_SOURCES and olc_kernel_includes.h to the library's source list.
# NOTE(review): the two list(APPEND ...) lines differ only in the target
# variable (MCONV_SOURCES vs. ONLINE_COMPILATION_SOURCE); they look like a
# before/after diff pair — confirm which variable feeds add_library.
inline_kernels_src("${MCONV_KERNELS}" "${MCONV_KERNEL_INCLUDES}")
list(APPEND MCONV_SOURCES ${OLC_SOURCES} ${PROJECT_BINARY_DIR}/olc_kernel_includes.h)
list(APPEND ONLINE_COMPILATION_SOURCE ${OLC_SOURCES} ${PROJECT_BINARY_DIR}/olc_kernel_includes.h)
add_custom_command(
OUTPUT ${PROJECT_BINARY_DIR}/olc_kernel_includes.h
......@@ -185,19 +151,17 @@ add_custom_command(
)
## the library target
# Defines the shared library, its private include paths, its link dependencies
# (hip::device, hip::host, Boost::filesystem), position-independent code, and
# the install rule.
# NOTE(review): each setting appears for both "modConv" and
# "online_compilation", interleaved; this looks like the removed and added sides
# of a target rename shown together. Only the modConv side carries
# target_compile_options(... -mfma) — confirm whether that option was dropped
# on purpose.
add_library(modConv SHARED ${MCONV_SOURCES})
target_include_directories(modConv PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/olCompiling/include/)
target_include_directories(modConv PRIVATE ${PROJECT_BINARY_DIR})
target_include_directories(modConv PRIVATE ${PROJECT_SOURCE_DIR}/external/half/include/)
add_library(online_compilation SHARED ${ONLINE_COMPILATION_SOURCE})
target_link_libraries(modConv PRIVATE hip::device)
target_link_libraries(modConv INTERFACE hip::host)
target_link_libraries(modConv PRIVATE Boost::filesystem)
target_include_directories(online_compilation PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/online_compilation/include/)
target_include_directories(online_compilation PRIVATE ${PROJECT_BINARY_DIR})
target_include_directories(online_compilation PRIVATE ${PROJECT_SOURCE_DIR}/external/half/include/)
target_compile_options(modConv PRIVATE -mfma)
target_link_libraries(online_compilation PRIVATE hip::device)
target_link_libraries(online_compilation INTERFACE hip::host)
target_link_libraries(online_compilation PRIVATE Boost::filesystem)
# target_compile_features is called without listing any feature.
target_compile_features(modConv PUBLIC)
set_target_properties(modConv PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_compile_features(online_compilation PUBLIC)
set_target_properties(online_compilation PROPERTIES POSITION_INDEPENDENT_CODE ON)
install(TARGETS modConv LIBRARY DESTINATION lib)
install(TARGETS online_compilation LIBRARY DESTINATION lib)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment