Commit a414e3fd authored by Chao Liu
Browse files

update build

parent 67c6f73f
cmake_minimum_required(VERSION 2.8.3) #cmake_minimum_required(VERSION 2.8.3)
cmake_minimum_required(VERSION 3.9)
project(modular_convolution) project(modular_convolution)
#c++ #c++
......
set(DRIVER_SOURCE driver.cpp) if(DEVICE_BACKEND STREQUAL "HIP")
set(DRIVER_SOURCE driver.hip.cpp)
elseif(DEVICE_BACKEND STREQUAL "CUDA")
set(DRIVER_SOURCE driver.cu)
endif()
add_executable(driver ${DRIVER_SOURCE}) add_executable(driver ${DRIVER_SOURCE})
target_link_libraries(driver PRIVATE tensor) target_link_libraries(driver PRIVATE tensor)
driver.hip.cpp
\ No newline at end of file
...@@ -50,7 +50,7 @@ struct GeneratorTensor_3 ...@@ -50,7 +50,7 @@ struct GeneratorTensor_3
std::initializer_list<std::size_t> ids = {static_cast<std::size_t>(is)...}; std::initializer_list<std::size_t> ids = {static_cast<std::size_t>(is)...};
std::vector<std::size_t> lens(sizeof...(Is), 100); std::vector<std::size_t> lens(sizeof...(Is), 100);
std::vector<std::size_t> strides(sizeof...(Is), 1); std::vector<std::size_t> strides(sizeof...(Is), 1);
std::partial_sum(lens.rbegin(), lens.rbegin() + (sizeof...(Is)-1), strides.rbegin() + 1); std::partial_sum(lens.rbegin(), lens.rbegin() + (sizeof...(Is) - 1), strides.rbegin() + 1);
return std::inner_product(ids.begin(), ids.end(), strides.begin(), std::size_t(0)) + 1; return std::inner_product(ids.begin(), ids.end(), strides.begin(), std::size_t(0)) + 1;
#endif #endif
} }
......
...@@ -11,7 +11,7 @@ set_target_properties(tensor PROPERTIES POSITION_INDEPENDENT_CODE ON) ...@@ -11,7 +11,7 @@ set_target_properties(tensor PROPERTIES POSITION_INDEPENDENT_CODE ON)
if(DEVICE_BACKEND STREQUAL "CUDA") if(DEVICE_BACKEND STREQUAL "CUDA")
target_link_libraries(device nvToolsExt cudart) target_link_libraries(tensor nvToolsExt cudart)
endif() endif()
install(TARGETS tensor LIBRARY DESTINATION lib) install(TARGETS tensor LIBRARY DESTINATION lib)
...@@ -43,7 +43,7 @@ float launch_kernel(F kernel, dim3 grid_dim, dim3 block_dim, Args... args) ...@@ -43,7 +43,7 @@ float launch_kernel(F kernel, dim3 grid_dim, dim3 block_dim, Args... args)
hipGetErrorString(hipGetLastError()); hipGetErrorString(hipGetLastError());
#elif DEVICE_BACKEND_CUDA #elif DEVICE_BACKEND_CUDA
const void* f = reinterpret_cast<const void*>(kernel); const void* f = reinterpret_cast<const void*>(kernel);
void* p_args = {&args...}; void* p_args[] = {&args...};
timer.Start(); timer.Start();
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment