Commit a414e3fd authored by Chao Liu
Browse files

update build

parent 67c6f73f
cmake_minimum_required(VERSION 2.8.3) #cmake_minimum_required(VERSION 2.8.3)
cmake_minimum_required(VERSION 3.9)
project(modular_convolution) project(modular_convolution)
#c++ #c++
......
set(DRIVER_SOURCE driver.cpp) if(DEVICE_BACKEND STREQUAL "HIP")
set(DRIVER_SOURCE driver.hip.cpp)
elseif(DEVICE_BACKEND STREQUAL "CUDA")
set(DRIVER_SOURCE driver.cu)
endif()
add_executable(driver ${DRIVER_SOURCE}) add_executable(driver ${DRIVER_SOURCE})
target_link_libraries(driver PRIVATE tensor) target_link_libraries(driver PRIVATE tensor)
driver.hip.cpp
\ No newline at end of file
...@@ -50,7 +50,7 @@ struct GeneratorTensor_3 ...@@ -50,7 +50,7 @@ struct GeneratorTensor_3
std::initializer_list<std::size_t> ids = {static_cast<std::size_t>(is)...}; std::initializer_list<std::size_t> ids = {static_cast<std::size_t>(is)...};
std::vector<std::size_t> lens(sizeof...(Is), 100); std::vector<std::size_t> lens(sizeof...(Is), 100);
std::vector<std::size_t> strides(sizeof...(Is), 1); std::vector<std::size_t> strides(sizeof...(Is), 1);
std::partial_sum(lens.rbegin(), lens.rbegin() + (sizeof...(Is)-1), strides.rbegin() + 1); std::partial_sum(lens.rbegin(), lens.rbegin() + (sizeof...(Is) - 1), strides.rbegin() + 1);
return std::inner_product(ids.begin(), ids.end(), strides.begin(), std::size_t(0)) + 1; return std::inner_product(ids.begin(), ids.end(), strides.begin(), std::size_t(0)) + 1;
#endif #endif
} }
...@@ -340,7 +340,7 @@ void host_winograd_3x3_convolution( ...@@ -340,7 +340,7 @@ void host_winograd_3x3_convolution(
std::size_t ho = OutTileSizeH * y + j; std::size_t ho = OutTileSizeH * y + j;
for(int i = 0; i < OutTileSizeW; ++i) for(int i = 0; i < OutTileSizeW; ++i)
{ {
std::size_t wo = OutTileSizeW * x + i; std::size_t wo = OutTileSizeW * x + i;
out(n, k, ho, wo) = out_hold(n, k, y, x, j, i); out(n, k, ho, wo) = out_hold(n, k, y, x, j, i);
} }
} }
...@@ -393,13 +393,13 @@ int main() ...@@ -393,13 +393,13 @@ int main()
constexpr unsigned WPad = 0; constexpr unsigned WPad = 0;
#elif 0 #elif 0
// 3x3, 34x34 // 3x3, 34x34
constexpr unsigned N = 64; constexpr unsigned N = 64;
constexpr unsigned C = 256; constexpr unsigned C = 256;
constexpr unsigned HI = 34; constexpr unsigned HI = 34;
constexpr unsigned WI = 34; constexpr unsigned WI = 34;
constexpr unsigned K = 64; constexpr unsigned K = 64;
constexpr unsigned S = 3; constexpr unsigned S = 3;
constexpr unsigned R = 3; constexpr unsigned R = 3;
constexpr unsigned HPad = 0; constexpr unsigned HPad = 0;
constexpr unsigned WPad = 0; constexpr unsigned WPad = 0;
......
...@@ -11,7 +11,7 @@ set_target_properties(tensor PROPERTIES POSITION_INDEPENDENT_CODE ON) ...@@ -11,7 +11,7 @@ set_target_properties(tensor PROPERTIES POSITION_INDEPENDENT_CODE ON)
if(DEVICE_BACKEND STREQUAL "CUDA") if(DEVICE_BACKEND STREQUAL "CUDA")
target_link_libraries(device nvToolsExt cudart) target_link_libraries(tensor nvToolsExt cudart)
endif() endif()
install(TARGETS tensor LIBRARY DESTINATION lib) install(TARGETS tensor LIBRARY DESTINATION lib)
...@@ -43,7 +43,7 @@ float launch_kernel(F kernel, dim3 grid_dim, dim3 block_dim, Args... args) ...@@ -43,7 +43,7 @@ float launch_kernel(F kernel, dim3 grid_dim, dim3 block_dim, Args... args)
hipGetErrorString(hipGetLastError()); hipGetErrorString(hipGetLastError());
#elif DEVICE_BACKEND_CUDA #elif DEVICE_BACKEND_CUDA
const void* f = reinterpret_cast<const void*>(kernel); const void* f = reinterpret_cast<const void*>(kernel);
void* p_args = {&args...}; void* p_args[] = {&args...};
timer.Start(); timer.Start();
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment