Commit a414e3fd authored by Chao Liu
Browse files

update build

parent 67c6f73f
cmake_minimum_required(VERSION 2.8.3)
#cmake_minimum_required(VERSION 2.8.3)
cmake_minimum_required(VERSION 3.9)
project(modular_convolution)
#c++
......
set(DRIVER_SOURCE driver.cpp)
if(DEVICE_BACKEND STREQUAL "HIP")
set(DRIVER_SOURCE driver.hip.cpp)
elseif(DEVICE_BACKEND STREQUAL "CUDA")
set(DRIVER_SOURCE driver.cu)
endif()
add_executable(driver ${DRIVER_SOURCE})
target_link_libraries(driver PRIVATE tensor)
driver.hip.cpp
\ No newline at end of file
......@@ -50,7 +50,7 @@ struct GeneratorTensor_3
std::initializer_list<std::size_t> ids = {static_cast<std::size_t>(is)...};
std::vector<std::size_t> lens(sizeof...(Is), 100);
std::vector<std::size_t> strides(sizeof...(Is), 1);
std::partial_sum(lens.rbegin(), lens.rbegin() + (sizeof...(Is)-1), strides.rbegin() + 1);
std::partial_sum(lens.rbegin(), lens.rbegin() + (sizeof...(Is) - 1), strides.rbegin() + 1);
return std::inner_product(ids.begin(), ids.end(), strides.begin(), std::size_t(0)) + 1;
#endif
}
......@@ -340,7 +340,7 @@ void host_winograd_3x3_convolution(
std::size_t ho = OutTileSizeH * y + j;
for(int i = 0; i < OutTileSizeW; ++i)
{
std::size_t wo = OutTileSizeW * x + i;
std::size_t wo = OutTileSizeW * x + i;
out(n, k, ho, wo) = out_hold(n, k, y, x, j, i);
}
}
......@@ -393,13 +393,13 @@ int main()
constexpr unsigned WPad = 0;
#elif 0
// 3x3, 34x34
constexpr unsigned N = 64;
constexpr unsigned C = 256;
constexpr unsigned N = 64;
constexpr unsigned C = 256;
constexpr unsigned HI = 34;
constexpr unsigned WI = 34;
constexpr unsigned K = 64;
constexpr unsigned S = 3;
constexpr unsigned R = 3;
constexpr unsigned K = 64;
constexpr unsigned S = 3;
constexpr unsigned R = 3;
constexpr unsigned HPad = 0;
constexpr unsigned WPad = 0;
......
......@@ -11,7 +11,7 @@ set_target_properties(tensor PROPERTIES POSITION_INDEPENDENT_CODE ON)
if(DEVICE_BACKEND STREQUAL "CUDA")
target_link_libraries(device nvToolsExt cudart)
target_link_libraries(tensor nvToolsExt cudart)
endif()
install(TARGETS tensor LIBRARY DESTINATION lib)
......@@ -43,7 +43,7 @@ float launch_kernel(F kernel, dim3 grid_dim, dim3 block_dim, Args... args)
hipGetErrorString(hipGetLastError());
#elif DEVICE_BACKEND_CUDA
const void* f = reinterpret_cast<const void*>(kernel);
void* p_args = {&args...};
void* p_args[] = {&args...};
timer.Start();
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment