Commit fa75f6d9 authored by traveller59's avatar traveller59
Browse files

fix #55: Windows doesn't support OpenMP 4

parent 48b9a86a
...@@ -8,6 +8,9 @@ else() ...@@ -8,6 +8,9 @@ else()
project(SparseConv LANGUAGES CXX VERSION 1.1) project(SparseConv LANGUAGES CXX VERSION 1.1)
endif() endif()
if(WIN32) # true if windows (32 and 64 bit)
add_compile_definitions(TV_WINDOWS)
endif()
set(CMAKE_CXX_EXTENSIONS OFF) # avoid gnu++11 be added to CXX flags set(CMAKE_CXX_EXTENSIONS OFF) # avoid gnu++11 be added to CXX flags
if(CMAKE_BUILD_TYPE STREQUAL "Debug") if(CMAKE_BUILD_TYPE STREQUAL "Debug")
...@@ -22,7 +25,6 @@ if (SPCONV_BuildCUDA) ...@@ -22,7 +25,6 @@ if (SPCONV_BuildCUDA)
get_filename_component(CUDA_TOOLKIT_ROOT_DIR "${CUDA_TOOLKIT_ROOT_DIR}" DIRECTORY) get_filename_component(CUDA_TOOLKIT_ROOT_DIR "${CUDA_TOOLKIT_ROOT_DIR}" DIRECTORY)
if(WIN32) # true if windows (32 and 64 bit) if(WIN32) # true if windows (32 and 64 bit)
set(CUDA_LIB_PATH_HINTS "${CUDA_TOOLKIT_ROOT_DIR}/lib/x64") set(CUDA_LIB_PATH_HINTS "${CUDA_TOOLKIT_ROOT_DIR}/lib/x64")
add_compile_definitions(TV_WINDOWS)
else() else()
set(CUDA_LIB_PATH_HINTS "${CUDA_TOOLKIT_ROOT_DIR}/lib64") set(CUDA_LIB_PATH_HINTS "${CUDA_TOOLKIT_ROOT_DIR}/lib64")
endif() endif()
...@@ -45,7 +47,6 @@ if (SPCONV_BuildCUDA) ...@@ -45,7 +47,6 @@ if (SPCONV_BuildCUDA)
set(ALL_INCLUDE ${ALL_INCLUDE} ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) set(ALL_INCLUDE ${ALL_INCLUDE} ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
add_subdirectory(src/cuhash) add_subdirectory(src/cuhash)
endif() endif()
add_subdirectory(src/spconv) add_subdirectory(src/spconv)
add_subdirectory(src/utils) add_subdirectory(src/utils)
......
...@@ -64,6 +64,7 @@ class CMakeBuild(build_ext): ...@@ -64,6 +64,7 @@ class CMakeBuild(build_ext):
cmake_args += ['-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{}={}'.format(cfg.upper(), str(Path(extdir) / "spconv"))] cmake_args += ['-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{}={}'.format(cfg.upper(), str(Path(extdir) / "spconv"))]
# cmake_args += ['-DCMAKE_ARCHIVE_OUTPUT_DIRECTORY_{}={}'.format(cfg.upper(), str(Path(extdir) / "spconv"))] # cmake_args += ['-DCMAKE_ARCHIVE_OUTPUT_DIRECTORY_{}={}'.format(cfg.upper(), str(Path(extdir) / "spconv"))]
cmake_args += ['-DCMAKE_RUNTIME_OUTPUT_DIRECTORY_{}={}'.format(cfg.upper(), str(Path(extdir) / "spconv"))] cmake_args += ['-DCMAKE_RUNTIME_OUTPUT_DIRECTORY_{}={}'.format(cfg.upper(), str(Path(extdir) / "spconv"))]
cmake_args += ["-DCMAKE_WINDOWS_EXPORT_ALL_SYMBOLS=TRUE"]
if sys.maxsize > 2**32: if sys.maxsize > 2**32:
cmake_args += ['-A', 'x64'] cmake_args += ['-A', 'x64']
build_args += ['--', '/m'] build_args += ['--', '/m']
......
add_library(cuhash STATIC hash_functions.cu hash_table.cpp hash_table.cu hash_functions.cpp) if(WIN32)
add_library(cuhash SHARED hash_functions.cu hash_table.cpp hash_table.cu hash_functions.cpp)
else()
add_library(cuhash STATIC hash_functions.cu hash_table.cpp hash_table.cu hash_functions.cpp)
endif()
target_include_directories(cuhash PRIVATE ${ALL_INCLUDE} ) target_include_directories(cuhash PRIVATE ${ALL_INCLUDE} )
set_property(TARGET cuhash PROPERTY CUDA_STANDARD 14) set_property(TARGET cuhash PROPERTY CUDA_STANDARD 14)
set_property(TARGET cuhash PROPERTY CXX_STANDARD 14) set_property(TARGET cuhash PROPERTY CXX_STANDARD 14)
set_target_properties(cuhash PROPERTIES CUDA_SEPARABLE_COMPILATION ON) set_target_properties(cuhash PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
set_property(TARGET cuhash PROPERTY POSITION_INDEPENDENT_CODE ON) set_target_properties(cuhash PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS ON)
if(NOT WIN32)
set_property(TARGET cuhash PROPERTY POSITION_INDEPENDENT_CODE ON)
endif()
target_link_libraries(cuhash PRIVATE ${ALL_LIBS}) target_link_libraries(cuhash PRIVATE ${ALL_LIBS})
install (TARGETS cuhash DESTINATION lib) install (TARGETS cuhash DESTINATION lib)
if (SPCONV_BuildTests) if (SPCONV_BuildTests)
add_executable(cuhash_test main.cc) add_executable(cuhash_test main.cc)
target_include_directories(cuhash_test PRIVATE ${ALL_INCLUDE} ) target_include_directories(cuhash_test PRIVATE ${ALL_INCLUDE} )
set_property(TARGET cuhash_test PROPERTY CUDA_STANDARD 14) set_property(TARGET cuhash_test PROPERTY CUDA_STANDARD 14)
set_property(TARGET cuhash_test PROPERTY CXX_STANDARD 14) set_property(TARGET cuhash_test PROPERTY CXX_STANDARD 14)
......
...@@ -136,6 +136,8 @@ Index getIndicePairsDeConv(tv::TensorView<const Index> indicesIn, ...@@ -136,6 +136,8 @@ Index getIndicePairsDeConv(tv::TensorView<const Index> indicesIn,
return numAct; return numAct;
} }
#ifndef TV_WINDOWS
template <typename Index, typename IndexGrid, unsigned NDim> template <typename Index, typename IndexGrid, unsigned NDim>
Index getIndicePairsSubM(tv::TensorView<const Index> indicesIn, Index getIndicePairsSubM(tv::TensorView<const Index> indicesIn,
tv::TensorView<IndexGrid> gridsOut, tv::TensorView<IndexGrid> gridsOut,
...@@ -194,6 +196,58 @@ Index getIndicePairsSubM(tv::TensorView<const Index> indicesIn, ...@@ -194,6 +196,58 @@ Index getIndicePairsSubM(tv::TensorView<const Index> indicesIn,
}); });
return numActIn; return numActIn;
} }
#else
/// Serial (no OpenMP) variant of the submanifold indice-pair builder, compiled
/// only when TV_WINDOWS is defined.
///
/// Pass 1 writes, for every input row j, the value j into gridsOut at the
/// flattened position of that row. Pass 2 enumerates, for every input row, the
/// candidate output positions returned by getValidOutPos and records an
/// (input row, neighbour row) pair for every candidate whose grid cell holds a
/// value greater than -1.
///
/// @param indicesIn       Rows of (NDim + 1) values: column 0 is read through
///                        indicesIn(j, 0) and scaled by the spatial volume
///                        (presumably the batch index - confirm against the
///                        caller); the remaining NDim values are coordinates.
/// @param gridsOut        Flat lookup grid. NOTE(review): cells not written in
///                        pass 1 are assumed to be pre-filled with a negative
///                        value by the caller - confirm.
/// @param indicePairs     Output, indexed as (kernel offset, 0|1, pair slot).
/// @param indiceNum       Per-kernel-offset pair counter; incremented here.
/// @param kernelSize      NDim kernel extents.
/// @param stride          Forwarded unchanged to getValidOutPos.
/// @param padding         Forwarded unchanged to getValidOutPos.
/// @param dilation        Forwarded unchanged to getValidOutPos.
/// @param outSpatialShape NDim spatial extents used for flattening.
/// @return The number of input rows, indicesIn.dim(0).
template <typename Index, typename IndexGrid, unsigned NDim>
Index getIndicePairsSubM(tv::TensorView<const Index> indicesIn,
                         tv::TensorView<IndexGrid> gridsOut,
                         tv::TensorView<Index> indicePairs,
                         tv::TensorView<Index> indiceNum,
                         const Index *const kernelSize,
                         const Index *const stride, const Index *const padding,
                         const Index *dilation,
                         const Index *const outSpatialShape) {
  const auto numActIn = indicesIn.dim(0);

  // NDim is a compile-time constant, so these loops need no unroll pragma;
  // the counters are unsigned to match NDim's type.
  Index spatialVolume = 1;
  for (unsigned i = 0; i < NDim; ++i) {
    spatialVolume *= outSpatialShape[i];
  }
  Index kernelVolume = 1;
  for (unsigned i = 0; i < NDim; ++i) {
    kernelVolume *= kernelSize[i];
  }

  // Scratch buffer handed to getValidOutPos. Each entry is read below as NDim
  // coordinates followed by one kernel offset. Heap-backed because a
  // runtime-sized stack array is not standard C++.
  std::vector<Index> validPointsStorage(kernelVolume * (NDim + 1));
  Index *const validPoints = validPointsStorage.data();

  // Pass 1: mark every input position in the grid with its row number.
  for (int j = 0; j < numActIn; ++j) {
    const Index *const coords = indicesIn.data() + j * (NDim + 1) + 1;
    const Index cell = tv::rowArrayIdx<Index, NDim>(coords, outSpatialShape) +
                       spatialVolume * indicesIn(j, 0);
    gridsOut[cell] = j;
  }

  // Pass 2: pair every input row with the occupied cells among its candidates.
  for (int j = 0; j < numActIn; ++j) {
    const Index numValidPoints = getValidOutPos<Index, NDim>(
        indicesIn.data() + j * (NDim + 1) + 1, kernelSize, stride, padding,
        dilation, outSpatialShape, validPoints);
    for (Index i = 0; i < numValidPoints; ++i) {
      const Index *const pointPtr = validPoints + i * (NDim + 1);
      const auto offset = pointPtr[NDim];
      const Index cell =
          tv::rowArrayIdx<Index, NDim>(pointPtr, outSpatialShape) +
          spatialVolume * indicesIn(j, 0);
      if (gridsOut[cell] > -1) {
        // Both halves of the pair share one slot; the counter is bumped only
        // after the second half has been addressed.
        indicePairs(offset, 0, indiceNum[offset]) = j;
        indicePairs(offset, 1, indiceNum[offset]++) = gridsOut[cell];
      }
    }
  }
  return numActIn;
}
#endif
namespace functor { namespace functor {
template <typename Index, typename IndexGrid, unsigned NDim> template <typename Index, typename IndexGrid, unsigned NDim>
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment