Commit fc38c00d authored by yan.yan's avatar yan.yan
Browse files

working on file too long problem in windows

parent 9f9d5b79
[build-system] [build-system]
requires = ["setuptools>=41.0", "wheel", "pccm>=0.2.5", "cumm>=0.1.5"] requires = ["setuptools>=41.0", "wheel", "pccm>=0.2.7", "cumm>=0.1.6"]
build-backend = "setuptools.build_meta" build-backend = "setuptools.build_meta"
...@@ -448,7 +448,7 @@ class SparseConvIndicesKernel(pccm.ParameterizedClass): ...@@ -448,7 +448,7 @@ class SparseConvIndicesKernel(pccm.ParameterizedClass):
int filter_offset = blockIdx.y; int filter_offset = blockIdx.y;
uint32_t filter_mask_out = (1u << (filter_offset)); uint32_t filter_mask_out = (1u << (filter_offset));
uint32_t filter_mask_in = (1u << (RS - 1 - filter_offset)); uint32_t filter_mask_in = (1u << (RS - 1 - filter_offset));
uint32_t filter_mask_center = (1u << (RS / 2)); // uint32_t filter_mask_center = (1u << (RS / 2));
loc_iter.set_filter_offset(filter_offset); loc_iter.set_filter_offset(filter_offset);
int indices_pair_size_mul_RS = indices_pair_size * RS; int indices_pair_size_mul_RS = indices_pair_size * RS;
......
...@@ -112,15 +112,15 @@ class IndiceMaxPool(pccm.Class): ...@@ -112,15 +112,15 @@ class IndiceMaxPool(pccm.Class):
bool found = tv::dispatch_int_noexcept<512, 256, 128, 64, 32, 16>(out.dim(1), [](int my, int expect){{return my >= expect;}}, [&](auto V){{ bool found = tv::dispatch_int_noexcept<512, 256, 128, 64, 32, 16>(out.dim(1), [](int my, int expect){{return my >= expect;}}, [&](auto V){{
// if out.dim(1) > value in list above, run this function. // if out.dim(1) > value in list above, run this function.
// if a value is found, other value won't be executed. // if a value is found, other value won't be executed.
constexpr int NumFeatures = TV_DECLTYPE(V)::value; int NumFeatures = TV_DECLTYPE(V)::value;
constexpr int Num0 = MaxThreads / NumFeatures; int Num0 = MaxThreads / NumFeatures;
dim3 blocks(tv::div_up(out.dim(1), NumFeatures), tv::div_up(nhot, Num0)); dim3 blocks(tv::div_up(out.dim(1), NumFeatures), tv::div_up(nhot, Num0));
dim3 threads(NumFeatures, Num0); dim3 threads(NumFeatures, Num0);
launcher = tv::cuda::Launch(blocks, threads, cudastream); launcher = tv::cuda::Launch(blocks, threads, cudastream);
}}); }});
if (!found){{ if (!found){{
constexpr int NumFeatures = 16; int NumFeatures = 16;
constexpr int Num0 = MaxThreads / NumFeatures; int Num0 = MaxThreads / NumFeatures;
dim3 blocks(tv::div_up(out.dim(1), NumFeatures), tv::div_up(nhot, Num0)); dim3 blocks(tv::div_up(out.dim(1), NumFeatures), tv::div_up(nhot, Num0));
dim3 threads(NumFeatures, Num0); dim3 threads(NumFeatures, Num0);
launcher = tv::cuda::Launch(blocks, threads, cudastream); launcher = tv::cuda::Launch(blocks, threads, cudastream);
...@@ -154,15 +154,15 @@ class IndiceMaxPool(pccm.Class): ...@@ -154,15 +154,15 @@ class IndiceMaxPool(pccm.Class):
bool found = tv::dispatch_int_noexcept<512, 256, 128, 64, 32, 16>(out.dim(1), [](int my, int expect){{return my >= expect;}}, [&](auto V){{ bool found = tv::dispatch_int_noexcept<512, 256, 128, 64, 32, 16>(out.dim(1), [](int my, int expect){{return my >= expect;}}, [&](auto V){{
// if out.dim(1) > value in list above, run this function. // if out.dim(1) > value in list above, run this function.
// if a value is found, other value won't be executed. // if a value is found, other value won't be executed.
constexpr int NumFeatures = TV_DECLTYPE(V)::value; int NumFeatures = TV_DECLTYPE(V)::value;
constexpr int Num0 = MaxThreads / NumFeatures; int Num0 = MaxThreads / NumFeatures;
dim3 blocks(tv::div_up(out.dim(1), NumFeatures), tv::div_up(nhot, Num0)); dim3 blocks(tv::div_up(out.dim(1), NumFeatures), tv::div_up(nhot, Num0));
dim3 threads(NumFeatures, Num0); dim3 threads(NumFeatures, Num0);
launcher = tv::cuda::Launch(blocks, threads, cudastream); launcher = tv::cuda::Launch(blocks, threads, cudastream);
}}); }});
if (!found){{ if (!found){{
constexpr int NumFeatures = 16; int NumFeatures = 16;
constexpr int Num0 = MaxThreads / NumFeatures; int Num0 = MaxThreads / NumFeatures;
dim3 blocks(tv::div_up(out.dim(1), NumFeatures), tv::div_up(nhot, Num0)); dim3 blocks(tv::div_up(out.dim(1), NumFeatures), tv::div_up(nhot, Num0));
dim3 threads(NumFeatures, Num0); dim3 threads(NumFeatures, Num0);
launcher = tv::cuda::Launch(blocks, threads, cudastream); launcher = tv::cuda::Launch(blocks, threads, cudastream);
......
...@@ -20,4 +20,9 @@ ...@@ -20,4 +20,9 @@
docker run --rm -it -e PLAT=manylinux2014_x86_64 -v `pwd`:/io -v $HOME:/myhome scrin/manylinux2014-cuda:cu114-devel bash docker run --rm -it -e PLAT=manylinux2014_x86_64 -v `pwd`:/io -v $HOME:/myhome scrin/manylinux2014-cuda:cu114-devel bash
/io/tools/build-wheels.sh /io/tools/build-wheels.sh
``` ```
\ No newline at end of file
## Windows C++ Tips
* CUDA attributes such as ```__device__``` must be placed before the return type. If you see ```warning: __declspec attributes ignored```, it means ```__device__``` was ignored because it was placed after the return type, which then causes an error.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment