Commit fc38c00d authored by yan.yan's avatar yan.yan
Browse files

working on file too long problem in windows

parent 9f9d5b79
[build-system]
requires = ["setuptools>=41.0", "wheel", "pccm>=0.2.5", "cumm>=0.1.5"]
requires = ["setuptools>=41.0", "wheel", "pccm>=0.2.7", "cumm>=0.1.6"]
build-backend = "setuptools.build_meta"
......@@ -448,7 +448,7 @@ class SparseConvIndicesKernel(pccm.ParameterizedClass):
int filter_offset = blockIdx.y;
uint32_t filter_mask_out = (1u << (filter_offset));
uint32_t filter_mask_in = (1u << (RS - 1 - filter_offset));
uint32_t filter_mask_center = (1u << (RS / 2));
// uint32_t filter_mask_center = (1u << (RS / 2));
loc_iter.set_filter_offset(filter_offset);
int indices_pair_size_mul_RS = indices_pair_size * RS;
......
......@@ -112,15 +112,15 @@ class IndiceMaxPool(pccm.Class):
bool found = tv::dispatch_int_noexcept<512, 256, 128, 64, 32, 16>(out.dim(1), [](int my, int expect){{return my >= expect;}}, [&](auto V){{
// if out.dim(1) > value in list above, run this function.
// if a value is found, other value won't be executed.
constexpr int NumFeatures = TV_DECLTYPE(V)::value;
constexpr int Num0 = MaxThreads / NumFeatures;
int NumFeatures = TV_DECLTYPE(V)::value;
int Num0 = MaxThreads / NumFeatures;
dim3 blocks(tv::div_up(out.dim(1), NumFeatures), tv::div_up(nhot, Num0));
dim3 threads(NumFeatures, Num0);
launcher = tv::cuda::Launch(blocks, threads, cudastream);
}});
if (!found){{
constexpr int NumFeatures = 16;
constexpr int Num0 = MaxThreads / NumFeatures;
int NumFeatures = 16;
int Num0 = MaxThreads / NumFeatures;
dim3 blocks(tv::div_up(out.dim(1), NumFeatures), tv::div_up(nhot, Num0));
dim3 threads(NumFeatures, Num0);
launcher = tv::cuda::Launch(blocks, threads, cudastream);
......@@ -154,15 +154,15 @@ class IndiceMaxPool(pccm.Class):
bool found = tv::dispatch_int_noexcept<512, 256, 128, 64, 32, 16>(out.dim(1), [](int my, int expect){{return my >= expect;}}, [&](auto V){{
// if out.dim(1) > value in list above, run this function.
// if a value is found, other value won't be executed.
constexpr int NumFeatures = TV_DECLTYPE(V)::value;
constexpr int Num0 = MaxThreads / NumFeatures;
int NumFeatures = TV_DECLTYPE(V)::value;
int Num0 = MaxThreads / NumFeatures;
dim3 blocks(tv::div_up(out.dim(1), NumFeatures), tv::div_up(nhot, Num0));
dim3 threads(NumFeatures, Num0);
launcher = tv::cuda::Launch(blocks, threads, cudastream);
}});
if (!found){{
constexpr int NumFeatures = 16;
constexpr int Num0 = MaxThreads / NumFeatures;
int NumFeatures = 16;
int Num0 = MaxThreads / NumFeatures;
dim3 blocks(tv::div_up(out.dim(1), NumFeatures), tv::div_up(nhot, Num0));
dim3 threads(NumFeatures, Num0);
launcher = tv::cuda::Launch(blocks, threads, cudastream);
......
......@@ -21,3 +21,8 @@ docker run --rm -it -e PLAT=manylinux2014_x86_64 -v `pwd`:/io -v $HOME:/myhome s
/io/tools/build-wheels.sh
```
## Windows C++ Tips
* CUDA attributes such as ```__device__``` must be placed before the return type. If you see ```warning: __declspec attributes ignored```, it means ```__device__``` was ignored because it was placed after the return type, which then causes a compile error.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment