Commit 9f734b3c authored by Po-Yen, Chen
Browse files

Add template parameter 'InBlockLdsExtraW'

parent 078d1df1
...@@ -82,6 +82,7 @@ template <typename InDataType, ...@@ -82,6 +82,7 @@ template <typename InDataType,
index_t NPerBlock, index_t NPerBlock,
index_t HPerBlock, index_t HPerBlock,
index_t WPerBlock, index_t WPerBlock,
index_t InBlockLdsExtraW,
index_t InScalarPerVector, index_t InScalarPerVector,
index_t OutScalarPerVector> index_t OutScalarPerVector>
struct DevicePermute : detail::DevicePermuteBase<DevicePermute<InDataType, struct DevicePermute : detail::DevicePermuteBase<DevicePermute<InDataType,
...@@ -92,6 +93,7 @@ struct DevicePermute : detail::DevicePermuteBase<DevicePermute<InDataType, ...@@ -92,6 +93,7 @@ struct DevicePermute : detail::DevicePermuteBase<DevicePermute<InDataType,
NPerBlock, NPerBlock,
HPerBlock, HPerBlock,
WPerBlock, WPerBlock,
InBlockLdsExtraW,
InScalarPerVector, InScalarPerVector,
OutScalarPerVector>> OutScalarPerVector>>
{ {
...@@ -147,6 +149,7 @@ struct DevicePermute : detail::DevicePermuteBase<DevicePermute<InDataType, ...@@ -147,6 +149,7 @@ struct DevicePermute : detail::DevicePermuteBase<DevicePermute<InDataType,
NPerBlock, NPerBlock,
HPerBlock, HPerBlock,
WPerBlock, WPerBlock,
InBlockLdsExtraW,
InScalarPerVector, InScalarPerVector,
OutScalarPerVector>; OutScalarPerVector>;
......
...@@ -99,6 +99,7 @@ template <typename InGridDesc, ...@@ -99,6 +99,7 @@ template <typename InGridDesc,
index_t NPerBlock, index_t NPerBlock,
index_t HPerBlock, index_t HPerBlock,
index_t WPerBlock, index_t WPerBlock,
index_t InBlockLdsExtraW,
index_t InScalarPerVector, index_t InScalarPerVector,
index_t OutScalarPerVector> index_t OutScalarPerVector>
struct GridwisePermute struct GridwisePermute
...@@ -117,11 +118,9 @@ struct GridwisePermute ...@@ -117,11 +118,9 @@ struct GridwisePermute
 __host__ __device__ static constexpr auto GetInBlockDesc()
 {
-constexpr index_t InBlockLdsExtraM = 0;
 return make_naive_tensor_descriptor(make_tuple(1, Number<HPerBlock>{}, Number<WPerBlock>{}),
-make_tuple(Number<WPerBlock + InBlockLdsExtraM>{},
-Number<WPerBlock + InBlockLdsExtraM>{},
+make_tuple(Number<WPerBlock + InBlockLdsExtraW>{},
+Number<WPerBlock + InBlockLdsExtraW>{},
 I1));
 }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment