Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
3abe105f
"...test_cli/git@developer.sourcefind.cn:wangsen/mineru.git" did not exist on "4c916d344d0878129deb8a701d0ba9ff6cf5a4a2"
Commit
3abe105f
authored
Oct 10, 2020
by
Chao Liu
Browse files
make DynamicTensorDescriptor constexpr
parent
2feca7e0
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
81 additions
and
26 deletions
+81
-26
composable_kernel/include/kernel_algorithm/dynamic_gridwise_col2im_gemmkgemmn_nchw.hpp
...nel_algorithm/dynamic_gridwise_col2im_gemmkgemmn_nchw.hpp
+2
-0
composable_kernel/include/tensor_description/dynamic_tensor_descriptor.hpp
.../include/tensor_description/dynamic_tensor_descriptor.hpp
+13
-11
composable_kernel/include/tensor_operation/blockwise_dynamic_tensor_slice_transfer.hpp
...sor_operation/blockwise_dynamic_tensor_slice_transfer.hpp
+9
-8
composable_kernel/include/tensor_operation/threadwise_dynamic_tensor_slice_transfer.hpp
...or_operation/threadwise_dynamic_tensor_slice_transfer.hpp
+8
-4
composable_kernel/include/utility/container_element_picker.hpp
...sable_kernel/include/utility/container_element_picker.hpp
+47
-0
driver/src/col2im_driver.cpp
driver/src/col2im_driver.cpp
+2
-3
No files found.
composable_kernel/include/kernel_algorithm/dynamic_gridwise_col2im_gemmkgemmn_nchw.hpp
View file @
3abe105f
...
@@ -139,7 +139,9 @@ struct DynamicGridwiseCol2Im_gemmkgemmn_nchw
...
@@ -139,7 +139,9 @@ struct DynamicGridwiseCol2Im_gemmkgemmn_nchw
InMemoryDataOperation
::
AtomicAdd
,
InMemoryDataOperation
::
AtomicAdd
,
1
,
1
,
1
>
(
1
>
(
col_gemmk_gemmn_global_desc
,
make_multi_index
(
0
,
gemmn_block_data_on_global
),
make_multi_index
(
0
,
gemmn_block_data_on_global
),
img_gemmk_gemmn_global_desc
,
make_multi_index
(
0
,
gemmn_block_data_on_global
));
make_multi_index
(
0
,
gemmn_block_data_on_global
));
auto
col_gemmk_gemmn_coord
=
auto
col_gemmk_gemmn_coord
=
...
...
composable_kernel/include/tensor_description/dynamic_tensor_descriptor.hpp
View file @
3abe105f
...
@@ -83,8 +83,7 @@ struct DynamicTensorDescriptor
...
@@ -83,8 +83,7 @@ struct DynamicTensorDescriptor
__host__
__device__
explicit
constexpr
DynamicTensorDescriptor
(
const
Transforms
&
transforms
,
__host__
__device__
explicit
constexpr
DynamicTensorDescriptor
(
const
Transforms
&
transforms
,
index_t
element_space_size
)
index_t
element_space_size
)
:
transforms_
{
transforms
},
:
transforms_
{
transforms
},
hidden_lengths_
{
InitializeHiddenLengths
(
transforms_
,
element_space_size
)},
hidden_lengths_
{
InitializeHiddenLengths
(
transforms_
,
element_space_size
)}
visible_lengths_
{
hidden_lengths_
}
{
{
static_assert
(
Transforms
::
Size
()
==
ntransform_
&&
static_assert
(
Transforms
::
Size
()
==
ntransform_
&&
LowerDimensionIdss
::
Size
()
==
ntransform_
&&
LowerDimensionIdss
::
Size
()
==
ntransform_
&&
...
@@ -107,10 +106,14 @@ struct DynamicTensorDescriptor
...
@@ -107,10 +106,14 @@ struct DynamicTensorDescriptor
template
<
index_t
IDim
>
template
<
index_t
IDim
>
__host__
__device__
constexpr
index_t
GetLength
(
Number
<
IDim
>
)
const
__host__
__device__
constexpr
index_t
GetLength
(
Number
<
IDim
>
)
const
{
{
return
visible
_lengths_
[
Number
<
IDim
>
{}];
return
hidden
_lengths_
[
VisibleDimensionIds
::
At
(
Number
<
IDim
>
{}
)
];
}
}
__host__
__device__
constexpr
const
auto
&
GetLengths
()
const
{
return
visible_lengths_
;
}
__host__
__device__
constexpr
auto
GetLengths
()
const
{
return
unpack
([
&
](
auto
...
is
)
constexpr
{
return
make_multi_index
(
GetLength
(
is
)...);
},
VisibleDimensionIds
{});
}
// maybe this result should be saved as a member variable
// maybe this result should be saved as a member variable
__host__
__device__
constexpr
index_t
GetElementSize
()
const
__host__
__device__
constexpr
index_t
GetElementSize
()
const
...
@@ -178,8 +181,6 @@ struct DynamicTensorDescriptor
...
@@ -178,8 +181,6 @@ struct DynamicTensorDescriptor
// TODO maybe hidden_lengths_ should use reference_wrapper (reference to transforms_'s member
// TODO maybe hidden_lengths_ should use reference_wrapper (reference to transforms_'s member
// variable lengths_) to save space on stack?
// variable lengths_) to save space on stack?
const
HiddenIndex
hidden_lengths_
;
const
HiddenIndex
hidden_lengths_
;
// visible_lenths_ contains a reference to hidden_lengths_
const
ContainerElementPicker
<
const
HiddenIndex
,
VisibleDimensionIds
>
visible_lengths_
;
};
};
template
<
index_t
NDimHidden
,
typename
VisibleDimensionIds
>
template
<
index_t
NDimHidden
,
typename
VisibleDimensionIds
>
...
@@ -303,10 +304,11 @@ transform_dynamic_tensor_descriptor(const OldTensorDescriptor& old_tensor_desc,
...
@@ -303,10 +304,11 @@ transform_dynamic_tensor_descriptor(const OldTensorDescriptor& old_tensor_desc,
// new visible dimension's hidden ids
// new visible dimension's hidden ids
constexpr
auto
unordered_new_visible_dim_hidden_ids
=
constexpr
auto
unordered_new_visible_dim_hidden_ids
=
unpack
([](
auto
...
xs
)
{
return
merge_sequences
(
xs
...);
},
up_dim_hidden_idss
);
unpack
([](
auto
...
xs
)
constexpr
{
return
merge_sequences
(
xs
...);
},
up_dim_hidden_idss
);
constexpr
auto
new_visible_dim_unordered2ordered
=
unpack
(
constexpr
auto
new_visible_dim_unordered2ordered
=
[](
auto
...
xs
)
{
return
merge_sequences
(
xs
...);
},
NewUpperDimensionNewVisibleIdss
{});
unpack
([](
auto
...
xs
)
constexpr
{
return
merge_sequences
(
xs
...);
},
NewUpperDimensionNewVisibleIdss
{});
constexpr
auto
new_visible_dim_hidden_ids
=
constexpr
auto
new_visible_dim_hidden_ids
=
unordered_new_visible_dim_hidden_ids
.
ReorderGivenOld2New
(
new_visible_dim_unordered2ordered
);
unordered_new_visible_dim_hidden_ids
.
ReorderGivenOld2New
(
new_visible_dim_unordered2ordered
);
...
@@ -395,8 +397,8 @@ make_dynamic_tensor_coordinate_step(const TensorDesc&, const VisibleIndex& idx_d
...
@@ -395,8 +397,8 @@ make_dynamic_tensor_coordinate_step(const TensorDesc&, const VisibleIndex& idx_d
// 1) Need to do this transform
// 1) Need to do this transform
// 2) all components of lower index diff will assume to be non-zero and need to be
// 2) all components of lower index diff will assume to be non-zero and need to be
// computed
// computed
const
bool
idx_diff_up_has_non_zero
=
const
bool
idx_diff_up_has_non_zero
=
container_reduce
(
container_reduce
(
non_zero_diff_pick_up
,
[](
auto
a
,
auto
b
)
{
return
a
or
b
;
},
false
);
non_zero_diff_pick_up
,
[](
auto
a
,
auto
b
)
constexpr
{
return
a
or
b
;
},
false
);
do_transforms
(
itran
)
=
idx_diff_up_has_non_zero
;
do_transforms
(
itran
)
=
idx_diff_up_has_non_zero
;
...
...
composable_kernel/include/tensor_operation/blockwise_dynamic_tensor_slice_transfer.hpp
View file @
3abe105f
...
@@ -41,13 +41,13 @@ struct BlockwiseDynamicTensorSliceTransfer_v1
...
@@ -41,13 +41,13 @@ struct BlockwiseDynamicTensorSliceTransfer_v1
const
BlockDstDesc
&
block_dst_desc
,
const
BlockDstDesc
&
block_dst_desc
,
const
Index
&
dst_block_slice_origin
)
const
Index
&
dst_block_slice_origin
)
{
{
static_assert
(
nDim
==
BlockSrcDesc
::
GetNumOfDimension
()
&&
static_assert
(
nDim
==
Block
Dst
Desc
::
GetNumOfDimension
()
&&
nDim
==
remove_reference_t
<
remove_cv_t
<
Block
Src
Desc
>>
::
GetNumOfDimension
()
&&
nDim
==
BlockSliceLengths
::
Size
()
&&
nDim
==
ThreadSliceLengths
::
Size
()
&&
nDim
==
remove_reference_t
<
remove_cv_t
<
BlockDstDesc
>>::
GetNumOfDimension
()
&&
nDim
==
Thread
Cluster
Lengths
::
Size
()
&&
nDim
==
BlockSliceLengths
::
Size
()
&&
nDim
==
Thread
Slice
Lengths
::
Size
()
&&
nDim
==
ThreadClusterArrangeOrder
::
Size
()
&&
nDim
==
ThreadClusterLengths
::
Size
()
&&
nDim
==
ThreadClusterArrangeOrder
::
Size
()
&&
nDim
==
SrcDimAccessOrder
::
Size
()
&&
nDim
==
DstDimAccessOrder
::
Size
(),
nDim
==
SrcDimAccessOrder
::
Size
()
&&
nDim
==
DstDimAccessOrder
::
Size
(),
"wrong! nDim not consistent"
);
"wrong! nDim not consistent"
);
static_assert
(
static_assert
(
is_same
<
BlockSliceLengths
,
decltype
(
ThreadSliceLengths
{}
*
ThreadClusterLengths
{})
>
{},
is_same
<
BlockSliceLengths
,
decltype
(
ThreadSliceLengths
{}
*
ThreadClusterLengths
{})
>
{},
...
@@ -156,7 +156,8 @@ struct BlockwiseDynamicTensorSliceTransfer_v1
...
@@ -156,7 +156,8 @@ struct BlockwiseDynamicTensorSliceTransfer_v1
ThreadwiseRead
threadwise_read_
;
ThreadwiseRead
threadwise_read_
;
ThreadwiseWrite
threadwise_write_
;
ThreadwiseWrite
threadwise_write_
;
static
constexpr
index_t
thread_buffer_element_size_
=
thread_buffer_desc_
.
GetElementSpace
();
static
constexpr
index_t
thread_buffer_element_size_
=
thread_buffer_desc_
.
GetElementSpaceSize
();
BlockSrcData
p_thread_buffer_
[
thread_buffer_element_size_
];
BlockSrcData
p_thread_buffer_
[
thread_buffer_element_size_
];
};
};
...
...
composable_kernel/include/tensor_operation/threadwise_dynamic_tensor_slice_transfer.hpp
View file @
3abe105f
...
@@ -126,16 +126,20 @@ struct ThreadwiseDynamicTensorSliceTransfer_v1
...
@@ -126,16 +126,20 @@ struct ThreadwiseDynamicTensorSliceTransfer_v1
using
SrcCoordStep
=
decltype
(
make_dynamic_tensor_coordinate_step
(
SrcDesc
{},
Index
{}));
using
SrcCoordStep
=
decltype
(
make_dynamic_tensor_coordinate_step
(
SrcDesc
{},
Index
{}));
using
DstCoordStep
=
decltype
(
make_dynamic_tensor_coordinate_step
(
DstDesc
{},
Index
{}));
using
DstCoordStep
=
decltype
(
make_dynamic_tensor_coordinate_step
(
DstDesc
{},
Index
{}));
__device__
constexpr
ThreadwiseDynamicTensorSliceTransfer_v1
()
=
default
;
__device__
constexpr
ThreadwiseDynamicTensorSliceTransfer_v1
(
const
SrcDesc
&
src_desc
,
__device__
constexpr
ThreadwiseDynamicTensorSliceTransfer_v1
(
const
SrcDesc
&
src_desc
,
const
Index
&
src_slice_origin
,
const
Index
&
src_slice_origin
,
const
DstDesc
&
dst_desc
,
const
DstDesc
&
dst_desc
,
const
Index
&
dst_slice_origin
)
const
Index
&
dst_slice_origin
)
:
src_desc_
(
src_desc
),
:
src_desc_
(
src_desc
),
src_slice_origin_
(
src_slice_origin
),
src_slice_origin_
(
make_dynamic_tensor_coordinate
(
src_desc
,
src_slice_origin
)
)
,
dst_desc_
(
dst_desc
),
dst_desc_
(
dst_desc
),
dst_slice_origin_
(
dst_slice_origin
)
dst_slice_origin_
(
make_dynamic_tensor_coordinate
(
dst_desc
,
dst_slice_origin
))
{
}
__device__
constexpr
ThreadwiseDynamicTensorSliceTransfer_v1
()
:
ThreadwiseDynamicTensorSliceTransfer_v1
(
SrcDesc
{},
make_zero_multi_index
<
nDim
>
(),
DstDesc
{},
make_zero_multi_index
<
nDim
>
())
{
{
}
}
...
...
composable_kernel/include/utility/container_element_picker.hpp
View file @
3abe105f
...
@@ -71,6 +71,47 @@ struct ContainerElementPicker
...
@@ -71,6 +71,47 @@ struct ContainerElementPicker
Arr
&
mArray
;
Arr
&
mArray
;
};
};
// Arr: Array or StaticallyIndexedArray
// Picks: Sequence<...>
template
<
typename
Arr
,
typename
Picks
>
struct
ConstantContainerElementPicker
{
using
type
=
ConstantContainerElementPicker
;
#if 0
using data_type = typename Arr::data_type;
#endif
__host__
__device__
constexpr
ConstantContainerElementPicker
()
=
delete
;
__host__
__device__
explicit
constexpr
ConstantContainerElementPicker
(
const
Arr
&
array
)
:
mArray
{
array
}
{
constexpr
index_t
imax
=
reduce_on_sequence
(
Picks
{},
math
::
maxer
<
index_t
>
{},
Number
<
0
>
{});
static_assert
(
imax
<
Arr
::
Size
(),
"wrong! exceeding # array element"
);
}
__host__
__device__
static
constexpr
auto
Size
()
{
return
Picks
::
Size
();
}
template
<
index_t
I
>
__host__
__device__
constexpr
const
auto
&
At
(
Number
<
I
>
i
)
const
{
static_assert
(
I
<
Size
(),
"wrong!"
);
constexpr
auto
IP
=
Picks
{}[
i
];
return
mArray
[
IP
];
}
template
<
index_t
I
>
__host__
__device__
constexpr
const
auto
&
operator
[](
Number
<
I
>
i
)
const
{
return
At
(
i
);
}
private:
const
Arr
&
mArray
;
};
template
<
typename
Arr
,
typename
Picks
,
typename
X
>
template
<
typename
Arr
,
typename
Picks
,
typename
X
>
__host__
__device__
constexpr
auto
operator
+=
(
ContainerElementPicker
<
Arr
,
Picks
>&
y
,
const
X
&
x
)
__host__
__device__
constexpr
auto
operator
+=
(
ContainerElementPicker
<
Arr
,
Picks
>&
y
,
const
X
&
x
)
{
{
...
@@ -103,5 +144,11 @@ __host__ __device__ constexpr auto pick_container_element(Arr& a, Picks)
...
@@ -103,5 +144,11 @@ __host__ __device__ constexpr auto pick_container_element(Arr& a, Picks)
return
ContainerElementPicker
<
Arr
,
Picks
>
(
a
);
return
ContainerElementPicker
<
Arr
,
Picks
>
(
a
);
}
}
template
<
typename
Arr
,
typename
Picks
>
__host__
__device__
constexpr
auto
pick_container_element
(
const
Arr
&
a
,
Picks
)
{
return
ConstantContainerElementPicker
<
Arr
,
Picks
>
(
a
);
}
}
// namespace ck
}
// namespace ck
#endif
#endif
driver/src/col2im_driver.cpp
View file @
3abe105f
...
@@ -545,7 +545,7 @@ int main(int argc, char* argv[])
...
@@ -545,7 +545,7 @@ int main(int argc, char* argv[])
LeftPads{},
LeftPads{},
RightPads{},
RightPads{},
nrepeat);
nrepeat);
#elif
0
#elif
1
device_dynamic_col2im_gemmkgemmn_nchw
(
col_eb_desc
,
device_dynamic_col2im_gemmkgemmn_nchw
(
col_eb_desc
,
col_eb
,
col_eb
,
img_nchw_desc
,
img_nchw_desc
,
...
@@ -583,8 +583,7 @@ int main(int argc, char* argv[])
...
@@ -583,8 +583,7 @@ int main(int argc, char* argv[])
DynamicTensorDescriptor
<
decltype
(
transforms
),
DynamicTensorDescriptor
<
decltype
(
transforms
),
decltype
(
low_dim_hidden_idss
),
decltype
(
low_dim_hidden_idss
),
decltype
(
up_dim_hidden_idss
),
decltype
(
up_dim_hidden_idss
),
decltype
(
decltype
(
visible_dim_hidden_ids
)
>
{
transforms
,
element_space_size
};
visible_dim_hidden_ids
)
>
{};
//{transforms, element_space_size};
#endif
#endif
if
(
do_verification
)
if
(
do_verification
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment