gaoqiong / composable_kernel · Commit 4f4aba48
Authored Sep 24, 2019 by Chao Liu

adding GetLinearDimensionMask()

Parent: 545d9305

Showing 5 changed files with 146 additions and 85 deletions (+146 -85)
Files changed:
composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw_padded_lds_double_buffer.hpp   +1 -3
composable_kernel/include/tensor_description/tensor_coordinate_v2.hpp   +5 -0
composable_kernel/include/tensor_description/tensor_descriptor.hpp   +72 -62
composable_kernel/include/utility/functional.hpp   +12 -0
composable_kernel/include/utility/sequence.hpp   +56 -20
composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw_padded_lds_double_buffer.hpp

@@ -440,9 +440,7 @@ struct GridwiseConvolutionImplicitGemm_v4r1_nchw_kcyx_nkhw_padded_lds_double_buf
                      0,
                      b_thread_data_on_global,
                      0})
-#if 0
-                .Run_generic
-#elif 1
+#if 1
                 .template Run_generic<Float, address_space_t::generic, address_space_t::global>
 #elif 1
                 .template Run_optimized_dst_address_calculation<Float, address_space_t::global>
...
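Note (not part of the commit): the retained branch calls the member function template Run_generic through the .template keyword, which is required when the object's type depends on a template parameter. A minimal standalone sketch of that disambiguation; Copier, copy_all and the address_space enum below are hypothetical stand-ins, not types from this repository.

// Standalone illustration of the ".template" disambiguation used above.
// Copier and copy_all are hypothetical stand-ins, not repository types.
#include <cstddef>
#include <iostream>

enum class address_space { generic, global };

struct Copier
{
    // member function template, parameterized by element type and address-space tags
    template <typename T, address_space Src, address_space Dst>
    void Run(const T* src, T* dst, std::size_t n) const
    {
        for(std::size_t i = 0; i < n; ++i)
            dst[i] = src[i];
    }
};

template <typename CopierType, typename T>
void copy_all(const CopierType& copier, const T* src, T* dst, std::size_t n)
{
    // "copier" has a dependent type, so the call must spell out ".template";
    // without it the "<" would be parsed as a less-than operator
    copier.template Run<T, address_space::generic, address_space::global>(src, dst, n);
}

int main()
{
    float src[4] = {0.f, 1.f, 2.f, 3.f};
    float dst[4] = {};
    copy_all(Copier{}, src, dst, 4);
    std::cout << dst[3] << '\n'; // prints 3
}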
composable_kernel/include/tensor_description/tensor_coordinate_v2.hpp

@@ -45,6 +45,7 @@ struct NativeTensorCoordinate
     __host__ __device__ constexpr type operator+=(const Index& idx_diff)
     {
         // mIndex is updated here, but some (or all) of its entries may never be used
+        // compiler should remove those entries as dead code
         mIndex += idx_diff;

         mOffset += tensor_desc_type::CalculateOffsetDiff(idx_diff);
...
@@ -55,6 +56,7 @@ struct NativeTensorCoordinate
     __host__ __device__ constexpr type operator-=(const Index& idx_diff)
     {
         // mIndex is updated here, but some (or all) of its entries may never be used
+        // compiler should remove those entries as dead code
         mIndex -= idx_diff;

         mOffset -= tensor_desc_type::CalculateOffsetDiff(idx_diff);
...
@@ -136,6 +138,7 @@ struct TransformedTensorCoordinate
             idx_up_diff, GetIndex(), GetLowerCoordinate().GetIndex());

         // mIndexUp is updated here, but some (or all) of its entries may never be used
+        // compiler should remove those entries as dead code
         mIndexUp += idx_up_diff;

         return *this;
...
@@ -146,6 +149,8 @@ struct TransformedTensorCoordinate
         mCoordLow -= tensor_desc_type::CalculateLowerIndexDiff(
             idx_up_diff, GetIndex(), GetLowerCoordinate().GetIndex());

+        // mIndex is updated here, but some (or all) of its entries may never be used
+        // compiler should remove those entries as dead code
         mIndexUp -= idx_up_diff;

         return *this;
...
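Note (not part of the commit): the comments added above document that operator+= / operator-= update both the multi-dimensional index and the linearized offset, relying on the optimizer to drop index updates that are never read. A simplified, self-contained sketch of that pattern; SimpleCoordinate and its members are stand-ins for NativeTensorCoordinate and tensor_desc_type, not the repository's code.

// Simplified illustration of a coordinate that keeps its index and its
// linear offset in sync on operator+=. All names are stand-ins.
#include <array>
#include <cstddef>
#include <iostream>

template <std::size_t NDim>
struct SimpleCoordinate
{
    std::array<std::size_t, NDim> strides;
    std::array<std::size_t, NDim> index;
    std::size_t offset;

    // analogue of tensor_desc_type::CalculateOffsetDiff(): offset change implied by an index change
    std::size_t OffsetDiff(const std::array<std::size_t, NDim>& idx_diff) const
    {
        std::size_t diff = 0;
        for(std::size_t i = 0; i < NDim; ++i)
            diff += idx_diff[i] * strides[i];
        return diff;
    }

    SimpleCoordinate& operator+=(const std::array<std::size_t, NDim>& idx_diff)
    {
        // the index is updated even if a caller only ever reads the offset;
        // the optimizer can drop the unused index updates as dead code
        for(std::size_t i = 0; i < NDim; ++i)
            index[i] += idx_diff[i];

        offset += OffsetDiff(idx_diff);
        return *this;
    }
};

int main()
{
    SimpleCoordinate<2> coord{{8, 1}, {0, 0}, 0}; // row-major strides, 8-wide rows
    coord += std::array<std::size_t, 2>{1, 2};    // move one row down and two columns right
    std::cout << coord.offset << '\n';            // prints 10
}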
composable_kernel/include/tensor_description/tensor_descriptor.hpp

@@ -101,12 +101,12 @@ struct NativeTensorDescriptor
         return true;
     }

-    __host__ __device__ static constexpr auto GetMaskOfLinearDimensions()
+    __host__ __device__ static constexpr auto GetLinearDimensionMask()
     {
         return typename uniform_sequence_gen<nDim, 1>::type{};
     }

-    __host__ __device__ static constexpr auto GetMaskOfNonLinearDimensions()
+    __host__ __device__ static constexpr auto GetNonLinearDimensionMask()
     {
         return typename uniform_sequence_gen<nDim, 0>::type{};
     }
...
@@ -353,18 +353,27 @@ struct TransformedTensorDescriptor
         return GetLowerTensorDescriptor().CalculateOffset(CalculateLowerIndex(idx_up));
     }

-#if 0
-    struct lambda_sequence_logic_or
+#if 1
+    struct lambda_sequence_logical_and
     {
         template <typename... Seqs>
-        __host__ __device__ constexpr auto operator()(Seqs... seqs) const
+        __host__ __device__ constexpr auto operator()(Seqs...) const
         {
-            // TODO: should use math::logic_or<bool>, after Sequence can take bool
-            return typename sequence_reduce<math::logic_or<bool>, Seqs...>::type{};
+            return typename sequence_reduce<logical_and<index_t>, Seqs...>::type{};
         }
     };

-    struct lambda_1
+    template <typename T>
+    struct lambda_is_true
+    {
+        __host__ __device__ constexpr auto operator()(const T& x) const
+        {
+            // TODO: remove static_cast once Sequence can take bool as entries
+            return static_cast<bool>(x) == true;
+        }
+    };
+
+    struct lambda_get_linear_dimension_mask_of_single_tranform
     {
         // check only one transform at a time
         template <typename Transform, typename LowDimensionId, typename UpDimensionId>
...
@@ -372,73 +381,73 @@ struct TransformedTensorDescriptor
         operator()(const Transform& tran, LowDimensionId, UpDimensionId) const
         {
             // judge if transformation is linear
-            constexpr bool is_linear_transform = tran.IsLinearTransform();
+            constexpr bool is_linear_transform = Transform::IsLinearTransform();

             // judge if all lower dimension are linear
-            constexpr bool is_all_low_dim_linear = math::reduce_on_sequence(
-                pick_sequence_elements_by_mask(
-                    GetLowerTensorDescriptor().GetMaskOfLinearDimensions(), LowDimensionId{}),
-                math::logic_and<bool>{},
-                integral_constant<bool, true>{});
+            constexpr bool are_all_low_dim_linear = sequence_all_of(
+                pick_sequence_elements_by_ids(
+                    GetLowerTensorDescriptor().GetLinearDimensionMask(),
+                    LowDimensionId{}),
+                lambda_is_true<index_t>{});

-            constexpr bool is_up_dim_nonlinear = !(is_linear_transform && is_all_low_dim_linear);
-
-            constexpr auto value_sequence =
-                typename uniform_sequence_gen<tran.GetNumOfUpperDimension(),
-                                              is_up_dim_nonlinear>::type{};
+            // judge if upper dimenisons are linear
+            constexpr bool are_up_dim_linear = is_linear_transform && are_all_low_dim_linear;

-            constexpr auto mask_of_up_nonlinear_dims = modifiy_sequence(
-                typename uniform_sequence_gen<nDimUp, 0>::type{}, value_sequence, UpDimensionId{});
+            // create linear mask for upper dimensions
+            constexpr auto mask_of_up_linear_dims = modifiy_sequence_by_ids(
+                typename uniform_sequence_gen<nDimUp, 0>::type{},
+                typename uniform_sequence_gen<UpDimensionId::Size(), 1>::type{},
+                UpDimensionId{});

-            return mask_of_up_nonlinear_dims;
-        };
+            return mask_of_up_linear_dims;
+        }
     };

-    __host__ __device__ static constexpr bool GetMaskOfNonLinearDimensions()
+    __host__ __device__ static constexpr auto GetLinearDimensionMask()
     {
         // create tuple of linear dimension masks, for all transformations
-        constexpr auto tuple_of_nonlinear_dimension_mask =
-            transform_tuples(lambda_1{}, Transforms{}, LowDimensionIds{}, UpDimensionIds{});
+        constexpr auto tuple_of_linear_dimension_mask =
+            transform_tuples(lambda_get_linear_dimension_mask_of_single_tranform{},
+                             Transforms{},
+                             LowDimensionIds{},
+                             UpDimensionIds{});

         // reduce tuple of masks into one mask
-        constexpr auto nonlinear_dimension_mask =
-            unpack(lambda_sequence_logic_or{}, tuple_of_nonlinear_dimension_mask);
+        constexpr auto linear_dimension_mask =
+            unpack(lambda_sequence_logical_and{}, tuple_of_linear_dimension_mask);

-        return nonlinear_dimension_mask;
+        return linear_dimension_mask;
     }

-    __host__ __device__ static constexpr bool GetMaskOfLinearDimensions()
+    __host__ __device__ static constexpr auto GetNonLinearDimensionMask()
     {
-        return GetMaskOfNonLinearDimensions().Transform(math::logic_not<bool>{});
+        return GetLinearDimensionMask().Transform(logical_not<index_t>{});
     }

     template <index_t IDim>
     __host__ __device__ static constexpr bool IsLinearDimension(Number<IDim>)
     {
-        return GetMaskOfLinearDimensions().At(Number<IDim>{});
+        return GetLinearDimensionMask().At(Number<IDim>{});
     }

     __host__ __device__ static constexpr auto GetLinearDimensions()
     {
-        constexpr auto linear_dimension_mask = GetMaskOfLienarDimensions();
+        constexpr auto linear_dimension_mask = GetLinearDimensionMask();

         return pick_sequence_elements_by_mask(
             typename arithmetic_sequence_gen<0, nDimUp, 1>::type{}, linear_dimension_mask);
     }

     __host__ __device__ static constexpr auto GetNonLinearDimensions()
     {
-        constexpr auto nonlinear_dimension_mask =
-            GetMaskOfLienarDimensions().Transform(math::logic_not<index_t>{});
+        constexpr auto nonlinear_dimension_mask = GetNonLinearDimensionMask();

         return pick_sequence_elements_by_mask(
             typename arithmetic_sequence_gen<0, nDimUp, 1>::type{}, nonlinear_dimension_mask);
     }

     __host__ __device__ static constexpr auto GetNonLinearIndependentDimensionGroups()
     {
         // not implemented
     }
 #endif

     __host__ __device__ static constexpr bool
...
@@ -457,9 +466,10 @@ struct TransformedTensorDescriptor
         return flag;
     }

-    // Whenever this function is called, it will call CalculateLowerIndex() recursively
+    // Whenever this function is called, it will call CalculateLowerIndex() recursively.
     // If you have created a tensor coordinate already, instead of calling this function,
-    // you should call TransformedTensorCoordinate::IsUpperIndexMappedToValidOffset()
+    // you should call TensorCoordinate::IsUpperIndexMappedToValidOffset() which would
+    // be less expensive.
     __host__ __device__ static constexpr bool
     IsUpperIndexMappedToValidOffset(const UpperIndex& idx_up)
     {
...
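Note (not part of the commit): GetLinearDimensionMask() builds one 0/1 mask per transform and reduces them with a logical AND, so an upper dimension counts as linear only if every transform agrees; GetLinearDimensions() then picks the dimension ids where the mask is 1. A compile-time sketch of the mask combination using std::index_sequence in place of the repository's Sequence; and_masks and count_linear are illustrative names, not from the commit.

// Compile-time sketch of combining per-transform linear-dimension masks.
// std::index_sequence stands in for the repository's Sequence type.
#include <cstddef>
#include <utility>

// element-wise logical AND of two 0/1 masks of equal length
template <std::size_t... A, std::size_t... B>
constexpr auto and_masks(std::index_sequence<A...>, std::index_sequence<B...>)
{
    return std::index_sequence<(A & B)...>{};
}

// number of dimensions flagged as linear in a 0/1 mask
template <std::size_t... M>
constexpr std::size_t count_linear(std::index_sequence<M...>)
{
    return (std::size_t{0} + ... + M);
}

int main()
{
    // per-transform masks over 4 upper dimensions:
    // transform 0 says dims {0,1,3} are linear, transform 1 says {0,3}
    constexpr auto mask0 = std::index_sequence<1, 1, 0, 1>{};
    constexpr auto mask1 = std::index_sequence<1, 0, 0, 1>{};

    // AND-reduce: a dimension stays linear only if every transform agrees
    constexpr auto combined = and_masks(mask0, mask1); // 1, 0, 0, 1
    static_assert(count_linear(combined) == 2, "only dims 0 and 3 remain linear");
}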
composable_kernel/include/utility/functional.hpp

@@ -25,6 +25,18 @@ struct swallow
     }
 };

+template <typename T>
+struct logical_and
+{
+    constexpr bool operator()(const T& x, const T& y) const { return x && y; }
+};
+
+template <typename T>
+struct logical_or
+{
+    constexpr bool operator()(const T& x, const T& y) const { return x || y; }
+};
+
 template <typename T>
 struct logical_not
 {
...
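Note (not part of the commit): logical_and and logical_or (definitions copied from the hunk above) are plain binary function objects, so they can drive reductions such as "are all dimensions linear" or "is any dimension non-linear". A small sketch; reduce_with and the index_t alias here are hypothetical helpers for illustration, not the repository's sequence_reduce or reduce_on_sequence.

// Using logical_and / logical_or as reduction operators over a pack of flags.
#include <iostream>

using index_t = int; // stand-in for the repository's index type

template <typename T>
struct logical_and
{
    constexpr bool operator()(const T& x, const T& y) const { return x && y; }
};

template <typename T>
struct logical_or
{
    constexpr bool operator()(const T& x, const T& y) const { return x || y; }
};

// fold an initial value over a pack with a binary functor (hypothetical helper)
template <typename Reduce, typename T, typename... Ts>
constexpr T reduce_with(Reduce f, T init, Ts... values)
{
    T result = init;
    // C++17 fold over the comma operator applies f left-to-right
    ((result = f(result, values)), ...);
    return result;
}

int main()
{
    // "are all dimensions linear?" style query: AND over 0/1 flags
    constexpr bool all_linear = reduce_with(logical_and<index_t>{}, true, 1, 1, 0, 1);
    // "is any dimension non-linear?" style query: OR over 0/1 flags
    constexpr bool any_nonlinear = reduce_with(logical_or<index_t>{}, false, 0, 0, 1);

    std::cout << all_linear << ' ' << any_nonlinear << '\n'; // prints 0 1
}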
composable_kernel/include/utility/sequence.hpp

@@ -311,7 +311,7 @@ struct sequence_reverse<Sequence<I0, I1>>
     using type = Sequence<I1, I0>;
 };

-#if 0
+#if 1
 template <typename Reduce, typename Seq, typename... Seqs>
 struct sequence_reduce
 {
...
@@ -755,31 +755,64 @@ __host__ __device__ constexpr auto pick_sequence_elements_by_ids(Seq, Sequence<I
     return Sequence<Seq::At(Number<Is>{})...>{};
 }

-#if 0
+#if 1
+namespace detail {
+
+template <typename WorkSeq, typename RemainSeq, typename RemainMask>
+struct pick_sequence_elements_by_mask_impl
+{
+    using new_work_seq = typename conditional<RemainMask::Front(),
+                                              decltype(WorkSeq::PushBack(RemainSeq::Front())),
+                                              WorkSeq>::type;
+
+    using type = typename pick_sequence_elements_by_mask_impl<new_work_seq,
+                                                              decltype(RemainSeq::PopFront()),
+                                                              decltype(RemainMask::PopFront())>::type;
+};
+
+template <typename WorkSeq>
+struct pick_sequence_elements_by_mask_impl<WorkSeq, Sequence<>, Sequence<>>
+{
+    using type = WorkSeq;
+};
+
+} // namespace detail
+
 template <typename Seq, typename Mask>
 __host__ __device__ constexpr auto pick_sequence_elements_by_mask(Seq, Mask)
 {
-    // not implemented
+    static_assert(Seq::Size() == Mask::Size(), "wrong!");
+
+    return typename detail::pick_sequence_elements_by_mask_impl<Sequence<>, Seq, Mask>::type{};
 }
-#endif

-template <typename Seq, typename Reduce>
-struct lambda_reduce_on_sequence
-{
-    const Reduce& f;
-    index_t& result;
+namespace detail {

-    __host__ __device__ constexpr lambda_reduce_on_sequence(const Reduce& f_, index_t& result_)
-        : f(f_), result(result_)
-    {
-    }
+template <typename WorkSeq, typename RemainValues, typename RemainIds>
+struct modify_sequence_elements_by_ids_impl
+{
+    using new_work_seq = decltype(WorkSeq::Modify(RemainIds::Front(), RemainValues::Front()));
+
+    using type = typename modify_sequence_elements_by_ids_impl<new_work_seq,
+                                                               decltype(RemainValues::PopFront()),
+                                                               decltype(RemainIds::PopFront())>::type;
+};

-    template <typename IDim>
-    __host__ __device__ constexpr index_t operator()(IDim) const
-    {
-        return result = f(result, Seq::At(IDim{}));
-    }
-};
+template <typename WorkSeq>
+struct modify_sequence_elements_by_ids_impl<WorkSeq, Sequence<>, Sequence<>>
+{
+    using type = WorkSeq;
+};
+
+} // namespace detail
+
+template <typename Seq, typename Values, typename Ids>
+__host__ __device__ constexpr auto modify_sequence_elements_by_ids(Seq, Values, Ids)
+{
+    static_assert(Values::Size() == Ids::Size() && Seq::Size() >= Values::Size(), "wrong!");
+
+    return typename detail::modify_sequence_elements_by_ids_impl<Seq, Values, Ids>::type{};
+}
+#endif

 template <typename Seq, typename Reduce, index_t Init>
 __host__ __device__ constexpr index_t
...
@@ -787,14 +820,17 @@ reduce_on_sequence(Seq, Reduce f, Number<Init> /*initial_value*/)
 {
     index_t result = Init;

-    static_for<0, Seq::Size(), 1>{}(lambda_reduce_on_sequence<Seq, Reduce>(f, result));
+    for(index_t i = 0; i < Seq::Size(); ++i)
+    {
+        result = f(result, Seq::At(i));
+    }

     return result;
 }

 // TODO: a generic any_of for any container
 template <typename Seq, typename F>
-__host__ __device__ constexpr bool sequence_any_of(Seq, F f /*initial_value*/)
+__host__ __device__ constexpr bool sequence_any_of(Seq, F f)
 {
     bool flag = false;
...
@@ -808,7 +844,7 @@ __host__ __device__ constexpr bool sequence_any_of(Seq, F f /*initial_value*/)
 // TODO: a generic all_of for any container
 template <typename Seq, typename F>
-__host__ __device__ constexpr bool sequence_all_of(Seq, F f /*initial_value*/)
+__host__ __device__ constexpr bool sequence_all_of(Seq, F f)
 {
     bool flag = true;
...
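Note (not part of the commit): pick_sequence_elements_by_mask is implemented above as a head/tail recursion that appends the sequence front to a work sequence whenever the mask front is nonzero. A self-contained sketch of the same recursion over std::index_sequence; pick_by_mask and pick_by_mask_impl are stand-ins for the repository's pick_sequence_elements_by_mask(_impl), which uses Front/PopFront/PushBack on its own Sequence type.

// Sketch of the pick-by-mask recursion, written over std::index_sequence.
#include <cstddef>
#include <type_traits>
#include <utility>

template <typename Work, typename Remain, typename Mask>
struct pick_by_mask_impl;

// recursive case: keep Head if MHead is nonzero, then recurse on the tails
template <std::size_t... Work, std::size_t Head, std::size_t... Tail,
          std::size_t MHead, std::size_t... MTail>
struct pick_by_mask_impl<std::index_sequence<Work...>,
                         std::index_sequence<Head, Tail...>,
                         std::index_sequence<MHead, MTail...>>
{
    using new_work = std::conditional_t<(MHead != 0),
                                        std::index_sequence<Work..., Head>,
                                        std::index_sequence<Work...>>;

    using type = typename pick_by_mask_impl<new_work,
                                            std::index_sequence<Tail...>,
                                            std::index_sequence<MTail...>>::type;
};

// base case: sequence and mask are both exhausted
template <std::size_t... Work>
struct pick_by_mask_impl<std::index_sequence<Work...>,
                         std::index_sequence<>,
                         std::index_sequence<>>
{
    using type = std::index_sequence<Work...>;
};

template <typename Seq, typename Mask>
using pick_by_mask = typename pick_by_mask_impl<std::index_sequence<>, Seq, Mask>::type;

int main()
{
    // picking dimension ids 0 and 3 out of {0,1,2,3} with mask 1,0,0,1
    using picked = pick_by_mask<std::index_sequence<0, 1, 2, 3>,
                                std::index_sequence<1, 0, 0, 1>>;
    static_assert(std::is_same<picked, std::index_sequence<0, 3>>::value, "");
}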