Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
f0eec07c
"git@developer.sourcefind.cn:wangsen/mineru.git" did not exist on "54f165aac68b30e033d7da13b168f66cd0a4ccc3"
Commit
f0eec07c
authored
Dec 19, 2019
by
Chao Liu
Browse files
tweaking
parent
89140d16
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
50 additions
and
196 deletions
+50
-196
composable_kernel/include/kernel_algorithm/gridwise_convolution_backward_data_implicit_gemm_v2r1_nchw_kcyx_nkhw.hpp
...ution_backward_data_implicit_gemm_v2r1_nchw_kcyx_nkhw.hpp
+1
-1
composable_kernel/include/tensor_description/multi_index_transform.hpp
...rnel/include/tensor_description/multi_index_transform.hpp
+18
-110
composable_kernel/include/tensor_description/tensor_coordinate.hpp
...e_kernel/include/tensor_description/tensor_coordinate.hpp
+4
-16
composable_kernel/include/tensor_description/tensor_descriptor.hpp
...e_kernel/include/tensor_description/tensor_descriptor.hpp
+17
-59
composable_kernel/include/tensor_operation/threadwise_generic_tensor_slice_copy.hpp
...tensor_operation/threadwise_generic_tensor_slice_copy.hpp
+6
-6
driver/src/conv_bwd_data_driver.cpp
driver/src/conv_bwd_data_driver.cpp
+3
-3
driver/src/conv_driver.cpp
driver/src/conv_driver.cpp
+1
-1
No files found.
composable_kernel/include/kernel_algorithm/gridwise_convolution_backward_data_implicit_gemm_v2r1_nchw_kcyx_nkhw.hpp
View file @
f0eec07c
...
...
@@ -123,7 +123,7 @@ struct GridwiseConvolutionBackwardDataImplicitGemm_v2r1_nchw_kcyx_nkhw
make_tuple
(
Sequence
<
0
>
{},
Sequence
<
1
>
{}));
// output tensor
#if
0
// debug
#if
1
// debug
constexpr
auto
out_n_k_hop_wop_global_desc
=
transform_tensor_descriptor
(
out_n_k_ho_wo_global_desc
,
make_tuple
(
...
...
composable_kernel/include/tensor_description/multi_index_transform.hpp
View file @
f0eec07c
...
...
@@ -41,18 +41,10 @@ struct PassThrough
__host__
__device__
static
constexpr
bool
IsLinearTransform
()
{
return
true
;
}
#if 0
__host__ __device__ static constexpr bool
IsUpperIndexMappedToValidLowerIndex(const UpperIndex& /* idx_up */)
{
return true;
}
#else
__host__
__device__
static
constexpr
bool
IsValidUpperIndexAlwaysMappedToValidLowerIndex
()
{
return
true
;
}
#endif
};
// LowerLengths: Sequence<...>
...
...
@@ -64,6 +56,13 @@ struct Pad
using
LowerIndex
=
MultiIndex
<
nDim
>
;
using
UpperIndex
=
MultiIndex
<
nDim
>
;
__host__
__device__
explicit
constexpr
Pad
()
{
static_assert
(
LowerLengths
::
GetSize
()
==
nDim
&&
LeftPads
::
GetSize
()
==
nDim
&&
RightPads
::
GetSize
()
==
nDim
,
"wrong! # of dimensions not consistent"
);
}
__host__
__device__
static
constexpr
auto
GetNumOfLowerDimension
()
{
return
Number
<
nDim
>
{};
}
__host__
__device__
static
constexpr
auto
GetNumOfUpperDimension
()
{
return
Number
<
nDim
>
{};
}
...
...
@@ -88,15 +87,14 @@ struct Pad
__host__
__device__
static
constexpr
bool
IsLinearTransform
()
{
return
true
;
}
__host__
__device__
constexpr
bool
IsUpperIndexMappedToValidLowerIndex
(
const
UpperIndex
&
idx_up
)
const
__host__
__device__
static
constexpr
bool
IsValidUpperIndexAlwaysMappedToValidLowerIndex
()
{
bool
flag
=
true
;
static_for
<
0
,
nDim
,
1
>
{}([
&
](
auto
idim
)
{
flag
=
flag
&&
(
idx_up
[
idim
]
>=
LeftPads
::
At
(
idim
))
&&
(
idx_up
[
idim
]
<
LeftPads
::
At
(
idim
)
+
LowerLength
s
::
At
(
i
dim
))
;
}
);
for
(
index_t
i
=
0
;
i
<
nDim
;
++
i
)
{
flag
=
flag
&&
LeftPads
::
At
(
i
)
==
0
&&
RightPad
s
::
At
(
i
)
==
0
;
}
return
flag
;
}
...
...
@@ -163,91 +161,6 @@ struct Merge
return
idx_low
;
}
#if 0
// idx_low_diff depends on idx_low_old, so idx_low need to be up-to-date
// If idx_up_diff is known at compile-time, many calculations can be optimized
// away by compiler
// This function assume idx_low_old is not out-of-bound
__host__ __device__ static constexpr auto
CalculateLowerIndexDiff(const UpperIndex& idx_up_diff,
const UpperIndex& /* idx_up_old */,
const LowerIndex& idx_low_old)
{
// do nothing if idx_up_diff == 0
if(idx_up_diff[0] == 0)
{
return make_zero_array<index_t, nDimLow>();
}
// CalculateLowerIndex(idx_up_diff) has multiple integer divisions.
// If idx_up_diff is known at compile-time, the calculation can
// be done at compile-time. However, if idx_up_diff is only known
// at run-time, then the calculation will also be computed at
// run-time, and can be very expensive.
LowerIndex idx_low_new = idx_low_old + CalculateLowerIndex(idx_up_diff);
if(idx_up_diff[0] > 0)
{
bool carry = false;
// do carry check in reversed order, starting from lowest dimension
// don't check the highest dimension
static_for<0, nDimLow - 1, 1>{}([&](auto ireverse) {
constexpr index_t i = nDimLow - 1 - ireverse;
if(carry)
{
++idx_low_new(i);
}
carry = false;
if(idx_low_new[i] >= LowerLengths::At(i))
{
idx_low_new(i) -= LowerLengths::At(i);
carry = true;
}
});
// highest dimension, no out-of-bound check
if(carry)
{
++idx_low_new(0);
}
}
else if(idx_up_diff[0] < 0)
{
bool borrow = false;
// do borrow check in reversed order, starting from lowest dimension
// don't check the highest dimension
static_for<0, nDimLow - 1, 1>{}([&](auto ireverse) {
constexpr index_t i = nDimLow - 1 - ireverse;
if(borrow)
{
--idx_low_new(i);
}
borrow = false;
if(idx_low_new[i] < 0)
{
idx_low_new(i) += LowerLengths::At(i);
borrow = true;
}
});
// highest dimension, no out-of-bound check
if(borrow)
{
--idx_low_new(0);
}
}
return idx_low_new - idx_low_old;
}
#else
// idx_low_diff depends on idx_low_old, so idx_low need to be up-to-date
// If idx_up_diff is known at compile-time, many calculations can be optimized
// away by compiler
...
...
@@ -348,12 +261,10 @@ struct Merge
return
idx_low_new
-
idx_low_old
;
}
}
#endif
__host__
__device__
static
constexpr
bool
IsLinearTransform
()
{
return
false
;
}
__host__
__device__
static
constexpr
bool
IsUpperIndexMappedToValidLowerIndex
(
const
UpperIndex
&
/* idx_up */
)
__host__
__device__
static
constexpr
bool
IsValidUpperIndexAlwaysMappedToValidLowerIndex
()
{
return
true
;
}
...
...
@@ -400,8 +311,7 @@ struct UnMerge
__host__
__device__
static
constexpr
bool
IsLinearTransform
()
{
return
true
;
}
__host__
__device__
static
constexpr
bool
IsUpperIndexMappedToValidLowerIndex
(
const
UpperIndex
&
/* idx_up */
)
__host__
__device__
static
constexpr
bool
IsValidUpperIndexAlwaysMappedToValidLowerIndex
()
{
return
true
;
}
...
...
@@ -410,7 +320,7 @@ struct UnMerge
// UpperLengths: Sequence<...>
// Coefficients: Sequence<...>
// idx_low = coefficients[0, ...nDimUp-1] * idx_up[0, ...nDimUp-1] + coefficients[nDimUp]
template
<
typename
UpperLengths
,
typename
Coefficients
>
template
<
typename
UpperLengths
,
typename
Coefficients
,
bool
IsAlwaysValidMapping
=
true
>
struct
Embed
{
static
constexpr
index_t
nDimLow
=
1
;
...
...
@@ -456,10 +366,9 @@ struct Embed
__host__
__device__
static
constexpr
bool
IsLinearTransform
()
{
return
true
;
}
__host__
__device__
static
constexpr
bool
IsUpperIndexMappedToValidLowerIndex
(
const
UpperIndex
&
/* idx_up */
)
__host__
__device__
static
constexpr
bool
IsValidUpperIndexAlwaysMappedToValidLowerIndex
()
{
return
true
;
return
IsAlwaysValidMapping
;
}
};
...
...
@@ -499,8 +408,7 @@ struct Vectorize
__host__
__device__
static
constexpr
bool
IsLinearTransform
()
{
return
true
;
}
__host__
__device__
static
constexpr
bool
IsUpperIndexMappedToValidLowerIndex
(
const
UpperIndex
&
/* idx_up */
)
__host__
__device__
static
constexpr
bool
IsValidUpperIndexAlwaysMappedToValidLowerIndex
()
{
return
true
;
}
...
...
composable_kernel/include/tensor_description/tensor_coordinate.hpp
View file @
f0eec07c
...
...
@@ -53,6 +53,8 @@ struct NativeTensorCoordinate
__host__
__device__
static
constexpr
auto
GetTensorDescriptor
()
{
return
tensor_desc_type
{};
}
__host__
__device__
constexpr
const
Index
&
GetUpperIndex
()
const
{
return
mIndex
;
}
__host__
__device__
constexpr
const
Index
&
GetIndex
()
const
{
return
mIndex
;
}
__host__
__device__
constexpr
const
index_t
&
GetOffset
()
const
{
return
mOffset
;
}
...
...
@@ -98,9 +100,6 @@ struct NativeTensorCoordinate
return
tensor_desc_type
::
CalculateOffsetDiff
(
idx_diff
);
}
#if 0 // debug
__host__ __device__ static constexpr bool HasValidOffset() { return true; }
#else
// evaluated at run-time
__host__
__device__
constexpr
bool
IsUpperIndexValid
()
const
{
...
...
@@ -117,10 +116,8 @@ struct NativeTensorCoordinate
// evaluated at compile-time
__host__
__device__
static
constexpr
bool
IsOffsetValidAssumingUpperIndexIsValid
()
{
// For native tensor, offset is valid if upper-index is valid
return
true
;
}
#endif
private:
// mIndex may be saved and updated, however, the value of some (or all) of its entries may
...
...
@@ -165,8 +162,6 @@ struct TransformedTensorCoordinate
__host__
__device__
constexpr
const
UpperIndex
&
GetUpperIndex
()
const
{
return
mIndexUp
;
}
__host__
__device__
constexpr
const
LowerIndex
&
GetLowerIndex
()
const
{
return
mIndexLow
.
GetIndex
();
}
__host__
__device__
constexpr
const
UpperIndex
&
GetIndex
()
const
{
return
GetUpperIndex
();
}
__host__
__device__
constexpr
const
index_t
&
GetOffset
()
const
...
...
@@ -230,13 +225,6 @@ struct TransformedTensorCoordinate
return
GetLowerCoordinate
().
CalculateOffsetDiff
(
idx_low_diff
);
}
#if 0 // debug
__host__ __device__ constexpr bool IsUpperIndexMappedToValidOffset() const
{
return tensor_desc_type::IsUpperIndexMappedToValidLowerIndex(GetIndex()) &&
mCoordLow.IsUpperIndexMappedToValidOffset();
}
#else
// evaluated at run-time
__host__
__device__
constexpr
bool
IsUpperIndexValid
()
const
{
...
...
@@ -252,7 +240,8 @@ struct TransformedTensorCoordinate
// most evaluatation is done at comile-time
__host__
__device__
constexpr
bool
IsLowerIndexValidAssumingUpperIndexIsValid
()
const
{
return
tensor_desc_type
::
IsLowerIndexValidAssumingUpperIndexIsValid
(
GetLowerIndex
());
return
tensor_desc_type
::
IsLowerIndexValidAssumingUpperIndexIsValid
(
GetLowerCoordinate
().
GetIndex
());
}
// most evaluatation is done at comile-time
...
...
@@ -261,7 +250,6 @@ struct TransformedTensorCoordinate
return
IsLowerIndexValidAssumingUpperIndexIsValid
()
&&
GetLowerCoordinate
().
IsOffsetValidAssumingUpperIndexIsValid
();
}
#endif
private:
// mIndexUp may be calculated and updated, however, the value of some (or all) of its entries
...
...
composable_kernel/include/tensor_description/tensor_descriptor.hpp
View file @
f0eec07c
...
...
@@ -120,30 +120,18 @@ struct NativeTensorDescriptor
return
Tuple
<>
{};
}
#if 0 // debug
__host__ __device__ static constexpr bool
IsUpperIndexMappedToValidOffset(const Index& /* idx */)
{
return true;
}
#else
// a multi-index is valid if there is a corresponding point for it in the tensor
__host__
__device__
static
constexpr
bool
IsUpperIndexValid
(
const
Index
&
idx
)
{
bool
flag
=
true
;
for
(
index_t
i
=
0
;
i
<
nDim
;
++
i
)
{
flag
=
flag
&&
idx
[
i
]
>=
0
&&
idx
[
i
]
<
GetLengths
()[
i
];
});
return
flag
;
}
}
__host__
__device__
static
constexpr
bool
IsUpperIndexMappedToValidOffset
(
const
Index
&
idx
)
{
return
IsUpperIndexValid
(
idx
)
&&
IsValidUpperIndexAlwaysMappedToValidOffset
();
}
#endif
return
flag
;
}
};
// Tensor descriptor for "transformed tensor"
...
...
@@ -486,41 +474,12 @@ struct TransformedTensorDescriptor
}
#endif
#if 0
__host__ __device__ static constexpr bool
IsUpperIndexMappedToValidLowerIndex(const UpperIndex& idx_up)
{
bool flag = true;
static_for<0, nTransform, 1>{}([&](auto itran) {
constexpr auto tran = Transforms{}.At(itran);
const auto idx_up_part = pick_array_element(idx_up, UpDimensionIds{}.At(itran));
flag = flag && tran.IsUpperIndexMappedToValidLowerIndex(to_array(idx_up_part));
});
return flag;
}
// Whenever this function is called, it will call CalculateLowerIndex() recursively.
// If you have created a tensor coordinate already, instead of calling this function,
// you should call TensorCoordinate::IsUpperIndexMappedToValidOffset() which would
// be less expensive.
__host__ __device__ static constexpr bool
IsUpperIndexMappedToValidOffset(const UpperIndex& idx_up)
{
return IsUpperIndexMappedToValidLowerIndex(idx_up) &&
GetLowerTensorDescriptor().IsUpperIndexMappedToValidOffset(
CalculateLowerIndex(idx_up));
}
#else
//
// a multi-index is valid if there is a corresponding point for it in the tensor
__host__
__device__
constexpr
bool
IsUpperIndexValid
(
const
UpperIndex
&
idx_up
)
const
{
bool
flag
=
true
;
for
(
index_t
i
=
0
;
i
<
nDim
;
++
i
)
for
(
index_t
i
=
0
;
i
<
nDim
Up
;
++
i
)
{
flag
=
flag
&&
idx_up
[
i
]
>=
0
&&
idx_up
[
i
]
<
GetLengths
()[
i
];
}
...
...
@@ -528,9 +487,10 @@ struct TransformedTensorDescriptor
return
flag
;
}
// this function tells you: Is lower-index valid, assuming upper index is valid?
__host__
__device__
constexpr
bool
IsLowerIndexValidAssumingUpperIndexIsValid
(
const
LowerIndex
&
idx_low
)
const
// this function is for optimization purpose, it's called by tensor coordinate
// this function tells you: If a lower-index is valid or not, assuming upper index is valid
__host__
__device__
static
constexpr
bool
IsLowerIndexValidAssumingUpperIndexIsValid
(
const
LowerIndex
&
idx_low
)
{
bool
flag
=
true
;
...
...
@@ -540,22 +500,20 @@ struct TransformedTensorDescriptor
// check a indtransformation if it does not always has a valid mapping
if
(
!
tran
.
IsValidUpperIndexAlwaysMappedToValidLowerIndex
())
{
const
auto
idx_low_part
=
to_array
(
pick_array_element
(
idx_low
,
LowerDimensionIds
{}.
At
(
itran
)));
constexpr
auto
low_dims_part
=
LowDimensionIds
{}.
At
(
itran
);
constexpr
auto
low_lengths_part
=
GetLowerTensorDescriptor
().
GetLengths
(
low_dims_part
);
const
auto
idx_low_part
=
to_array
(
pick_array_element
(
idx_low
,
low_dims_part
));
constexpr
auto
lengths_low_part
=
GetLowerTenosrDescriptor
().
GetLengths
()(
LowerDimensionIds
{});
for
(
index_t
i
=
0
;
i
<
LowerDimensionIds
::
Size
();
++
i
)
for
(
index_t
i
=
0
;
i
<
low_dims_part
.
Size
();
++
i
)
{
flag
=
flag
&&
idx_low_part
[
i
]
>=
0
&&
idx_low_part
[
i
]
<
lengths_
low_
part
[
i
];
flag
=
flag
&&
idx_low_part
[
i
]
>=
0
&&
idx_low_part
[
i
]
<
low_
lengths_part
[
i
];
}
}
});
return
flag
;
}
#endif
};
}
// namespace ck
...
...
composable_kernel/include/tensor_operation/threadwise_generic_tensor_slice_copy.hpp
View file @
f0eec07c
...
...
@@ -110,7 +110,7 @@ struct ThreadwiseGenericTensorSliceCopy_v4r2
// Check src data's valid mapping situation, only check the first data in this src
// vector. It's user's responsiblity to make sure all data in the src vector
// has the valid/invalid mapping situation
if
(
src_coord
.
HasValidOffset
())
if
(
src_coord
.
IsOffsetValidAssumingUpperIndexIsValid
())
{
move_data
<
SrcData
,
SrcDataPerRead
,
...
...
@@ -142,7 +142,7 @@ struct ThreadwiseGenericTensorSliceCopy_v4r2
// Check dst data's valid mapping situation, only check the first data in this dst
// vector. It's user's responsiblity to make sure all data in the dst vector
// has the valid/invalid mapping situation
if
(
dst_coord
.
HasValidOffset
())
if
(
dst_coord
.
IsOffsetValidAssumingUpperIndexIsValid
())
{
move_data
<
DstData
,
DstDataPerWrite
,
...
...
@@ -260,7 +260,7 @@ struct ThreadwiseGenericTensorSliceCopy_v4r2
// src
// vector. It's user's responsiblity to make sure all data in the src vector
// has the valid/invalid mapping situation
if
(
src_coord
.
HasValidOffset
())
if
(
src_coord
.
IsOffsetValidAssumingUpperIndexIsValid
())
{
move_data
<
SrcData
,
SrcDataPerRead
,
...
...
@@ -299,7 +299,7 @@ struct ThreadwiseGenericTensorSliceCopy_v4r2
// dst
// vector. It's user's responsiblity to make sure all data in the dst vector
// has the valid/invalid mapping situation
if
(
dst_coord
.
HasValidOffset
())
if
(
dst_coord
.
IsOffsetValidAssumingUpperIndexIsValid
())
{
move_data
<
DstData
,
DstDataPerWrite
,
...
...
@@ -399,7 +399,7 @@ struct ThreadwiseGenericTensorSliceCopy_v4r2
// src
// vector. It's user's responsiblity to make sure all data in the src vector
// has the valid/invalid mapping situation
if
(
src_coord
.
HasValidOffset
())
if
(
src_coord
.
IsOffsetValidAssumingUpperIndexIsValid
())
{
move_data
<
SrcData
,
SrcDataPerRead
,
...
...
@@ -444,7 +444,7 @@ struct ThreadwiseGenericTensorSliceCopy_v4r2
// dst
// vector. It's user's responsiblity to make sure all data in the dst vector
// has the valid/invalid mapping situation
if
(
dst_coord
.
HasValidOffset
())
if
(
dst_coord
.
IsOffsetValidAssumingUpperIndexIsValid
())
{
move_data
<
DstData
,
DstDataPerWrite
,
...
...
driver/src/conv_bwd_data_driver.cpp
View file @
f0eec07c
...
...
@@ -21,7 +21,7 @@ int main(int argc, char* argv[])
{
using
namespace
ck
;
#if
1
#if
0
// 1x1
constexpr index_t N = 256;
constexpr index_t C = 1024;
...
...
@@ -36,7 +36,7 @@ int main(int argc, char* argv[])
using LeftPads = Sequence<0, 0>;
using RightPads = Sequence<0, 0>;
#elif
1
#elif
0
// 1x7
constexpr
index_t
N
=
128
;
constexpr
index_t
C
=
1024
;
...
...
@@ -291,7 +291,7 @@ int main(int argc, char* argv[])
using
LeftPads
=
Sequence
<
3
,
0
>
;
using
RightPads
=
Sequence
<
3
,
0
>
;
#elif
0
#elif
1
// 1x7 filter, 0x3 pad, 17x17 input
constexpr
index_t
N
=
128
;
constexpr
index_t
C
=
128
;
...
...
driver/src/conv_driver.cpp
View file @
f0eec07c
...
...
@@ -59,7 +59,7 @@ int main(int argc, char* argv[])
using
LeftPads
=
Sequence
<
0
,
3
>
;
using
RightPads
=
Sequence
<
0
,
3
>
;
#elif
1
#elif
0
// 3x3, 34x34
constexpr
index_t
N
=
64
;
constexpr
index_t
C
=
256
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment