gaoqiong / composable_kernel · Commits

Commit e00de11e, authored Feb 07, 2021 by Chao Liu
"changing class to POD"
Parent: f16356d4

Showing 7 changed files with 142 additions and 185 deletions (+142, -185)
composable_kernel/include/driver/driver_dynamic_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw.hpp  (+68, -0)
composable_kernel/include/tensor_description/dynamic_multi_index_transform.hpp  (+28, -118)
composable_kernel/include/tensor_description/dynamic_tensor_descriptor.hpp  (+11, -7)
composable_kernel/include/tensor_description/dynamic_tensor_descriptor_helper.hpp  (+5, -5)
composable_kernel/include/utility/tuple.hpp  (+24, -49)
driver/src/conv_driver.cpp  (+5, -5)
script/cmake-rocm3.7.sh  (+1, -1)
composable_kernel/include/driver/driver_dynamic_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw.hpp

@@ -9,6 +9,7 @@
 namespace ck {
 
+#if 1
 // GemmM = K
 // GemmN = N * Ho * Wo
 // GemmK = C * Y * X
@@ -903,6 +904,73 @@ struct DriverDynamicConvolutionForwardImplicitGemm_v4r4_nchw_kcyx_nkhw_1x1
         }
     }
 };
+#else
+template <index_t BlockSize,
+          typename Float,
+          typename AccFloat,
+          index_t GemmMPerBlock,
+          index_t GemmNPerBlock,
+          index_t GemmKPerBlock,
+          index_t GemmMPerThread,
+          index_t GemmNPerThread,
+          index_t GemmKPerThread,
+          index_t GemmMLevel0Cluster,
+          index_t GemmNLevel0Cluster,
+          index_t GemmMLevel1Cluster,
+          index_t GemmNLevel1Cluster,
+          typename GemmABlockTransferThreadSliceLengths_GemmK_GemmM,
+          typename GemmABlockTransferThreadClusterLengths_GemmK_GemmM,
+          index_t GemmABlockTransferSrcScalarPerVector_GemmK,
+          index_t GemmABlockTransferDstScalarPerVector_GemmM,
+          typename GemmBBlockTransferThreadSliceLengths_GemmK_GemmN,
+          typename GemmBBlockTransferThreadClusterLengths_GemmK_GemmN,
+          index_t GemmBBlockTransferSrcScalarPerVector_GemmN,
+          index_t GemmBBlockTransferDstScalarPerVector_GemmN,
+          index_t GemmCThreadTransferDstScalarPerVector_GemmN1>
+struct DriverDynamicConvolutionForwardImplicitGemm_v4r4_nchw_kcyx_nkhw_pad
+{
+    template <typename... Wei, typename... In, typename... Out>
+    __host__ void Run(const DynamicTensorDescriptor<Wei...>& wei_k_c_y_x_global_desc,
+                      const DynamicTensorDescriptor<In...>& in_n_c_hi_wi_global_desc,
+                      const DynamicTensorDescriptor<Out...>& out_n_k_ho_wo_global_desc,
+                      const MultiIndex<2> conv_strides,
+                      const MultiIndex<2> conv_dilations,
+                      const MultiIndex<2> in_left_pads,
+                      const MultiIndex<2> in_right_pads,
+                      const Float* __restrict__ p_wei_global,
+                      const Float* __restrict__ p_in_global,
+                      Float* __restrict__ p_out_global) const
+    {
+        constexpr auto pass   = DynamicPassThrough();
+        constexpr auto pad    = DynamicLeftPad<false>();
+        constexpr auto freeze = DynamicFreeze();
+
+        constexpr auto desc =
+            ck::DynamicTensorDescriptor<ck::Tuple<ck::DynamicUnMerge<4, false>>,
+                                        ck::Tuple<ck::Sequence<0>>,
+                                        ck::Tuple<ck::Sequence<1, 2, 3, 4>>,
+                                        ck::Sequence<1, 2, 3, 4>>();
+
+        static_assert(std::is_trivial<Sequence<1>>::value, "wrong");
+        static_assert(std::is_trivial<detail::TupleElementKey<0>>::value, "wrong");
+        static_assert(
+            std::is_trivial<detail::TupleElement<detail::TupleElementKey<0>, index_t>>::value,
+            "wrong");
+        static_assert(std::is_trivial<detail::TupleImpl<Sequence<0>, index_t>>::value, "wrong");
+        static_assert(std::is_trivial<Tuple<index_t>>::value, "wrong");
+        static_assert(std::is_trivial<MultiIndex<2>>::value, "wrong");
+        static_assert(std::is_trivial<MultiIndex<1>>::value, "wrong");
+        static_assert(std::is_trivial<DynamicPassThrough>::value, "wrong");
+        static_assert(std::is_trivial<DynamicUnMerge<2>>::value, "wrong");
+        static_assert(std::is_trivial<DynamicFreeze>::value, "wrong");
+        static_assert(std::is_trivial<remove_cv_t<decltype(desc)>>::value, "wrong");
+        static_assert(
+            std::is_trivial<remove_reference_t<remove_cv_t<decltype(conv_strides)>>>::value,
+            "wrong");
+        static_assert(
+            std::is_trivial<remove_reference_t<remove_cv_t<decltype(wei_k_c_y_x_global_desc)>>>::value,
+            "wrong");
+    }
+};
+#endif
 } // namespace ck
 #endif
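The new static_asserts above pin down the intent of the commit: every building block of a dynamic tensor descriptor (Sequence, Tuple, MultiIndex, the Dynamic* transforms, and the descriptor itself) must now satisfy std::is_trivial. Below is a minimal, self-contained sketch, using hypothetical NonPod/Pod types rather than the ck:: classes, of the two properties the rest of the diff removes and restores: a const data member or a user-provided copy constructor disqualifies a type from being trivial, while plain members plus a defaulted default constructor keep it trivially default-constructible and trivially copyable.

// Hypothetical illustration only; compiles with any C++14 compiler.
#include <type_traits>

struct NonPod
{
    const int len_; // const member: no usable default constructor, not assignable

    // user-provided copy constructor: the type is no longer trivially copyable
    NonPod(const NonPod& other) : len_{other.len_} {}
};

struct Pod
{
    int len_; // plain member

    Pod() = default;            // trivial default constructor
    Pod(int len) : len_{len} {} // extra constructors do not affect triviality
};

static_assert(!std::is_trivial<NonPod>::value, "NonPod has lost triviality");
static_assert(std::is_trivial<Pod>::value, "Pod is trivial");
static_assert(std::is_trivially_copyable<Pod>::value, "Pod can be memcpy'd");

int main() { return 0; }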
composable_kernel/include/tensor_description/dynamic_multi_index_transform.hpp

@@ -11,25 +11,15 @@ struct DynamicPassThrough
     using LowerIndex = MultiIndex<1>;
     using UpperIndex = MultiIndex<1>;
 
-    const UpperIndex up_lengths_;
+    UpperIndex up_lengths_;
 
-    __host__ __device__ constexpr DynamicPassThrough(const DynamicPassThrough& other)
-        : up_lengths_{other.up_lengths_}
-    {
-    }
-
-    __host__ __device__ constexpr DynamicPassThrough(DynamicPassThrough&& other)
-        : up_lengths_{other.up_lengths_}
-    {
-    }
+    __host__ __device__ constexpr DynamicPassThrough() = default;
 
     __host__ __device__ constexpr DynamicPassThrough(const index_t& low_length)
         : up_lengths_{make_multi_index(low_length)}
     {
     }
 
-    __host__ __device__ constexpr DynamicPassThrough() : up_lengths_{0} {}
-
     __host__ __device__ static constexpr index_t GetNumOfLowerDimension() { return 1; }
 
     __host__ __device__ static constexpr index_t GetNumOfUpperDimension() { return 1; }
@@ -88,19 +78,11 @@ struct DynamicPad
     using LowerIndex = MultiIndex<1>;
     using UpperIndex = MultiIndex<1>;
 
-    const UpperIndex up_lengths_;
-    const index_t left_pad_;
-    const index_t right_pad_;
+    UpperIndex up_lengths_;
+    index_t left_pad_;
+    index_t right_pad_;
 
-    __host__ __device__ constexpr DynamicPad(const DynamicPad& other)
-        : up_lengths_{other.up_lengths_}, left_pad_{other.left_pad_}, right_pad_{other.right_pad_}
-    {
-    }
-
-    __host__ __device__ constexpr DynamicPad(DynamicPad&& other)
-        : up_lengths_{other.up_lengths_}, left_pad_{other.left_pad_}, right_pad_{other.right_pad_}
-    {
-    }
+    __host__ __device__ constexpr DynamicPad() = default;
 
     __host__ __device__ constexpr DynamicPad(const index_t& low_length,
                                              const index_t& left_pad,
@@ -111,8 +93,6 @@ struct DynamicPad
     {
     }
 
-    __host__ __device__ constexpr DynamicPad() : up_lengths_{0}, left_pad_{0}, right_pad_{0} {}
-
     __host__ __device__ static constexpr index_t GetNumOfLowerDimension() { return 1; }
 
     __host__ __device__ static constexpr index_t GetNumOfUpperDimension() { return 1; }
@@ -173,26 +153,16 @@ struct DynamicLeftPad
     using LowerIndex = MultiIndex<1>;
     using UpperIndex = MultiIndex<1>;
 
-    const UpperIndex up_lengths_;
-    const index_t left_pad_;
+    UpperIndex up_lengths_;
+    index_t left_pad_;
 
-    __host__ __device__ constexpr DynamicLeftPad(const DynamicLeftPad& other)
-        : up_lengths_{other.up_lengths_}, left_pad_{other.left_pad_}
-    {
-    }
-
-    __host__ __device__ constexpr DynamicLeftPad(DynamicLeftPad&& other)
-        : up_lengths_{other.up_lengths_}, left_pad_{other.left_pad_}
-    {
-    }
+    __host__ __device__ constexpr DynamicLeftPad() = default;
 
     __host__ __device__ constexpr DynamicLeftPad(const index_t& low_length, const index_t& left_pad)
         : up_lengths_{make_multi_index(low_length + left_pad)}, left_pad_{left_pad}
     {
     }
 
-    __host__ __device__ constexpr DynamicLeftPad() : up_lengths_{0}, left_pad_{0} {}
-
     __host__ __device__ static constexpr index_t GetNumOfLowerDimension() { return 1; }
 
     __host__ __device__ static constexpr index_t GetNumOfUpperDimension() { return 1; }
@@ -252,23 +222,11 @@ struct DynamicRightPad
     using LowerIndex = MultiIndex<1>;
     using UpperIndex = MultiIndex<1>;
 
-    const UpperIndex up_lengths_;
-    const index_t low_length_;
-    const index_t right_pad_;
+    UpperIndex up_lengths_;
+    index_t low_length_;
+    index_t right_pad_;
 
-    __host__ __device__ constexpr DynamicRightPad(const DynamicRightPad& other)
-        : up_lengths_{other.up_lengths_}, low_length_{other.low_length_}, right_pad_{other.right_pad_}
-    {
-    }
-
-    __host__ __device__ constexpr DynamicRightPad(DynamicRightPad&& other)
-        : up_lengths_{other.up_lengths_}, low_length_{other.low_length_}, right_pad_{other.right_pad_}
-    {
-    }
+    __host__ __device__ constexpr DynamicRightPad() = default;
 
     __host__ __device__ constexpr DynamicRightPad(const index_t& low_length,
                                                   const index_t& right_pad)
@@ -278,10 +236,6 @@ struct DynamicRightPad
     {
     }
 
-    __host__ __device__ constexpr DynamicRightPad()
-        : up_lengths_{0}, low_length_{0}, right_pad_{0}
-    {
-    }
-
     __host__ __device__ static constexpr index_t GetNumOfLowerDimension() { return 1; }
 
     __host__ __device__ static constexpr index_t GetNumOfUpperDimension() { return 1; }
@@ -342,18 +296,10 @@ struct DynamicEmbed
     using LowerIndex = MultiIndex<1>;
     using UpperIndex = MultiIndex<NDimUp>;
 
-    const UpperIndex up_lengths_;
-    const UpperIndex coefficients_;
+    UpperIndex up_lengths_;
+    UpperIndex coefficients_;
 
-    __host__ __device__ constexpr DynamicEmbed(const DynamicEmbed& other)
-        : up_lengths_{other.up_lengths_}, coefficients_{other.coefficients_}
-    {
-    }
-
-    __host__ __device__ constexpr DynamicEmbed(DynamicEmbed&& other)
-        : up_lengths_{other.up_lengths_}, coefficients_{other.coefficients_}
-    {
-    }
+    __host__ __device__ constexpr DynamicEmbed() = default;
 
     __host__ __device__ constexpr DynamicEmbed(const UpperIndex& up_lengths,
                                                const UpperIndex& coefficients)
@@ -362,19 +308,6 @@ struct DynamicEmbed
         static_assert(UpperIndex::Size() == NDimUp, "wrong! # of dimensions not consistent");
     }
 
-    template <typename UpperLengths, typename Coefficients>
-    __host__ __device__ constexpr DynamicEmbed(const UpperLengths& up_lengths,
-                                               const Coefficients& coefficients)
-        : up_lengths_{up_lengths}, coefficients_{coefficients}
-    {
-    }
-
-    __host__ __device__ constexpr DynamicEmbed()
-        : up_lengths_{make_zero_multi_index<NDimUp>()},
-          coefficients_{make_zero_multi_index<NDimUp>()}
-    {
-    }
-
     __host__ __device__ static constexpr index_t GetNumOfLowerDimension() { return 1; }
 
     __host__ __device__ static constexpr index_t GetNumOfUpperDimension() { return NDimUp; }
@@ -439,23 +372,11 @@ struct DynamicMerge
     using LowerIndex = MultiIndex<NDimLow>;
     using UpperIndex = MultiIndex<1>;
 
-    const LowerIndex low_lengths_;
-    const LowerIndex low_lengths_scan_;
-    const UpperIndex up_lengths_;
+    LowerIndex low_lengths_;
+    LowerIndex low_lengths_scan_;
+    UpperIndex up_lengths_;
 
-    __host__ __device__ constexpr DynamicMerge(const DynamicMerge& other)
-        : low_lengths_{other.low_lengths_},
-          low_lengths_scan_{other.low_lengths_scan_},
-          up_lengths_{other.up_lengths_}
-    {
-    }
-
-    __host__ __device__ constexpr DynamicMerge(DynamicMerge&& other)
-        : low_lengths_{other.low_lengths_},
-          low_lengths_scan_{other.low_lengths_scan_},
-          up_lengths_{other.up_lengths_}
-    {
-    }
+    __host__ __device__ constexpr DynamicMerge() = default;
 
     __host__ __device__ constexpr DynamicMerge(const LowerIndex& low_lengths)
         : low_lengths_{low_lengths},
@@ -467,13 +388,6 @@ struct DynamicMerge
         static_assert(LowerIndex::Size() == NDimLow, "wrong!");
     }
 
-    __host__ __device__ constexpr DynamicMerge()
-        : low_lengths_{make_zero_multi_index<NDimLow>()},
-          low_lengths_scan_{make_zero_multi_index<NDimLow>()},
-          up_lengths_{0}
-    {
-    }
-
     __host__ __device__ static constexpr index_t GetNumOfLowerDimension() { return NDimLow; }
 
     __host__ __device__ static constexpr index_t GetNumOfUpperDimension() { return 1; }
@@ -953,8 +867,10 @@ struct DynamicUnMerge
     using LowerIndex = MultiIndex<1>;
     using UpperIndex = MultiIndex<NDimUp>;
 
-    const UpperIndex up_lengths_;
-    const UpperIndex up_lengths_scan_;
+    UpperIndex up_lengths_;
+    UpperIndex up_lengths_scan_;
+
+    __host__ __device__ constexpr DynamicUnMerge() = default;
 
     __host__ __device__ constexpr DynamicUnMerge(const UpperIndex& up_lengths)
         : up_lengths_{up_lengths},
@@ -963,12 +879,6 @@ struct DynamicUnMerge
     {
     }
 
-    __host__ __device__ constexpr DynamicUnMerge()
-        : up_lengths_{make_zero_multi_index<NDimUp>()},
-          up_lengths_scan_{make_zero_multi_index<NDimUp>()}
-    {
-    }
-
     __host__ __device__ static constexpr index_t GetNumOfLowerDimension() { return 1; }
 
     __host__ __device__ static constexpr index_t GetNumOfUpperDimension() { return NDimUp; }
@@ -1035,11 +945,11 @@ struct DynamicFreeze
     using LowerIndex = MultiIndex<1>;
     using UpperIndex = MultiIndex<0>;
 
-    const index_t low_idx_;
+    LowerIndex low_idx_;
 
-    __host__ __device__ constexpr DynamicFreeze(const index_t& low_idx) : low_idx_{low_idx} {}
+    __host__ __device__ constexpr DynamicFreeze() = default;
 
-    __host__ __device__ constexpr DynamicFreeze() : low_idx_{0} {}
+    __host__ __device__ constexpr DynamicFreeze(const index_t& low_idx) : low_idx_{low_idx} {}
 
     __host__ __device__ static constexpr index_t GetNumOfLowerDimension() { return 1; }
@@ -1054,7 +964,7 @@ struct DynamicFreeze
         static_assert(LowIdx::Size() == 1 && UpIdx::Size() == 1,
                       "wrong! inconsistent # of dimension");
 
-        idx_low(Number<0>{}) = low_idx_;
+        idx_low = low_idx_;
     }
 
     template <typename LowIdxDiff,
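The same three-step change repeats for every transform in this header (DynamicPassThrough, DynamicPad, DynamicLeftPad, DynamicRightPad, DynamicEmbed, DynamicMerge, DynamicUnMerge, DynamicFreeze): the const qualifiers come off the data members, the hand-written copy/move constructors and the zero-initializing default constructors are removed, and a defaulted default constructor is added. Below is a hedged before/after sketch of that pattern on a hypothetical SimplePad, not one of the ck:: transforms and without the __host__ __device__ markers the real code carries; with the "after" form, the compiler-generated copy, move, and assignment are all trivial, which is what allows these objects to be passed by value and stored in trivially copyable descriptors.

#include <cstdint>
#include <type_traits>

using index_t = std::int32_t; // stand-in for ck::index_t

// Before: const members force user-written constructors and block assignment,
// so the type is not trivially copyable and has no default constructor.
struct SimplePadBefore
{
    const index_t up_length_;
    const index_t left_pad_;

    constexpr SimplePadBefore(const SimplePadBefore& other)
        : up_length_{other.up_length_}, left_pad_{other.left_pad_}
    {
    }

    constexpr SimplePadBefore(index_t low_length, index_t left_pad)
        : up_length_{low_length + left_pad}, left_pad_{left_pad}
    {
    }
};

// After: plain members, defaulted default constructor, no user copy/move;
// every compiler-generated special member is trivial.
struct SimplePadAfter
{
    index_t up_length_;
    index_t left_pad_;

    constexpr SimplePadAfter() = default;

    constexpr SimplePadAfter(index_t low_length, index_t left_pad)
        : up_length_{low_length + left_pad}, left_pad_{left_pad}
    {
    }
};

static_assert(!std::is_trivially_copyable<SimplePadBefore>::value, "user copy ctor");
static_assert(std::is_trivially_copyable<SimplePadAfter>::value, "trivially copyable");
static_assert(std::is_trivial<SimplePadAfter>::value, "trivial");

int main() { return 0; }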
composable_kernel/include/tensor_description/dynamic_tensor_descriptor.hpp

@@ -58,6 +58,15 @@ struct DynamicTensorDescriptor
     using Coordinate = DynamicTensorCoordinate<ndim_hidden_, VisibleDimensionIds>;
 
     public:
+#if 1
+    __host__ __device__ explicit constexpr DynamicTensorDescriptor()
+        : DynamicTensorDescriptor(Transforms{}, index_t{0})
+    {
+    }
+#else
+    __host__ __device__ constexpr DynamicTensorDescriptor() = default;
+#endif
+
     __host__ __device__ explicit constexpr DynamicTensorDescriptor(const Transforms& transforms,
                                                                    index_t element_space_size)
         : transforms_{transforms},
@@ -71,11 +80,6 @@ struct DynamicTensorDescriptor
         // TODO check dependency of dimensions is valid
     }
 
-    __host__ __device__ explicit constexpr DynamicTensorDescriptor()
-        : DynamicTensorDescriptor(Transforms{}, index_t{0})
-    {
-    }
-
     __host__ __device__ static constexpr index_t GetNumOfDimension()
     {
         return GetNumOfVisibleDimension();
@@ -150,10 +154,10 @@ struct DynamicTensorDescriptor
     }
 
     // TODO make these private
-    const Transforms transforms_;
+    Transforms transforms_;
 
     // TODO maybe hidden_lengths_ should use reference_wrapper (reference to transforms_'s member
     // variable lengths_) to save space on stack?
-    const HiddenIndex hidden_lengths_;
+    HiddenIndex hidden_lengths_;
 };
 
 template <index_t NDimHidden, typename VisibleDimensionIds>
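Note that DynamicTensorDescriptor itself does not take the fully defaulted constructor yet: the "#if 1" branch keeps an explicit default constructor that delegates to the two-argument constructor with empty Transforms and element_space_size = 0, while the "= default" variant is parked in the "#else" branch. A hedged sketch of that delegating pattern on a hypothetical Descriptor (not the ck:: class, and with only two members):

#include <cstdint>
#include <tuple>

using index_t = std::int32_t; // stand-in for ck::index_t

template <typename Transforms>
struct Descriptor
{
    // Default construction delegates to the main constructor, so both members
    // are value-initialized rather than left indeterminate.
    explicit constexpr Descriptor() : Descriptor(Transforms{}, index_t{0}) {}

    explicit constexpr Descriptor(const Transforms& transforms, index_t element_space_size)
        : transforms_{transforms}, element_space_size_{element_space_size}
    {
    }

    Transforms transforms_; // no longer const, so the descriptor stays assignable
    index_t element_space_size_;
};

int main()
{
    Descriptor<std::tuple<index_t>> d; // transforms_ == {}, element_space_size_ == 0
    (void)d;
    return 0;
}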
composable_kernel/include/tensor_description/dynamic_tensor_descriptor_helper.hpp

@@ -49,11 +49,11 @@ make_dynamic_naive_tensor_descriptor_packed(const MultiIndex<N>& lengths)
     const index_t element_space_size =
         container_reduce(lengths, math::multiplies<index_t>{}, index_t{1});
 
-    return DynamicTensorDescriptor<decltype(transforms),
-                                   decltype(low_dim_hidden_idss),
-                                   decltype(up_dim_hidden_idss),
-                                   decltype(visible_dim_hidden_ids)>{transforms,
-                                                                     element_space_size};
+    return DynamicTensorDescriptor<remove_cv_t<decltype(transforms)>,
+                                   remove_cv_t<decltype(low_dim_hidden_idss)>,
+                                   remove_cv_t<decltype(up_dim_hidden_idss)>,
+                                   remove_cv_t<decltype(visible_dim_hidden_ids)>>{
+        transforms, element_space_size};
 }
 
 template <index_t N>
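The helper change is a small companion to making the descriptor members non-const: the transform and hidden-dimension-id objects are presumably const/constexpr locals inside this factory function, so decltype on them yields const-qualified types, and feeding those directly into DynamicTensorDescriptor would silently turn its now non-const members back into const ones. remove_cv_t strips the qualifier before it is baked into the return type. A short hedged sketch of the mechanism, with a hypothetical Descriptor and an int standing in for the transforms tuple:

#include <type_traits>

template <typename Transforms>
struct Descriptor
{
    Transforms transforms_; // meant to be a plain, assignable member
};

int main()
{
    constexpr int transforms = 42; // stand-in for the transform tuple built by the helper

    // decltype of a constexpr local is const-qualified:
    static_assert(std::is_same<decltype(transforms), const int>::value, "const int");

    // Without remove_cv_t the member would be 'const int' and the descriptor
    // would not be assignable; stripping cv restores the POD-friendly member.
    Descriptor<std::remove_cv_t<decltype(transforms)>> d{0};
    d.transforms_ = 1;

    return 0;
}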
composable_kernel/include/utility/tuple.hpp

@@ -17,13 +17,7 @@ struct TupleElementKey
 template <typename Key, typename Data>
 struct TupleElement
 {
-    __host__ __device__ explicit constexpr TupleElement() : mData() {}
-
-#if 0
-    __host__ __device__ explicit constexpr TupleElement(const TupleElement&) = default;
-    __host__ __device__ explicit constexpr TupleElement(TupleElement&&) = default;
-#endif
+    __host__ __device__ explicit constexpr TupleElement() = default;
 
     template <typename UData>
     __host__ __device__ explicit constexpr TupleElement(const TupleElement<Key, UData>& te)
@@ -70,38 +64,36 @@ struct TupleImpl;
 template <index_t... Is, typename... Xs>
 struct TupleImpl<Sequence<Is...>, Xs...> : TupleElement<TupleElementKey<Is>, Xs>...
 {
-    __host__ __device__ explicit constexpr TupleImpl() : TupleElement<TupleElementKey<Is>, Xs>()...
-    {
-        static_assert(sizeof...(Is) == sizeof...(Xs), "wrong! inconsistent size");
-    }
-
-#if 0
-    __host__ __device__ explicit constexpr TupleImpl(const TupleImpl&) = default;
-    __host__ __device__ explicit constexpr TupleImpl(TupleImpl&&) = default;
-#endif
-
-    template <index_t... Js, typename... Ys>
-    __host__ __device__ explicit constexpr TupleImpl(const TupleImpl<Sequence<Js...>, Ys...>& y)
-        : TupleElement<TupleElementKey<Is>, Xs>(
-              static_cast<const TupleElement<TupleElementKey<Js>, Ys>&>(y))...
-    {
-    }
-
-    template <index_t... Js, typename... Ys>
-    __host__ __device__ explicit constexpr TupleImpl(TupleImpl<Sequence<Js...>, Ys...>&& y)
-        : TupleElement<TupleElementKey<Is>, Xs>(
-              static_cast<TupleElement<TupleElementKey<Js>, Ys>&&>(y))...
-    {
-    }
-
-    template <typename... Ys, typename std::enable_if<sizeof...(Ys) >= 1, bool>::type = false>
+#if 1
+    __host__ __device__ explicit constexpr TupleImpl() = default;
+
+    template <typename... Ys, typename std::enable_if<sizeof...(Ys) >= 1, bool>::type = false>
+    __host__ __device__ explicit constexpr TupleImpl(Ys&&... ys)
+        : TupleElement<TupleElementKey<Is>, Xs>(std::forward<Ys>(ys))...
+    {
+        static_assert(sizeof...(Is) == sizeof...(Xs) && sizeof...(Is) == sizeof...(Ys),
+                      "wrong! inconsistent size");
+    }
+#else
+    __host__ __device__ explicit constexpr TupleImpl() = default;
+
+    template <typename Y,
+              typename std::enable_if<sizeof...(Is) == 1 && sizeof...(Xs) == 1 &&
                                          !is_same<remove_cv_t<Y>, TupleImpl>::value,
+                                      bool>::type = false>
+    __host__ __device__ explicit constexpr TupleImpl(Y&& y)
+        : TupleElement<TupleElementKey<Is>, Xs>(std::forward<Y>(y))...
+    {
+    }
+
+    template <typename... Ys, typename std::enable_if<sizeof...(Ys) >= 2, bool>::type = false>
     __host__ __device__ explicit constexpr TupleImpl(Ys&&... ys)
        : TupleElement<TupleElementKey<Is>, Xs>(std::forward<Ys>(ys))...
     {
         static_assert(sizeof...(Is) == sizeof...(Xs) && sizeof...(Is) == sizeof...(Ys),
                       "wrong! inconsistent size");
     }
+#endif
 
     __host__ __device__ static constexpr index_t Size() { return sizeof...(Xs); }
@@ -126,34 +118,17 @@ struct Tuple : detail::TupleImpl<typename arithmetic_sequence_gen<0, sizeof...(X
     using base =
         detail::TupleImpl<typename arithmetic_sequence_gen<0, sizeof...(Xs), 1>::type, Xs...>;
 
-    __host__ __device__ explicit constexpr Tuple() : base() {}
-
-#if 0
-    __host__ __device__ constexpr Tuple(const Tuple&) = default;
-    __host__ __device__ constexpr Tuple(Tuple&&) = default;
-#endif
-
-    template <typename... Ys,
-              typename std::enable_if<sizeof...(Ys) == sizeof...(Xs), bool>::type = false>
-    __host__ __device__ explicit constexpr Tuple(const Tuple<Ys...>& y)
-        : base(static_cast<const detail::TupleImpl<
-                   typename arithmetic_sequence_gen<0, sizeof...(Ys), 1>::type, Ys...>&>(y))
-    {
-    }
-
-    template <typename... Ys,
-              typename std::enable_if<sizeof...(Ys) == sizeof...(Xs), bool>::type = false>
-    __host__ __device__ explicit constexpr Tuple(Tuple<Ys...>&& y)
-        : base(static_cast<
-               detail::TupleImpl<typename arithmetic_sequence_gen<0, sizeof...(Ys), 1>::type, Ys...>&&>(y))
-    {
-    }
+    __host__ __device__ constexpr Tuple() = default;
+
+    template <typename Y,
+              typename std::enable_if<sizeof...(Xs) == 1 && !is_same<remove_cv_t<Y>, Tuple>::value,
+                                      bool>::type = false>
+    __host__ __device__ explicit constexpr Tuple(Y&& y) : base(std::forward<Y>(y))
+    {
+    }
 
     template <typename... Ys,
-              typename std::enable_if<sizeof...(Ys) == sizeof...(Xs) && sizeof...(Ys) >= 1,
+              typename std::enable_if<sizeof...(Ys) == sizeof...(Xs) && sizeof...(Ys) >= 2,
                                       bool>::type = false>
     __host__ __device__ explicit constexpr Tuple(Ys&&... ys) : base(std::forward<Ys>(ys)...)
     {
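The Tuple/TupleImpl rework replaces the index-wise converting constructors with a defaulted default constructor plus constrained forwarding constructors. Two constraints matter: the single-argument overload is disabled whenever the argument is the tuple type itself (the !is_same<remove_cv_t<Y>, ...> test), so it can never hijack copy or move construction, and the variadic overload only participates for two or more arguments. A hedged, standalone sketch of that guard on a hypothetical single-element Box, using std::decay in place of the project's remove_cv_t:

#include <type_traits>
#include <utility>

template <typename X>
struct Box
{
    X value_;

    constexpr Box() = default;

    // Participates only when Y is not Box itself; without this guard the
    // forwarding constructor would win overload resolution against the
    // implicit copy constructor for non-const lvalue arguments.
    template <typename Y,
              typename std::enable_if<!std::is_same<typename std::decay<Y>::type, Box>::value,
                                      bool>::type = false>
    explicit constexpr Box(Y&& y) : value_(std::forward<Y>(y))
    {
    }
};

int main()
{
    Box<int> a(1);
    Box<int> b(a); // copy constructor, not Box(Y&&), thanks to the enable_if guard
    (void)b;
    return 0;
}

Defaulting the default constructor here is also what lets Tuple pass the std::is_trivial checks added in the driver header.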
driver/src/conv_driver.cpp

@@ -11,12 +11,12 @@
 #include "conv_common.hpp"
 #include "host_conv.hpp"
 #include "device_tensor.hpp"
-#include "device_convolution_forward_implicit_gemm_v4r1_nchw_kcyx_nkhw.hpp"
-#include "device_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw.hpp"
+// #include "device_convolution_forward_implicit_gemm_v4r1_nchw_kcyx_nkhw.hpp"
+// #include "device_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw.hpp"
 #include "device_dynamic_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw.hpp"
-#include "device_dummy_static_transform.hpp"
-#include "device_dummy_dynamic_transform_v1.hpp"
-#include "device_dummy_dynamic_transform.hpp"
+// #include "device_dummy_static_transform.hpp"
+// #include "device_dummy_dynamic_transform_v1.hpp"
+// #include "device_dummy_dynamic_transform.hpp"
 
 int main(int argc, char* argv[])
 {
script/cmake-rocm3.7.sh

@@ -10,7 +10,7 @@ cmake
 -D CMAKE_INSTALL_PREFIX=${MY_PROJECT_INSTALL} \
 -D CMAKE_BUILD_TYPE=Release \
 -D DEVICE_BACKEND="AMD" \
--D CMAKE_CXX_FLAGS="-O3 --amdgpu-target=gfx906 -gline-tables-only -save-temps=$CWD" \
+-D CMAKE_CXX_FLAGS="-O3 --amdgpu-target=gfx906 -gline-tables-only -save-temps=$CWD -ftemplate-backtrace-limit=0" \
 -D CMAKE_CXX_COMPILER=/opt/rocm/bin/hipcc \
 -D CMAKE_PREFIX_PATH="/opt/rocm" \
 -D CMAKE_VERBOSE_MAKEFILE:BOOL=ON \
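The only change here appends -ftemplate-backtrace-limit=0 to CMAKE_CXX_FLAGS. For the clang-based hipcc this removes the cap on how many template-instantiation notes are printed with a diagnostic, presumably to make errors in the deeply nested descriptor and tuple templates touched by this commit easier to trace.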