gaoqiong / composable_kernel

Commit 9535f806, authored Sep 26, 2020 by Chao Liu
Parent: c98cbea0

refactoring array
Showing 13 changed files with 137 additions and 238 deletions (+137 -238)
composable_kernel/include/kernel_algorithm/dummy_dynamic_transform_v1.hpp  (+35 -29)
composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw_lds_double_buffer.hpp  (+1 -1)
composable_kernel/include/tensor_description/multi_index.hpp  (+53 -2)
composable_kernel/include/tensor_description/multi_index_transform.hpp  (+3 -3)
composable_kernel/include/tensor_description/tensor_coordinate.hpp  (+2 -2)
composable_kernel/include/tensor_description/tensor_descriptor.hpp  (+6 -4)
composable_kernel/include/tensor_operation/blockwise_generic_tensor_slice_copy.hpp  (+2 -2)
composable_kernel/include/tensor_operation/threadwise_generic_tensor_slice_copy.hpp  (+7 -7)
composable_kernel/include/utility/array.hpp  (+19 -141)
composable_kernel/include/utility/functional3.hpp  (+1 -1)
composable_kernel/include/utility/statically_indexed_array.hpp  (+0 -38)
driver/include/device_dummy_dynamic_transform_v2.hpp  (+7 -7)
driver/src/conv_driver.cpp  (+1 -1)
composable_kernel/include/kernel_algorithm/dummy_dynamic_transform_v1.hpp
...
@@ -18,29 +18,34 @@ map_convolution_into_gemm_v1(const WeiDesc& wei_k_c_y_x_global_desc,
                              const Array<index_t, 2> in_left_pads,
                              const Array<index_t, 2> in_right_pads)
 {
-    const index_t N = in_n_c_hi_wi_global_desc.GetLength(0);
-    const index_t C = in_n_c_hi_wi_global_desc.GetLength(1);
-    const index_t K = out_n_k_ho_wo_global_desc.GetLength(1);
-    const index_t Y = wei_k_c_y_x_global_desc.GetLength(2);
-    const index_t X = wei_k_c_y_x_global_desc.GetLength(3);
-    const index_t Hi = in_n_c_hi_wi_global_desc.GetLength(2);
-    const index_t Wi = in_n_c_hi_wi_global_desc.GetLength(3);
-    const index_t Ho = out_n_k_ho_wo_global_desc.GetLength(2);
-    const index_t Wo = out_n_k_ho_wo_global_desc.GetLength(3);
-    const index_t ConvStrideH = conv_strides[0];
-    const index_t ConvStrideW = conv_strides[1];
-    const index_t ConvDilationH = conv_dilations[0];
-    const index_t ConvDilationW = conv_dilations[1];
-    const index_t InLeftPadH = in_left_pads[0];
-    const index_t InLeftPadW = in_left_pads[1];
-    const index_t InRightPadH = in_right_pads[0];
-    const index_t InRightPadW = in_right_pads[1];
+    constexpr auto i0 = Number<0>{};
+    constexpr auto i1 = Number<1>{};
+    constexpr auto i2 = Number<2>{};
+    constexpr auto i3 = Number<3>{};
+
+    const index_t N = in_n_c_hi_wi_global_desc.GetLength(i0);
+    const index_t C = in_n_c_hi_wi_global_desc.GetLength(i1);
+    const index_t K = out_n_k_ho_wo_global_desc.GetLength(i1);
+    const index_t Y = wei_k_c_y_x_global_desc.GetLength(i2);
+    const index_t X = wei_k_c_y_x_global_desc.GetLength(i3);
+    const index_t Hi = in_n_c_hi_wi_global_desc.GetLength(i2);
+    const index_t Wi = in_n_c_hi_wi_global_desc.GetLength(i3);
+    const index_t Ho = out_n_k_ho_wo_global_desc.GetLength(i2);
+    const index_t Wo = out_n_k_ho_wo_global_desc.GetLength(i3);
+    const index_t ConvStrideH = conv_strides[i0];
+    const index_t ConvStrideW = conv_strides[i1];
+    const index_t ConvDilationH = conv_dilations[i0];
+    const index_t ConvDilationW = conv_dilations[i1];
+    const index_t InLeftPadH = in_left_pads[i0];
+    const index_t InLeftPadW = in_left_pads[i1];
+    const index_t InRightPadH = in_right_pads[i0];
+    const index_t InRightPadW = in_right_pads[i1];

     // input tensor
     const auto in_n_c_hip_wip_global_desc = transform_dynamic_tensor_descriptor(
...
@@ -59,24 +64,25 @@ map_convolution_into_gemm_v1(const WeiDesc& wei_k_c_y_x_global_desc,
         make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{}),
         make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{}));

-    const index_t Hip = in_n_c_hip_wip_global_desc.GetLength(2);
-    const index_t Wip = in_n_c_hip_wip_global_desc.GetLength(3);
+    const index_t Hip = in_n_c_hip_wip_global_desc.GetLength(i2);
+    const index_t Wip = in_n_c_hip_wip_global_desc.GetLength(i3);

     const auto in_n_c_y_ho_x_wo_global_desc = transform_dynamic_tensor_descriptor(
         in_n_c_hip_wip_global_desc,
-        make_tuple(DynamicPassThrough{N},
-                   DynamicPassThrough{C},
-                   DynamicEmbed<2>{{Y, Ho}, {ConvDilationH, ConvStrideH, 0}},
-                   DynamicEmbed<2>{{X, Wo}, {ConvDilationW, ConvStrideW, 0}}),
+        make_tuple(
+            DynamicPassThrough{N},
+            DynamicPassThrough{C},
+            DynamicEmbed<2>{make_multi_index(Y, Ho), make_multi_index(ConvDilationH, ConvStrideH)},
+            DynamicEmbed<2>{make_multi_index(X, Wo), make_multi_index(ConvDilationW, ConvStrideW)}),
         make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{}),
         make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2, 3>{}, Sequence<4, 5>{}));

     const auto in_gemmk_gemmn_global_desc =
         transform_dynamic_tensor_descriptor(
             in_n_c_y_ho_x_wo_global_desc,
-            make_tuple(DynamicMerge<3>{MultiIndex<3>{{C, Y, X}}},
-                       DynamicMerge<3>{MultiIndex<3>{{N, Ho, Wo}}}),
+            make_tuple(DynamicMerge<3>{make_multi_index(C, Y, X)},
+                       DynamicMerge<3>{make_multi_index(N, Ho, Wo)}),
             make_tuple(Sequence<1, 2, 4>{}, Sequence<0, 3, 5>{}),
             make_tuple(Sequence<0>{}, Sequence<1>{}));

     return make_tuple(in_gemmk_gemmn_global_desc);
 }
...
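Note on the pattern above (editor's sketch, not part of the commit): dimension ids move from raw integers to Number<> constants, and transform lengths/coefficients are built with make_multi_index instead of MultiIndex<N>{{...}} aggregate initialization. A minimal host-side illustration, assuming the multi_index.hpp from this commit is available:

    constexpr auto i2 = Number<2>{};                        // compile-time dimension id
    const index_t C = 8, Y = 3, X = 3;                      // hypothetical lengths
    const auto merge_lengths = make_multi_index(C, Y, X);   // MultiIndex<3> holding {8, 3, 3}
    // e.g. desc.GetLength(i2) instead of desc.GetLength(2)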
composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw_lds_double_buffer.hpp
...
@@ -183,7 +183,7 @@ struct GridwiseConvolutionImplicitGemm_v4r1_nchw_kcyx_nkhw_lds_double_buffer
             AddressSpace::Vgpr,
             AddressSpace::Lds,
             InMemoryDataOperation::Set>(
-            MultiIndex<4>{0, 0, b_block_data_on_global, 0}, MultiIndex<4>{0, 0, 0, 0});
+            MultiIndex<4>{{0, 0, b_block_data_on_global, 0}}, MultiIndex<4>{{0, 0, 0, 0}});

         // weight tensor
         // global tensor in global memory, src of blockwise copy
...
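The doubled braces follow from MultiIndex<N> being Array<index_t, N> in the branch shown in multi_index.hpp below, whose only data member is a C array, so the inner brace list initializes that member explicitly. A hedged one-line sketch with a hypothetical offset:

    MultiIndex<4> idx{{0, 0, 16, 0}};   // outer braces: the Array, inner braces: its mData[4]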
composable_kernel/include/tensor_description/multi_index.hpp
...
@@ -12,9 +12,8 @@ using MultiIndex = Array<index_t, N>;
 template <typename... Xs>
 __host__ __device__ constexpr auto make_multi_index(Xs... xs)
 {
-    return MultiIndex<sizeof...(Xs)>{{static_cast<index_t>(xs)...}};
+    return make_array<index_t>(xs...);
 }
 #else
 template <index_t N>
 using MultiIndex = StaticallyIndexedArray<index_t, N>;
...
@@ -33,5 +32,57 @@ __host__ __device__ constexpr auto make_zero_multi_index()
                           typename uniform_sequence_gen<NSize, 0>::type{});
 }

+template <typename T>
+__host__ __device__ constexpr auto to_multi_index(const T& x)
+{
+    return unpack([](auto... ys) { return make_multi_index(ys...); }, x);
+}
+
+template <index_t NSize, typename X>
+__host__ __device__ constexpr auto operator+=(MultiIndex<NSize>& y, const X& x)
+{
+    static_assert(X::Size() == NSize, "wrong! size not the same");
+    static_for<0, NSize, 1>{}([&](auto i) { y(i) += x[i]; });
+    return y;
+}
+
+template <index_t NSize, typename X>
+__host__ __device__ constexpr auto operator-=(MultiIndex<NSize>& y, const X& x)
+{
+    static_assert(X::Size() == NSize, "wrong! size not the same");
+    static_for<0, NSize, 1>{}([&](auto i) { y(i) -= x[i]; });
+    return y;
+}
+
+template <index_t NSize, typename T>
+__host__ __device__ constexpr auto operator+(const MultiIndex<NSize>& a, const T& b)
+{
+    using type = MultiIndex<NSize>;
+
+    static_assert(T::Size() == NSize, "wrong! size not the same");
+
+    type r;
+    static_for<0, NSize, 1>{}([&](auto i) { r(i) = a[i] + b[i]; });
+    return r;
+}
+
+template <index_t NSize, typename T>
+__host__ __device__ constexpr auto operator-(const MultiIndex<NSize>& a, const T& b)
+{
+    using type = MultiIndex<NSize>;
+
+    static_assert(T::Size() == NSize, "wrong! size not the same");
+
+    type r;
+    static_for<0, NSize, 1>{}([&](auto i) { r(i) = a[i] - b[i]; });
+    return r;
+}
+
+template <index_t NSize, typename T>
+__host__ __device__ constexpr auto operator*(const MultiIndex<NSize>& a, const T& b)
+{
+    using type = MultiIndex<NSize>;
+
+    static_assert(T::Size() == NSize, "wrong! size not the same");
+
+    type r;
+    static_for<0, NSize, 1>{}([&](auto i) { r(i) = a[i] * b[i]; });
+    return r;
+}
+
 } // namespace ck
 #endif
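These free operators provide element-wise arithmetic for MultiIndex against any statically sized container whose Size() matches, which is what the slice-copy and coordinate code in this commit relies on when it adds origins and step sizes. A minimal usage sketch, assuming this header (illustrative only):

    auto a = make_multi_index(1, 2, 3);
    auto b = make_multi_index(10, 20, 30);

    a += b;                  // a is now {11, 22, 33}
    const auto d = a - b;    // element-wise difference: {1, 2, 3}
    const auto p = a * b;    // element-wise product: {110, 440, 990}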
composable_kernel/include/tensor_description/multi_index_transform.hpp
...
@@ -234,7 +234,7 @@ struct Merge
     {
         if(idx_up_diff[0] == 0)
         {
-            return make_zero_array<index_t, nDimLow>();
+            return make_zero_multi_index<nDimLow>();
         }
         else
         {
...
@@ -459,7 +459,7 @@ struct Embed
         for(index_t icorner = 0; icorner < ncorner; ++icorner)
         {
             // generate upper index for each corner
-            auto idx_up = make_zero_array<index_t, nDimUp>();
+            auto idx_up = make_zero_multi_index<nDimUp>();

             index_t itmp = icorner;
...
@@ -512,7 +512,7 @@ struct Freeze
                         const UpperIndex& /* idx_up_old */,
                         const LowerIndex& /* idx_low_old */)
     {
-        return make_zero_array<index_t, nDimLow>();
+        return make_zero_multi_index<nDimLow>();
     }

     __host__ __device__ static constexpr bool IsLinearTransform() { return true; }
...
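make_zero_multi_index<N>() replaces make_zero_array<index_t, N>() as the way these transforms produce an all-zero index (it expands a uniform_sequence_gen<N, 0> sequence, per the multi_index.hpp hunk above). A hedged before/after sketch:

    // before: auto idx = make_zero_array<index_t, nDimUp>();
    auto idx = make_zero_multi_index<4>();   // MultiIndex<4> holding {0, 0, 0, 0}, assuming nDimUp == 4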
composable_kernel/include/tensor_description/tensor_coordinate.hpp
...
@@ -270,7 +270,7 @@ struct TensorCoordinate
     MakeDummyTensorCoordinate(NativeTensorDescriptor<Ts...>)
     {
         return NativeTensorCoordinate<NativeTensorDescriptor<Ts...>>(
-            make_zero_array<index_t, TensorDesc::GetNumOfDimension()>());
+            make_zero_multi_index<TensorDesc::GetNumOfDimension()>());
     }

     template <typename... Ts>
...
@@ -278,7 +278,7 @@ struct TensorCoordinate
     MakeDummyTensorCoordinate(TransformedTensorDescriptor<Ts...>)
     {
         return TransformedTensorCoordinate<TransformedTensorDescriptor<Ts...>>(
-            make_zero_array<index_t, TensorDesc::GetNumOfDimension()>());
+            make_zero_multi_index<TensorDesc::GetNumOfDimension()>());
     }

     public:
...
composable_kernel/include/tensor_description/tensor_descriptor.hpp
...
@@ -317,7 +317,7 @@ struct TransformedTensorDescriptor
             // this assume each lower (single) index is only assocaited with one transformation,
             // which is required for index transformation, and has been checked during constructor
             // of TransformedTensorDescriptor
-            idx_low_part = tran.CalculateLowerIndex(to_array(idx_up_part));
+            idx_low_part = tran.CalculateLowerIndex(to_multi_index(idx_up_part));
         });

         return idx_low;
...
@@ -345,8 +345,9 @@ struct TransformedTensorDescriptor
             // this assume each lower (single) index is associated with only one transformation,
             // which is required for index transformation, and has been checked during constructor
             // of TransformedTensorDescriptor
-            idx_low_diff_part = tran.CalculateLowerIndexDiff(
-                to_array(idx_up_diff_part), to_array(idx_up_old_part), to_array(idx_low_old_part));
+            idx_low_diff_part = tran.CalculateLowerIndexDiff(to_multi_index(idx_up_diff_part),
+                                                             to_multi_index(idx_up_old_part),
+                                                             to_multi_index(idx_low_old_part));
         });

         return idx_low_diff;
...
@@ -506,7 +507,8 @@ struct TransformedTensorDescriptor
             constexpr auto low_dims_part = LowDimensionIds{}.At(itran);
             constexpr auto low_lengths_part =
                 GetLowerTensorDescriptor().GetLengths(low_dims_part);

-            const auto idx_low_part = to_array(pick_array_element(idx_low, low_dims_part));
+            const auto idx_low_part =
+                to_multi_index(pick_array_element(idx_low, low_dims_part));

             for(index_t i = 0; i < low_dims_part.Size(); ++i)
             {
...
composable_kernel/include/tensor_operation/blockwise_generic_tensor_slice_copy.hpp
...
@@ -68,9 +68,9 @@ struct BlockwiseGenericTensorSliceCopy_v4
         const auto thread_data_id_begin = thread_cluster_id * ThreadSliceLengths{};

         mThreadwiseLoad.SetSrcSliceOrigin(src_block_slice_origin + thread_data_id_begin);
-        mThreadwiseLoad.SetDstSliceOrigin(make_zero_array<index_t, nDim>());
+        mThreadwiseLoad.SetDstSliceOrigin(make_zero_multi_index<nDim>());

-        mThreadwiseStore.SetSrcSliceOrigin(make_zero_array<index_t, nDim>());
+        mThreadwiseStore.SetSrcSliceOrigin(make_zero_multi_index<nDim>());
         mThreadwiseStore.SetDstSliceOrigin(dst_block_slice_origin + thread_data_id_begin);
     }
 }
...
composable_kernel/include/tensor_operation/threadwise_generic_tensor_slice_copy.hpp
...
@@ -54,8 +54,8 @@ struct ThreadwiseGenericTensorSliceCopy_v4r2
     }

     __device__ constexpr ThreadwiseGenericTensorSliceCopy_v4r2()
-        : ThreadwiseGenericTensorSliceCopy_v4r2(make_zero_array<index_t, nDim>(),
-                                                make_zero_array<index_t, nDim>())
+        : ThreadwiseGenericTensorSliceCopy_v4r2(make_zero_multi_index<nDim>(),
+                                                make_zero_multi_index<nDim>())
     {
     }
...
@@ -104,7 +104,7 @@ struct ThreadwiseGenericTensorSliceCopy_v4r2
         // load data from src to the long-vector buffer
         for(index_t i = 0; i < long_vector_size / src_data_per_access; ++i)
         {
-            auto scalar_id = make_zero_array<index_t, nDim>();
+            auto scalar_id = make_zero_multi_index<nDim>();
             scalar_id(vector_access_dim) = i * src_data_per_access;

             const index_t buffer_offset = i * src_data_per_access;
...
@@ -143,7 +143,7 @@ struct ThreadwiseGenericTensorSliceCopy_v4r2
         // store data from the long-vector buffer to dst
         for(index_t i = 0; i < long_vector_size / dst_data_per_access; ++i)
         {
-            auto scalar_id = make_zero_array<index_t, nDim>();
+            auto scalar_id = make_zero_multi_index<nDim>();
             scalar_id(vector_access_dim) = i * dst_data_per_access;

             const index_t buffer_offset = i * dst_data_per_access;
...
@@ -177,9 +177,9 @@ struct ThreadwiseGenericTensorSliceCopy_v4r2
     __device__ void MoveSrcSliceWindow(const T& step_sizes_,
                                        integral_constant<bool, PositiveDirection>)
     {
-        const auto step_sizes = to_array(step_sizes_);
+        const auto step_sizes = to_multi_index(step_sizes_);

-        static_if<PositiveDirection>{}([&](auto) { mSrcSliceOrigin += to_array(step_sizes); })
+        static_if<PositiveDirection>{}([&](auto) { mSrcSliceOrigin += to_multi_index(step_sizes); })
             .Else([&](auto) { mSrcSliceOrigin -= step_sizes; });
     }
...
@@ -187,7 +187,7 @@ struct ThreadwiseGenericTensorSliceCopy_v4r2
     __device__ void MoveDstSliceWindow(const T& step_sizes_,
                                        integral_constant<bool, PositiveDirection>)
     {
-        const auto step_sizes = to_array(step_sizes_);
+        const auto step_sizes = to_multi_index(step_sizes_);

         static_if<PositiveDirection>{}([&](auto) { mDstSliceOrigin += step_sizes; })
             .Else([&](auto) { mDstSliceOrigin -= step_sizes; });
...
composable_kernel/include/utility/array.hpp
...
@@ -9,49 +9,20 @@ namespace ck {
 template <typename TData, index_t NSize>
 struct Array
 {
-    using type      = Array<TData, NSize>;
+    using type      = Array;
     using data_type = TData;

-    // hack: add extra element to allow empty array
-    // TODO: implement empty Array
-    TData mData[NSize + 1] = {0};
+    TData mData[NSize] = {0};

     __host__ __device__ static constexpr index_t Size() { return NSize; }

-    // TODO: remove
-    __host__ __device__ static constexpr index_t GetSize() { return Size(); }
-
-    template <index_t I>
-    __host__ __device__ constexpr const TData& At(Number<I>) const
-    {
-        static_assert(I < NSize, "wrong!");
-        return mData[I];
-    }
-
-    template <index_t I>
-    __host__ __device__ constexpr TData& At(Number<I>)
-    {
-        static_assert(I < NSize, "wrong!");
-        return mData[I];
-    }
-
     __host__ __device__ constexpr const TData& At(index_t i) const { return mData[i]; }

     __host__ __device__ constexpr TData& At(index_t i) { return mData[i]; }

-    __host__ __device__ constexpr const TData& operator[](index_t i) const { return At(i); }
+    template <typename I>
+    __host__ __device__ constexpr const TData& operator[](I i) const { return At(i); }

-    __host__ __device__ constexpr TData& operator()(index_t i) { return At(i); }
+    template <typename I>
+    __host__ __device__ constexpr TData& operator()(I i) { return At(i); }

     template <typename T>
     __host__ __device__ constexpr auto operator=(const T& a)
...
@@ -62,127 +33,34 @@ struct Array
         return *this;
     }
-
-    template <typename T>
-    __host__ __device__ constexpr auto operator+=(const T& a)
-    {
-        static_assert(T::Size() == Size(), "wrong! size not the same");
-        static_for<0, Size(), 1>{}([&](auto i) { operator()(i) += a[i]; });
-        return *this;
-    }
-
-    template <typename T>
-    __host__ __device__ constexpr auto operator-=(const T& a)
-    {
-        static_assert(T::Size() == Size(), "wrong! size not the same");
-        static_for<0, Size(), 1>{}([&](auto i) { operator()(i) -= a[i]; });
-        return *this;
-    }
-
-    template <typename T>
-    __host__ __device__ constexpr auto operator+(const T& a) const
-    {
-        static_assert(T::Size() == Size(), "wrong! size not the same");
-        type r;
-        static_for<0, Size(), 1>{}([&](auto i) { r(i) = operator[](i) + a[i]; });
-        return r;
-    }
-
-    template <typename T>
-    __host__ __device__ constexpr auto operator-(const T& a) const
-    {
-        static_assert(T::Size() == Size(), "wrong! size not the same");
-        type r;
-        static_for<0, Size(), 1>{}([&](auto i) { r(i) = operator[](i) - a[i]; });
-        return r;
-    }
-
-    template <typename T>
-    __host__ __device__ constexpr auto operator*(const T& a) const
-    {
-        static_assert(T::Size() == Size(), "wrong! size not the same");
-        type r;
-        static_for<0, Size(), 1>{}([&](auto i) { r(i) = operator[](i) * a[i]; });
-        return r;
-    }
-
-    struct lambda_PushBack // emulate constexpr lambda
-    {
-        const Array<TData, NSize>& old_array;
-        Array<TData, NSize + 1>& new_array;
-
-        __host__ __device__ constexpr lambda_PushBack(const Array<TData, NSize>& old_array_,
-                                                      Array<TData, NSize + 1>& new_array_)
-            : old_array(old_array_), new_array(new_array_)
-        {
-        }
-
-        template <index_t I>
-        __host__ __device__ constexpr void operator()(Number<I>) const
-        {
-            new_array(Number<I>{}) = old_array[I];
-        }
-    };
-
-    __host__ __device__ constexpr auto PushBack(TData x) const
-    {
-        Array<TData, NSize + 1> new_array;
-        static_for<0, NSize, 1>{}(lambda_PushBack(*this, new_array));
-        new_array(Number<NSize>{}) = x;
-        return new_array;
-    }
-
-    template <index_t NAppend>
-    __host__ __device__ constexpr auto Append(const Array<TData, NAppend>& xs) const
-    {
-        Array<TData, NSize + NAppend> r;
-        static_for<0, NSize, 1>{}([&r, this](auto i) constexpr { r(i) = (*this)[i]; });
-        static_for<0, NAppend, 1>{}([&r, &xs](auto i) constexpr { r(NSize + i) = xs[i]; });
-        return r;
-    }
 };

+// empty Array
+template <typename TData>
+struct Array<TData, 0>
+{
+    using type      = Array;
+    using data_type = TData;
+
+    __host__ __device__ static constexpr index_t Size() { return 0; }
+};
+
 template <typename X, typename... Xs>
 __host__ __device__ constexpr auto make_array(const X& x, const Xs&... xs)
 {
-    return Array<X, sizeof...(xs) + 1>{{x, xs...}};
+    return Array<X, sizeof...(Xs) + 1>{{x, static_cast<X>(xs)...}};
 }

-template <typename T>
-__host__ __device__ constexpr auto to_array(const T& x)
-{
-    Array<typename T::data_type, T::Size()> y;
-
-    static_for<0, T::Size(), 1>{}([&](auto i) { y.At(i) = x.At(i); });
-
-    return y;
-}
-
-template <typename TData, index_t NSize>
-__host__ __device__ constexpr auto make_zero_array()
-{
-    constexpr auto zero_sequence = typename uniform_sequence_gen<NSize, 0>::type{};
-    constexpr auto zero_array    = to_array(zero_sequence);
-    return zero_array;
-}
+template <typename TData, index_t NSize>
+__host__ __device__ constexpr auto push_back(Array<TData, NSize>& a, const TData& x)
+{
+    Array<TData, NSize + 1> r;
+
+    static_for<0, NSize, 1>{}([&r, &a](auto i) constexpr { r(i) = a[i]; });
+
+    r(Number<NSize>{}) = x;
+
+    return r;
+}

 } // namespace ck
...
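After this refactor Array keeps only storage and element access: the element-wise operators now live on MultiIndex (multi_index.hpp above), PushBack/Append become the free push_back, and to_array/make_zero_array are superseded by to_multi_index/make_zero_multi_index. The operator[]/operator() templates on the index type presumably allow Number<> ids to be passed straight through. A minimal sketch of the surviving API, assuming this header (illustrative only):

    auto a = make_array<index_t>(1, 2, 3);   // Array<index_t, 3>
    auto b = push_back(a, index_t{4});       // Array<index_t, 4> holding {1, 2, 3, 4}

    b(3) = 7;                                // operator() -> At(index_t)
    const index_t x = b[3];                  // operator[] -> At(index_t)

    static_assert(Array<index_t, 0>::Size() == 0, "empty Array is now a real specialization");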
composable_kernel/include/utility/functional3.hpp
...
@@ -63,7 +63,7 @@ struct ford_impl
         for(index_t i = 0; i < RemainLengths::Front(); ++i)
         {
             ford_impl<decltype(RemainLengths::PopFront()), Orders>{}(
-                f, current_ordered_id.PushBack(i));
+                f, push_back(current_ordered_id, i));
         }
     }
 };
...
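With PushBack gone from Array, ford_impl grows the ordered id through the free helper instead; the two spellings are intended to be equivalent. A tiny sketch (assuming array.hpp from this commit):

    Array<index_t, 2> id = make_array<index_t>(0, 1);
    // before: auto next = id.PushBack(2);
    auto next = push_back(id, index_t{2});   // Array<index_t, 3> holding {0, 1, 2}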
composable_kernel/include/utility/statically_indexed_array.hpp
...
@@ -386,43 +386,5 @@ struct StaticallyIndexedArray<TData, 22> : Tuple<TData,
     using data_type = TData;
 };

-template <typename TData, index_t NSize, typename X>
-__host__ __device__ constexpr auto operator+=(StaticallyIndexedArray<TData, NSize>& y, const X& x)
-{
-    static_assert(X::Size() == NSize, "wrong! size not the same");
-    static_for<0, NSize, 1>{}([&](auto i) { y(i) += x[i]; });
-    return y;
-}
-
-template <typename TData, index_t NSize, typename X>
-__host__ __device__ constexpr auto operator-=(StaticallyIndexedArray<TData, NSize>& y, const X& x)
-{
-    static_assert(X::Size() == NSize, "wrong! size not the same");
-    static_for<0, NSize, 1>{}([&](auto i) { y(i) -= x[i]; });
-    return y;
-}
-
-template <typename TData, index_t NSize, typename T>
-__host__ __device__ constexpr auto operator+(const StaticallyIndexedArray<TData, NSize>& a, const T& b)
-{
-    using type = StaticallyIndexedArray<TData, NSize>;
-
-    static_assert(T::Size() == NSize, "wrong! size not the same");
-
-    type r;
-    static_for<0, NSize, 1>{}([&](auto i) { r(i) = a[i] + b[i]; });
-    return r;
-}
-
-template <typename TData, index_t NSize, typename T>
-__host__ __device__ constexpr auto operator-(const StaticallyIndexedArray<TData, NSize>& a, const T& b)
-{
-    using type = StaticallyIndexedArray<TData, NSize>;
-
-    static_assert(T::Size() == NSize, "wrong! size not the same");
-
-    type r;
-    static_for<0, NSize, 1>{}([&](auto i) { r(i) = a[i] - b[i]; });
-    return r;
-}
-
 } // namespace ck
 #endif
driver/include/device_dummy_dynamic_transform_v2.hpp
...
@@ -29,16 +29,16 @@ void device_dummy_dynamic_transform_v2(InDesc,
     using TDevice = typename conditional<is_same<half_float::half, T>::value, half_t, T>::type;

     const auto in_nchw_desc = make_dynamic_native_tensor_descriptor_v2(
-        to_array(InDesc::GetLengths()), to_array(InDesc::GetStrides()));
+        to_multi_index(InDesc::GetLengths()), to_multi_index(InDesc::GetStrides()));

     const auto wei_kcyx_desc = make_dynamic_native_tensor_descriptor_v2(
-        to_array(WeiDesc::GetLengths()), to_array(WeiDesc::GetStrides()));
+        to_multi_index(WeiDesc::GetLengths()), to_multi_index(WeiDesc::GetStrides()));

     const auto out_nkhw_desc = make_dynamic_native_tensor_descriptor_v2(
-        to_array(OutDesc::GetLengths()), to_array(OutDesc::GetStrides()));
+        to_multi_index(OutDesc::GetLengths()), to_multi_index(OutDesc::GetStrides()));

-    const auto conv_strides   = to_array(ConvStrides{});
-    const auto conv_dilations = to_array(ConvDilations{});
-    const auto in_left_pads   = to_array(InLeftPads{});
-    const auto in_right_pads  = to_array(InRightPads{});
+    const auto conv_strides   = to_multi_index(ConvStrides{});
+    const auto conv_dilations = to_multi_index(ConvDilations{});
+    const auto in_left_pads   = to_multi_index(InLeftPads{});
+    const auto in_right_pads  = to_multi_index(InRightPads{});

     const auto tensor_descs = map_convolution_into_gemm_v2(wei_kcyx_desc,
                                                            in_nchw_desc,
...
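On the host side, the compile-time descriptors (lengths, strides, pads) are now converted with to_multi_index before being handed to the dynamic descriptor factory; to_multi_index unpacks a statically indexed container into make_multi_index. A hedged sketch with a hypothetical stride sequence:

    using ConvStrides = Sequence<2, 2>;                        // hypothetical, for illustration
    const auto conv_strides = to_multi_index(ConvStrides{});   // MultiIndex<2> holding {2, 2}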
driver/src/conv_driver.cpp
...
@@ -549,7 +549,7 @@ int main(int argc, char* argv[])
 #endif
     }

-#if 0
+#if 1
     device_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw(in_nchw_desc,
                                                          in_nchw,
                                                          wei_kcyx_desc,
...