Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
a339028c
"...composable_kernel_rocm.git" did not exist on "34c661e71cc8cf4753843a58786c8f6211ec5e22"
Commit
a339028c
authored
Sep 30, 2020
by
Chao Liu
Browse files
adding statically indexed array
parent
e7f633c5
Changes
11
Show whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
126 additions
and
604 deletions
+126
-604
composable_kernel/include/kernel_algorithm/dummy_dynamic_transform_v2.hpp
...l/include/kernel_algorithm/dummy_dynamic_transform_v2.hpp
+3
-3
composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw_lds_double_buffer.hpp
...n_implicit_gemm_v4r1_nchw_kcyx_nkhw_lds_double_buffer.hpp
+2
-2
composable_kernel/include/tensor_description/dynamic_multi_index_transform.hpp
...lude/tensor_description/dynamic_multi_index_transform.hpp
+39
-43
composable_kernel/include/tensor_description/dynamic_tensor_descriptor_v2.hpp
...clude/tensor_description/dynamic_tensor_descriptor_v2.hpp
+7
-4
composable_kernel/include/tensor_description/multi_index_transform.hpp
...rnel/include/tensor_description/multi_index_transform.hpp
+2
-2
composable_kernel/include/utility/array_element_picker.hpp
composable_kernel/include/utility/array_element_picker.hpp
+7
-5
composable_kernel/include/utility/array_helper.hpp
composable_kernel/include/utility/array_helper.hpp
+20
-1
composable_kernel/include/utility/print.hpp
composable_kernel/include/utility/print.hpp
+2
-2
composable_kernel/include/utility/statically_indexed_array.hpp
...sable_kernel/include/utility/statically_indexed_array.hpp
+37
-527
composable_kernel/include/utility/tuple.hpp
composable_kernel/include/utility/tuple.hpp
+2
-2
driver/include/device_dummy_dynamic_transform_v2.hpp
driver/include/device_dummy_dynamic_transform_v2.hpp
+5
-13
No files found.
composable_kernel/include/kernel_algorithm/dummy_dynamic_transform_v2.hpp
View file @
a339028c
...
@@ -123,7 +123,7 @@ struct DummyDynamicTransform_v2_1
...
@@ -123,7 +123,7 @@ struct DummyDynamicTransform_v2_1
const
auto
in_gemmk_gemmn_coord_step
=
make_dynamic_tensor_coordinate_step_v2
(
const
auto
in_gemmk_gemmn_coord_step
=
make_dynamic_tensor_coordinate_step_v2
(
in_gemmk_gemmn_global_desc
,
make_multi_index
(
1
,
0
));
in_gemmk_gemmn_global_desc
,
make_multi_index
(
1
,
0
));
#pragma unroll
#pragma unroll
1
for
(
index_t
i
=
0
;
i
<
10
;
++
i
)
for
(
index_t
i
=
0
;
i
<
10
;
++
i
)
{
{
move_dynamic_tensor_coordinate_v2
(
move_dynamic_tensor_coordinate_v2
(
...
@@ -293,14 +293,14 @@ struct DummyDynamicTransform_v2_2
...
@@ -293,14 +293,14 @@ struct DummyDynamicTransform_v2_2
// initialize idx
// initialize idx
static_for
<
0
,
2
,
1
>
{}([
&
](
auto
i
)
{
idx
(
i
)
=
p_wei_global
[
get_thread_local_1d_id
()
+
i
];
});
static_for
<
0
,
2
,
1
>
{}([
&
](
auto
i
)
{
idx
(
i
)
=
p_wei_global
[
get_thread_local_1d_id
()
+
i
];
});
#if
0
#if
1
const
index_t
niter
=
p_wei_global
[
10
];
const
index_t
niter
=
p_wei_global
[
10
];
auto
in_gemmk_gemmn_coord
=
auto
in_gemmk_gemmn_coord
=
make_dynamic_tensor_coordinate_v2
(
in_gemmk_gemmn_global_desc
,
idx
);
make_dynamic_tensor_coordinate_v2
(
in_gemmk_gemmn_global_desc
,
idx
);
const
auto
in_gemmk_gemmn_coord_step
=
make_dynamic_tensor_coordinate_step_v2
(
const
auto
in_gemmk_gemmn_coord_step
=
make_dynamic_tensor_coordinate_step_v2
(
in_gemmk_gemmn_global_desc,
M
ulti
I
ndex
<2>{{
1, 0
}}
);
in_gemmk_gemmn_global_desc
,
make_m
ulti
_i
ndex
(
1
,
0
)
);
for
(
index_t
iter
=
0
;
iter
<
niter
;
++
iter
)
for
(
index_t
iter
=
0
;
iter
<
niter
;
++
iter
)
{
{
...
...
composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw_lds_double_buffer.hpp
View file @
a339028c
...
@@ -119,8 +119,8 @@ struct GridwiseConvolutionImplicitGemm_v4r1_nchw_kcyx_nkhw_lds_double_buffer
...
@@ -119,8 +119,8 @@ struct GridwiseConvolutionImplicitGemm_v4r1_nchw_kcyx_nkhw_lds_double_buffer
const
auto
block_work_id
=
block_work_desc
.
CalculateClusterIndex
(
get_block_1d_id
());
const
auto
block_work_id
=
block_work_desc
.
CalculateClusterIndex
(
get_block_1d_id
());
const
index_t
k_block_data_on_global
=
block_work_id
[
0
]
*
KPerBlock
;
const
index_t
k_block_data_on_global
=
block_work_id
[
I
0
]
*
KPerBlock
;
const
index_t
b_block_data_on_global
=
block_work_id
[
1
]
*
BPerBlock
;
const
index_t
b_block_data_on_global
=
block_work_id
[
I
1
]
*
BPerBlock
;
// input tensor
// input tensor
// global tensor in global memory
// global tensor in global memory
...
...
composable_kernel/include/tensor_description/dynamic_multi_index_transform.hpp
View file @
a339028c
...
@@ -14,27 +14,27 @@ struct DynamicPassThrough
...
@@ -14,27 +14,27 @@ struct DynamicPassThrough
const
UpperIndex
up_lengths_
;
const
UpperIndex
up_lengths_
;
#if 0
#if 0
__host__ __device__
explicit
constexpr DynamicPassThrough(const DynamicPassThrough&) = default;
__host__ __device__
constexpr DynamicPassThrough(const DynamicPassThrough&) = default;
__host__ __device__
explicit
constexpr DynamicPassThrough(DynamicPassThrough&&) = default;
__host__ __device__
constexpr DynamicPassThrough(DynamicPassThrough&&) = default;
#else
#else
__host__
__device__
explicit
constexpr
DynamicPassThrough
(
const
DynamicPassThrough
&
other
)
__host__
__device__
constexpr
DynamicPassThrough
(
const
DynamicPassThrough
&
other
)
:
up_lengths_
{
other
.
up_lengths_
}
:
up_lengths_
{
other
.
up_lengths_
}
{
{
}
}
__host__
__device__
explicit
constexpr
DynamicPassThrough
(
DynamicPassThrough
&&
other
)
__host__
__device__
constexpr
DynamicPassThrough
(
DynamicPassThrough
&&
other
)
:
up_lengths_
{
other
.
up_lengths_
}
:
up_lengths_
{
other
.
up_lengths_
}
{
{
}
}
#endif
#endif
__host__
__device__
explicit
constexpr
DynamicPassThrough
(
const
index_t
&
low_length
)
__host__
__device__
constexpr
DynamicPassThrough
(
const
index_t
&
low_length
)
:
up_lengths_
{
make_multi_index
(
low_length
)}
:
up_lengths_
{
make_multi_index
(
low_length
)}
{
{
}
}
__host__
__device__
explicit
constexpr
DynamicPassThrough
()
:
up_lengths_
{
0
}
{}
__host__
__device__
constexpr
DynamicPassThrough
()
:
up_lengths_
{
0
}
{}
__host__
__device__
static
constexpr
index_t
GetNumOfLowerDimension
()
{
return
1
;
}
__host__
__device__
static
constexpr
index_t
GetNumOfLowerDimension
()
{
return
1
;
}
...
@@ -89,28 +89,27 @@ struct DynamicLeftPad
...
@@ -89,28 +89,27 @@ struct DynamicLeftPad
const
index_t
left_pad_
;
const
index_t
left_pad_
;
#if 0
#if 0
__host__ __device__
explicit
constexpr DynamicLeftPad(const DynamicLeftPad&) = default;
__host__ __device__
constexpr DynamicLeftPad(const DynamicLeftPad&) = default;
__host__ __device__
explicit
constexpr DynamicLeftPad(DynamicLeftPad&&) = default;
__host__ __device__
constexpr DynamicLeftPad(DynamicLeftPad&&) = default;
#else
#else
__host__
__device__
explicit
constexpr
DynamicLeftPad
(
const
DynamicLeftPad
&
other
)
__host__
__device__
constexpr
DynamicLeftPad
(
const
DynamicLeftPad
&
other
)
:
up_lengths_
{
other
.
up_lengths_
},
left_pad_
{
other
.
left_pad_
}
:
up_lengths_
{
other
.
up_lengths_
},
left_pad_
{
other
.
left_pad_
}
{
{
}
}
__host__
__device__
explicit
constexpr
DynamicLeftPad
(
DynamicLeftPad
&&
other
)
__host__
__device__
constexpr
DynamicLeftPad
(
DynamicLeftPad
&&
other
)
:
up_lengths_
{
other
.
up_lengths_
},
left_pad_
{
other
.
left_pad_
}
:
up_lengths_
{
other
.
up_lengths_
},
left_pad_
{
other
.
left_pad_
}
{
{
}
}
#endif
#endif
__host__
__device__
explicit
constexpr
DynamicLeftPad
(
const
index_t
&
low_length
,
__host__
__device__
constexpr
DynamicLeftPad
(
const
index_t
&
low_length
,
const
index_t
&
left_pad
)
const
index_t
&
left_pad
)
:
up_lengths_
{
make_multi_index
(
low_length
+
left_pad
)},
left_pad_
{
left_pad
}
:
up_lengths_
{
make_multi_index
(
low_length
+
left_pad
)},
left_pad_
{
left_pad
}
{
{
}
}
__host__
__device__
explicit
constexpr
DynamicLeftPad
()
:
up_lengths_
{
0
},
left_pad_
{
0
}
{}
__host__
__device__
constexpr
DynamicLeftPad
()
:
up_lengths_
{
0
},
left_pad_
{
0
}
{}
__host__
__device__
static
constexpr
index_t
GetNumOfLowerDimension
()
{
return
1
;
}
__host__
__device__
static
constexpr
index_t
GetNumOfLowerDimension
()
{
return
1
;
}
...
@@ -168,18 +167,18 @@ struct DynamicRightPad
...
@@ -168,18 +167,18 @@ struct DynamicRightPad
const
index_t
right_pad_
;
const
index_t
right_pad_
;
#if 0
#if 0
__host__ __device__
explicit
constexpr DynamicRightPad(const DynamicRightPad&) = default;
__host__ __device__
constexpr DynamicRightPad(const DynamicRightPad&) = default;
__host__ __device__
explicit
constexpr DynamicRightPad(DynamicRightPad&&) = default;
__host__ __device__
constexpr DynamicRightPad(DynamicRightPad&&) = default;
#else
#else
__host__
__device__
explicit
constexpr
DynamicRightPad
(
const
DynamicRightPad
&
other
)
__host__
__device__
constexpr
DynamicRightPad
(
const
DynamicRightPad
&
other
)
:
up_lengths_
{
other
.
up_lengths_
},
:
up_lengths_
{
other
.
up_lengths_
},
low_length_
{
other
.
low_length_
},
low_length_
{
other
.
low_length_
},
right_pad_
{
other
.
right_pad_
}
right_pad_
{
other
.
right_pad_
}
{
{
}
}
__host__
__device__
explicit
constexpr
DynamicRightPad
(
DynamicRightPad
&&
other
)
__host__
__device__
constexpr
DynamicRightPad
(
DynamicRightPad
&&
other
)
:
up_lengths_
{
other
.
up_lengths_
},
:
up_lengths_
{
other
.
up_lengths_
},
low_length_
{
other
.
low_length_
},
low_length_
{
other
.
low_length_
},
right_pad_
{
other
.
right_pad_
}
right_pad_
{
other
.
right_pad_
}
...
@@ -187,7 +186,7 @@ struct DynamicRightPad
...
@@ -187,7 +186,7 @@ struct DynamicRightPad
}
}
#endif
#endif
__host__
__device__
explicit
constexpr
DynamicRightPad
(
const
index_t
&
low_length
,
__host__
__device__
constexpr
DynamicRightPad
(
const
index_t
&
low_length
,
const
index_t
&
right_pad
)
const
index_t
&
right_pad
)
:
up_lengths_
{
make_multi_index
(
low_length
+
right_pad
)},
:
up_lengths_
{
make_multi_index
(
low_length
+
right_pad
)},
low_length_
{
low_length
},
low_length_
{
low_length
},
...
@@ -195,8 +194,7 @@ struct DynamicRightPad
...
@@ -195,8 +194,7 @@ struct DynamicRightPad
{
{
}
}
__host__
__device__
explicit
constexpr
DynamicRightPad
()
__host__
__device__
constexpr
DynamicRightPad
()
:
up_lengths_
{
0
},
low_length_
{
0
},
right_pad_
{
0
}
:
up_lengths_
{
0
},
low_length_
{
0
},
right_pad_
{
0
}
{
{
}
}
...
@@ -256,21 +254,21 @@ struct DynamicEmbed
...
@@ -256,21 +254,21 @@ struct DynamicEmbed
const
UpperIndex
coefficients_
;
const
UpperIndex
coefficients_
;
#if 0
#if 0
__host__ __device__
explicit
constexpr DynamicEmbed(const DynamicEmbed&) = default;
__host__ __device__
constexpr DynamicEmbed(const DynamicEmbed&) = default;
__host__ __device__
explicit
constexpr DynamicEmbed(DynamicEmbed&&) = default;
__host__ __device__
constexpr DynamicEmbed(DynamicEmbed&&) = default;
#else
#else
__host__
__device__
explicit
constexpr
DynamicEmbed
(
const
DynamicEmbed
&
other
)
__host__
__device__
constexpr
DynamicEmbed
(
const
DynamicEmbed
&
other
)
:
up_lengths_
{
other
.
up_lengths_
},
coefficients_
{
other
.
coefficients_
}
:
up_lengths_
{
other
.
up_lengths_
},
coefficients_
{
other
.
coefficients_
}
{
{
}
}
__host__
__device__
explicit
constexpr
DynamicEmbed
(
DynamicEmbed
&&
other
)
__host__
__device__
constexpr
DynamicEmbed
(
DynamicEmbed
&&
other
)
:
up_lengths_
{
other
.
up_lengths_
},
coefficients_
{
other
.
coefficients_
}
:
up_lengths_
{
other
.
up_lengths_
},
coefficients_
{
other
.
coefficients_
}
{
{
}
}
#endif
#endif
__host__
__device__
explicit
constexpr
DynamicEmbed
(
const
UpperIndex
&
up_lengths
,
__host__
__device__
constexpr
DynamicEmbed
(
const
UpperIndex
&
up_lengths
,
const
UpperIndex
&
coefficients
)
const
UpperIndex
&
coefficients
)
:
up_lengths_
{
up_lengths
},
coefficients_
{
coefficients
}
:
up_lengths_
{
up_lengths
},
coefficients_
{
coefficients
}
{
{
...
@@ -278,13 +276,13 @@ struct DynamicEmbed
...
@@ -278,13 +276,13 @@ struct DynamicEmbed
}
}
template
<
typename
UpperLengths
,
typename
Coefficients
>
template
<
typename
UpperLengths
,
typename
Coefficients
>
__host__
__device__
explicit
constexpr
DynamicEmbed
(
const
UpperLengths
&
up_lengths
,
__host__
__device__
constexpr
DynamicEmbed
(
const
UpperLengths
&
up_lengths
,
const
Coefficients
&
coefficients
)
const
Coefficients
&
coefficients
)
:
up_lengths_
{
up_lengths
},
coefficients_
{
coefficients
}
:
up_lengths_
{
up_lengths
},
coefficients_
{
coefficients
}
{
{
}
}
__host__
__device__
explicit
constexpr
DynamicEmbed
()
__host__
__device__
constexpr
DynamicEmbed
()
:
up_lengths_
{
make_zero_multi_index
<
NDimUp
>
()},
:
up_lengths_
{
make_zero_multi_index
<
NDimUp
>
()},
coefficients_
{
make_zero_multi_index
<
NDimUp
>
()}
coefficients_
{
make_zero_multi_index
<
NDimUp
>
()}
{
{
...
@@ -352,18 +350,18 @@ struct DynamicMerge
...
@@ -352,18 +350,18 @@ struct DynamicMerge
const
UpperIndex
up_lengths_
;
const
UpperIndex
up_lengths_
;
#if 0
#if 0
__host__ __device__
explicit
constexpr DynamicMerge(const DynamicMerge&) = default;
__host__ __device__
constexpr DynamicMerge(const DynamicMerge&) = default;
__host__ __device__
explicit
constexpr DynamicMerge(DynamicMerge&&) = default;
__host__ __device__
constexpr DynamicMerge(DynamicMerge&&) = default;
#else
#else
__host__
__device__
explicit
constexpr
DynamicMerge
(
const
DynamicMerge
&
other
)
__host__
__device__
constexpr
DynamicMerge
(
const
DynamicMerge
&
other
)
:
low_lengths_
{
other
.
low_lengths_
},
:
low_lengths_
{
other
.
low_lengths_
},
low_lengths_scan_
{
other
.
low_lengths_scan_
},
low_lengths_scan_
{
other
.
low_lengths_scan_
},
up_lengths_
{
other
.
up_lengths_
}
up_lengths_
{
other
.
up_lengths_
}
{
{
}
}
__host__
__device__
explicit
constexpr
DynamicMerge
(
DynamicMerge
&&
other
)
__host__
__device__
constexpr
DynamicMerge
(
DynamicMerge
&&
other
)
:
low_lengths_
{
other
.
low_lengths_
},
:
low_lengths_
{
other
.
low_lengths_
},
low_lengths_scan_
{
other
.
low_lengths_scan_
},
low_lengths_scan_
{
other
.
low_lengths_scan_
},
up_lengths_
{
other
.
up_lengths_
}
up_lengths_
{
other
.
up_lengths_
}
...
@@ -371,9 +369,9 @@ struct DynamicMerge
...
@@ -371,9 +369,9 @@ struct DynamicMerge
}
}
#endif
#endif
__host__
__device__
explicit
constexpr
DynamicMerge
(
const
LowerIndex
&
low_lengths
)
__host__
__device__
constexpr
DynamicMerge
(
const
LowerIndex
&
low_lengths
)
:
low_lengths_
{
low_lengths
},
:
low_lengths_
{
low_lengths
},
low_lengths_scan_
{
reverse_exclusive_scan_on_array
(
low_lengths_scan_
{
reverse_exclusive_scan_on_array
<
index_t
,
NDimLow
>
(
low_lengths
,
math
::
multiplies
<
index_t
>
{},
index_t
{
1
})},
low_lengths
,
math
::
multiplies
<
index_t
>
{},
index_t
{
1
})},
up_lengths_
{
make_multi_index
(
up_lengths_
{
make_multi_index
(
reduce_on_array
(
low_lengths
,
math
::
multiplies
<
index_t
>
(),
index_t
{
1
}))}
reduce_on_array
(
low_lengths
,
math
::
multiplies
<
index_t
>
(),
index_t
{
1
}))}
...
@@ -381,7 +379,7 @@ struct DynamicMerge
...
@@ -381,7 +379,7 @@ struct DynamicMerge
static_assert
(
LowerIndex
::
Size
()
==
NDimLow
,
"wrong!"
);
static_assert
(
LowerIndex
::
Size
()
==
NDimLow
,
"wrong!"
);
}
}
__host__
__device__
explicit
constexpr
DynamicMerge
()
__host__
__device__
constexpr
DynamicMerge
()
:
low_lengths_
{
make_zero_multi_index
<
NDimLow
>
()},
:
low_lengths_
{
make_zero_multi_index
<
NDimLow
>
()},
low_lengths_scan_
{
make_zero_multi_index
<
NDimLow
>
()},
low_lengths_scan_
{
make_zero_multi_index
<
NDimLow
>
()},
up_lengths_
{
0
}
up_lengths_
{
0
}
...
@@ -515,14 +513,14 @@ struct DynamicUnMerge
...
@@ -515,14 +513,14 @@ struct DynamicUnMerge
const
UpperIndex
up_lengths_
;
const
UpperIndex
up_lengths_
;
const
UpperIndex
up_lengths_scan_
;
const
UpperIndex
up_lengths_scan_
;
__host__
__device__
explicit
constexpr
DynamicUnMerge
(
const
UpperIndex
&
up_lengths
)
__host__
__device__
constexpr
DynamicUnMerge
(
const
UpperIndex
&
up_lengths
)
:
up_lengths_
{
up_lengths
},
:
up_lengths_
{
up_lengths
},
up_lengths_scan_
{
up_lengths_scan_
{
reverse_exclusive_scan_on_array
(
up_lengths
,
math
::
multiplies
<
index_t
>
(),
index_t
{
1
})}
reverse_exclusive_scan_on_array
(
up_lengths
,
math
::
multiplies
<
index_t
>
(),
index_t
{
1
})}
{
{
}
}
__host__
__device__
explicit
constexpr
DynamicUnMerge
()
__host__
__device__
constexpr
DynamicUnMerge
()
:
up_lengths_
{
make_zero_multi_index
<
NDimUp
>
()},
:
up_lengths_
{
make_zero_multi_index
<
NDimUp
>
()},
up_lengths_scan_
{
make_zero_multi_index
<
NDimUp
>
()}
up_lengths_scan_
{
make_zero_multi_index
<
NDimUp
>
()}
{
{
...
@@ -575,11 +573,9 @@ struct DynamicFreeze
...
@@ -575,11 +573,9 @@ struct DynamicFreeze
const
index_t
low_idx_
;
const
index_t
low_idx_
;
__host__
__device__
explicit
constexpr
DynamicFreeze
(
const
index_t
&
low_idx
)
:
low_idx_
{
low_idx
}
__host__
__device__
constexpr
DynamicFreeze
(
const
index_t
&
low_idx
)
:
low_idx_
{
low_idx
}
{}
{
}
__host__
__device__
explicit
constexpr
DynamicFreeze
()
:
low_idx_
{
0
}
{}
__host__
__device__
constexpr
DynamicFreeze
()
:
low_idx_
{
0
}
{}
__host__
__device__
static
constexpr
index_t
GetNumOfLowerDimension
()
{
return
1
;
}
__host__
__device__
static
constexpr
index_t
GetNumOfLowerDimension
()
{
return
1
;
}
...
...
composable_kernel/include/tensor_description/dynamic_tensor_descriptor_v2.hpp
View file @
a339028c
...
@@ -119,7 +119,10 @@ struct DynamicTensorDescriptor_v2
...
@@ -119,7 +119,10 @@ struct DynamicTensorDescriptor_v2
return
reduce_on_array
(
GetLengths
(),
math
::
multiplies
<
index_t
>
{},
index_t
{
1
});
return
reduce_on_array
(
GetLengths
(),
math
::
multiplies
<
index_t
>
{},
index_t
{
1
});
}
}
__host__
__device__
constexpr
index_t
GetElementSpaceSize
()
const
{
return
hidden_lengths_
[
0
];
}
__host__
__device__
constexpr
index_t
GetElementSpaceSize
()
const
{
return
hidden_lengths_
[
Number
<
0
>
{}];
}
template
<
typename
Idx
>
template
<
typename
Idx
>
__host__
__device__
constexpr
index_t
CalculateOffset
(
const
Idx
&
idx
)
const
__host__
__device__
constexpr
index_t
CalculateOffset
(
const
Idx
&
idx
)
const
...
@@ -152,7 +155,7 @@ struct DynamicTensorDescriptor_v2
...
@@ -152,7 +155,7 @@ struct DynamicTensorDescriptor_v2
tran
.
CalculateLowerIndex
(
idx_low
,
idx_up
);
tran
.
CalculateLowerIndex
(
idx_low
,
idx_up
);
});
});
return
idx_hidden
[
0
];
return
idx_hidden
[
Number
<
0
>
{}
];
#endif
#endif
}
}
...
@@ -266,7 +269,7 @@ struct DynamicTensorCoordinate_v2
...
@@ -266,7 +269,7 @@ struct DynamicTensorCoordinate_v2
__host__
__device__
constexpr
const
auto
&
GetIndex
()
const
{
return
GetVisibleIndex
();
}
__host__
__device__
constexpr
const
auto
&
GetIndex
()
const
{
return
GetVisibleIndex
();
}
__host__
__device__
constexpr
index_t
GetOffset
()
const
{
return
idx_hidden_
[
0
];
}
__host__
__device__
constexpr
index_t
GetOffset
()
const
{
return
idx_hidden_
[
Number
<
0
>
{}
];
}
// private:
// private:
__host__
__device__
constexpr
const
auto
&
GetHiddenIndex
()
const
{
return
idx_hidden_
;
}
__host__
__device__
constexpr
const
auto
&
GetHiddenIndex
()
const
{
return
idx_hidden_
;
}
...
@@ -517,7 +520,7 @@ __host__ __device__ void move_dynamic_tensor_coordinate_v2(const TensorDesc& ten
...
@@ -517,7 +520,7 @@ __host__ __device__ void move_dynamic_tensor_coordinate_v2(const TensorDesc& ten
using
HiddenIndex
=
MultiIndex
<
ndim_hidden
>
;
using
HiddenIndex
=
MultiIndex
<
ndim_hidden
>
;
// this is what needs to be calculated
// this is what needs to be calculated
auto
idx_diff_hidden
=
HiddenIndex
{{
0
}}
;
auto
idx_diff_hidden
=
make_zero_multi_index
<
ndim_hidden
>
()
;
// initialize visible index diff
// initialize visible index diff
// idx_diff_hidden_pick_visible contains reference to idx_diff_hidden
// idx_diff_hidden_pick_visible contains reference to idx_diff_hidden
...
...
composable_kernel/include/tensor_description/multi_index_transform.hpp
View file @
a339028c
...
@@ -208,7 +208,7 @@ struct Merge
...
@@ -208,7 +208,7 @@ struct Merge
{
{
LowerIndex
idx_low
;
LowerIndex
idx_low
;
index_t
itmp
=
idx_up
[
0
];
index_t
itmp
=
idx_up
[
Number
<
0
>
{}
];
constexpr
auto
pseudo_low_strides
=
constexpr
auto
pseudo_low_strides
=
reverse_inclusive_scan_sequence
(
reverse_inclusive_scan_sequence
(
...
@@ -218,7 +218,7 @@ struct Merge
...
@@ -218,7 +218,7 @@ struct Merge
static_for
<
0
,
nDimLow
-
1
,
1
>
{}(
static_for
<
0
,
nDimLow
-
1
,
1
>
{}(
lambda_CalculateLowerIndex
<
decltype
(
pseudo_low_strides
)
>
(
itmp
,
idx_low
));
lambda_CalculateLowerIndex
<
decltype
(
pseudo_low_strides
)
>
(
itmp
,
idx_low
));
idx_low
(
nDimLow
-
1
)
=
itmp
/
pseudo_low_strides
[
nDimLow
-
1
];
idx_low
(
Number
<
nDimLow
-
1
>
{}
)
=
itmp
/
pseudo_low_strides
[
Number
<
nDimLow
-
1
>
{}
];
return
idx_low
;
return
idx_low
;
}
}
...
...
composable_kernel/include/utility/array_element_picker.hpp
View file @
a339028c
...
@@ -12,7 +12,9 @@ template <typename Arr, typename Picks>
...
@@ -12,7 +12,9 @@ template <typename Arr, typename Picks>
struct
ArrayElementPicker
struct
ArrayElementPicker
{
{
using
type
=
ArrayElementPicker
;
using
type
=
ArrayElementPicker
;
#if 0
using data_type = typename Arr::data_type;
using data_type = typename Arr::data_type;
#endif
__host__
__device__
constexpr
ArrayElementPicker
()
=
delete
;
__host__
__device__
constexpr
ArrayElementPicker
()
=
delete
;
...
@@ -26,20 +28,20 @@ struct ArrayElementPicker
...
@@ -26,20 +28,20 @@ struct ArrayElementPicker
__host__
__device__
static
constexpr
auto
Size
()
{
return
Picks
::
Size
();
}
__host__
__device__
static
constexpr
auto
Size
()
{
return
Picks
::
Size
();
}
template
<
index_t
I
>
template
<
index_t
I
>
__host__
__device__
constexpr
const
data_type
&
At
(
Number
<
I
>
)
const
__host__
__device__
constexpr
const
auto
&
At
(
Number
<
I
>
i
)
const
{
{
static_assert
(
I
<
Size
(),
"wrong!"
);
static_assert
(
I
<
Size
(),
"wrong!"
);
constexpr
auto
IP
=
Picks
{}[
I
];
constexpr
auto
IP
=
Picks
{}[
i
];
return
mArray
[
IP
];
return
mArray
[
IP
];
}
}
template
<
index_t
I
>
template
<
index_t
I
>
__host__
__device__
constexpr
data_type
&
At
(
Number
<
I
>
)
__host__
__device__
constexpr
auto
&
At
(
Number
<
I
>
i
)
{
{
static_assert
(
I
<
Size
(),
"wrong!"
);
static_assert
(
I
<
Size
(),
"wrong!"
);
constexpr
auto
IP
=
Picks
{}[
I
];
constexpr
auto
IP
=
Picks
{}[
i
];
return
mArray
(
IP
);
return
mArray
(
IP
);
}
}
...
...
composable_kernel/include/utility/array_helper.hpp
View file @
a339028c
...
@@ -133,7 +133,7 @@ __host__ __device__ constexpr auto operator*(TData v, Array<TData, NSize> a)
...
@@ -133,7 +133,7 @@ __host__ __device__ constexpr auto operator*(TData v, Array<TData, NSize> a)
template
<
typename
TData
,
typename
Arr
,
typename
Reduce
>
template
<
typename
TData
,
typename
Arr
,
typename
Reduce
>
__host__
__device__
constexpr
TData
reduce_on_array
(
const
Arr
&
a
,
Reduce
f
,
TData
init
)
__host__
__device__
constexpr
TData
reduce_on_array
(
const
Arr
&
a
,
Reduce
f
,
TData
init
)
{
{
static_assert
(
is_same
<
typename
Arr
::
data_type
,
TData
>::
value
,
"wrong! different data type"
);
//
static_assert(is_same<typename Arr::data_type, TData>::value, "wrong! different data type");
static_assert
(
Arr
::
Size
()
>
0
,
"wrong"
);
static_assert
(
Arr
::
Size
()
>
0
,
"wrong"
);
TData
result
=
init
;
TData
result
=
init
;
...
@@ -151,12 +151,22 @@ reverse_inclusive_scan_on_array(const Array<TData, NSize>& x, Reduce f, TData in
...
@@ -151,12 +151,22 @@ reverse_inclusive_scan_on_array(const Array<TData, NSize>& x, Reduce f, TData in
TData
r
=
init
;
TData
r
=
init
;
#if 0
#pragma unroll
#pragma unroll
for(index_t i = NSize - 1; i >= 0; --i)
for(index_t i = NSize - 1; i >= 0; --i)
{
{
r = f(r, x[i]);
r = f(r, x[i]);
y(i) = r;
y(i) = r;
}
}
#else
static_for
<
NSize
-
1
,
0
,
-
1
>
{}([
&
](
auto
i
)
{
r
=
f
(
r
,
x
[
i
]);
y
(
i
)
=
r
;
});
r
=
f
(
r
,
x
[
Number
<
0
>
{}]);
y
(
Number
<
0
>
{})
=
r
;
#endif
return
y
;
return
y
;
}
}
...
@@ -169,6 +179,7 @@ reverse_exclusive_scan_on_array(const Array<TData, NSize>& x, Reduce f, TData in
...
@@ -169,6 +179,7 @@ reverse_exclusive_scan_on_array(const Array<TData, NSize>& x, Reduce f, TData in
TData
r
=
init
;
TData
r
=
init
;
#if 0
#pragma unroll
#pragma unroll
for(index_t i = NSize - 1; i > 0; --i)
for(index_t i = NSize - 1; i > 0; --i)
{
{
...
@@ -177,6 +188,14 @@ reverse_exclusive_scan_on_array(const Array<TData, NSize>& x, Reduce f, TData in
...
@@ -177,6 +188,14 @@ reverse_exclusive_scan_on_array(const Array<TData, NSize>& x, Reduce f, TData in
}
}
y(0) = r;
y(0) = r;
#else
static_for
<
NSize
-
1
,
0
,
-
1
>
{}([
&
](
auto
i
)
{
y
(
i
)
=
r
;
r
=
f
(
r
,
x
[
i
]);
});
y
(
Number
<
0
>
{})
=
r
;
#endif
return
y
;
return
y
;
}
}
...
...
composable_kernel/include/utility/print.hpp
View file @
a339028c
...
@@ -10,7 +10,7 @@ namespace ck {
...
@@ -10,7 +10,7 @@ namespace ck {
template
<
typename
T
>
template
<
typename
T
>
__host__
__device__
void
print_array
(
const
char
*
s
,
T
a
)
__host__
__device__
void
print_array
(
const
char
*
s
,
T
a
)
{
{
using
data_type
=
typename
decltype
(
a
)
::
data_type
;
using
data_type
=
decltype
(
a
.
At
(
Number
<
0
>
{}))
;
constexpr
index_t
nsize
=
a
.
Size
();
constexpr
index_t
nsize
=
a
.
Size
();
if
constexpr
(
is_same
<
data_type
,
uint32_t
>
{})
if
constexpr
(
is_same
<
data_type
,
uint32_t
>
{})
...
@@ -30,7 +30,7 @@ __host__ __device__ void print_array(const char* s, T a)
...
@@ -30,7 +30,7 @@ __host__ __device__ void print_array(const char* s, T a)
template
<
typename
T
>
template
<
typename
T
>
__host__
__device__
void
print_array_v2
(
const
char
*
s
,
T
a
)
__host__
__device__
void
print_array_v2
(
const
char
*
s
,
T
a
)
{
{
using
data_type
=
typename
decltype
(
a
)
::
data_type
;
using
data_type
=
decltype
(
a
.
At
(
Number
<
0
>
{}))
;
constexpr
index_t
nsize
=
a
.
Size
();
constexpr
index_t
nsize
=
a
.
Size
();
if
constexpr
(
is_same
<
data_type
,
uint32_t
>
{})
if
constexpr
(
is_same
<
data_type
,
uint32_t
>
{})
...
...
composable_kernel/include/utility/statically_indexed_array.hpp
View file @
a339028c
...
@@ -7,493 +7,6 @@
...
@@ -7,493 +7,6 @@
namespace
ck
{
namespace
ck
{
#if 0
template <typename T, index_t NSize>
struct StaticallyIndexedArray
{
};
template <typename T>
struct StaticallyIndexedArray<T, 0> : public Tuple<>
{
using data_type = T;
using base = Tuple<>;
__host__ __device__ explicit constexpr StaticallyIndexedArray() : base() {}
__host__ __device__ explicit constexpr StaticallyIndexedArray(const StaticallyIndexedArray&) =
default;
__host__
__device__ explicit constexpr StaticallyIndexedArray(StaticallyIndexedArray&&) = default;
};
template <typename T>
struct StaticallyIndexedArray<T, 1> : public Tuple<T>
{
using type = StaticallyIndexedArray;
using data_type = T;
using base = Tuple<T>;
static constexpr index_t nsize = base::Size();
__host__ __device__ explicit constexpr StaticallyIndexedArray(const StaticallyIndexedArray&) =
default;
__host__
__device__ explicit constexpr StaticallyIndexedArray(StaticallyIndexedArray&&) = default;
template <typename Y>
__host__
__device__ explicit constexpr StaticallyIndexedArray(const StaticallyIndexedArray<Y, nsize>& y)
: base(static_cast<const Tuple<Y>&>(y))
{
}
template <typename Y>
__host__ __device__ explicit constexpr StaticallyIndexedArray(StaticallyIndexedArray<Y, nsize>&& y)
: base(static_cast<Tuple<Y>&&>(y))
{
}
#if 0
template <typename... Ys,
typename std::enable_if<sizeof...(Ys) == base::Size(),
bool>::type = false>
__host__ __device__ explicit constexpr StaticallyIndexedArray(Ys&&... ys)
: base(std::forward<Ys>(ys)...)
{
static_assert(sizeof...(Ys) == nsize, "wrong! inconsistent size");
}
#else
template <typename Y>
__host__ __device__ explicit constexpr StaticallyIndexedArray(Y&& y)
: base(std::forward<Y>(y))
{
}
#endif
};
template
<
typename
T
>
struct
StaticallyIndexedArray
<
T
,
2
>
:
public
Tuple
<
T
,
T
>
{
using
data_type
=
T
;
using
base
=
Tuple
<
T
,
T
>
;
__host__
__device__
explicit
constexpr
StaticallyIndexedArray
(
const
StaticallyIndexedArray
&
)
=
default
;
__host__
__device__
explicit
constexpr
StaticallyIndexedArray
(
StaticallyIndexedArray
&&
)
=
default
;
template
<
typename
Y
>
__host__
__device__
explicit
constexpr
StaticallyIndexedArray
(
const
StaticallyIndexedArray
<
Y
,
2
>&
y
)
:
base
(
static_cast
<
const
Tuple
<
Y
,
Y
>&>
(
y
))
{
}
template
<
typename
Y
>
__host__
__device__
explicit
constexpr
StaticallyIndexedArray
(
StaticallyIndexedArray
<
Y
,
2
>&&
y
)
:
base
(
static_cast
<
Tuple
<
Y
,
Y
>&&>
(
y
))
{
}
template
<
typename
...
Ys
,
typename
std
::
enable_if
<
sizeof
...(
Ys
)
==
base
::
Size
(),
bool
>
::
type
=
false
>
__host__
__device__
explicit
constexpr
StaticallyIndexedArray
(
Ys
&&
...
ys
)
:
base
(
std
::
forward
<
Ys
>
(
ys
)...)
{
static_assert
(
sizeof
...(
Ys
)
==
2
,
"wrong! inconsistent size"
);
}
};
template
<
typename
T
>
struct
StaticallyIndexedArray
<
T
,
3
>
:
public
Tuple
<
T
,
T
,
T
>
{
using
data_type
=
T
;
using
base
=
Tuple
<
T
,
T
,
T
>
;
template
<
typename
...
Ys
>
__host__
__device__
explicit
constexpr
StaticallyIndexedArray
(
Ys
&&
...
ys
)
:
base
(
std
::
forward
<
Ys
>
(
ys
)...)
{
}
__host__
__device__
explicit
constexpr
StaticallyIndexedArray
(
const
StaticallyIndexedArray
&
)
=
default
;
__host__
__device__
explicit
constexpr
StaticallyIndexedArray
(
StaticallyIndexedArray
&&
)
=
default
;
};
template
<
typename
T
>
struct
StaticallyIndexedArray
<
T
,
4
>
:
public
Tuple
<
T
,
T
,
T
,
T
>
{
using
data_type
=
T
;
using
base
=
Tuple
<
T
,
T
,
T
,
T
>
;
template
<
typename
...
Ys
>
__host__
__device__
explicit
constexpr
StaticallyIndexedArray
(
Ys
&&
...
ys
)
:
base
(
std
::
forward
<
Ys
>
(
ys
)...)
{
}
__host__
__device__
explicit
constexpr
StaticallyIndexedArray
(
const
StaticallyIndexedArray
&
)
=
default
;
__host__
__device__
explicit
constexpr
StaticallyIndexedArray
(
StaticallyIndexedArray
&&
)
=
default
;
};
template
<
typename
T
>
struct
StaticallyIndexedArray
<
T
,
5
>
:
public
Tuple
<
T
,
T
,
T
,
T
,
T
>
{
using
data_type
=
T
;
using
base
=
Tuple
<
T
,
T
,
T
,
T
,
T
>
;
template
<
typename
...
Ys
>
__host__
__device__
explicit
constexpr
StaticallyIndexedArray
(
Ys
&&
...
ys
)
:
base
(
std
::
forward
<
Ys
>
(
ys
)...)
{
}
__host__
__device__
explicit
constexpr
StaticallyIndexedArray
(
const
StaticallyIndexedArray
&
)
=
default
;
__host__
__device__
explicit
constexpr
StaticallyIndexedArray
(
StaticallyIndexedArray
&&
)
=
default
;
};
template
<
typename
T
>
struct
StaticallyIndexedArray
<
T
,
6
>
:
public
Tuple
<
T
,
T
,
T
,
T
,
T
,
T
>
{
using
data_type
=
T
;
using
base
=
Tuple
<
T
,
T
,
T
,
T
,
T
,
T
>
;
template
<
typename
...
Ys
>
__host__
__device__
explicit
constexpr
StaticallyIndexedArray
(
Ys
&&
...
ys
)
:
base
(
std
::
forward
<
Ys
>
(
ys
)...)
{
}
__host__
__device__
explicit
constexpr
StaticallyIndexedArray
(
const
StaticallyIndexedArray
&
)
=
default
;
__host__
__device__
explicit
constexpr
StaticallyIndexedArray
(
StaticallyIndexedArray
&&
)
=
default
;
};
template
<
typename
T
>
struct
StaticallyIndexedArray
<
T
,
7
>
:
public
Tuple
<
T
,
T
,
T
,
T
,
T
,
T
,
T
>
{
using
data_type
=
T
;
using
base
=
Tuple
<
T
,
T
,
T
,
T
,
T
,
T
,
T
>
;
template
<
typename
...
Ys
>
__host__
__device__
explicit
constexpr
StaticallyIndexedArray
(
Ys
&&
...
ys
)
:
base
(
std
::
forward
<
Ys
>
(
ys
)...)
{
}
__host__
__device__
explicit
constexpr
StaticallyIndexedArray
(
const
StaticallyIndexedArray
&
)
=
default
;
__host__
__device__
explicit
constexpr
StaticallyIndexedArray
(
StaticallyIndexedArray
&&
)
=
default
;
};
template
<
typename
T
>
struct
StaticallyIndexedArray
<
T
,
8
>
:
public
Tuple
<
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
>
{
using
data_type
=
T
;
using
base
=
Tuple
<
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
>
;
template
<
typename
...
Ys
>
__host__
__device__
explicit
constexpr
StaticallyIndexedArray
(
Ys
&&
...
ys
)
:
base
(
std
::
forward
<
Ys
>
(
ys
)...)
{
}
__host__
__device__
explicit
constexpr
StaticallyIndexedArray
(
const
StaticallyIndexedArray
&
)
=
default
;
__host__
__device__
explicit
constexpr
StaticallyIndexedArray
(
StaticallyIndexedArray
&&
)
=
default
;
};
template
<
typename
T
>
struct
StaticallyIndexedArray
<
T
,
9
>
:
public
Tuple
<
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
>
{
using
data_type
=
T
;
using
base
=
Tuple
<
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
>
;
template
<
typename
...
Ys
>
__host__
__device__
explicit
constexpr
StaticallyIndexedArray
(
Ys
&&
...
ys
)
:
base
(
std
::
forward
<
Ys
>
(
ys
)...)
{
}
__host__
__device__
explicit
constexpr
StaticallyIndexedArray
(
const
StaticallyIndexedArray
&
)
=
default
;
__host__
__device__
explicit
constexpr
StaticallyIndexedArray
(
StaticallyIndexedArray
&&
)
=
default
;
};
template
<
typename
T
>
struct
StaticallyIndexedArray
<
T
,
10
>
:
public
Tuple
<
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
>
{
using
data_type
=
T
;
using
base
=
Tuple
<
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
>
;
template
<
typename
...
Ys
>
__host__
__device__
explicit
constexpr
StaticallyIndexedArray
(
Ys
&&
...
ys
)
:
base
(
std
::
forward
<
Ys
>
(
ys
)...)
{
}
__host__
__device__
explicit
constexpr
StaticallyIndexedArray
(
const
StaticallyIndexedArray
&
)
=
default
;
__host__
__device__
explicit
constexpr
StaticallyIndexedArray
(
StaticallyIndexedArray
&&
)
=
default
;
};
template
<
typename
T
>
struct
StaticallyIndexedArray
<
T
,
11
>
:
public
Tuple
<
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
>
{
using
data_type
=
T
;
using
base
=
Tuple
<
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
>
;
template
<
typename
...
Ys
>
__host__
__device__
explicit
constexpr
StaticallyIndexedArray
(
Ys
&&
...
ys
)
:
base
(
std
::
forward
<
Ys
>
(
ys
)...)
{
}
__host__
__device__
explicit
constexpr
StaticallyIndexedArray
(
const
StaticallyIndexedArray
&
)
=
default
;
__host__
__device__
explicit
constexpr
StaticallyIndexedArray
(
StaticallyIndexedArray
&&
)
=
default
;
};
template
<
typename
T
>
struct
StaticallyIndexedArray
<
T
,
12
>
:
public
Tuple
<
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
>
{
using
data_type
=
T
;
using
base
=
Tuple
<
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
>
;
template
<
typename
...
Ys
>
__host__
__device__
explicit
constexpr
StaticallyIndexedArray
(
Ys
&&
...
ys
)
:
base
(
std
::
forward
<
Ys
>
(
ys
)...)
{
}
__host__
__device__
explicit
constexpr
StaticallyIndexedArray
(
const
StaticallyIndexedArray
&
)
=
default
;
__host__
__device__
explicit
constexpr
StaticallyIndexedArray
(
StaticallyIndexedArray
&&
)
=
default
;
};
template
<
typename
T
>
struct
StaticallyIndexedArray
<
T
,
13
>
:
public
Tuple
<
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
>
{
using
data_type
=
T
;
using
base
=
Tuple
<
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
>
;
template
<
typename
...
Ys
>
__host__
__device__
explicit
constexpr
StaticallyIndexedArray
(
Ys
&&
...
ys
)
:
base
(
std
::
forward
<
Ys
>
(
ys
)...)
{
}
__host__
__device__
explicit
constexpr
StaticallyIndexedArray
(
const
StaticallyIndexedArray
&
)
=
default
;
__host__
__device__
explicit
constexpr
StaticallyIndexedArray
(
StaticallyIndexedArray
&&
)
=
default
;
};
template
<
typename
T
>
struct
StaticallyIndexedArray
<
T
,
14
>
:
public
Tuple
<
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
>
{
using
data_type
=
T
;
using
base
=
Tuple
<
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
>
;
template
<
typename
...
Ys
>
__host__
__device__
explicit
constexpr
StaticallyIndexedArray
(
Ys
&&
...
ys
)
:
base
(
std
::
forward
<
Ys
>
(
ys
)...)
{
}
__host__
__device__
explicit
constexpr
StaticallyIndexedArray
(
const
StaticallyIndexedArray
&
)
=
default
;
__host__
__device__
explicit
constexpr
StaticallyIndexedArray
(
StaticallyIndexedArray
&&
)
=
default
;
};
template
<
typename
T
>
struct
StaticallyIndexedArray
<
T
,
15
>
:
public
Tuple
<
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
>
{
using
data_type
=
T
;
using
base
=
Tuple
<
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
>
;
template
<
typename
...
Ys
>
__host__
__device__
explicit
constexpr
StaticallyIndexedArray
(
Ys
&&
...
ys
)
:
base
(
std
::
forward
<
Ys
>
(
ys
)...)
{
}
__host__
__device__
explicit
constexpr
StaticallyIndexedArray
(
const
StaticallyIndexedArray
&
)
=
default
;
__host__
__device__
explicit
constexpr
StaticallyIndexedArray
(
StaticallyIndexedArray
&&
)
=
default
;
};
template
<
typename
T
>
struct
StaticallyIndexedArray
<
T
,
16
>
:
public
Tuple
<
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
>
{
using
data_type
=
T
;
using
base
=
Tuple
<
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
>
;
template
<
typename
...
Ys
>
__host__
__device__
explicit
constexpr
StaticallyIndexedArray
(
Ys
&&
...
ys
)
:
base
(
std
::
forward
<
Ys
>
(
ys
)...)
{
}
__host__
__device__
explicit
constexpr
StaticallyIndexedArray
(
const
StaticallyIndexedArray
&
)
=
default
;
__host__
__device__
explicit
constexpr
StaticallyIndexedArray
(
StaticallyIndexedArray
&&
)
=
default
;
};
template
<
typename
T
>
struct
StaticallyIndexedArray
<
T
,
17
>
:
public
Tuple
<
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
>
{
using
data_type
=
T
;
using
base
=
Tuple
<
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
>
;
template
<
typename
...
Ys
>
__host__
__device__
explicit
constexpr
StaticallyIndexedArray
(
Ys
&&
...
ys
)
:
base
(
std
::
forward
<
Ys
>
(
ys
)...)
{
}
__host__
__device__
explicit
constexpr
StaticallyIndexedArray
(
const
StaticallyIndexedArray
&
)
=
default
;
__host__
__device__
explicit
constexpr
StaticallyIndexedArray
(
StaticallyIndexedArray
&&
)
=
default
;
};
template
<
typename
T
>
struct
StaticallyIndexedArray
<
T
,
18
>
:
public
Tuple
<
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
>
{
using
data_type
=
T
;
using
base
=
Tuple
<
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
>
;
template
<
typename
...
Ys
>
__host__
__device__
explicit
constexpr
StaticallyIndexedArray
(
Ys
&&
...
ys
)
:
base
(
std
::
forward
<
Ys
>
(
ys
)...)
{
}
__host__
__device__
explicit
constexpr
StaticallyIndexedArray
(
const
StaticallyIndexedArray
&
)
=
default
;
__host__
__device__
explicit
constexpr
StaticallyIndexedArray
(
StaticallyIndexedArray
&&
)
=
default
;
};
template
<
typename
T
>
struct
StaticallyIndexedArray
<
T
,
19
>
:
public
Tuple
<
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
>
{
using
data_type
=
T
;
using
base
=
Tuple
<
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
>
;
template
<
typename
...
Ys
>
__host__
__device__
explicit
constexpr
StaticallyIndexedArray
(
Ys
&&
...
ys
)
:
base
(
std
::
forward
<
Ys
>
(
ys
)...)
{
}
__host__
__device__
explicit
constexpr
StaticallyIndexedArray
(
const
StaticallyIndexedArray
&
)
=
default
;
__host__
__device__
explicit
constexpr
StaticallyIndexedArray
(
StaticallyIndexedArray
&&
)
=
default
;
};
template
<
typename
T
>
struct
StaticallyIndexedArray
<
T
,
20
>
:
public
Tuple
<
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
>
{
using
data_type
=
T
;
using
base
=
Tuple
<
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
>
;
template
<
typename
...
Ys
>
__host__
__device__
explicit
constexpr
StaticallyIndexedArray
(
Ys
&&
...
ys
)
:
base
(
std
::
forward
<
Ys
>
(
ys
)...)
{
}
__host__
__device__
explicit
constexpr
StaticallyIndexedArray
(
const
StaticallyIndexedArray
&
)
=
default
;
__host__
__device__
explicit
constexpr
StaticallyIndexedArray
(
StaticallyIndexedArray
&&
)
=
default
;
};
template
<
typename
T
>
struct
StaticallyIndexedArray
<
T
,
21
>
:
public
Tuple
<
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
>
{
using
data_type
=
T
;
using
base
=
Tuple
<
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
>
;
template
<
typename
...
Ys
>
__host__
__device__
explicit
constexpr
StaticallyIndexedArray
(
Ys
&&
...
ys
)
:
base
(
std
::
forward
<
Ys
>
(
ys
)...)
{
}
__host__
__device__
explicit
constexpr
StaticallyIndexedArray
(
const
StaticallyIndexedArray
&
)
=
default
;
__host__
__device__
explicit
constexpr
StaticallyIndexedArray
(
StaticallyIndexedArray
&&
)
=
default
;
};
template
<
typename
T
>
struct
StaticallyIndexedArray
<
T
,
22
>
:
public
Tuple
<
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
>
{
using
data_type
=
T
;
using
base
=
Tuple
<
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
,
T
>
;
template
<
typename
...
Ys
>
__host__
__device__
explicit
constexpr
StaticallyIndexedArray
(
Ys
&&
...
ys
)
:
base
(
std
::
forward
<
Ys
>
(
ys
)...)
{
}
__host__
__device__
explicit
constexpr
StaticallyIndexedArray
(
const
StaticallyIndexedArray
&
)
=
default
;
__host__
__device__
explicit
constexpr
StaticallyIndexedArray
(
StaticallyIndexedArray
&&
)
=
default
;
};
#else
namespace
detail
{
namespace
detail
{
template
<
typename
T
,
index_t
NSize
>
template
<
typename
T
,
index_t
NSize
>
...
@@ -507,48 +20,8 @@ using same_type_tuple = decltype(generate_same_type_tuple<T, NSize>());
...
@@ -507,48 +20,8 @@ using same_type_tuple = decltype(generate_same_type_tuple<T, NSize>());
}
// namespace detail
}
// namespace detail
#if 0
template <typename T, index_t NSize>
struct StaticallyIndexedArray : public detail::same_type_tuple<T, NSize>
{
using type = StaticallyIndexedArray;
using data_type = T;
using base = detail::same_type_tuple<T, NSize>;
__host__ __device__ explicit constexpr StaticallyIndexedArray(const StaticallyIndexedArray&) =
default;
__host__
__device__ explicit constexpr StaticallyIndexedArray(StaticallyIndexedArray&&) = default;
template <typename Y>
__host__ __device__ explicit constexpr StaticallyIndexedArray(
const StaticallyIndexedArray<Y, NSize>& y)
: base(static_cast<const detail::same_type_tuple<Y, NSize>&>(y))
{
}
template <typename Y>
__host__
__device__ explicit constexpr StaticallyIndexedArray(StaticallyIndexedArray<Y, NSize>&& y)
: base(static_cast<detail::same_type_tuple<Y, NSize>&&>(y))
{
}
template <typename... Ys,
typename std::enable_if<sizeof...(Ys) == base::Size(), bool>::type = false>
__host__ __device__ explicit constexpr StaticallyIndexedArray(Ys&&... ys)
: base(std::forward<Ys>(ys)...)
{
static_assert(sizeof...(Ys) == NSize, "wrong! inconsistent size");
}
};
#else
template
<
typename
T
,
index_t
NSize
>
template
<
typename
T
,
index_t
NSize
>
using
StaticallyIndexedArray
=
detail
::
same_type_tuple
<
T
,
NSize
>
;
using
StaticallyIndexedArray
=
detail
::
same_type_tuple
<
T
,
NSize
>
;
#endif
#endif
template
<
typename
X
,
typename
...
Xs
>
template
<
typename
X
,
typename
...
Xs
>
__host__
__device__
constexpr
auto
make_statically_indexed_array
(
const
X
&
x
,
const
Xs
&
...
xs
)
__host__
__device__
constexpr
auto
make_statically_indexed_array
(
const
X
&
x
,
const
Xs
&
...
xs
)
...
@@ -563,5 +36,42 @@ __host__ __device__ constexpr auto make_statically_indexed_array()
...
@@ -563,5 +36,42 @@ __host__ __device__ constexpr auto make_statically_indexed_array()
return
StaticallyIndexedArray
<
X
,
0
>
();
return
StaticallyIndexedArray
<
X
,
0
>
();
}
}
template
<
typename
TData
,
index_t
NSize
,
typename
Reduce
>
__host__
__device__
constexpr
auto
reverse_exclusive_scan_on_array
(
const
StaticallyIndexedArray
<
TData
,
NSize
>&
x
,
Reduce
f
,
TData
init
)
{
StaticallyIndexedArray
<
TData
,
NSize
>
y
;
TData
r
=
init
;
static_for
<
NSize
-
1
,
0
,
-
1
>
{}([
&
](
auto
i
)
{
y
(
i
)
=
r
;
r
=
f
(
r
,
x
[
i
]);
});
y
(
Number
<
0
>
{})
=
r
;
return
y
;
}
template
<
typename
TData
,
index_t
NSize
,
typename
Reduce
>
__host__
__device__
constexpr
auto
reverse_inclusive_scan_on_array
(
const
StaticallyIndexedArray
<
TData
,
NSize
>&
x
,
Reduce
f
,
TData
init
)
{
StaticallyIndexedArray
<
TData
,
NSize
>
y
;
TData
r
=
init
;
static_for
<
NSize
-
1
,
0
,
-
1
>
{}([
&
](
auto
i
)
{
r
=
f
(
r
,
x
[
i
]);
y
(
i
)
=
r
;
});
r
=
f
(
r
,
x
[
Number
<
0
>
{}]);
y
(
Number
<
0
>
{})
=
r
;
return
y
;
}
}
// namespace ck
}
// namespace ck
#endif
#endif
composable_kernel/include/utility/tuple.hpp
View file @
a339028c
...
@@ -124,9 +124,9 @@ struct Tuple : detail::TupleImpl<typename arithmetic_sequence_gen<0, sizeof...(X
...
@@ -124,9 +124,9 @@ struct Tuple : detail::TupleImpl<typename arithmetic_sequence_gen<0, sizeof...(X
__host__
__device__
explicit
constexpr
Tuple
()
:
base
()
{}
__host__
__device__
explicit
constexpr
Tuple
()
:
base
()
{}
__host__
__device__
explicit
constexpr
Tuple
(
const
Tuple
&
)
=
default
;
__host__
__device__
constexpr
Tuple
(
const
Tuple
&
)
=
default
;
__host__
__device__
explicit
constexpr
Tuple
(
Tuple
&&
)
=
default
;
__host__
__device__
constexpr
Tuple
(
Tuple
&&
)
=
default
;
#if 0
#if 0
template <typename... Ys,
template <typename... Ys,
...
...
driver/include/device_dummy_dynamic_transform_v2.hpp
View file @
a339028c
...
@@ -28,11 +28,11 @@ void device_dummy_dynamic_transform_v2(InDesc,
...
@@ -28,11 +28,11 @@ void device_dummy_dynamic_transform_v2(InDesc,
using
TDevice
=
typename
conditional
<
is_same
<
half_float
::
half
,
T
>::
value
,
half_t
,
T
>::
type
;
using
TDevice
=
typename
conditional
<
is_same
<
half_float
::
half
,
T
>::
value
,
half_t
,
T
>::
type
;
const
auto
in_nchw_desc
=
make_dynamic_native_tensor_descriptor_v2
(
const
auto
in_nchw_desc
=
make_dynamic_native_tensor_descriptor_v2
<
4
>
(
to_multi_index
(
InDesc
::
GetLengths
()),
to_multi_index
(
InDesc
::
GetStrides
()));
to_multi_index
(
InDesc
::
GetLengths
()),
to_multi_index
(
InDesc
::
GetStrides
()));
const
auto
wei_kcyx_desc
=
make_dynamic_native_tensor_descriptor_v2
(
const
auto
wei_kcyx_desc
=
make_dynamic_native_tensor_descriptor_v2
<
4
>
(
to_multi_index
(
WeiDesc
::
GetLengths
()),
to_multi_index
(
WeiDesc
::
GetStrides
()));
to_multi_index
(
WeiDesc
::
GetLengths
()),
to_multi_index
(
WeiDesc
::
GetStrides
()));
const
auto
out_nkhw_desc
=
make_dynamic_native_tensor_descriptor_v2
(
const
auto
out_nkhw_desc
=
make_dynamic_native_tensor_descriptor_v2
<
4
>
(
to_multi_index
(
OutDesc
::
GetLengths
()),
to_multi_index
(
OutDesc
::
GetStrides
()));
to_multi_index
(
OutDesc
::
GetLengths
()),
to_multi_index
(
OutDesc
::
GetStrides
()));
const
auto
conv_strides
=
to_multi_index
(
ConvStrides
{});
const
auto
conv_strides
=
to_multi_index
(
ConvStrides
{});
...
@@ -124,11 +124,7 @@ void device_dummy_dynamic_transform_v2(InDesc,
...
@@ -124,11 +124,7 @@ void device_dummy_dynamic_transform_v2(InDesc,
index_t
*
const
,
index_t
*
const
,
float
*
const
,
float
*
const
,
float
*
const
,
float
*
const
,
const
decltype
(
in_gemmk_gemmn_global_desc
),
const
decltype
(
in_gemmk_gemmn_global_desc
)
>
,
const
MultiIndex
<
2
>
,
const
MultiIndex
<
2
>
,
const
MultiIndex
<
2
>
,
const
MultiIndex
<
2
>>
,
dim3
(
GridSize
),
dim3
(
GridSize
),
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
...
@@ -136,11 +132,7 @@ void device_dummy_dynamic_transform_v2(InDesc,
...
@@ -136,11 +132,7 @@ void device_dummy_dynamic_transform_v2(InDesc,
static_cast
<
index_t
*>
(
wei_kcyx_device_buf
.
GetDeviceBuffer
()),
static_cast
<
index_t
*>
(
wei_kcyx_device_buf
.
GetDeviceBuffer
()),
static_cast
<
float
*>
(
in_nchw_device_buf
.
GetDeviceBuffer
()),
static_cast
<
float
*>
(
in_nchw_device_buf
.
GetDeviceBuffer
()),
static_cast
<
float
*>
(
out_nkhw_device_buf
.
GetDeviceBuffer
()),
static_cast
<
float
*>
(
out_nkhw_device_buf
.
GetDeviceBuffer
()),
in_gemmk_gemmn_global_desc
,
in_gemmk_gemmn_global_desc
);
conv_strides
,
conv_dilations
,
in_left_pads
,
in_right_pads
);
#endif
#endif
}
}
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment