Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
34cbbb48
Commit
34cbbb48
authored
Sep 30, 2020
by
Chao Liu
Browse files
refactored dynamically indexed array works now
parent
4d70c71b
Changes
18
Hide whitespace changes
Inline
Side-by-side
Showing
18 changed files
with
230 additions
and
242 deletions
+230
-242
composable_kernel/include/kernel_algorithm/dummy_dynamic_transform_v1.hpp
...l/include/kernel_algorithm/dummy_dynamic_transform_v1.hpp
+15
-14
composable_kernel/include/kernel_algorithm/dummy_dynamic_transform_v2.hpp
...l/include/kernel_algorithm/dummy_dynamic_transform_v2.hpp
+32
-10
composable_kernel/include/tensor_description/dynamic_multi_index_transform.hpp
...lude/tensor_description/dynamic_multi_index_transform.hpp
+3
-3
composable_kernel/include/tensor_description/dynamic_tensor_coordinate.hpp
.../include/tensor_description/dynamic_tensor_coordinate.hpp
+1
-1
composable_kernel/include/tensor_description/dynamic_tensor_descriptor.hpp
.../include/tensor_description/dynamic_tensor_descriptor.hpp
+7
-6
composable_kernel/include/tensor_description/dynamic_tensor_descriptor_helper_v2.hpp
...ensor_description/dynamic_tensor_descriptor_helper_v2.hpp
+1
-1
composable_kernel/include/tensor_description/dynamic_tensor_descriptor_v2.hpp
...clude/tensor_description/dynamic_tensor_descriptor_v2.hpp
+5
-5
composable_kernel/include/tensor_description/multi_index.hpp
composable_kernel/include/tensor_description/multi_index.hpp
+79
-19
composable_kernel/include/utility/array_helper.hpp
composable_kernel/include/utility/array_helper.hpp
+73
-119
composable_kernel/include/utility/config.amd.hpp.in
composable_kernel/include/utility/config.amd.hpp.in
+1
-1
composable_kernel/include/utility/functional3.hpp
composable_kernel/include/utility/functional3.hpp
+2
-2
composable_kernel/include/utility/functional4.hpp
composable_kernel/include/utility/functional4.hpp
+1
-1
composable_kernel/include/utility/statically_indexed_array.hpp
...sable_kernel/include/utility/statically_indexed_array.hpp
+0
-37
composable_kernel/include/utility/tuple.hpp
composable_kernel/include/utility/tuple.hpp
+3
-9
composable_kernel/include/utility/tuple_helper.hpp
composable_kernel/include/utility/tuple_helper.hpp
+0
-7
driver/include/device_dummy_dynamic_transform_v1.hpp
driver/include/device_dummy_dynamic_transform_v1.hpp
+5
-5
driver/include/device_dummy_dynamic_transform_v2.hpp
driver/include/device_dummy_dynamic_transform_v2.hpp
+1
-1
driver/src/conv_driver.cpp
driver/src/conv_driver.cpp
+1
-1
No files found.
composable_kernel/include/kernel_algorithm/dummy_dynamic_transform_v1.hpp
View file @
34cbbb48
...
@@ -97,10 +97,10 @@ struct DummyDynamicTransform_v1
...
@@ -97,10 +97,10 @@ struct DummyDynamicTransform_v1
const
WeiDesc
wei_k_c_y_x_global_desc
,
const
WeiDesc
wei_k_c_y_x_global_desc
,
const
InDesc
in_n_c_hi_wi_global_desc
,
const
InDesc
in_n_c_hi_wi_global_desc
,
const
OutDesc
out_n_k_ho_wo_global_desc
,
const
OutDesc
out_n_k_ho_wo_global_desc
,
const
MultiIndex
<
2
>
&
conv_strides
,
const
MultiIndex
<
2
>
conv_strides
,
const
MultiIndex
<
2
>
&
conv_dilations
,
const
MultiIndex
<
2
>
conv_dilations
,
const
MultiIndex
<
2
>
&
in_left_pads
,
const
MultiIndex
<
2
>
in_left_pads
,
const
MultiIndex
<
2
>
&
in_right_pads
)
const
const
MultiIndex
<
2
>
in_right_pads
)
const
{
{
constexpr
auto
I0
=
Number
<
0
>
{};
constexpr
auto
I0
=
Number
<
0
>
{};
constexpr
auto
I1
=
Number
<
1
>
{};
constexpr
auto
I1
=
Number
<
1
>
{};
...
@@ -542,10 +542,10 @@ struct DummyDynamicTransform_v1
...
@@ -542,10 +542,10 @@ struct DummyDynamicTransform_v1
const
WeiDesc
wei_k_c_y_x_global_desc
,
const
WeiDesc
wei_k_c_y_x_global_desc
,
const
InDesc
in_n_c_hi_wi_global_desc
,
const
InDesc
in_n_c_hi_wi_global_desc
,
const
OutDesc
out_n_k_ho_wo_global_desc
,
const
OutDesc
out_n_k_ho_wo_global_desc
,
const
MultiIndex
<
2
>
&
conv_strides
,
const
MultiIndex
<
2
>
conv_strides
,
const
MultiIndex
<
2
>
&
conv_dilations
,
const
MultiIndex
<
2
>
conv_dilations
,
const
MultiIndex
<
2
>
&
in_left_pads
,
const
MultiIndex
<
2
>
in_left_pads
,
const
MultiIndex
<
2
>
&
in_right_pads
)
const
const
MultiIndex
<
2
>
in_right_pads
)
const
{
{
const
auto
transformed_tensor_descs
=
const
auto
transformed_tensor_descs
=
map_convolution_into_gemm_v1
(
wei_k_c_y_x_global_desc
,
map_convolution_into_gemm_v1
(
wei_k_c_y_x_global_desc
,
...
@@ -564,7 +564,8 @@ struct DummyDynamicTransform_v1
...
@@ -564,7 +564,8 @@ struct DummyDynamicTransform_v1
const
index_t
niter
=
p_wei_global
[
10
];
const
index_t
niter
=
p_wei_global
[
10
];
auto
in_gemmk_gemmn_coord
=
make_dynamic_tensor_coordinate
(
in_gemmk_gemmn_global_desc
,
idx
);
auto
in_gemmk_gemmn_coord
=
make_dynamic_tensor_coordinate
<
2
>
(
in_gemmk_gemmn_global_desc
,
idx
);
for
(
index_t
iter
=
0
;
iter
<
niter
;
++
iter
)
for
(
index_t
iter
=
0
;
iter
<
niter
;
++
iter
)
{
{
...
@@ -587,7 +588,7 @@ struct DummyDynamicTransform_v1
...
@@ -587,7 +588,7 @@ struct DummyDynamicTransform_v1
1
,
1
,
p_out_global
,
p_out_global
,
in_gemmk_gemmn_coord
.
GetOffset
(),
in_gemmk_gemmn_coord
.
GetOffset
(),
#if
0
#if
1
in_gemmk_gemmn_coord
.
IsOffsetValidAssumingUpperIndexIsValid
(),
in_gemmk_gemmn_coord
.
IsOffsetValidAssumingUpperIndexIsValid
(),
#else
#else
true
,
true
,
...
@@ -603,10 +604,10 @@ struct DummyDynamicTransform_v1
...
@@ -603,10 +604,10 @@ struct DummyDynamicTransform_v1
const
WeiDesc
wei_k_c_y_x_global_desc
,
const
WeiDesc
wei_k_c_y_x_global_desc
,
const
InDesc
in_n_c_hi_wi_global_desc
,
const
InDesc
in_n_c_hi_wi_global_desc
,
const
OutDesc
out_n_k_ho_wo_global_desc
,
const
OutDesc
out_n_k_ho_wo_global_desc
,
const
MultiIndex
<
2
>
&
conv_strides
,
const
MultiIndex
<
2
>
conv_strides
,
const
MultiIndex
<
2
>
&
conv_dilations
,
const
MultiIndex
<
2
>
conv_dilations
,
const
MultiIndex
<
2
>
&
in_left_pads
,
const
MultiIndex
<
2
>
in_left_pads
,
const
MultiIndex
<
2
>
&
in_right_pads
)
const
const
MultiIndex
<
2
>
in_right_pads
)
const
{
{
Run_2
(
p_wei_global
,
Run_2
(
p_wei_global
,
p_in_global
,
p_in_global
,
...
...
composable_kernel/include/kernel_algorithm/dummy_dynamic_transform_v2.hpp
View file @
34cbbb48
...
@@ -117,19 +117,41 @@ struct DummyDynamicTransform_v2_1
...
@@ -117,19 +117,41 @@ struct DummyDynamicTransform_v2_1
// initialize idx
// initialize idx
static_for
<
0
,
2
,
1
>
{}([
&
](
auto
i
)
{
idx
(
i
)
=
p_wei_global
[
get_thread_local_1d_id
()
+
i
];
});
static_for
<
0
,
2
,
1
>
{}([
&
](
auto
i
)
{
idx
(
i
)
=
p_wei_global
[
get_thread_local_1d_id
()
+
i
];
});
const
index_t
niter
=
p_wei_global
[
10
];
auto
in_gemmk_gemmn_coord
=
auto
in_gemmk_gemmn_coord
=
make_dynamic_tensor_coordinate_v2
(
in_gemmk_gemmn_global_desc
,
idx
);
make_dynamic_tensor_coordinate_v2
(
in_gemmk_gemmn_global_desc
,
idx
);
const
auto
in_gemmk_gemmn_coord_step
=
make_dynamic_tensor_coordinate_step_v2
(
const
auto
in_gemmk_gemmn_coord_step
=
make_dynamic_tensor_coordinate_step_v2
(
in_gemmk_gemmn_global_desc
,
make_multi_index
(
1
,
0
));
in_gemmk_gemmn_global_desc
,
make_multi_index
(
1
,
0
));
#pragma unroll 1
for
(
index_t
iter
=
0
;
iter
<
niter
;
++
iter
)
for
(
index_t
i
=
0
;
i
<
10
;
++
i
)
{
{
move_dynamic_tensor_coordinate_v2
(
move_dynamic_tensor_coordinate_v2
(
in_gemmk_gemmn_global_desc
,
in_gemmk_gemmn_coord
,
in_gemmk_gemmn_coord_step
);
in_gemmk_gemmn_global_desc
,
in_gemmk_gemmn_coord
,
in_gemmk_gemmn_coord_step
);
p_out_global
[
in_gemmk_gemmn_coord
.
GetOffset
()]
=
1
;
// write
float
value
=
1
;
transfer_data
<
float
,
1
,
AddressSpace
::
Vgpr
,
AddressSpace
::
Global
,
InMemoryDataOperation
::
Set
,
1
,
1
>
(
&
value
,
0
,
true
,
1
,
p_out_global
,
in_gemmk_gemmn_coord
.
GetOffset
(),
#if 1
coordinate_has_valid_offset_assuming_visible_index_is_valid
(
in_gemmk_gemmn_global_desc
,
in_gemmk_gemmn_coord
),
#else
true
,
#endif
in_gemmk_gemmn_global_desc
.
GetElementSpaceSize
());
}
}
}
}
...
@@ -215,13 +237,13 @@ struct DummyDynamicTransform_v2_1
...
@@ -215,13 +237,13 @@ struct DummyDynamicTransform_v2_1
// initialize idx
// initialize idx
static_for
<
0
,
4
,
1
>
{}([
&
](
auto
i
)
{
idx
(
i
)
=
p_wei_global
[
get_thread_local_1d_id
()
+
i
];
});
static_for
<
0
,
4
,
1
>
{}([
&
](
auto
i
)
{
idx
(
i
)
=
p_wei_global
[
get_thread_local_1d_id
()
+
i
];
});
#if
0
#if
1
const
index_t
niter
=
p_wei_global
[
10
];
const
index_t
niter
=
p_wei_global
[
10
];
auto
in_coord
=
make_dynamic_tensor_coordinate_v2
(
in_n_c_hip_wip_global_desc
,
idx
);
auto
in_coord
=
make_dynamic_tensor_coordinate_v2
(
in_n_c_hip_wip_global_desc
,
idx
);
const
auto
in_coord_step
=
make_dynamic_tensor_coordinate_step_v2
(
const
auto
in_coord_step
=
make_dynamic_tensor_coordinate_step_v2
(
in_n_c_hip_wip_global_desc,
M
ulti
I
ndex
<4>{{
1, 0, 0, 0
}}
);
in_n_c_hip_wip_global_desc
,
make_m
ulti
_i
ndex
(
1
,
0
,
0
,
0
)
);
for
(
index_t
iter
=
0
;
iter
<
niter
;
++
iter
)
for
(
index_t
iter
=
0
;
iter
<
niter
;
++
iter
)
{
{
...
@@ -266,7 +288,7 @@ struct DummyDynamicTransform_v2_1
...
@@ -266,7 +288,7 @@ struct DummyDynamicTransform_v2_1
const
MultiIndex
<
2
>
in_left_pads
,
const
MultiIndex
<
2
>
in_left_pads
,
const
MultiIndex
<
2
>
in_right_pads
)
const
const
MultiIndex
<
2
>
in_right_pads
)
const
{
{
Run_
1
(
p_wei_global
,
Run_
2
(
p_wei_global
,
p_in_global
,
p_in_global
,
p_out_global
,
p_out_global
,
wei_k_c_y_x_global_desc
,
wei_k_c_y_x_global_desc
,
...
@@ -293,7 +315,6 @@ struct DummyDynamicTransform_v2_2
...
@@ -293,7 +315,6 @@ struct DummyDynamicTransform_v2_2
// initialize idx
// initialize idx
static_for
<
0
,
2
,
1
>
{}([
&
](
auto
i
)
{
idx
(
i
)
=
p_wei_global
[
get_thread_local_1d_id
()
+
i
];
});
static_for
<
0
,
2
,
1
>
{}([
&
](
auto
i
)
{
idx
(
i
)
=
p_wei_global
[
get_thread_local_1d_id
()
+
i
];
});
#if 1
const
index_t
niter
=
p_wei_global
[
10
];
const
index_t
niter
=
p_wei_global
[
10
];
auto
in_gemmk_gemmn_coord
=
auto
in_gemmk_gemmn_coord
=
...
@@ -322,13 +343,14 @@ struct DummyDynamicTransform_v2_2
...
@@ -322,13 +343,14 @@ struct DummyDynamicTransform_v2_2
1
,
1
,
p_out_global
,
p_out_global
,
in_gemmk_gemmn_coord
.
GetOffset
(),
in_gemmk_gemmn_coord
.
GetOffset
(),
#if 0
coordinate_has_valid_offset_assuming_visible_index_is_valid(
coordinate_has_valid_offset_assuming_visible_index_is_valid(
in_gemmk_gemmn_global_desc, in_gemmk_gemmn_coord),
in_gemmk_gemmn_global_desc, in_gemmk_gemmn_coord),
in_gemmk_gemmn_global_desc
.
GetElementSpaceSize
());
}
#else
#else
p_out_global
[
in_gemmk_gemmn_global_desc
.
CalculateOffset
(
idx
)]
=
1
;
true
,
#endif
#endif
in_gemmk_gemmn_global_desc
.
GetElementSpaceSize
());
}
}
}
};
};
...
...
composable_kernel/include/tensor_description/dynamic_multi_index_transform.hpp
View file @
34cbbb48
...
@@ -371,10 +371,10 @@ struct DynamicMerge
...
@@ -371,10 +371,10 @@ struct DynamicMerge
__host__
__device__
constexpr
DynamicMerge
(
const
LowerIndex
&
low_lengths
)
__host__
__device__
constexpr
DynamicMerge
(
const
LowerIndex
&
low_lengths
)
:
low_lengths_
{
low_lengths
},
:
low_lengths_
{
low_lengths
},
low_lengths_scan_
{
reverse_exclusive_scan
_on_array
<
index_t
,
NDimLow
>
(
low_lengths_scan_
{
container_
reverse_exclusive_scan
<
index_t
,
NDimLow
>
(
low_lengths
,
math
::
multiplies
<
index_t
>
{},
index_t
{
1
})},
low_lengths
,
math
::
multiplies
<
index_t
>
{},
index_t
{
1
})},
up_lengths_
{
make_multi_index
(
up_lengths_
{
make_multi_index
(
reduce_on_array
(
low_lengths
,
math
::
multiplies
<
index_t
>
(),
index_t
{
1
}))}
container_reduce
(
low_lengths
,
math
::
multiplies
<
index_t
>
(),
index_t
{
1
}))}
{
{
static_assert
(
LowerIndex
::
Size
()
==
NDimLow
,
"wrong!"
);
static_assert
(
LowerIndex
::
Size
()
==
NDimLow
,
"wrong!"
);
}
}
...
@@ -516,7 +516,7 @@ struct DynamicUnMerge
...
@@ -516,7 +516,7 @@ struct DynamicUnMerge
__host__
__device__
constexpr
DynamicUnMerge
(
const
UpperIndex
&
up_lengths
)
__host__
__device__
constexpr
DynamicUnMerge
(
const
UpperIndex
&
up_lengths
)
:
up_lengths_
{
up_lengths
},
:
up_lengths_
{
up_lengths
},
up_lengths_scan_
{
up_lengths_scan_
{
reverse_exclusive_scan
_on_array
(
up_lengths
,
math
::
multiplies
<
index_t
>
(),
index_t
{
1
})}
container_
reverse_exclusive_scan
(
up_lengths
,
math
::
multiplies
<
index_t
>
(),
index_t
{
1
})}
{
{
}
}
...
...
composable_kernel/include/tensor_description/dynamic_tensor_coordinate.hpp
View file @
34cbbb48
...
@@ -263,7 +263,7 @@ struct DynamicTensorCoordinate
...
@@ -263,7 +263,7 @@ struct DynamicTensorCoordinate
{
{
static
constexpr
index_t
NDim
=
TensorDesc
::
GetNumOfDimension
();
static
constexpr
index_t
NDim
=
TensorDesc
::
GetNumOfDimension
();
using
type
=
decltype
(
make_dynamic_tensor_coordinate
(
TensorDesc
{},
MultiIndex
<
NDim
>
{}));
using
type
=
decltype
(
make_dynamic_tensor_coordinate
<
NDim
>
(
TensorDesc
{},
MultiIndex
<
NDim
>
{}));
};
};
}
// namespace ck
}
// namespace ck
...
...
composable_kernel/include/tensor_description/dynamic_tensor_descriptor.hpp
View file @
34cbbb48
...
@@ -45,7 +45,7 @@ struct DynamicNativeTensorDescriptor
...
@@ -45,7 +45,7 @@ struct DynamicNativeTensorDescriptor
__host__
__device__
constexpr
index_t
GetElementSize
()
const
__host__
__device__
constexpr
index_t
GetElementSize
()
const
{
{
return
reduce_on_array
(
GetLengths
(),
math
::
multiplies
<
index_t
>
{},
index_t
{
1
});
return
container_reduce
(
GetLengths
(),
math
::
multiplies
<
index_t
>
{},
index_t
{
1
});
}
}
__host__
__device__
constexpr
index_t
GetElementSpace
()
const
__host__
__device__
constexpr
index_t
GetElementSpace
()
const
...
@@ -136,7 +136,7 @@ struct DynamicTransformedTensorDescriptor
...
@@ -136,7 +136,7 @@ struct DynamicTransformedTensorDescriptor
template
<
typename
...
Xs
>
template
<
typename
...
Xs
>
__host__
__device__
constexpr
auto
operator
()(
Xs
...
xs
)
const
__host__
__device__
constexpr
auto
operator
()(
Xs
...
xs
)
const
{
{
return
array
_cat
(
xs
...);
return
container
_cat
(
xs
...);
}
}
};
};
...
@@ -211,21 +211,22 @@ struct DynamicTransformedTensorDescriptor
...
@@ -211,21 +211,22 @@ struct DynamicTransformedTensorDescriptor
const
auto
unsorted_up_lengths
=
unpack
(
lambda_merge_arrays
{},
tuple_of_up_lengths
);
const
auto
unsorted_up_lengths
=
unpack
(
lambda_merge_arrays
{},
tuple_of_up_lengths
);
const
auto
sorted_up_lengths
=
const
auto
sorted_up_lengths
=
reorder_
array_
given_new2old
(
unsorted_up_lengths
,
sorted2unsorted_map
);
container_
reorder_given_new2old
(
unsorted_up_lengths
,
sorted2unsorted_map
);
return
sorted_up_lengths
;
return
sorted_up_lengths
;
}
}
__host__
__device__
constexpr
auto
GetLengths
()
const
{
return
GetUpperLengths
();
}
__host__
__device__
constexpr
auto
GetLengths
()
const
{
return
GetUpperLengths
();
}
__host__
__device__
constexpr
index_t
GetLength
(
index_t
idim
)
const
template
<
index_t
IDim
>
__host__
__device__
constexpr
index_t
GetLength
(
Number
<
IDim
>
)
const
{
{
return
GetLengths
()[
idim
];
return
GetLengths
()[
Number
<
IDim
>
{}
];
}
}
__host__
__device__
constexpr
index_t
GetElementSize
()
const
__host__
__device__
constexpr
index_t
GetElementSize
()
const
{
{
return
reduce_on_array
(
GetLengths
(),
math
::
multiplies
<
index_t
>
{},
index_t
{
1
});
return
container_reduce
(
GetLengths
(),
math
::
multiplies
<
index_t
>
{},
index_t
{
1
});
}
}
__host__
__device__
constexpr
index_t
GetElementSpace
()
const
__host__
__device__
constexpr
index_t
GetElementSpace
()
const
...
...
composable_kernel/include/tensor_description/dynamic_tensor_descriptor_helper_v2.hpp
View file @
34cbbb48
...
@@ -18,7 +18,7 @@ make_dynamic_native_tensor_descriptor_packed_v2(const MultiIndex<N>& lengths)
...
@@ -18,7 +18,7 @@ make_dynamic_native_tensor_descriptor_packed_v2(const MultiIndex<N>& lengths)
constexpr
auto
visible_dim_hidden_ids
=
typename
arithmetic_sequence_gen
<
1
,
N
+
1
,
1
>::
type
{};
constexpr
auto
visible_dim_hidden_ids
=
typename
arithmetic_sequence_gen
<
1
,
N
+
1
,
1
>::
type
{};
const
index_t
element_space_size
=
const
index_t
element_space_size
=
reduce_on_array
(
lengths
,
math
::
multiplies
<
index_t
>
{},
index_t
{
1
});
container_reduce
(
lengths
,
math
::
multiplies
<
index_t
>
{},
index_t
{
1
});
return
DynamicTensorDescriptor_v2
<
decltype
(
transforms
),
return
DynamicTensorDescriptor_v2
<
decltype
(
transforms
),
decltype
(
low_dim_hidden_idss
),
decltype
(
low_dim_hidden_idss
),
...
...
composable_kernel/include/tensor_description/dynamic_tensor_descriptor_v2.hpp
View file @
34cbbb48
...
@@ -116,7 +116,7 @@ struct DynamicTensorDescriptor_v2
...
@@ -116,7 +116,7 @@ struct DynamicTensorDescriptor_v2
// maybe this result should be saved as a member variable
// maybe this result should be saved as a member variable
__host__
__device__
constexpr
index_t
GetElementSize
()
const
__host__
__device__
constexpr
index_t
GetElementSize
()
const
{
{
return
reduce_on_array
(
GetLengths
(),
math
::
multiplies
<
index_t
>
{},
index_t
{
1
});
return
container_reduce
(
GetLengths
(),
math
::
multiplies
<
index_t
>
{},
index_t
{
1
});
}
}
__host__
__device__
constexpr
index_t
GetElementSpaceSize
()
const
__host__
__device__
constexpr
index_t
GetElementSpaceSize
()
const
...
@@ -411,13 +411,13 @@ transform_dynamic_tensor_descriptor_v2(const OldTensorDescriptor& old_tensor_des
...
@@ -411,13 +411,13 @@ transform_dynamic_tensor_descriptor_v2(const OldTensorDescriptor& old_tensor_des
unordered_new_visible_dim_hidden_ids
.
ReorderGivenOld2New
(
new_visible_dim_unordered2ordered
);
unordered_new_visible_dim_hidden_ids
.
ReorderGivenOld2New
(
new_visible_dim_unordered2ordered
);
// put everything together
// put everything together
const
auto
all_transforms
=
tuple
_cat
(
old_tensor_desc
.
GetTransforms
(),
new_transforms
);
const
auto
all_transforms
=
container
_cat
(
old_tensor_desc
.
GetTransforms
(),
new_transforms
);
constexpr
auto
all_low_dim_hidden_idss
=
constexpr
auto
all_low_dim_hidden_idss
=
tuple
_cat
(
OldTensorDescriptor
::
GetLowerDimensionIdss
(),
low_dim_hidden_idss
);
container
_cat
(
OldTensorDescriptor
::
GetLowerDimensionIdss
(),
low_dim_hidden_idss
);
constexpr
auto
all_up_dim_hidden_idss
=
constexpr
auto
all_up_dim_hidden_idss
=
tuple
_cat
(
OldTensorDescriptor
::
GetUpperDimensionIdss
(),
up_dim_hidden_idss
);
container
_cat
(
OldTensorDescriptor
::
GetUpperDimensionIdss
(),
up_dim_hidden_idss
);
return
DynamicTensorDescriptor_v2
<
decltype
(
all_transforms
),
return
DynamicTensorDescriptor_v2
<
decltype
(
all_transforms
),
decltype
(
all_low_dim_hidden_idss
),
decltype
(
all_low_dim_hidden_idss
),
...
@@ -494,7 +494,7 @@ make_dynamic_tensor_coordinate_step_v2(const TensorDesc&, const VisibleIndex& id
...
@@ -494,7 +494,7 @@ make_dynamic_tensor_coordinate_step_v2(const TensorDesc&, const VisibleIndex& id
// 2) all components of lower index diff will assume to be non-zero and need to be
// 2) all components of lower index diff will assume to be non-zero and need to be
// computed
// computed
const
bool
idx_diff_up_has_non_zero
=
const
bool
idx_diff_up_has_non_zero
=
reduce_on_array
(
non_zero_diff_pick_up
,
[](
auto
a
,
auto
b
)
{
return
a
or
b
;
},
false
);
container_reduce
(
non_zero_diff_pick_up
,
[](
auto
a
,
auto
b
)
{
return
a
or
b
;
},
false
);
do_transforms
(
itran
)
=
idx_diff_up_has_non_zero
;
do_transforms
(
itran
)
=
idx_diff_up_has_non_zero
;
...
...
composable_kernel/include/tensor_description/multi_index.hpp
View file @
34cbbb48
...
@@ -9,30 +9,11 @@ namespace ck {
...
@@ -9,30 +9,11 @@ namespace ck {
template
<
index_t
N
>
template
<
index_t
N
>
using
MultiIndex
=
Array
<
index_t
,
N
>
;
using
MultiIndex
=
Array
<
index_t
,
N
>
;
#if 1 // works
template
<
typename
...
Xs
>
template
<
typename
...
Xs
>
__host__
__device__
constexpr
auto
make_multi_index
(
const
Xs
&
...
xs
)
__host__
__device__
constexpr
auto
make_multi_index
(
const
Xs
&
...
xs
)
{
{
return
make_array
<
const
index_t
>
(
std
::
forward
<
const
Xs
>
(
xs
)...);
return
make_array
<
const
index_t
>
(
std
::
forward
<
const
Xs
>
(
xs
)...);
}
}
#else // doesn't work: don't know how to get the lvalue/rvalue reference correct
template
<
typename
...
Xs
>
__host__
__device__
constexpr
auto
make_multi_index
(
Xs
&&
...
xs
)
{
return
make_array
<
const
index_t
>
(
std
::
forward
<
const
Xs
>
(
xs
)...);
}
#endif
#else
template
<
index_t
N
>
using
MultiIndex
=
StaticallyIndexedArray
<
index_t
,
N
>
;
template
<
typename
...
Xs
>
__host__
__device__
constexpr
auto
make_multi_index
(
const
Xs
&
...
xs
)
{
return
make_statically_indexed_array
<
const
index_t
>
(
std
::
forward
<
const
Xs
>
(
xs
)...);
}
#endif
template
<
index_t
NSize
>
template
<
index_t
NSize
>
__host__
__device__
constexpr
auto
make_zero_multi_index
()
__host__
__device__
constexpr
auto
make_zero_multi_index
()
...
@@ -93,5 +74,84 @@ __host__ __device__ constexpr auto operator*(const MultiIndex<NSize>& a, const T
...
@@ -93,5 +74,84 @@ __host__ __device__ constexpr auto operator*(const MultiIndex<NSize>& a, const T
return
r
;
return
r
;
}
}
#else
template
<
index_t
N
>
using
MultiIndex
=
StaticallyIndexedArray
<
index_t
,
N
>
;
template
<
typename
...
Xs
>
__host__
__device__
constexpr
auto
make_multi_index
(
const
Xs
&
...
xs
)
{
return
make_statically_indexed_array
<
const
index_t
>
(
std
::
forward
<
const
Xs
>
(
xs
)...);
}
template
<
index_t
NSize
>
__host__
__device__
constexpr
auto
make_zero_multi_index
()
{
return
unpack
([](
auto
...
xs
)
{
return
make_multi_index
(
xs
...);
},
typename
uniform_sequence_gen
<
NSize
,
0
>::
type
{});
}
template
<
typename
T
>
__host__
__device__
constexpr
auto
to_multi_index
(
const
T
&
x
)
{
return
unpack
([](
auto
...
ys
)
{
return
make_multi_index
(
ys
...);
},
x
);
}
// Here should use MultiIndex<NSize>, instead of Tuple<Ys...>, although the former
// is the alias of the latter. This is because compiler cannot infer the NSize if
// using MultiIndex<NSize>
// TODO: how to fix this?
template
<
typename
...
Ys
,
typename
X
>
__host__
__device__
constexpr
auto
operator
+=
(
Tuple
<
Ys
...
>&
y
,
const
X
&
x
)
{
static_assert
(
X
::
Size
()
==
sizeof
...(
Ys
),
"wrong! size not the same"
);
constexpr
index_t
NSize
=
sizeof
...(
Ys
);
static_for
<
0
,
NSize
,
1
>
{}([
&
](
auto
i
)
{
y
(
i
)
+=
x
[
i
];
});
return
y
;
}
template
<
typename
...
Ys
,
typename
X
>
__host__
__device__
constexpr
auto
operator
-=
(
Tuple
<
Ys
...
>&
y
,
const
X
&
x
)
{
static_assert
(
X
::
Size
()
==
sizeof
...(
Ys
),
"wrong! size not the same"
);
constexpr
index_t
NSize
=
sizeof
...(
Ys
);
static_for
<
0
,
NSize
,
1
>
{}([
&
](
auto
i
)
{
y
(
i
)
-=
x
[
i
];
});
return
y
;
}
template
<
typename
...
Xs
,
typename
Y
>
__host__
__device__
constexpr
auto
operator
+
(
const
Tuple
<
Xs
...
>&
x
,
const
Y
&
y
)
{
static_assert
(
Y
::
Size
()
==
sizeof
...(
Xs
),
"wrong! size not the same"
);
constexpr
index_t
NSize
=
sizeof
...(
Xs
);
Tuple
<
Xs
...
>
r
;
static_for
<
0
,
NSize
,
1
>
{}([
&
](
auto
i
)
{
r
(
i
)
=
x
[
i
]
+
y
[
i
];
});
return
r
;
}
template
<
typename
...
Xs
,
typename
Y
>
__host__
__device__
constexpr
auto
operator
-
(
const
Tuple
<
Xs
...
>&
x
,
const
Y
&
y
)
{
static_assert
(
Y
::
Size
()
==
sizeof
...(
Xs
),
"wrong! size not the same"
);
constexpr
index_t
NSize
=
sizeof
...(
Xs
);
Tuple
<
Xs
...
>
r
;
static_for
<
0
,
NSize
,
1
>
{}([
&
](
auto
i
)
{
r
(
i
)
=
x
[
i
]
-
y
[
i
];
});
return
r
;
}
template
<
typename
...
Xs
,
typename
Y
>
__host__
__device__
constexpr
auto
operator
*
(
const
Tuple
<
Xs
...
>&
x
,
const
Y
&
y
)
{
static_assert
(
Y
::
Size
()
==
sizeof
...(
Xs
),
"wrong! size not the same"
);
constexpr
index_t
NSize
=
sizeof
...(
Xs
);
Tuple
<
Xs
...
>
r
;
static_for
<
0
,
NSize
,
1
>
{}([
&
](
auto
i
)
{
r
(
i
)
=
x
[
i
]
*
y
[
i
];
});
return
r
;
}
#endif
}
// namespace ck
}
// namespace ck
#endif
#endif
composable_kernel/include/utility/array_helper.hpp
View file @
34cbbb48
...
@@ -3,17 +3,17 @@
...
@@ -3,17 +3,17 @@
#include "sequence.hpp"
#include "sequence.hpp"
#include "sequence_helper.hpp"
#include "sequence_helper.hpp"
#include "tuple.hpp"
#include "tuple_helper.hpp"
#include "array.hpp"
#include "array.hpp"
#include "array_helper.hpp"
#include "array_helper.hpp"
#include "tuple.hpp"
#include "tuple_helper.hpp"
#include "statically_indexed_array.hpp"
#include "statically_indexed_array.hpp"
#include "array_element_picker.hpp"
#include "array_element_picker.hpp"
namespace
ck
{
namespace
ck
{
template
<
typename
TData
,
index_t
NSize
>
template
<
typename
TData
,
index_t
NSize
>
__host__
__device__
constexpr
auto
push_back
(
const
Array
<
TData
,
NSize
>&
a
,
const
TData
&
x
)
__host__
__device__
constexpr
auto
container_
push_back
(
const
Array
<
TData
,
NSize
>&
a
,
const
TData
&
x
)
{
{
Array
<
TData
,
NSize
+
1
>
r
;
Array
<
TData
,
NSize
+
1
>
r
;
...
@@ -25,137 +25,64 @@ __host__ __device__ constexpr auto push_back(const Array<TData, NSize>& a, const
...
@@ -25,137 +25,64 @@ __host__ __device__ constexpr auto push_back(const Array<TData, NSize>& a, const
}
}
template
<
typename
TData
,
index_t
NSize
,
index_t
...
IRs
>
template
<
typename
TData
,
index_t
NSize
,
index_t
...
IRs
>
__host__
__device__
constexpr
auto
reorder_array_given_new2old
(
const
Array
<
TData
,
NSize
>&
old_array
,
__host__
__device__
constexpr
auto
Sequence
<
IRs
...
>
/*new2old*/
)
container_reorder_given_new2old
(
const
Array
<
TData
,
NSize
>&
old_array
,
Sequence
<
IRs
...
>
/*new2old*/
)
{
{
static_assert
(
NSize
==
sizeof
...(
IRs
),
"
NS
ize not consistent"
);
static_assert
(
NSize
==
sizeof
...(
IRs
),
"
wrong! s
ize not consistent"
);
static_assert
(
is_valid_sequence_map
<
Sequence
<
IRs
...
>>
{},
"wrong! invalid reorder map"
);
static_assert
(
is_valid_sequence_map
<
Sequence
<
IRs
...
>>
{},
"wrong! invalid reorder map"
);
return
Array
<
TData
,
NSize
>
{
old_array
[
IRs
]...
}
;
return
make_array
(
old_array
[
Number
<
IRs
>
{}
]...
)
;
}
}
template
<
typename
TData
,
index_t
NSize
,
typename
MapOld2New
>
struct
lambda_reorder_array_given_old2new
{
const
Array
<
TData
,
NSize
>&
old_array
;
Array
<
TData
,
NSize
>&
new_array
;
__host__
__device__
constexpr
lambda_reorder_array_given_old2new
(
const
Array
<
TData
,
NSize
>&
old_array_
,
Array
<
TData
,
NSize
>&
new_array_
)
:
old_array
(
old_array_
),
new_array
(
new_array_
)
{
}
template
<
index_t
IOldDim
>
__host__
__device__
constexpr
void
operator
()(
Number
<
IOldDim
>
)
const
{
TData
old_data
=
old_array
[
IOldDim
];
constexpr
index_t
INewDim
=
MapOld2New
::
At
(
Number
<
IOldDim
>
{});
new_array
(
Number
<
INewDim
>
{})
=
old_data
;
}
};
template
<
typename
TData
,
index_t
NSize
,
index_t
...
IRs
>
template
<
typename
TData
,
index_t
NSize
,
index_t
...
IRs
>
__host__
__device__
constexpr
auto
reorder_array_given_old2new
(
const
Array
<
TData
,
NSize
>&
old_array
,
__host__
__device__
constexpr
auto
Sequence
<
IRs
...
>
/*
old2new
*/
)
container_reorder_given_old2new
(
const
Array
<
TData
,
NSize
>&
old_array
,
Sequence
<
IRs
...
>
old2new
)
{
{
Array
<
TData
,
NSize
>
new_array
;
return
container_reorder_given_new2old
(
old_array
,
typename
sequence_map_inverse
<
decltype
(
old2new
)
>::
type
{});
static_assert
(
NSize
==
sizeof
...(
IRs
),
"NSize not consistent"
);
static_assert
(
is_valid_sequence_map
<
Sequence
<
IRs
...
>>::
value
,
"wrong! invalid reorder map"
);
static_for
<
0
,
NSize
,
1
>
{}(
lambda_reorder_array_given_old2new
<
TData
,
NSize
,
Sequence
<
IRs
...
>>
(
old_array
,
new_array
));
return
new_array
;
}
}
//
em
u
late
constepxr lambda for array
t
em
p
late
<
typename
...
Ts
,
index_t
...
IRs
>
template
<
typename
F
,
typename
X
,
typename
Y
,
typename
Z
>
__host__
__device__
constexpr
auto
container_reorder_given_new2old
(
const
Tuple
<
Ts
...
>&
old_tuple
,
struct
lambda_array_math
Sequence
<
IRs
...
>
/*new2old*/
)
{
{
const
F
&
f
;
static_assert
(
sizeof
...(
Ts
)
==
sizeof
...(
IRs
),
"wrong! size not consistent"
);
const
X
&
x
;
const
Y
&
y
;
Z
&
z
;
__host__
__device__
constexpr
lambda_array_math
(
const
F
&
f_
,
const
X
&
x_
,
const
Y
&
y_
,
Z
&
z_
)
:
f
(
f_
),
x
(
x_
),
y
(
y_
),
z
(
z_
)
{
}
template
<
index_t
IDim_
>
__host__
__device__
constexpr
void
operator
()(
Number
<
IDim_
>
)
const
{
constexpr
auto
IDim
=
Number
<
IDim_
>
{};
z
(
IDim
)
=
f
(
x
[
IDim
],
y
[
IDim
]);
}
};
// Array = Sequence - Array
template
<
typename
TData
,
index_t
NSize
,
index_t
...
Is
>
__host__
__device__
constexpr
auto
operator
-
(
Sequence
<
Is
...
>
a
,
Array
<
TData
,
NSize
>
b
)
{
static_assert
(
sizeof
...(
Is
)
==
NSize
,
"wrong! size not the same"
);
Array
<
TData
,
NSize
>
result
;
auto
f
=
math
::
minus
<
index_t
>
{};
static_assert
(
is_valid_sequence_map
<
Sequence
<
IRs
...
>>
{},
"wrong! invalid reorder map"
);
static_for
<
0
,
NSize
,
1
>
{}(
lambda_array_math
<
decltype
(
f
),
decltype
(
a
),
decltype
(
b
),
decltype
(
result
)
>
(
f
,
a
,
b
,
result
));
return
result
;
return
make_tuple
(
old_tuple
[
Number
<
IRs
>
{}]...)
;
}
}
// Array = Array * TData
template
<
typename
...
Ts
,
index_t
...
IRs
>
template
<
typename
TData
,
index_t
NSize
>
__host__
__device__
constexpr
auto
container_reorder_given_old2new
(
const
Tuple
<
Ts
...
>&
old_tuple
,
__host__
__device__
constexpr
auto
operator
*
(
TData
v
,
Array
<
TData
,
NSize
>
a
)
Sequence
<
IRs
...
>
old2new
)
{
{
Array
<
TData
,
NSize
>
result
;
return
container_reorder_given_new2old
(
old_tuple
,
typename
sequence_map_inverse
<
decltype
(
old2new
)
>::
type
{});
for
(
index_t
i
=
0
;
i
<
NSize
;
++
i
)
{
result
(
i
)
=
a
[
i
]
*
v
;
}
return
result
;
}
}
template
<
typename
TData
,
typename
Ar
r
,
typename
Reduce
>
template
<
typename
TData
,
typename
Containe
r
,
typename
Reduce
>
__host__
__device__
constexpr
TData
reduce_on_array
(
const
Ar
r
&
a
,
Reduce
f
,
TData
init
)
__host__
__device__
constexpr
TData
container_reduce
(
const
Containe
r
&
a
,
Reduce
f
,
TData
init
)
{
{
// static_assert(is_same<typename Arr::data_type, TData>::value, "wrong! different data type");
// static_assert(is_same<typename Arr::data_type, TData>::value, "wrong! different data type");
static_assert
(
Ar
r
::
Size
()
>
0
,
"wrong"
);
static_assert
(
Containe
r
::
Size
()
>
0
,
"wrong"
);
TData
result
=
init
;
TData
result
=
init
;
static_for
<
0
,
Ar
r
::
Size
(),
1
>
{}([
&
](
auto
I
)
{
result
=
f
(
result
,
a
[
I
]);
});
static_for
<
0
,
Containe
r
::
Size
(),
1
>
{}([
&
](
auto
I
)
{
result
=
f
(
result
,
a
[
I
]);
});
return
result
;
return
result
;
}
}
template
<
typename
TData
,
index_t
NSize
,
typename
Reduce
>
template
<
typename
TData
,
index_t
NSize
,
typename
Reduce
>
__host__
__device__
constexpr
auto
__host__
__device__
constexpr
auto
reverse_inclusive_scan
_on_array
(
const
Array
<
TData
,
NSize
>&
x
,
Reduce
f
,
TData
init
)
container_
reverse_inclusive_scan
(
const
Array
<
TData
,
NSize
>&
x
,
Reduce
f
,
TData
init
)
{
{
Array
<
TData
,
NSize
>
y
;
Array
<
TData
,
NSize
>
y
;
TData
r
=
init
;
TData
r
=
init
;
#if 0
#pragma unroll
for(index_t i = NSize - 1; i >= 0; --i)
{
r = f(r, x[i]);
y(i) = r;
}
#else
static_for
<
NSize
-
1
,
0
,
-
1
>
{}([
&
](
auto
i
)
{
static_for
<
NSize
-
1
,
0
,
-
1
>
{}([
&
](
auto
i
)
{
r
=
f
(
r
,
x
[
i
]);
r
=
f
(
r
,
x
[
i
]);
y
(
i
)
=
r
;
y
(
i
)
=
r
;
...
@@ -163,36 +90,61 @@ reverse_inclusive_scan_on_array(const Array<TData, NSize>& x, Reduce f, TData in
...
@@ -163,36 +90,61 @@ reverse_inclusive_scan_on_array(const Array<TData, NSize>& x, Reduce f, TData in
r
=
f
(
r
,
x
[
Number
<
0
>
{}]);
r
=
f
(
r
,
x
[
Number
<
0
>
{}]);
y
(
Number
<
0
>
{})
=
r
;
y
(
Number
<
0
>
{})
=
r
;
#endif
return
y
;
return
y
;
}
}
template
<
typename
TData
,
index_t
NSize
,
typename
Reduce
>
template
<
typename
TData
,
index_t
NSize
,
typename
Reduce
>
__host__
__device__
constexpr
auto
__host__
__device__
constexpr
auto
reverse_exclusive_scan
_on_array
(
const
Array
<
TData
,
NSize
>&
x
,
Reduce
f
,
TData
init
)
container_
reverse_exclusive_scan
(
const
Array
<
TData
,
NSize
>&
x
,
Reduce
f
,
TData
init
)
{
{
Array
<
TData
,
NSize
>
y
;
Array
<
TData
,
NSize
>
y
;
TData
r
=
init
;
TData
r
=
init
;
#if 0
static_for
<
NSize
-
1
,
0
,
-
1
>
{}([
&
](
auto
i
)
{
#pragma unroll
for(index_t i = NSize - 1; i > 0; --i)
{
y
(
i
)
=
r
;
y
(
i
)
=
r
;
r
=
f
(
r
,
x
[
i
]);
r
=
f
(
r
,
x
[
i
]);
}
});
y
(
Number
<
0
>
{})
=
r
;
return
y
;
}
template
<
typename
TData
,
index_t
NSize
,
typename
Reduce
>
__host__
__device__
constexpr
auto
container_reverse_exclusive_scan
(
const
StaticallyIndexedArray
<
TData
,
NSize
>&
x
,
Reduce
f
,
TData
init
)
{
StaticallyIndexedArray
<
TData
,
NSize
>
y
;
TData
r
=
init
;
y(0) = r;
#else
static_for
<
NSize
-
1
,
0
,
-
1
>
{}([
&
](
auto
i
)
{
static_for
<
NSize
-
1
,
0
,
-
1
>
{}([
&
](
auto
i
)
{
y
(
i
)
=
r
;
y
(
i
)
=
r
;
r
=
f
(
r
,
x
[
i
]);
r
=
f
(
r
,
x
[
i
]);
});
});
y
(
Number
<
0
>
{})
=
r
;
y
(
Number
<
0
>
{})
=
r
;
#endif
return
y
;
}
template
<
typename
TData
,
index_t
NSize
,
typename
Reduce
>
__host__
__device__
constexpr
auto
container_reverse_inclusive_scan
(
const
StaticallyIndexedArray
<
TData
,
NSize
>&
x
,
Reduce
f
,
TData
init
)
{
StaticallyIndexedArray
<
TData
,
NSize
>
y
;
TData
r
=
init
;
static_for
<
NSize
-
1
,
0
,
-
1
>
{}([
&
](
auto
i
)
{
r
=
f
(
r
,
x
[
i
]);
y
(
i
)
=
r
;
});
r
=
f
(
r
,
x
[
Number
<
0
>
{}]);
y
(
Number
<
0
>
{})
=
r
;
return
y
;
return
y
;
}
}
...
@@ -204,19 +156,21 @@ __host__ __device__ constexpr auto container_cat(const X& x, const Ys&... ys)
...
@@ -204,19 +156,21 @@ __host__ __device__ constexpr auto container_cat(const X& x, const Ys&... ys)
}
}
template
<
typename
T
,
index_t
NX
,
index_t
NY
>
template
<
typename
T
,
index_t
NX
,
index_t
NY
>
__host__
__device__
constexpr
auto
container_cat
(
const
Array
<
T
,
NX
>&
x
,
const
Array
<
T
,
NY
>&
y
)
__host__
__device__
constexpr
auto
container_cat
(
const
Array
<
T
,
NX
>&
a
x
,
const
Array
<
T
,
NY
>&
a
y
)
{
{
Array
<
T
,
NX
+
NY
>
z
;
return
unpack2
(
[
&
](
auto
&&
...
zs
)
{
return
make_array
(
std
::
forward
<
decltype
(
zs
)
>
(
zs
)...);
},
ax
,
ay
);
static_for
<
0
,
NX
,
1
>
{}([
&
](
auto
i
)
{
z
(
i
)
=
x
[
i
];
});
}
static_for
<
0
,
NY
,
1
>
{}([
&
](
auto
i
)
{
z
(
i
+
Number
<
NX
>
{})
=
y
[
i
];
});
return
z
;
template
<
typename
...
X
,
typename
...
Y
>
__host__
__device__
constexpr
auto
container_cat
(
const
Tuple
<
X
...
>&
tx
,
const
Tuple
<
Y
...
>&
ty
)
{
return
unpack2
(
[
&
](
auto
&&
...
zs
)
{
return
make_tuple
(
std
::
forward
<
decltype
(
zs
)
>
(
zs
)...);
},
tx
,
ty
);
}
}
template
<
typename
T
,
index_t
N
>
template
<
typename
Container
>
__host__
__device__
constexpr
auto
container_cat
(
const
Array
<
T
,
N
>
&
x
)
__host__
__device__
constexpr
auto
container_cat
(
const
Container
&
x
)
{
{
return
x
;
return
x
;
}
}
...
...
composable_kernel/include/utility/config.amd.hpp.in
View file @
34cbbb48
...
@@ -9,7 +9,7 @@
...
@@ -9,7 +9,7 @@
#define CK_UNSIGNED_INDEX_TYPE 0
#define CK_UNSIGNED_INDEX_TYPE 0
// multi index
// multi index
#define CK_USE_DYNAMICALLY_INDEXED_MULTI_INDEX
0
#define CK_USE_DYNAMICALLY_INDEXED_MULTI_INDEX
1
// device backend
// device backend
#define CK_DEVICE_BACKEND_AMD 1
#define CK_DEVICE_BACKEND_AMD 1
...
...
composable_kernel/include/utility/functional3.hpp
View file @
34cbbb48
...
@@ -63,7 +63,7 @@ struct ford_impl
...
@@ -63,7 +63,7 @@ struct ford_impl
for
(
index_t
i
=
0
;
i
<
RemainLengths
::
Front
();
++
i
)
for
(
index_t
i
=
0
;
i
<
RemainLengths
::
Front
();
++
i
)
{
{
ford_impl
<
decltype
(
RemainLengths
::
PopFront
()),
Orders
>
{}(
ford_impl
<
decltype
(
RemainLengths
::
PopFront
()),
Orders
>
{}(
f
,
push_back
(
current_ordered_id
,
i
));
f
,
container_
push_back
(
current_ordered_id
,
i
));
}
}
}
}
};
};
...
@@ -77,7 +77,7 @@ struct ford_impl<Sequence<>, Orders>
...
@@ -77,7 +77,7 @@ struct ford_impl<Sequence<>, Orders>
__host__
__device__
constexpr
void
operator
()(
F
f
,
CurrentOrderedId
current_ordered_id
)
const
__host__
__device__
constexpr
void
operator
()(
F
f
,
CurrentOrderedId
current_ordered_id
)
const
{
{
// retrive unordered Id
// retrive unordered Id
f
(
reorder_
array_
given_old2new
(
current_ordered_id
,
Orders
{}));
f
(
container_
reorder_given_old2new
(
current_ordered_id
,
Orders
{}));
}
}
};
};
...
...
composable_kernel/include/utility/functional4.hpp
View file @
34cbbb48
...
@@ -49,7 +49,7 @@ __host__ __device__ constexpr auto unpack(F&& f, X&& x)
...
@@ -49,7 +49,7 @@ __host__ __device__ constexpr auto unpack(F&& f, X&& x)
// TODO: properly implement unpack that takes any number of containers
// TODO: properly implement unpack that takes any number of containers
template
<
typename
F
,
typename
X
,
typename
Y
>
template
<
typename
F
,
typename
X
,
typename
Y
>
__host__
__device__
constexpr
auto
unpack
(
F
&&
f
,
X
&&
x
,
Y
&&
y
)
__host__
__device__
constexpr
auto
unpack
2
(
F
&&
f
,
X
&&
x
,
Y
&&
y
)
{
{
using
X_
=
remove_reference_t
<
X
>
;
using
X_
=
remove_reference_t
<
X
>
;
using
Y_
=
remove_reference_t
<
Y
>
;
using
Y_
=
remove_reference_t
<
Y
>
;
...
...
composable_kernel/include/utility/statically_indexed_array.hpp
View file @
34cbbb48
...
@@ -36,42 +36,5 @@ __host__ __device__ constexpr auto make_statically_indexed_array()
...
@@ -36,42 +36,5 @@ __host__ __device__ constexpr auto make_statically_indexed_array()
return
StaticallyIndexedArray
<
X
,
0
>
();
return
StaticallyIndexedArray
<
X
,
0
>
();
}
}
template
<
typename
TData
,
index_t
NSize
,
typename
Reduce
>
__host__
__device__
constexpr
auto
reverse_exclusive_scan_on_array
(
const
StaticallyIndexedArray
<
TData
,
NSize
>&
x
,
Reduce
f
,
TData
init
)
{
StaticallyIndexedArray
<
TData
,
NSize
>
y
;
TData
r
=
init
;
static_for
<
NSize
-
1
,
0
,
-
1
>
{}([
&
](
auto
i
)
{
y
(
i
)
=
r
;
r
=
f
(
r
,
x
[
i
]);
});
y
(
Number
<
0
>
{})
=
r
;
return
y
;
}
template
<
typename
TData
,
index_t
NSize
,
typename
Reduce
>
__host__
__device__
constexpr
auto
reverse_inclusive_scan_on_array
(
const
StaticallyIndexedArray
<
TData
,
NSize
>&
x
,
Reduce
f
,
TData
init
)
{
StaticallyIndexedArray
<
TData
,
NSize
>
y
;
TData
r
=
init
;
static_for
<
NSize
-
1
,
0
,
-
1
>
{}([
&
](
auto
i
)
{
r
=
f
(
r
,
x
[
i
]);
y
(
i
)
=
r
;
});
r
=
f
(
r
,
x
[
Number
<
0
>
{}]);
y
(
Number
<
0
>
{})
=
r
;
return
y
;
}
}
// namespace ck
}
// namespace ck
#endif
#endif
composable_kernel/include/utility/tuple.hpp
View file @
34cbbb48
...
@@ -128,12 +128,8 @@ struct Tuple : detail::TupleImpl<typename arithmetic_sequence_gen<0, sizeof...(X
...
@@ -128,12 +128,8 @@ struct Tuple : detail::TupleImpl<typename arithmetic_sequence_gen<0, sizeof...(X
__host__
__device__
constexpr
Tuple
(
Tuple
&&
)
=
default
;
__host__
__device__
constexpr
Tuple
(
Tuple
&&
)
=
default
;
#if 0
template
<
typename
...
Ys
,
template
<
typename
...
Ys
,
typename
std
::
enable_if
<
sizeof
...(
Ys
)
==
sizeof
...(
Xs
),
bool
>
::
type
=
false
>
typename
std
::
enable_if
<
sizeof
...(
Ys
)
==
sizeof
...(
Xs
),
bool
>
::
type
=
false
>
#else
template
<
typename
...
Ys
>
#endif
__host__
__device__
explicit
constexpr
Tuple
(
const
Tuple
<
Ys
...
>&
y
)
__host__
__device__
explicit
constexpr
Tuple
(
const
Tuple
<
Ys
...
>&
y
)
:
base
(
static_cast
<
:
base
(
static_cast
<
const
detail
::
TupleImpl
<
typename
arithmetic_sequence_gen
<
0
,
sizeof
...(
Ys
),
1
>::
type
,
const
detail
::
TupleImpl
<
typename
arithmetic_sequence_gen
<
0
,
sizeof
...(
Ys
),
1
>::
type
,
...
@@ -141,12 +137,8 @@ struct Tuple : detail::TupleImpl<typename arithmetic_sequence_gen<0, sizeof...(X
...
@@ -141,12 +137,8 @@ struct Tuple : detail::TupleImpl<typename arithmetic_sequence_gen<0, sizeof...(X
{
{
}
}
#if 0
template
<
typename
...
Ys
,
template
<
typename
...
Ys
,
typename
std
::
enable_if
<
sizeof
...(
Ys
)
==
sizeof
...(
Xs
),
bool
>
::
type
=
false
>
typename
std
::
enable_if
<
sizeof
...(
Ys
)
==
sizeof
...(
Xs
),
bool
>
::
type
=
false
>
#else
template
<
typename
...
Ys
>
#endif
__host__
__device__
explicit
constexpr
Tuple
(
Tuple
<
Ys
...
>&&
y
)
__host__
__device__
explicit
constexpr
Tuple
(
Tuple
<
Ys
...
>&&
y
)
:
base
(
static_cast
<
:
base
(
static_cast
<
detail
::
TupleImpl
<
typename
arithmetic_sequence_gen
<
0
,
sizeof
...(
Ys
),
1
>::
type
,
detail
::
TupleImpl
<
typename
arithmetic_sequence_gen
<
0
,
sizeof
...(
Ys
),
1
>::
type
,
...
@@ -154,7 +146,9 @@ struct Tuple : detail::TupleImpl<typename arithmetic_sequence_gen<0, sizeof...(X
...
@@ -154,7 +146,9 @@ struct Tuple : detail::TupleImpl<typename arithmetic_sequence_gen<0, sizeof...(X
{
{
}
}
template
<
typename
...
Ys
,
typename
std
::
enable_if
<
sizeof
...(
Ys
)
>
=
1
,
bool
>::
type
=
false
>
template
<
typename
...
Ys
,
typename
std
::
enable_if
<
sizeof
...(
Ys
)
==
sizeof
...(
Xs
)
&&
sizeof
...(
Ys
)
>
=
1
,
bool
>::
type
=
false
>
__host__
__device__
explicit
constexpr
Tuple
(
Ys
&&
...
ys
)
:
base
(
std
::
forward
<
Ys
>
(
ys
)...)
__host__
__device__
explicit
constexpr
Tuple
(
Ys
&&
...
ys
)
:
base
(
std
::
forward
<
Ys
>
(
ys
)...)
{
{
}
}
...
...
composable_kernel/include/utility/tuple_helper.hpp
View file @
34cbbb48
...
@@ -12,13 +12,6 @@ __host__ __device__ constexpr auto generate_tuple(F&& f, Number<N>)
...
@@ -12,13 +12,6 @@ __host__ __device__ constexpr auto generate_tuple(F&& f, Number<N>)
typename
arithmetic_sequence_gen
<
0
,
N
,
1
>::
type
{});
typename
arithmetic_sequence_gen
<
0
,
N
,
1
>::
type
{});
}
}
template
<
typename
...
Tuples
>
__host__
__device__
constexpr
auto
tuple_cat
(
Tuples
&&
...
tuples
)
{
return
unpack
([
&
](
auto
&&
...
xs
)
{
return
make_tuple
(
std
::
forward
<
decltype
(
xs
)
>
(
xs
)...);
},
std
::
forward
<
Tuples
>
(
tuples
)...);
}
namespace
detail
{
namespace
detail
{
template
<
typename
F
,
typename
X
,
index_t
...
Is
>
template
<
typename
F
,
typename
X
,
index_t
...
Is
>
...
...
driver/include/device_dummy_dynamic_transform_v1.hpp
View file @
34cbbb48
...
@@ -52,7 +52,7 @@ void device_dummy_dynamic_transform_v1(InDesc,
...
@@ -52,7 +52,7 @@ void device_dummy_dynamic_transform_v1(InDesc,
const
auto
in_gemmk_gemmn_global_desc
=
tensor_descs
.
At
(
Number
<
0
>
{});
const
auto
in_gemmk_gemmn_global_desc
=
tensor_descs
.
At
(
Number
<
0
>
{});
auto
in_gemmk_gemmn_coord
=
auto
in_gemmk_gemmn_coord
=
make_dynamic_tensor_coordinate
(
in_gemmk_gemmn_global_desc
,
make_multi_index
(
0
,
0
));
make_dynamic_tensor_coordinate
<
2
>
(
in_gemmk_gemmn_global_desc
,
make_multi_index
(
0
,
0
));
for
(
index_t
iter
=
0
;
iter
<
10
;
++
iter
)
for
(
index_t
iter
=
0
;
iter
<
10
;
++
iter
)
{
{
...
@@ -115,10 +115,10 @@ void device_dummy_dynamic_transform_v1(InDesc,
...
@@ -115,10 +115,10 @@ void device_dummy_dynamic_transform_v1(InDesc,
const
DynamicNativeTensorDescriptor
<
4
>
,
const
DynamicNativeTensorDescriptor
<
4
>
,
const
DynamicNativeTensorDescriptor
<
4
>
,
const
DynamicNativeTensorDescriptor
<
4
>
,
const
DynamicNativeTensorDescriptor
<
4
>
,
const
DynamicNativeTensorDescriptor
<
4
>
,
const
Array
<
index_t
,
2
>
,
const
MultiIndex
<
2
>
,
const
Array
<
index_t
,
2
>
,
const
MultiIndex
<
2
>
,
const
Array
<
index_t
,
2
>
,
const
MultiIndex
<
2
>
,
const
Array
<
index_t
,
2
>>
,
const
MultiIndex
<
2
>>
,
dim3
(
GridSize
),
dim3
(
GridSize
),
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
...
...
driver/include/device_dummy_dynamic_transform_v2.hpp
View file @
34cbbb48
...
@@ -96,7 +96,7 @@ void device_dummy_dynamic_transform_v2(InDesc,
...
@@ -96,7 +96,7 @@ void device_dummy_dynamic_transform_v2(InDesc,
for
(
index_t
j
=
0
;
j
<
nrepeat
;
++
j
)
for
(
index_t
j
=
0
;
j
<
nrepeat
;
++
j
)
{
{
#if
0
#if
1
launch_kernel
(
run_gridwise_operation
<
DummyDynamicTransform_v2_1
<
BlockSize
>
,
launch_kernel
(
run_gridwise_operation
<
DummyDynamicTransform_v2_1
<
BlockSize
>
,
index_t
*
const
,
index_t
*
const
,
float
*
const
,
float
*
const
,
...
...
driver/src/conv_driver.cpp
View file @
34cbbb48
...
@@ -549,7 +549,7 @@ int main(int argc, char* argv[])
...
@@ -549,7 +549,7 @@ int main(int argc, char* argv[])
#endif
#endif
}
}
#if
0
#if
1
device_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw
(
in_nchw_desc
,
device_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw
(
in_nchw_desc
,
in_nchw
,
in_nchw
,
wei_kcyx_desc
,
wei_kcyx_desc
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment