Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
34cbbb48
"...composable_kernel_rocm.git" did not exist on "01ee5e53fd1a5f62e7494a3ce475110b6e79b367"
Commit
34cbbb48
authored
Sep 30, 2020
by
Chao Liu
Browse files
refactored dynamically indexed array works now
parent
4d70c71b
Changes
18
Show whitespace changes
Inline
Side-by-side
Showing
18 changed files
with
230 additions
and
242 deletions
+230
-242
composable_kernel/include/kernel_algorithm/dummy_dynamic_transform_v1.hpp
...l/include/kernel_algorithm/dummy_dynamic_transform_v1.hpp
+15
-14
composable_kernel/include/kernel_algorithm/dummy_dynamic_transform_v2.hpp
...l/include/kernel_algorithm/dummy_dynamic_transform_v2.hpp
+32
-10
composable_kernel/include/tensor_description/dynamic_multi_index_transform.hpp
...lude/tensor_description/dynamic_multi_index_transform.hpp
+3
-3
composable_kernel/include/tensor_description/dynamic_tensor_coordinate.hpp
.../include/tensor_description/dynamic_tensor_coordinate.hpp
+1
-1
composable_kernel/include/tensor_description/dynamic_tensor_descriptor.hpp
.../include/tensor_description/dynamic_tensor_descriptor.hpp
+7
-6
composable_kernel/include/tensor_description/dynamic_tensor_descriptor_helper_v2.hpp
...ensor_description/dynamic_tensor_descriptor_helper_v2.hpp
+1
-1
composable_kernel/include/tensor_description/dynamic_tensor_descriptor_v2.hpp
...clude/tensor_description/dynamic_tensor_descriptor_v2.hpp
+5
-5
composable_kernel/include/tensor_description/multi_index.hpp
composable_kernel/include/tensor_description/multi_index.hpp
+79
-19
composable_kernel/include/utility/array_helper.hpp
composable_kernel/include/utility/array_helper.hpp
+73
-119
composable_kernel/include/utility/config.amd.hpp.in
composable_kernel/include/utility/config.amd.hpp.in
+1
-1
composable_kernel/include/utility/functional3.hpp
composable_kernel/include/utility/functional3.hpp
+2
-2
composable_kernel/include/utility/functional4.hpp
composable_kernel/include/utility/functional4.hpp
+1
-1
composable_kernel/include/utility/statically_indexed_array.hpp
...sable_kernel/include/utility/statically_indexed_array.hpp
+0
-37
composable_kernel/include/utility/tuple.hpp
composable_kernel/include/utility/tuple.hpp
+3
-9
composable_kernel/include/utility/tuple_helper.hpp
composable_kernel/include/utility/tuple_helper.hpp
+0
-7
driver/include/device_dummy_dynamic_transform_v1.hpp
driver/include/device_dummy_dynamic_transform_v1.hpp
+5
-5
driver/include/device_dummy_dynamic_transform_v2.hpp
driver/include/device_dummy_dynamic_transform_v2.hpp
+1
-1
driver/src/conv_driver.cpp
driver/src/conv_driver.cpp
+1
-1
No files found.
composable_kernel/include/kernel_algorithm/dummy_dynamic_transform_v1.hpp
View file @
34cbbb48
...
@@ -97,10 +97,10 @@ struct DummyDynamicTransform_v1
...
@@ -97,10 +97,10 @@ struct DummyDynamicTransform_v1
const
WeiDesc
wei_k_c_y_x_global_desc
,
const
WeiDesc
wei_k_c_y_x_global_desc
,
const
InDesc
in_n_c_hi_wi_global_desc
,
const
InDesc
in_n_c_hi_wi_global_desc
,
const
OutDesc
out_n_k_ho_wo_global_desc
,
const
OutDesc
out_n_k_ho_wo_global_desc
,
const
MultiIndex
<
2
>
&
conv_strides
,
const
MultiIndex
<
2
>
conv_strides
,
const
MultiIndex
<
2
>
&
conv_dilations
,
const
MultiIndex
<
2
>
conv_dilations
,
const
MultiIndex
<
2
>
&
in_left_pads
,
const
MultiIndex
<
2
>
in_left_pads
,
const
MultiIndex
<
2
>
&
in_right_pads
)
const
const
MultiIndex
<
2
>
in_right_pads
)
const
{
{
constexpr
auto
I0
=
Number
<
0
>
{};
constexpr
auto
I0
=
Number
<
0
>
{};
constexpr
auto
I1
=
Number
<
1
>
{};
constexpr
auto
I1
=
Number
<
1
>
{};
...
@@ -542,10 +542,10 @@ struct DummyDynamicTransform_v1
...
@@ -542,10 +542,10 @@ struct DummyDynamicTransform_v1
const
WeiDesc
wei_k_c_y_x_global_desc
,
const
WeiDesc
wei_k_c_y_x_global_desc
,
const
InDesc
in_n_c_hi_wi_global_desc
,
const
InDesc
in_n_c_hi_wi_global_desc
,
const
OutDesc
out_n_k_ho_wo_global_desc
,
const
OutDesc
out_n_k_ho_wo_global_desc
,
const
MultiIndex
<
2
>
&
conv_strides
,
const
MultiIndex
<
2
>
conv_strides
,
const
MultiIndex
<
2
>
&
conv_dilations
,
const
MultiIndex
<
2
>
conv_dilations
,
const
MultiIndex
<
2
>
&
in_left_pads
,
const
MultiIndex
<
2
>
in_left_pads
,
const
MultiIndex
<
2
>
&
in_right_pads
)
const
const
MultiIndex
<
2
>
in_right_pads
)
const
{
{
const
auto
transformed_tensor_descs
=
const
auto
transformed_tensor_descs
=
map_convolution_into_gemm_v1
(
wei_k_c_y_x_global_desc
,
map_convolution_into_gemm_v1
(
wei_k_c_y_x_global_desc
,
...
@@ -564,7 +564,8 @@ struct DummyDynamicTransform_v1
...
@@ -564,7 +564,8 @@ struct DummyDynamicTransform_v1
const
index_t
niter
=
p_wei_global
[
10
];
const
index_t
niter
=
p_wei_global
[
10
];
auto
in_gemmk_gemmn_coord
=
make_dynamic_tensor_coordinate
(
in_gemmk_gemmn_global_desc
,
idx
);
auto
in_gemmk_gemmn_coord
=
make_dynamic_tensor_coordinate
<
2
>
(
in_gemmk_gemmn_global_desc
,
idx
);
for
(
index_t
iter
=
0
;
iter
<
niter
;
++
iter
)
for
(
index_t
iter
=
0
;
iter
<
niter
;
++
iter
)
{
{
...
@@ -587,7 +588,7 @@ struct DummyDynamicTransform_v1
...
@@ -587,7 +588,7 @@ struct DummyDynamicTransform_v1
1
,
1
,
p_out_global
,
p_out_global
,
in_gemmk_gemmn_coord
.
GetOffset
(),
in_gemmk_gemmn_coord
.
GetOffset
(),
#if
0
#if
1
in_gemmk_gemmn_coord
.
IsOffsetValidAssumingUpperIndexIsValid
(),
in_gemmk_gemmn_coord
.
IsOffsetValidAssumingUpperIndexIsValid
(),
#else
#else
true
,
true
,
...
@@ -603,10 +604,10 @@ struct DummyDynamicTransform_v1
...
@@ -603,10 +604,10 @@ struct DummyDynamicTransform_v1
const
WeiDesc
wei_k_c_y_x_global_desc
,
const
WeiDesc
wei_k_c_y_x_global_desc
,
const
InDesc
in_n_c_hi_wi_global_desc
,
const
InDesc
in_n_c_hi_wi_global_desc
,
const
OutDesc
out_n_k_ho_wo_global_desc
,
const
OutDesc
out_n_k_ho_wo_global_desc
,
const
MultiIndex
<
2
>
&
conv_strides
,
const
MultiIndex
<
2
>
conv_strides
,
const
MultiIndex
<
2
>
&
conv_dilations
,
const
MultiIndex
<
2
>
conv_dilations
,
const
MultiIndex
<
2
>
&
in_left_pads
,
const
MultiIndex
<
2
>
in_left_pads
,
const
MultiIndex
<
2
>
&
in_right_pads
)
const
const
MultiIndex
<
2
>
in_right_pads
)
const
{
{
Run_2
(
p_wei_global
,
Run_2
(
p_wei_global
,
p_in_global
,
p_in_global
,
...
...
composable_kernel/include/kernel_algorithm/dummy_dynamic_transform_v2.hpp
View file @
34cbbb48
...
@@ -117,19 +117,41 @@ struct DummyDynamicTransform_v2_1
...
@@ -117,19 +117,41 @@ struct DummyDynamicTransform_v2_1
// initialize idx
// initialize idx
static_for
<
0
,
2
,
1
>
{}([
&
](
auto
i
)
{
idx
(
i
)
=
p_wei_global
[
get_thread_local_1d_id
()
+
i
];
});
static_for
<
0
,
2
,
1
>
{}([
&
](
auto
i
)
{
idx
(
i
)
=
p_wei_global
[
get_thread_local_1d_id
()
+
i
];
});
const
index_t
niter
=
p_wei_global
[
10
];
auto
in_gemmk_gemmn_coord
=
auto
in_gemmk_gemmn_coord
=
make_dynamic_tensor_coordinate_v2
(
in_gemmk_gemmn_global_desc
,
idx
);
make_dynamic_tensor_coordinate_v2
(
in_gemmk_gemmn_global_desc
,
idx
);
const
auto
in_gemmk_gemmn_coord_step
=
make_dynamic_tensor_coordinate_step_v2
(
const
auto
in_gemmk_gemmn_coord_step
=
make_dynamic_tensor_coordinate_step_v2
(
in_gemmk_gemmn_global_desc
,
make_multi_index
(
1
,
0
));
in_gemmk_gemmn_global_desc
,
make_multi_index
(
1
,
0
));
#pragma unroll 1
for
(
index_t
iter
=
0
;
iter
<
niter
;
++
iter
)
for
(
index_t
i
=
0
;
i
<
10
;
++
i
)
{
{
move_dynamic_tensor_coordinate_v2
(
move_dynamic_tensor_coordinate_v2
(
in_gemmk_gemmn_global_desc
,
in_gemmk_gemmn_coord
,
in_gemmk_gemmn_coord_step
);
in_gemmk_gemmn_global_desc
,
in_gemmk_gemmn_coord
,
in_gemmk_gemmn_coord_step
);
p_out_global
[
in_gemmk_gemmn_coord
.
GetOffset
()]
=
1
;
// write
float
value
=
1
;
transfer_data
<
float
,
1
,
AddressSpace
::
Vgpr
,
AddressSpace
::
Global
,
InMemoryDataOperation
::
Set
,
1
,
1
>
(
&
value
,
0
,
true
,
1
,
p_out_global
,
in_gemmk_gemmn_coord
.
GetOffset
(),
#if 1
coordinate_has_valid_offset_assuming_visible_index_is_valid
(
in_gemmk_gemmn_global_desc
,
in_gemmk_gemmn_coord
),
#else
true
,
#endif
in_gemmk_gemmn_global_desc
.
GetElementSpaceSize
());
}
}
}
}
...
@@ -215,13 +237,13 @@ struct DummyDynamicTransform_v2_1
...
@@ -215,13 +237,13 @@ struct DummyDynamicTransform_v2_1
// initialize idx
// initialize idx
static_for
<
0
,
4
,
1
>
{}([
&
](
auto
i
)
{
idx
(
i
)
=
p_wei_global
[
get_thread_local_1d_id
()
+
i
];
});
static_for
<
0
,
4
,
1
>
{}([
&
](
auto
i
)
{
idx
(
i
)
=
p_wei_global
[
get_thread_local_1d_id
()
+
i
];
});
#if
0
#if
1
const
index_t
niter
=
p_wei_global
[
10
];
const
index_t
niter
=
p_wei_global
[
10
];
auto
in_coord
=
make_dynamic_tensor_coordinate_v2
(
in_n_c_hip_wip_global_desc
,
idx
);
auto
in_coord
=
make_dynamic_tensor_coordinate_v2
(
in_n_c_hip_wip_global_desc
,
idx
);
const
auto
in_coord_step
=
make_dynamic_tensor_coordinate_step_v2
(
const
auto
in_coord_step
=
make_dynamic_tensor_coordinate_step_v2
(
in_n_c_hip_wip_global_desc,
M
ulti
I
ndex
<4>{{
1, 0, 0, 0
}}
);
in_n_c_hip_wip_global_desc
,
make_m
ulti
_i
ndex
(
1
,
0
,
0
,
0
)
);
for
(
index_t
iter
=
0
;
iter
<
niter
;
++
iter
)
for
(
index_t
iter
=
0
;
iter
<
niter
;
++
iter
)
{
{
...
@@ -266,7 +288,7 @@ struct DummyDynamicTransform_v2_1
...
@@ -266,7 +288,7 @@ struct DummyDynamicTransform_v2_1
const
MultiIndex
<
2
>
in_left_pads
,
const
MultiIndex
<
2
>
in_left_pads
,
const
MultiIndex
<
2
>
in_right_pads
)
const
const
MultiIndex
<
2
>
in_right_pads
)
const
{
{
Run_
1
(
p_wei_global
,
Run_
2
(
p_wei_global
,
p_in_global
,
p_in_global
,
p_out_global
,
p_out_global
,
wei_k_c_y_x_global_desc
,
wei_k_c_y_x_global_desc
,
...
@@ -293,7 +315,6 @@ struct DummyDynamicTransform_v2_2
...
@@ -293,7 +315,6 @@ struct DummyDynamicTransform_v2_2
// initialize idx
// initialize idx
static_for
<
0
,
2
,
1
>
{}([
&
](
auto
i
)
{
idx
(
i
)
=
p_wei_global
[
get_thread_local_1d_id
()
+
i
];
});
static_for
<
0
,
2
,
1
>
{}([
&
](
auto
i
)
{
idx
(
i
)
=
p_wei_global
[
get_thread_local_1d_id
()
+
i
];
});
#if 1
const
index_t
niter
=
p_wei_global
[
10
];
const
index_t
niter
=
p_wei_global
[
10
];
auto
in_gemmk_gemmn_coord
=
auto
in_gemmk_gemmn_coord
=
...
@@ -322,13 +343,14 @@ struct DummyDynamicTransform_v2_2
...
@@ -322,13 +343,14 @@ struct DummyDynamicTransform_v2_2
1
,
1
,
p_out_global
,
p_out_global
,
in_gemmk_gemmn_coord
.
GetOffset
(),
in_gemmk_gemmn_coord
.
GetOffset
(),
#if 0
coordinate_has_valid_offset_assuming_visible_index_is_valid(
coordinate_has_valid_offset_assuming_visible_index_is_valid(
in_gemmk_gemmn_global_desc, in_gemmk_gemmn_coord),
in_gemmk_gemmn_global_desc, in_gemmk_gemmn_coord),
in_gemmk_gemmn_global_desc
.
GetElementSpaceSize
());
}
#else
#else
p_out_global
[
in_gemmk_gemmn_global_desc
.
CalculateOffset
(
idx
)]
=
1
;
true
,
#endif
#endif
in_gemmk_gemmn_global_desc
.
GetElementSpaceSize
());
}
}
}
};
};
...
...
composable_kernel/include/tensor_description/dynamic_multi_index_transform.hpp
View file @
34cbbb48
...
@@ -371,10 +371,10 @@ struct DynamicMerge
...
@@ -371,10 +371,10 @@ struct DynamicMerge
__host__
__device__
constexpr
DynamicMerge
(
const
LowerIndex
&
low_lengths
)
__host__
__device__
constexpr
DynamicMerge
(
const
LowerIndex
&
low_lengths
)
:
low_lengths_
{
low_lengths
},
:
low_lengths_
{
low_lengths
},
low_lengths_scan_
{
reverse_exclusive_scan
_on_array
<
index_t
,
NDimLow
>
(
low_lengths_scan_
{
container_
reverse_exclusive_scan
<
index_t
,
NDimLow
>
(
low_lengths
,
math
::
multiplies
<
index_t
>
{},
index_t
{
1
})},
low_lengths
,
math
::
multiplies
<
index_t
>
{},
index_t
{
1
})},
up_lengths_
{
make_multi_index
(
up_lengths_
{
make_multi_index
(
reduce_on_array
(
low_lengths
,
math
::
multiplies
<
index_t
>
(),
index_t
{
1
}))}
container_reduce
(
low_lengths
,
math
::
multiplies
<
index_t
>
(),
index_t
{
1
}))}
{
{
static_assert
(
LowerIndex
::
Size
()
==
NDimLow
,
"wrong!"
);
static_assert
(
LowerIndex
::
Size
()
==
NDimLow
,
"wrong!"
);
}
}
...
@@ -516,7 +516,7 @@ struct DynamicUnMerge
...
@@ -516,7 +516,7 @@ struct DynamicUnMerge
__host__
__device__
constexpr
DynamicUnMerge
(
const
UpperIndex
&
up_lengths
)
__host__
__device__
constexpr
DynamicUnMerge
(
const
UpperIndex
&
up_lengths
)
:
up_lengths_
{
up_lengths
},
:
up_lengths_
{
up_lengths
},
up_lengths_scan_
{
up_lengths_scan_
{
reverse_exclusive_scan
_on_array
(
up_lengths
,
math
::
multiplies
<
index_t
>
(),
index_t
{
1
})}
container_
reverse_exclusive_scan
(
up_lengths
,
math
::
multiplies
<
index_t
>
(),
index_t
{
1
})}
{
{
}
}
...
...
composable_kernel/include/tensor_description/dynamic_tensor_coordinate.hpp
View file @
34cbbb48
...
@@ -263,7 +263,7 @@ struct DynamicTensorCoordinate
...
@@ -263,7 +263,7 @@ struct DynamicTensorCoordinate
{
{
static
constexpr
index_t
NDim
=
TensorDesc
::
GetNumOfDimension
();
static
constexpr
index_t
NDim
=
TensorDesc
::
GetNumOfDimension
();
using
type
=
decltype
(
make_dynamic_tensor_coordinate
(
TensorDesc
{},
MultiIndex
<
NDim
>
{}));
using
type
=
decltype
(
make_dynamic_tensor_coordinate
<
NDim
>
(
TensorDesc
{},
MultiIndex
<
NDim
>
{}));
};
};
}
// namespace ck
}
// namespace ck
...
...
composable_kernel/include/tensor_description/dynamic_tensor_descriptor.hpp
View file @
34cbbb48
...
@@ -45,7 +45,7 @@ struct DynamicNativeTensorDescriptor
...
@@ -45,7 +45,7 @@ struct DynamicNativeTensorDescriptor
__host__
__device__
constexpr
index_t
GetElementSize
()
const
__host__
__device__
constexpr
index_t
GetElementSize
()
const
{
{
return
reduce_on_array
(
GetLengths
(),
math
::
multiplies
<
index_t
>
{},
index_t
{
1
});
return
container_reduce
(
GetLengths
(),
math
::
multiplies
<
index_t
>
{},
index_t
{
1
});
}
}
__host__
__device__
constexpr
index_t
GetElementSpace
()
const
__host__
__device__
constexpr
index_t
GetElementSpace
()
const
...
@@ -136,7 +136,7 @@ struct DynamicTransformedTensorDescriptor
...
@@ -136,7 +136,7 @@ struct DynamicTransformedTensorDescriptor
template
<
typename
...
Xs
>
template
<
typename
...
Xs
>
__host__
__device__
constexpr
auto
operator
()(
Xs
...
xs
)
const
__host__
__device__
constexpr
auto
operator
()(
Xs
...
xs
)
const
{
{
return
array
_cat
(
xs
...);
return
container
_cat
(
xs
...);
}
}
};
};
...
@@ -211,21 +211,22 @@ struct DynamicTransformedTensorDescriptor
...
@@ -211,21 +211,22 @@ struct DynamicTransformedTensorDescriptor
const
auto
unsorted_up_lengths
=
unpack
(
lambda_merge_arrays
{},
tuple_of_up_lengths
);
const
auto
unsorted_up_lengths
=
unpack
(
lambda_merge_arrays
{},
tuple_of_up_lengths
);
const
auto
sorted_up_lengths
=
const
auto
sorted_up_lengths
=
reorder_
array_
given_new2old
(
unsorted_up_lengths
,
sorted2unsorted_map
);
container_
reorder_given_new2old
(
unsorted_up_lengths
,
sorted2unsorted_map
);
return
sorted_up_lengths
;
return
sorted_up_lengths
;
}
}
__host__
__device__
constexpr
auto
GetLengths
()
const
{
return
GetUpperLengths
();
}
__host__
__device__
constexpr
auto
GetLengths
()
const
{
return
GetUpperLengths
();
}
__host__
__device__
constexpr
index_t
GetLength
(
index_t
idim
)
const
template
<
index_t
IDim
>
__host__
__device__
constexpr
index_t
GetLength
(
Number
<
IDim
>
)
const
{
{
return
GetLengths
()[
idim
];
return
GetLengths
()[
Number
<
IDim
>
{}
];
}
}
__host__
__device__
constexpr
index_t
GetElementSize
()
const
__host__
__device__
constexpr
index_t
GetElementSize
()
const
{
{
return
reduce_on_array
(
GetLengths
(),
math
::
multiplies
<
index_t
>
{},
index_t
{
1
});
return
container_reduce
(
GetLengths
(),
math
::
multiplies
<
index_t
>
{},
index_t
{
1
});
}
}
__host__
__device__
constexpr
index_t
GetElementSpace
()
const
__host__
__device__
constexpr
index_t
GetElementSpace
()
const
...
...
composable_kernel/include/tensor_description/dynamic_tensor_descriptor_helper_v2.hpp
View file @
34cbbb48
...
@@ -18,7 +18,7 @@ make_dynamic_native_tensor_descriptor_packed_v2(const MultiIndex<N>& lengths)
...
@@ -18,7 +18,7 @@ make_dynamic_native_tensor_descriptor_packed_v2(const MultiIndex<N>& lengths)
constexpr
auto
visible_dim_hidden_ids
=
typename
arithmetic_sequence_gen
<
1
,
N
+
1
,
1
>::
type
{};
constexpr
auto
visible_dim_hidden_ids
=
typename
arithmetic_sequence_gen
<
1
,
N
+
1
,
1
>::
type
{};
const
index_t
element_space_size
=
const
index_t
element_space_size
=
reduce_on_array
(
lengths
,
math
::
multiplies
<
index_t
>
{},
index_t
{
1
});
container_reduce
(
lengths
,
math
::
multiplies
<
index_t
>
{},
index_t
{
1
});
return
DynamicTensorDescriptor_v2
<
decltype
(
transforms
),
return
DynamicTensorDescriptor_v2
<
decltype
(
transforms
),
decltype
(
low_dim_hidden_idss
),
decltype
(
low_dim_hidden_idss
),
...
...
composable_kernel/include/tensor_description/dynamic_tensor_descriptor_v2.hpp
View file @
34cbbb48
...
@@ -116,7 +116,7 @@ struct DynamicTensorDescriptor_v2
...
@@ -116,7 +116,7 @@ struct DynamicTensorDescriptor_v2
// maybe this result should be saved as a member variable
// maybe this result should be saved as a member variable
__host__
__device__
constexpr
index_t
GetElementSize
()
const
__host__
__device__
constexpr
index_t
GetElementSize
()
const
{
{
return
reduce_on_array
(
GetLengths
(),
math
::
multiplies
<
index_t
>
{},
index_t
{
1
});
return
container_reduce
(
GetLengths
(),
math
::
multiplies
<
index_t
>
{},
index_t
{
1
});
}
}
__host__
__device__
constexpr
index_t
GetElementSpaceSize
()
const
__host__
__device__
constexpr
index_t
GetElementSpaceSize
()
const
...
@@ -411,13 +411,13 @@ transform_dynamic_tensor_descriptor_v2(const OldTensorDescriptor& old_tensor_des
...
@@ -411,13 +411,13 @@ transform_dynamic_tensor_descriptor_v2(const OldTensorDescriptor& old_tensor_des
unordered_new_visible_dim_hidden_ids
.
ReorderGivenOld2New
(
new_visible_dim_unordered2ordered
);
unordered_new_visible_dim_hidden_ids
.
ReorderGivenOld2New
(
new_visible_dim_unordered2ordered
);
// put everything together
// put everything together
const
auto
all_transforms
=
tuple
_cat
(
old_tensor_desc
.
GetTransforms
(),
new_transforms
);
const
auto
all_transforms
=
container
_cat
(
old_tensor_desc
.
GetTransforms
(),
new_transforms
);
constexpr
auto
all_low_dim_hidden_idss
=
constexpr
auto
all_low_dim_hidden_idss
=
tuple
_cat
(
OldTensorDescriptor
::
GetLowerDimensionIdss
(),
low_dim_hidden_idss
);
container
_cat
(
OldTensorDescriptor
::
GetLowerDimensionIdss
(),
low_dim_hidden_idss
);
constexpr
auto
all_up_dim_hidden_idss
=
constexpr
auto
all_up_dim_hidden_idss
=
tuple
_cat
(
OldTensorDescriptor
::
GetUpperDimensionIdss
(),
up_dim_hidden_idss
);
container
_cat
(
OldTensorDescriptor
::
GetUpperDimensionIdss
(),
up_dim_hidden_idss
);
return
DynamicTensorDescriptor_v2
<
decltype
(
all_transforms
),
return
DynamicTensorDescriptor_v2
<
decltype
(
all_transforms
),
decltype
(
all_low_dim_hidden_idss
),
decltype
(
all_low_dim_hidden_idss
),
...
@@ -494,7 +494,7 @@ make_dynamic_tensor_coordinate_step_v2(const TensorDesc&, const VisibleIndex& id
...
@@ -494,7 +494,7 @@ make_dynamic_tensor_coordinate_step_v2(const TensorDesc&, const VisibleIndex& id
// 2) all components of lower index diff will assume to be non-zero and need to be
// 2) all components of lower index diff will assume to be non-zero and need to be
// computed
// computed
const
bool
idx_diff_up_has_non_zero
=
const
bool
idx_diff_up_has_non_zero
=
reduce_on_array
(
non_zero_diff_pick_up
,
[](
auto
a
,
auto
b
)
{
return
a
or
b
;
},
false
);
container_reduce
(
non_zero_diff_pick_up
,
[](
auto
a
,
auto
b
)
{
return
a
or
b
;
},
false
);
do_transforms
(
itran
)
=
idx_diff_up_has_non_zero
;
do_transforms
(
itran
)
=
idx_diff_up_has_non_zero
;
...
...
composable_kernel/include/tensor_description/multi_index.hpp
View file @
34cbbb48
...
@@ -9,30 +9,11 @@ namespace ck {
...
@@ -9,30 +9,11 @@ namespace ck {
template
<
index_t
N
>
template
<
index_t
N
>
using
MultiIndex
=
Array
<
index_t
,
N
>
;
using
MultiIndex
=
Array
<
index_t
,
N
>
;
#if 1 // works
template
<
typename
...
Xs
>
template
<
typename
...
Xs
>
__host__
__device__
constexpr
auto
make_multi_index
(
const
Xs
&
...
xs
)
__host__
__device__
constexpr
auto
make_multi_index
(
const
Xs
&
...
xs
)
{
{
return
make_array
<
const
index_t
>
(
std
::
forward
<
const
Xs
>
(
xs
)...);
return
make_array
<
const
index_t
>
(
std
::
forward
<
const
Xs
>
(
xs
)...);
}
}
#else // doesn't work: don't know how to get the lvalue/rvalue reference correct
template
<
typename
...
Xs
>
__host__
__device__
constexpr
auto
make_multi_index
(
Xs
&&
...
xs
)
{
return
make_array
<
const
index_t
>
(
std
::
forward
<
const
Xs
>
(
xs
)...);
}
#endif
#else
template
<
index_t
N
>
using
MultiIndex
=
StaticallyIndexedArray
<
index_t
,
N
>
;
template
<
typename
...
Xs
>
__host__
__device__
constexpr
auto
make_multi_index
(
const
Xs
&
...
xs
)
{
return
make_statically_indexed_array
<
const
index_t
>
(
std
::
forward
<
const
Xs
>
(
xs
)...);
}
#endif
template
<
index_t
NSize
>
template
<
index_t
NSize
>
__host__
__device__
constexpr
auto
make_zero_multi_index
()
__host__
__device__
constexpr
auto
make_zero_multi_index
()
...
@@ -93,5 +74,84 @@ __host__ __device__ constexpr auto operator*(const MultiIndex<NSize>& a, const T
...
@@ -93,5 +74,84 @@ __host__ __device__ constexpr auto operator*(const MultiIndex<NSize>& a, const T
return
r
;
return
r
;
}
}
#else
template
<
index_t
N
>
using
MultiIndex
=
StaticallyIndexedArray
<
index_t
,
N
>
;
template
<
typename
...
Xs
>
__host__
__device__
constexpr
auto
make_multi_index
(
const
Xs
&
...
xs
)
{
return
make_statically_indexed_array
<
const
index_t
>
(
std
::
forward
<
const
Xs
>
(
xs
)...);
}
template
<
index_t
NSize
>
__host__
__device__
constexpr
auto
make_zero_multi_index
()
{
return
unpack
([](
auto
...
xs
)
{
return
make_multi_index
(
xs
...);
},
typename
uniform_sequence_gen
<
NSize
,
0
>::
type
{});
}
template
<
typename
T
>
__host__
__device__
constexpr
auto
to_multi_index
(
const
T
&
x
)
{
return
unpack
([](
auto
...
ys
)
{
return
make_multi_index
(
ys
...);
},
x
);
}
// Here should use MultiIndex<NSize>, instead of Tuple<Ys...>, although the former
// is the alias of the latter. This is because compiler cannot infer the NSize if
// using MultiIndex<NSize>
// TODO: how to fix this?
template
<
typename
...
Ys
,
typename
X
>
__host__
__device__
constexpr
auto
operator
+=
(
Tuple
<
Ys
...
>&
y
,
const
X
&
x
)
{
static_assert
(
X
::
Size
()
==
sizeof
...(
Ys
),
"wrong! size not the same"
);
constexpr
index_t
NSize
=
sizeof
...(
Ys
);
static_for
<
0
,
NSize
,
1
>
{}([
&
](
auto
i
)
{
y
(
i
)
+=
x
[
i
];
});
return
y
;
}
template
<
typename
...
Ys
,
typename
X
>
__host__
__device__
constexpr
auto
operator
-=
(
Tuple
<
Ys
...
>&
y
,
const
X
&
x
)
{
static_assert
(
X
::
Size
()
==
sizeof
...(
Ys
),
"wrong! size not the same"
);
constexpr
index_t
NSize
=
sizeof
...(
Ys
);
static_for
<
0
,
NSize
,
1
>
{}([
&
](
auto
i
)
{
y
(
i
)
-=
x
[
i
];
});
return
y
;
}
template
<
typename
...
Xs
,
typename
Y
>
__host__
__device__
constexpr
auto
operator
+
(
const
Tuple
<
Xs
...
>&
x
,
const
Y
&
y
)
{
static_assert
(
Y
::
Size
()
==
sizeof
...(
Xs
),
"wrong! size not the same"
);
constexpr
index_t
NSize
=
sizeof
...(
Xs
);
Tuple
<
Xs
...
>
r
;
static_for
<
0
,
NSize
,
1
>
{}([
&
](
auto
i
)
{
r
(
i
)
=
x
[
i
]
+
y
[
i
];
});
return
r
;
}
template
<
typename
...
Xs
,
typename
Y
>
__host__
__device__
constexpr
auto
operator
-
(
const
Tuple
<
Xs
...
>&
x
,
const
Y
&
y
)
{
static_assert
(
Y
::
Size
()
==
sizeof
...(
Xs
),
"wrong! size not the same"
);
constexpr
index_t
NSize
=
sizeof
...(
Xs
);
Tuple
<
Xs
...
>
r
;
static_for
<
0
,
NSize
,
1
>
{}([
&
](
auto
i
)
{
r
(
i
)
=
x
[
i
]
-
y
[
i
];
});
return
r
;
}
template
<
typename
...
Xs
,
typename
Y
>
__host__
__device__
constexpr
auto
operator
*
(
const
Tuple
<
Xs
...
>&
x
,
const
Y
&
y
)
{
static_assert
(
Y
::
Size
()
==
sizeof
...(
Xs
),
"wrong! size not the same"
);
constexpr
index_t
NSize
=
sizeof
...(
Xs
);
Tuple
<
Xs
...
>
r
;
static_for
<
0
,
NSize
,
1
>
{}([
&
](
auto
i
)
{
r
(
i
)
=
x
[
i
]
*
y
[
i
];
});
return
r
;
}
#endif
}
// namespace ck
}
// namespace ck
#endif
#endif
composable_kernel/include/utility/array_helper.hpp
View file @
34cbbb48
...
@@ -3,17 +3,17 @@
...
@@ -3,17 +3,17 @@
#include "sequence.hpp"
#include "sequence.hpp"
#include "sequence_helper.hpp"
#include "sequence_helper.hpp"
#include "tuple.hpp"
#include "tuple_helper.hpp"
#include "array.hpp"
#include "array.hpp"
#include "array_helper.hpp"
#include "array_helper.hpp"
#include "tuple.hpp"
#include "tuple_helper.hpp"
#include "statically_indexed_array.hpp"
#include "statically_indexed_array.hpp"
#include "array_element_picker.hpp"
#include "array_element_picker.hpp"
namespace
ck
{
namespace
ck
{
template
<
typename
TData
,
index_t
NSize
>
template
<
typename
TData
,
index_t
NSize
>
__host__
__device__
constexpr
auto
push_back
(
const
Array
<
TData
,
NSize
>&
a
,
const
TData
&
x
)
__host__
__device__
constexpr
auto
container_
push_back
(
const
Array
<
TData
,
NSize
>&
a
,
const
TData
&
x
)
{
{
Array
<
TData
,
NSize
+
1
>
r
;
Array
<
TData
,
NSize
+
1
>
r
;
...
@@ -25,137 +25,64 @@ __host__ __device__ constexpr auto push_back(const Array<TData, NSize>& a, const
...
@@ -25,137 +25,64 @@ __host__ __device__ constexpr auto push_back(const Array<TData, NSize>& a, const
}
}
template
<
typename
TData
,
index_t
NSize
,
index_t
...
IRs
>
template
<
typename
TData
,
index_t
NSize
,
index_t
...
IRs
>
__host__
__device__
constexpr
auto
reorder_array_given_new2old
(
const
Array
<
TData
,
NSize
>&
old_array
,
__host__
__device__
constexpr
auto
Sequence
<
IRs
...
>
/*new2old*/
)
container_reorder_given_new2old
(
const
Array
<
TData
,
NSize
>&
old_array
,
Sequence
<
IRs
...
>
/*new2old*/
)
{
{
static_assert
(
NSize
==
sizeof
...(
IRs
),
"
NS
ize not consistent"
);
static_assert
(
NSize
==
sizeof
...(
IRs
),
"
wrong! s
ize not consistent"
);
static_assert
(
is_valid_sequence_map
<
Sequence
<
IRs
...
>>
{},
"wrong! invalid reorder map"
);
static_assert
(
is_valid_sequence_map
<
Sequence
<
IRs
...
>>
{},
"wrong! invalid reorder map"
);
return
Array
<
TData
,
NSize
>
{
old_array
[
IRs
]...
}
;
return
make_array
(
old_array
[
Number
<
IRs
>
{}
]...
)
;
}
}
template
<
typename
TData
,
index_t
NSize
,
typename
MapOld2New
>
struct
lambda_reorder_array_given_old2new
{
const
Array
<
TData
,
NSize
>&
old_array
;
Array
<
TData
,
NSize
>&
new_array
;
__host__
__device__
constexpr
lambda_reorder_array_given_old2new
(
const
Array
<
TData
,
NSize
>&
old_array_
,
Array
<
TData
,
NSize
>&
new_array_
)
:
old_array
(
old_array_
),
new_array
(
new_array_
)
{
}
template
<
index_t
IOldDim
>
__host__
__device__
constexpr
void
operator
()(
Number
<
IOldDim
>
)
const
{
TData
old_data
=
old_array
[
IOldDim
];
constexpr
index_t
INewDim
=
MapOld2New
::
At
(
Number
<
IOldDim
>
{});
new_array
(
Number
<
INewDim
>
{})
=
old_data
;
}
};
template
<
typename
TData
,
index_t
NSize
,
index_t
...
IRs
>
template
<
typename
TData
,
index_t
NSize
,
index_t
...
IRs
>
__host__
__device__
constexpr
auto
reorder_array_given_old2new
(
const
Array
<
TData
,
NSize
>&
old_array
,
__host__
__device__
constexpr
auto
Sequence
<
IRs
...
>
/*
old2new
*/
)
container_reorder_given_old2new
(
const
Array
<
TData
,
NSize
>&
old_array
,
Sequence
<
IRs
...
>
old2new
)
{
{
Array
<
TData
,
NSize
>
new_array
;
return
container_reorder_given_new2old
(
old_array
,
typename
sequence_map_inverse
<
decltype
(
old2new
)
>::
type
{});
static_assert
(
NSize
==
sizeof
...(
IRs
),
"NSize not consistent"
);
static_assert
(
is_valid_sequence_map
<
Sequence
<
IRs
...
>>::
value
,
"wrong! invalid reorder map"
);
static_for
<
0
,
NSize
,
1
>
{}(
lambda_reorder_array_given_old2new
<
TData
,
NSize
,
Sequence
<
IRs
...
>>
(
old_array
,
new_array
));
return
new_array
;
}
}
// emulate constepxr lambda for array
template
<
typename
...
Ts
,
index_t
...
IRs
>
template
<
typename
F
,
typename
X
,
typename
Y
,
typename
Z
>
__host__
__device__
constexpr
auto
container_reorder_given_new2old
(
const
Tuple
<
Ts
...
>&
old_tuple
,
struct
lambda_array_math
Sequence
<
IRs
...
>
/*new2old*/
)
{
const
F
&
f
;
const
X
&
x
;
const
Y
&
y
;
Z
&
z
;
__host__
__device__
constexpr
lambda_array_math
(
const
F
&
f_
,
const
X
&
x_
,
const
Y
&
y_
,
Z
&
z_
)
:
f
(
f_
),
x
(
x_
),
y
(
y_
),
z
(
z_
)
{
}
template
<
index_t
IDim_
>
__host__
__device__
constexpr
void
operator
()(
Number
<
IDim_
>
)
const
{
constexpr
auto
IDim
=
Number
<
IDim_
>
{};
z
(
IDim
)
=
f
(
x
[
IDim
],
y
[
IDim
]);
}
};
// Array = Sequence - Array
template
<
typename
TData
,
index_t
NSize
,
index_t
...
Is
>
__host__
__device__
constexpr
auto
operator
-
(
Sequence
<
Is
...
>
a
,
Array
<
TData
,
NSize
>
b
)
{
{
static_assert
(
sizeof
...(
Is
)
==
NSize
,
"wrong! size not the same"
);
static_assert
(
sizeof
...(
Ts
)
==
sizeof
...(
IRs
),
"wrong! size not consistent"
);
Array
<
TData
,
NSize
>
result
;
auto
f
=
math
::
minus
<
index_t
>
{};
static_assert
(
is_valid_sequence_map
<
Sequence
<
IRs
...
>>
{},
"wrong! invalid reorder map"
);
static_for
<
0
,
NSize
,
1
>
{}(
lambda_array_math
<
decltype
(
f
),
decltype
(
a
),
decltype
(
b
),
decltype
(
result
)
>
(
f
,
a
,
b
,
result
));
return
result
;
return
make_tuple
(
old_tuple
[
Number
<
IRs
>
{}]...)
;
}
}
// Array = Array * TData
template
<
typename
...
Ts
,
index_t
...
IRs
>
template
<
typename
TData
,
index_t
NSize
>
__host__
__device__
constexpr
auto
container_reorder_given_old2new
(
const
Tuple
<
Ts
...
>&
old_tuple
,
__host__
__device__
constexpr
auto
operator
*
(
TData
v
,
Array
<
TData
,
NSize
>
a
)
Sequence
<
IRs
...
>
old2new
)
{
{
Array
<
TData
,
NSize
>
result
;
return
container_reorder_given_new2old
(
old_tuple
,
typename
sequence_map_inverse
<
decltype
(
old2new
)
>::
type
{});
for
(
index_t
i
=
0
;
i
<
NSize
;
++
i
)
{
result
(
i
)
=
a
[
i
]
*
v
;
}
return
result
;
}
}
template
<
typename
TData
,
typename
Ar
r
,
typename
Reduce
>
template
<
typename
TData
,
typename
Containe
r
,
typename
Reduce
>
__host__
__device__
constexpr
TData
reduce_on_array
(
const
Ar
r
&
a
,
Reduce
f
,
TData
init
)
__host__
__device__
constexpr
TData
container_reduce
(
const
Containe
r
&
a
,
Reduce
f
,
TData
init
)
{
{
// static_assert(is_same<typename Arr::data_type, TData>::value, "wrong! different data type");
// static_assert(is_same<typename Arr::data_type, TData>::value, "wrong! different data type");
static_assert
(
Ar
r
::
Size
()
>
0
,
"wrong"
);
static_assert
(
Containe
r
::
Size
()
>
0
,
"wrong"
);
TData
result
=
init
;
TData
result
=
init
;
static_for
<
0
,
Ar
r
::
Size
(),
1
>
{}([
&
](
auto
I
)
{
result
=
f
(
result
,
a
[
I
]);
});
static_for
<
0
,
Containe
r
::
Size
(),
1
>
{}([
&
](
auto
I
)
{
result
=
f
(
result
,
a
[
I
]);
});
return
result
;
return
result
;
}
}
template
<
typename
TData
,
index_t
NSize
,
typename
Reduce
>
template
<
typename
TData
,
index_t
NSize
,
typename
Reduce
>
__host__
__device__
constexpr
auto
__host__
__device__
constexpr
auto
reverse_inclusive_scan
_on_array
(
const
Array
<
TData
,
NSize
>&
x
,
Reduce
f
,
TData
init
)
container_
reverse_inclusive_scan
(
const
Array
<
TData
,
NSize
>&
x
,
Reduce
f
,
TData
init
)
{
{
Array
<
TData
,
NSize
>
y
;
Array
<
TData
,
NSize
>
y
;
TData
r
=
init
;
TData
r
=
init
;
#if 0
#pragma unroll
for(index_t i = NSize - 1; i >= 0; --i)
{
r = f(r, x[i]);
y(i) = r;
}
#else
static_for
<
NSize
-
1
,
0
,
-
1
>
{}([
&
](
auto
i
)
{
static_for
<
NSize
-
1
,
0
,
-
1
>
{}([
&
](
auto
i
)
{
r
=
f
(
r
,
x
[
i
]);
r
=
f
(
r
,
x
[
i
]);
y
(
i
)
=
r
;
y
(
i
)
=
r
;
...
@@ -163,36 +90,61 @@ reverse_inclusive_scan_on_array(const Array<TData, NSize>& x, Reduce f, TData in
...
@@ -163,36 +90,61 @@ reverse_inclusive_scan_on_array(const Array<TData, NSize>& x, Reduce f, TData in
r
=
f
(
r
,
x
[
Number
<
0
>
{}]);
r
=
f
(
r
,
x
[
Number
<
0
>
{}]);
y
(
Number
<
0
>
{})
=
r
;
y
(
Number
<
0
>
{})
=
r
;
#endif
return
y
;
return
y
;
}
}
template
<
typename
TData
,
index_t
NSize
,
typename
Reduce
>
template
<
typename
TData
,
index_t
NSize
,
typename
Reduce
>
__host__
__device__
constexpr
auto
__host__
__device__
constexpr
auto
reverse_exclusive_scan
_on_array
(
const
Array
<
TData
,
NSize
>&
x
,
Reduce
f
,
TData
init
)
container_
reverse_exclusive_scan
(
const
Array
<
TData
,
NSize
>&
x
,
Reduce
f
,
TData
init
)
{
{
Array
<
TData
,
NSize
>
y
;
Array
<
TData
,
NSize
>
y
;
TData
r
=
init
;
TData
r
=
init
;
#if 0
static_for
<
NSize
-
1
,
0
,
-
1
>
{}([
&
](
auto
i
)
{
#pragma unroll
for(index_t i = NSize - 1; i > 0; --i)
{
y
(
i
)
=
r
;
y
(
i
)
=
r
;
r
=
f
(
r
,
x
[
i
]);
r
=
f
(
r
,
x
[
i
]);
}
});
y
(
Number
<
0
>
{})
=
r
;
return
y
;
}
template
<
typename
TData
,
index_t
NSize
,
typename
Reduce
>
__host__
__device__
constexpr
auto
container_reverse_exclusive_scan
(
const
StaticallyIndexedArray
<
TData
,
NSize
>&
x
,
Reduce
f
,
TData
init
)
{
StaticallyIndexedArray
<
TData
,
NSize
>
y
;
TData
r
=
init
;
y(0) = r;
#else
static_for
<
NSize
-
1
,
0
,
-
1
>
{}([
&
](
auto
i
)
{
static_for
<
NSize
-
1
,
0
,
-
1
>
{}([
&
](
auto
i
)
{
y
(
i
)
=
r
;
y
(
i
)
=
r
;
r
=
f
(
r
,
x
[
i
]);
r
=
f
(
r
,
x
[
i
]);
});
});
y
(
Number
<
0
>
{})
=
r
;
y
(
Number
<
0
>
{})
=
r
;
#endif
return
y
;
}
template
<
typename
TData
,
index_t
NSize
,
typename
Reduce
>
__host__
__device__
constexpr
auto
container_reverse_inclusive_scan
(
const
StaticallyIndexedArray
<
TData
,
NSize
>&
x
,
Reduce
f
,
TData
init
)
{
StaticallyIndexedArray
<
TData
,
NSize
>
y
;
TData
r
=
init
;
static_for
<
NSize
-
1
,
0
,
-
1
>
{}([
&
](
auto
i
)
{
r
=
f
(
r
,
x
[
i
]);
y
(
i
)
=
r
;
});
r
=
f
(
r
,
x
[
Number
<
0
>
{}]);
y
(
Number
<
0
>
{})
=
r
;
return
y
;
return
y
;
}
}
...
@@ -204,19 +156,21 @@ __host__ __device__ constexpr auto container_cat(const X& x, const Ys&... ys)
...
@@ -204,19 +156,21 @@ __host__ __device__ constexpr auto container_cat(const X& x, const Ys&... ys)
}
}
template
<
typename
T
,
index_t
NX
,
index_t
NY
>
template
<
typename
T
,
index_t
NX
,
index_t
NY
>
__host__
__device__
constexpr
auto
container_cat
(
const
Array
<
T
,
NX
>&
x
,
const
Array
<
T
,
NY
>&
y
)
__host__
__device__
constexpr
auto
container_cat
(
const
Array
<
T
,
NX
>&
a
x
,
const
Array
<
T
,
NY
>&
a
y
)
{
{
Array
<
T
,
NX
+
NY
>
z
;
return
unpack2
(
[
&
](
auto
&&
...
zs
)
{
return
make_array
(
std
::
forward
<
decltype
(
zs
)
>
(
zs
)...);
},
ax
,
ay
);
static_for
<
0
,
NX
,
1
>
{}([
&
](
auto
i
)
{
z
(
i
)
=
x
[
i
];
});
}
static_for
<
0
,
NY
,
1
>
{}([
&
](
auto
i
)
{
z
(
i
+
Number
<
NX
>
{})
=
y
[
i
];
});
return
z
;
template
<
typename
...
X
,
typename
...
Y
>
__host__
__device__
constexpr
auto
container_cat
(
const
Tuple
<
X
...
>&
tx
,
const
Tuple
<
Y
...
>&
ty
)
{
return
unpack2
(
[
&
](
auto
&&
...
zs
)
{
return
make_tuple
(
std
::
forward
<
decltype
(
zs
)
>
(
zs
)...);
},
tx
,
ty
);
}
}
template
<
typename
T
,
index_t
N
>
template
<
typename
Container
>
__host__
__device__
constexpr
auto
container_cat
(
const
Array
<
T
,
N
>
&
x
)
__host__
__device__
constexpr
auto
container_cat
(
const
Container
&
x
)
{
{
return
x
;
return
x
;
}
}
...
...
composable_kernel/include/utility/config.amd.hpp.in
View file @
34cbbb48
...
@@ -9,7 +9,7 @@
...
@@ -9,7 +9,7 @@
#define CK_UNSIGNED_INDEX_TYPE 0
#define CK_UNSIGNED_INDEX_TYPE 0
// multi index
// multi index
#define CK_USE_DYNAMICALLY_INDEXED_MULTI_INDEX
0
#define CK_USE_DYNAMICALLY_INDEXED_MULTI_INDEX
1
// device backend
// device backend
#define CK_DEVICE_BACKEND_AMD 1
#define CK_DEVICE_BACKEND_AMD 1
...
...
composable_kernel/include/utility/functional3.hpp
View file @
34cbbb48
...
@@ -63,7 +63,7 @@ struct ford_impl
...
@@ -63,7 +63,7 @@ struct ford_impl
for
(
index_t
i
=
0
;
i
<
RemainLengths
::
Front
();
++
i
)
for
(
index_t
i
=
0
;
i
<
RemainLengths
::
Front
();
++
i
)
{
{
ford_impl
<
decltype
(
RemainLengths
::
PopFront
()),
Orders
>
{}(
ford_impl
<
decltype
(
RemainLengths
::
PopFront
()),
Orders
>
{}(
f
,
push_back
(
current_ordered_id
,
i
));
f
,
container_
push_back
(
current_ordered_id
,
i
));
}
}
}
}
};
};
...
@@ -77,7 +77,7 @@ struct ford_impl<Sequence<>, Orders>
...
@@ -77,7 +77,7 @@ struct ford_impl<Sequence<>, Orders>
__host__
__device__
constexpr
void
operator
()(
F
f
,
CurrentOrderedId
current_ordered_id
)
const
__host__
__device__
constexpr
void
operator
()(
F
f
,
CurrentOrderedId
current_ordered_id
)
const
{
{
// retrive unordered Id
// retrive unordered Id
f
(
reorder_
array_
given_old2new
(
current_ordered_id
,
Orders
{}));
f
(
container_
reorder_given_old2new
(
current_ordered_id
,
Orders
{}));
}
}
};
};
...
...
composable_kernel/include/utility/functional4.hpp
View file @
34cbbb48
...
@@ -49,7 +49,7 @@ __host__ __device__ constexpr auto unpack(F&& f, X&& x)
...
@@ -49,7 +49,7 @@ __host__ __device__ constexpr auto unpack(F&& f, X&& x)
// TODO: properly implement unpack that takes any number of containers
// TODO: properly implement unpack that takes any number of containers
template
<
typename
F
,
typename
X
,
typename
Y
>
template
<
typename
F
,
typename
X
,
typename
Y
>
__host__
__device__
constexpr
auto
unpack
(
F
&&
f
,
X
&&
x
,
Y
&&
y
)
__host__
__device__
constexpr
auto
unpack
2
(
F
&&
f
,
X
&&
x
,
Y
&&
y
)
{
{
using
X_
=
remove_reference_t
<
X
>
;
using
X_
=
remove_reference_t
<
X
>
;
using
Y_
=
remove_reference_t
<
Y
>
;
using
Y_
=
remove_reference_t
<
Y
>
;
...
...
composable_kernel/include/utility/statically_indexed_array.hpp
View file @
34cbbb48
...
@@ -36,42 +36,5 @@ __host__ __device__ constexpr auto make_statically_indexed_array()
...
@@ -36,42 +36,5 @@ __host__ __device__ constexpr auto make_statically_indexed_array()
return
StaticallyIndexedArray
<
X
,
0
>
();
return
StaticallyIndexedArray
<
X
,
0
>
();
}
}
template
<
typename
TData
,
index_t
NSize
,
typename
Reduce
>
__host__
__device__
constexpr
auto
reverse_exclusive_scan_on_array
(
const
StaticallyIndexedArray
<
TData
,
NSize
>&
x
,
Reduce
f
,
TData
init
)
{
StaticallyIndexedArray
<
TData
,
NSize
>
y
;
TData
r
=
init
;
static_for
<
NSize
-
1
,
0
,
-
1
>
{}([
&
](
auto
i
)
{
y
(
i
)
=
r
;
r
=
f
(
r
,
x
[
i
]);
});
y
(
Number
<
0
>
{})
=
r
;
return
y
;
}
template
<
typename
TData
,
index_t
NSize
,
typename
Reduce
>
__host__
__device__
constexpr
auto
reverse_inclusive_scan_on_array
(
const
StaticallyIndexedArray
<
TData
,
NSize
>&
x
,
Reduce
f
,
TData
init
)
{
StaticallyIndexedArray
<
TData
,
NSize
>
y
;
TData
r
=
init
;
static_for
<
NSize
-
1
,
0
,
-
1
>
{}([
&
](
auto
i
)
{
r
=
f
(
r
,
x
[
i
]);
y
(
i
)
=
r
;
});
r
=
f
(
r
,
x
[
Number
<
0
>
{}]);
y
(
Number
<
0
>
{})
=
r
;
return
y
;
}
}
// namespace ck
}
// namespace ck
#endif
#endif
composable_kernel/include/utility/tuple.hpp
View file @
34cbbb48
...
@@ -128,12 +128,8 @@ struct Tuple : detail::TupleImpl<typename arithmetic_sequence_gen<0, sizeof...(X
...
@@ -128,12 +128,8 @@ struct Tuple : detail::TupleImpl<typename arithmetic_sequence_gen<0, sizeof...(X
__host__
__device__
constexpr
Tuple
(
Tuple
&&
)
=
default
;
__host__
__device__
constexpr
Tuple
(
Tuple
&&
)
=
default
;
#if 0
template
<
typename
...
Ys
,
template
<
typename
...
Ys
,
typename
std
::
enable_if
<
sizeof
...(
Ys
)
==
sizeof
...(
Xs
),
bool
>
::
type
=
false
>
typename
std
::
enable_if
<
sizeof
...(
Ys
)
==
sizeof
...(
Xs
),
bool
>
::
type
=
false
>
#else
template
<
typename
...
Ys
>
#endif
__host__
__device__
explicit
constexpr
Tuple
(
const
Tuple
<
Ys
...
>&
y
)
__host__
__device__
explicit
constexpr
Tuple
(
const
Tuple
<
Ys
...
>&
y
)
:
base
(
static_cast
<
:
base
(
static_cast
<
const
detail
::
TupleImpl
<
typename
arithmetic_sequence_gen
<
0
,
sizeof
...(
Ys
),
1
>::
type
,
const
detail
::
TupleImpl
<
typename
arithmetic_sequence_gen
<
0
,
sizeof
...(
Ys
),
1
>::
type
,
...
@@ -141,12 +137,8 @@ struct Tuple : detail::TupleImpl<typename arithmetic_sequence_gen<0, sizeof...(X
...
@@ -141,12 +137,8 @@ struct Tuple : detail::TupleImpl<typename arithmetic_sequence_gen<0, sizeof...(X
{
{
}
}
#if 0
template
<
typename
...
Ys
,
template
<
typename
...
Ys
,
typename
std
::
enable_if
<
sizeof
...(
Ys
)
==
sizeof
...(
Xs
),
bool
>
::
type
=
false
>
typename
std
::
enable_if
<
sizeof
...(
Ys
)
==
sizeof
...(
Xs
),
bool
>
::
type
=
false
>
#else
template
<
typename
...
Ys
>
#endif
__host__
__device__
explicit
constexpr
Tuple
(
Tuple
<
Ys
...
>&&
y
)
__host__
__device__
explicit
constexpr
Tuple
(
Tuple
<
Ys
...
>&&
y
)
:
base
(
static_cast
<
:
base
(
static_cast
<
detail
::
TupleImpl
<
typename
arithmetic_sequence_gen
<
0
,
sizeof
...(
Ys
),
1
>::
type
,
detail
::
TupleImpl
<
typename
arithmetic_sequence_gen
<
0
,
sizeof
...(
Ys
),
1
>::
type
,
...
@@ -154,7 +146,9 @@ struct Tuple : detail::TupleImpl<typename arithmetic_sequence_gen<0, sizeof...(X
...
@@ -154,7 +146,9 @@ struct Tuple : detail::TupleImpl<typename arithmetic_sequence_gen<0, sizeof...(X
{
{
}
}
template
<
typename
...
Ys
,
typename
std
::
enable_if
<
sizeof
...(
Ys
)
>
=
1
,
bool
>::
type
=
false
>
template
<
typename
...
Ys
,
typename
std
::
enable_if
<
sizeof
...(
Ys
)
==
sizeof
...(
Xs
)
&&
sizeof
...(
Ys
)
>
=
1
,
bool
>::
type
=
false
>
__host__
__device__
explicit
constexpr
Tuple
(
Ys
&&
...
ys
)
:
base
(
std
::
forward
<
Ys
>
(
ys
)...)
__host__
__device__
explicit
constexpr
Tuple
(
Ys
&&
...
ys
)
:
base
(
std
::
forward
<
Ys
>
(
ys
)...)
{
{
}
}
...
...
composable_kernel/include/utility/tuple_helper.hpp
View file @
34cbbb48
...
@@ -12,13 +12,6 @@ __host__ __device__ constexpr auto generate_tuple(F&& f, Number<N>)
...
@@ -12,13 +12,6 @@ __host__ __device__ constexpr auto generate_tuple(F&& f, Number<N>)
typename
arithmetic_sequence_gen
<
0
,
N
,
1
>::
type
{});
typename
arithmetic_sequence_gen
<
0
,
N
,
1
>::
type
{});
}
}
template
<
typename
...
Tuples
>
__host__
__device__
constexpr
auto
tuple_cat
(
Tuples
&&
...
tuples
)
{
return
unpack
([
&
](
auto
&&
...
xs
)
{
return
make_tuple
(
std
::
forward
<
decltype
(
xs
)
>
(
xs
)...);
},
std
::
forward
<
Tuples
>
(
tuples
)...);
}
namespace
detail
{
namespace
detail
{
template
<
typename
F
,
typename
X
,
index_t
...
Is
>
template
<
typename
F
,
typename
X
,
index_t
...
Is
>
...
...
driver/include/device_dummy_dynamic_transform_v1.hpp
View file @
34cbbb48
...
@@ -52,7 +52,7 @@ void device_dummy_dynamic_transform_v1(InDesc,
...
@@ -52,7 +52,7 @@ void device_dummy_dynamic_transform_v1(InDesc,
const
auto
in_gemmk_gemmn_global_desc
=
tensor_descs
.
At
(
Number
<
0
>
{});
const
auto
in_gemmk_gemmn_global_desc
=
tensor_descs
.
At
(
Number
<
0
>
{});
auto
in_gemmk_gemmn_coord
=
auto
in_gemmk_gemmn_coord
=
make_dynamic_tensor_coordinate
(
in_gemmk_gemmn_global_desc
,
make_multi_index
(
0
,
0
));
make_dynamic_tensor_coordinate
<
2
>
(
in_gemmk_gemmn_global_desc
,
make_multi_index
(
0
,
0
));
for
(
index_t
iter
=
0
;
iter
<
10
;
++
iter
)
for
(
index_t
iter
=
0
;
iter
<
10
;
++
iter
)
{
{
...
@@ -115,10 +115,10 @@ void device_dummy_dynamic_transform_v1(InDesc,
...
@@ -115,10 +115,10 @@ void device_dummy_dynamic_transform_v1(InDesc,
const
DynamicNativeTensorDescriptor
<
4
>
,
const
DynamicNativeTensorDescriptor
<
4
>
,
const
DynamicNativeTensorDescriptor
<
4
>
,
const
DynamicNativeTensorDescriptor
<
4
>
,
const
DynamicNativeTensorDescriptor
<
4
>
,
const
DynamicNativeTensorDescriptor
<
4
>
,
const
Array
<
index_t
,
2
>
,
const
MultiIndex
<
2
>
,
const
Array
<
index_t
,
2
>
,
const
MultiIndex
<
2
>
,
const
Array
<
index_t
,
2
>
,
const
MultiIndex
<
2
>
,
const
Array
<
index_t
,
2
>>
,
const
MultiIndex
<
2
>>
,
dim3
(
GridSize
),
dim3
(
GridSize
),
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
...
...
driver/include/device_dummy_dynamic_transform_v2.hpp
View file @
34cbbb48
...
@@ -96,7 +96,7 @@ void device_dummy_dynamic_transform_v2(InDesc,
...
@@ -96,7 +96,7 @@ void device_dummy_dynamic_transform_v2(InDesc,
for
(
index_t
j
=
0
;
j
<
nrepeat
;
++
j
)
for
(
index_t
j
=
0
;
j
<
nrepeat
;
++
j
)
{
{
#if
0
#if
1
launch_kernel
(
run_gridwise_operation
<
DummyDynamicTransform_v2_1
<
BlockSize
>
,
launch_kernel
(
run_gridwise_operation
<
DummyDynamicTransform_v2_1
<
BlockSize
>
,
index_t
*
const
,
index_t
*
const
,
float
*
const
,
float
*
const
,
...
...
driver/src/conv_driver.cpp
View file @
34cbbb48
...
@@ -549,7 +549,7 @@ int main(int argc, char* argv[])
...
@@ -549,7 +549,7 @@ int main(int argc, char* argv[])
#endif
#endif
}
}
#if
0
#if
1
device_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw
(
in_nchw_desc
,
device_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw
(
in_nchw_desc
,
in_nchw
,
in_nchw
,
wei_kcyx_desc
,
wei_kcyx_desc
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment