gaoqiong / composable_kernel · Commits · 4e78d2fc

Commit 4e78d2fc, authored Sep 11, 2020 by Chao Liu

    adding dynamic tensor descriptor

Parent: 2ffa2708

Showing 14 changed files with 789 additions and 204 deletions (+789 −204)
Changed files:

    composable_kernel/include/kernel_algorithm/dummy_dynamic_transform.hpp                 +157  −14
    composable_kernel/include/kernel_algorithm/dummy_static_transform.hpp                    +1   −1
    composable_kernel/include/tensor_description/dynamic_multi_index_transform.hpp           +2   −2
    composable_kernel/include/tensor_description/dynamic_tensor_descriptor_helper_v2.hpp    +44  −36
    composable_kernel/include/tensor_description/dynamic_tensor_descriptor_v2.hpp          +293 −132
    composable_kernel/include/tensor_description/multi_index_transform.hpp                   +2   −2
    composable_kernel/include/utility/array.hpp                                             +92   −9
    composable_kernel/include/utility/array_helper.hpp                                       +8   −6
    composable_kernel/include/utility/common_header.hpp                                      +1   −0
    composable_kernel/include/utility/functional4.hpp                                       +22   −0
    composable_kernel/include/utility/sequence_helper.hpp (new file)                        +15   −0
    composable_kernel/include/utility/tuple_helper.hpp                                      +13   −0
    driver/include/device_dummy_dynamic_transform.hpp                                      +125   −0
    driver/src/conv_driver.cpp                                                              +14   −2
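As a quick orientation before the per-file diffs: this commit introduces a "v2" dynamic (runtime-sized) tensor-descriptor path. Below is a minimal usage sketch, not part of the commit, assembled only from functions added or changed in this diff; the concrete lengths, strides, and step values are illustrative.

#include "common_header.hpp"
#include "dynamic_tensor_descriptor_v2.hpp"
#include "dynamic_tensor_descriptor_helper_v2.hpp"

using namespace ck;

__host__ __device__ void dynamic_descriptor_v2_sketch()
{
    // runtime lengths/strides of an NCHW tensor (illustrative values, not from the commit)
    const auto desc = make_dynamic_native_tensor_descriptor_v2(MultiIndex<4>{1, 8, 16, 16},
                                                               MultiIndex<4>{2048, 256, 16, 1});

    // a coordinate plus a reusable step of {0, 0, 1, 0}
    auto coord      = make_dynamic_tensor_coordinate_v2(desc, MultiIndex<4>{0, 0, 0, 0});
    const auto step = make_dynamic_tensor_coordinate_step_v2(desc, MultiIndex<4>{0, 0, 1, 0});

    for(index_t i = 0; i < 4; ++i)
    {
        // each move advances coord.GetOffset() by the H stride (16 in this example)
        move_dynamic_tensor_coordinate_v2(desc, coord, step);
    }
}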
composable_kernel/include/kernel_algorithm/dummy_dynamic_transform.hpp (+157 −14)

@@ -3,14 +3,17 @@
 #include "common_header.hpp"
 #include "dynamic_tensor_descriptor.hpp"
+#include "dynamic_tensor_descriptor_v2.hpp"
 #include "dynamic_tensor_descriptor_helper.hpp"
+#include "dynamic_tensor_descriptor_helper_v2.hpp"
 #include "dynamic_tensor_coordinate.hpp"

 namespace ck {

+template <typename WeiDesc, typename InDesc, typename OutDesc>
 __host__ __device__ constexpr auto
-map_convolution_into_gemm(const DynamicNativeTensorDescriptor<4> wei_k_c_y_x_global_desc,
-                          const DynamicNativeTensorDescriptor<4> in_n_c_hi_wi_global_desc,
-                          const DynamicNativeTensorDescriptor<4> out_n_k_ho_wo_global_desc,
+map_convolution_into_gemm(const WeiDesc& wei_k_c_y_x_global_desc,
+                          const InDesc& in_n_c_hi_wi_global_desc,
+                          const OutDesc& out_n_k_ho_wo_global_desc,
                           const Array<index_t, 2> conv_strides,
                           const Array<index_t, 2> conv_dilations,
                           const Array<index_t, 2> in_left_pads,
...
@@ -78,15 +81,88 @@ map_convolution_into_gemm(const DynamicNativeTensorDescriptor<4> wei_k_c_y_x_glo
     return make_tuple(in_gemmk_gemmn_global_desc);
 }

+template <typename WeiDesc, typename InDesc, typename OutDesc>
+__host__ __device__ constexpr auto
+map_convolution_into_gemm_v2(const WeiDesc& wei_k_c_y_x_global_desc,
+                             const InDesc& in_n_c_hi_wi_global_desc,
+                             const OutDesc& out_n_k_ho_wo_global_desc,
+                             const Array<index_t, 2> conv_strides,
+                             const Array<index_t, 2> conv_dilations,
+                             const Array<index_t, 2> in_left_pads,
+                             const Array<index_t, 2> in_right_pads)
+{
+    const index_t N = in_n_c_hi_wi_global_desc.GetLength(0);
+    const index_t C = in_n_c_hi_wi_global_desc.GetLength(1);
+    const index_t K = out_n_k_ho_wo_global_desc.GetLength(1);
+
+    const index_t Y = wei_k_c_y_x_global_desc.GetLength(2);
+    const index_t X = wei_k_c_y_x_global_desc.GetLength(3);
+
+    const index_t Hi = in_n_c_hi_wi_global_desc.GetLength(2);
+    const index_t Wi = in_n_c_hi_wi_global_desc.GetLength(3);
+
+    const index_t Ho = out_n_k_ho_wo_global_desc.GetLength(2);
+    const index_t Wo = out_n_k_ho_wo_global_desc.GetLength(3);
+
+    const index_t ConvStrideH = conv_strides[0];
+    const index_t ConvStrideW = conv_strides[1];
+
+    const index_t ConvDilationH = conv_dilations[0];
+    const index_t ConvDilationW = conv_dilations[1];
+
+    const index_t InLeftPadH = in_left_pads[0];
+    const index_t InLeftPadW = in_left_pads[1];
+
+    const index_t InRightPadH = in_right_pads[0];
+    const index_t InRightPadW = in_right_pads[1];
+
+    // input tensor
+    const auto in_n_c_hip_wip_global_desc = transform_dynamic_tensor_descriptor_v2(
+        transform_dynamic_tensor_descriptor_v2(
+            in_n_c_hi_wi_global_desc,
+            make_tuple(DynamicPassThrough{N},
+                       DynamicPassThrough{C},
+                       DynamicLeftPad{Hi, InLeftPadH},
+                       DynamicLeftPad{Wi, InLeftPadW}),
+            make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{}),
+            make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{})),
+        make_tuple(DynamicPassThrough{N},
+                   DynamicPassThrough{C},
+                   DynamicRightPad{Hi + InLeftPadH, InRightPadH},
+                   DynamicRightPad{Wi + InLeftPadW, InRightPadW}),
+        make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{}),
+        make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{}));
+
+    const index_t Hip = in_n_c_hip_wip_global_desc.GetLength(2);
+    const index_t Wip = in_n_c_hip_wip_global_desc.GetLength(3);
+
+    const auto in_n_c_y_ho_x_wo_global_desc = transform_dynamic_tensor_descriptor_v2(
+        in_n_c_hip_wip_global_desc,
+        make_tuple(DynamicPassThrough{N},
+                   DynamicPassThrough{C},
+                   DynamicEmbed<2>{{Y, Ho}, {ConvDilationH, ConvStrideH, 0}},
+                   DynamicEmbed<2>{{X, Wo}, {ConvDilationW, ConvStrideW, 0}}),
+        make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{}),
+        make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2, 3>{}, Sequence<4, 5>{}));
+
+    const auto in_gemmk_gemmn_global_desc = transform_dynamic_tensor_descriptor_v2(
+        in_n_c_y_ho_x_wo_global_desc,
+        make_tuple(DynamicMerge<3>{{C, Y, X}}, DynamicMerge<3>{{N, Ho, Wo}}),
+        make_tuple(Sequence<1, 2, 4>{}, Sequence<0, 3, 5>{}),
+        make_tuple(Sequence<0>{}, Sequence<1>{}));
+
+    return make_tuple(in_gemmk_gemmn_global_desc);
+}
+
 template <index_t BlockSize>
 struct DummyDynamicTransform
 {
+    template <typename WeiDesc, typename InDesc, typename OutDesc>
     __device__ void
-    Run_v1(index_t* const __restrict__ p_wei_global,
+    Run_v0(index_t* const __restrict__ p_wei_global,
            float* const __restrict__ p_in_global,
            float* const __restrict__ p_out_global,
-           const DynamicNativeTensorDescriptor<4> wei_k_c_y_x_global_desc,
-           const DynamicNativeTensorDescriptor<4> in_n_c_hi_wi_global_desc,
-           const DynamicNativeTensorDescriptor<4> out_n_k_ho_wo_global_desc,
+           const WeiDesc wei_k_c_y_x_global_desc,
+           const InDesc in_n_c_hi_wi_global_desc,
+           const OutDesc out_n_k_ho_wo_global_desc,
           const Array<index_t, 2> conv_strides,
           const Array<index_t, 2> conv_dilations,
           const Array<index_t, 2> in_left_pads,
...
@@ -520,12 +596,13 @@ struct DummyDynamicTransform
        }
    }

+    template <typename WeiDesc, typename InDesc, typename OutDesc>
    __device__ void
-    Run_v2(index_t* const __restrict__ p_wei_global,
+    Run_v1(index_t* const __restrict__ p_wei_global,
           float* const __restrict__ p_in_global,
           float* const __restrict__ p_out_global,
-          const DynamicNativeTensorDescriptor<4> wei_k_c_y_x_global_desc,
-          const DynamicNativeTensorDescriptor<4> in_n_c_hi_wi_global_desc,
-          const DynamicNativeTensorDescriptor<4> out_n_k_ho_wo_global_desc,
+          const WeiDesc wei_k_c_y_x_global_desc,
+          const InDesc in_n_c_hi_wi_global_desc,
+          const OutDesc out_n_k_ho_wo_global_desc,
          const Array<index_t, 2> conv_strides,
          const Array<index_t, 2> conv_dilations,
          const Array<index_t, 2> in_left_pads,
...
@@ -583,12 +660,78 @@ struct DummyDynamicTransform
        }
    }

+    template <typename WeiDesc, typename InDesc, typename OutDesc>
+    __device__ void Run_v2(index_t* const __restrict__ p_wei_global,
+                           float* const __restrict__ p_in_global,
+                           float* const __restrict__ p_out_global,
+                           const WeiDesc wei_k_c_y_x_global_desc,
+                           const InDesc in_n_c_hi_wi_global_desc,
+                           const OutDesc out_n_k_ho_wo_global_desc,
+                           const Array<index_t, 2> conv_strides,
+                           const Array<index_t, 2> conv_dilations,
+                           const Array<index_t, 2> in_left_pads,
+                           const Array<index_t, 2> in_right_pads) const
+    {
+        const auto transformed_tensor_descs =
+            map_convolution_into_gemm_v2(wei_k_c_y_x_global_desc,
+                                         in_n_c_hi_wi_global_desc,
+                                         out_n_k_ho_wo_global_desc,
+                                         conv_strides,
+                                         conv_dilations,
+                                         in_left_pads,
+                                         in_right_pads);
+
+        const auto in_gemmk_gemmn_global_desc = transformed_tensor_descs.At(Number<0>{});
+
+        MultiIndex<2> idx;
+
+        // initialize idx
+        for(index_t i = 0; i < 2; ++i)
+        {
+            idx(i) = p_wei_global[get_thread_local_1d_id() + i];
+        }
+
+        const index_t niter = p_wei_global[10];
+
+        auto in_gemmk_gemmn_coord =
+            make_dynamic_tensor_coordinate_v2(in_gemmk_gemmn_global_desc, idx);
+
+        constexpr auto in_gemmk_gemmn_coord_step = make_dynamic_tensor_coordinate_step_v2(
+            in_gemmk_gemmn_global_desc, MultiIndex<2>{{1, 0}});
+
+        for(index_t iter = 0; iter < niter; ++iter)
+        {
+            move_dynamic_tensor_coordinate_v2(
+                in_gemmk_gemmn_global_desc, in_gemmk_gemmn_coord, in_gemmk_gemmn_coord_step);
+
+            // write
+            float value = 1;
+
+            transfer_data<float,
+                          1,
+                          AddressSpace::Vgpr,
+                          AddressSpace::Global,
+                          InMemoryDataOperation::Set,
+                          1,
+                          1>(&value,
+                             0,
+                             true,
+                             1,
+                             p_out_global,
+                             in_gemmk_gemmn_coord.GetOffset(),
+                             coordinate_has_valid_offset_assuming_visible_index_is_valid(
+                                 in_gemmk_gemmn_global_desc, in_gemmk_gemmn_coord),
+                             in_gemmk_gemmn_global_desc.GetElementSpaceSize());
+        }
+    }
+
+    template <typename WeiDesc, typename InDesc, typename OutDesc>
    __device__ void Run(index_t* const __restrict__ p_wei_global,
                        float* const __restrict__ p_in_global,
                        float* const __restrict__ p_out_global,
-                       const DynamicNativeTensorDescriptor<4> wei_k_c_y_x_global_desc,
-                       const DynamicNativeTensorDescriptor<4> in_n_c_hi_wi_global_desc,
-                       const DynamicNativeTensorDescriptor<4> out_n_k_ho_wo_global_desc,
+                       const WeiDesc wei_k_c_y_x_global_desc,
+                       const InDesc in_n_c_hi_wi_global_desc,
+                       const OutDesc out_n_k_ho_wo_global_desc,
                        const Array<index_t, 2> conv_strides,
                        const Array<index_t, 2> conv_dilations,
                        const Array<index_t, 2> in_left_pads,
...
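Worked size example for the mapping above (illustrative values, not from the commit): the padded input is embedded into sliding windows and then merged, so the GEMM view has GemmK = C*Y*X rows and GemmN = N*Ho*Wo columns, matching the two DynamicMerge<3> calls in map_convolution_into_gemm_v2.

// illustrative sketch only; names below are local to this example
constexpr ck::index_t N = 1, C = 8, Hi = 16, Wi = 16, Y = 3, X = 3;
constexpr ck::index_t pad = 1, stride = 1, dilation = 1;

constexpr ck::index_t Ho = (Hi + 2 * pad - dilation * (Y - 1) - 1) / stride + 1; // 16
constexpr ck::index_t Wo = (Wi + 2 * pad - dilation * (X - 1) - 1) / stride + 1; // 16

constexpr ck::index_t GemmK = C * Y * X;   // 72  (DynamicMerge<3>{{C, Y, X}})
constexpr ck::index_t GemmN = N * Ho * Wo; // 256 (DynamicMerge<3>{{N, Ho, Wo}})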
composable_kernel/include/kernel_algorithm/dummy_static_transform.hpp (+1 −1)

@@ -99,7 +99,7 @@ struct DummyStaticTransform
 #pragma unroll 1
        for(index_t k = 0; k < 100; ++k)
        {
-            coord += Array<index_t, 2>{8, 0};
+            coord += Array<index_t, 2>{{8, 0}};

            Float value = 1;

            transfer_data<Float,
...
composable_kernel/include/tensor_description/dynamic_multi_index_transform.hpp (+2 −2)

@@ -212,9 +212,9 @@ struct DynamicEmbed
    {
    }

-    __host__ __device__ static constexpr index_t GetNumOfUpperDimension() { return 1; }
+    __host__ __device__ static constexpr index_t GetNumOfLowerDimension() { return 1; }

-    __host__ __device__ static constexpr index_t GetNumOfLowerDimension() { return NDimUp; }
+    __host__ __device__ static constexpr index_t GetNumOfUpperDimension() { return NDimUp; }

    __host__ __device__ constexpr auto GetUpperLengths() const { return up_lengths_; }
...
composable_kernel/include/tensor_description/dynamic_tensor_descriptor_helper_v2.hpp (+44 −36)

@@ -6,44 +6,52 @@
 namespace ck {

-template <typename LowerTensorDescriptor,
-          typename Transforms,
-          typename LowerVisibleDimensionLowerVisibleIdss,
-          typename UpperVisibleDimensionUpperVisibleIdss>
+template <index_t N>
 __host__ __device__ constexpr auto
-transform_dynamic_tensor_descriptor_v2(const LowerTensorDescriptor& low_tensor_desc,
-                                       const Transforms& transforms,
-                                       LowerVisibleDimensionLowerVisibleIdss,
-                                       UpperVisibleDimensionUpperVisibleIdss)
+make_dynamic_native_tensor_descriptor_packed_v2(const MultiIndex<N>& lengths)
 {
-    // convert lower visible dimension idss (tuple of sequences) to hidden dimension idss (tuple of sequences)
-    constexpr auto low_visible_dimension_hidden_idss = transform_tuples(
-        // convert lower visible dimension ids (a sequence) to hidden dimension ids (a sequence)
-        [](auto low_visible_dim_ids) {
-            return transform_sequences(
-                // convert lower visible dimension id to hidden dimension id
-                [](auto low_visible_dim_id) {
-                    return low_tensor_desc.GetVisibleDimensionIds()[low_visible_dim_id];
-                },
-                low_visible_dim_ids);
-        },
-        LowerVisibleDimensionLowerVisibleIdss{});
-
-    constexpr auto up_visible_dims_
-
-    const auto all_transforms = merge_tuples(old_tensor_desc.GetTransforms(), new_transforms);
-
-    constexpr auto all_low_dim_idss =
-        merge_tuples(old_tensor_desc.GetLowerDimensionIdss(), new_low_dim_idss);
-
-    constexpr auto all_up_dim_idss =
-        merge_tuples(old_tensor_desc.GetUpperDimensionIdss(), new_up_dim_idss);
-
-    constexpr auto new_visible_dim_ids = new_up_dim_idss
-
-    return DynamicTensorDescriptor_v2<decltype(all_transforms),
-                                      decltype(all_low_dim_idss),
-                                      decltype(all_up_dim_idss),
-                                      decltype(new_visible_dim_ids)>{
-        all_transforms, old_tensor_desc.GetElementSpaceSize()};
+    const auto transforms = make_tuple(DynamicUnMerge<N>{lengths});
+
+    constexpr auto low_dim_hidden_idss = make_tuple(Sequence<0>{});
+
+    constexpr auto up_dim_hidden_idss =
+        make_tuple(typename arithmetic_sequence_gen<1, N + 1, 1>::type{});
+
+    constexpr auto visible_dim_hidden_ids = typename arithmetic_sequence_gen<0, N, 1>::type{};
+
+    const index_t element_space_size =
+        reduce_on_array(lengths, math::multiplies<index_t>{}, index_t{1});
+
+    return DynamicTensorDescriptor_v2<decltype(transforms),
+                                      decltype(low_dim_hidden_idss),
+                                      decltype(up_dim_hidden_idss),
+                                      decltype(visible_dim_hidden_ids)>{transforms,
+                                                                        element_space_size};
+}
+
+template <index_t N>
+__host__ __device__ constexpr auto
+make_dynamic_native_tensor_descriptor_v2(const MultiIndex<N>& lengths, const MultiIndex<N>& strides)
+{
+    const auto coefficients = strides.PushBack(index_t{0});
+
+    const auto transforms = make_tuple(DynamicEmbed<N>{lengths, coefficients});
+
+    constexpr auto low_dim_hidden_idss = make_tuple(Sequence<0>{});
+
+    constexpr auto up_dim_hidden_idss =
+        make_tuple(typename arithmetic_sequence_gen<1, N + 1, 1>::type{});
+
+    constexpr auto visible_dim_hidden_ids = typename arithmetic_sequence_gen<0, N, 1>::type{};
+
+    index_t element_space_size = 1;
+
+#pragma unroll
+    for(index_t i = 0; i < N; ++i)
+    {
+        element_space_size += (lengths[i] - 1) * strides[i];
+    }
+
+    return DynamicTensorDescriptor_v2<decltype(transforms),
+                                      decltype(low_dim_hidden_idss),
+                                      decltype(up_dim_hidden_idss),
+                                      decltype(visible_dim_hidden_ids)>{transforms,
+                                                                        element_space_size};
 }

 } // namespace ck
...
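A minimal sketch (not part of the commit) of the two helpers above, with illustrative NCHW lengths and strides. The strided helper sizes the element space as 1 + sum over i of (lengths[i] − 1) * strides[i]; the packed helper uses the product of the lengths. For the values below both give 1 + 0*2048 + 7*256 + 15*16 + 15*1 = 1*8*16*16 = 2048.

__host__ __device__ void native_descriptor_sketch()
{
    using namespace ck;

    // illustrative values only
    const auto lengths = MultiIndex<4>{1, 8, 16, 16};
    const auto strides = MultiIndex<4>{2048, 256, 16, 1};

    // DynamicUnMerge<4> under the hood, element space = product of lengths
    const auto packed_desc = make_dynamic_native_tensor_descriptor_packed_v2(lengths);

    // DynamicEmbed<4> under the hood, element space = 1 + sum((len - 1) * stride)
    const auto strided_desc = make_dynamic_native_tensor_descriptor_v2(lengths, strides);
}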
composable_kernel/include/tensor_description/dynamic_tensor_descriptor_v2.hpp (+293 −132)

@@ -12,6 +12,28 @@ struct DynamicTensorCoordinate_v2;
 template <index_t NTransform, index_t NDimVisible>
 struct DynamicTensorCoordinateStep_v2;

+template <typename TensorDesc, typename VisibleIndex>
+__host__ __device__ constexpr auto
+make_dynamic_tensor_coordinate_v2(const TensorDesc& tensor_desc, const VisibleIndex& idx_visible);
+
+template <typename TensorDesc, typename VisibleIndex>
+__host__ __device__ constexpr auto
+make_dynamic_tensor_coordinate_step_v2(const TensorDesc&, const VisibleIndex& idx_diff_visible);
+
+template <typename TensorDesc, typename TensorCoord, typename TensorCoordStep>
+__host__ __device__ void move_dynamic_tensor_coordinate_v2(const TensorDesc& tensor_desc,
+                                                           TensorCoord& coord,
+                                                           const TensorCoordStep& coord_step);
+
+template <typename TensorDesc, typename TensorCoord>
+__host__ __device__ constexpr bool coordinate_has_valid_offset_assuming_visible_index_is_valid(
+    const TensorDesc& tensor_desc, const TensorCoord& coord);
+
+template <typename TensorDesc, typename TensorCoord>
+__host__ __device__ constexpr bool coordinate_has_valid_offset(const TensorDesc& tensor_desc,
+                                                               const TensorCoord& coord);
+
 // Transforms: Tuple<transforms...>
 // LowerDimensionIdss : Tuple<Sequence<...>, ...>
 // UpperDimensionIdss : Tuple<Sequence<...>, ...>
...
@@ -22,28 +44,63 @@ template <typename Transforms,
          typename VisibleDimensionIds>
struct DynamicTensorDescriptor_v2
{
+    // private:
+    __host__ __device__ static constexpr index_t GetNumOfTransform() { return Transforms::Size(); }
+
+    __host__ __device__ static constexpr index_t GetNumOfVisibleDimension()
+    {
+        return VisibleDimensionIds::Size();
+    }
+
+    __host__ __device__ static constexpr index_t GetNumOfHiddenDimension()
+    {
+        constexpr auto all_low_dim_ids = unpack(
+            [](auto&&... xs) constexpr { return merge_sequences(xs...); }, LowerDimensionIdss{});
+
+        constexpr auto all_up_dim_ids = unpack(
+            [](auto&&... xs) constexpr { return merge_sequences(xs...); }, UpperDimensionIdss{});
+
+        constexpr auto all_dim_ids = merge_sequences(all_low_dim_ids, all_up_dim_ids);
+
+        using unique_sort_all_dim_ids = typename sequence_unique_sort<decltype(all_dim_ids),
+                                                                      math::less<index_t>,
+                                                                      math::equal<index_t>>::type;
+
+        return unique_sort_all_dim_ids::Size();
+    }
+
    constexpr static index_t ntransform_   = GetNumOfTransform();
    constexpr static index_t ndim_visible_ = GetNumOfVisibleDimension();
    constexpr static index_t ndim_hidden_  = GetNumOfHiddenDimension();

    using VisibleIndex = MultiIndex<ndim_visible_>;
    using HiddenIndex  = MultiIndex<ndim_hidden_>;

+    using Coordinate     = DynamicTensorCoordinate_v2<ndim_hidden_, VisibleDimensionIds>;
+    using CoordinateStep = DynamicTensorCoordinateStep_v2<ntransform_, ndim_visible_>;
+
    // public:
    __host__ __device__ explicit constexpr DynamicTensorDescriptor_v2(const Transforms& transforms,
                                                                      index_t element_space_size)
        : transforms_{transforms},
          hidden_lengths_{InitializeHiddenLengths(transforms_, element_space_size)},
-          visble_lengths_{hidden_lengths_}
+          visible_lengths_{hidden_lengths_}
    {
-        static_assert(Transforms::Size() == ntransforms_ &&
-                          LowerDimensionIdss::Size() == ntransforms_ &&
-                          UpperDimensionIdss::Size() == ntransforms_,
+        static_assert(Transforms::Size() == ntransform_ &&
+                          LowerDimensionIdss::Size() == ntransform_ &&
+                          UpperDimensionIdss::Size() == ntransform_,
                      "wrong! inconsistent # of transformations");

        // TODO check dependency of dimensions is valid
    }

-    __host__ __device__ static constexpr index_t GetNumOfDimension() const
+    __host__ __device__ explicit constexpr DynamicTensorDescriptor_v2()
+        : DynamicTensorDescriptor_v2(Transforms{}, index_t{0})
+    {
+    }
+
+    __host__ __device__ static constexpr index_t GetNumOfDimension()
    {
        return GetNumOfVisibleDimension();
    }
...
@@ -68,36 +125,10 @@ struct DynamicTensorDescriptor_v2
    {
        static_assert(Idx::Size() == GetNumOfDimension(), "wrong! inconsistent # of dimension");

-        return make_tensor_coordinate_v2(*this, idx).GetOffset();
-    }
-
-    private:
-    __host__ __device__ static constexpr index_t GetNumOfVisibleDimension()
-    {
-        return VisibleDimensionIds::Size();
-    }
-
-    __host__ __device__ static constexpr index_t GetNumOfHiddenDimension()
-    {
-        constexpr auto all_low_dim_ids = unpack(
-            [](auto&&... xs) constexpr { return merge_sequences(xs...); }, LowerDimsionIdss{});
-
-        constexpr auto all_up_dim_ids = unpack(
-            [](auto&&... xs) constexpr { return merge_sequences(xs...); }, UpperDimsionIdss{});
-
-        constexpr auto all_dim_ids = merge_sequenses(all_low_dim_ids, all_up_dim_ids);
-
-        using unique_sort_all_dim_ids = sequence_unique_sort<decltype(all_dim_ids),
-                                                             math::less<index_t>,
-                                                             math::equal<index_t>>::type;
-
-        return uniqie_sort_all_dim_ids::type::Size();
-    }
-
-    __host__ __device__ static constexpr index_t GetNumOfTransform() { return Transforms::Size(); }
+        return make_dynamic_tensor_coordinate_v2(*this, idx).GetOffset();
+    }

    // private:
    __host__ __device__ constexpr const auto& GetTransforms() const { return transforms_; }

    __host__ __device__ static constexpr auto GetLowerDimensionIdss()
...
@@ -110,7 +141,7 @@ struct DynamicTensorDescriptor_v2
        return UpperDimensionIdss{};
    }

-    __host__ __device__ static constexpr index_t GetVisibleDimensionIds()
+    __host__ __device__ static constexpr auto GetVisibleDimensionIds()
    {
        return VisibleDimensionIds{};
    }
...
@@ -118,70 +149,98 @@ struct DynamicTensorDescriptor_v2
    __host__ __device__ static constexpr auto InitializeHiddenLengths(const Transforms& transforms,
                                                                      index_t element_space_size)
    {
-        HiddenIndex lengths_hidden = make_zero_multi_index<ndim_hidden_>();
+        // zero initialization
+        HiddenIndex hidden_lengths{0};

        // this is the orignal tensor element space size
-        lengths_hidden(0) = element_space_size;
+        hidden_lengths(0) = element_space_size;

        // lengths for all other hidden dimensions
        static_for<0, ntransform_, 1>{}([&](auto itran) {
            const auto& tran = transforms.At(itran);

-            constexpr auto up_dim_ids = UpperDimensionIdss::At(itran);
+            constexpr auto up_dim_ids = UpperDimensionIdss{}.At(itran);

-            const auto lengths_up_pick = pick_array_element(lengths_hidden, up_dim_ids);
-
-#pragma unroll
-            for(index_t i = 0; i < lengths_low.Size(); ++i)
-            {
-                lengths_low_pick(i) = tran.GetUpperLengths()[i];
-            }
+            // lengths_hidden_pick_up contains a reference to lengths_hidden
+            auto hidden_lengths_pick_up = pick_array_element(hidden_lengths, up_dim_ids);
+
+            hidden_lengths_pick_up = tran.GetUpperLengths();
        });

-        return lengths_hidden;
+        return hidden_lengths;
    }

    // private member variables
    const Transforms transforms_;

-    // TODO maybe hidden_lengths_ should use reference_wrapper to save space on stack?
+    // TODO maybe hidden_lengths_ should use reference_wrapper (reference to transforms_'s member
+    // variable lengths_) to save space on stack?
    const HiddenIndex hidden_lengths_;

    // visible_lenths_ contains a reference to hidden_lengths_
-    const ArrayElementPicker<HiddenIndex, VisibleDimensionIds> visible_lengths_;
-
-    // friend functions for making and updating tensor coordinate
-    __host__ __device__ friend constexpr DynamicTensorCoordinate_v2<ndim_hidden_, VisibleDimensionIds>
-    make_tensor_coordinate_v2(const DynamicTensorDescriptor_v2& /* tensor_desc */,
-                              const VisibleIndex& /* idx_visible */);
-
-    __host__ __device__ friend constexpr DynamicTensorCoordinateStep_v2<ntransform_, ndim_visible_>
-    make_tensor_coordinate_step_v2(const DynamicTensorDescriptor_v2& /* tensor_desc */,
-                                   const VisibleIndex& /* idx_diff_visible */);
-
-    __host__ __device__ friend void
-    move_tensor_coordinate_v2(const DynamicTensorDescriptor_v2& /* tensor_desc */,
-                              DynamicTensorCoordinate_v2<ndim_hidden_, VisibleDimensionIds>& /* coord */,
-                              const DynamicTensorCoordinateStep_v2<ntransform_, ndim_visible_>& /* coord_step */);
+    const ArrayElementPicker<const HiddenIndex, VisibleDimensionIds> visible_lengths_;
+
+#if 0
+    // friend class
+    friend Coordinate;
+    friend CoordinateStep;
+
+    // friend function to transform tensor descriptor
+    template <typename OldTensorDescriptor,
+              typename NewTransforms,
+              typename NewLowerDimensionOldVisibleIdss,
+              typename NewUpperDimensionNewVisibleIdss>
+    __host__ __device__ friend constexpr auto
+    transform_dynamic_tensor_descriptor_v2(const OldTensorDescriptor& /* old_tensor_desc */,
+                                           const NewTransforms& /* new_transforms */,
+                                           NewLowerDimensionOldVisibleIdss,
+                                           NewUpperDimensionNewVisibleIdss);
+
+    // friend functions for making and moving tensor coordinate
+    template <typename VisibleIndex>
+    __host__ __device__ friend constexpr Coordinate
+    make_dynamic_tensor_coordinate_v2(const DynamicTensorDescriptor_v2& /* tensor_desc */,
+                                      const VisibleIndex& /* idx_visible */);
+
+    template <typename VisibleIndex>
+    __host__ __device__ friend constexpr CoordinateStep
+    make_dynamic_tensor_coordinate_step_v2(const DynamicTensorDescriptor_v2& /* tensor_desc */,
+                                           const VisibleIndex& /* idx_diff_visible */);
+
+    __host__ __device__ friend void
+    move_dynamic_tensor_coordinate_v2(const DynamicTensorDescriptor_v2& /* tensor_desc */,
+                                      Coordinate& /* coord */,
+                                      const CoordinateStep& /* coord_step */);
+
+    // friend functions for valid offset check
+    __host__ __device__ friend constexpr bool
+    coordinate_has_valid_offset_assuming_visible_index_is_valid(
+        const DynamicTensorDescriptor_v2& tensor_desc, const Coordinate& coord);
+
+    __host__ __device__ friend constexpr bool
+    coordinate_has_valid_offset(const DynamicTensorDescriptor_v2& tensor_desc,
+                                const Coordinate& coord);
+#endif
};

template <index_t NDimHidden, typename VisibleDimensionIds>
struct DynamicTensorCoordinate_v2
{
-    constexpr index_t ndim_visible_ = VisbleDimension::Size();
+    // private:
+    static constexpr index_t ndim_visible_ = VisibleDimensionIds::Size();

    using HiddenIndex  = MultiIndex<NDimHidden>;
    using VisibleIndex = MultiIndex<ndim_visible_>;

    // public:
    __host__ __device__ explicit constexpr DynamicTensorCoordinate_v2(const HiddenIndex& idx_hidden)
        : idx_hidden_{idx_hidden}, idx_visible_{idx_hidden_}
    {
    }

-    __host__ __device__ constexpr const auto& GetIndex() const { GetVisibleIndex(); }
+    __host__ __device__ constexpr const auto& GetIndex() const { return GetVisibleIndex(); }

    __host__ __device__ constexpr index_t GetOffset() const { return idx_hidden_[0]; }

-    private:
+    // private:
    __host__ __device__ constexpr const auto& GetHiddenIndex() const { return idx_hidden_; }

    __host__ __device__ auto& GetHiddenIndex() { return idx_hidden_; }
...
@@ -195,114 +254,210 @@ struct DynamicTensorCoordinate_v2
    // idx_visible_ contains a reference to idx_hidden_
    ArrayElementPicker<HiddenIndex, VisibleDimensionIds> idx_visible_;

+#if 0
    // friend functions for making and updating tensor coordinate
    template <typename TensorDesc>
    __host__ __device__ friend constexpr DynamicTensorCoordinate_v2
-    make_tensor_coordinate_v2(const TensorDesc& /* tensor_desc */,
-                              const VisibleIndex& /* idx_visible */);
+    make_dynamic_tensor_coordinate_v2(const TensorDesc& /* tensor_desc */,
+                                      const VisibleIndex& /* idx_visible */);

-    template <typename TensorDesc>
-    __host__ __device__ friend void move_tensor_coordinate_v2(
+    template <typename TensorDesc, typename TensorCoordStep>
+    __host__ __device__ friend void move_dynamic_tensor_coordinate_v2(
        const TensorDesc& /* tensor_desc */,
        DynamicTensorCoordinate_v2& /* coord */,
-        const DynamicTensorCoordinateStep_v2<TensorDesc::GetNumOfTransform(), ndim_visible_>& /* coord_step */);
+        const TensorCoordStep& /* coord_step */);
+#endif
};

template <index_t NTransform, index_t NDimVisible>
struct DynamicTensorCoordinateStep_v2
{
    // private:
    using VisibleIndex = MultiIndex<NDimVisible>;

    // public:
    __host__ __device__ explicit constexpr DynamicTensorCoordinateStep_v2(
        const VisibleIndex& idx_diff_visible, const Array<bool, NTransform>& do_transforms)
        : idx_diff_visible_{idx_diff_visible}, do_transforms_{do_transforms}
    {
    }

-    private:
+    // private:
    __host__ __device__ constexpr const auto& GetVisibleIndexDiff() const
    {
        return idx_diff_visible_;
    }

    // private:
    const VisibleIndex idx_diff_visible_;
    const Array<bool, NTransform> do_transforms_;

+#if 0
    // friend functions for updating tensor coordinate
    template <typename TensorDesc>
    __host__ __device__ friend constexpr DynamicTensorCoordinateStep_v2
-    make_tensor_coordinate_step_v2(const TensorDesc& /* tensor_desc */,
-                                   const VisibleIndex& /* idx_visible */);
+    make_dynamic_tensor_coordinate_step_v2(const TensorDesc& /* tensor_desc */,
+                                           const VisibleIndex& /* idx_visible */);

    template <typename TensorDesc, index_t NDimHidden, typename VisibleDimensionIds>
-    __host__ __device__ friend void move_tensor_coordinate_v2(
+    __host__ __device__ friend void move_dynamic_tensor_coordinate_v2(
        const TensorDesc& /* tensor_desc */,
        DynamicTensorCoordinate_v2<NDimHidden, VisibleDimensionIds>& /* coord */,
        const DynamicTensorCoordinateStep_v2& /* coord_step */);
+#endif
};

+// TODO: Fix this! This is insane, to use an ugly struct instead of lambda because lambda
+// doesn't have constructor, and to put it outside the scope where it is used
+// (transform_dynamic_tensor_descriptor_v2) because template cannot be defined inside a function
+// template
+template <typename NewTransforms>
+struct lambda_get_up_dim_num
+{
+    template <typename I>
+    __host__ __device__ constexpr auto operator()(I) const
+    {
+        return Number<NewTransforms{}.At(I{}).GetNumOfUpperDimension()>{};
+    }
+};
+
+template <typename OldTensorDescriptor,
+          typename NewTransforms,
+          typename NewLowerDimensionOldVisibleIdss,
+          typename NewUpperDimensionNewVisibleIdss>
+__host__ __device__ constexpr auto
+transform_dynamic_tensor_descriptor_v2(const OldTensorDescriptor& old_tensor_desc,
+                                       const NewTransforms& new_transforms,
+                                       NewLowerDimensionOldVisibleIdss,
+                                       NewUpperDimensionNewVisibleIdss)
+{
+    // lower dimension's hidden idss
+    // convert lower dimension visible idss (tuple of sequences) to hidden idss (tuple of
+    // sequences)
+    constexpr auto low_dim_hidden_idss = transform_tuples(
+        // convert lower dimension visible ids (a sequence) to hidden ids (a sequence)
+        [](auto low_dim_visible_ids) constexpr {
+            return transform_sequences(
+                // convert lower dimension visible id to hidden id
+                [](auto low_dim_visible_id) constexpr {
+                    return OldTensorDescriptor::GetVisibleDimensionIds()[low_dim_visible_id];
+                },
+                low_dim_visible_ids);
+        },
+        NewLowerDimensionOldVisibleIdss{});
+
+    constexpr index_t num_new_transform = NewTransforms::Size();
+
+    // upper dimension's hidden idss
+    constexpr index_t old_hidden_dim_number = OldTensorDescriptor::GetNumOfHiddenDimension();
+
+    constexpr auto up_dim_numbers =
+        generate_sequence(lambda_get_up_dim_num<NewTransforms>{}, Number<num_new_transform>{});
+
+    constexpr auto up_dim_numbers_scan = merge_sequences(
+        Sequence<0>{}, inclusive_scan_sequence(up_dim_numbers, math::plus<index_t>{}, Number<0>{}));
+
+    constexpr auto up_dim_hidden_idss =
+        generate_tuple([old_hidden_dim_number, up_dim_numbers_scan](auto i) constexpr {
+            return
+                typename arithmetic_sequence_gen<old_hidden_dim_number + up_dim_numbers_scan[i],
+                                                 old_hidden_dim_number + up_dim_numbers_scan[i + 1],
+                                                 1>::type{};
+        }, Number<num_new_transform>{});
+
+    // new visible dimension's hidden ids
+    constexpr auto unordered_new_visible_dim_hidden_ids =
+        unpack([](auto... xs) { return merge_sequences(xs...); }, up_dim_hidden_idss);
+
+    constexpr auto new_visible_dim_unordered2ordered = unpack(
+        [](auto... xs) { return merge_sequences(xs...); }, NewUpperDimensionNewVisibleIdss{});
+
+    constexpr auto new_visible_dim_hidden_ids =
+        unordered_new_visible_dim_hidden_ids.ReorderGivenOld2New(new_visible_dim_unordered2ordered);
+
+    // put everything together
+    const auto all_transforms = merge_tuples(old_tensor_desc.GetTransforms(), new_transforms);
+
+    constexpr auto all_low_dim_hidden_idss =
+        merge_tuples(OldTensorDescriptor::GetLowerDimensionIdss(), low_dim_hidden_idss);
+
+    constexpr auto all_up_dim_hidden_idss =
+        merge_tuples(OldTensorDescriptor::GetUpperDimensionIdss(), up_dim_hidden_idss);
+
+    return DynamicTensorDescriptor_v2<decltype(all_transforms),
+                                      decltype(all_low_dim_hidden_idss),
+                                      decltype(all_up_dim_hidden_idss),
+                                      decltype(new_visible_dim_hidden_ids)>{
+        all_transforms, old_tensor_desc.GetElementSpaceSize()};
+}
+
template <typename TensorDesc, typename VisibleIndex>
__host__ __device__ constexpr auto
-make_tensor_coordinate_v2(const TensorDesc& tensor_desc, const VisibleIndex& idx_visible)
+make_dynamic_tensor_coordinate_v2(const TensorDesc& tensor_desc, const VisibleIndex& idx_visible)
{
-    static_assert(tensor_desc.GetNumOfDimension() == idx_visible.Size(),
+    static_assert(TensorDesc::GetNumOfDimension() == VisibleIndex::Size(),
                  "wrong! # of dimension inconsistent");

-    constexpr index_t ntransform   = tensor_desc.GetNumOfTransformation();
-    constexpr index_t ndim_hidden  = tensor_desc.GetNumOfHiddenDimension();
-    constexpr index_t ndim_visible = tensor_desc.GetNumOfVisibleDimension();
+    constexpr index_t ntransform   = TensorDesc::GetNumOfTransform();
+    constexpr index_t ndim_hidden  = TensorDesc::GetNumOfHiddenDimension();
+    constexpr index_t ndim_visible = TensorDesc::GetNumOfVisibleDimension();
+    constexpr auto visible_dim_ids = TensorDesc::GetVisibleDimensionIds();

    MultiIndex<ndim_hidden> idx_hidden;

-    auto idx_visible_pick = pick_array_element(idx_hidden, tensor_desc.GetVisibleDimensionIds());
+    auto idx_hidden_pick_visible = pick_array_element(idx_hidden, visible_dim_ids);

    // initialize visible index
#pragma unroll
-    for(index_t i < ndim_visible; i < ndim_visible, ++i)
+    for(index_t i = 0; i < ndim_visible; ++i)
    {
-        idx_visible_pick(i) = idx_visible[i];
+        idx_hidden_pick_visible(i) = idx_visible[i];
    }

    // calculate hidden index
    static_for<ntransform - 1, -1, -1>{}([&](auto itran) {
-        const auto& tran        = transforms_.At(itran);
-        constexpr auto dims_low = LowerDimensionIdss::At(itran);
-        constexpr auto dims_up  = UpperDimensionIdss::At(itran);
+        const auto& tran        = tensor_desc.GetTransforms().At(itran);
+        constexpr auto dims_low = TensorDesc::GetLowerDimensionIdss().At(itran);
+        constexpr auto dims_up  = TensorDesc::GetUpperDimensionIdss().At(itran);

-        const auto idx_up = pick_array_element(idx_hidden_, dim_up);
-        auto idx_low      = pick_array_element(idx_hidden_, dim_low);
+        const auto idx_up = pick_array_element(idx_hidden, dims_up);
+        auto idx_low      = pick_array_element(idx_hidden, dims_low);

-        tran.CalculateLowerIndex(idx_up, idx_low);
+        tran.CalculateLowerIndex(idx_low, idx_up);
    });

    // better to use std::move?
-    return DynamicTensorCoordinate_v2{idx_hidden};
+    return DynamicTensorCoordinate_v2<ndim_hidden, decltype(visible_dim_ids)>{idx_hidden};
}

template <typename TensorDesc, typename VisibleIndex>
__host__ __device__ constexpr auto
-make_tensor_coordinate_step_v2(const TensorDesc& tensor_desc, const VisibleIndex& idx_diff_visible)
+make_dynamic_tensor_coordinate_step_v2(const TensorDesc&, const VisibleIndex& idx_diff_visible)
{
-    static_assert(tensor_desc.GetNumOfDimension() == idx_visible.Size(),
+    static_assert(TensorDesc::GetNumOfDimension() == VisibleIndex::Size(),
                  "wrong! # of dimension inconsistent");

-    constexpr index_t ntransform   = tensor_desc.GetNumOfTransformation();
-    constexpr index_t ndim_hidden  = tensor_desc.GetNumOfHiddenDimension();
-    constexpr index_t ndim_visible = tensor_desc.GetNumOfVisibleDimension();
+    constexpr index_t ntransform   = TensorDesc::GetNumOfTransform();
+    constexpr index_t ndim_hidden  = TensorDesc::GetNumOfHiddenDimension();
+    constexpr index_t ndim_visible = TensorDesc::GetNumOfVisibleDimension();
+    constexpr auto visible_dim_ids = TensorDesc::GetVisibleDimensionIds();

-    Array<bool, ntransform> do_transforms = {false};
+    Array<bool, ntransform> do_transforms{false};

-    Array<bool, ndim_hidden> non_zero_diff = {false};
+    Array<bool, ndim_hidden> non_zero_diff{false};

-    auto non_zero_diff_pick_visible =
-        pick_array_element(non_zero_diff, tensor_desc.GetVisibleDimensionIds());
+    auto non_zero_diff_pick_visible = pick_array_element(non_zero_diff, visible_dim_ids);

#pragma unroll
-    for(index_t i < ndim_visible; i < ndim_visible, ++i)
+    for(index_t i = 0; i < ndim_visible; ++i)
    {
        non_zero_diff_pick_visible(i) = (idx_diff_visible[i] != 0);
    }

    static_for<ntransform - 1, -1, -1>{}([&](auto itran) {
-        const auto& tran        = tensor_desec.GetTransforms().At(itran);
-        constexpr auto dims_low = tensor_desc.GetLowerDimensionIdss().At(itran);
-        constexpr auto dims_up  = tensor_Desc.GetUpperDimensionIdss().At(itran);
+        const auto& tran        = tensor_desec.GetTransforms().At(itran);
+        constexpr auto dims_low = TensorDesc::GetLowerDimensionIdss().At(itran);
+        constexpr auto dims_up  = TensorDesc::GetUpperDimensionIdss().At(itran);

        const auto non_zero_diff_pick_up = pick_array_element(non_zero_diff, dims_up);
        auto non_zero_diff_pick_low      = pick_array_element(non_zero_diff, dims_low);
...
@@ -323,28 +478,33 @@ make_tensor_coordinate_step_v2(const TensorDesc& tensor_desc, const VisibleIndex
        }
    });

-    return do_transforms;
+    return DynamicTensorCoordinateStep_v2<ntransform, ndim_visible>{idx_diff_visible,
+                                                                    do_transforms};
}

template <typename TensorDesc, typename TensorCoord, typename TensorCoordStep>
-__host__ __device__ void move_tensor_coordinate_v2(const TensorDesc& tensor_desc,
-                                                   TensorCoord& coord,
-                                                   const TensorCoordStep& coord_step)
+__host__ __device__ void move_dynamic_tensor_coordinate_v2(const TensorDesc& tensor_desc,
+                                                           TensorCoord& coord,
+                                                           const TensorCoordStep& coord_step)
{
-    constexpr index_t ndim_hidden  = tensor_desc.GetNumOfHiddenDimension();
-    constexpr index_t ndim_visible = tensor_desc.GetNumOfVisibleDimension();
-    constexpr index_t ntransform   = tensor_desc.GetNumOfTransform();
+    constexpr index_t ndim_hidden  = TensorDesc::GetNumOfHiddenDimension();
+    constexpr index_t ndim_visible = TensorDesc::GetNumOfVisibleDimension();
+    constexpr index_t ntransform   = TensorDesc::GetNumOfTransform();
+
+    using HiddenIndex = MultiIndex<ndim_hidden>;

    // this is what needs to be calculated
-    auto idx_diff_hidden = make_zero_multi_index<ndim_hidden>();
-
-    const auto idx_diff_visible_pick =
-        pick_array_element(idx_diff_hidden, tensor_desc.GetVisibleDimensionIds());
+    auto idx_diff_hidden = HiddenIndex{0};
+
+    // idx_diff_hidden_pick_visible contains reference to idx_diff_hidden
+    auto idx_diff_hidden_pick_visible =
+        pick_array_element(idx_diff_hidden, TensorDesc::GetVisibleDimensionIds());

    // initialize visible index diff
#pragma unroll
-    for(index_t i = 0; i < ndim_visible_; ++i)
+    for(index_t i = 0; i < ndim_visible; ++i)
    {
-        idx_diff_visible_pick(i) = coord_step.GetVisibleIndexDiff()[i];
+        idx_diff_hidden_pick_visible(i) = coord_step.GetVisibleIndexDiff()[i];
    }

    // this is what needs to be updated
...
@@ -352,16 +512,16 @@ __host__ __device__ void move_tensor_coordinate_v2(const TensorDesc& tensor_desc
    // update hidden index
    static_for<ntransform - 1, -1, -1>{}([&](auto itran) {
-        const auto& tran        = tensor_desc.GetTransformations().At(itran);
-        constexpr auto dims_low = tensor_desc.GetLowerDimensionIdss().At(itran);
-        constexpr auto dims_up  = tensor_desc.GetUpperDimensionIdss().At(itran);
+        const auto& tran        = tensor_desc.GetTransforms().At(itran);
+        constexpr auto dims_low = TensorDesc::GetLowerDimensionIdss().At(itran);
+        constexpr auto dims_up  = TensorDesc::GetUpperDimensionIdss().At(itran);

        // this const is for ArrayElementPicker, Array itself may not be const
-        const auto idx_up  = pick_array_element(idx_hidden, dim_up);
-        const auto idx_low = pick_array_element(idx_hidden, dim_low);
+        const auto idx_up = pick_array_element(idx_hidden, dims_up);
+        auto idx_low      = pick_array_element(idx_hidden, dims_low);

-        const auto idx_diff_up  = pick_array_element(idx_diff_hidden, dim_up);
-        const auto idx_diff_low = pick_array_element(idx_diff_hidden, dim_low);
+        const auto idx_diff_up = pick_array_element(idx_diff_hidden, dims_up);
+        auto idx_diff_low      = pick_array_element(idx_diff_hidden, dims_low);

        tran.CalculateLowerIndexDiff(idx_diff_low, idx_diff_up, idx_low, idx_up);
...
@@ -371,12 +531,13 @@ __host__ __device__ void move_tensor_coordinate_v2(const TensorDesc& tensor_desc
}

template <typename TensorDesc, typename TensorCoord>
-__host__ __device__ bool constexpr coordinate_has_valid_offset_assuming_visible_index_is_valid(
-    const TensorDesc& tensor_desc, const TensorCoord& coord)
+__host__ __device__ constexpr bool coordinate_has_valid_offset_assuming_visible_index_is_valid(
+    const TensorDesc& tensor_desc, const TensorCoord& coord)
{
    bool valid = true;

-    constexpr index_t ntransform = tensor_desc.GetNumOfTransform();
+    constexpr index_t ntransform = TensorDesc::GetNumOfTransform();

    const auto& idx_hidden = coord.GetHiddenIndex();
...
@@ -387,7 +548,7 @@ __host__ __device__ bool constexpr coordinate_has_valid_offset_assuming_visible_
        if constexpr(!decltype(tran)::IsValidUpperIndexAlwaysMappedToValidLowerIndex())
        {
            const auto idx_up =
-                pick_array_element(idx_hidden, tensor_desc.GetUpperDimensionIdss().At(itran));
+                pick_array_element(idx_hidden, TensorDesc::GetUpperDimensionIdss().At(itran));

            valid = valid && tran.IsValidUpperIndexMappedToValidLowerIndex(idx_up);
        }
...
@@ -397,7 +558,7 @@ __host__ __device__ bool constexpr coordinate_has_valid_offset_assuming_visible_
}

template <typename TensorDesc, typename TensorCoord>
-__host__ __device__ bool constexpr coordinate_has_valid_offset(const TensorDesc& tensor_desc,
+__host__ __device__ constexpr bool coordinate_has_valid_offset(const TensorDesc& tensor_desc,
                                                               const TensorCoord& coord)
{
    // check visible index
...
@@ -406,7 +567,7 @@ __host__ __device__ bool constexpr coordinate_has_valid_offset(const TensorDesc&
    bool is_visible_index_valid = true;

#pragma unroll
-    for(index_t i = 0; i < tensor_desc.GetNumOfDimension(); ++i)
+    for(index_t i = 0; i < TensorDesc::GetNumOfDimension(); ++i)
    {
        is_visible_index_valid =
            is_visible_index_valid &&
            (idx_visible[i] >= 0 && idx_visible[i] < tensor_desc.GetLength(i));
...
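A minimal sketch (not part of the commit) of how transform_dynamic_tensor_descriptor_v2 is called: flatten a packed 2-d descriptor into a 1-d view. DynamicMerge<2> is assumed to take the same brace-initialized lengths as the DynamicMerge<3> used elsewhere in this diff; M and N here are illustrative runtime values.

__host__ __device__ auto make_flattened_view(ck::index_t M, ck::index_t N)
{
    using namespace ck;

    const auto desc_m_n = make_dynamic_native_tensor_descriptor_packed_v2(MultiIndex<2>{M, N});

    // one new transform: lower (old visible) dims {0, 1} map to one upper (new visible) dim {0}
    return transform_dynamic_tensor_descriptor_v2(desc_m_n,
                                                  make_tuple(DynamicMerge<2>{{M, N}}),
                                                  make_tuple(Sequence<0, 1>{}),
                                                  make_tuple(Sequence<0>{}));
}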
composable_kernel/include/tensor_description/multi_index_transform.hpp (+2 −2)

@@ -17,7 +17,7 @@ __host__ __device__ constexpr auto make_multi_index(Xs... xs)
template <index_t NSize>
__host__ __device__ constexpr auto make_zero_multi_index()
{
-    make_zero_array<index_t, NSize>();
+    return make_zero_array<index_t, NSize>();
}

template <index_t Length>
...
@@ -425,7 +425,7 @@ struct Embed
    __host__ __device__ static constexpr auto CalculateLowerIndex(const UpperIndex& idx_up)
    {
-        LowerIndex idx_low(Coefficients{}[nDimUp]);
+        LowerIndex idx_low = {Coefficients{}[nDimUp]};

        for(index_t i = 0; i < nDimUp; ++i)
        {
...
composable_kernel/include/utility/array.hpp (+92 −9)

@@ -16,14 +16,22 @@ struct Array
    // TODO: implement empty Array
    TData mData[NSize + 1] = {0};

+    __host__ __device__ explicit constexpr Array() {}
+
+    template <typename X, typename... Xs>
+    __host__ __device__ constexpr Array(X x, Xs... xs)
+        : mData{static_cast<TData>(x), static_cast<TData>(xs)...}
+    {
+        static_assert(sizeof...(Xs) + 1 == NSize, "wrong! size");
+    }
+
+#if 0
    __host__ __device__ explicit constexpr Array(TData x)
        : mData{x}
    {}

    __host__ __device__ explicit constexpr Array()
        : Array(TData{0})
    {}

    template <typename... Xs>
    __host__ __device__ constexpr Array(Xs... xs)
        : mData{static_cast<TData>(xs)...}
    {
        static_assert(sizeof...(Xs) == NSize, "wrong! size");
    }
+#endif

    __host__ __device__ static constexpr index_t Size() { return NSize; }
...
@@ -63,13 +71,71 @@ struct Array
    }

    template <typename T>
-    __host__ __device__ constexpr type& operator=(const T& x)
+    __host__ __device__ constexpr auto operator=(const T& a)
    {
+        static_assert(T::Size() == Size(), "wrong! size not the same");
+
-        static_for<0, Size(), 1>{}([&](auto i) { operator()(i) = x[i]; });
+        static_for<0, Size(), 1>{}([&](auto i) { operator()(i) = a[i]; });

        return *this;
    }

+    template <typename T>
+    __host__ __device__ constexpr auto operator+=(const T& a)
+    {
+        static_assert(T::Size() == Size(), "wrong! size not the same");
+
+        static_for<0, Size(), 1>{}([&](auto i) { operator()(i) += a[i]; });
+
+        return *this;
+    }
+
+    template <typename T>
+    __host__ __device__ constexpr auto operator-=(const T& a)
+    {
+        static_assert(T::Size() == Size(), "wrong! size not the same");
+
+        static_for<0, Size(), 1>{}([&](auto i) { operator()(i) -= a[i]; });
+
+        return *this;
+    }
+
+    template <typename T>
+    __host__ __device__ constexpr auto operator+(const T& a) const
+    {
+        static_assert(T::Size() == Size(), "wrong! size not the same");
+
+        type r;
+        static_for<0, Size(), 1>{}([&](auto i) { r(i) = operator[](i) + a[i]; });
+        return r;
+    }
+
+    template <typename T>
+    __host__ __device__ constexpr auto operator-(const T& a) const
+    {
+        static_assert(T::Size() == Size(), "wrong! size not the same");
+
+        type r;
+        static_for<0, Size(), 1>{}([&](auto i) { r(i) = operator[](i) - a[i]; });
+        return r;
+    }
+
+    template <typename T>
+    __host__ __device__ constexpr auto operator*(const T& a) const
+    {
+        static_assert(T::Size() == Size(), "wrong! size not the same");
+
+        type r;
+        static_for<0, Size(), 1>{}([&](auto i) { r(i) = operator[](i) * a[i]; });
+        return r;
+    }
+
    struct lambda_PushBack // emulate constexpr lambda
    {
        const Array<TData, NSize>& old_array;
...
@@ -150,13 +216,30 @@ struct ArrayElementPicker
    }

    template <typename T>
-    __host__ __device__ constexpr type& operator=(const T& a)
+    __host__ __device__ constexpr auto operator=(const T& a)
    {
        static_for<0, Size(), 1>{}([&](auto i) { operator()(i) = a[i]; });

        return *this;
    }

+    template <typename T>
+    __host__ __device__ constexpr auto operator+=(const T& a)
+    {
+        static_for<0, Size(), 1>{}([&](auto i) { operator()(i) += a[i]; });
+
+        return *this;
+    }
+
+    template <typename T>
+    __host__ __device__ constexpr auto operator-=(const T& a)
+    {
+        static_for<0, Size(), 1>{}([&](auto i) { operator()(i) -= a[i]; });
+
+        return *this;
+    }
+
    private:
    Arr& mArray;
};
...
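A minimal sketch (not part of the commit) of the element-wise Array operators added above; the values are illustrative.

__host__ __device__ void array_ops_sketch()
{
    using ck::Array;
    using ck::index_t;

    Array<index_t, 2> a{1, 2};
    Array<index_t, 2> b{10, 20};

    a += b;               // a is now {11, 22}
    const auto c = a + b; // c is {21, 42}
    const auto d = c - b; // d is {11, 22}
    const auto e = d * b; // element-wise product: {110, 440}
}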
composable_kernel/include/utility/array_helper.hpp (+8 −6)

@@ -115,6 +115,7 @@ struct lambda_array_math
    }
};

+#if 0
// Array = Array + Array
template <typename TData, index_t NSize>
__host__ __device__ constexpr auto operator+(Array<TData, NSize> a, Array<TData, NSize> b)
...
@@ -210,6 +211,7 @@ __host__ __device__ constexpr auto operator*(Array<TData, NSize> a, Sequence<Is.
    return result;
}
+#endif

// Array = Sequence - Array
template <typename TData, index_t NSize, index_t... Is>
...
@@ -242,15 +244,15 @@ __host__ __device__ constexpr auto operator*(TData v, Array<TData, NSize> a)
    return result;
}

-template <typename TData, index_t NSize, typename Reduce>
-__host__ __device__ constexpr TData reduce_on_array(const Array<TData, NSize>& a, Reduce f, TData init)
+template <typename TData, typename Arr, typename Reduce>
+__host__ __device__ constexpr TData reduce_on_array(const Arr& a, Reduce f, TData init)
{
-    TData result = init;
-
-    static_assert(NSize > 0, "wrong");
+    static_assert(is_same<typename Arr::data_type, TData>::value, "wrong! different data type");
+    static_assert(Arr::Size() > 0, "wrong");
+
+    TData result = init;

-    static_for<0, NSize, 1>{}([&](auto I) { result = f(result, a[I]); });
+    static_for<0, Arr::Size(), 1>{}([&](auto I) { result = f(result, a[I]); });

    return result;
}
...
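A minimal sketch (not part of the commit) of the generalized reduce_on_array, which now accepts any Array-like type exposing data_type and Size(). The use case is illustrative: multiplying the lengths of a packed 4-d tensor, as the packed descriptor helper does.

__host__ __device__ ck::index_t element_count(const ck::MultiIndex<4>& lengths)
{
    // fold lengths with multiplication, starting from 1
    return ck::reduce_on_array(lengths, ck::math::multiplies<ck::index_t>{}, ck::index_t{1});
}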
composable_kernel/include/utility/common_header.hpp (+1 −0)

@@ -14,6 +14,7 @@
 #include "math.hpp"
 #include "number.hpp"
 #include "sequence.hpp"
+#include "sequence_helper.hpp"
 #include "synchronization.hpp"
 #include "tuple.hpp"
 #include "tuple_helper.hpp"
...
composable_kernel/include/utility/functional4.hpp (+22 −0)

@@ -22,6 +22,20 @@ struct unpack_impl<Sequence<Is...>>
    }
};

+template <typename Seq0, typename Seq1>
+struct unpack2_impl;
+
+// TODO: remove this, after properly implementing unpack that takes any number of containers
+template <index_t... Is, index_t... Js>
+struct unpack2_impl<Sequence<Is...>, Sequence<Js...>>
+{
+    template <typename F, typename X, typename Y>
+    __host__ __device__ constexpr auto operator()(F f, const X& x, const Y& y) const
+    {
+        return f(x.At(Number<Is>{})..., y.At(Number<Js>{})...);
+    }
+};
+
} // namespace detail

template <typename F, typename X>
...
@@ -30,5 +44,13 @@ __host__ __device__ constexpr auto unpack(F f, const X& x)
    return detail::unpack_impl<typename arithmetic_sequence_gen<0, X::Size(), 1>::type>{}(f, x);
}

+// TODO: properly implement unpack that takes any number of containers
+template <typename F, typename X, typename Y>
+__host__ __device__ constexpr auto unpack(F f, const X& x, const Y& y)
+{
+    return detail::unpack2_impl<typename arithmetic_sequence_gen<0, X::Size(), 1>::type,
+                                typename arithmetic_sequence_gen<0, Y::Size(), 1>::type>{}(f, x, y);
+}
+
} // namespace ck
#endif
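A minimal sketch (not part of the commit) of the two-container unpack added above: the callable receives the elements of both containers as one flat argument pack.

__host__ __device__ auto unpack2_sketch()
{
    using namespace ck;

    const auto x = make_tuple(Number<1>{}, Number<2>{});
    const auto y = make_tuple(Number<3>{});

    // the lambda is called as f(Number<1>, Number<2>, Number<3>)
    return unpack([](auto... xs) { return make_tuple(xs...); }, x, y);
}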
composable_kernel/include/utility/sequence_helper.hpp (new file, mode 100644, +15 −0)

#ifndef CK_SEQUENCE_HELPER_HPP
#define CK_SEQUENCE_HELPER_HPP

#include "sequence_helper.hpp"

namespace ck {

template <typename F, index_t N>
__host__ __device__ constexpr auto generate_sequence(F, Number<N>)
{
    return typename sequence_gen<N, F>::type{};
}

} // namespace ck
#endif
composable_kernel/include/utility/tuple_helper.hpp (+13 −0)

@@ -11,6 +11,19 @@ __host__ __device__ constexpr auto make_tuple(Xs&&... xs)
    return Tuple<remove_cv_t<remove_reference_t<Xs>>...>(std::forward<Xs>(xs)...);
}

+template <typename F, index_t N>
+__host__ __device__ constexpr auto generate_tuple(F&& f, Number<N>)
+{
+    return unpack([&f](auto&&... xs) { return make_tuple(f(xs)...); },
+                  typename arithmetic_sequence_gen<0, N, 1>::type{});
+}
+
+template <typename... Tuples>
+__host__ __device__ constexpr auto merge_tuples(Tuples... tuples)
+{
+    return unpack([&tuples...](auto... xs) { return make_tuple(xs...); }, tuples...);
+}
+
namespace detail {

template <typename F, typename X, index_t... Is>
...
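A minimal sketch (not part of the commit) of generate_tuple and merge_tuples; the element choices are illustrative. generate_tuple applies the callable to Number<0> .. Number<N-1>, and merge_tuples concatenates the elements of its argument tuples (currently via the two-container unpack).

__host__ __device__ auto tuple_helpers_sketch()
{
    using namespace ck;

    // t0 holds the results of f(Number<0>), f(Number<1>), f(Number<2>); here f is identity
    const auto t0 = generate_tuple([](auto i) { return i; }, Number<3>{});

    // concatenate t0 with a one-element tuple -> four elements in total
    const auto t1 = merge_tuples(t0, make_tuple(Number<3>{}));

    return t1;
}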
driver/include/device_dummy_dynamic_transform.hpp (+125 −0)

@@ -138,3 +138,128 @@ void device_dummy_dynamic_transform(InDesc,
    out_nkhw_device_buf.FromDevice(out_nkhw.mData.data());
}

+template <class T,
+          class InDesc,
+          class WeiDesc,
+          class OutDesc,
+          class ConvStrides,
+          class ConvDilations,
+          class InLeftPads,
+          class InRightPads>
+void device_dummy_dynamic_transform_v2(InDesc,
+                                       const Tensor<T>& in_nchw,
+                                       WeiDesc,
+                                       const Tensor<T>& wei_kcyx,
+                                       OutDesc,
+                                       Tensor<T>& out_nkhw,
+                                       ConvStrides,
+                                       ConvDilations,
+                                       InLeftPads,
+                                       InRightPads,
+                                       ck::index_t nrepeat)
+{
+    using namespace ck;
+
+    using TDevice = typename conditional<is_same<half_float::half, T>::value, half_t, T>::type;
+
+    const auto in_nchw_desc = make_dynamic_native_tensor_descriptor_v2(
+        to_array(InDesc::GetLengths()), to_array(InDesc::GetStrides()));
+
+    const auto wei_kcyx_desc = make_dynamic_native_tensor_descriptor_v2(
+        to_array(WeiDesc::GetLengths()), to_array(WeiDesc::GetStrides()));
+
+    const auto out_nkhw_desc = make_dynamic_native_tensor_descriptor_v2(
+        to_array(OutDesc::GetLengths()), to_array(OutDesc::GetStrides()));
+
+    const auto conv_strides   = to_array(ConvStrides{});
+    const auto conv_dilations = to_array(ConvDilations{});
+    const auto in_left_pads   = to_array(InLeftPads{});
+    const auto in_right_pads  = to_array(InRightPads{});
+
+    {
+        const auto tensor_descs = map_convolution_into_gemm_v2(wei_kcyx_desc,
+                                                               in_nchw_desc,
+                                                               out_nkhw_desc,
+                                                               conv_strides,
+                                                               conv_dilations,
+                                                               in_left_pads,
+                                                               in_right_pads);
+
+        const auto in_gemmk_gemmn_global_desc = tensor_descs.At(Number<0>{});
+
+        auto in_gemmk_gemmn_coord =
+            make_dynamic_tensor_coordinate_v2(in_gemmk_gemmn_global_desc, MultiIndex<2>{0, 0});
+
+        const auto in_gemmk_gemmn_coord_step =
+            make_dynamic_tensor_coordinate_step_v2(in_gemmk_gemmn_global_desc, MultiIndex<2>{1, 0});
+
+        for(index_t iter = 0; iter < 100; ++iter)
+        {
+            constexpr auto gemmk1_gemmn0 = MultiIndex<2>{1, 0};
+
+            printf("iter %d \n", iter);
+            print_array("idx: ", in_gemmk_gemmn_coord.GetIndex());
+            printf("offset: %d \n", in_gemmk_gemmn_coord.GetOffset());
+            printf("\n");
+
+            move_dynamic_tensor_coordinate_v2(
+                in_gemmk_gemmn_global_desc, in_gemmk_gemmn_coord, in_gemmk_gemmn_coord_step);
+        }
+    }
+
+    std::size_t data_sz = sizeof(T);
+    DeviceMem in_nchw_device_buf(data_sz * in_nchw.mDesc.GetElementSpace());
+    DeviceMem wei_kcyx_device_buf(data_sz * wei_kcyx.mDesc.GetElementSpace());
+    DeviceMem out_nkhw_device_buf(data_sz * out_nkhw.mDesc.GetElementSpace());
+
+    in_nchw_device_buf.ToDevice(in_nchw.mData.data());
+    wei_kcyx_device_buf.ToDevice(wei_kcyx.mData.data());
+    out_nkhw_device_buf.ToDevice(out_nkhw.mData.data());
+
+    constexpr index_t BlockSize = 256;
+    constexpr index_t GridSize  = 1;
+
+    printf("%s: BlockSize %u, GridSize %u \n", __func__, BlockSize, GridSize);
+
+    using dummy_transform = DummyDynamicTransform<BlockSize>;
+
+    for(index_t i = 0; i < 5; ++i)
+    {
+        std::cout << "Start running " << nrepeat << " times..." << std::endl;
+
+        KernelTimer timer;
+        timer.Start();
+
+        for(index_t j = 0; j < nrepeat; ++j)
+        {
+            launch_kernel(run_gridwise_operation<dummy_transform,
+                                                 index_t* const,
+                                                 float* const,
+                                                 float* const,
+                                                 const decltype(wei_kcyx_desc),
+                                                 const decltype(in_nchw_desc),
+                                                 const decltype(out_nkhw_desc),
+                                                 const Array<index_t, 2>,
+                                                 const Array<index_t, 2>,
+                                                 const Array<index_t, 2>,
+                                                 const Array<index_t, 2>>,
+                          dim3(GridSize),
+                          dim3(BlockSize),
+                          0,
+                          0,
+                          static_cast<index_t*>(wei_kcyx_device_buf.GetDeviceBuffer()),
+                          static_cast<float*>(in_nchw_device_buf.GetDeviceBuffer()),
+                          static_cast<float*>(out_nkhw_device_buf.GetDeviceBuffer()),
+                          wei_kcyx_desc,
+                          in_nchw_desc,
+                          out_nkhw_desc,
+                          conv_strides,
+                          conv_dilations,
+                          in_left_pads,
+                          in_right_pads);
+        }
+    }
+
+    out_nkhw_device_buf.FromDevice(out_nkhw.mData.data());
+}
driver/src/conv_driver.cpp (+14 −2)

@@ -560,7 +560,7 @@ int main(int argc, char* argv[])
                                                         LeftPads{},
                                                         RightPads{},
                                                         nrepeat);
-#elif 0
+#elif 1
    device_convolution_implicit_gemm_v4r4_nchw_kcyx_nkhw(in_nchw_desc,
                                                         in_nchw,
                                                         wei_kcyx_desc,
...
@@ -584,7 +584,7 @@ int main(int argc, char* argv[])
                                   LeftPads{},
                                   RightPads{},
                                   nrepeat);
-#elif 1
+#elif 0
    device_dummy_dynamic_transform(in_nchw_desc,
                                   in_nchw,
                                   wei_kcyx_desc,
...
@@ -596,6 +596,18 @@ int main(int argc, char* argv[])
                                   LeftPads{},
                                   RightPads{},
                                   nrepeat);
+#elif 1
+    device_dummy_dynamic_transform_v2(in_nchw_desc,
+                                      in_nchw,
+                                      wei_kcyx_desc,
+                                      wei_kcyx,
+                                      out_nkhw_desc,
+                                      out_nkhw_device,
+                                      ConvStrides{},
+                                      ConvDilations{},
+                                      LeftPads{},
+                                      RightPads{},
+                                      nrepeat);
#endif

    if(do_verification)
...