gaoqiong / composable_kernel · Commits

Commit 0b7fcca6
authored Aug 18, 2020 by Chao Liu

prototype dynamic tensor descriptor

parent 4388f572
Showing 12 changed files with 732 additions and 266 deletions.
CMakeLists.txt  (+1 / -1)
composable_kernel/include/kernel_algorithm/dummy_dynamic_transform.hpp  (+91 / -14)
composable_kernel/include/tensor_description/dynamic_multi_index_transform.hpp  (+204 / -146)
composable_kernel/include/tensor_description/dynamic_tensor_descriptor.hpp  (+316 / -1)
composable_kernel/include/tensor_description/dynamic_tensor_descriptor_helper.hpp  (+16 / -0)
composable_kernel/include/tensor_description/multi_index_transform.hpp  (+0 / -42)
composable_kernel/include/utility/array.hpp  (+8 / -7)
composable_kernel/include/utility/array_helper.hpp  (+32 / -2)
composable_kernel/include/utility/common_header.hpp  (+2 / -0)
composable_kernel/include/utility/tuple.hpp  (+0 / -51)
composable_kernel/include/utility/tuple_helper.hpp (new)  (+60 / -0)
script/cmake-rocm3.5.sh  (+2 / -2)
CMakeLists.txt

@@ -3,7 +3,7 @@ project(modular_convolution)
 #c++
 enable_language(CXX)
-set(CMAKE_CXX_STANDARD 14)
+set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
 set(CMAKE_CXX_EXTENSIONS OFF)
 message("CMAKE_CXX_COMPILER_ID: ${CMAKE_CXX_COMPILER_ID}")
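The bump from C++14 to C++17 is what allows the `if constexpr` branch used further down in DynamicTransformedTensorDescriptor::IsValidUpperIndexMappedToValidLowerIndex. A minimal, repository-independent sketch of that pattern (the two transform stand-ins here are hypothetical, not ck types):

    #include <iostream>

    // Hypothetical stand-ins for two kinds of index transforms: one that can
    // never produce an out-of-range lower index, and one that can (e.g. padding).
    struct PassThroughLike { static constexpr bool always_valid = true;  };
    struct LeftPadLike     { static constexpr bool always_valid = false; };

    template <typename Transform>
    bool check_index(int idx, int lower_length)
    {
        bool flag = true;

        // C++17 "if constexpr": the branch is discarded at compile time, so
        // transforms that are always valid pay no runtime cost for the check.
        if constexpr(!Transform::always_valid)
        {
            flag = flag && idx >= 0 && idx < lower_length;
        }

        return flag;
    }

    int main()
    {
        std::cout << check_index<PassThroughLike>(-1, 4) << '\n'; // 1: never checked
        std::cout << check_index<LeftPadLike>(-1, 4) << '\n';     // 0: out of bounds
    }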
composable_kernel/include/kernel_algorithm/dummy_dynamic_transform.hpp

@@ -414,7 +414,7 @@ struct DummyDynamicTransform
         idx[13] += idx_diff[13];

         // padding check
         bool is_in_bound = true;
 #else // pad
         // offset
         idx[0] += idx_diff[0];
@@ -462,25 +462,102 @@ struct DummyDynamicTransform
                                  const Array<index_t, 2> in_left_pads,
                                  const Array<index_t, 2> in_right_pads) const
     {
-        Index idx_up;
-
-        idx_up(0) = in_n_c_hi_wi_global_desc.GetLength(0);
-        idx_up(1) = in_n_c_hi_wi_global_desc.GetLength(1);
-        idx_up(2) = in_n_c_hi_wi_global_desc.GetLength(2);
-        idx_up(3) = in_n_c_hi_wi_global_desc.GetLength(3);
-
-#if 0
-        constexpr auto trans = GetTransforms();
-
-        auto idx_low = trans[0]->CalculateLowerIndex(idx_up);
-#elif 1
-        constexpr DynamicCoordinateTransform* tran = &embed;
-
-        auto idx_low = tran->CalculateLowerIndex(idx_up);
-#endif
-
-        p_out_global[get_thread_local_1d_id()] = idx_low[0];
+        const index_t N = in_n_c_hi_wi_global_desc.GetLength(0);
+        const index_t C = in_n_c_hi_wi_global_desc.GetLength(1);
+        const index_t K = out_n_k_ho_wo_global_desc.GetLength(1);
+
+        const index_t Y = wei_k_c_y_x_global_desc.GetLength(2);
+        const index_t X = wei_k_c_y_x_global_desc.GetLength(3);
+
+        const index_t Hi = in_n_c_hi_wi_global_desc.GetLength(2);
+        const index_t Wi = in_n_c_hi_wi_global_desc.GetLength(3);
+
+        const index_t Ho = out_n_k_ho_wo_global_desc.GetLength(2);
+        const index_t Wo = out_n_k_ho_wo_global_desc.GetLength(3);
+
+        const index_t ConvStrideH = conv_strides[0];
+        const index_t ConvStrideW = conv_strides[1];
+
+        const index_t ConvDilationH = conv_dilations[0];
+        const index_t ConvDilationW = conv_dilations[1];
+
+        const index_t InLeftPadH = in_left_pads[0];
+        const index_t InLeftPadW = in_left_pads[1];
+
+        const index_t InRightPadH = in_right_pads[0];
+        const index_t InRightPadW = in_right_pads[1];
+
+        // input tensor
+        const auto in_n_c_hip_wip_global_desc = transform_dynamic_tensor_descriptor(
+            transform_dynamic_tensor_descriptor(
+                in_n_c_hi_wi_global_desc,
+                make_tuple(DynamicPassThrough{N},
+                           DynamicPassThrough{C},
+                           DynamicLeftPad{Hi, InLeftPadH},
+                           DynamicLeftPad{Wi, InLeftPadW}),
+                make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{}),
+                make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{})),
+            make_tuple(DynamicPassThrough{N},
+                       DynamicPassThrough{C},
+                       DynamicRightPad{Hi + InLeftPadH, InRightPadH},
+                       DynamicRightPad{Wi + InLeftPadW, InRightPadW}),
+            make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{}),
+            make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{}));
+
+        const index_t Hip = in_n_c_hip_wip_global_desc.GetLength(2);
+        const index_t Wip = in_n_c_hip_wip_global_desc.GetLength(3);
+
+        const auto in_n_c_y_ho_x_wo_global_desc = transform_dynamic_tensor_descriptor(
+            in_n_c_hip_wip_global_desc,
+            make_tuple(DynamicPassThrough{N},
+                       DynamicPassThrough{C},
+                       DynamicEmbed<2>{{Y, Ho}, {ConvDilationH, ConvStrideH, 0}},
+                       DynamicEmbed<2>{{X, Wo}, {ConvDilationW, ConvStrideW, 0}}),
+            make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{}),
+            make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2, 3>{}, Sequence<4, 5>{}));
+
+        const auto in_gemmk_gemmn_global_desc = transform_dynamic_tensor_descriptor(
+            in_n_c_y_ho_x_wo_global_desc,
+            make_tuple(DynamicMerge<3>{{C, Y, X}}, DynamicMerge<3>{{N, Ho, Wo}}),
+            make_tuple(Sequence<1, 2, 4>{}, Sequence<0, 3, 5>{}),
+            make_tuple(Sequence<0>{}, Sequence<1>{}));
+
+#pragma unroll 1
+        for(index_t iter = 0; iter < 100; ++iter)
+        {
+            //
+            MultiIndex<2> idx;
+
+            // initialize idx
+            for(index_t i = 0; i < 2; ++i)
+            {
+                idx(i) = p_wei_global[10 * iter + get_thread_local_1d_id() + i];
+            }
+
+            // offset
+            index_t offset = in_gemmk_gemmn_global_desc.CalculateOffset(idx);
+
+            // is_in_bound
+            bool is_in_bound =
+                in_gemmk_gemmn_global_desc.IsValidUpperIndexMappedToValidLowerIndex(idx);
+
+            // write
+            float value = 1;
+
+            transfer_data<float,
+                          1,
+                          AddressSpace::Vgpr,
+                          AddressSpace::Global,
+                          InMemoryDataOperation::Set,
+                          1,
+                          1>(&value,
+                             0,
+                             true,
+                             1,
+                             p_out_global,
+                             offset,
+                             is_in_bound,
+                             out_n_k_ho_wo_global_desc.GetElementSpace());
+        }
     }

     __device__ void Run(index_t* const __restrict__ p_wei_global,
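Taken together, the chain above builds an implicit im2col view of the input: LeftPad/RightPad grow Hi x Wi to the padded Hip x Wip, Embed expresses each padded coordinate as y * ConvDilationH + ho * ConvStrideH (and likewise for x / wo), and Merge folds (C, Y, X) into a single GEMM-K dimension and (N, Ho, Wo) into a single GEMM-N dimension. Below is a host-side sketch of the same index arithmetic, assuming row-major NCHW strides and the usual (c, y, x) / (n, ho, wo) decomposition order for Merge; both are assumptions for illustration, not taken from this diff.

    #include <cstdint>
    #include <iostream>
    #include <utility>

    using index_t = int32_t;

    // Map a (gemmk, gemmn) coordinate of the merged view back to an offset into
    // the original NCHW input, returning {offset, is_in_bound}. Padded positions
    // have no backing element, so they are reported as out of bound.
    std::pair<index_t, bool> in_gemmk_gemmn_to_input_offset(
        index_t gemmk, index_t gemmn,
        index_t C, index_t Hi, index_t Wi,
        index_t Y, index_t X, index_t Ho, index_t Wo,
        index_t ConvStrideH, index_t ConvStrideW,
        index_t ConvDilationH, index_t ConvDilationW,
        index_t InLeftPadH, index_t InLeftPadW)
    {
        // Merge: gemmk <-> (c, y, x), gemmn <-> (n, ho, wo)  [assumed order]
        const index_t x = gemmk % X;
        const index_t y = (gemmk / X) % Y;
        const index_t c = gemmk / (X * Y);

        const index_t wo = gemmn % Wo;
        const index_t ho = (gemmn / Wo) % Ho;
        const index_t n  = gemmn / (Wo * Ho);

        // Embed: coordinate in the padded input
        const index_t hip = y * ConvDilationH + ho * ConvStrideH;
        const index_t wip = x * ConvDilationW + wo * ConvStrideW;

        // Undo LeftPad; RightPad only affects the valid range, not the offset
        const index_t hi = hip - InLeftPadH;
        const index_t wi = wip - InLeftPadW;

        const bool is_in_bound = hi >= 0 && hi < Hi && wi >= 0 && wi < Wi;

        // Native NCHW (row-major) offset
        const index_t offset = ((n * C + c) * Hi + hi) * Wi + wi;

        return {offset, is_in_bound};
    }

    int main()
    {
        // 1 channel, 4x4 input, 3x3 filter, stride 1, dilation 1, pad 1 on each
        // side -> Ho = Wo = 4. gemmk = 0 and gemmn = 0 land on the padded
        // top-left corner, so the mapping is out of bound.
        const auto r = in_gemmk_gemmn_to_input_offset(0, 0,
                                                      /*C=*/1, /*Hi=*/4, /*Wi=*/4,
                                                      /*Y=*/3, /*X=*/3, /*Ho=*/4, /*Wo=*/4,
                                                      /*strides*/ 1, 1, /*dilations*/ 1, 1,
                                                      /*left pads*/ 1, 1);
        std::cout << r.first << ' ' << r.second << '\n'; // offset of (-1,-1), not in bound
    }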
composable_kernel/include/tensor_description/dynamic_multi_index_transform.hpp

(Diff collapsed in this view; +204 / -146.)
composable_kernel/include/tensor_description/dynamic_tensor_descriptor.hpp

@@ -6,8 +6,323 @@
 namespace ck {

-struct TensorDescriptor
+template <index_t NDim>
+struct DynamicNativeTensorDescriptor
 {
+    using Index = MultiIndex<NDim>;
+
+    const Index lengths_;
+    const Index strides_;
+
+    __host__ __device__ explicit constexpr DynamicNativeTensorDescriptor(const Index& lengths,
+                                                                         const Index& strides)
+        : lengths_{lengths}, strides_{strides}
+    {
+    }
+
+    __host__ __device__ static constexpr index_t GetNumOfDimension() { return NDim; }
+
+    __host__ __device__ constexpr auto GetLengths() const { return lengths_; }
+
+    __host__ __device__ constexpr auto GetStrides() const { return strides_; }
+
+    __host__ __device__ constexpr index_t GetLength(index_t idim) const { return lengths_[idim]; }
+
+    __host__ __device__ constexpr index_t GetStride(index_t idim) const { return strides_[idim]; }
+
+    __host__ __device__ constexpr index_t GetElementSize() const
+    {
+        return reduce_on_array(GetLengths(), math::multiplies<index_t>{}, index_t{1});
+    }
+
+    __host__ __device__ constexpr index_t GetElementSpace() const
+    {
+        index_t space = 1;
+
+#pragma unroll
+        for(index_t i = 0; i < NDim; ++i)
+        {
+            space += (GetLength(i) - 1) * GetStride(i);
+        }
+
+        return space;
+    }
+
+    template <typename Idx>
+    __host__ __device__ constexpr index_t CalculateOffset(const Idx& idx) const
+    {
+        index_t offset = 0;
+
+#pragma unroll
+        for(index_t i = 0; i < NDim; ++i)
+        {
+            offset += idx[i] * GetStride(i);
+        }
+
+        return offset;
+    }
+
+    template <typename UpIdxDiff, typename UpIdx, typename LowIdx>
+    __host__ __device__ constexpr index_t CalculateOffsetDiff(const UpIdxDiff& idx_up_diff,
+                                                              const LowIdx& /* idx_low_old */,
+                                                              const UpIdx& /* idx_up_old */) const
+    {
+        return CalculateOffset(idx_up_diff);
+    }
+
+    template <typename Idx>
+    __host__ __device__ constexpr bool IsUpperIndexValid(const Idx& idx) const
+    {
+        bool flag = true;
+
+#pragma unroll
+        for(index_t i = 0; i < NDim; ++i)
+        {
+            flag = flag && idx[i] >= 0 && idx[i] < GetLength(i);
+        }
+
+        return flag;
+    }
+};
+
+template <typename LowTensorDescriptor, // DynamicNativeTensorDescriptor or
+                                        // DynamicTransformedTensorDescriptor
+          typename Transforms,          // Tuple<MultIndexTransforms...>
+          typename LowDimensionIds,     // Tuple<Sequence<...>>
+          typename UpDimensionIds>      // Tuple<Sequence<...>>
+struct DynamicTransformedTensorDescriptor
+{
+    const LowTensorDescriptor low_tensor_desc_;
+    const Transforms transforms_;
+
+    static constexpr index_t NTransform = Transforms::Size();
+
+    __host__ __device__ static constexpr index_t GetNumOfLowerDimension()
+    {
+        return LowTensorDescriptor::GetNumOfDimension();
+    }
+
+    __host__ __device__ static constexpr index_t GetNumOfUpperDimension()
+    {
+        index_t ndim_up = 0;
+
+        static_for<0, NTransform, 1>{}([&](auto i) constexpr {
+            constexpr auto tmp = UpDimensionIds{}.At(i);
+            ndim_up += decltype(tmp)::Size();
+        });
+
+        return ndim_up;
+    }
+
+    static constexpr index_t NDimUp  = GetNumOfUpperDimension();
+    static constexpr index_t NDimLow = GetNumOfLowerDimension();
+
+    using UpperIndex = MultiIndex<NDimUp>;
+    using LowerIndex = MultiIndex<NDimLow>;
+
+    struct lambda_merge_sequences
+    {
+        template <typename... Xs>
+        __host__ __device__ constexpr auto operator()(Xs... xs) const
+        {
+            return merge_sequences(xs...);
+        }
+    };
+
+    struct lambda_merge_arrays
+    {
+        template <typename... Xs>
+        __host__ __device__ constexpr auto operator()(Xs... xs) const
+        {
+            return merge_arrays(xs...);
+        }
+    };
+
+    __host__ __device__ explicit constexpr DynamicTransformedTensorDescriptor(
+        const LowTensorDescriptor& low_tensor_desc, const Transforms& transforms)
+        : low_tensor_desc_{low_tensor_desc}, transforms_{transforms}
+    {
+        static_assert(NTransform == Transforms::Size() && NTransform == LowDimensionIds::Size() &&
+                          NTransform == UpDimensionIds::Size(),
+                      "wrong! # of transformations not the same");
+
+        // sanity check:
+        //   LowDimensionIds should include all low-dimensions,
+        //   UpDimensionIds should include all up-dimensions
+        using unsorted_up_dimension_ids =
+            decltype(unpack(lambda_merge_sequences{}, UpDimensionIds{}));
+
+        using sorted_up_dimension_ids =
+            typename sequence_sort<unsorted_up_dimension_ids, math::less<index_t>>::type;
+
+        static_assert(sorted_up_dimension_ids::Size() == NDimUp &&
+                          is_valid_sequence_map<sorted_up_dimension_ids>{},
+                      "wrong! UpDimensionIds is not configured correctly");
+
+        using unsorted_low_dimension_ids =
+            decltype(unpack(lambda_merge_sequences{}, LowDimensionIds{}));
+
+        using sorted_low_dimension_ids =
+            typename sequence_sort<unsorted_low_dimension_ids, math::less<index_t>>::type;
+
+        static_assert(sorted_low_dimension_ids::Size() == NDimLow &&
+                          is_valid_sequence_map<sorted_low_dimension_ids>{},
+                      "wrong! LowDimensionIds is not configured correctly");
+
+        // TODO: sanity check: while an up-dimension could be associated with multiple
+        //   transformations, a low-dimension should be associated with only one transformation
+        // TODO: sanity check: GetLowerLengths of each transform should be consistent with lengths
+        //   of lower-tensor-descriptor
+    }
+
+    __host__ __device__ static constexpr auto GetNumOfDimension()
+    {
+        return GetNumOfUpperDimension();
+    }
+
+    __host__ __device__ constexpr auto GetUpperLengths() const
+    {
+        // sort upper-dimension-ids
+        constexpr auto unsorted_up_dimension_ids =
+            unpack(lambda_merge_sequences{}, UpDimensionIds{});
+
+        using sort_up_dimension_ids = sequence_unique_sort<decltype(unsorted_up_dimension_ids),
+                                                           math::less<index_t>,
+                                                           math::equal<index_t>>;
+
+        constexpr auto sorted2unsorted_map = typename sort_up_dimension_ids::sorted2unsorted_map{};
+
+        // sort upper-lengths
+        const auto tuple_of_up_lengths = transform_tuples(
+            [](const auto& tran) constexpr { return tran.GetUpperLengths(); }, transforms_);
+
+        const auto unsorted_up_lengths = unpack(lambda_merge_arrays{}, tuple_of_up_lengths);
+
+        const auto sorted_up_lengths =
+            reorder_array_given_new2old(unsorted_up_lengths, sorted2unsorted_map);
+
+        return sorted_up_lengths;
+    }
+
+    __host__ __device__ constexpr auto GetLengths() const { return GetUpperLengths(); }
+
+    __host__ __device__ constexpr index_t GetLength(index_t idim) const
+    {
+        return GetLengths()[idim];
+    }
+
+    __host__ __device__ constexpr index_t GetElementSize() const
+    {
+        return reduce_on_array(GetLengths(), math::multiplies<index_t>{}, index_t{1});
+    }
+
+    __host__ __device__ constexpr index_t GetElementSpace() const
+    {
+        return low_tensor_desc_.GetElementSpace();
+    }
+
+    template <typename LowIdx, typename UpIdx>
+    __host__ __device__ void CalculateLowerIndex(LowIdx& idx_low, const UpIdx& idx_up) const
+    {
+        static_for<0, NTransform, 1>{}([&](auto itran) constexpr {
+            auto tran = transforms_.At(itran);
+
+            auto idx_up_part  = pick_array_element(idx_up, UpDimensionIds{}.At(itran));
+            auto idx_low_part = pick_array_element(idx_low, LowDimensionIds{}.At(itran));
+
+            tran.CalculateLowerIndex(idx_low_part, idx_up_part);
+        });
+    }
+
+    template <typename LowIdxDiff, typename UpIdxDiff, typename LowIdx, typename UpIdx>
+    __host__ __device__ void CalculateLowerIndexDiff(LowIdxDiff& idx_low_diff,
+                                                     const UpIdxDiff& idx_up_diff,
+                                                     const LowIdx& idx_low_old,
+                                                     const UpIdx& idx_up_old) const
+    {
+        static_for<0, NTransform, 1>{}([&](auto itran) {
+            const auto tran = transforms_.At(itran);
+
+            const auto idx_up_diff_part =
+                pick_array_element(idx_up_diff, UpDimensionIds{}.At(itran));
+
+            const auto idx_up_old_part = pick_array_element(idx_up_old, UpDimensionIds{}.At(itran));
+
+            const auto idx_low_old_part =
+                pick_array_element(idx_low_old, LowDimensionIds{}.At(itran));
+
+            auto idx_low_diff_part = pick_array_element(idx_low_diff, LowDimensionIds{}.At(itran));
+
+            tran.CalculateLowerIndexDiff(
+                idx_low_diff_part, idx_up_diff_part, idx_low_old_part, idx_up_old_part);
+        });
+    }
+
+    template <typename UpIdx>
+    __host__ __device__ constexpr auto CalculateLowerIndex(const UpIdx& idx_up) const
+    {
+        LowerIndex idx_low;
+
+        CalculateLowerIndex(idx_low, idx_up);
+
+        return idx_low;
+    }
+
+    template <typename UpIdxDiff, typename LowIdx, typename UpIdx>
+    __host__ __device__ constexpr auto CalculateLowerIndexDiff(const UpIdxDiff& idx_up_diff,
+                                                               const LowIdx& idx_low_old,
+                                                               const UpIdx& idx_up_old) const
+    {
+        LowerIndex idx_low_diff;
+
+        CalculateLowerIndex(idx_low_diff, idx_up_diff, idx_low_old, idx_up_old);
+
+        return idx_low_diff;
+    }
+
+    __host__ __device__ constexpr index_t CalculateOffset(const UpperIndex& idx_up) const
+    {
+        return low_tensor_desc_.CalculateOffset(CalculateLowerIndex(idx_up));
+    }
+
+    __host__ __device__ constexpr bool IsUpperIndexValid(const UpperIndex& idx_up) const
+    {
+        bool flag = true;
+
+#pragma unroll
+        for(index_t i = 0; i < NDimUp; ++i)
+        {
+            flag = flag && idx_up[i] >= 0 && idx_up[i] < GetLength(i);
+        }
+
+        return flag;
+    }
+
+    __host__ __device__ constexpr bool
+    IsValidUpperIndexMappedToValidLowerIndex(const UpperIndex& idx_up) const
+    {
+        bool flag = true;
+
+        static_for<0, NTransform, 1>{}([&](auto itran) {
+            const auto tran = Transforms{}.At(itran);
+
+            // check a transformation only if it does not always have a valid mapping
+            constexpr bool is_valid_up_always_mapped_to_valid_low =
+                decltype(tran)::IsValidUpperIndexAlwaysMappedToValidLowerIndex();
+
+            if constexpr(!is_valid_up_always_mapped_to_valid_low)
+            {
+                const auto up_dims_part = UpDimensionIds{}.At(itran);
+                const auto idx_up_part  = pick_array_element(idx_up, up_dims_part);
+
+                flag = flag && IsValidUpperIndexMappedToValidLowerIndex(idx_up_part);
+            }
+        });
+
+        return flag;
+    }
 };

 } // namespace ck
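The two structs follow the same upper/lower layering as the static transforms in multi_index_transform.hpp: a native descriptor maps a multi-index to an offset through its strides, while a transformed descriptor first pushes its upper index through each transform's CalculateLowerIndex and then defers to the lower descriptor. The following is a deliberately simplified, standalone model of that layering (fixed 2-D, one LeftPad-like transform); it is not the ck API, just a sketch of the control flow.

    #include <array>
    #include <cstdint>
    #include <iostream>

    using index_t = int32_t;

    // Simplified stand-in for DynamicNativeTensorDescriptor<2>:
    // lengths/strides are runtime values, offset = dot(idx, strides).
    struct Native2d
    {
        std::array<index_t, 2> lengths;
        std::array<index_t, 2> strides;

        index_t CalculateOffset(std::array<index_t, 2> idx) const
        {
            return idx[0] * strides[0] + idx[1] * strides[1];
        }
    };

    // Simplified stand-in for a transformed descriptor with one LeftPad-like
    // transform on dimension 1: upper index -> lower index -> offset.
    struct LeftPadDim1
    {
        Native2d low;
        index_t left_pad;

        std::array<index_t, 2> CalculateLowerIndex(std::array<index_t, 2> idx_up) const
        {
            return {idx_up[0], idx_up[1] - left_pad};
        }

        index_t CalculateOffset(std::array<index_t, 2> idx_up) const
        {
            return low.CalculateOffset(CalculateLowerIndex(idx_up));
        }

        bool IsValidUpperIndexMappedToValidLowerIndex(std::array<index_t, 2> idx_up) const
        {
            const auto idx_low = CalculateLowerIndex(idx_up);
            return idx_low[1] >= 0 && idx_low[1] < low.lengths[1];
        }
    };

    int main()
    {
        const Native2d in{{4, 8}, {8, 1}}; // 4x8, row-major
        const LeftPadDim1 padded{in, 2};   // pad 2 on the left of dimension 1

        std::cout << padded.CalculateOffset({1, 3}) << '\n';                          // 1*8 + (3-2) = 9
        std::cout << padded.IsValidUpperIndexMappedToValidLowerIndex({1, 1}) << '\n'; // 0 (maps to -1)
    }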
composable_kernel/include/tensor_description/dynamic_tensor_descriptor_helper.hpp

@@ -15,5 +15,21 @@ __host__ __device__ constexpr auto make_dynamic_native_tensor_descriptor(const L
     return DynamicNativeTensorDescriptor<Lengths::GetSize()>(lengths, strides);
 }

+template <typename LowTensorDescriptor,
+          typename Transforms,
+          typename LowDimensionIds,
+          typename UpDimensionIds>
+__host__ __device__ constexpr auto
+transform_dynamic_tensor_descriptor(const LowTensorDescriptor& low_tensor_desc,
+                                    const Transforms& transforms,
+                                    LowDimensionIds,
+                                    UpDimensionIds)
+{
+    return DynamicTransformedTensorDescriptor<LowTensorDescriptor,
+                                              Transforms,
+                                              LowDimensionIds,
+                                              UpDimensionIds>{low_tensor_desc, transforms};
+}
+
 } // namespace ck
 #endif
composable_kernel/include/tensor_description/multi_index_transform.hpp

@@ -531,47 +531,5 @@ struct Freeze
     }
 };

-template <index_t LowerLength, index_t VectorSize>
-struct Vectorize
-{
-    using LowerIndex = MultiIndex<1>;
-    using UpperIndex = MultiIndex<1>;
-
-    __host__ __device__ constexpr Vectorize()
-    {
-        static_assert(VectorSize > 0 && LowerLength % VectorSize == 0,
-                      "wrong! cannot evenly divide");
-    }
-
-    __host__ __device__ static constexpr auto GetNumOfLowerDimension() { return Number<1>{}; }
-
-    __host__ __device__ static constexpr auto GetNumOfUpperDimension() { return Number<1>{}; }
-
-    __host__ __device__ static constexpr auto GetUpperLengths()
-    {
-        return Sequence<LowerLength / VectorSize>{};
-    }
-
-    __host__ __device__ static constexpr auto CalculateLowerIndex(const UpperIndex& idx_up)
-    {
-        return VectorSize * idx_up;
-    }
-
-    __host__ __device__ static constexpr auto
-    CalculateLowerIndexDiff(const UpperIndex& idx_up_diff,
-                            const UpperIndex& /* idx_up_old */,
-                            const LowerIndex& /* idx_low_old */)
-    {
-        return VectorSize * idx_up_diff;
-    }
-
-    __host__ __device__ static constexpr bool IsLinearTransform() { return true; }
-
-    __host__ __device__ static constexpr bool IsValidUpperIndexAlwaysMappedToValidLowerIndex()
-    {
-        return true;
-    }
-};
-
 } // namespace ck
 #endif
composable_kernel/include/utility/array.hpp

@@ -12,8 +12,9 @@ struct Array
     using type      = Array<TData, NSize>;
     using data_type = TData;

+    // hack: add extra element to allow empty array
     // TODO: implement empty Array
-    TData mData[NSize] = {0};
+    TData mData[NSize + 1] = {0};

     __host__ __device__ explicit constexpr Array() {}
@@ -136,16 +137,16 @@ struct ArrayElementPicker
         return mArray(IP);
     }

-    __host__ __device__ constexpr const data_type& operator[](index_t i) const
+    template <typename I>
+    __host__ __device__ constexpr const data_type& operator[](I i) const
     {
-        return At(i);
+        index_t ip = Picks{}[i];
+        return mArray[ip];
     }

-    __host__ __device__ constexpr data_type& operator()(index_t i)
+    template <typename I>
+    __host__ __device__ constexpr data_type& operator()(I i)
     {
-        return At(i);
+        index_t ip = Picks{}[i];
+        return mArray(ip);
     }

     template <typename T>
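ArrayElementPicker (presumably what pick_array_element returns in the descriptor code above) acts as a writable view over a subset of an array: index i is remapped to Picks[i] before the underlying storage is read or written. A standalone sketch of that remapping using std::array and a runtime pick list (the ck version remaps through a compile-time Sequence instead):

    #include <array>
    #include <cstddef>
    #include <iostream>

    // View over selected elements of an array: index i maps to picks[i].
    template <typename T, std::size_t NFrom, std::size_t NPick>
    struct ElementPicker
    {
        std::array<T, NFrom>& data;
        std::array<std::size_t, NPick> picks;

        const T& operator[](std::size_t i) const { return data[picks[i]]; }
        T& operator()(std::size_t i) { return data[picks[i]]; }
    };

    int main()
    {
        std::array<int, 6> idx{10, 11, 12, 13, 14, 15};

        // Pick dimensions 1, 2 and 4, the way LowDimensionIds/UpDimensionIds
        // slice a full multi-index per transform.
        ElementPicker<int, 6, 3> part{idx, {1, 2, 4}};

        part(0) = 99;                                  // writes through to idx[1]
        std::cout << part[1] << ' ' << idx[1] << '\n'; // 12 99
    }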
composable_kernel/include/utility/array_helper.hpp

@@ -244,7 +244,7 @@ __host__ __device__ constexpr auto operator*(TData v, Array<TData, NSize> a)
 template <typename TData, index_t NSize, typename Reduce>
 __host__ __device__ constexpr TData
-accumulate_on_array(const Array<TData, NSize>& a, Reduce f, TData init)
+reduce_on_array(const Array<TData, NSize>& a, Reduce f, TData init)
 {
     TData result = init;
@@ -288,10 +288,40 @@ reverse_exclusive_scan_on_array(const Array<TData, NSize>& x, Reduce f, TData in
         r = f(r, x[i]);
     }

-    y(i) = r;
+    y(NSize - 1) = r;

     return y;
 }

+template <typename X, typename... Ys>
+__host__ __device__ constexpr auto merge_arrays(const X& x, const Ys&... ys)
+{
+    return merge_arrays(x, merge_arrays(ys...));
+}
+
+template <typename T, index_t NX, index_t NY>
+__host__ __device__ constexpr auto merge_arrays(const Array<T, NX>& x, const Array<T, NY>& y)
+{
+    Array<T, NX + NY> z;
+
+    for(index_t i = 0; i < NX; ++i)
+    {
+        z(i) = x[i];
+    }
+
+    for(index_t i = 0; i < NY; ++i)
+    {
+        z(i + NX) = y[i];
+    }
+
+    return z;
+}
+
+template <typename X>
+__host__ __device__ constexpr auto merge_arrays(const X& x)
+{
+    return x;
+}
+
 } // namespace ck
 #endif
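The new merge_arrays overloads concatenate a variadic list of arrays by peeling off one array at a time until the two-array case performs the element-wise copy. A standalone illustration of the same recursion using std::array rather than ck::Array:

    #include <array>
    #include <cstddef>
    #include <iostream>

    // Base case: a single array is returned unchanged.
    template <typename T, std::size_t NX>
    constexpr std::array<T, NX> merge_arrays(const std::array<T, NX>& x)
    {
        return x;
    }

    // Two-array case: concatenate element by element.
    template <typename T, std::size_t NX, std::size_t NY>
    constexpr std::array<T, NX + NY> merge_arrays(const std::array<T, NX>& x,
                                                  const std::array<T, NY>& y)
    {
        std::array<T, NX + NY> z{};
        for(std::size_t i = 0; i < NX; ++i) { z[i] = x[i]; }
        for(std::size_t i = 0; i < NY; ++i) { z[i + NX] = y[i]; }
        return z;
    }

    // Variadic case: fold from the right, as in the overload added above.
    template <typename T, std::size_t NX, typename... Ys>
    constexpr auto merge_arrays(const std::array<T, NX>& x, const Ys&... ys)
    {
        return merge_arrays(x, merge_arrays(ys...));
    }

    int main()
    {
        constexpr auto z = merge_arrays(std::array<int, 2>{1, 2},
                                        std::array<int, 1>{3},
                                        std::array<int, 3>{4, 5, 6});
        for(int v : z) { std::cout << v << ' '; } // 1 2 3 4 5 6
    }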
composable_kernel/include/utility/common_header.hpp

@@ -8,9 +8,11 @@
 #include "float_type.hpp"
 #include "type.hpp"
 #include "tuple.hpp"
+#include "tuple_helper.hpp"
 #include "math.hpp"
 #include "sequence.hpp"
 #include "array.hpp"
+#include "array_helper.hpp"
 #include "functional.hpp"
 #include "functional2.hpp"
 #include "functional3.hpp"
composable_kernel/include/utility/tuple.hpp

@@ -104,56 +104,5 @@ struct Tuple : detail::TupleImpl<typename arithmetic_sequence_gen<0, sizeof...(X
     }
 };

-template <typename... Xs>
-__host__ __device__ constexpr auto make_tuple(Xs&&... xs)
-{
-    return Tuple<remove_cv_t<remove_reference_t<Xs>>...>(std::forward<Xs>(xs)...);
-}
-
-namespace detail {
-
-template <typename F, typename X, index_t... Is>
-__host__ __device__ constexpr auto transform_tuples_impl(F f, const X& x, Sequence<Is...>)
-{
-    return make_tuple(f(x.At(Number<Is>{}))...);
-}
-
-template <typename F, typename X, typename Y, index_t... Is>
-__host__ __device__ constexpr auto
-transform_tuples_impl(F f, const X& x, const Y& y, Sequence<Is...>)
-{
-    return make_tuple(f(x.At(Number<Is>{}), y.At(Number<Is>{}))...);
-}
-
-template <typename F, typename X, typename Y, typename Z, index_t... Is>
-__host__ __device__ constexpr auto
-transform_tuples_impl(F f, const X& x, const Y& y, const Z& z, Sequence<Is...>)
-{
-    return make_tuple(f(x.At(Number<Is>{}), y.At(Number<Is>{}), z.At(Number<Is>{}))...);
-}
-
-} // namespace detail
-
-template <typename F, typename X>
-__host__ __device__ constexpr auto transform_tuples(F f, const X& x)
-{
-    return detail::transform_tuples_impl(
-        f, x, typename arithmetic_sequence_gen<0, X::Size(), 1>::type{});
-}
-
-template <typename F, typename X, typename Y>
-__host__ __device__ constexpr auto transform_tuples(F f, const X& x, const Y& y)
-{
-    return detail::transform_tuples_impl(
-        f, x, y, typename arithmetic_sequence_gen<0, X::Size(), 1>::type{});
-}
-
-template <typename F, typename X, typename Y, typename Z>
-__host__ __device__ constexpr auto transform_tuples(F f, const X& x, const Y& y, const Z& z)
-{
-    return detail::transform_tuples_impl(
-        f, x, y, z, typename arithmetic_sequence_gen<0, X::Size(), 1>::type{});
-}
-
 } // namespace ck
 #endif
composable_kernel/include/utility/tuple_helper.hpp (new file, mode 100644)

#ifndef CK_TUPLE_HELPER_HPP
#define CK_TUPLE_HELPER_HPP

#include "tuple_helper.hpp"

namespace ck {

template <typename... Xs>
__host__ __device__ constexpr auto make_tuple(Xs&&... xs)
{
    return Tuple<remove_cv_t<remove_reference_t<Xs>>...>(std::forward<Xs>(xs)...);
}

namespace detail {

template <typename F, typename X, index_t... Is>
__host__ __device__ constexpr auto transform_tuples_impl(F f, const X& x, Sequence<Is...>)
{
    return make_tuple(f(x.At(Number<Is>{}))...);
}

template <typename F, typename X, typename Y, index_t... Is>
__host__ __device__ constexpr auto
transform_tuples_impl(F f, const X& x, const Y& y, Sequence<Is...>)
{
    return make_tuple(f(x.At(Number<Is>{}), y.At(Number<Is>{}))...);
}

template <typename F, typename X, typename Y, typename Z, index_t... Is>
__host__ __device__ constexpr auto
transform_tuples_impl(F f, const X& x, const Y& y, const Z& z, Sequence<Is...>)
{
    return make_tuple(f(x.At(Number<Is>{}), y.At(Number<Is>{}), z.At(Number<Is>{}))...);
}

} // namespace detail

template <typename F, typename X>
__host__ __device__ constexpr auto transform_tuples(F f, const X& x)
{
    return detail::transform_tuples_impl(
        f, x, typename arithmetic_sequence_gen<0, X::Size(), 1>::type{});
}

template <typename F, typename X, typename Y>
__host__ __device__ constexpr auto transform_tuples(F f, const X& x, const Y& y)
{
    return detail::transform_tuples_impl(
        f, x, y, typename arithmetic_sequence_gen<0, X::Size(), 1>::type{});
}

template <typename F, typename X, typename Y, typename Z>
__host__ __device__ constexpr auto transform_tuples(F f, const X& x, const Y& y, const Z& z)
{
    return detail::transform_tuples_impl(
        f, x, y, z, typename arithmetic_sequence_gen<0, X::Size(), 1>::type{});
}

} // namespace ck
#endif
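transform_tuples applies a callable position-wise to one, two, or three tuples and collects the results into a new tuple; GetUpperLengths above uses the single-tuple form to gather GetUpperLengths() from every transform. A standalone equivalent of the single-tuple form built on std::tuple and std::index_sequence (not the ck types):

    #include <cstddef>
    #include <iostream>
    #include <tuple>
    #include <utility>

    namespace detail {

    // Apply f to every element of x, collecting the results into a new tuple.
    template <typename F, typename X, std::size_t... Is>
    constexpr auto transform_tuples_impl(F f, const X& x, std::index_sequence<Is...>)
    {
        return std::make_tuple(f(std::get<Is>(x))...);
    }

    } // namespace detail

    template <typename F, typename X>
    constexpr auto transform_tuples(F f, const X& x)
    {
        return detail::transform_tuples_impl(
            f, x, std::make_index_sequence<std::tuple_size<X>::value>{});
    }

    int main()
    {
        const auto lengths = transform_tuples([](auto v) { return v * 2; },
                                              std::make_tuple(3, 5, 7));
        std::cout << std::get<0>(lengths) << ' '
                  << std::get<1>(lengths) << ' '
                  << std::get<2>(lengths) << '\n'; // 6 10 14
    }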
script/cmake-rocm3.5.sh

@@ -17,5 +17,5 @@ cmake
 ${MY_PROJECT_SOURCE}

 #-D CMAKE_CXX_FLAGS="-O3 --amdgpu-target=gfx906 -mllvm --amdgpu-enable-global-sgpr-addr -mllvm --amdgpu-spill-vgpr-to-agpr=0" \
-#-D CMAKE_CXX_FLAGS="-O3 --amdgpu-target=gfx906 -mllvm --amdgpu-enable-global-sgpr-addr -mllvm --amdgpu-spill-vgpr-to-agpr=0 -save-temps" \
+#-D CMAKE_CXX_FLAGS="-O3 --amdgpu-target=gfx906 -mllvm --amdgpu-enable-global-sgpr-addr -mllvm --amdgpu-spill-vgpr-to-agpr=0 -save-temps=$CWD" \
-#-D CMAKE_CXX_FLAGS="-O3 --amdgpu-target=gfx906 -mllvm --amdgpu-enable-global-sgpr-addr -mllvm --amdgpu-spill-vgpr-to-agpr=0 -v -gline-tables-only -save-temps" \
+#-D CMAKE_CXX_FLAGS="-O3 --amdgpu-target=gfx906 -mllvm --amdgpu-enable-global-sgpr-addr -mllvm --amdgpu-spill-vgpr-to-agpr=0 -v -gline-tables-only -save-temps=$CWD" \