Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
36f6966a
"git@developer.sourcefind.cn:wangsen/mineru.git" did not exist on "902dcd2cee1df8394506b058d8912e6dcbfdd61f"
Commit
36f6966a
authored
Nov 08, 2023
by
Astha Rai
Browse files
cleaned up comments, rearranged/renamed files
parent
995c6b1c
Changes
9
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
30 additions
and
36 deletions
+30
-36
example/44_elementwise_permute/CMakeLists.txt
example/44_elementwise_permute/CMakeLists.txt
+4
-0
example/44_elementwise_permute/elementwise_permute_4D_fp16_col.cpp
...4_elementwise_permute/elementwise_permute_4D_fp16_col.cpp
+1
-1
example/44_elementwise_permute/elementwise_permute_4D_fp16_row.cpp
...4_elementwise_permute/elementwise_permute_4D_fp16_row.cpp
+1
-1
example/44_elementwise_permute/elementwise_permute_4D_fp32_col.cpp
...4_elementwise_permute/elementwise_permute_4D_fp32_col.cpp
+5
-4
example/44_elementwise_permute/elementwise_permute_4D_fp32_row.cpp
...4_elementwise_permute/elementwise_permute_4D_fp32_row.cpp
+1
-1
example/65_hip_tensor_permute/CMakeLists.txt
example/65_hip_tensor_permute/CMakeLists.txt
+0
-4
include/ck/tensor_operation/gpu/device/device_elementwise_scale.hpp
.../tensor_operation/gpu/device/device_elementwise_scale.hpp
+4
-4
include/ck/tensor_operation/gpu/device/impl/device_elementwise_scale_impl.hpp
...eration/gpu/device/impl/device_elementwise_scale_impl.hpp
+14
-14
include/ck/tensor_operation/gpu/grid/gridwise_elementwise_1d_scale.hpp
...nsor_operation/gpu/grid/gridwise_elementwise_1d_scale.hpp
+0
-7
No files found.
example/44_elementwise_permute/CMakeLists.txt
View file @
36f6966a
add_example_executable
(
example_elementwise_permute_4D_fp16 elementwise_permute_4D_fp16.cpp
)
add_example_executable
(
example_elementwise_permute_4D_fp16_2d elementwise_permute_4D_fp16_2d.cpp
)
add_example_executable
(
example_elementwise_permute_4D_fp32_row elementwise_permute_4D_fp32_row.cpp
)
add_example_executable
(
example_elementwise_permute_4D_fp16_row elementwise_permute_4D_fp16_row.cpp
)
add_example_executable
(
example_elementwise_permute_4D_fp32_col elementwise_permute_4D_fp32_col.cpp
)
add_example_executable
(
example_elementwise_permute_4D_fp16_col elementwise_permute_4D_fp16_col.cpp
)
example/
65_hip_tensor
_permute/elementwise_permute_4D_fp16_col.cpp
→
example/
44_elementwise
_permute/elementwise_permute_4D_fp16_col.cpp
View file @
36f6966a
...
...
@@ -3,7 +3,7 @@
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/element/binary_element_wise_operation.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_elementwise_impl
_ht
.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_elementwise_
scale_
impl.hpp"
#include "ck/library/utility/algorithm.hpp"
#include "ck/library/utility/check_err.hpp"
...
...
example/
65_hip_tensor
_permute/elementwise_permute_4D_fp16_
ht
.cpp
→
example/
44_elementwise
_permute/elementwise_permute_4D_fp16_
row
.cpp
View file @
36f6966a
...
...
@@ -3,7 +3,7 @@
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/element/binary_element_wise_operation.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_elementwise_impl
_ht
.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_elementwise_
scale_
impl.hpp"
#include "ck/library/utility/algorithm.hpp"
#include "ck/library/utility/check_err.hpp"
...
...
example/
65_hip_tensor
_permute/elementwise_permute_4D_fp32_col.cpp
→
example/
44_elementwise
_permute/elementwise_permute_4D_fp32_col.cpp
View file @
36f6966a
...
...
@@ -3,7 +3,7 @@
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/element/binary_element_wise_operation.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_elementwise_impl
_ht
.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_elementwise_
scale_
impl.hpp"
#include "ck/library/utility/algorithm.hpp"
#include "ck/library/utility/check_err.hpp"
...
...
@@ -27,7 +27,7 @@ using DeviceElementwisePermuteInstance =
UnaryOp
,
// UnaryOp
Scale
,
// Scalar
4
,
// NumDim
8
,
// MPerThread
1
,
// MPerThread
ck
::
Sequence
<
1
>
,
// InScalarPerVectorSeq
ck
::
Sequence
<
1
>>
;
// OutScalarPerVectorSeq
...
...
@@ -60,10 +60,11 @@ int main()
bool
do_verification
=
true
;
bool
time_kernel
=
true
;
std
::
vector
<
std
::
size_t
>
nchw
=
{
4
,
2
,
1
,
8
};
std
::
vector
<
std
::
size_t
>
nhwc
=
{
4
,
1
,
8
,
2
};
std
::
vector
<
std
::
size_t
>
nchw
=
{
5
,
4
,
2
,
3
};
std
::
vector
<
std
::
size_t
>
nhwc
=
{
5
,
2
,
3
,
4
};
Tensor
<
ADataType
>
a
(
nchw
);
Tensor
<
BDataType
>
b
(
nhwc
);
float
scale
=
1.
f
;
auto
i
=
0
;
for
(
std
::
size_t
w
=
0
;
w
<
a
.
mDesc
.
GetLengths
()[
3
];
++
w
)
...
...
example/
65_hip_tensor
_permute/elementwise_permute_4D_fp32_
ht
.cpp
→
example/
44_elementwise
_permute/elementwise_permute_4D_fp32_
row
.cpp
View file @
36f6966a
...
...
@@ -3,7 +3,7 @@
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/element/binary_element_wise_operation.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_elementwise_impl
_ht
.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_elementwise_
scale_
impl.hpp"
#include "ck/library/utility/algorithm.hpp"
#include "ck/library/utility/check_err.hpp"
...
...
example/65_hip_tensor_permute/CMakeLists.txt
deleted
100644 → 0
View file @
995c6b1c
add_example_executable
(
example_elementwise_permute_4D_fp32_ht elementwise_permute_4D_fp32_ht.cpp
)
add_example_executable
(
example_elementwise_permute_4D_fp16_ht elementwise_permute_4D_fp16_ht.cpp
)
add_example_executable
(
example_elementwise_permute_4D_fp32_col elementwise_permute_4D_fp32_col.cpp
)
add_example_executable
(
example_elementwise_permute_4D_fp16_col elementwise_permute_4D_fp16_col.cpp
)
include/ck/tensor_operation/gpu/device/device_elementwise_
ht
.hpp
→
include/ck/tensor_operation/gpu/device/device_elementwise_
scale
.hpp
View file @
36f6966a
...
...
@@ -17,7 +17,7 @@ template <typename InDataTypeTuple,
typename
OutDataTypeTuple
,
typename
ElementwiseOperation
,
typename
UnaryOperation
,
typename
Scale
,
typename
Scale
,
index_t
NumDim
>
struct
DeviceElementwise
:
public
BaseOperator
{
...
...
@@ -32,7 +32,7 @@ struct DeviceElementwise : public BaseOperator
const
std
::
array
<
void
*
,
NumOutput
>
out_dev_buffers
,
ElementwiseOperation
elementwise_op
,
UnaryOperation
unary_op
,
Scale
scale_op
)
=
0
;
Scale
scale_op
)
=
0
;
virtual
std
::
unique_ptr
<
BaseInvoker
>
MakeInvokerPointer
()
=
0
;
};
// namespace device
...
...
@@ -41,13 +41,13 @@ template <typename InDataTypeTuple,
typename
OutDataTypeTuple
,
typename
ElementwiseOperation
,
typename
UnaryOperation
,
typename
Scale
,
typename
Scale
,
index_t
NumDim
>
using
DeviceElementwisePtr
=
std
::
unique_ptr
<
DeviceElementwise
<
InDataTypeTuple
,
OutDataTypeTuple
,
ElementwiseOperation
,
UnaryOperation
,
Scale
,
Scale
,
NumDim
>>
;
}
// namespace device
...
...
include/ck/tensor_operation/gpu/device/impl/device_elementwise_impl
_ht
.hpp
→
include/ck/tensor_operation/gpu/device/impl/device_elementwise_
scale_
impl.hpp
View file @
36f6966a
...
...
@@ -8,8 +8,8 @@
#include "ck/utility/math.hpp"
#include "ck/utility/sequence.hpp"
#include "ck/tensor_operation/gpu/device/device_elementwise_
ht
.hpp"
#include "ck/tensor_operation/gpu/grid/gridwise_elementwise_1d_
ht
.hpp"
#include "ck/tensor_operation/gpu/device/device_elementwise_
scale
.hpp"
#include "ck/tensor_operation/gpu/grid/gridwise_elementwise_1d_
scale
.hpp"
#include "ck/tensor_description/tensor_descriptor_helper.hpp"
#include "ck/host_utility/kernel_launch.hpp"
...
...
@@ -23,7 +23,7 @@ template <typename InDataTypeTuple,
typename
OutDataTypeTuple
,
typename
ElementwiseOperation
,
typename
UnaryOperation
,
typename
Scale
,
typename
Scale
,
index_t
NumDim
,
index_t
MPerThread
,
typename
InScalarPerVectorSeq
,
...
...
@@ -32,7 +32,7 @@ struct DeviceElementwiseImpl : public DeviceElementwise<InDataTypeTuple,
OutDataTypeTuple
,
ElementwiseOperation
,
UnaryOperation
,
Scale
,
Scale
,
NumDim
>
{
static
constexpr
int
NumInput
=
InDataTypeTuple
::
Size
();
...
...
@@ -135,7 +135,7 @@ struct DeviceElementwiseImpl : public DeviceElementwise<InDataTypeTuple,
OutDataTypePointerTuple
,
ElementwiseOperation
,
UnaryOperation
,
Scale
,
Scale
,
MPerThread
,
InScalarPerVectorSeq
,
OutScalarPerVectorSeq
>
;
...
...
@@ -149,14 +149,14 @@ struct DeviceElementwiseImpl : public DeviceElementwise<InDataTypeTuple,
const
std
::
array
<
void
*
,
NumOutput
>
out_dev_buffers
,
ElementwiseOperation
elementwise_op
,
UnaryOperation
unary_op
,
Scale
scale_op
)
Scale
scale_op
)
:
lengths_
(
lengths
),
inStridesArray_
(
inStridesArray
),
outStridesArray_
(
outStridesArray
),
elementwise_op_
(
elementwise_op
),
unary_op_
(
unary_op
),
scale_op_
(
scale_op
),
scale_op_
(
scale_op
),
blockSize_
(
256
)
{
in_dev_buffers_
=
generate_tuple
(
...
...
@@ -183,7 +183,7 @@ struct DeviceElementwiseImpl : public DeviceElementwise<InDataTypeTuple,
ElementwiseOperation
elementwise_op_
;
UnaryOperation
unary_op_
;
Scale
scale_op_
;
Scale
scale_op_
;
index_t
blockSize_
;
};
...
...
@@ -214,7 +214,7 @@ struct DeviceElementwiseImpl : public DeviceElementwise<InDataTypeTuple,
OutDataTypePointerTuple
,
ElementwiseOperation
,
UnaryOperation
,
Scale
>
;
Scale
>
;
float
elapsed_time
=
launch_and_time_kernel
(
stream_config
,
kernel
,
...
...
@@ -227,7 +227,7 @@ struct DeviceElementwiseImpl : public DeviceElementwise<InDataTypeTuple,
arg
.
out_dev_buffers_
,
arg
.
elementwise_op_
,
arg
.
unary_op_
,
arg
.
scale_op_
);
arg
.
scale_op_
);
return
elapsed_time
;
}
...
...
@@ -285,7 +285,7 @@ struct DeviceElementwiseImpl : public DeviceElementwise<InDataTypeTuple,
const
std
::
array
<
void
*
,
NumOutput
>
out_dev_buffers
,
ElementwiseOperation
elementwise_op
,
UnaryOperation
unary_op
,
Scale
scale_op
)
Scale
scale_op
)
{
return
Argument
{
lengths
,
inStridesArray
,
...
...
@@ -294,7 +294,7 @@ struct DeviceElementwiseImpl : public DeviceElementwise<InDataTypeTuple,
out_dev_buffers
,
elementwise_op
,
unary_op
,
scale_op
};
scale_op
};
}
std
::
unique_ptr
<
BaseArgument
>
...
...
@@ -305,7 +305,7 @@ struct DeviceElementwiseImpl : public DeviceElementwise<InDataTypeTuple,
const
std
::
array
<
void
*
,
NumOutput
>
out_dev_buffers
,
ElementwiseOperation
elementwise_op
,
UnaryOperation
unary_op
,
Scale
scale_op
)
override
Scale
scale_op
)
override
{
return
std
::
make_unique
<
Argument
>
(
lengths
,
inStridesArray
,
...
...
@@ -314,7 +314,7 @@ struct DeviceElementwiseImpl : public DeviceElementwise<InDataTypeTuple,
out_dev_buffers
,
elementwise_op
,
unary_op
,
scale_op
);
scale_op
);
}
static
auto
MakeInvoker
()
{
return
Invoker
{};
}
...
...
include/ck/tensor_operation/gpu/grid/gridwise_elementwise_1d_
ht
.hpp
→
include/ck/tensor_operation/gpu/grid/gridwise_elementwise_1d_
scale
.hpp
View file @
36f6966a
...
...
@@ -7,11 +7,6 @@
#include "ck/utility/data_type.hpp"
#include "ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#define UNUSED(expr) \
do \
{ \
(void)(expr); \
} while(0)
namespace
ck
{
...
...
@@ -163,7 +158,6 @@ struct GridwiseElementwise_1D
},
Number
<
NumOutput
>
{});
// const auto& scalar = ScalarMult;
index_t
num_iter
=
M
/
(
loop_step
);
do
{
...
...
@@ -211,7 +205,6 @@ struct GridwiseElementwise_1D
Number
<
NumInput
>
{});
unpack2
(
elementwise_op
,
out_data_refs
,
in_data_refs
);
UNUSED
(
scale_op
);
});
static_for
<
0
,
NumOutput
,
1
>
{}([
&
](
auto
I
)
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment