Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
0e6bf342
Commit
0e6bf342
authored
Apr 20, 2022
by
rocking
Browse files
Rename elementwise op to binary elementwise
parent
5fa209af
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
33 additions
and
30 deletions
+33
-30
example/19_gemm_softmax/gemm_softmax_xdl_fp16.cpp
example/19_gemm_softmax/gemm_softmax_xdl_fp16.cpp
+5
-5
include/ck/tensor_operation/gpu/device/device_binary_elementwise.hpp
...tensor_operation/gpu/device/device_binary_elementwise.hpp
+1
-1
include/ck/tensor_operation/gpu/device/device_binary_elementwise_2d.hpp
...sor_operation/gpu/device/device_binary_elementwise_2d.hpp
+18
-15
include/ck/tensor_operation/gpu/grid/gridwise_binary_elementwise_1d.hpp
...sor_operation/gpu/grid/gridwise_binary_elementwise_1d.hpp
+9
-9
No files found.
example/19_gemm_softmax/gemm_softmax_xdl_fp16.cpp
View file @
0e6bf342
...
...
@@ -23,7 +23,7 @@
#include "device_reduce_blockwise.hpp"
#include "reduction_enums.hpp"
#include "reduction_operator_mapping.hpp"
#include "device_elementwise_2d.hpp"
#include "device_
binary_
elementwise_2d.hpp"
template
<
ck
::
index_t
...
Is
>
using
S
=
ck
::
Sequence
<
Is
...
>
;
...
...
@@ -170,7 +170,7 @@ struct Div
};
using
DeviceElementwiseSubExpInstance
=
ck
::
tensor_operation
::
device
::
DeviceElementwise_2D
<
CDataType
,
ck
::
tensor_operation
::
device
::
Device
Binary
Elementwise_2D
<
CDataType
,
CDataType
,
CDataType
,
EltwiseComputeDataType
,
...
...
@@ -180,7 +180,7 @@ using DeviceElementwiseSubExpInstance =
8
>
;
using
DeviceElementwiseDivInstance
=
ck
::
tensor_operation
::
device
::
DeviceElementwise_2D
<
CDataType
,
CDataType
,
CDataType
,
EltwiseComputeDataType
,
Div
,
256
,
32
,
8
>
;
Device
Binary
Elementwise_2D
<
CDataType
,
CDataType
,
CDataType
,
EltwiseComputeDataType
,
Div
,
256
,
32
,
8
>
;
using
HostGemmInstance
=
ck
::
tensor_operation
::
host
::
ReferenceGemm
<
ADataType
,
BDataType
,
CDataType
,
PassThrough
,
PassThrough
,
PassThrough
>
;
...
...
@@ -412,7 +412,7 @@ int main(int argc, char* argv[])
if
(
!
broadcastSubExp
.
IsSupportedArgument
(
broadcastSubExp_argument_ptr
.
get
()))
{
throw
std
::
runtime_error
(
"The runtime parameters seems not supported by the "
"DeviceElementwise_2D instance, exiting!"
);
"Device
Binary
Elementwise_2D instance, exiting!"
);
};
auto
broadcastSubExp_invoker_ptr
=
broadcastSubExp
.
MakeInvokerPointer
();
...
...
@@ -462,7 +462,7 @@ int main(int argc, char* argv[])
if
(
!
broadcastDiv
.
IsSupportedArgument
(
broadcastDiv_argument_ptr
.
get
()))
{
throw
std
::
runtime_error
(
"The runtime parameters seems not supported by the "
"DeviceElementwise_2D instance, exiting!"
);
"Device
Binary
Elementwise_2D instance, exiting!"
);
};
auto
broadcastDiv_invoker_ptr
=
broadcastDiv
.
MakeInvokerPointer
();
...
...
include/ck/tensor_operation/gpu/device/device_elementwise.hpp
→
include/ck/tensor_operation/gpu/device/device_
binary_
elementwise.hpp
View file @
0e6bf342
...
...
@@ -9,7 +9,7 @@ namespace tensor_operation {
namespace
device
{
template
<
typename
ElementwiseFunctor
>
struct
DeviceElementwise
:
public
BaseOperator
struct
Device
Binary
Elementwise
:
public
BaseOperator
{
virtual
std
::
unique_ptr
<
BaseArgument
>
...
...
include/ck/tensor_operation/gpu/device/device_elementwise_2d.hpp
→
include/ck/tensor_operation/gpu/device/device_
binary_
elementwise_2d.hpp
View file @
0e6bf342
...
...
@@ -3,8 +3,8 @@
#include <vector>
#include "device.hpp"
#include "device_elementwise.hpp"
#include "gridwise_elementwise_1d.hpp"
#include "device_
binary_
elementwise.hpp"
#include "gridwise_
binary_
elementwise_1d.hpp"
namespace
ck
{
namespace
tensor_operation
{
...
...
@@ -18,7 +18,7 @@ template <typename ADataType,
index_t
ThreadPerBlock
,
index_t
ThreadTileSize
,
index_t
ScalarPerVector
>
struct
DeviceElementwise_2D
:
public
DeviceElementwise
<
ElementwiseFunctor
>
struct
Device
Binary
Elementwise_2D
:
public
Device
Binary
Elementwise
<
ElementwiseFunctor
>
{
static_assert
(
ThreadTileSize
%
ScalarPerVector
==
0
);
static
constexpr
int
BlockTileSize
=
ThreadPerBlock
*
ThreadTileSize
;
...
...
@@ -51,16 +51,16 @@ struct DeviceElementwise_2D : public DeviceElementwise<ElementwiseFunctor>
return
desc_m0_pad
;
}
using
GridDesc_M0
=
decltype
(
MakeDescriptor_M0
({
1
,
1
},
{
1
,
1
}));
using
GridwiseEltwise
=
GridwiseElementwise_1D
<
ADataType
,
BDataType
,
CDataType
,
ComputeDataType
,
GridDesc_M0
,
ElementwiseFunctor
,
ThreadPerBlock
,
ThreadTileSize
,
ScalarPerVector
>
;
using
GridDesc_M0
=
decltype
(
MakeDescriptor_M0
({
1
,
1
},
{
1
,
1
}));
using
Gridwise
Bin
Eltwise
=
Gridwise
Binary
Elementwise_1D
<
ADataType
,
BDataType
,
CDataType
,
ComputeDataType
,
GridDesc_M0
,
ElementwiseFunctor
,
ThreadPerBlock
,
ThreadTileSize
,
ScalarPerVector
>
;
struct
Argument
:
public
BaseArgument
{
...
...
@@ -101,7 +101,7 @@ struct DeviceElementwise_2D : public DeviceElementwise<ElementwiseFunctor>
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
)
{
const
auto
kernel
=
kernel_elementwise_1d
<
GridwiseEltwise
,
const
auto
kernel
=
kernel_elementwise_1d
<
Gridwise
Bin
Eltwise
,
ADataType
,
BDataType
,
CDataType
,
...
...
@@ -192,8 +192,11 @@ struct DeviceElementwise_2D : public DeviceElementwise<ElementwiseFunctor>
auto
str
=
std
::
stringstream
();
// clang-format off
str
<<
"DeviceElementwise_2D"
str
<<
"Device
Binary
Elementwise_2D"
<<
"<"
<<
"ThreadPerBlock = "
<<
ThreadPerBlock
<<
"ThreadTileSize = "
<<
ThreadTileSize
<<
"ScalarPerVector = "
<<
ScalarPerVector
<<
">"
;
// clang-format on
...
...
include/ck/tensor_operation/gpu/grid/gridwise_elementwise_1d.hpp
→
include/ck/tensor_operation/gpu/grid/gridwise_
binary_
elementwise_1d.hpp
View file @
0e6bf342
...
...
@@ -7,7 +7,7 @@
namespace
ck
{
template
<
typename
GridwiseEltwise
,
template
<
typename
Gridwise
Bin
Eltwise
,
typename
ADataType
,
typename
BDataType
,
typename
CDataType
,
...
...
@@ -21,13 +21,13 @@ __global__ void kernel_elementwise_1d(const ADataType* __restrict__ p_a_global,
const
GridDesc_M0
c_grid_desc_m0
,
const
ElementwiseFunctor
functor
)
{
GridwiseEltwise
::
Run
(
p_a_global
,
p_b_global
,
p_c_global
,
a_grid_desc_m0
,
b_grid_desc_m0
,
c_grid_desc_m0
,
functor
);
Gridwise
Bin
Eltwise
::
Run
(
p_a_global
,
p_b_global
,
p_c_global
,
a_grid_desc_m0
,
b_grid_desc_m0
,
c_grid_desc_m0
,
functor
);
}
template
<
typename
ADataType
,
...
...
@@ -39,7 +39,7 @@ template <typename ADataType,
index_t
ThreadPerBlock
,
index_t
ThreadTileSize
,
index_t
ScalarPerVector
>
struct
GridwiseElementwise_1D
struct
Gridwise
Binary
Elementwise_1D
{
static
constexpr
auto
I0
=
Number
<
0
>
{};
static
constexpr
int
BlockTileSize
=
ThreadPerBlock
*
ThreadTileSize
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment