Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
44a12304
Commit
44a12304
authored
Sep 07, 2022
by
turneram
Browse files
Rough draft working
parent
cfbe4da6
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
43 additions
and
69 deletions
+43
-69
src/targets/gpu/kernels/include/migraphx/kernels/ck_elementwise.hpp
...s/gpu/kernels/include/migraphx/kernels/ck_elementwise.hpp
+43
-69
No files found.
src/targets/gpu/kernels/include/migraphx/kernels/ck_elementwise.hpp
View file @
44a12304
...
@@ -27,6 +27,7 @@
...
@@ -27,6 +27,7 @@
#include <migraphx/kernels/index.hpp>
#include <migraphx/kernels/index.hpp>
#include <migraphx/kernels/algorithm.hpp>
#include <migraphx/kernels/algorithm.hpp>
#include <migraphx/kernels/integral_constant.hpp>
#include <migraphx/kernels/integral_constant.hpp>
#include <migraphx/kernels/tensor_view.hpp>
#include "ck/device_utility/device_prop.hpp"
#include "ck/device_utility/device_prop.hpp"
#include "ck/device_utility/kernel_launch.hpp"
#include "ck/device_utility/kernel_launch.hpp"
...
@@ -42,20 +43,32 @@ using CDataType = float;
...
@@ -42,20 +43,32 @@ using CDataType = float;
using
ElementwiseFunctor
=
float
;
using
ElementwiseFunctor
=
float
;
static
constexpr
auto
I0
=
ck
::
Number
<
0
>
{};
static
constexpr
auto
I0
=
ck
::
Number
<
0
>
{};
using
index_t
=
index_int
;
template
<
class
L
,
class
S
>
template
<
class
L
,
class
S
,
class
N
>
__host__
__device__
constexpr
auto
MakeDescriptor_M
(
const
L
&
lengths
,
const
S
&
strides
)
constexpr
auto
MakeDescriptor_M
(
const
L
&
lengths
,
const
S
&
strides
,
const
N
&
/* ndim */
)
{
{
auto
idx
=
make_index
();
auto
gridSize
=
72
;
auto
blockSize
=
1024
;
constexpr
auto
ndim
=
1
;
//auto idx = make_index();
auto
tupleOfShape
=
generate_tuple
([
&
](
auto
I
)
{
return
static_cast
<
ck
::
index_t
>
(
lengths
[
I
]);
},
auto
tupleOfShape
=
generate_tuple
([
&
](
auto
I
)
{
return
static_cast
<
ck
::
index_t
>
(
lengths
[
I
]);
},
ck
::
Number
<
1
>
{});
ck
::
Number
<
ndim
>
{});
auto
tupleOfStride
=
generate_tuple
(
auto
tupleOfStride
=
generate_tuple
(
[
&
](
auto
I
)
{
return
static_cast
<
ck
::
index_t
>
(
strides
[
I
]);
},
ck
::
Number
<
1
>
{});
[
&
](
auto
I
)
{
return
static_cast
<
ck
::
index_t
>
(
strides
[
I
]);
},
ck
::
Number
<
1
>
{});
const
auto
desc_m
=
make_naive_tensor_descriptor
(
tupleOfShape
,
tupleOfStride
);
const
auto
desc
=
make_naive_tensor_descriptor
(
tupleOfShape
,
tupleOfStride
);
auto
desc_m
=
desc
;
// merge nd to 1d desc - [s0 * s1 * ...]
if
constexpr
(
ndim
>
1
)
{
desc_m
=
transform_tensor_descriptor
(
desc
,
make_tuple
(
make_merge_transform
(
tupleOfShape
)),
make_tuple
(
generate_sequence_v2
([
&
](
auto
I
)
{
return
I
;
},
ck
::
Number
<
ndim
>
{})),
make_tuple
(
ck
::
Sequence
<
0
>
{}));
}
const
auto
M
=
desc_m
.
GetLength
(
I0
);
const
auto
M
=
desc_m
.
GetLength
(
I0
);
const
index_t
loop_step
=
idx
.
nglobal
();
// gridSize * blockSize * MPerThread;
const
ck
::
index_t
loop_step
=
/*
idx.nglobal(); //
*/
gridSize
*
blockSize
/*
* MPerThread
*/
;
const
auto
pad
=
ck
::
math
::
integer_least_multiple
(
M
,
loop_step
)
-
M
;
const
auto
pad
=
ck
::
math
::
integer_least_multiple
(
M
,
loop_step
)
-
M
;
const
auto
desc_m_pad
=
const
auto
desc_m_pad
=
transform_tensor_descriptor
(
desc_m
,
transform_tensor_descriptor
(
desc_m
,
...
@@ -68,18 +81,7 @@ __host__ __device__ constexpr auto MakeDescriptor_M(const L& lengths, const S& s
...
@@ -68,18 +81,7 @@ __host__ __device__ constexpr auto MakeDescriptor_M(const L& lengths, const S& s
struct
Add
struct
Add
{
{
template
<
typename
Y
,
typename
X0
,
typename
X1
>
template
<
typename
Y
,
typename
X0
,
typename
X1
>
__host__
__device__
constexpr
void
operator
()(
Y
&
y
,
const
X0
&
x0
,
const
X1
&
x1
)
const
;
__device__
constexpr
void
operator
()(
Y
&
y
,
const
X0
&
x0
,
const
X1
&
x1
)
const
template
<
>
__host__
__device__
constexpr
void
operator
()
<
float
>
(
float
&
y
,
const
float
&
x0
,
const
float
&
x1
)
const
{
y
=
x0
+
x1
;
};
template
<
>
__host__
__device__
constexpr
void
operator
()
<
double
>
(
double
&
y
,
const
double
&
x0
,
const
double
&
x1
)
const
{
{
y
=
x0
+
x1
;
y
=
x0
+
x1
;
};
};
...
@@ -88,57 +90,29 @@ struct Add
...
@@ -88,57 +90,29 @@ struct Add
template
<
class
T
,
class
U
,
class
V
>
template
<
class
T
,
class
U
,
class
V
>
__device__
void
ck_elementwise
(
const
T
&
a_t
,
const
U
&
b_t
,
const
V
&
c_t
)
__device__
void
ck_elementwise
(
const
T
&
a_t
,
const
U
&
b_t
,
const
V
&
c_t
)
{
{
// auto add = [](auto a, auto b) { return a + b; };
auto
idx
=
make_index
();
auto
lengths
=
a_t
.
get_shape
().
lens
;
if
(
idx
.
global
==
0
)
auto
strides
=
a_t
.
get_shape
().
strides
;
{
auto
a_desc
=
MakeDescriptor_M
(
lengths
,
strides
);
constexpr
auto
lengths
=
get_shape_c
<
T
>
{}.
lens
;
constexpr
auto
strides
=
get_shape_c
<
T
>
{}.
strides
;
using
AGridDesc_M
=
decltype
(
a_desc
);
constexpr
auto
a_desc
=
MakeDescriptor_M
(
lengths
,
strides
,
1
);
// using Add = ck::tensor_operation::element_wise::Add;
using
GridwiseBinEltwise
=
ck
::
GridwiseBinaryElementwise_1D
<
ADataType
,
BDataType
,
CDataType
,
CDataType
,
AGridDesc_M
,
AGridDesc_M
,
AGridDesc_M
,
Add
,
8
,
8
,
8
,
8
>
;
auto
op
=
Add
{};
GridwiseBinEltwise
::
Run
(
a_t
.
data
(),
b_t
.
data
(),
c_t
.
data
(),
a_desc
,
a_desc
,
a_desc
,
op
);
// auto kernel = ck::kernel_binary_elementwise_1d<GridwiseBinEltwise,
// ADataType,
// BDataType,
// CDataType,
// AGridDesc_M,
// AGridDesc_M,
// AGridDesc_M,
// Add>;
// kernel(a_t.data(), b_t.data(), c_t.data(), a_desc, a_desc, a_desc, Add);
// Argument arg{a_t.data(), b_t.data(), c_t.data(), c_t.get_shape().lens,
// a_t.get_shape().strides, b_t.get_shape().strides, c_t.get_shape().strides,
// add};
// auto lengths = a_t.get_shape().lens;
// auto strides = a_t.get_shape().strides;
// auto idx = make_index();
// b_t.get_shape();
// c_t.get_shape();
// auto tupleOfShape = generate_tuple([&](auto I) { return lengths[I]; }, ck::Number<1>{});
// auto tupleOfStride = generate_tuple([&](auto I) { return strides[I]; }, ck::Number<1>{});
// const auto desc_m = make_naive_tensor_descriptor(tupleOfShape, tupleOfStride);
// const auto M = desc_m.GetLength(I0);
using
AGridDesc_M
=
decltype
(
a_desc
);
// const ck::index_t loop_step = idx.nglobal();//gridSize * blockSize * MPerThread;
using
GridwiseBinEltwise
=
ck
::
GridwiseBinaryElementwise_1D
<
ADataType
,
// const auto pad = ck::math::integer_least_multiple(M, loop_step) - M;
BDataType
,
// const auto desc_m_pad =
CDataType
,
// transform_tensor_descriptor(desc_m,
CDataType
,
// make_tuple(ck::make_right_pad_transform(M, pad)),
AGridDesc_M
,
// make_tuple(ck::Sequence<0>{}),
AGridDesc_M
,
// make_tuple(ck::Sequence<0>{}));
AGridDesc_M
,
Add
,
1
,
1
,
1
,
1
>
;
auto
op
=
Add
{};
GridwiseBinEltwise
::
Run
(
a_t
.
data
(),
b_t
.
data
(),
c_t
.
data
(),
a_desc
,
a_desc
,
a_desc
,
op
);
}
}
}
}
// namespace migraphx
}
// namespace migraphx
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment