Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
800f0ab3
Commit
800f0ab3
authored
Feb 10, 2023
by
Chao Liu
Browse files
update hello world example
parent
a0addb61
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
50 additions
and
34 deletions
+50
-34
example/91_tile_program/hello_world.cpp
example/91_tile_program/hello_world.cpp
+15
-4
example/91_tile_program/tile_program.hpp
example/91_tile_program/tile_program.hpp
+35
-30
No files found.
example/91_tile_program/hello_world.cpp
View file @
800f0ab3
...
@@ -14,11 +14,13 @@
...
@@ -14,11 +14,13 @@
// program
// program
struct
HelloWorld
struct
HelloWorld
{
{
__host__
__device__
void
operator
()(
TileProgram
&
tp
,
int
x
,
int
y
)
__host__
__device__
void
operator
()(
TileProgram
&
tp
,
int
x
,
int
y
,
int
*
res
)
{
{
auto
desc
=
tp
.
make_naive_tensor_descriptor_packed
(
ck
::
make_tuple
(
x
));
auto
desc0
=
tp
(
make_naive_tensor_descriptor_packed
(
ck
::
make_tuple
(
x
)));
auto
desc1
=
tp
(
make_naive_tensor_descriptor_packed
(
ck
::
make_tuple
(
y
)));
printf
(
"length %d
\n
"
,
desc
.
GetLength
(
ck
::
Number
<
0
>
{}));
res
[
0
]
=
desc0
.
GetLength
(
ck
::
Number
<
0
>
{});
res
[
1
]
=
desc1
.
GetLength
(
ck
::
Number
<
0
>
{});
}
}
};
};
...
@@ -27,7 +29,16 @@ int main()
...
@@ -27,7 +29,16 @@ int main()
int
x
=
100
;
int
x
=
100
;
int
y
=
101
;
int
y
=
101
;
launch
(
HelloWorld
{},
1
,
1
,
x
,
y
);
DeviceMem
res_dev_buf
(
2
*
sizeof
(
int
));
launch
(
HelloWorld
{},
1
,
1
,
x
,
y
,
static_cast
<
int
*>
(
res_dev_buf
.
GetDeviceBuffer
()));
int
res_host
[
2
];
res_dev_buf
.
FromDevice
(
res_host
);
printf
(
"res_host %d
\n
"
,
res_host
[
0
]);
printf
(
"res_host %d
\n
"
,
res_host
[
1
]);
return
0
;
return
0
;
}
}
example/91_tile_program/tile_program.hpp
View file @
800f0ab3
...
@@ -6,66 +6,71 @@
...
@@ -6,66 +6,71 @@
#include "ck/tensor_description/tensor_descriptor.hpp"
#include "ck/tensor_description/tensor_descriptor.hpp"
#include "ck/tensor_description/tensor_descriptor_helper.hpp"
#include "ck/tensor_description/tensor_descriptor_helper.hpp"
//
namespace tp (for tile programming)
//
hidden intermediate argument
struct
TileProgram
struct
Arg
{
{
// hidden intermediate argument
char
data_
[
128
];
struct
Arg
{
char
data_
[
1024
];
ck
::
index_t
size_
=
0
;
ck
::
index_t
size_
=
0
;
}
;
ck
::
index_t
pos_
=
0
;
// arg on device
__host__
__device__
void
reset
()
Arg
arg_
;
{
ck
::
index_t
arg_pos_
=
0
;
size_
=
0
;
pos_
=
0
;
}
__device__
void
reset_pos
()
{
pos_
=
0
;
}
// push arg on host
// push arg on host
template
<
typename
T
>
template
<
typename
T
>
__host__
auto
push
_arg
(
const
T
&
a
)
__host__
T
push
(
const
T
&
a
)
{
{
*
reinterpret_cast
<
T
*>
(
arg_
.
data_
+
arg_
.
size_
)
=
a
;
*
reinterpret_cast
<
T
*>
(
data_
+
size_
)
=
a
;
arg_
.
size_
+=
sizeof
(
T
);
size_
+=
sizeof
(
T
);
return
a
;
return
a
;
}
}
// pull arg on device
// pull arg on device
template
<
typename
T
>
template
<
typename
T
>
__device__
T
pull
_arg
()
__device__
T
pull
()
{
{
auto
a
=
*
reinterpret_cast
<
T
*>
(
arg_
.
data_
+
arg_
pos_
);
T
a
=
*
reinterpret_cast
<
T
*>
(
data_
+
pos_
);
arg_
pos_
+=
sizeof
(
T
);
pos_
+=
sizeof
(
T
);
return
a
;
return
a
;
}
}
};
// host push
// namespace tp (for tile programming)
template
<
typename
...
Lengths
>
struct
TileProgram
__host__
constexpr
auto
{
make_naive_tensor_descriptor_packed
(
const
ck
::
Tuple
<
Lengths
...
>&
lengths
)
// arg on device
{
Arg
arg_
;
auto
desc
=
ck
::
make_naive_tensor_descriptor_packed
(
lengths
);
return
push_arg
(
desc
);
__device__
void
gpu_init
()
{
arg_
.
reset_pos
();
}
}
// device pull
// push arg on host
template
<
typename
...
Lengths
>
template
<
typename
T
>
__device__
constexpr
auto
__host__
T
operator
()(
const
T
&
a
)
make_naive_tensor_descriptor_packed
(
const
ck
::
Tuple
<
Lengths
...
>&
lengths
)
{
{
using
Desc
=
decltype
(
ck
::
make_naive_tensor_descriptor_packed
(
lengths
));
return
arg_
.
push
(
a
);
}
return
pull_arg
<
Desc
>
();
// push arg on host
template
<
typename
T
>
__device__
T
operator
()(
const
T
&
)
{
return
arg_
.
pull
<
T
>
();
}
}
};
};
template
<
typename
Program
,
typename
...
Xs
>
template
<
typename
Program
,
typename
...
Xs
>
__global__
void
gpu_program_wrapper
(
Program
f
,
TileProgram
tp
,
Xs
...
xs
)
__global__
void
gpu_program_wrapper
(
Program
f
,
TileProgram
tp
,
Xs
...
xs
)
{
{
tp
.
gpu_init
();
f
(
tp
,
xs
...);
f
(
tp
,
xs
...);
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment