Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
92e1588d
Commit
92e1588d
authored
Sep 05, 2021
by
Qianfeng Zhang
Browse files
Add CONSTANT decorator for descriptor read buffer
parent
f0019df3
Changes
11
Show whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
38 additions
and
38 deletions
+38
-38
composable_kernel/src/kernel_wrapper/gridwise_generic_reduction_first_call_blockwise_reduce_all_dims.cpp
...eneric_reduction_first_call_blockwise_reduce_all_dims.cpp
+3
-3
composable_kernel/src/kernel_wrapper/gridwise_generic_reduction_first_call_blockwise_reduce_partial_dims.cpp
...ic_reduction_first_call_blockwise_reduce_partial_dims.cpp
+3
-3
composable_kernel/src/kernel_wrapper/gridwise_generic_reduction_first_call_multiblock_reduce_all_dims.cpp
...neric_reduction_first_call_multiblock_reduce_all_dims.cpp
+4
-4
composable_kernel/src/kernel_wrapper/gridwise_generic_reduction_first_call_multiblock_reduce_partial_dims.cpp
...c_reduction_first_call_multiblock_reduce_partial_dims.cpp
+4
-4
composable_kernel/src/kernel_wrapper/gridwise_generic_reduction_first_call_threadwise_reduce_all_dims.cpp
...neric_reduction_first_call_threadwise_reduce_all_dims.cpp
+3
-3
composable_kernel/src/kernel_wrapper/gridwise_generic_reduction_first_call_threadwise_reduce_partial_dims.cpp
...c_reduction_first_call_threadwise_reduce_partial_dims.cpp
+3
-3
composable_kernel/src/kernel_wrapper/gridwise_generic_reduction_first_call_warpwise_reduce_all_dims.cpp
...generic_reduction_first_call_warpwise_reduce_all_dims.cpp
+3
-3
composable_kernel/src/kernel_wrapper/gridwise_generic_reduction_first_call_warpwise_reduce_partial_dims.cpp
...ric_reduction_first_call_warpwise_reduce_partial_dims.cpp
+3
-3
composable_kernel/src/kernel_wrapper/gridwise_generic_reduction_second_call_blockwise.cpp
...pper/gridwise_generic_reduction_second_call_blockwise.cpp
+4
-4
composable_kernel/src/kernel_wrapper/gridwise_generic_reduction_second_call_threadwise.cpp
...per/gridwise_generic_reduction_second_call_threadwise.cpp
+4
-4
composable_kernel/src/kernel_wrapper/gridwise_generic_reduction_second_call_warpwise.cpp
...apper/gridwise_generic_reduction_second_call_warpwise.cpp
+4
-4
No files found.
composable_kernel/src/kernel_wrapper/gridwise_generic_reduction_first_call_blockwise_reduce_all_dims.cpp
View file @
92e1588d
...
...
@@ -277,15 +277,15 @@ extern "C" __global__ void gridwise_generic_reduce_1(int origReduceLen,
const
void
*
__restrict__
p_src_global
,
float
beta
,
void
*
__restrict__
p_dst_global
,
void
*
__restrict__
ws_global
,
const
void
CONSTANT
*
ws_global
,
long
ws_buf2_bytes_offset
,
void
*
__restrict__
indices_global
)
{
(
void
)
BlkGroupSize
;
(
void
)
ws_buf2_bytes_offset
;
const
void
*
p_src2dDesc
=
ws_global
;
const
void
*
p_dst1dDesc
=
static_cast
<
char
*>
(
ws_global
)
+
2048
;
const
void
*
p_src2dDesc
=
cast_pointer_to_generic_address_space
(
ws_global
)
;
const
void
*
p_dst1dDesc
=
static_cast
<
const
char
*>
(
p_src2dDesc
)
+
2048
;
const
auto
src2dDesc
=
get_reduction_src2d_descriptor
<
src2d_need_padding
>
(
p_src2dDesc
);
const
auto
dst1dDesc
=
get_reduction_dst1d_descriptor
<
dst1d_need_padding
>
(
p_dst1dDesc
);
...
...
composable_kernel/src/kernel_wrapper/gridwise_generic_reduction_first_call_blockwise_reduce_partial_dims.cpp
View file @
92e1588d
...
...
@@ -278,15 +278,15 @@ extern "C" __global__ void gridwise_generic_reduce_1(int origReduceLen,
const
void
*
__restrict__
p_src_global
,
float
beta
,
void
*
__restrict__
p_dst_global
,
void
*
__restrict__
ws_global
,
const
void
CONSTANT
*
ws_global
,
long
ws_buf2_bytes_offset
,
void
*
__restrict__
indices_global
)
{
(
void
)
BlkGroupSize
;
(
void
)
ws_buf2_bytes_offset
;
const
void
*
p_src2dDesc
=
ws_global
;
const
void
*
p_dst1dDesc
=
static_cast
<
char
*>
(
ws_global
)
+
2048
;
const
void
*
p_src2dDesc
=
cast_pointer_to_generic_address_space
(
ws_global
)
;
const
void
*
p_dst1dDesc
=
static_cast
<
const
char
*>
(
p_src2dDesc
)
+
2048
;
const
auto
src2dDesc
=
get_reduction_src2d_descriptor
<
src2d_need_padding
>
(
p_src2dDesc
);
const
auto
dst1dDesc
=
get_reduction_dst1d_descriptor
<
dst1d_need_padding
>
(
p_dst1dDesc
);
...
...
composable_kernel/src/kernel_wrapper/gridwise_generic_reduction_first_call_multiblock_reduce_all_dims.cpp
View file @
92e1588d
...
...
@@ -279,16 +279,16 @@ extern "C" __global__ void gridwise_generic_reduce_1(int origReduceLen,
const
void
*
__restrict__
p_src_global
,
float
beta
,
void
*
__restrict__
p_dst_global
,
void
*
__restrict__
ws_global
,
const
void
CONSTANT
*
ws_global
,
long
ws_buf2_bytes_offset
,
void
*
__restrict__
indices_global
)
{
(
void
)
p_dst_global
;
(
void
)
indices_global
;
const
void
*
p_src2dDesc
=
ws_global
;
const
void
*
p_dst1dDesc
=
static_cast
<
char
*>
(
ws_global
)
+
2048
;
void
*
ws_buf1_global
=
static_cast
<
char
*>
(
ws_global
)
+
4096
;
const
void
*
p_src2dDesc
=
cast_pointer_to_generic_address_space
(
ws_global
)
;
const
void
*
p_dst1dDesc
=
static_cast
<
const
char
*>
(
p_src2dDesc
)
+
2048
;
void
*
ws_buf1_global
=
const_cast
<
char
*>
(
static_cast
<
const
char
*>
(
p_src2dDesc
)
+
4096
)
;
const
auto
src2dDesc
=
get_reduction_src2d_descriptor
<
src2d_need_padding
>
(
p_src2dDesc
);
const
auto
dst1dDesc
=
get_reduction_dst1d_descriptor
<
dst1d_need_padding
>
(
p_dst1dDesc
);
...
...
composable_kernel/src/kernel_wrapper/gridwise_generic_reduction_first_call_multiblock_reduce_partial_dims.cpp
View file @
92e1588d
...
...
@@ -279,16 +279,16 @@ extern "C" __global__ void gridwise_generic_reduce_1(int origReduceLen,
const
void
*
__restrict__
p_src_global
,
float
beta
,
void
*
__restrict__
p_dst_global
,
void
*
__restrict__
ws_global
,
const
void
CONSTANT
*
ws_global
,
long
ws_buf2_bytes_offset
,
void
*
__restrict__
indices_global
)
{
(
void
)
p_dst_global
;
(
void
)
indices_global
;
const
void
*
p_src2dDesc
=
ws_global
;
const
void
*
p_dst1dDesc
=
static_cast
<
char
*>
(
ws_global
)
+
2048
;
void
*
ws_buf1_global
=
static_cast
<
char
*>
(
ws_global
)
+
4096
;
const
void
*
p_src2dDesc
=
cast_pointer_to_generic_address_space
(
ws_global
)
;
const
void
*
p_dst1dDesc
=
static_cast
<
const
char
*>
(
p_src2dDesc
)
+
2048
;
void
*
ws_buf1_global
=
const_cast
<
char
*>
(
static_cast
<
const
char
*>
(
p_src2dDesc
)
+
4096
)
;
const
auto
src2dDesc
=
get_reduction_src2d_descriptor
<
src2d_need_padding
>
(
p_src2dDesc
);
const
auto
dst1dDesc
=
get_reduction_dst1d_descriptor
<
dst1d_need_padding
>
(
p_dst1dDesc
);
...
...
composable_kernel/src/kernel_wrapper/gridwise_generic_reduction_first_call_threadwise_reduce_all_dims.cpp
View file @
92e1588d
...
...
@@ -290,15 +290,15 @@ extern "C" __global__ void gridwise_generic_reduce_1(int origReduceLen,
const
void
*
__restrict__
p_src_global
,
float
beta
,
void
*
__restrict__
p_dst_global
,
void
*
__restrict__
ws_global
,
const
void
CONSTANT
*
ws_global
,
long
ws_buf2_bytes_offset
,
void
*
__restrict__
indices_global
)
{
(
void
)
BlkGroupSize
;
(
void
)
ws_buf2_bytes_offset
;
const
void
*
p_src2dDesc
=
ws_global
;
const
void
*
p_dst1dDesc
=
static_cast
<
char
*>
(
ws_global
)
+
2048
;
const
void
*
p_src2dDesc
=
cast_pointer_to_generic_address_space
(
ws_global
)
;
const
void
*
p_dst1dDesc
=
static_cast
<
const
char
*>
(
p_src2dDesc
)
+
2048
;
const
auto
src2dDesc
=
get_reduction_src2d_descriptor
<
src2d_need_padding
>
(
p_src2dDesc
);
const
auto
dst1dDesc
=
get_reduction_dst1d_descriptor
<
dst1d_need_padding
>
(
p_dst1dDesc
);
...
...
composable_kernel/src/kernel_wrapper/gridwise_generic_reduction_first_call_threadwise_reduce_partial_dims.cpp
View file @
92e1588d
...
...
@@ -291,15 +291,15 @@ extern "C" __global__ void gridwise_generic_reduce_1(int origReduceLen,
const
void
*
__restrict__
p_src_global
,
float
beta
,
void
*
__restrict__
p_dst_global
,
void
*
__restrict__
ws_global
,
const
void
CONSTANT
*
ws_global
,
long
ws_buf2_bytes_offset
,
void
*
__restrict__
indices_global
)
{
(
void
)
BlkGroupSize
;
(
void
)
ws_buf2_bytes_offset
;
const
void
*
p_src2dDesc
=
ws_global
;
const
void
*
p_dst1dDesc
=
static_cast
<
char
*>
(
ws_global
)
+
2048
;
const
void
*
p_src2dDesc
=
cast_pointer_to_generic_address_space
(
ws_global
)
;
const
void
*
p_dst1dDesc
=
static_cast
<
const
char
*>
(
p_src2dDesc
)
+
2048
;
const
auto
src2dDesc
=
get_reduction_src2d_descriptor
<
src2d_need_padding
>
(
p_src2dDesc
);
const
auto
dst1dDesc
=
get_reduction_dst1d_descriptor
<
dst1d_need_padding
>
(
p_dst1dDesc
);
...
...
composable_kernel/src/kernel_wrapper/gridwise_generic_reduction_first_call_warpwise_reduce_all_dims.cpp
View file @
92e1588d
...
...
@@ -291,15 +291,15 @@ extern "C" __global__ void gridwise_generic_reduce_1(int origReduceLen,
const
void
*
__restrict__
p_src_global
,
float
beta
,
void
*
__restrict__
p_dst_global
,
void
*
__restrict__
ws_global
,
const
void
CONSTANT
*
ws_global
,
long
ws_buf2_bytes_offset
,
void
*
__restrict__
indices_global
)
{
(
void
)
BlkGroupSize
;
(
void
)
ws_buf2_bytes_offset
;
const
void
*
p_src2dDesc
=
ws_global
;
const
void
*
p_dst1dDesc
=
static_cast
<
char
*>
(
ws_global
)
+
2048
;
const
void
*
p_src2dDesc
=
cast_pointer_to_generic_address_space
(
ws_global
)
;
const
void
*
p_dst1dDesc
=
static_cast
<
const
char
*>
(
p_src2dDesc
)
+
2048
;
const
auto
src2dDesc
=
get_reduction_src2d_descriptor
<
src2d_need_padding
>
(
p_src2dDesc
);
const
auto
dst1dDesc
=
get_reduction_dst1d_descriptor
<
dst1d_need_padding
>
(
p_dst1dDesc
);
...
...
composable_kernel/src/kernel_wrapper/gridwise_generic_reduction_first_call_warpwise_reduce_partial_dims.cpp
View file @
92e1588d
...
...
@@ -292,15 +292,15 @@ extern "C" __global__ void gridwise_generic_reduce_1(int origReduceLen,
const
void
*
__restrict__
p_src_global
,
float
beta
,
void
*
__restrict__
p_dst_global
,
void
*
__restrict__
ws_global
,
const
void
CONSTANT
*
ws_global
,
long
ws_buf2_bytes_offset
,
void
*
__restrict__
indices_global
)
{
(
void
)
BlkGroupSize
;
(
void
)
ws_buf2_bytes_offset
;
const
void
*
p_src2dDesc
=
ws_global
;
const
void
*
p_dst1dDesc
=
static_cast
<
char
*>
(
ws_global
)
+
2048
;
const
void
*
p_src2dDesc
=
cast_pointer_to_generic_address_space
(
ws_global
)
;
const
void
*
p_dst1dDesc
=
static_cast
<
const
char
*>
(
p_src2dDesc
)
+
2048
;
const
auto
src2dDesc
=
get_reduction_src2d_descriptor
<
src2d_need_padding
>
(
p_src2dDesc
);
const
auto
dst1dDesc
=
get_reduction_dst1d_descriptor
<
dst1d_need_padding
>
(
p_dst1dDesc
);
...
...
composable_kernel/src/kernel_wrapper/gridwise_generic_reduction_second_call_blockwise.cpp
View file @
92e1588d
...
...
@@ -237,15 +237,15 @@ extern "C" __global__ void gridwise_generic_reduce_2(int origReduceLen,
const
void
*
__restrict__
p_src_global
,
float
beta
,
void
*
__restrict__
p_dst_global
,
void
*
__restrict__
ws_global
,
const
void
CONSTANT
*
ws_global
,
long
ws_buf2_bytes_offset
,
void
*
__restrict__
indices_global
)
{
(
void
)
p_src_global
;
const
void
*
p_src2dDesc
=
ws_global
;
const
void
*
p_dst1dDesc
=
static_cast
<
char
*>
(
ws_global
)
+
2048
;
void
*
ws_buf1_global
=
static_cast
<
char
*>
(
ws_global
)
+
4096
;
const
void
*
p_src2dDesc
=
cast_pointer_to_generic_address_space
(
ws_global
)
;
const
void
*
p_dst1dDesc
=
static_cast
<
const
char
*>
(
p_src2dDesc
)
+
2048
;
void
*
ws_buf1_global
=
const_cast
<
char
*>
(
static_cast
<
const
char
*>
(
p_src2dDesc
)
+
4096
)
;
const
auto
src2dDesc
=
get_reduction_src2d_descriptor
<
src2d_need_padding
>
(
p_src2dDesc
);
const
auto
dst1dDesc
=
get_reduction_dst1d_descriptor
<
dst1d_need_padding
>
(
p_dst1dDesc
);
...
...
composable_kernel/src/kernel_wrapper/gridwise_generic_reduction_second_call_threadwise.cpp
View file @
92e1588d
...
...
@@ -251,15 +251,15 @@ extern "C" __global__ void gridwise_generic_reduce_2(int origReduceLen,
const
void
*
__restrict__
p_src_global
,
float
beta
,
void
*
__restrict__
p_dst_global
,
void
*
__restrict__
ws_global
,
const
void
CONSTANT
*
ws_global
,
long
ws_buf2_bytes_offset
,
void
*
__restrict__
indices_global
)
{
(
void
)
p_src_global
;
const
void
*
p_src2dDesc
=
ws_global
;
const
void
*
p_dst1dDesc
=
static_cast
<
char
*>
(
ws_global
)
+
2048
;
void
*
ws_buf1_global
=
static_cast
<
char
*>
(
ws_global
)
+
4096
;
const
void
*
p_src2dDesc
=
cast_pointer_to_generic_address_space
(
ws_global
)
;
const
void
*
p_dst1dDesc
=
static_cast
<
const
char
*>
(
p_src2dDesc
)
+
2048
;
void
*
ws_buf1_global
=
const_cast
<
char
*>
(
static_cast
<
const
char
*>
(
p_src2dDesc
)
+
4096
)
;
const
auto
src2dDesc
=
get_reduction_src2d_descriptor
<
src2d_need_padding
>
(
p_src2dDesc
);
const
auto
dst1dDesc
=
get_reduction_dst1d_descriptor
<
dst1d_need_padding
>
(
p_dst1dDesc
);
...
...
composable_kernel/src/kernel_wrapper/gridwise_generic_reduction_second_call_warpwise.cpp
View file @
92e1588d
...
...
@@ -252,15 +252,15 @@ extern "C" __global__ void gridwise_generic_reduce_2(int origReduceLen,
const
void
*
__restrict__
p_src_global
,
float
beta
,
void
*
__restrict__
p_dst_global
,
void
*
__restrict__
ws_global
,
const
void
CONSTANT
*
ws_global
,
long
ws_buf2_bytes_offset
,
void
*
__restrict__
indices_global
)
{
(
void
)
p_src_global
;
const
void
*
p_src2dDesc
=
ws_global
;
const
void
*
p_dst1dDesc
=
static_cast
<
char
*>
(
ws_global
)
+
2048
;
void
*
ws_buf1_global
=
static_cast
<
char
*>
(
ws_global
)
+
4096
;
const
void
*
p_src2dDesc
=
cast_pointer_to_generic_address_space
(
ws_global
)
;
const
void
*
p_dst1dDesc
=
static_cast
<
const
char
*>
(
p_src2dDesc
)
+
2048
;
void
*
ws_buf1_global
=
const_cast
<
char
*>
(
static_cast
<
const
char
*>
(
p_src2dDesc
)
+
4096
)
;
const
auto
src2dDesc
=
get_reduction_src2d_descriptor
<
src2d_need_padding
>
(
p_src2dDesc
);
const
auto
dst1dDesc
=
get_reduction_dst1d_descriptor
<
dst1d_need_padding
>
(
p_dst1dDesc
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment