Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
494608ce
Commit
494608ce
authored
Jan 21, 2021
by
Jing Zhang
Browse files
fixed c initial
parent
736a37ba
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
1 addition
and
13 deletions
+1
-13
composable_kernel/include/tensor_operation/gridwise_gemm_xdlops_fp16_bfp16.hpp
...lude/tensor_operation/gridwise_gemm_xdlops_fp16_bfp16.hpp
+0
-12
composable_kernel/include/utility/float_type.amd.hpp.in
composable_kernel/include/utility/float_type.amd.hpp.in
+1
-1
No files found.
composable_kernel/include/tensor_operation/gridwise_gemm_xdlops_fp16_bfp16.hpp
View file @
494608ce
...
@@ -51,8 +51,6 @@ struct make_block_work_sequence<MBlockWork, NBlockWork, NBlock1MBlock0>
...
@@ -51,8 +51,6 @@ struct make_block_work_sequence<MBlockWork, NBlockWork, NBlock1MBlock0>
__device__
constexpr
auto
get
()
{
return
Sequence
<
NBlockWork
,
MBlockWork
>
{};
}
__device__
constexpr
auto
get
()
{
return
Sequence
<
NBlockWork
,
MBlockWork
>
{};
}
};
};
#define ACCVGPR_ZERO(acc_reg_id) asm volatile("v_accvgpr_write_b32 a[" #acc_reg_id "], 0" : :);
template
<
index_t
GridSize
,
template
<
index_t
GridSize
,
index_t
BlockSize
,
index_t
BlockSize
,
class
ABFloat
,
class
ABFloat
,
...
@@ -214,11 +212,6 @@ struct GridwiseBatchGemmXdlops_gkmkpack_gknkpack_gmn_v2_org
...
@@ -214,11 +212,6 @@ struct GridwiseBatchGemmXdlops_gkmkpack_gknkpack_gmn_v2_org
constexpr
index_t
c_thread_size
=
MPerBlock
*
NPerBlock
/
BlockSize
;
constexpr
index_t
c_thread_size
=
MPerBlock
*
NPerBlock
/
BlockSize
;
auto
c_thread_vec
=
GetRegBuffer
<
AccFloat
,
c_thread_size
>
();
auto
c_thread_vec
=
GetRegBuffer
<
AccFloat
,
c_thread_size
>
();
ACCVGPR_ZERO
(
0
)
ACCVGPR_ZERO
(
1
)
ACCVGPR_ZERO
(
2
)
ACCVGPR_ZERO
(
3
)
// preload data into LDS
// preload data into LDS
{
{
a_blockwise_copy
.
Run
(
p_a_global
,
p_a_block
);
a_blockwise_copy
.
Run
(
p_a_global
,
p_a_block
);
...
@@ -503,11 +496,6 @@ struct GridwiseBatchGemmXdlops_gkmkpack_gknkpack_gmn_v2
...
@@ -503,11 +496,6 @@ struct GridwiseBatchGemmXdlops_gkmkpack_gknkpack_gmn_v2
constexpr
index_t
c_thread_size
=
MPerBlock
*
NPerBlock
/
BlockSize
;
constexpr
index_t
c_thread_size
=
MPerBlock
*
NPerBlock
/
BlockSize
;
auto
c_thread_vec
=
GetRegBuffer
<
AccFloat
,
c_thread_size
>
();
auto
c_thread_vec
=
GetRegBuffer
<
AccFloat
,
c_thread_size
>
();
ACCVGPR_ZERO
(
0
)
ACCVGPR_ZERO
(
1
)
ACCVGPR_ZERO
(
2
)
ACCVGPR_ZERO
(
3
)
// preload data into LDS
// preload data into LDS
{
{
a_blockwise_copy
.
Run
(
p_a_global
,
p_a_block
);
a_blockwise_copy
.
Run
(
p_a_global
,
p_a_block
);
...
...
composable_kernel/include/utility/float_type.amd.hpp.in
View file @
494608ce
...
@@ -37,7 +37,7 @@ union float_vec4_t
...
@@ -37,7 +37,7 @@ union float_vec4_t
StaticallyIndexedArray<float2_t, 2> s2;
StaticallyIndexedArray<float2_t, 2> s2;
StaticallyIndexedArray<float4_t, 1> s4;
StaticallyIndexedArray<float4_t, 1> s4;
float n[4];
float n[4];
__host__ __device__ constexpr float_vec4_t() {}
__host__ __device__ constexpr float_vec4_t() {
s4(Number<0>{}) = 0;
}
template <index_t vs>
template <index_t vs>
__host__ __device__ auto& GetVector(Number<vs>);
__host__ __device__ auto& GetVector(Number<vs>);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment