Commit 494608ce authored by Jing Zhang
Browse files

fixed c initial

parent 736a37ba
...@@ -51,8 +51,6 @@ struct make_block_work_sequence<MBlockWork, NBlockWork, NBlock1MBlock0> ...@@ -51,8 +51,6 @@ struct make_block_work_sequence<MBlockWork, NBlockWork, NBlock1MBlock0>
__device__ constexpr auto get() { return Sequence<NBlockWork, MBlockWork>{}; } __device__ constexpr auto get() { return Sequence<NBlockWork, MBlockWork>{}; }
}; };
#define ACCVGPR_ZERO(acc_reg_id) asm volatile("v_accvgpr_write_b32 a[" #acc_reg_id "], 0" : :);
template <index_t GridSize, template <index_t GridSize,
index_t BlockSize, index_t BlockSize,
class ABFloat, class ABFloat,
...@@ -214,11 +212,6 @@ struct GridwiseBatchGemmXdlops_gkmkpack_gknkpack_gmn_v2_org ...@@ -214,11 +212,6 @@ struct GridwiseBatchGemmXdlops_gkmkpack_gknkpack_gmn_v2_org
constexpr index_t c_thread_size = MPerBlock * NPerBlock / BlockSize; constexpr index_t c_thread_size = MPerBlock * NPerBlock / BlockSize;
auto c_thread_vec = GetRegBuffer<AccFloat, c_thread_size>(); auto c_thread_vec = GetRegBuffer<AccFloat, c_thread_size>();
ACCVGPR_ZERO(0)
ACCVGPR_ZERO(1)
ACCVGPR_ZERO(2)
ACCVGPR_ZERO(3)
// preload data into LDS // preload data into LDS
{ {
a_blockwise_copy.Run(p_a_global, p_a_block); a_blockwise_copy.Run(p_a_global, p_a_block);
...@@ -503,11 +496,6 @@ struct GridwiseBatchGemmXdlops_gkmkpack_gknkpack_gmn_v2 ...@@ -503,11 +496,6 @@ struct GridwiseBatchGemmXdlops_gkmkpack_gknkpack_gmn_v2
constexpr index_t c_thread_size = MPerBlock * NPerBlock / BlockSize; constexpr index_t c_thread_size = MPerBlock * NPerBlock / BlockSize;
auto c_thread_vec = GetRegBuffer<AccFloat, c_thread_size>(); auto c_thread_vec = GetRegBuffer<AccFloat, c_thread_size>();
ACCVGPR_ZERO(0)
ACCVGPR_ZERO(1)
ACCVGPR_ZERO(2)
ACCVGPR_ZERO(3)
// preload data into LDS // preload data into LDS
{ {
a_blockwise_copy.Run(p_a_global, p_a_block); a_blockwise_copy.Run(p_a_global, p_a_block);
......
...@@ -37,7 +37,7 @@ union float_vec4_t ...@@ -37,7 +37,7 @@ union float_vec4_t
StaticallyIndexedArray<float2_t, 2> s2; StaticallyIndexedArray<float2_t, 2> s2;
StaticallyIndexedArray<float4_t, 1> s4; StaticallyIndexedArray<float4_t, 1> s4;
float n[4]; float n[4];
__host__ __device__ constexpr float_vec4_t() {} __host__ __device__ constexpr float_vec4_t() { s4(Number<0>{}) = 0; }
template <index_t vs> template <index_t vs>
__host__ __device__ auto& GetVector(Number<vs>); __host__ __device__ auto& GetVector(Number<vs>);
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment