"app/vscode:/vscode.git/clone" did not exist on "231cc878cba3f5d080bc96faf8ecbded31b0b4e2"
Commit 313f3c07 authored by Jing Zhang
Browse files

unroll k

parent 0f620a90
......@@ -385,9 +385,9 @@ struct BlockwiseGemmBlockABlockBThreadCTransANormalBNormalC_v2
void* a_loc = (void*)(p_a_block + mMyThreadOffsetA);
void* b_loc = (void*)(p_b_block + mMyThreadOffsetB);
// loop over k
int k_chunk = 2;
#pragma unroll
for(index_t k_begin = 0; k_begin < K; k_begin += KPerThreadLoop * k_chunk)
int k_chunk = K;
//for(index_t k_begin = 0; k_begin < K; k_begin += KPerThreadLoop * k_chunk)
index_t k_begin = 0;
{
#if 0
......@@ -417,6 +417,7 @@ struct BlockwiseGemmBlockABlockBThreadCTransANormalBNormalC_v2
lgkmcnt(1);
outerProduct4x4(reg_a[0], reg_b[1], reg_c[1], reg_c[3], reg_c[5], reg_c[7]);
lgkmcnt(0);
#pragma unroll
for(int i = 0; i < k_chunk - 1; i++)
{
k = k + 1;
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment