Commit d058d164 authored by Jing Zhang's avatar Jing Zhang
Browse files

merged ds_read and gemm, but register allocation is mess

parent d700ce86
...@@ -383,79 +383,7 @@ struct BlockwiseGemmBlockABlockBThreadCTransANormalBNormalC_v2 ...@@ -383,79 +383,7 @@ struct BlockwiseGemmBlockABlockBThreadCTransANormalBNormalC_v2
// loop over k // loop over k
for(index_t k_begin = 0; k_begin < K; k_begin += KPerThreadLoop) for(index_t k_begin = 0; k_begin < K; k_begin += KPerThreadLoop)
{ {
#if 0 #if 1
// copy A-sub to form A
#if 0
#pragma unroll
// MRepeat = 2
for(index_t m_repeat = 0; m_repeat < MRepeat; ++m_repeat)
{
threadwise_matrix_copy(
a_block_mtx,
//MPerLevel1Cluster = 4
p_a_block + a_block_mtx.Get1dIndex(k_begin, m_repeat * MPerLevel1Cluster) +
mMyThreadOffsetA,
a_thread_mtx,
//MPerThreadSubC = 4
p_a_thread + a_thread_mtx.Get1dIndex(0, m_repeat * MPerThreadSubC),
a_thread_sub_mtx.GetLengths());
}
#else
{
auto src_index = a_block_mtx.Get1dIndex(k_begin, 0) + mMyThreadOffsetA;
auto dst_index = a_thread_sub_mtx.Get1dIndex(0, 0);
const float4* loc = (const float4 *)(p_a_block + src_index);
float4* reg = (float4 *)(p_a_thread + dst_index);
reg[0] = loc[0];
reg[1] = loc[16];
//reg[MPerThreadSubC/4] = loc[MPerLevel1Cluster/4];
//asm volatile("\n \
//ds_read2_b64 %0, %2 offset1:1 \n \
//ds_read2_b64 %1, %2 offset0:32 offset1:33 \n \
//s_waitcnt lgkmcnt(0)"
//: "=v"(reg[0]), "=v"(reg[1])
//: "v"(__to_local((void *)(loc)))
//);
}
#endif
#if 0
// copy B-sub to form B
#pragma unroll
for(index_t n_repeat = 0; n_repeat < NRepeat; ++n_repeat)
{
threadwise_matrix_copy(
b_block_mtx,
p_b_block + b_block_mtx.Get1dIndex(k_begin, n_repeat * NPerLevel1Cluster) +
mMyThreadOffsetB,
b_thread_mtx,
p_b_thread + b_thread_mtx.Get1dIndex(0, n_repeat * NPerThreadSubC),
b_thread_sub_mtx.GetLengths());
}
#else
{
auto src_index = b_block_mtx.Get1dIndex(k_begin, 0) + mMyThreadOffsetB;
auto dst_index = b_thread_sub_mtx.Get1dIndex(0, 0);
const float4* loc = (const float4 *)(p_b_block + src_index);
float4* reg = (float4 *)(p_b_thread + dst_index);
reg[0] = loc[0];
reg[1] = loc[8];
//reg[NPerThreadSubC/4] = loc[NPerLevel1Cluster/4];
//asm volatile("\n \
//ds_read2_b64 %0, %2 offset1:1 \n \
//ds_read2_b64 %1, %2 offset0:16 offset1:17 \n \
//s_waitcnt lgkmcnt(0)"
//: "=v"(reg[0]), "=v"(reg[1])
//: "v"(__to_local((void *)(loc)))
//);
}
#endif
#else
auto a_src_index = a_block_mtx.Get1dIndex(k_begin, 0) + mMyThreadOffsetA; auto a_src_index = a_block_mtx.Get1dIndex(k_begin, 0) + mMyThreadOffsetA;
auto b_src_index = b_block_mtx.Get1dIndex(k_begin, 0) + mMyThreadOffsetB; auto b_src_index = b_block_mtx.Get1dIndex(k_begin, 0) + mMyThreadOffsetB;
...@@ -549,89 +477,307 @@ struct BlockwiseGemmBlockABlockBThreadCTransANormalBNormalC_v2 ...@@ -549,89 +477,307 @@ struct BlockwiseGemmBlockABlockBThreadCTransANormalBNormalC_v2
//); //);
#endif
// C = A * B // C = A * B
#if 0 asm volatile("\n \
threadwise_gemm(a_thread_mtx, v_mac_f32 %0, %64, %72 \n \
True, v_mac_f32 %1, %64, %73 \n \
p_a_thread, v_mac_f32 %2, %64, %74 \n \
b_thread_mtx, v_mac_f32 %3, %64, %75 \n \
False, v_mac_f32 %4, %64, %76 \n \
p_b_thread, v_mac_f32 %5, %64, %77 \n \
c_thread_mtx, v_mac_f32 %6, %64, %78 \n \
False, v_mac_f32 %7, %64, %79 \n \
p_c_thread, v_mac_f32 %8, %65, %72 \n \
f_accum); v_mac_f32 %9, %65, %73 \n \
v_mac_f32 %10, %65, %74 \n \
v_mac_f32 %11, %65, %75 \n \
v_mac_f32 %12, %65, %76 \n \
v_mac_f32 %13, %65, %77 \n \
v_mac_f32 %14, %65, %78 \n \
v_mac_f32 %15, %65, %79 \n \
v_mac_f32 %16, %66, %72 \n \
v_mac_f32 %17, %66, %73 \n \
v_mac_f32 %18, %66, %74 \n \
v_mac_f32 %19, %66, %75 \n \
v_mac_f32 %20, %66, %76 \n \
v_mac_f32 %21, %66, %77 \n \
v_mac_f32 %22, %66, %78 \n \
v_mac_f32 %23, %66, %79 \n \
v_mac_f32 %24, %67, %72 \n \
v_mac_f32 %25, %67, %73 \n \
v_mac_f32 %26, %67, %74 \n \
v_mac_f32 %27, %67, %75 \n \
v_mac_f32 %28, %67, %76 \n \
v_mac_f32 %29, %67, %77 \n \
v_mac_f32 %30, %67, %78 \n \
v_mac_f32 %31, %67, %79 \n \
v_mac_f32 %32, %68, %72 \n \
v_mac_f32 %33, %68, %73 \n \
v_mac_f32 %34, %68, %74 \n \
v_mac_f32 %35, %68, %75 \n \
v_mac_f32 %36, %68, %76 \n \
v_mac_f32 %37, %68, %77 \n \
v_mac_f32 %38, %68, %78 \n \
v_mac_f32 %39, %68, %79 \n \
v_mac_f32 %40, %69, %72 \n \
v_mac_f32 %41, %69, %73 \n \
v_mac_f32 %42, %69, %74 \n \
v_mac_f32 %43, %69, %75 \n \
v_mac_f32 %44, %69, %76 \n \
v_mac_f32 %45, %69, %77 \n \
v_mac_f32 %46, %69, %78 \n \
v_mac_f32 %47, %69, %79 \n \
v_mac_f32 %48, %70, %72 \n \
v_mac_f32 %49, %70, %73 \n \
v_mac_f32 %50, %70, %74 \n \
v_mac_f32 %51, %70, %75 \n \
v_mac_f32 %52, %70, %76 \n \
v_mac_f32 %53, %70, %77 \n \
v_mac_f32 %54, %70, %78 \n \
v_mac_f32 %55, %70, %79 \n \
v_mac_f32 %56, %71, %72 \n \
v_mac_f32 %57, %71, %73 \n \
v_mac_f32 %58, %71, %74 \n \
v_mac_f32 %59, %71, %75 \n \
v_mac_f32 %60, %71, %76 \n \
v_mac_f32 %61, %71, %77 \n \
v_mac_f32 %62, %71, %78 \n \
v_mac_f32 %63, %71, %79 \n \
"
:
"=v"(p_c_thread[0]),
"=v"(p_c_thread[1]),
"=v"(p_c_thread[2]),
"=v"(p_c_thread[3]),
"=v"(p_c_thread[4]),
"=v"(p_c_thread[5]),
"=v"(p_c_thread[6]),
"=v"(p_c_thread[7]),
"=v"(p_c_thread[8]),
"=v"(p_c_thread[9]),
"=v"(p_c_thread[10]),
"=v"(p_c_thread[11]),
"=v"(p_c_thread[12]),
"=v"(p_c_thread[13]),
"=v"(p_c_thread[14]),
"=v"(p_c_thread[15]),
"=v"(p_c_thread[16]),
"=v"(p_c_thread[17]),
"=v"(p_c_thread[18]),
"=v"(p_c_thread[19]),
"=v"(p_c_thread[20]),
"=v"(p_c_thread[21]),
"=v"(p_c_thread[22]),
"=v"(p_c_thread[23]),
"=v"(p_c_thread[24]),
"=v"(p_c_thread[25]),
"=v"(p_c_thread[26]),
"=v"(p_c_thread[27]),
"=v"(p_c_thread[28]),
"=v"(p_c_thread[29]),
"=v"(p_c_thread[30]),
"=v"(p_c_thread[31]),
"=v"(p_c_thread[32]),
"=v"(p_c_thread[33]),
"=v"(p_c_thread[34]),
"=v"(p_c_thread[35]),
"=v"(p_c_thread[36]),
"=v"(p_c_thread[37]),
"=v"(p_c_thread[38]),
"=v"(p_c_thread[39]),
"=v"(p_c_thread[40]),
"=v"(p_c_thread[41]),
"=v"(p_c_thread[42]),
"=v"(p_c_thread[43]),
"=v"(p_c_thread[44]),
"=v"(p_c_thread[45]),
"=v"(p_c_thread[46]),
"=v"(p_c_thread[47]),
"=v"(p_c_thread[48]),
"=v"(p_c_thread[49]),
"=v"(p_c_thread[50]),
"=v"(p_c_thread[51]),
"=v"(p_c_thread[52]),
"=v"(p_c_thread[53]),
"=v"(p_c_thread[54]),
"=v"(p_c_thread[55]),
"=v"(p_c_thread[56]),
"=v"(p_c_thread[57]),
"=v"(p_c_thread[58]),
"=v"(p_c_thread[59]),
"=v"(p_c_thread[60]),
"=v"(p_c_thread[61]),
"=v"(p_c_thread[62]),
"=v"(p_c_thread[63])
:
"v"(p_a_thread[0]),
"v"(p_a_thread[1]),
"v"(p_a_thread[2]),
"v"(p_a_thread[3]),
"v"(p_a_thread[4]),
"v"(p_a_thread[5]),
"v"(p_a_thread[6]),
"v"(p_a_thread[7]),
"v"(p_b_thread[0]),
"v"(p_b_thread[1]),
"v"(p_b_thread[2]),
"v"(p_b_thread[3]),
"v"(p_b_thread[4]),
"v"(p_b_thread[5]),
"v"(p_b_thread[6]),
"v"(p_b_thread[7]),
"0"(p_c_thread[0]),
"1"(p_c_thread[1]),
"2"(p_c_thread[2]),
"3"(p_c_thread[3]),
"4"(p_c_thread[4]),
"5"(p_c_thread[5]),
"6"(p_c_thread[6]),
"7"(p_c_thread[7]),
"8"(p_c_thread[8]),
"9"(p_c_thread[9]),
"10"(p_c_thread[10]),
"11"(p_c_thread[11]),
"12"(p_c_thread[12]),
"13"(p_c_thread[13]),
"14"(p_c_thread[14]),
"15"(p_c_thread[15]),
"16"(p_c_thread[16]),
"17"(p_c_thread[17]),
"18"(p_c_thread[18]),
"19"(p_c_thread[19]),
"20"(p_c_thread[20]),
"21"(p_c_thread[21]),
"22"(p_c_thread[22]),
"23"(p_c_thread[23]),
"24"(p_c_thread[24]),
"25"(p_c_thread[25]),
"26"(p_c_thread[26]),
"27"(p_c_thread[27]),
"28"(p_c_thread[28]),
"29"(p_c_thread[29]),
"30"(p_c_thread[30]),
"31"(p_c_thread[31]),
"32"(p_c_thread[32]),
"33"(p_c_thread[33]),
"34"(p_c_thread[34]),
"35"(p_c_thread[35]),
"36"(p_c_thread[36]),
"37"(p_c_thread[37]),
"38"(p_c_thread[38]),
"39"(p_c_thread[39]),
"40"(p_c_thread[40]),
"41"(p_c_thread[41]),
"42"(p_c_thread[42]),
"43"(p_c_thread[43]),
"44"(p_c_thread[44]),
"45"(p_c_thread[45]),
"46"(p_c_thread[46]),
"47"(p_c_thread[47]),
"48"(p_c_thread[48]),
"49"(p_c_thread[49]),
"50"(p_c_thread[50]),
"51"(p_c_thread[51]),
"52"(p_c_thread[52]),
"53"(p_c_thread[53]),
"54"(p_c_thread[54]),
"55"(p_c_thread[55]),
"56"(p_c_thread[56]),
"57"(p_c_thread[57]),
"58"(p_c_thread[58]),
"59"(p_c_thread[59]),
"60"(p_c_thread[60]),
"61"(p_c_thread[61]),
"62"(p_c_thread[62]),
"63"(p_c_thread[63])
);
#else #else
auto a_src_index = a_block_mtx.Get1dIndex(k_begin, 0) + mMyThreadOffsetA;
auto b_src_index = b_block_mtx.Get1dIndex(k_begin, 0) + mMyThreadOffsetB;
auto dst_index = a_thread_sub_mtx.Get1dIndex(0, 0);
const float4* a_loc = (const float4 *)(p_a_block + a_src_index);
const float4* b_loc = (const float4 *)(p_b_block + b_src_index);
float4* reg = (float4 *)(p_a_thread + dst_index);
asm volatile("\n \ asm volatile("\n \
v_mac_f32 %0, %64, %72 \n \ ds_read2_b64 %0, %84 offset1:1 \n \
v_mac_f32 %1, %64, %73 \n \ ds_read2_b64 %1, %84 offset0:32 offset1:33 \n \
v_mac_f32 %2, %64, %74 \n \ ds_read2_b64 %2, %85 offset1:1 \n \
v_mac_f32 %3, %64, %75 \n \ ds_read2_b64 %3, %85 offset0:16 offset1:17 \n \
v_mac_f32 %4, %64, %76 \n \ s_waitcnt lgkmcnt(0) \n \
v_mac_f32 %5, %64, %77 \n \ v_mac_f32 %4, %68, %76 \n \
v_mac_f32 %6, %64, %78 \n \ v_mac_f32 %5, %68, %77 \n \
v_mac_f32 %7, %64, %79 \n \ v_mac_f32 %6, %68, %78 \n \
v_mac_f32 %8, %65, %72 \n \ v_mac_f32 %7, %68, %79 \n \
v_mac_f32 %9, %65, %73 \n \ v_mac_f32 %8, %68, %80 \n \
v_mac_f32 %10, %65, %74 \n \ v_mac_f32 %9, %68, %81 \n \
v_mac_f32 %11, %65, %75 \n \ v_mac_f32 %10, %68, %82 \n \
v_mac_f32 %12, %65, %76 \n \ v_mac_f32 %11, %68, %83 \n \
v_mac_f32 %13, %65, %77 \n \ v_mac_f32 %12, %69, %76 \n \
v_mac_f32 %14, %65, %78 \n \ v_mac_f32 %13, %69, %77 \n \
v_mac_f32 %15, %65, %79 \n \ v_mac_f32 %14, %69, %78 \n \
v_mac_f32 %16, %66, %72 \n \ v_mac_f32 %15, %69, %79 \n \
v_mac_f32 %17, %66, %73 \n \ v_mac_f32 %16, %69, %80 \n \
v_mac_f32 %18, %66, %74 \n \ v_mac_f32 %17, %69, %81 \n \
v_mac_f32 %19, %66, %75 \n \ v_mac_f32 %18, %69, %82 \n \
v_mac_f32 %20, %66, %76 \n \ v_mac_f32 %19, %69, %83 \n \
v_mac_f32 %21, %66, %77 \n \ v_mac_f32 %20, %70, %76 \n \
v_mac_f32 %22, %66, %78 \n \ v_mac_f32 %21, %70, %77 \n \
v_mac_f32 %23, %66, %79 \n \ v_mac_f32 %22, %70, %78 \n \
v_mac_f32 %24, %67, %72 \n \ v_mac_f32 %23, %70, %79 \n \
v_mac_f32 %25, %67, %73 \n \ v_mac_f32 %24, %70, %80 \n \
v_mac_f32 %26, %67, %74 \n \ v_mac_f32 %25, %70, %81 \n \
v_mac_f32 %27, %67, %75 \n \ v_mac_f32 %26, %70, %82 \n \
v_mac_f32 %28, %67, %76 \n \ v_mac_f32 %27, %70, %83 \n \
v_mac_f32 %29, %67, %77 \n \ v_mac_f32 %28, %71, %76 \n \
v_mac_f32 %30, %67, %78 \n \ v_mac_f32 %29, %71, %77 \n \
v_mac_f32 %31, %67, %79 \n \ v_mac_f32 %30, %71, %78 \n \
v_mac_f32 %32, %68, %72 \n \ v_mac_f32 %31, %71, %79 \n \
v_mac_f32 %33, %68, %73 \n \ v_mac_f32 %32, %71, %80 \n \
v_mac_f32 %34, %68, %74 \n \ v_mac_f32 %33, %71, %81 \n \
v_mac_f32 %35, %68, %75 \n \ v_mac_f32 %34, %71, %82 \n \
v_mac_f32 %36, %68, %76 \n \ v_mac_f32 %35, %71, %83 \n \
v_mac_f32 %37, %68, %77 \n \ v_mac_f32 %36, %72, %76 \n \
v_mac_f32 %38, %68, %78 \n \ v_mac_f32 %37, %72, %77 \n \
v_mac_f32 %39, %68, %79 \n \ v_mac_f32 %38, %72, %78 \n \
v_mac_f32 %40, %69, %72 \n \ v_mac_f32 %39, %72, %79 \n \
v_mac_f32 %41, %69, %73 \n \ v_mac_f32 %40, %72, %80 \n \
v_mac_f32 %42, %69, %74 \n \ v_mac_f32 %41, %72, %81 \n \
v_mac_f32 %43, %69, %75 \n \ v_mac_f32 %42, %72, %82 \n \
v_mac_f32 %44, %69, %76 \n \ v_mac_f32 %43, %72, %83 \n \
v_mac_f32 %45, %69, %77 \n \ v_mac_f32 %44, %73, %76 \n \
v_mac_f32 %46, %69, %78 \n \ v_mac_f32 %45, %73, %77 \n \
v_mac_f32 %47, %69, %79 \n \ v_mac_f32 %46, %73, %78 \n \
v_mac_f32 %48, %70, %72 \n \ v_mac_f32 %47, %73, %79 \n \
v_mac_f32 %49, %70, %73 \n \ v_mac_f32 %48, %73, %80 \n \
v_mac_f32 %50, %70, %74 \n \ v_mac_f32 %49, %73, %81 \n \
v_mac_f32 %51, %70, %75 \n \ v_mac_f32 %50, %73, %82 \n \
v_mac_f32 %52, %70, %76 \n \ v_mac_f32 %51, %73, %83 \n \
v_mac_f32 %53, %70, %77 \n \ v_mac_f32 %52, %74, %76 \n \
v_mac_f32 %54, %70, %78 \n \ v_mac_f32 %53, %74, %77 \n \
v_mac_f32 %55, %70, %79 \n \ v_mac_f32 %54, %74, %78 \n \
v_mac_f32 %56, %71, %72 \n \ v_mac_f32 %55, %74, %79 \n \
v_mac_f32 %57, %71, %73 \n \ v_mac_f32 %56, %74, %80 \n \
v_mac_f32 %58, %71, %74 \n \ v_mac_f32 %57, %74, %81 \n \
v_mac_f32 %59, %71, %75 \n \ v_mac_f32 %58, %74, %82 \n \
v_mac_f32 %60, %71, %76 \n \ v_mac_f32 %59, %74, %83 \n \
v_mac_f32 %61, %71, %77 \n \ v_mac_f32 %60, %75, %76 \n \
v_mac_f32 %62, %71, %78 \n \ v_mac_f32 %61, %75, %77 \n \
v_mac_f32 %63, %71, %79 \n \ v_mac_f32 %62, %75, %78 \n \
v_mac_f32 %63, %75, %79 \n \
v_mac_f32 %64, %75, %80 \n \
v_mac_f32 %65, %75, %81 \n \
v_mac_f32 %66, %75, %82 \n \
v_mac_f32 %67, %75, %83 \n \
" "
: :
"=v"(reg[0]),
"=v"(reg[1]),
"=v"(reg[2]),
"=v"(reg[3]),
"=v"(p_c_thread[0]), "=v"(p_c_thread[0]),
"=v"(p_c_thread[1]), "=v"(p_c_thread[1]),
"=v"(p_c_thread[2]), "=v"(p_c_thread[2]),
...@@ -713,70 +859,72 @@ struct BlockwiseGemmBlockABlockBThreadCTransANormalBNormalC_v2 ...@@ -713,70 +859,72 @@ struct BlockwiseGemmBlockABlockBThreadCTransANormalBNormalC_v2
"v"(p_b_thread[5]), "v"(p_b_thread[5]),
"v"(p_b_thread[6]), "v"(p_b_thread[6]),
"v"(p_b_thread[7]), "v"(p_b_thread[7]),
"0"(p_c_thread[0]), "v"(__to_local((void *)(a_loc))),
"1"(p_c_thread[1]), "v"(__to_local((void *)(b_loc))),
"2"(p_c_thread[2]), "4"(p_c_thread[0]),
"3"(p_c_thread[3]), "5"(p_c_thread[1]),
"4"(p_c_thread[4]), "6"(p_c_thread[2]),
"5"(p_c_thread[5]), "7"(p_c_thread[3]),
"6"(p_c_thread[6]), "8"(p_c_thread[4]),
"7"(p_c_thread[7]), "9"(p_c_thread[5]),
"8"(p_c_thread[8]), "10"(p_c_thread[6]),
"9"(p_c_thread[9]), "11"(p_c_thread[7]),
"10"(p_c_thread[10]), "12"(p_c_thread[8]),
"11"(p_c_thread[11]), "13"(p_c_thread[9]),
"12"(p_c_thread[12]), "14"(p_c_thread[10]),
"13"(p_c_thread[13]), "15"(p_c_thread[11]),
"14"(p_c_thread[14]), "16"(p_c_thread[12]),
"15"(p_c_thread[15]), "17"(p_c_thread[13]),
"16"(p_c_thread[16]), "18"(p_c_thread[14]),
"17"(p_c_thread[17]), "19"(p_c_thread[15]),
"18"(p_c_thread[18]), "20"(p_c_thread[16]),
"19"(p_c_thread[19]), "21"(p_c_thread[17]),
"20"(p_c_thread[20]), "22"(p_c_thread[18]),
"21"(p_c_thread[21]), "23"(p_c_thread[19]),
"22"(p_c_thread[22]), "24"(p_c_thread[20]),
"23"(p_c_thread[23]), "25"(p_c_thread[21]),
"24"(p_c_thread[24]), "26"(p_c_thread[22]),
"25"(p_c_thread[25]), "27"(p_c_thread[23]),
"26"(p_c_thread[26]), "28"(p_c_thread[24]),
"27"(p_c_thread[27]), "29"(p_c_thread[25]),
"28"(p_c_thread[28]), "30"(p_c_thread[26]),
"29"(p_c_thread[29]), "31"(p_c_thread[27]),
"30"(p_c_thread[30]), "32"(p_c_thread[28]),
"31"(p_c_thread[31]), "33"(p_c_thread[29]),
"32"(p_c_thread[32]), "34"(p_c_thread[30]),
"33"(p_c_thread[33]), "35"(p_c_thread[31]),
"34"(p_c_thread[34]), "36"(p_c_thread[32]),
"35"(p_c_thread[35]), "37"(p_c_thread[33]),
"36"(p_c_thread[36]), "38"(p_c_thread[34]),
"37"(p_c_thread[37]), "39"(p_c_thread[35]),
"38"(p_c_thread[38]), "40"(p_c_thread[36]),
"39"(p_c_thread[39]), "41"(p_c_thread[37]),
"40"(p_c_thread[40]), "42"(p_c_thread[38]),
"41"(p_c_thread[41]), "43"(p_c_thread[39]),
"42"(p_c_thread[42]), "44"(p_c_thread[40]),
"43"(p_c_thread[43]), "45"(p_c_thread[41]),
"44"(p_c_thread[44]), "46"(p_c_thread[42]),
"45"(p_c_thread[45]), "47"(p_c_thread[43]),
"46"(p_c_thread[46]), "48"(p_c_thread[44]),
"47"(p_c_thread[47]), "49"(p_c_thread[45]),
"48"(p_c_thread[48]), "50"(p_c_thread[46]),
"49"(p_c_thread[49]), "51"(p_c_thread[47]),
"50"(p_c_thread[50]), "52"(p_c_thread[48]),
"51"(p_c_thread[51]), "53"(p_c_thread[49]),
"52"(p_c_thread[52]), "54"(p_c_thread[50]),
"53"(p_c_thread[53]), "55"(p_c_thread[51]),
"54"(p_c_thread[54]), "56"(p_c_thread[52]),
"55"(p_c_thread[55]), "57"(p_c_thread[53]),
"56"(p_c_thread[56]), "58"(p_c_thread[54]),
"57"(p_c_thread[57]), "59"(p_c_thread[55]),
"58"(p_c_thread[58]), "60"(p_c_thread[56]),
"59"(p_c_thread[59]), "61"(p_c_thread[57]),
"60"(p_c_thread[60]), "62"(p_c_thread[58]),
"61"(p_c_thread[61]), "63"(p_c_thread[59]),
"62"(p_c_thread[62]), "64"(p_c_thread[60]),
"63"(p_c_thread[63]) "65"(p_c_thread[61]),
"66"(p_c_thread[62]),
"67"(p_c_thread[63])
); );
#endif #endif
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment