__device__fwdStreaming(constParams¶ms,constBlockInfo&binfo,constintkBlockM,constintkBlockN,constintbatch_idx,constinthead_idx,constintloop_step_idx,intn_block_min,intn_block_max){//row first
__device__fwdExactStreaming(constParams¶ms,constBlockInfo&binfo,constintkBlockM,constintkBlockN,constintbatch_idx,constinthead_idx,constintloop_step_idx,intn_block_min,intn_block_max){//row first
__device__fwdBlockmask(constParams¶ms,constBlockInfo&binfo,constintkBlockM,constintkBlockN,constintbatch_idx,constinthead_idx,constintloop_step_idx,intn_block_min,intn_block_max){//row first
__device__bwdStreaming(constParams¶ms,constBlockInfo&binfo,constintkBlockM,constintkBlockN,constintbatch_idx,constinthead_idx,constintloop_step_idx,intm_block_min,intm_block_max){// col first