usingElementA=cutlass::mx_float4_t<cutlass::float_e2m1_t>;// Element type for A matrix operand
usingLayoutATag=cutlass::layout::RowMajor;// Layout type for A matrix operand
staticconstexprintAlignmentA=128;// Memory access granularity/alignment of A matrix in units of elements (up to 16 bytes)
// B matrix configuration
usingElementB=cutlass::mx_float4_t<cutlass::float_e2m1_t>;// Element type for B matrix operand
usingLayoutBTag=cutlass::layout::ColumnMajor;// Layout type for B matrix operand
staticconstexprintAlignmentB=128;// Memory access granularity/alignment of B matrix in units of elements (up to 16 bytes)
// C/D matrix configuration
usingElementD=cutlass::bfloat16_t;// Element type for D matrix operand
usingElementC=cutlass::bfloat16_t;// Element type for C matrix operand
usingLayoutCTag=cutlass::layout::RowMajor;// Layout type for C matrix operand
usingLayoutDTag=cutlass::layout::RowMajor;// Layout type for D matrix operand
staticconstexprintAlignmentD=128/cutlass::sizeof_bits<ElementD>::value;// Memory access granularity/alignment of C matrix in units of elements (up to 16 bytes)
staticconstexprintAlignmentC=128/cutlass::sizeof_bits<ElementC>::value;// Memory access granularity/alignment of C matrix in units of elements (up to 16 bytes)
// Kernel functional config
usingElementAccumulator=float;// Element type for internal accumulation
usingArchTag=cutlass::arch::Sm120;// Tag indicating the minimum SM that supports the intended feature
usingOperatorClass=cutlass::arch::OpClassBlockScaledTensorOp;// Operator class tag
usingLayoutSFA=typenameGemm::GemmKernel::CollectiveMainloop::LayoutSFA;// Scale Factor tensors have an interleaved layout. Bring Layout instead of stride.
usingLayoutSFB=typenameGemm::GemmKernel::CollectiveMainloop::LayoutSFB;// Scale Factor tensors have an interleaved layout. Bring Layout instead of stride.