Commit c78a5692 authored by fengzch's avatar fengzch
Browse files

fix: compile gemm_batched.cu complete

parent 9316940c
......@@ -34,16 +34,16 @@ Tensor gemm_batched_fp16(Tensor a, // FP16 row-major [(... batch ...), M, K]
LayoutO,
ElementOutput,
cutlass::arch::OpClassTensorOp,
cutlass::arch::Sm80,
cutlass::arch::Gfx928,
cutlass::gemm::GemmShape<32, 32, 64>,
cutlass::gemm::GemmShape<32, 32, 64>,
cutlass::gemm::GemmShape<16, 8, 16>,
cutlass::gemm::GemmShape<16, 16, 16>,
cutlass::epilogue::thread::LinearCombination<ElementOutput,
128 / cutlass::sizeof_bits<ElementOutput>::value,
ElementOutput,
ElementOutput>,
cutlass::gemm::threadblock::GemmBatchedIdentityThreadblockSwizzle,
2>;
1>;
auto sizeA = cutlass::MatrixCoord(M, K);
auto sizeB = cutlass::MatrixCoord(K, N);
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment