Commit 6835a32e authored by Shucai Xiao's avatar Shucai Xiao
Browse files

backup temp code changes.

parent 5e24fdf9
......@@ -215,6 +215,10 @@ argument miopen_gemm::compute(context& ctx,
auto to_pointer = [&](auto&& arg) { return to_rocblas_type(as.from(arg.data())); };
if(num_matrices == 1)
{
// the rocblas_gemm API handles inputs and output matrices as
// column-major format. When doing a C = A * B, we actually do
// C^T = (B^T) * (A^T). That is the reason we input args[1] as
// A and args[0] as B in calling the rocblas_gemm.
generic_rocblas_gemm(as,
ctx.get_stream().get_rocblas(),
transb ? rocblas_operation_transpose : rocblas_operation_none,
......
......@@ -90,10 +90,33 @@ argument miopen_quant_gemm::compute(context& ctx,
assert(!transa or (lda % 4 == 0));
assert(transb or (ldb % 4 == 0));
// need to pack B in thi scenario
if (!transb)
{
int nb = 4;
for(int i_m = 0; i_m < m; i_m++)
{
for(int i_k = 0; i_k < k; i_k++)
{
A_packed[i_k % nb + (i_m + (i_k / nb) * lda) * nb] = A[i_m + i_k * lda];
}
}
}
// need to pack A in this scenario
if (transa)
{
}
auto num_matrices = std::accumulate(
out_lens.rbegin() + 2, out_lens.rend(), std::size_t{1}, std::multiplies<std::size_t>());
if(num_matrices == 1)
{
// the rocblas_gemm API handles inputs and output matrices as
// column-major format. When doing a C = A * B, we actually do
// C^T = (B^T) * (A^T). That is the reason we input args[1] as
// A and args[0] as B in calling the rocblas_gemm.
generic_rocblas_gemm_ex(ctx.get_stream().get_rocblas(),
transb ? rocblas_operation_transpose : rocblas_operation_none,
transa ? rocblas_operation_transpose : rocblas_operation_none,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment