Commit 5662fc11 authored by mtgu0705
Browse files

Move pk_i4 permute outside, and enable permuteB for B.

parent b5b87293
......@@ -22,7 +22,7 @@ using CElementOp = PassThrough;
static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization::Default;
-static constexpr bool PermuteB = false;
+static constexpr bool PermuteB = true;
static constexpr ck::index_t Scale_Block_N = 1;
static constexpr ck::index_t Scale_Block_K = 128;
......@@ -199,7 +199,7 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
}
// vector pk_i4x4 permute
-#if 0
+#if 1
for(int i = 0; i < N; i++)
{
for(int j = 0; j < K; j += 8)
......
......@@ -86,7 +86,7 @@ struct PassThroughPack8
__host__ __device__ constexpr void operator()(ck::half8_t& y, const ck::pk_i4x4_t& x) const
{
-#if 1
+#if 0
int x_permute = 0;
int bits4_0 = (bit_cast<int>(x) >> 0) & 0xF;
int bits4_1 = (bit_cast<int>(x) >> 4) & 0xF;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment