Commit 5662fc11 authored by mtgu0705
Browse files

Move the pk_i4 permute out of the PassThroughPack8 element-wise operator and into run_gemm, and enable PermuteB for the B matrix.

parent b5b87293
...@@ -22,7 +22,7 @@ using CElementOp = PassThrough; ...@@ -22,7 +22,7 @@ using CElementOp = PassThrough;
static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization::Default; static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization::Default;
static constexpr bool PermuteB = false; static constexpr bool PermuteB = true;
static constexpr ck::index_t Scale_Block_N = 1; static constexpr ck::index_t Scale_Block_N = 1;
static constexpr ck::index_t Scale_Block_K = 128; static constexpr ck::index_t Scale_Block_K = 128;
...@@ -199,7 +199,7 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config) ...@@ -199,7 +199,7 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
} }
// vector pk_i4x4 permute // vector pk_i4x4 permute
#if 0 #if 1
for(int i = 0; i < N; i++) for(int i = 0; i < N; i++)
{ {
for(int j = 0; j < K; j += 8) for(int j = 0; j < K; j += 8)
......
...@@ -86,7 +86,7 @@ struct PassThroughPack8 ...@@ -86,7 +86,7 @@ struct PassThroughPack8
__host__ __device__ constexpr void operator()(ck::half8_t& y, const ck::pk_i4x4_t& x) const __host__ __device__ constexpr void operator()(ck::half8_t& y, const ck::pk_i4x4_t& x) const
{ {
#if 1 #if 0
int x_permute = 0; int x_permute = 0;
int bits4_0 = (bit_cast<int>(x) >> 0) & 0xF; int bits4_0 = (bit_cast<int>(x) >> 0) & 0xF;
int bits4_1 = (bit_cast<int>(x) >> 4) & 0xF; int bits4_1 = (bit_cast<int>(x) >> 4) & 0xF;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment