Unverified commit 2753a4a6, authored by ZiWei Yuan, committed by GitHub
Browse files

Merge pull request #810 from kvcache-ai/v0.2.3

V0.2.3
parents f03faa53 9c343b4f
...@@ -323,20 +323,17 @@ bool llamafile_sgemm(long m, long n, long k, const void* A, long lda, const void ...@@ -323,20 +323,17 @@ bool llamafile_sgemm(long m, long n, long k, const void* A, long lda, const void
#if QK_K == 256 #if QK_K == 256
#if defined(__x86_64__) || defined(_M_X64) #if defined(__x86_64__) || defined(_M_X64)
#if defined(__AVX2__) && (defined(__FMA__) || (defined(_MSC_VER) && (defined(__AVX2__) || defined(__AVX512F__)))) #if defined(__AVX2__) && (defined(__FMA__) || (defined(_MSC_VER) && (defined(__AVX2__) || defined(__AVX512F__))))
// if (X86_CHECK(AVX2) && X86_CHECK(FMA)) { /*
if (Btype == GGML_TYPE_Q8_K && Ctype == GGML_TYPE_F32) { moonll
if (iqk_mul_mat(m, n, k * QK_K, Atype, A, B, (float*)C, ldc, ith, nth)) { more Btype accept
return true; }*/
}
} if (Ctype == GGML_TYPE_F32){
if ((Btype == GGML_TYPE_Q8_0 || Btype == GGML_TYPE_Q8_1) && Ctype == GGML_TYPE_F32) { if (iqk_mul_mat(m, n, k * ggml_blck_size(ggml_type(Atype)), Atype, A,lda,Btype, B,ldb, (float*)C, ldc, ith, nth)) {
// assert(QK8_0 == QK8_1 == QK4_0 == QK4_1 == QK5_0 == QK5_1 == 32);
assert((QK8_0 == 32) && (QK8_1 == 32) && (QK4_0 == 32) && (QK4_1 == 32) && (QK5_0 == 32) && (QK5_1 == 32));
if (iqk_mul_mat(m, n, k * QK8_0, Atype, A, B, (float*)C, ldc, ith, nth)) {
return true; return true;
} }
} }
// }
#endif #endif
#elif defined __aarch64__ && defined __ARM_FEATURE_DOTPROD && !defined _MSC_VER #elif defined __aarch64__ && defined __ARM_FEATURE_DOTPROD && !defined _MSC_VER
if (Btype == GGML_TYPE_Q8_K && Ctype == GGML_TYPE_F32) { if (Btype == GGML_TYPE_Q8_K && Ctype == GGML_TYPE_F32) {
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment