Unverified Commit 216a63b8 authored by Azure's avatar Azure Committed by GitHub
Browse files

Merge pull request #754 from moonshadow-25/dev023

Support for IQ1_S(Dynamic 1.58-bit)
parents 798e1d0c d24d3693
This diff is collapsed.
......@@ -12,10 +12,15 @@ extern "C" {
struct ggml_tensor;
struct ggml_compute_params;
/*moonll old
add more params typeb...
*/
bool iqk_mul_mat(long, long, long,int, const void*, long, int, const void*, long,float*, long, int, int);
bool iqk_mul_mat_zen4(long, long, long,int, const void*, long, int, const void*, long,float*, long, int, int);
bool iqk_mul_mat_arm82(long, long, long,int, const void*, long, int, const void*, long,float*, long, int, int);
bool iqk_mul_mat(long, long, long, int, const void*, const void*, float*, long, int, int);
bool iqk_mul_mat_zen4(long, long, long, int, const void*, const void*, float*, long, int, int);
bool iqk_mul_mat_arm82(long, long, long, int, const void*, const void*, float*, long, int, int);
bool iqk_mul_mat_moe(long, long, long, int, int, const void*, const void*, float*, long, long, const void*, int, int);
bool iqk_mul_mat_moe_zen4(long, long, long, int, int, const void*, const void*, float*, long, long, const void*, int, int);
......
......@@ -323,20 +323,17 @@ bool llamafile_sgemm(long m, long n, long k, const void* A, long lda, const void
#if QK_K == 256
#if defined(__x86_64__) || defined(_M_X64)
#if defined(__AVX2__) && (defined(__FMA__) || (defined(_MSC_VER) && (defined(__AVX2__) || defined(__AVX512F__))))
// if (X86_CHECK(AVX2) && X86_CHECK(FMA)) {
if (Btype == GGML_TYPE_Q8_K && Ctype == GGML_TYPE_F32) {
if (iqk_mul_mat(m, n, k * QK_K, Atype, A, B, (float*)C, ldc, ith, nth)) {
return true;
}
}
if ((Btype == GGML_TYPE_Q8_0 || Btype == GGML_TYPE_Q8_1) && Ctype == GGML_TYPE_F32) {
// assert(QK8_0 == QK8_1 == QK4_0 == QK4_1 == QK5_0 == QK5_1 == 32);
assert((QK8_0 == 32) && (QK8_1 == 32) && (QK4_0 == 32) && (QK4_1 == 32) && (QK5_0 == 32) && (QK5_1 == 32));
if (iqk_mul_mat(m, n, k * QK8_0, Atype, A, B, (float*)C, ldc, ith, nth)) {
/*
moonll
more Btype accept
}*/
if (Ctype == GGML_TYPE_F32){
if (iqk_mul_mat(m, n, k * ggml_blck_size(ggml_type(Atype)), Atype, A,lda,Btype, B,ldb, (float*)C, ldc, ith, nth)) {
return true;
}
}
// }
#endif
#elif defined __aarch64__ && defined __ARM_FEATURE_DOTPROD && !defined _MSC_VER
if (Btype == GGML_TYPE_Q8_K && Ctype == GGML_TYPE_F32) {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment