Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ktransformers
Commits
216a63b8
Unverified
Commit
216a63b8
authored
Mar 03, 2025
by
Azure
Committed by
GitHub
Mar 03, 2025
Browse files
Merge pull request #754 from moonshadow-25/dev023
Support for IQ1_S(Dynamic 1.58-bit)
parents
798e1d0c
d24d3693
Changes
3
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
1870 additions
and
89 deletions
+1870
-89
third_party/llamafile/iqk_mul_mat.inc
third_party/llamafile/iqk_mul_mat.inc
+1854
-75
third_party/llamafile/sgemm.h
third_party/llamafile/sgemm.h
+8
-3
third_party/llamafile/tinyblas_cpu_sgemm.inc
third_party/llamafile/tinyblas_cpu_sgemm.inc
+8
-11
No files found.
third_party/llamafile/iqk_mul_mat.inc
View file @
216a63b8
This diff is collapsed.
Click to expand it.
third_party/llamafile/sgemm.h
View file @
216a63b8
...
@@ -12,10 +12,15 @@ extern "C" {
...
@@ -12,10 +12,15 @@ extern "C" {
struct
ggml_tensor
;
struct
ggml_tensor
;
struct
ggml_compute_params
;
struct
ggml_compute_params
;
/*moonll old
add more params typeb...
*/
bool
iqk_mul_mat
(
long
,
long
,
long
,
int
,
const
void
*
,
long
,
int
,
const
void
*
,
long
,
float
*
,
long
,
int
,
int
);
bool
iqk_mul_mat_zen4
(
long
,
long
,
long
,
int
,
const
void
*
,
long
,
int
,
const
void
*
,
long
,
float
*
,
long
,
int
,
int
);
bool
iqk_mul_mat_arm82
(
long
,
long
,
long
,
int
,
const
void
*
,
long
,
int
,
const
void
*
,
long
,
float
*
,
long
,
int
,
int
);
bool
iqk_mul_mat
(
long
,
long
,
long
,
int
,
const
void
*
,
const
void
*
,
float
*
,
long
,
int
,
int
);
bool
iqk_mul_mat_zen4
(
long
,
long
,
long
,
int
,
const
void
*
,
const
void
*
,
float
*
,
long
,
int
,
int
);
bool
iqk_mul_mat_arm82
(
long
,
long
,
long
,
int
,
const
void
*
,
const
void
*
,
float
*
,
long
,
int
,
int
);
bool
iqk_mul_mat_moe
(
long
,
long
,
long
,
int
,
int
,
const
void
*
,
const
void
*
,
float
*
,
long
,
long
,
const
void
*
,
int
,
int
);
bool
iqk_mul_mat_moe
(
long
,
long
,
long
,
int
,
int
,
const
void
*
,
const
void
*
,
float
*
,
long
,
long
,
const
void
*
,
int
,
int
);
bool
iqk_mul_mat_moe_zen4
(
long
,
long
,
long
,
int
,
int
,
const
void
*
,
const
void
*
,
float
*
,
long
,
long
,
const
void
*
,
int
,
int
);
bool
iqk_mul_mat_moe_zen4
(
long
,
long
,
long
,
int
,
int
,
const
void
*
,
const
void
*
,
float
*
,
long
,
long
,
const
void
*
,
int
,
int
);
...
...
third_party/llamafile/tinyblas_cpu_sgemm.inc
View file @
216a63b8
...
@@ -323,20 +323,17 @@ bool llamafile_sgemm(long m, long n, long k, const void* A, long lda, const void
...
@@ -323,20 +323,17 @@ bool llamafile_sgemm(long m, long n, long k, const void* A, long lda, const void
#if QK_K == 256
#if QK_K == 256
#if defined(__x86_64__) || defined(_M_X64)
#if defined(__x86_64__) || defined(_M_X64)
#if defined(__AVX2__) && (defined(__FMA__) || (defined(_MSC_VER) && (defined(__AVX2__) || defined(__AVX512F__))))
#if defined(__AVX2__) && (defined(__FMA__) || (defined(_MSC_VER) && (defined(__AVX2__) || defined(__AVX512F__))))
// if (X86_CHECK(AVX2) && X86_CHECK(FMA)) {
/*
if
(
Btype
==
GGML_TYPE_Q8_K
&&
Ctype
==
GGML_TYPE_F32
)
{
moonll
if
(
iqk_mul_mat
(
m
,
n
,
k
*
QK_K
,
Atype
,
A
,
B
,
(
float
*
)
C
,
ldc
,
ith
,
nth
))
{
more Btype accept
return
true
;
}*/
}
}
if
(
Ctype
==
GGML_TYPE_F32
){
if
((
Btype
==
GGML_TYPE_Q8_0
||
Btype
==
GGML_TYPE_Q8_1
)
&&
Ctype
==
GGML_TYPE_F32
)
{
if
(
iqk_mul_mat
(
m
,
n
,
k
*
ggml_blck_size
(
ggml_type
(
Atype
)),
Atype
,
A
,
lda
,
Btype
,
B
,
ldb
,
(
float
*
)
C
,
ldc
,
ith
,
nth
))
{
// assert(QK8_0 == QK8_1 == QK4_0 == QK4_1 == QK5_0 == QK5_1 == 32);
assert
((
QK8_0
==
32
)
&&
(
QK8_1
==
32
)
&&
(
QK4_0
==
32
)
&&
(
QK4_1
==
32
)
&&
(
QK5_0
==
32
)
&&
(
QK5_1
==
32
));
if
(
iqk_mul_mat
(
m
,
n
,
k
*
QK8_0
,
Atype
,
A
,
B
,
(
float
*
)
C
,
ldc
,
ith
,
nth
))
{
return
true
;
return
true
;
}
}
}
}
// }
#endif
#endif
#elif defined __aarch64__ && defined __ARM_FEATURE_DOTPROD && !defined _MSC_VER
#elif defined __aarch64__ && defined __ARM_FEATURE_DOTPROD && !defined _MSC_VER
if
(
Btype
==
GGML_TYPE_Q8_K
&&
Ctype
==
GGML_TYPE_F32
)
{
if
(
Btype
==
GGML_TYPE_Q8_K
&&
Ctype
==
GGML_TYPE_F32
)
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment