Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ktransformers
Commits
26bd889f
Commit
26bd889f
authored
Mar 09, 2025
by
liu.shen
Browse files
fix #829: 兼容Intel Cascade Lake架构的CPU
parent
407e1b9a
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
23 additions
and
2 deletions
+23
-2
third_party/llamafile/iqk_mul_mat.inc
third_party/llamafile/iqk_mul_mat.inc
+23
-2
No files found.
third_party/llamafile/iqk_mul_mat.inc
View file @
26bd889f
...
...
@@ -2385,7 +2385,12 @@ struct SimpleBits {
__m256i
values
[
4
];
};
// Issue #829: compile-time switch for the vectorized popcount path.
// HAVE_AVX512_POPCNT expands to 1 only when HAVE_FANCY_SIMD and
// __AVX512VPOPCNTDQ__ are both defined; in every other configuration it
// expands to 0. The macro is always defined, so test it with `#if`, not
// `#ifdef`.
#if !defined(HAVE_FANCY_SIMD) || !defined(__AVX512VPOPCNTDQ__)
#define HAVE_AVX512_POPCNT 0
#else
#define HAVE_AVX512_POPCNT 1
#endif
struct
EvenSignHelper
{
#if defined HAVE_FANCY_SIMD
...
...
@@ -2396,7 +2401,23 @@ struct EvenSignHelper {
};
IQK_ALWAYS_INLINE
void
sign_2_values
(
__m256i
aux
,
__m256i
*
values
)
const
{
aux
=
_mm256_and_si256
(
_mm256_srlv_epi32
(
aux
,
shifts
),
mask
);
auto
pcnt
=
_mm256_popcnt_epi32
(
aux
);
// fix for #829: 兼容Intel Cascade Lake架构的CPU,如果不支持AVX512VPOPCNTDQ扩展,则使用替代实现
#if HAVE_AVX512_POPCNT
auto
pcnt
=
_mm256_popcnt_epi32
(
aux
);
#else
// 提供替代实现,使用标准的位计数方法
__m256i
pcnt
;
int
*
pcnt_ptr
=
reinterpret_cast
<
int
*>
(
&
pcnt
);
int
*
aux_ptr
=
reinterpret_cast
<
int
*>
(
&
aux
);
// 直接获取 aux 的地址,避免不必要的复制
#pragma unroll 8 // 提示编译器展开循环,提高 SIMD 计算吞吐量
for
(
int
i
=
0
;
i
<
8
;
i
++
)
{
pcnt_ptr
[
i
]
=
__builtin_popcount
(
aux_ptr
[
i
]);
// 使用编译器内置 popcount
}
#endif
sbits_t
sbits
;
sbits
.
vec
=
_mm256_cvtepi32_epi8
(
_mm256_or_si256
(
aux
,
_mm256_slli_epi32
(
_mm256_and_si256
(
pcnt
,
mone
),
7
)));
values
[
0
]
=
_mm256_mask_sub_epi8
(
values
[
0
],
sbits
.
mask
[
0
],
_mm256_setzero_si256
(),
values
[
0
]);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment