Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
1deafd82
Unverified
Commit
1deafd82
authored
Jan 08, 2025
by
Jeffrey Morgan
Committed by
GitHub
Jan 08, 2025
Browse files
llama: update vendored code to commit 46e3556 (#8308)
parent
57f038ec
Changes
305
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
40 additions
and
27 deletions
+40
-27
llama/ggml-cpu-quants.c
llama/ggml-cpu-quants.c
+6
-2
llama/ggml-cpu-quants.h
llama/ggml-cpu-quants.h
+1
-1
llama/ggml-cpu-traits.cpp
llama/ggml-cpu-traits.cpp
+1
-1
llama/ggml-cpu-traits.h
llama/ggml-cpu-traits.h
+1
-1
llama/ggml-cpu.c
llama/ggml-cpu.c
+7
-7
llama/ggml-cpu.cpp
llama/ggml-cpu.cpp
+10
-1
llama/ggml-cpu.h
llama/ggml-cpu.h
+1
-1
llama/ggml-cuda.h
llama/ggml-cuda.h
+1
-1
llama/ggml-cuda/acc.cu
llama/ggml-cuda/acc.cu
+1
-1
llama/ggml-cuda/acc.cuh
llama/ggml-cuda/acc.cuh
+1
-1
llama/ggml-cuda/arange.cu
llama/ggml-cuda/arange.cu
+1
-1
llama/ggml-cuda/arange.cuh
llama/ggml-cuda/arange.cuh
+1
-1
llama/ggml-cuda/argmax.cu
llama/ggml-cuda/argmax.cu
+1
-1
llama/ggml-cuda/argmax.cuh
llama/ggml-cuda/argmax.cuh
+1
-1
llama/ggml-cuda/argsort.cu
llama/ggml-cuda/argsort.cu
+1
-1
llama/ggml-cuda/argsort.cuh
llama/ggml-cuda/argsort.cuh
+1
-1
llama/ggml-cuda/binbcast.cu
llama/ggml-cuda/binbcast.cu
+1
-1
llama/ggml-cuda/binbcast.cuh
llama/ggml-cuda/binbcast.cuh
+1
-1
llama/ggml-cuda/clamp.cu
llama/ggml-cuda/clamp.cu
+1
-1
llama/ggml-cuda/clamp.cuh
llama/ggml-cuda/clamp.cuh
+1
-1
No files found.
llama/ggml-cpu-quants.c
View file @
1deafd82
/**
* llama.cpp - commit
ba1cb19cdd0d92e012e0f6e009e0620f854b6afd
- do not edit this file
* llama.cpp - commit
46e3556e01b824e52395fb050b29804b6cff2a7c
- do not edit this file
*
* MIT License
*
...
...
@@ -129,10 +129,14 @@ static inline __m256 sum_i16_pairs_float(const __m256i x) {
}
static
inline
__m256
mul_sum_us8_pairs_float
(
const
__m256i
ax
,
const
__m256i
sy
)
{
#if
defined(__AVXVNNI__) || (
defined(__AVX512VNNI__) && defined(__AVX512VL__)
)
#if defined(__AVX512VNNI__) && defined(__AVX512VL__)
const
__m256i
zero
=
_mm256_setzero_si256
();
const
__m256i
summed_pairs
=
_mm256_dpbusd_epi32
(
zero
,
ax
,
sy
);
return
_mm256_cvtepi32_ps
(
summed_pairs
);
#elif defined(__AVXVNNI__)
const
__m256i
zero
=
_mm256_setzero_si256
();
const
__m256i
summed_pairs
=
_mm256_dpbusd_avx_epi32
(
zero
,
ax
,
sy
);
return
_mm256_cvtepi32_ps
(
summed_pairs
);
#else
// Perform multiplication and create 16-bit values
const
__m256i
dot
=
_mm256_maddubs_epi16
(
ax
,
sy
);
...
...
llama/ggml-cpu-quants.h
View file @
1deafd82
/**
* llama.cpp - commit
ba1cb19cdd0d92e012e0f6e009e0620f854b6afd
- do not edit this file
* llama.cpp - commit
46e3556e01b824e52395fb050b29804b6cff2a7c
- do not edit this file
*
* MIT License
*
...
...
llama/ggml-cpu-traits.cpp
View file @
1deafd82
/**
* llama.cpp - commit
ba1cb19cdd0d92e012e0f6e009e0620f854b6afd
- do not edit this file
* llama.cpp - commit
46e3556e01b824e52395fb050b29804b6cff2a7c
- do not edit this file
*
* MIT License
*
...
...
llama/ggml-cpu-traits.h
View file @
1deafd82
/**
* llama.cpp - commit
ba1cb19cdd0d92e012e0f6e009e0620f854b6afd
- do not edit this file
* llama.cpp - commit
46e3556e01b824e52395fb050b29804b6cff2a7c
- do not edit this file
*
* MIT License
*
...
...
llama/ggml-cpu.c
View file @
1deafd82
/**
* llama.cpp - commit
ba1cb19cdd0d92e012e0f6e009e0620f854b6afd
- do not edit this file
* llama.cpp - commit
46e3556e01b824e52395fb050b29804b6cff2a7c
- do not edit this file
*
* MIT License
*
...
...
@@ -1012,7 +1012,7 @@ inline static void __wasm_f16x4_store(ggml_fp16_t * p, v128_t x) {
#define GGML_F16_STEP 32
#define GGML_F16_EPR 4
static
inline
__m128
__sse_f16x4_load
(
ggml_fp16_t
*
x
)
{
static
inline
__m128
__sse_f16x4_load
(
const
ggml_fp16_t
*
x
)
{
float
tmp
[
4
];
tmp
[
0
]
=
GGML_FP16_TO_FP32
(
x
[
0
]);
...
...
@@ -1023,7 +1023,7 @@ static inline __m128 __sse_f16x4_load(ggml_fp16_t *x) {
return
_mm_loadu_ps
(
tmp
);
}
static
inline
void
__sse_f16x4_store
(
ggml_fp16_t
*
x
,
__m128
y
)
{
static
inline
void
__sse_f16x4_store
(
ggml_fp16_t
*
x
,
__m128
y
)
{
float
arr
[
4
];
_mm_storeu_ps
(
arr
,
y
);
...
...
@@ -7445,14 +7445,14 @@ static void ggml_compute_forward_mul_mat(
if
(
src1_cont
)
{
for
(
int64_t
i13
=
0
;
i13
<
ne13
;
i13
++
)
for
(
int64_t
i12
=
0
;
i12
<
ne12
;
i12
++
)
if
(
!
llamafile_sgemm
(
ne01
,
ne11
,
ne00
/
ggml_blck_size
(
src0
->
type
),
if
(
!
llamafile_sgemm
(
params
,
ne01
,
ne11
,
ne00
/
ggml_blck_size
(
src0
->
type
),
(
const
char
*
)
src0
->
data
+
i12
/
r2
*
nb02
+
i13
/
r3
*
nb03
,
nb01
/
ggml_type_size
(
src0
->
type
),
(
const
char
*
)
src1
->
data
+
i12
*
nb12
+
i13
*
nb13
,
nb11
/
ggml_type_size
(
src1
->
type
),
(
char
*
)
dst
->
data
+
i12
*
nb2
+
i13
*
nb3
,
nb1
/
ggml_type_size
(
dst
->
type
),
ith
,
nth
,
src0
->
type
,
src1
->
type
,
dst
->
type
))
...
...
@@ -7497,14 +7497,14 @@ UseGgmlGemm1:;
for
(
int64_t
i13
=
0
;
i13
<
ne13
;
i13
++
)
for
(
int64_t
i12
=
0
;
i12
<
ne12
;
i12
++
)
if
(
!
llamafile_sgemm
(
ne01
,
ne11
,
ne00
/
ggml_blck_size
(
src0
->
type
),
if
(
!
llamafile_sgemm
(
params
,
ne01
,
ne11
,
ne00
/
ggml_blck_size
(
src0
->
type
),
(
const
char
*
)
src0
->
data
+
i12
/
r2
*
nb02
+
i13
/
r3
*
nb03
,
nb01
/
ggml_type_size
(
src0
->
type
),
(
const
char
*
)
wdata
+
(
i12
*
ne11
+
i13
*
ne12
*
ne11
)
*
row_size
,
row_size
/
ggml_type_size
(
vec_dot_type
),
(
char
*
)
dst
->
data
+
i12
*
nb2
+
i13
*
nb3
,
nb1
/
ggml_type_size
(
dst
->
type
),
ith
,
nth
,
src0
->
type
,
vec_dot_type
,
dst
->
type
))
...
...
llama/ggml-cpu.cpp
View file @
1deafd82
/**
* llama.cpp - commit
ba1cb19cdd0d92e012e0f6e009e0620f854b6afd
- do not edit this file
* llama.cpp - commit
46e3556e01b824e52395fb050b29804b6cff2a7c
- do not edit this file
*
* MIT License
*
...
...
@@ -419,8 +419,11 @@ static bool ggml_backend_cpu_device_supports_op(ggml_backend_dev_t dev, const st
switch
(
op
->
op
)
{
case
GGML_OP_CPY
:
return
op
->
type
!=
GGML_TYPE_IQ3_XXS
&&
op
->
type
!=
GGML_TYPE_IQ3_S
&&
op
->
type
!=
GGML_TYPE_IQ2_XXS
&&
op
->
type
!=
GGML_TYPE_IQ2_XS
&&
op
->
type
!=
GGML_TYPE_IQ2_S
&&
op
->
type
!=
GGML_TYPE_IQ1_S
&&
op
->
type
!=
GGML_TYPE_IQ1_M
;
// missing type_traits.from_float
case
GGML_OP_MUL_MAT
:
...
...
@@ -544,6 +547,12 @@ static ggml_backend_feature * ggml_backend_cpu_get_features(ggml_backend_reg_t r
if
(
ggml_cpu_has_sve
())
{
features
.
push_back
({
"SVE"
,
"1"
});
}
if
(
ggml_cpu_has_dotprod
())
{
features
.
push_back
({
"DOTPROD"
,
"1"
});
}
if
(
ggml_cpu_has_matmul_int8
())
{
features
.
push_back
({
"MATMUL_INT8"
,
"1"
});
}
if
(
ggml_cpu_get_sve_cnt
()
>
0
)
{
static
std
::
string
sve_cnt
=
std
::
to_string
(
ggml_cpu_get_sve_cnt
());
features
.
push_back
({
"SVE_CNT"
,
sve_cnt
.
c_str
()
});
...
...
llama/ggml-cpu.h
View file @
1deafd82
/**
* llama.cpp - commit
ba1cb19cdd0d92e012e0f6e009e0620f854b6afd
- do not edit this file
* llama.cpp - commit
46e3556e01b824e52395fb050b29804b6cff2a7c
- do not edit this file
*
* MIT License
*
...
...
llama/ggml-cuda.h
View file @
1deafd82
/**
* llama.cpp - commit
ba1cb19cdd0d92e012e0f6e009e0620f854b6afd
- do not edit this file
* llama.cpp - commit
46e3556e01b824e52395fb050b29804b6cff2a7c
- do not edit this file
*
* MIT License
*
...
...
llama/ggml-cuda/acc.cu
View file @
1deafd82
/**
* llama.cpp - commit
ba1cb19cdd0d92e012e0f6e009e0620f854b6afd
- do not edit this file
* llama.cpp - commit
46e3556e01b824e52395fb050b29804b6cff2a7c
- do not edit this file
*
* MIT License
*
...
...
llama/ggml-cuda/acc.cuh
View file @
1deafd82
/**
* llama.cpp - commit
ba1cb19cdd0d92e012e0f6e009e0620f854b6afd
- do not edit this file
* llama.cpp - commit
46e3556e01b824e52395fb050b29804b6cff2a7c
- do not edit this file
*
* MIT License
*
...
...
llama/ggml-cuda/arange.cu
View file @
1deafd82
/**
* llama.cpp - commit
ba1cb19cdd0d92e012e0f6e009e0620f854b6afd
- do not edit this file
* llama.cpp - commit
46e3556e01b824e52395fb050b29804b6cff2a7c
- do not edit this file
*
* MIT License
*
...
...
llama/ggml-cuda/arange.cuh
View file @
1deafd82
/**
* llama.cpp - commit
ba1cb19cdd0d92e012e0f6e009e0620f854b6afd
- do not edit this file
* llama.cpp - commit
46e3556e01b824e52395fb050b29804b6cff2a7c
- do not edit this file
*
* MIT License
*
...
...
llama/ggml-cuda/argmax.cu
View file @
1deafd82
/**
* llama.cpp - commit
ba1cb19cdd0d92e012e0f6e009e0620f854b6afd
- do not edit this file
* llama.cpp - commit
46e3556e01b824e52395fb050b29804b6cff2a7c
- do not edit this file
*
* MIT License
*
...
...
llama/ggml-cuda/argmax.cuh
View file @
1deafd82
/**
* llama.cpp - commit
ba1cb19cdd0d92e012e0f6e009e0620f854b6afd
- do not edit this file
* llama.cpp - commit
46e3556e01b824e52395fb050b29804b6cff2a7c
- do not edit this file
*
* MIT License
*
...
...
llama/ggml-cuda/argsort.cu
View file @
1deafd82
/**
* llama.cpp - commit
ba1cb19cdd0d92e012e0f6e009e0620f854b6afd
- do not edit this file
* llama.cpp - commit
46e3556e01b824e52395fb050b29804b6cff2a7c
- do not edit this file
*
* MIT License
*
...
...
llama/ggml-cuda/argsort.cuh
View file @
1deafd82
/**
* llama.cpp - commit
ba1cb19cdd0d92e012e0f6e009e0620f854b6afd
- do not edit this file
* llama.cpp - commit
46e3556e01b824e52395fb050b29804b6cff2a7c
- do not edit this file
*
* MIT License
*
...
...
llama/ggml-cuda/binbcast.cu
View file @
1deafd82
/**
* llama.cpp - commit
ba1cb19cdd0d92e012e0f6e009e0620f854b6afd
- do not edit this file
* llama.cpp - commit
46e3556e01b824e52395fb050b29804b6cff2a7c
- do not edit this file
*
* MIT License
*
...
...
llama/ggml-cuda/binbcast.cuh
View file @
1deafd82
/**
* llama.cpp - commit
ba1cb19cdd0d92e012e0f6e009e0620f854b6afd
- do not edit this file
* llama.cpp - commit
46e3556e01b824e52395fb050b29804b6cff2a7c
- do not edit this file
*
* MIT License
*
...
...
llama/ggml-cuda/clamp.cu
View file @
1deafd82
/**
* llama.cpp - commit
ba1cb19cdd0d92e012e0f6e009e0620f854b6afd
- do not edit this file
* llama.cpp - commit
46e3556e01b824e52395fb050b29804b6cff2a7c
- do not edit this file
*
* MIT License
*
...
...
llama/ggml-cuda/clamp.cuh
View file @
1deafd82
/**
* llama.cpp - commit
ba1cb19cdd0d92e012e0f6e009e0620f854b6afd
- do not edit this file
* llama.cpp - commit
46e3556e01b824e52395fb050b29804b6cff2a7c
- do not edit this file
*
* MIT License
*
...
...
Prev
1
2
3
4
5
6
…
16
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment