OpenDAS / ollama · Commit 7a81daf0 (unverified)
Authored Dec 14, 2024 by Jeffrey Morgan; committed by GitHub, Dec 14, 2024.
llama: update vendor code to commit ba1cb19c (#8101)
Parent: 60f75560

Changes: 273 files in this commit; this page shows 20 changed files with 650 additions and 530 deletions (+650 -530).
llama/ggml-cpu-impl.h         +1   -1
llama/ggml-cpu-quants.c       +1   -1
llama/ggml-cpu-quants.h       +1   -1
llama/ggml-cpu-traits.cpp     +62  -0
llama/ggml-cpu-traits.h       +28  -9
llama/ggml-cpu.c              +501 -348
llama/ggml-cpu.cpp            +43  -140
llama/ggml-cpu.h              +1   -18
llama/ggml-cuda.h             +1   -1
llama/ggml-cuda/acc.cu        +1   -1
llama/ggml-cuda/acc.cuh       +1   -1
llama/ggml-cuda/arange.cu     +1   -1
llama/ggml-cuda/arange.cuh    +1   -1
llama/ggml-cuda/argmax.cu     +1   -1
llama/ggml-cuda/argmax.cuh    +1   -1
llama/ggml-cuda/argsort.cu    +1   -1
llama/ggml-cuda/argsort.cuh   +1   -1
llama/ggml-cuda/binbcast.cu   +1   -1
llama/ggml-cuda/binbcast.cuh  +1   -1
llama/ggml-cuda/clamp.cu      +1   -1
llama/ggml-cpu-impl.h, llama/ggml-cpu-quants.c, llama/ggml-cpu-quants.h

Each of these three files changes only the vendored-commit marker in its license header:

 /**
- * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file
+ * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file
  *
  * MIT License
  *
 ...
llama/ggml-cpu-traits.cpp (new file, mode 100644)

/**
 * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file
 *
 * MIT License
 *
 * Copyright (c) 2023-2024 The ggml authors
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "ggml-cpu-traits.h"

#include "ggml-backend-impl.h"
#include "ggml-backend.h"

namespace ggml::cpu {
tensor_traits::~tensor_traits() {}

extra_buffer_type::~extra_buffer_type() {}
}  // namespace ggml::cpu

bool ggml_cpu_extra_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * op) {
    for (auto extra : ggml_backend_cpu_get_extra_buffers_type()) {
        if (extra && extra->context) {
            auto buf_extra     = (ggml::cpu::extra_buffer_type *) extra->context;
            auto tensor_traits = buf_extra->get_tensor_traits(op);
            if (tensor_traits && tensor_traits->compute_forward(params, op)) {
                return true;
            }
        }
    }
    return false;
}

bool ggml_cpu_extra_work_size(int n_threads, const struct ggml_tensor * op, size_t * size) {
    for (auto extra : ggml_backend_cpu_get_extra_buffers_type()) {
        if (extra && extra->context) {
            auto buf_extra     = (ggml::cpu::extra_buffer_type *) extra->context;
            auto tensor_traits = buf_extra->get_tensor_traits(op);
            if (tensor_traits && tensor_traits->work_size(n_threads, op, *size)) {
                return true;
            }
        }
    }
    return false;
}
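
The two dispatchers above give registered "extra" buffer types first refusal on an op before the generic CPU kernels run. A minimal hedged sketch of the intended call pattern (the surrounding function here is illustrative, not part of this commit; the real call sites are in the vendored ggml-cpu.c):

// Hypothetical call-site sketch only.
static void compute_op(struct ggml_compute_params * params, struct ggml_tensor * op) {
    // let any registered extra buffer type ("accelerator") claim the op first
    if (ggml_cpu_extra_compute_forward(params, op)) {
        return; // an accelerator handled it
    }
    // ... otherwise fall through to the generic CPU implementation ...
}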
llama/ggml-aarch64.h → llama/ggml-cpu-traits.h (renamed)

 /**
- * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file
+ * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file
  *
  * MIT License
  *
 ...
@@ -25,21 +25,40 @@
  */

 #pragma once
+
+#include "ggml-backend-impl.h"
+#include "ggml-cpu-impl.h"
 #include "ggml.h"

-// GGML internal header
-
 #ifdef __cplusplus
+#    include <vector>
 extern "C" {
 #endif

-// Quantization utilizing an importance matrix (a.k.a. "Activation aWare Quantization")
-size_t quantize_q4_0_4x4(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
-size_t quantize_q4_0_4x8(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
-size_t quantize_q4_0_8x8(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row, const float * imatrix);
+// return true if op part of extra "accelerator"
+bool ggml_cpu_extra_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * op);
+bool ggml_cpu_extra_work_size(int n_threads, const struct ggml_tensor * op, size_t * size);

 #ifdef __cplusplus
 }
+
+namespace ggml::cpu {
+// register in tensor->extra
+class tensor_traits {
+  public:
+    virtual ~tensor_traits();
+    virtual bool work_size(int n_threads, const struct ggml_tensor * op, size_t & size) = 0;
+    virtual bool compute_forward(struct ggml_compute_params * params, struct ggml_tensor * op) = 0;
+};
+
+class extra_buffer_type {
+  public:
+    virtual ~extra_buffer_type();
+    virtual bool supports_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) = 0;
+    virtual tensor_traits * get_tensor_traits(const struct ggml_tensor * op) = 0;
+};
+}  // namespace ggml::cpu
+
+// implemented in ggml-cpu.cpp.
+std::vector<ggml_backend_buffer_type_t> & ggml_backend_cpu_get_extra_buffers_type();
+
 #endif
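
For orientation, here is a minimal, hypothetical sketch of how a buffer type could plug into the extra-buffer interface declared above. The names example_traits and example_buffer_type are illustrative only and not part of this commit:

// Hypothetical implementation sketch, not part of this commit.
#include "ggml-cpu-traits.h"

namespace ggml::cpu {

class example_traits : public tensor_traits {
  public:
    // report scratch space needed by compute_forward; return true if handled
    bool work_size(int n_threads, const struct ggml_tensor * op, size_t & size) override {
        GGML_UNUSED(n_threads);
        size = ggml_nbytes(op);  // placeholder estimate, not a real sizing rule
        return true;
    }
    // run the op; return true only if this accelerator handled it
    bool compute_forward(struct ggml_compute_params * params, struct ggml_tensor * op) override {
        GGML_UNUSED(params); GGML_UNUSED(op);
        return false;  // fall through to the generic CPU path
    }
};

class example_buffer_type : public extra_buffer_type {
  public:
    bool supports_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) override {
        GGML_UNUSED(dev);
        return op->op == GGML_OP_MUL_MAT;  // e.g. accelerate only matrix multiplies
    }
    tensor_traits * get_tensor_traits(const struct ggml_tensor * op) override {
        GGML_UNUSED(op);
        static example_traits traits;  // stateless, so a singleton suffices
        return &traits;
    }
};

}  // namespace ggml::cpu

An instance of such an extra_buffer_type is what the dispatch loops in ggml-cpu-traits.cpp find through each buffer type's context pointer.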
llama/ggml-cpu.c (+501 -348): diff collapsed in this view, not shown here.
llama/ggml-cpu.cpp

 /**
- * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file
+ * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file
  *
  * MIT License
  *
 ...
@@ -28,12 +28,17 @@
 #include "ggml-backend-impl.h"
 #include "ggml-cpu.h"
 #include "ggml-cpu-aarch64.h"
+#include "ggml-cpu-traits.h"
 #include "ggml-impl.h"
 #include "amx.h"

 #include <cctype>
 #include <string>
 #include <vector>

+#ifdef GGML_USE_CPU_HBM
+#    include "ggml-cpu-hbm.h"
+#endif
+
 #if defined(__APPLE__)
 #include <sys/types.h>
 #include <sys/sysctl.h>
 ...
@@ -49,115 +54,7 @@
 // ggml-backend interface

-#ifdef GGML_USE_CPU_HBM
-
-// buffer type HBM
-
-#include <hbwmalloc.h>
-
-static const char * ggml_backend_cpu_hbm_buffer_type_get_name(ggml_backend_buffer_type_t buft) {
-    return "CPU_HBM";
-
-    GGML_UNUSED(buft);
-}
-
-static void ggml_backend_cpu_hbm_buffer_free_buffer(ggml_backend_buffer_t buffer) {
-    hbw_free(buffer->context);
-}
-
-static ggml_backend_buffer_t ggml_backend_cpu_hbm_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
-    void * ptr;
-    int result = hbw_posix_memalign(&ptr, ggml_backend_cpu_buffer_type_get_alignment(buft), size);
-    if (result != 0) {
-        GGML_LOG_ERROR("failed to allocate HBM buffer of size %zu\n", size);
-        return NULL;
-    }
-
-    ggml_backend_buffer_t buffer = ggml_backend_cpu_buffer_from_ptr(ptr, size);
-    buffer->buft = buft;
-    buffer->iface.free_buffer = ggml_backend_cpu_hbm_buffer_free_buffer;
-
-    return buffer;
-}
-
-ggml_backend_buffer_type_t ggml_backend_cpu_hbm_buffer_type(void) {
-    static struct ggml_backend_buffer_type ggml_backend_cpu_buffer_type_hbm = {
-        /* .iface    = */ {
-            /* .get_name         = */ ggml_backend_cpu_hbm_buffer_type_get_name,
-            /* .alloc_buffer     = */ ggml_backend_cpu_hbm_buffer_type_alloc_buffer,
-            /* .get_alignment    = */ ggml_backend_cpu_buffer_type_get_alignment,
-            /* .get_max_size     = */ NULL, // defaults to SIZE_MAX
-            /* .get_alloc_size   = */ NULL, // defaults to ggml_nbytes
-            /* .is_host          = */ ggml_backend_cpu_buffer_type_is_host,
-        },
-        /* .context  = */ NULL,
-    };
-
-    return &ggml_backend_cpu_buffer_type_hbm;
-}
-#endif
-
-// buffer type AARCH64
-
-static void ggml_backend_cpu_aarch64_buffer_init_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
-    tensor->extra = (void *)ggml_aarch64_get_optimal_repack_type(tensor); // NOLINT
-
-    GGML_UNUSED(buffer);
-}
-
-static void ggml_backend_cpu_aarch64_buffer_set_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
-    GGML_ASSERT(offset == 0);
-    GGML_ASSERT(size == ggml_nbytes(tensor));
-
-    enum ggml_type repack_type = (enum ggml_type)(intptr_t)tensor->extra;
-
-    ggml_aarch64_repack_tensor(tensor, repack_type, data, size);
-
-    GGML_UNUSED(buffer);
-}
-
-static const char * ggml_backend_cpu_aarch64_buffer_type_get_name(ggml_backend_buffer_type_t buft) {
-    return "CPU_AARCH64";
-
-    GGML_UNUSED(buft);
-}
-
-static ggml_backend_buffer_t ggml_backend_cpu_aarch64_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
-    auto * buffer = ggml_backend_buft_alloc_buffer(ggml_backend_cpu_buffer_type(), size);
-
-    if (buffer == NULL) {
-        return NULL;
-    }
-
-    buffer->buft = buft;
-    buffer->iface.init_tensor = ggml_backend_cpu_aarch64_buffer_init_tensor;
-    buffer->iface.set_tensor = ggml_backend_cpu_aarch64_buffer_set_tensor;
-
-    return buffer;
-}
-
-ggml_backend_buffer_type_t ggml_backend_cpu_aarch64_buffer_type(void) {
-    static struct ggml_backend_buffer_type ggml_backend_cpu_buffer_type_aarch64 = {
-        /* .iface    = */ {
-            /* .get_name         = */ ggml_backend_cpu_aarch64_buffer_type_get_name,
-            /* .alloc_buffer     = */ ggml_backend_cpu_aarch64_buffer_type_alloc_buffer,
-            /* .get_alignment    = */ ggml_backend_cpu_buffer_type()->iface.get_alignment,
-            /* .get_max_size     = */ NULL, // defaults to SIZE_MAX
-            /* .get_alloc_size   = */ NULL, // defaults to ggml_nbytes
-            /* .is_host          = */ NULL,
-        },
-        /* .device   = */ ggml_backend_reg_dev_get(ggml_backend_cpu_reg(), 0),
-        /* .context  = */ NULL,
-    };
-
-    return &ggml_backend_cpu_buffer_type_aarch64;
-}
-
-bool ggml_backend_cpu_buft_is_aarch64(ggml_backend_buffer_type_t buft) {
-    return buft == ggml_backend_cpu_aarch64_buffer_type();
-}
-
-static ggml_backend_buffer_type_t * ggml_backend_cpu_get_extra_bufts(ggml_backend_dev_t device) {
+std::vector<ggml_backend_buffer_type_t>& ggml_backend_cpu_get_extra_buffers_type() {
     static std::vector<ggml_backend_buffer_type_t> bufts = []() {
         std::vector<ggml_backend_buffer_type_t> bufts;
 ...
@@ -178,11 +75,22 @@ static ggml_backend_buffer_type_t * ggml_backend_cpu_get_extra_bufts(ggml_backen
         return bufts;
     }();

-    return bufts.data();
+    return bufts;
+}
+
+static ggml_backend_buffer_type_t * ggml_backend_cpu_device_get_extra_buffers_type(ggml_backend_dev_t device) {
+    return ggml_backend_cpu_get_extra_buffers_type().data();

     GGML_UNUSED(device);
 }

+static bool ggml_backend_cpu_is_extra_buffer_type(ggml_backend_buffer_type_t buft) {
+    for (auto extra : ggml_backend_cpu_get_extra_buffers_type()) {
+        if (extra && extra == buft) return true;
+    }
+    return false;
+}
+
 // CPU backend - backend (stream)

 struct ggml_backend_cpu_context {
 ...
@@ -491,25 +399,19 @@ static bool ggml_backend_cpu_device_supports_op(ggml_backend_dev_t dev, const st
             return true;
         }

-    if (src0 && src0->buffer && ggml_backend_cpu_buft_is_aarch64(src0->buffer->buft)) {
-        if (op->op != GGML_OP_MUL_MAT || src0->type == ggml_aarch64_get_optimal_repack_type(src0)) {
-            return false;
-        }
-    }
-
-#if defined(__AMX_INT8__) && defined(__AVX512VNNI__)
-    if (src0 && src0->buffer && ggml_backend_amx_buft_is_amx(src0->buffer->buft)) {
-        return ggml_backend_amx_device_supports_op(op);
-    }
-    for (int i = 1; i < GGML_MAX_SRC; i++) {
-        if (op->src[i] && op->src[i]->buffer && ggml_backend_amx_buft_is_amx(op->src[i]->buffer->buft)) {
-            return false;
-        }
-    }
-#endif
-
-    for (int i = 1; i < GGML_MAX_SRC; i++) {
-        if (op->src[i] && op->src[i]->buffer && ggml_backend_cpu_buft_is_aarch64(op->src[i]->buffer->buft)) {
-            return false;
-        }
-    }
+    // extra_buffer_op?
+    for (auto extra : ggml_backend_cpu_get_extra_buffers_type()) {
+        if (extra) {
+            auto buf_extra = (ggml::cpu::extra_buffer_type *) extra->context;
+            if (buf_extra && buf_extra->supports_op(dev, op)) {
+                return true;
+            }
+        }
+    }
+
+    // the other case need host buffer.
+    for (int i = 0; i < GGML_MAX_SRC; i++) {
+        if (op->src[i] && op->src[i]->buffer && !ggml_backend_buft_is_host(op->src[i]->buffer->buft)) {
+            return false;
+        }
+    }
 ...
@@ -532,19 +434,10 @@ static bool ggml_backend_cpu_device_supports_op(ggml_backend_dev_t dev, const st
         default:
             return true;
     }
-
-    GGML_UNUSED(dev);
 }

 static bool ggml_backend_cpu_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
-    bool supported = ggml_backend_buft_is_host(buft) || ggml_backend_cpu_buft_is_aarch64(buft);
-
-#if defined(__AMX_INT8__) && defined(__AVX512VNNI__)
-    supported = supported || ggml_backend_amx_buft_is_amx(buft);
-#endif
-
-    return supported;
+    return ggml_backend_buft_is_host(buft) || ggml_backend_cpu_is_extra_buffer_type(buft);

     GGML_UNUSED(dev);
 }
 ...
@@ -667,7 +560,15 @@ static ggml_backend_feature * ggml_backend_cpu_get_features(ggml_backend_reg_t r
         if (ggml_cpu_has_llamafile()) {
             features.push_back({ "LLAMAFILE", "1" });
         }
+    // TODO: rename this
+    #ifdef GGML_USE_ACCELERATE
+        features.push_back({ "ACCELERATE", "1" });
+    #endif
+    #ifdef GGML_USE_CPU_HBM
+        features.push_back({ "CPU_HBM", "1" });
+    #endif
+    #ifdef GGML_USE_OPENMP
+        features.push_back({ "OPENMP", "1" });
+    #endif
     #ifdef GGML_USE_CPU_AARCH64
         features.push_back({ "AARCH64_REPACK", "1" });
     #endif
 ...
@@ -684,10 +585,12 @@ static ggml_backend_feature * ggml_backend_cpu_get_features(ggml_backend_reg_t r
 static void * ggml_backend_cpu_get_proc_address(ggml_backend_reg_t reg, const char * name) {
     if (strcmp(name, "ggml_backend_set_n_threads") == 0) {
-        return (void *)ggml_backend_cpu_set_n_threads;
+        ggml_backend_set_n_threads_t fct = ggml_backend_cpu_set_n_threads;
+        return (void *)fct;
     }
     if (strcmp(name, "ggml_backend_dev_get_extra_bufts") == 0) {
-        return (void *)ggml_backend_cpu_get_extra_bufts;
+        ggml_backend_dev_get_extra_bufts_t fct = ggml_backend_cpu_device_get_extra_buffers_type;
+        return (void *)fct;
     }
     if (strcmp(name, "ggml_backend_get_features") == 0) {
         return (void *)ggml_backend_cpu_get_features;
 ...
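
The proc-address strings in the last hunk are how clients discover these entry points without a link-time dependency. A hedged usage sketch, assuming the ggml_backend_reg_get_proc_address API and the ggml_backend_dev_get_extra_bufts_t typedef from ggml-backend.h of this vintage, and assuming the returned array is NULL-terminated per the extra-bufts convention:

// Hedged sketch: querying the CPU backend's extra buffer types at runtime.
#include "ggml-backend.h"
#include "ggml-cpu.h"
#include <cstdio>

int main() {
    ggml_backend_reg_t reg = ggml_backend_cpu_reg();
    ggml_backend_dev_t dev = ggml_backend_reg_dev_get(reg, 0);

    auto get_extra_bufts = (ggml_backend_dev_get_extra_bufts_t)
        ggml_backend_reg_get_proc_address(reg, "ggml_backend_dev_get_extra_bufts");

    if (get_extra_bufts) {
        // assumed NULL-terminated list of extra buffer types
        for (ggml_backend_buffer_type_t * it = get_extra_bufts(dev); it && *it; ++it) {
            printf("extra buffer type: %s\n", ggml_backend_buft_name(*it));
        }
    }
    return 0;
}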
llama/ggml-cpu.h

 /**
- * llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file
+ * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file
  *
  * MIT License
  *
 ...
@@ -129,24 +129,14 @@ extern "C" {
     // Internal types and functions exposed for tests and benchmarks

-    typedef void (*ggml_from_float_to_mat_t)(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t nr, int64_t k, int64_t bs);
     typedef void (*ggml_vec_dot_t)(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x, size_t bx,
                                    const void * GGML_RESTRICT y, size_t by, int nrc);
-    typedef void (*ggml_gemv_t)(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x, const void * GGML_RESTRICT y, int nr, int nc);
-    typedef void (*ggml_gemm_t)(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x, const void * GGML_RESTRICT y, int nr, int nc);

     struct ggml_type_traits_cpu {
         ggml_from_float_t        from_float;
-        ggml_from_float_to_mat_t from_float_to_mat;
         ggml_vec_dot_t           vec_dot;
         enum ggml_type           vec_dot_type;
         int64_t                  nrows; // number of rows to process simultaneously
-        int64_t                  ncols; // number of columns to process simultaneously
-        ggml_gemv_t              gemv;
-        ggml_gemm_t              gemm;
     };

     GGML_BACKEND_API const struct ggml_type_traits_cpu * ggml_get_type_traits_cpu(enum ggml_type type);
 ...
@@ -166,13 +156,6 @@ extern "C" {
     GGML_BACKEND_API ggml_backend_reg_t ggml_backend_cpu_reg(void);

-#ifdef GGML_USE_CPU_HBM
-    GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cpu_hbm_buffer_type(void);
-#endif
-
-    GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cpu_aarch64_buffer_type(void);
-    GGML_BACKEND_API bool ggml_backend_cpu_buft_is_aarch64(ggml_backend_buffer_type_t buft);
-
 #ifdef __cplusplus
 }
 #endif
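
A small hedged example of the slimmed-down traits lookup left behind by this hunk (types and fields as declared above; the Q4_0 choice and the function wrapper are illustrative):

// Hedged sketch: looking up CPU type traits after the gemv/gemm removal.
#include "ggml-cpu.h"

void inspect_q4_0_traits(void) {
    const struct ggml_type_traits_cpu * tt = ggml_get_type_traits_cpu(GGML_TYPE_Q4_0);

    // from_float_to_mat, ncols, gemv and gemm are gone; generic paths now use
    // vec_dot together with its companion quantization type.
    enum ggml_type vdt = tt->vec_dot_type;  // the type vec_dot expects for y
    int64_t nrows      = tt->nrows;         // rows processed simultaneously
    (void) vdt; (void) nrows;
}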
llama/ggml-cuda.h, llama/ggml-cuda/acc.cu, llama/ggml-cuda/acc.cuh, llama/ggml-cuda/arange.cu, llama/ggml-cuda/arange.cuh, llama/ggml-cuda/argmax.cu, llama/ggml-cuda/argmax.cuh, llama/ggml-cuda/argsort.cu, llama/ggml-cuda/argsort.cuh, llama/ggml-cuda/binbcast.cu, llama/ggml-cuda/binbcast.cuh, llama/ggml-cuda/clamp.cu

Each of these twelve CUDA files carries the same single-line change as the headers above: the vendored-commit marker in the license header moves from 40c6d79fb52f995f47507fedfeaae2ac05d9b35c to ba1cb19cdd0d92e012e0f6e009e0620f854b6afd.