OpenDAS / llama.cpp · Commits

Commit 97ef6ff8, authored Dec 02, 2024 by xuxzh1 🎱
Commit message: update
Parent: 4cc1a614
Pipeline #2023 canceled with stages
Changes: 202 · Pipelines: 1
Showing 2 changed files with 335 additions and 1607 deletions:

  ggml/include/ggml.h       +266  −501
  ggml/src/CMakeLists.txt   +69   −1106
ggml/include/ggml.h

@@ -176,25 +176,15 @@
 #ifdef GGML_SHARED
 #    if defined(_WIN32) && !defined(__MINGW32__)
 #        ifdef GGML_BUILD
-#            define GGML_API __declspec(dllexport)
+#            define GGML_API __declspec(dllexport) extern
 #        else
-#            define GGML_API __declspec(dllimport)
+#            define GGML_API __declspec(dllimport) extern
 #        endif
 #    else
-#        define GGML_API __attribute__ ((visibility ("default")))
+#        define GGML_API __attribute__ ((visibility ("default"))) extern
 #    endif
 #else
-#    define GGML_API
-#endif
-
-#ifdef GGML_MULTIPLATFORM
-#    if defined(_WIN32)
-#        define GGML_CALL
-#    else
-#        define GGML_CALL __attribute__((__ms_abi__))
-#    endif
-#else
-#    define GGML_CALL
+#    define GGML_API extern
 #endif

 // TODO: support for clang
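For reference, a small illustration (not part of the diff) of how the updated macro expands for a shared-library build; `ggml_status_to_string` is simply one of this header's declarations used as the example.

// Illustration only: with GGML_SHARED and GGML_BUILD defined on MSVC,
//     GGML_API const char * ggml_status_to_string(enum ggml_status status);
// now expands to
//     __declspec(dllexport) extern const char * ggml_status_to_string(enum ggml_status status);
// while on non-Windows shared builds it expands to
//     __attribute__ ((visibility ("default"))) extern const char * ggml_status_to_string(enum ggml_status status);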
@@ -220,21 +210,24 @@
 #include <stdio.h>

 #define GGML_FILE_MAGIC   0x67676d6c // "ggml"
-#define GGML_FILE_VERSION 1
+#define GGML_FILE_VERSION 2

 #define GGML_QNT_VERSION        2    // bump this on quantization format changes
 #define GGML_QNT_VERSION_FACTOR 1000 // do not change this

 #define GGML_MAX_DIMS           4
 #define GGML_MAX_PARAMS         2048
-#define GGML_MAX_CONTEXTS       64
 #define GGML_MAX_SRC            10
+#define GGML_MAX_N_THREADS      512
+#define GGML_MAX_OP_PARAMS      64
+
 #ifndef GGML_MAX_NAME
-#define GGML_MAX_NAME           64
+#    define GGML_MAX_NAME       64
 #endif
-#define GGML_MAX_OP_PARAMS      64
+
 #define GGML_DEFAULT_N_THREADS  4
 #define GGML_DEFAULT_GRAPH_SIZE 2048
+
 #if UINTPTR_MAX == 0xFFFFFFFF
     #define GGML_MEM_ALIGN 4
 #else

@@ -244,6 +237,8 @@
 #define GGML_EXIT_SUCCESS 0
 #define GGML_EXIT_ABORTED 1

+#define GGML_ROPE_TYPE_NEOX 2
+
 #define GGUF_MAGIC "GGUF"

 #define GGUF_VERSION 3

@@ -255,21 +250,21 @@
 #define GGML_PAD(x, n) (((x) + (n) - 1) & ~((n) - 1))

 #ifndef NDEBUG
-#define GGML_UNREACHABLE() do { fprintf(stderr, "statement should be unreachable\n"); abort(); } while(0)
+#    define GGML_UNREACHABLE() do { fprintf(stderr, "statement should be unreachable\n"); abort(); } while(0)
 #elif defined(__GNUC__)
-#define GGML_UNREACHABLE() __builtin_unreachable()
+#    define GGML_UNREACHABLE() __builtin_unreachable()
 #elif defined(_MSC_VER)
-#define GGML_UNREACHABLE() __assume(0)
+#    define GGML_UNREACHABLE() __assume(0)
 #else
-#define GGML_UNREACHABLE() ((void) 0)
+#    define GGML_UNREACHABLE() ((void) 0)
 #endif

 #ifdef __cplusplus
-#define GGML_NORETURN [[noreturn]]
+#    define GGML_NORETURN [[noreturn]]
 #elif defined(_MSC_VER)
-#define GGML_NORETURN __declspec(noreturn)
+#    define GGML_NORETURN __declspec(noreturn)
 #else
-#define GGML_NORETURN _Noreturn
+#    define GGML_NORETURN _Noreturn
 #endif

 #define GGML_ABORT(...) ggml_abort(__FILE__, __LINE__, __VA_ARGS__)
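As an aside (not in the diff), the GGML_PAD helper shown above rounds its first argument up to a multiple of its second, which must be a power of two; a quick compile-time check illustrates this, assuming a C11 compiler.

#include <assert.h>

// GGML_PAD(x, n) == (((x) + (n) - 1) & ~((n) - 1))
static_assert(GGML_PAD(10, 16) == 16, "10 rounded up to a multiple of 16");
static_assert(GGML_PAD(32, 16) == 32, "already aligned values are unchanged");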
@@ -334,7 +329,7 @@ extern "C" {
     };

     // get ggml_status name string
-    GGML_API GGML_CALL const char * ggml_status_to_string(enum ggml_status status);
+    GGML_API const char * ggml_status_to_string(enum ggml_status status);

     // ieee 754-2008 half-precision float16
     // todo: make this not an integral type

@@ -354,6 +349,7 @@ extern "C" {
     struct ggml_object;
     struct ggml_context;
+    struct ggml_cgraph;

     // NOTE: always add types at the end of the enum to keep backward compatibility
     enum ggml_type {

@@ -391,6 +387,8 @@ extern "C" {
         GGML_TYPE_Q4_0_4_4 = 31,
         GGML_TYPE_Q4_0_4_8 = 32,
         GGML_TYPE_Q4_0_8_8 = 33,
+        GGML_TYPE_TQ1_0    = 34,
+        GGML_TYPE_TQ2_0    = 35,
         GGML_TYPE_COUNT,
     };
@@ -451,10 +449,13 @@ extern "C" {
         GGML_OP_SQR,
         GGML_OP_SQRT,
         GGML_OP_LOG,
+        GGML_OP_SIN,
+        GGML_OP_COS,
         GGML_OP_SUM,
         GGML_OP_SUM_ROWS,
         GGML_OP_MEAN,
         GGML_OP_ARGMAX,
+        GGML_OP_COUNT_EQUAL,
         GGML_OP_REPEAT,
         GGML_OP_REPEAT_BACK,
         GGML_OP_CONCAT,

@@ -488,9 +489,11 @@ extern "C" {
         GGML_OP_CLAMP,
         GGML_OP_CONV_TRANSPOSE_1D,
         GGML_OP_IM2COL,
+        GGML_OP_IM2COL_BACK,
         GGML_OP_CONV_TRANSPOSE_2D,
         GGML_OP_POOL_1D,
         GGML_OP_POOL_2D,
+        GGML_OP_POOL_2D_BACK,
         GGML_OP_UPSCALE, // nearest interpolate
         GGML_OP_PAD,
         GGML_OP_ARANGE,

@@ -506,6 +509,7 @@ extern "C" {
         GGML_OP_WIN_UNPART,
         GGML_OP_GET_REL_POS,
         GGML_OP_ADD_REL_POS,
+        GGML_OP_RWKV_WKV6,

         GGML_OP_UNARY,

@@ -522,6 +526,7 @@ extern "C" {
         GGML_OP_CROSS_ENTROPY_LOSS,
         GGML_OP_CROSS_ENTROPY_LOSS_BACK,
+        GGML_OP_OPT_STEP_ADAMW,

         GGML_OP_COUNT,
     };

@@ -540,6 +545,7 @@ extern "C" {
         GGML_UNARY_OP_SILU,
         GGML_UNARY_OP_HARDSWISH,
         GGML_UNARY_OP_HARDSIGMOID,
+        GGML_UNARY_OP_EXP,

         GGML_UNARY_OP_COUNT,
     };
@@ -551,35 +557,32 @@ extern "C" {
     };

     enum ggml_log_level {
-        GGML_LOG_LEVEL_ERROR = 2,
-        GGML_LOG_LEVEL_WARN  = 3,
-        GGML_LOG_LEVEL_INFO  = 4,
-        GGML_LOG_LEVEL_DEBUG = 5
+        GGML_LOG_LEVEL_NONE  = 0,
+        GGML_LOG_LEVEL_DEBUG = 1,
+        GGML_LOG_LEVEL_INFO  = 2,
+        GGML_LOG_LEVEL_WARN  = 3,
+        GGML_LOG_LEVEL_ERROR = 4,
+        GGML_LOG_LEVEL_CONT  = 5, // continue previous log
     };

+    // this tensor...
     enum ggml_tensor_flag {
-        GGML_TENSOR_FLAG_INPUT  = 1,
-        GGML_TENSOR_FLAG_OUTPUT = 2,
-        GGML_TENSOR_FLAG_PARAM  = 4,
+        GGML_TENSOR_FLAG_INPUT  = 1, // ...is an input for the GGML compute graph
+        GGML_TENSOR_FLAG_OUTPUT = 2, // ...is an output for the GGML compute graph
+        GGML_TENSOR_FLAG_PARAM  = 4, // ...contains trainable parameters
+        GGML_TENSOR_FLAG_LOSS   = 8, // ...defines loss for numerical optimization (multiple loss tensors add up)
     };

-    // ggml object
-    struct ggml_object {
-        size_t offs;
-        size_t size;
-
-        struct ggml_object * next;
-
-        enum ggml_object_type type;
-
-        char padding[4];
-    };
-
-    static const size_t GGML_OBJECT_SIZE = sizeof(struct ggml_object);
+    struct ggml_init_params {
+        // memory pool
+        size_t mem_size;   // bytes
+        void * mem_buffer; // if NULL, memory will be allocated internally
+        bool   no_alloc;   // don't allocate memory for the tensor data
+    };

     // n-dimensional tensor
     struct ggml_tensor {
-        enum ggml_type type;
-
-        GGML_DEPRECATED(enum ggml_backend_type backend, "use the buffer type to find the storage location of the tensor");
+        enum ggml_type type;
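To illustrate the reordered ggml_log_level values (this example is not part of the commit), a logging callback can now filter by severity with a plain comparison, since DEBUG < INFO < WARN < ERROR; it would be installed via ggml_log_set(), which is declared later in this header.

#include <stdio.h>

// Example callback matching the ggml_log_callback typedef declared further down in ggml.h.
static void my_log_cb(enum ggml_log_level level, const char * text, void * user_data) {
    (void) user_data;
    // With the new ordering, NONE=0 < DEBUG < INFO < WARN < ERROR, so a severity
    // threshold is a simple comparison; GGML_LOG_LEVEL_CONT marks continuation lines.
    if (level >= GGML_LOG_LEVEL_WARN) {
        fputs(text, stderr);
    }
}

// Installed once at startup (hypothetical usage):
//     ggml_log_set(my_log_cb, NULL);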
@@ -599,7 +602,6 @@ extern "C" {
         int32_t flags;

-        struct ggml_tensor * grad;
         struct ggml_tensor * src[GGML_MAX_SRC];

         // source tensor and offset for views

@@ -612,7 +614,7 @@ extern "C" {
         void * extra; // extra things e.g. for ggml-cuda.cu

-        // char padding[4];
+        char padding[8];
     };

     static const size_t GGML_TENSOR_SIZE = sizeof(struct ggml_tensor);
@@ -622,71 +624,6 @@ extern "C" {
     // If it returns true, the computation is aborted
     typedef bool (*ggml_abort_callback)(void * data);

-    // the compute plan that needs to be prepared for ggml_graph_compute()
-    // since https://github.com/ggerganov/ggml/issues/287
-    struct ggml_cplan {
-        size_t    work_size; // size of work buffer, calculated by `ggml_graph_plan()`
-        uint8_t * work_data; // work buffer, to be allocated by caller before calling to `ggml_graph_compute()`
-
-        int n_threads;
-
-        // abort ggml_graph_compute when true
-        ggml_abort_callback abort_callback;
-        void *              abort_callback_data;
-    };
-
-    enum ggml_cgraph_eval_order {
-        GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT = 0,
-        GGML_CGRAPH_EVAL_ORDER_RIGHT_TO_LEFT,
-        GGML_CGRAPH_EVAL_ORDER_COUNT
-    };
-
-    typedef uint32_t ggml_bitset_t;
-
-    struct ggml_hash_set {
-        size_t size;
-        ggml_bitset_t * used;
-        struct ggml_tensor ** keys;
-    };
-
-    // computation graph
-    struct ggml_cgraph {
-        int size;
-        int n_nodes;
-        int n_leafs;
-
-        struct ggml_tensor ** nodes;
-        struct ggml_tensor ** grads;
-        struct ggml_tensor ** leafs;
-
-        struct ggml_hash_set visited_hash_set;
-
-        enum ggml_cgraph_eval_order order;
-    };
-
-    // scratch buffer
-    struct ggml_scratch {
-        size_t offs;
-        size_t size;
-        void * data;
-    };
-
-    struct ggml_init_params {
-        // memory pool
-        size_t mem_size;   // bytes
-        void * mem_buffer; // if NULL, memory will be allocated internally
-        bool   no_alloc;   // don't allocate memory for the tensor data
-    };
-
     // numa strategies
     enum ggml_numa_strategy {
         GGML_NUMA_STRATEGY_DISABLED   = 0,
         GGML_NUMA_STRATEGY_DISTRIBUTE = 1,
         GGML_NUMA_STRATEGY_ISOLATE    = 2,
         GGML_NUMA_STRATEGY_NUMACTL    = 3,
         GGML_NUMA_STRATEGY_MIRROR     = 4,
         GGML_NUMA_STRATEGY_COUNT
     };

     //
     // GUID
@@ -709,52 +646,49 @@ extern "C" {
     // accepts a UTF-8 path, even on Windows
     GGML_API FILE * ggml_fopen(const char * fname, const char * mode);

     GGML_API void ggml_numa_init(enum ggml_numa_strategy numa); // call once for better performance on NUMA systems
     GGML_API bool ggml_is_numa(void); // true if init detected that system has >1 NUMA node

     GGML_API void ggml_print_object (const struct ggml_object * obj);
     GGML_API void ggml_print_objects(const struct ggml_context * ctx);

-    GGML_API GGML_CALL int64_t ggml_nelements (const struct ggml_tensor * tensor);
-    GGML_API GGML_CALL int64_t ggml_nrows     (const struct ggml_tensor * tensor);
-    GGML_API GGML_CALL size_t  ggml_nbytes    (const struct ggml_tensor * tensor);
-    GGML_API           size_t  ggml_nbytes_pad(const struct ggml_tensor * tensor); // same as ggml_nbytes() but padded to GGML_MEM_ALIGN
+    GGML_API int64_t ggml_nelements (const struct ggml_tensor * tensor);
+    GGML_API int64_t ggml_nrows     (const struct ggml_tensor * tensor);
+    GGML_API size_t  ggml_nbytes    (const struct ggml_tensor * tensor);
+    GGML_API size_t  ggml_nbytes_pad(const struct ggml_tensor * tensor); // same as ggml_nbytes() but padded to GGML_MEM_ALIGN

-    GGML_API GGML_CALL int64_t ggml_blck_size(enum ggml_type type);
-    GGML_API GGML_CALL size_t  ggml_type_size(enum ggml_type type);             // size in bytes for all elements in a block
-    GGML_API GGML_CALL size_t  ggml_row_size (enum ggml_type type, int64_t ne); // size in bytes for all elements in a row
+    GGML_API int64_t ggml_blck_size(enum ggml_type type);
+    GGML_API size_t  ggml_type_size(enum ggml_type type);             // size in bytes for all elements in a block
+    GGML_API size_t  ggml_row_size (enum ggml_type type, int64_t ne); // size in bytes for all elements in a row

     GGML_DEPRECATED(
     GGML_API double ggml_type_sizef(enum ggml_type type), // ggml_type_size()/ggml_blck_size() as float
     "use ggml_row_size() instead");

-    GGML_API GGML_CALL const char * ggml_type_name(enum ggml_type type);
-    GGML_API GGML_CALL const char * ggml_op_name  (enum ggml_op   op);
-    GGML_API           const char * ggml_op_symbol(enum ggml_op   op);
-
-    GGML_API           const char * ggml_unary_op_name(enum ggml_unary_op op);
-    GGML_API GGML_CALL const char * ggml_op_desc(const struct ggml_tensor * t); // unary or op name
+    GGML_API const char * ggml_type_name(enum ggml_type type);
+    GGML_API const char * ggml_op_name  (enum ggml_op   op);
+    GGML_API const char * ggml_op_symbol(enum ggml_op   op);
+
+    GGML_API const char * ggml_unary_op_name(enum ggml_unary_op op);
+    GGML_API const char * ggml_op_desc(const struct ggml_tensor * t); // unary or op name

-    GGML_API GGML_CALL size_t ggml_element_size(const struct ggml_tensor * tensor);
+    GGML_API size_t ggml_element_size(const struct ggml_tensor * tensor);

-    GGML_API GGML_CALL bool ggml_is_quantized(enum ggml_type type);
+    GGML_API bool ggml_is_quantized(enum ggml_type type);

     // TODO: temporary until model loading of ggml examples is refactored
     GGML_API enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype);

-    GGML_API GGML_CALL bool ggml_is_transposed(const struct ggml_tensor * tensor);
-    GGML_API GGML_CALL bool ggml_is_permuted  (const struct ggml_tensor * tensor);
-    GGML_API GGML_CALL bool ggml_is_empty     (const struct ggml_tensor * tensor);
-    GGML_API           bool ggml_is_scalar    (const struct ggml_tensor * tensor);
-    GGML_API           bool ggml_is_vector    (const struct ggml_tensor * tensor);
-    GGML_API           bool ggml_is_matrix    (const struct ggml_tensor * tensor);
-    GGML_API           bool ggml_is_3d        (const struct ggml_tensor * tensor);
-    GGML_API           int  ggml_n_dims       (const struct ggml_tensor * tensor); // returns 1 for scalars
+    GGML_API bool ggml_is_transposed(const struct ggml_tensor * tensor);
+    GGML_API bool ggml_is_permuted  (const struct ggml_tensor * tensor);
+    GGML_API bool ggml_is_empty     (const struct ggml_tensor * tensor);
+    GGML_API bool ggml_is_scalar    (const struct ggml_tensor * tensor);
+    GGML_API bool ggml_is_vector    (const struct ggml_tensor * tensor);
+    GGML_API bool ggml_is_matrix    (const struct ggml_tensor * tensor);
+    GGML_API bool ggml_is_3d        (const struct ggml_tensor * tensor);
+    GGML_API int  ggml_n_dims       (const struct ggml_tensor * tensor); // returns 1 for scalars

-    GGML_API GGML_CALL bool ggml_is_contiguous  (const struct ggml_tensor * tensor);
-    GGML_API GGML_CALL bool ggml_is_contiguous_0(const struct ggml_tensor * tensor); // same as ggml_is_contiguous()
-    GGML_API GGML_CALL bool ggml_is_contiguous_1(const struct ggml_tensor * tensor); // contiguous for dims >= 1
-    GGML_API GGML_CALL bool ggml_is_contiguous_2(const struct ggml_tensor * tensor); // contiguous for dims >= 2
+    GGML_API bool ggml_is_contiguous  (const struct ggml_tensor * tensor);
+    GGML_API bool ggml_is_contiguous_0(const struct ggml_tensor * tensor); // same as ggml_is_contiguous()
+    GGML_API bool ggml_is_contiguous_1(const struct ggml_tensor * tensor); // contiguous for dims >= 1
+    GGML_API bool ggml_is_contiguous_2(const struct ggml_tensor * tensor); // contiguous for dims >= 2

     GGML_API bool ggml_are_same_shape (const struct ggml_tensor * t0, const struct ggml_tensor * t1);
     GGML_API bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
@@ -768,12 +702,12 @@ extern "C" {
     // main

-    GGML_API struct ggml_context * ggml_init(struct ggml_init_params params);
-    GGML_API void                  ggml_free(struct ggml_context * ctx);
+    GGML_API struct ggml_context * ggml_init (struct ggml_init_params params);
+    GGML_API void                  ggml_reset(struct ggml_context * ctx);
+    GGML_API void                  ggml_free (struct ggml_context * ctx);

     GGML_API size_t ggml_used_mem(const struct ggml_context * ctx);

-    GGML_API size_t ggml_set_scratch (struct ggml_context * ctx, struct ggml_scratch scratch);
     GGML_API bool   ggml_get_no_alloc(struct ggml_context * ctx);
     GGML_API void   ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc);
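A minimal sketch (not part of the diff) of the context lifecycle around these declarations, assuming the default allocating behaviour (no_alloc = false):

#include <stdio.h>

static void context_demo(void) {
    struct ggml_init_params params = {
        /*.mem_size   =*/ 16*1024*1024, // 16 MB memory pool
        /*.mem_buffer =*/ NULL,         // let ggml allocate the pool
        /*.no_alloc   =*/ false,        // tensor data lives in the pool
    };

    struct ggml_context * ctx = ggml_init(params);

    struct ggml_tensor * t = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4, 3);
    ggml_set_name(t, "example");

    printf("%s: %lld elements, %zu bytes, %zu bytes used in ctx\n",
           ggml_get_name(t), (long long) ggml_nelements(t), ggml_nbytes(t), ggml_used_mem(ctx));

    ggml_free(ctx); // releases the pool and every tensor created in it
}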
@@ -813,8 +747,7 @@ extern "C" {
             int64_t ne2,
             int64_t ne3);

-    GGML_API struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value);
-    GGML_API struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value);
-
     GGML_API void * ggml_new_buffer(struct ggml_context * ctx, size_t nbytes);

     GGML_API struct ggml_tensor * ggml_dup_tensor (struct ggml_context * ctx, const struct ggml_tensor * src);
     GGML_API struct ggml_tensor * ggml_view_tensor(struct ggml_context * ctx, struct ggml_tensor * src);
@@ -824,35 +757,25 @@ extern "C" {
     GGML_API struct ggml_tensor * ggml_get_next_tensor (const struct ggml_context * ctx, struct ggml_tensor * tensor);
     GGML_API struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name);

-    GGML_API struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor);
-    GGML_API struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value);
-    GGML_API struct ggml_tensor * ggml_set_f32 (struct ggml_tensor * tensor, float value);
-
     // Converts a flat index into coordinates
-    GGML_API void    ggml_unravel_index(const struct ggml_tensor * tensor, int64_t i, int64_t * i0, int64_t * i1, int64_t * i2, int64_t * i3);
-
-    GGML_API int32_t ggml_get_i32_1d(const struct ggml_tensor * tensor, int i);
-    GGML_API void    ggml_set_i32_1d(const struct ggml_tensor * tensor, int i, int32_t value);
-    GGML_API int32_t ggml_get_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3);
-    GGML_API void    ggml_set_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, int32_t value);
-
-    GGML_API float   ggml_get_f32_1d(const struct ggml_tensor * tensor, int i);
-    GGML_API void    ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value);
-    GGML_API float   ggml_get_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3);
-    GGML_API void    ggml_set_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, float value);
+    GGML_API void ggml_unravel_index(const struct ggml_tensor * tensor, int64_t i, int64_t * i0, int64_t * i1, int64_t * i2, int64_t * i3);
+
+    GGML_API enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor);

     GGML_API void *  ggml_get_data    (const struct ggml_tensor * tensor);
     GGML_API float * ggml_get_data_f32(const struct ggml_tensor * tensor);

-    GGML_API GGML_CALL enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor);
-
     GGML_API const char *         ggml_get_name   (const struct ggml_tensor * tensor);
     GGML_API struct ggml_tensor * ggml_set_name   (      struct ggml_tensor * tensor, const char * name);
     GGML_ATTRIBUTE_FORMAT(2, 3)
     GGML_API struct ggml_tensor * ggml_format_name(      struct ggml_tensor * tensor, const char * fmt, ...);

+    // Tensor flags
+    GGML_API void ggml_set_input (struct ggml_tensor * tensor);
+    GGML_API void ggml_set_output(struct ggml_tensor * tensor);
+    GGML_API void ggml_set_param (struct ggml_context * ctx, struct ggml_tensor * tensor);
+    GGML_API void ggml_set_loss  (struct ggml_tensor * tensor);
+
     //
     // operations on tensors with backpropagation
     //
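A short sketch (not from the commit) of how the relocated flag setters are typically used when assembling a training graph; x, w and loss stand in for tensors created elsewhere:

// Mark the roles of tensors before building/allocating the graph (hypothetical tensors).
static void mark_graph_tensors(struct ggml_context * ctx,
                               struct ggml_tensor  * x,
                               struct ggml_tensor  * w,
                               struct ggml_tensor  * loss) {
    ggml_set_input (x);      // data fed in by the caller each step
    ggml_set_param (ctx, w); // trainable parameter, gradients will be tracked
    ggml_set_output(loss);   // keep the result around after graph execution
    ggml_set_loss  (loss);   // value to minimize during optimization
}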
@@ -967,6 +890,22 @@ extern "C" {
             struct ggml_context * ctx,
             struct ggml_tensor  * a);

+    GGML_API struct ggml_tensor * ggml_sin(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
+    GGML_API struct ggml_tensor * ggml_sin_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
+    GGML_API struct ggml_tensor * ggml_cos(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
+    GGML_API struct ggml_tensor * ggml_cos_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
     // return scalar
     GGML_API struct ggml_tensor * ggml_sum(
             struct ggml_context * ctx,
@@ -987,6 +926,12 @@ extern "C" {
             struct ggml_context * ctx,
             struct ggml_tensor  * a);

+    // count number of equal elements in a and b
+    GGML_API struct ggml_tensor * ggml_count_equal(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b);
+
     // if a is the same shape as b, and a is not parameter, return a
     // otherwise, return a new tensor: repeat(a) to fit in b
     GGML_API struct ggml_tensor * ggml_repeat(
@@ -1117,6 +1062,14 @@ extern "C" {
             struct ggml_context * ctx,
             struct ggml_tensor  * a);

+    GGML_API struct ggml_tensor * ggml_exp(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
+    GGML_API struct ggml_tensor * ggml_exp_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
     // normalize along rows
     GGML_API struct ggml_tensor * ggml_norm(
             struct ggml_context * ctx,
@@ -1212,7 +1165,7 @@ extern "C" {
             size_t                nb1,
             size_t                nb2,
             size_t                nb3,
-            size_t                offset);
+            size_t                offset); // in bytes

     // b -> view(a,offset,nb1,nb2,3), return view(a)
     GGML_API struct ggml_tensor * ggml_set_inplace(

@@ -1222,19 +1175,19 @@ extern "C" {
             size_t                nb1,
             size_t                nb2,
             size_t                nb3,
-            size_t                offset);
+            size_t                offset); // in bytes

     GGML_API struct ggml_tensor * ggml_set_1d(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
             struct ggml_tensor  * b,
-            size_t                offset);
+            size_t                offset); // in bytes

     GGML_API struct ggml_tensor * ggml_set_1d_inplace(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
             struct ggml_tensor  * b,
-            size_t                offset);
+            size_t                offset); // in bytes

     // b -> view(a,offset,nb1,nb2,3), return modified a
     GGML_API struct ggml_tensor * ggml_set_2d(

@@ -1242,7 +1195,7 @@ extern "C" {
             struct ggml_tensor  * a,
             struct ggml_tensor  * b,
             size_t                nb1,
-            size_t                offset);
+            size_t                offset); // in bytes

     // b -> view(a,offset,nb1,nb2,3), return view(a)
     GGML_API struct ggml_tensor * ggml_set_2d_inplace(

@@ -1250,7 +1203,7 @@ extern "C" {
             struct ggml_tensor  * a,
             struct ggml_tensor  * b,
             size_t                nb1,
-            size_t                offset);
+            size_t                offset); // in bytes

     // a -> b, return view(b)
     GGML_API struct ggml_tensor * ggml_cpy(
@@ -1385,14 +1338,14 @@ extern "C" {
     // supports 3D: a->ne[2] == b->ne[1]
     GGML_API struct ggml_tensor * ggml_get_rows(
             struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b);
+            struct ggml_tensor  * a,  // data
+            struct ggml_tensor  * b); // row indices

     GGML_API struct ggml_tensor * ggml_get_rows_back(
             struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b,
-            struct ggml_tensor  * c);
+            struct ggml_tensor  * a,  // gradients of ggml_get_rows result
+            struct ggml_tensor  * b,  // row indices
+            struct ggml_tensor  * c); // data for ggml_get_rows, only used for its shape

     GGML_API struct ggml_tensor * ggml_diag(
             struct ggml_context * ctx,
@@ -1453,8 +1406,8 @@ extern "C" {
             struct ggml_tensor  * b);

     // rotary position embedding
-    // if mode & 1 == 1, skip n_past elements (NOT SUPPORTED)
-    // if mode & 2 == 1, GPT-NeoX style
+    // if (mode & 1) - skip n_past elements (NOT SUPPORTED)
+    // if (mode & GGML_ROPE_TYPE_NEOX) - GPT-NeoX style
     //
     // b is an int32 vector with size a->ne[2], it contains the positions
     GGML_API struct ggml_tensor * ggml_rope(
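A tiny illustration (not in the diff) of the rewritten comment: the GPT-NeoX variant is now selected by testing the GGML_ROPE_TYPE_NEOX bit rather than a literal 2.

// Hypothetical helper mirroring the comment above.
static bool rope_mode_is_neox(int mode) {
    return (mode & GGML_ROPE_TYPE_NEOX) != 0;
}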
@@ -1536,16 +1489,16 @@ extern "C" {
             "use ggml_rope_ext_inplace instead");

     // compute correction dims for YaRN RoPE scaling
-    GGML_CALL void ggml_rope_yarn_corr_dims(
+    GGML_API  void ggml_rope_yarn_corr_dims(
         int n_dims, int n_ctx_orig, float freq_base, float beta_fast, float beta_slow, float dims[2]);

     // rotary position embedding backward, i.e compute dx from dy
     // a - dy
     GGML_API struct ggml_tensor * ggml_rope_back(
             struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b,
-            struct ggml_tensor  * c,
+            struct ggml_tensor  * a, // gradients of ggml_rope result
+            struct ggml_tensor  * b, // positions
+            struct ggml_tensor  * c, // freq factors
             int                   n_dims,
             int                   mode,
             int                   n_ctx_orig,
@@ -1564,34 +1517,49 @@ extern "C" {
             float                 min,
             float                 max);

     // im2col
     // converts data into a format that effectively results in a convolution when combined with matrix multiplication
     GGML_API struct ggml_tensor * ggml_im2col(
             struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b,
-            int                   s0,
-            int                   s1,
-            int                   p0,
-            int                   p1,
-            int                   d0,
-            int                   d1,
-            bool                  is_2D,
-            enum ggml_type        dst_type);
+            struct ggml_tensor  * a,  // convolution kernel
+            struct ggml_tensor  * b,  // data
+            int                   s0, // stride dimension 0
+            int                   s1, // stride dimension 1
+            int                   p0, // padding dimension 0
+            int                   p1, // padding dimension 1
+            int                   d0, // dilation dimension 0
+            int                   d1, // dilation dimension 1
+            bool                  is_2D,
+            enum ggml_type        dst_type);
+
+    GGML_API struct ggml_tensor * ggml_im2col_back(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,  // convolution kernel
+            struct ggml_tensor  * b,  // gradient of im2col output
+            int64_t             * ne, // shape of im2col input
+            int                   s0, // stride dimension 0
+            int                   s1, // stride dimension 1
+            int                   p0, // padding dimension 0
+            int                   p1, // padding dimension 1
+            int                   d0, // dilation dimension 0
+            int                   d1, // dilation dimension 1
+            bool                  is_2D);

     GGML_API struct ggml_tensor * ggml_conv_depthwise_2d(
             struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b,
-            int                   s0,
-            int                   s1,
-            int                   p0,
-            int                   p1,
-            int                   d0,
-            int                   d1);
+            struct ggml_tensor  * a,   // convolution kernel
+            struct ggml_tensor  * b,   // data
+            int                   s0,  // stride dimension 0
+            int                   s1,  // stride dimension 1
+            int                   p0,  // padding dimension 0
+            int                   p1,  // padding dimension 1
+            int                   d0,  // dilation dimension 0
+            int                   d1); // dilation dimension 1

     GGML_API struct ggml_tensor * ggml_conv_1d(
             struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b,
+            struct ggml_tensor  * a,   // convolution kernel
+            struct ggml_tensor  * b,   // data
             int                   s0,  // stride
             int                   p0,  // padding
             int                   d0); // dilation
@@ -1600,29 +1568,29 @@ extern "C" {
     // alias for ggml_conv_1d(a, b, s, a->ne[0]/2, d)
     GGML_API struct ggml_tensor * ggml_conv_1d_ph(
             struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b,
-            int                   s,
-            int                   d);
+            struct ggml_tensor  * a,  // convolution kernel
+            struct ggml_tensor  * b,  // data
+            int                   s,  // stride
+            int                   d); // dilation

     GGML_API struct ggml_tensor * ggml_conv_transpose_1d(
             struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b,
-            int                   s0,
-            int                   p0,
-            int                   d0);
+            struct ggml_tensor  * a,   // convolution kernel
+            struct ggml_tensor  * b,   // data
+            int                   s0,  // stride
+            int                   p0,  // padding
+            int                   d0); // dilation

     GGML_API struct ggml_tensor * ggml_conv_2d(
             struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b,
-            int                   s0,
-            int                   s1,
-            int                   p0,
-            int                   p1,
-            int                   d0,
-            int                   d1);
+            struct ggml_tensor  * a,   // convolution kernel
+            struct ggml_tensor  * b,   // data
+            int                   s0,  // stride dimension 0
+            int                   s1,  // stride dimension 1
+            int                   p0,  // padding dimension 0
+            int                   p1,  // padding dimension 1
+            int                   d0,  // dilation dimension 0
+            int                   d1); // dilation dimension 1

     // kernel size is a->ne[0] x a->ne[1]
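As an illustration (not part of the commit) of the newly documented convolution parameters, a stride-1, padding-1, dilation-1 call looks like this; the kernel and data tensors are assumed to have been created elsewhere with the layout ggml expects.

// a: convolution kernel, b: data (both hypothetical, created elsewhere in ctx)
static struct ggml_tensor * conv3x3_same(struct ggml_context * ctx,
                                         struct ggml_tensor  * a,
                                         struct ggml_tensor  * b) {
    return ggml_conv_2d(ctx, a, b,
                        /*s0=*/1, /*s1=*/1,   // stride in dimension 0 and 1
                        /*p0=*/1, /*p1=*/1,   // padding in dimension 0 and 1
                        /*d0=*/1, /*d1=*/1);  // dilation in dimension 0 and 1
}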
@@ -1684,6 +1652,18 @@ extern "C" {
             float                 p0,
             float                 p1);

+    GGML_API struct ggml_tensor * ggml_pool_2d_back(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * af, // "a"/input used in forward pass
+            enum ggml_op_pool     op,
+            int                   k0,
+            int                   k1,
+            int                   s0,
+            int                   s1,
+            float                 p0,
+            float                 p1);
+
     // nearest interpolate
     // multiplies ne0 and ne1 by scale factor
     // used in stable-diffusion
@@ -1758,12 +1738,16 @@ extern "C" {
             struct ggml_tensor  * v,
             struct ggml_tensor  * mask,
             float                 scale,
-            float                 max_bias);
+            float                 max_bias,
+            float                 logit_softcap);

     GGML_API void ggml_flash_attn_ext_set_prec(
             struct ggml_tensor * a,
             enum ggml_prec       prec);

+    GGML_API enum ggml_prec ggml_flash_attn_ext_get_prec(
+            const struct ggml_tensor * a);
+
     // TODO: needs to be adapted to ggml_flash_attn_ext
     GGML_API struct ggml_tensor * ggml_flash_attn_back(
             struct ggml_context * ctx,
@@ -1775,10 +1759,8 @@ extern "C" {
     GGML_API struct ggml_tensor * ggml_ssm_conv(
             struct ggml_context * ctx,
-            struct ggml_tensor  * s,
-            struct ggml_tensor  * x,
-            struct ggml_tensor  * c,
-            struct ggml_tensor  * sq);
+            struct ggml_tensor  * sx,
+            struct ggml_tensor  * c);

     GGML_API struct ggml_tensor * ggml_ssm_scan(
             struct ggml_context * ctx,

@@ -1787,8 +1769,7 @@ extern "C" {
             struct ggml_tensor  * dt,
             struct ggml_tensor  * A,
             struct ggml_tensor  * B,
-            struct ggml_tensor  * C,
-            struct ggml_tensor  * sq);
+            struct ggml_tensor  * C);

     // partition into non-overlapping windows with padding if needed
     // example:
@@ -1840,6 +1821,15 @@ extern "C" {
             struct ggml_tensor  * pw,
             struct ggml_tensor  * ph);

+    GGML_API struct ggml_tensor * ggml_rwkv_wkv6(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * k,
+            struct ggml_tensor  * v,
+            struct ggml_tensor  * r,
+            struct ggml_tensor  * tf,
+            struct ggml_tensor  * td,
+            struct ggml_tensor  * state);
+
     // custom operators

     typedef void (*ggml_unary_op_f32_t)(const int, float *, const float *);
@@ -1923,7 +1913,8 @@ extern "C" {
     typedef void (*ggml_custom2_op_t)(struct ggml_tensor * dst, const struct ggml_tensor * a, const struct ggml_tensor * b, int ith, int nth, void * userdata);
     typedef void (*ggml_custom3_op_t)(struct ggml_tensor * dst, const struct ggml_tensor * a, const struct ggml_tensor * b, const struct ggml_tensor * c, int ith, int nth, void * userdata);

-    #define GGML_N_TASKS_MAX -1
+    #define GGML_N_TASKS_MAX (-1)
+    // n_tasks == GGML_N_TASKS_MAX means to use max number of tasks

     GGML_API struct ggml_tensor * ggml_map_custom1(
             struct ggml_context * ctx,
@@ -1976,49 +1967,59 @@ extern "C" {
     // loss function

     GGML_API struct ggml_tensor * ggml_cross_entropy_loss(
             struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b);
+            struct ggml_tensor  * a,  // logits
+            struct ggml_tensor  * b); // labels

     GGML_API struct ggml_tensor * ggml_cross_entropy_loss_back(
             struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b,
-            struct ggml_tensor  * c);
+            struct ggml_tensor  * a,  // logits
+            struct ggml_tensor  * b,  // labels
+            struct ggml_tensor  * c); // gradients of cross_entropy_loss result

+    // AdamW optimizer step
+    // Paper: https://arxiv.org/pdf/1711.05101v3.pdf
+    // PyTorch: https://pytorch.org/docs/stable/generated/torch.optim.AdamW.html
+    GGML_API struct ggml_tensor * ggml_opt_step_adamw(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * grad,
+            struct ggml_tensor  * m,
+            struct ggml_tensor  * v,
+            struct ggml_tensor  * adamw_params); // parameters such a the learning rate
+
     //
     // automatic differentiation
     //

-    GGML_API void ggml_set_param(struct ggml_context * ctx, struct ggml_tensor * tensor);
-
-    GGML_API void ggml_build_forward_expand (struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
-    GGML_API void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph * gf, struct ggml_cgraph * gb, bool keep);
+    GGML_API void ggml_build_forward_expand (struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
+    GGML_API void ggml_build_backward_expand(
+        struct ggml_context * ctx_static,  // context for static gradients (loss + gradient accumulation)
+        struct ggml_context * ctx_compute, // context for gradient computation
+        struct ggml_cgraph  * cgraph,
+        bool                  accumulate); // whether or not gradients should be accumulated, requires static allocation of tensors in ctx_static

     // graph allocation in a context
-    GGML_API struct ggml_cgraph * ggml_new_graph       (struct ggml_context * ctx); // size = GGML_DEFAULT_GRAPH_SIZE, grads = false
-    GGML_API struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t size, bool grads);
-    GGML_API struct ggml_cgraph * ggml_graph_dup       (struct ggml_context * ctx, struct ggml_cgraph * cgraph);
-    GGML_API struct ggml_cgraph   ggml_graph_view      (struct ggml_cgraph * cgraph, int i0, int i1);
-    GGML_API void                 ggml_graph_cpy       (struct ggml_cgraph * src, struct ggml_cgraph * dst);
-    GGML_API void                 ggml_graph_reset     (struct ggml_cgraph * cgraph); // zero grads
-    GGML_API void                 ggml_graph_clear     (struct ggml_cgraph * cgraph);
+    GGML_API struct ggml_cgraph * ggml_new_graph       (struct ggml_context * ctx); // size = GGML_DEFAULT_GRAPH_SIZE, grads = false
+    GGML_API struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t size, bool grads);
+    GGML_API struct ggml_cgraph * ggml_graph_dup       (struct ggml_context * ctx, struct ggml_cgraph * cgraph);
+    GGML_API void                 ggml_graph_cpy       (struct ggml_cgraph * src, struct ggml_cgraph * dst);
+    GGML_API void                 ggml_graph_reset     (struct ggml_cgraph * cgraph); // set regular grads + optimizer momenta to 0, set loss grad to 1
+    GGML_API void                 ggml_graph_clear     (struct ggml_cgraph * cgraph);

     GGML_API int                   ggml_graph_size   (struct ggml_cgraph * cgraph);
     GGML_API struct ggml_tensor *  ggml_graph_node   (struct ggml_cgraph * cgraph, int i); // if i < 0, returns nodes[n_nodes + i]
     GGML_API struct ggml_tensor ** ggml_graph_nodes  (struct ggml_cgraph * cgraph);
     GGML_API int                   ggml_graph_n_nodes(struct ggml_cgraph * cgraph);

+    GGML_API void ggml_graph_add_node(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
+
     GGML_API size_t ggml_graph_overhead(void);
     GGML_API size_t ggml_graph_overhead_custom(size_t size, bool grads);

     // ggml_graph_plan() has to be called before ggml_graph_compute()
     // when plan.work_size > 0, caller must allocate memory for plan.work_data
     GGML_API struct ggml_cplan ggml_graph_plan   (const struct ggml_cgraph * cgraph, int n_threads /*= GGML_DEFAULT_N_THREADS*/);
     GGML_API enum ggml_status  ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan);

     // same as ggml_graph_compute() but the work data is allocated as a part of the context
     // note: the drawback of this API is that you must have ensured that the context has enough memory for the work data
     GGML_API enum ggml_status  ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads);

-    GGML_API struct ggml_tensor * ggml_graph_get_tensor  (struct ggml_cgraph * cgraph, const char * name);
+    GGML_API struct ggml_tensor * ggml_graph_get_tensor  (const struct ggml_cgraph * cgraph, const char * name);
+    GGML_API struct ggml_tensor * ggml_graph_get_grad    (const struct ggml_cgraph * cgraph, const struct ggml_tensor * node);
+    GGML_API struct ggml_tensor * ggml_graph_get_grad_acc(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node);

     GGML_API void                 ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname);
     GGML_API struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context ** ctx_data, struct ggml_context ** ctx_eval);
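A minimal end-to-end sketch (not in the commit) of the graph workflow these declarations describe, using the single-context CPU path; it assumes a context created with no_alloc = false and a single thread.

static void graph_demo(void) {
    struct ggml_init_params params = {
        /*.mem_size   =*/ 16*1024*1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(params);

    // y = a + b on two small f32 vectors
    struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4);
    struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4);
    for (int i = 0; i < 4; ++i) {
        ggml_get_data_f32(a)[i] = (float) i;
        ggml_get_data_f32(b)[i] = 1.0f;
    }

    struct ggml_tensor * y = ggml_add(ctx, a, b);

    struct ggml_cgraph * gf = ggml_new_graph(ctx); // size = GGML_DEFAULT_GRAPH_SIZE, grads = false
    ggml_build_forward_expand(gf, y);

    // work data is taken from ctx, so the memory pool must be large enough
    ggml_graph_compute_with_ctx(ctx, gf, /*n_threads=*/1);

    // y now holds {1, 2, 3, 4}
    ggml_free(ctx);
}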
@@ -2029,197 +2030,14 @@ extern "C" {
     // dump the graph into a file using the dot format
     GGML_API void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph * gf, const char * filename);

-    // build gradient checkpointing backward graph gb for gf using provided checkpoints
-    // gb_tmp will contain original backward graph with rewritten backward process nodes,
-    // but without the second forward pass nodes.
-    GGML_API void ggml_build_backward_gradient_checkpointing(
-            struct ggml_context * ctx,
-            struct ggml_cgraph  * gf,
-            struct ggml_cgraph  * gb,
-            struct ggml_cgraph  * gb_tmp,
-            struct ggml_tensor  * * checkpoints,
-            int                   n_checkpoints);
-    //
-    // optimization
-    //
-
-    // optimization methods
-    enum ggml_opt_type {
-        GGML_OPT_TYPE_ADAM,
-        GGML_OPT_TYPE_LBFGS,
-    };
-
-    // linesearch methods
-    enum ggml_linesearch {
-        GGML_LINESEARCH_DEFAULT = 1,
-
-        GGML_LINESEARCH_BACKTRACKING_ARMIJO       = 0,
-        GGML_LINESEARCH_BACKTRACKING_WOLFE        = 1,
-        GGML_LINESEARCH_BACKTRACKING_STRONG_WOLFE = 2,
-    };
-
-    // optimization return values
-    enum ggml_opt_result {
-        GGML_OPT_RESULT_OK = 0,
-        GGML_OPT_RESULT_DID_NOT_CONVERGE,
-        GGML_OPT_RESULT_NO_CONTEXT,
-        GGML_OPT_RESULT_INVALID_WOLFE,
-        GGML_OPT_RESULT_FAIL,
-        GGML_OPT_RESULT_CANCEL,
-
-        GGML_LINESEARCH_FAIL = -128,
-        GGML_LINESEARCH_MINIMUM_STEP,
-        GGML_LINESEARCH_MAXIMUM_STEP,
-        GGML_LINESEARCH_MAXIMUM_ITERATIONS,
-        GGML_LINESEARCH_INVALID_PARAMETERS,
-    };
-
-    typedef void (*ggml_opt_callback)(void * data, int accum_step, float * sched, bool * cancel);
+    // TODO these functions were sandwiched in the old optimization interface, is there a better place for them?
     typedef void (*ggml_log_callback)(enum ggml_log_level level, const char * text, void * user_data);

-    // optimization parameters
-    //
-    //   see ggml.c (ggml_opt_default_params) for default values
-    //
-    struct ggml_opt_params {
-        enum ggml_opt_type type;
-
-        size_t graph_size;
-
-        int n_threads;
-
-        // delta-based convergence test
-        //
-        //   if past == 0 - disabled
-        //   if past > 0:
-        //     stop if |f(x) - f(x_past)| < delta * max(1, |f(x)|)
-        //
-        int past;
-        float delta;
-
-        // maximum number of iterations without improvement
-        //
-        //   if 0 - disabled
-        //   if > 0:
-        //     assume convergence if no cost improvement in this number of iterations
-        //
-        int max_no_improvement;
-
-        bool print_forward_graph;
-        bool print_backward_graph;
-
-        int n_gradient_accumulation;
-
-        // ADAM parameters
-        struct {
-            int n_iter;
-
-            float sched; // schedule multiplier (fixed, decay or warmup)
-            float decay; // weight decay for AdamW, use 0.0f to disable
-            int   decay_min_ndim; // minimum number of tensor dimension to apply weight decay
-            float alpha; // learning rate
-            float beta1;
-            float beta2;
-            float eps;   // epsilon for numerical stability
-            float eps_f; // epsilon for convergence test
-            float eps_g; // epsilon for convergence test
-            float gclip; // gradient clipping
-        } adam;
-
-        // LBFGS parameters
-        struct {
-            int m; // number of corrections to approximate the inv. Hessian
-            int n_iter;
-            int max_linesearch;
-
-            float eps;      // convergence tolerance
-            float ftol;     // line search tolerance
-            float wolfe;
-            float min_step;
-            float max_step;
-
-            enum ggml_linesearch linesearch;
-        } lbfgs;
-    };
-
-    struct ggml_opt_context {
-        struct ggml_context * ctx;
-        struct ggml_opt_params params;
-
-        int iter;
-        int64_t nx; // number of parameter elements
-
-        bool just_initialized;
-
-        float loss_before;
-        float loss_after;
-
-        struct {
-            struct ggml_tensor * g;  // current gradient
-            struct ggml_tensor * m;  // first moment
-            struct ggml_tensor * v;  // second moment
-            struct ggml_tensor * pf; // past function values
-            float fx_best;
-            float fx_prev;
-            int n_no_improvement;
-        } adam;
-
-        struct {
-            struct ggml_tensor * x;    // current parameters
-            struct ggml_tensor * xp;   // previous parameters
-            struct ggml_tensor * g;    // current gradient
-            struct ggml_tensor * gp;   // previous gradient
-            struct ggml_tensor * d;    // search direction
-            struct ggml_tensor * pf;   // past function values
-            struct ggml_tensor * lmal; // the L-BFGS memory alpha
-            struct ggml_tensor * lmys; // the L-BFGS memory ys
-            struct ggml_tensor * lms;  // the L-BFGS memory s
-            struct ggml_tensor * lmy;  // the L-BFGS memory y
-            float fx_best;
-            float step;
-            int j;
-            int k;
-            int end;
-            int n_no_improvement;
-        } lbfgs;
-    };
+    // Set callback for all future logging events.
+    // If this is not called, or NULL is supplied, everything is output on stderr.
+    GGML_API void ggml_log_set(ggml_log_callback log_callback, void * user_data);

-    GGML_API struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type);
-
-    // optimize the function defined by the tensor f
-    GGML_API enum ggml_opt_result ggml_opt(
-            struct ggml_context * ctx,
-            struct ggml_opt_params params,
-            struct ggml_tensor * f);
-
-    // initialize optimizer context
-    GGML_API void ggml_opt_init(
-            struct ggml_context     * ctx,
-            struct ggml_opt_context * opt,
-            struct ggml_opt_params    params,
-            int64_t                   nx);
-
-    // continue optimizing the function defined by the tensor f
-    GGML_API enum ggml_opt_result ggml_opt_resume(
-            struct ggml_context     * ctx,
-            struct ggml_opt_context * opt,
-            struct ggml_tensor      * f);
-
-    // continue optimizing the function defined by the tensor f
-    GGML_API enum ggml_opt_result ggml_opt_resume_g(
-            struct ggml_context     * ctx,
-            struct ggml_opt_context * opt,
-            struct ggml_tensor      * f,
-            struct ggml_cgraph      * gf,
-            struct ggml_cgraph      * gb,
-            ggml_opt_callback         callback,
-            void                    * callback_data);
-
-    //
-    // tensor flags
-    //
-    GGML_API void ggml_set_input (struct ggml_tensor * tensor);
-    GGML_API void ggml_set_output(struct ggml_tensor * tensor);
+    GGML_API struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor);

     //
     // quantization
@@ -2376,43 +2194,6 @@ extern "C" {
     GGML_API size_t gguf_get_meta_size(const struct gguf_context * ctx);
     GGML_API void   gguf_get_meta_data(const struct gguf_context * ctx, void * data);

-    //
-    // system info
-    //
-
-    GGML_API int ggml_cpu_has_avx        (void);
-    GGML_API int ggml_cpu_has_avx_vnni   (void);
-    GGML_API int ggml_cpu_has_avx2       (void);
-    GGML_API int ggml_cpu_has_avx512     (void);
-    GGML_API int ggml_cpu_has_avx512_vbmi(void);
-    GGML_API int ggml_cpu_has_avx512_vnni(void);
-    GGML_API int ggml_cpu_has_avx512_bf16(void);
-    GGML_API int ggml_cpu_has_fma        (void);
-    GGML_API int ggml_cpu_has_neon       (void);
-    GGML_API int ggml_cpu_has_sve        (void);
-    GGML_API int ggml_cpu_has_arm_fma    (void);
-    GGML_API int ggml_cpu_has_metal      (void);
-    GGML_API int ggml_cpu_has_f16c       (void);
-    GGML_API int ggml_cpu_has_fp16_va    (void);
-    GGML_API int ggml_cpu_has_wasm_simd  (void);
-    GGML_API int ggml_cpu_has_blas       (void);
-    GGML_API int ggml_cpu_has_cuda       (void);
-    GGML_API int ggml_cpu_has_vulkan     (void);
-    GGML_API int ggml_cpu_has_kompute    (void);
-    GGML_API int ggml_cpu_has_gpublas    (void);
-    GGML_API int ggml_cpu_has_sse3       (void);
-    GGML_API int ggml_cpu_has_ssse3      (void);
-    GGML_API int ggml_cpu_has_sycl       (void);
-    GGML_API int ggml_cpu_has_rpc        (void);
-    GGML_API int ggml_cpu_has_vsx        (void);
-    GGML_API int ggml_cpu_has_matmul_int8(void);
-    GGML_API int ggml_cpu_has_cann       (void);
-    GGML_API int ggml_cpu_has_llamafile  (void);
-
     //
     // Internal types and functions exposed for tests and benchmarks
     //

 #ifdef __cplusplus
 // restrict not standard in C++
 #define GGML_RESTRICT
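A brief sketch (not from the commit) of the two GGUF metadata calls left as context above; gguf_ctx stands for an already opened gguf_context.

#include <stdlib.h>

// Copy the serialized GGUF metadata into a caller-owned buffer (gguf_ctx is hypothetical here).
static void * dump_gguf_meta(const struct gguf_context * gguf_ctx, size_t * size_out) {
    const size_t size = gguf_get_meta_size(gguf_ctx);
    void * buf = malloc(size);
    if (buf != NULL) {
        gguf_get_meta_data(gguf_ctx, buf);
    }
    *size_out = size;
    return buf; // caller frees
}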
@@ -2421,34 +2202,18 @@ extern "C" {
 #endif

     typedef void (*ggml_to_float_t)  (const void  * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
     typedef void (*ggml_from_float_t)(const float * GGML_RESTRICT x, void  * GGML_RESTRICT y, int64_t k);
     typedef void (*ggml_from_float_to_mat_t)(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t nr, int64_t k, int64_t bs);
     typedef void (*ggml_vec_dot_t)(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x, size_t bx,
                                    const void * GGML_RESTRICT y, size_t by, int nrc);
     typedef void (*ggml_gemv_t)(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x,
                                 const void * GGML_RESTRICT y, int nr, int nc);
     typedef void (*ggml_gemm_t)(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x,
                                 const void * GGML_RESTRICT y, int nr, int nc);

-    typedef struct {
+    struct ggml_type_traits {
         const char             * type_name;
         int64_t                  blck_size;
         int64_t                  blck_size_interleave; // interleave elements in blocks
         size_t                   type_size;
         bool                     is_quantized;
         ggml_to_float_t          to_float;
         ggml_from_float_t        from_float;
         ggml_from_float_t        from_float_ref;
         ggml_from_float_to_mat_t from_float_to_mat;
         ggml_vec_dot_t           vec_dot;
         enum ggml_type           vec_dot_type;
         int64_t                  nrows; // number of rows to process simultaneously
         int64_t                  ncols; // number of columns to process simultaneously
         ggml_gemv_t              gemv;
         ggml_gemm_t              gemm;
-    } ggml_type_traits_t;
-
-    GGML_API ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type);
+    };
+
+    GGML_API const struct ggml_type_traits * ggml_get_type_traits(enum ggml_type type);

 #ifdef __cplusplus
 }
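To illustrate the renamed traits API (example not in the diff): the lookup now returns a pointer to a named struct instead of a value typedef, so callers read fields through it.

#include <stdio.h>

// Print basic block/size information for a type via the new accessor.
static void print_type_info(enum ggml_type type) {
    const struct ggml_type_traits * tt = ggml_get_type_traits(type);
    printf("%s: blck_size=%lld type_size=%zu quantized=%d\n",
           tt->type_name, (long long) tt->blck_size, tt->type_size, tt->is_quantized);
}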
ggml/src/CMakeLists.txt

 include(CheckCXXCompilerFlag)

 unset(GGML_CDEF_PUBLIC)

 add_compile_definitions(GGML_SCHED_MAX_COPIES=${GGML_SCHED_MAX_COPIES})

 # enable libstdc++ assertions for debug builds

@@ -26,860 +24,6 @@ if (NOT MSVC)
     endif()
 endif()
if
(
APPLE AND GGML_ACCELERATE
)
find_library
(
ACCELERATE_FRAMEWORK Accelerate
)
if
(
ACCELERATE_FRAMEWORK
)
message
(
STATUS
"Accelerate framework found"
)
add_compile_definitions
(
GGML_USE_ACCELERATE
)
add_compile_definitions
(
ACCELERATE_NEW_LAPACK
)
add_compile_definitions
(
ACCELERATE_LAPACK_ILP64
)
set
(
GGML_EXTRA_LIBS
${
GGML_EXTRA_LIBS
}
${
ACCELERATE_FRAMEWORK
}
)
else
()
message
(
WARNING
"Accelerate framework not found"
)
endif
()
endif
()
if
(
GGML_METAL
)
find_library
(
FOUNDATION_LIBRARY Foundation REQUIRED
)
find_library
(
METAL_FRAMEWORK Metal REQUIRED
)
find_library
(
METALKIT_FRAMEWORK MetalKit REQUIRED
)
message
(
STATUS
"Metal framework found"
)
set
(
GGML_HEADERS_METAL ../include/ggml-metal.h
)
set
(
GGML_SOURCES_METAL ggml-metal.m
)
list
(
APPEND GGML_CDEF_PUBLIC GGML_USE_METAL
)
if
(
GGML_METAL_NDEBUG
)
add_compile_definitions
(
GGML_METAL_NDEBUG
)
endif
()
# copy ggml-common.h and ggml-metal.metal to bin directory
configure_file
(
ggml-common.h
${
CMAKE_RUNTIME_OUTPUT_DIRECTORY
}
/ggml-common.h COPYONLY
)
configure_file
(
ggml-metal.metal
${
CMAKE_RUNTIME_OUTPUT_DIRECTORY
}
/ggml-metal.metal COPYONLY
)
if
(
GGML_METAL_EMBED_LIBRARY
)
enable_language
(
ASM
)
add_compile_definitions
(
GGML_METAL_EMBED_LIBRARY
)
set
(
METALLIB_COMMON
"
${
CMAKE_CURRENT_SOURCE_DIR
}
/ggml-common.h"
)
set
(
METALLIB_SOURCE
"
${
CMAKE_CURRENT_SOURCE_DIR
}
/ggml-metal.metal"
)
file
(
MAKE_DIRECTORY
"
${
CMAKE_BINARY_DIR
}
/autogenerated"
)
# merge ggml-common.h and ggml-metal.metal into a single file
set
(
METALLIB_EMBED_ASM
"
${
CMAKE_BINARY_DIR
}
/autogenerated/ggml-metal-embed.s"
)
set
(
METALLIB_SOURCE_EMBED
"
${
CMAKE_BINARY_DIR
}
/autogenerated/ggml-metal-embed.metal"
)
add_custom_command
(
OUTPUT
${
METALLIB_EMBED_ASM
}
COMMAND echo
"Embedding Metal library"
COMMAND sed -e '/\
#include \"ggml-common.h\"/r ${METALLIB_COMMON}' -e '/\#include \"ggml-common.h\"/d' < ${METALLIB_SOURCE} > ${METALLIB_SOURCE_EMBED}
COMMAND echo
".section __DATA,__ggml_metallib"
>
${
METALLIB_EMBED_ASM
}
COMMAND echo
".globl _ggml_metallib_start"
>>
${
METALLIB_EMBED_ASM
}
COMMAND echo
"_ggml_metallib_start:"
>>
${
METALLIB_EMBED_ASM
}
COMMAND echo
".incbin
\\\"
${
METALLIB_SOURCE_EMBED
}
\\\"
"
>>
${
METALLIB_EMBED_ASM
}
COMMAND echo
".globl _ggml_metallib_end"
>>
${
METALLIB_EMBED_ASM
}
COMMAND echo
"_ggml_metallib_end:"
>>
${
METALLIB_EMBED_ASM
}
DEPENDS ggml-metal.metal ggml-common.h
COMMENT
"Generate assembly for embedded Metal library"
)
set
(
GGML_SOURCES_METAL
${
GGML_SOURCES_METAL
}
${
METALLIB_EMBED_ASM
}
)
else
()
if
(
GGML_METAL_SHADER_DEBUG
)
# custom command to do the following:
# xcrun -sdk macosx metal -fno-fast-math -c ggml-metal.metal -o ggml-metal.air
# xcrun -sdk macosx metallib ggml-metal.air -o default.metallib
#
# note: this is the only way I found to disable fast-math in Metal. it's ugly, but at least it works
# disabling fast math is needed in order to pass tests/test-backend-ops
# note: adding -fno-inline fixes the tests when using MTL_SHADER_VALIDATION=1
# note: unfortunately, we have to call it default.metallib instead of ggml.metallib
# ref: https://github.com/ggerganov/whisper.cpp/issues/1720
set
(
XC_FLAGS -fno-fast-math -fno-inline -g
)
else
()
set
(
XC_FLAGS -O3
)
endif
()
# Append macOS metal versioning flags
if
(
GGML_METAL_MACOSX_VERSION_MIN
)
message
(
STATUS
"Adding -mmacosx-version-min=
${
GGML_METAL_MACOSX_VERSION_MIN
}
flag to metal compilation"
)
list
(
APPEND XC_FLAGS -mmacosx-version-min=
${
GGML_METAL_MACOSX_VERSION_MIN
}
)
endif
()
if
(
GGML_METAL_STD
)
message
(
STATUS
"Adding -std=
${
GGML_METAL_STD
}
flag to metal compilation"
)
list
(
APPEND XC_FLAGS -std=
${
GGML_METAL_STD
}
)
endif
()
add_custom_command
(
OUTPUT
${
CMAKE_RUNTIME_OUTPUT_DIRECTORY
}
/default.metallib
COMMAND xcrun -sdk macosx metal
${
XC_FLAGS
}
-c
${
CMAKE_RUNTIME_OUTPUT_DIRECTORY
}
/ggml-metal.metal -o
${
CMAKE_RUNTIME_OUTPUT_DIRECTORY
}
/ggml-metal.air
COMMAND xcrun -sdk macosx metallib
${
CMAKE_RUNTIME_OUTPUT_DIRECTORY
}
/ggml-metal.air -o
${
CMAKE_RUNTIME_OUTPUT_DIRECTORY
}
/default.metallib
COMMAND rm -f
${
CMAKE_RUNTIME_OUTPUT_DIRECTORY
}
/ggml-metal.air
COMMAND rm -f
${
CMAKE_RUNTIME_OUTPUT_DIRECTORY
}
/ggml-common.h
COMMAND rm -f
${
CMAKE_RUNTIME_OUTPUT_DIRECTORY
}
/ggml-metal.metal
DEPENDS ggml-metal.metal ggml-common.h
COMMENT
"Compiling Metal kernels"
)
add_custom_target
(
ggml-metal ALL
DEPENDS
${
CMAKE_RUNTIME_OUTPUT_DIRECTORY
}
/default.metallib
)
endif
()
# GGML_METAL_EMBED_LIBRARY
set
(
GGML_EXTRA_LIBS
${
GGML_EXTRA_LIBS
}
${
FOUNDATION_LIBRARY
}
${
METAL_FRAMEWORK
}
${
METALKIT_FRAMEWORK
}
)
endif
()
if
(
GGML_MUSA
)
set
(
CMAKE_C_COMPILER clang
)
set
(
CMAKE_C_EXTENSIONS OFF
)
set
(
CMAKE_CXX_COMPILER clang++
)
set
(
CMAKE_CXX_EXTENSIONS OFF
)
set
(
GGML_CUDA ON
)
list
(
APPEND GGML_CDEF_PUBLIC GGML_USE_MUSA
)
endif
()
if
(
GGML_OPENMP
)
find_package
(
OpenMP
)
if
(
OpenMP_FOUND
)
message
(
STATUS
"OpenMP found"
)
add_compile_definitions
(
GGML_USE_OPENMP
)
set
(
GGML_EXTRA_LIBS
${
GGML_EXTRA_LIBS
}
OpenMP::OpenMP_C OpenMP::OpenMP_CXX
)
if
(
GGML_MUSA
)
set
(
GGML_EXTRA_INCLUDES
${
GGML_EXTRA_INCLUDES
}
"/usr/lib/llvm-10/include/openmp"
)
set
(
GGML_EXTRA_LIBS
${
GGML_EXTRA_LIBS
}
"/usr/lib/llvm-10/lib/libomp.so"
)
endif
()
else
()
message
(
WARNING
"OpenMP not found"
)
endif
()
endif
()
if
(
GGML_BLAS
)
if
(
GGML_STATIC
)
set
(
BLA_STATIC ON
)
endif
()
#if (CMAKE_VERSION VERSION_GREATER_EQUAL 3.22)
# set(BLA_SIZEOF_INTEGER 8)
#endif()
set
(
BLA_VENDOR
${
GGML_BLAS_VENDOR
}
)
find_package
(
BLAS
)
if
(
BLAS_FOUND
)
message
(
STATUS
"BLAS found, Libraries:
${
BLAS_LIBRARIES
}
"
)
if
((
"
${
BLAS_INCLUDE_DIRS
}
"
STREQUAL
""
)
AND
NOT
(
${
GGML_BLAS_VENDOR
}
MATCHES
"Apple"
))
# BLAS_INCLUDE_DIRS is missing in FindBLAS.cmake.
# see https://gitlab.kitware.com/cmake/cmake/-/issues/20268
find_package
(
PkgConfig REQUIRED
)
if
(
${
GGML_BLAS_VENDOR
}
MATCHES
"Generic"
)
pkg_check_modules
(
DepBLAS REQUIRED blas
)
elseif
(
${
GGML_BLAS_VENDOR
}
MATCHES
"OpenBLAS"
)
# As of openblas v0.3.22, the 64-bit is named openblas64.pc
pkg_check_modules
(
DepBLAS openblas64
)
if
(
NOT DepBLAS_FOUND
)
pkg_check_modules
(
DepBLAS REQUIRED openblas
)
endif
()
elseif
(
${
GGML_BLAS_VENDOR
}
MATCHES
"FLAME"
)
pkg_check_modules
(
DepBLAS REQUIRED blis
)
elseif
(
${
GGML_BLAS_VENDOR
}
MATCHES
"ATLAS"
)
pkg_check_modules
(
DepBLAS REQUIRED blas-atlas
)
elseif
(
${
GGML_BLAS_VENDOR
}
MATCHES
"FlexiBLAS"
)
pkg_check_modules
(
DepBLAS REQUIRED flexiblas_api
)
elseif
(
${
GGML_BLAS_VENDOR
}
MATCHES
"Intel"
)
# all Intel* libraries share the same include path
pkg_check_modules
(
DepBLAS REQUIRED mkl-sdl
)
elseif
(
${
GGML_BLAS_VENDOR
}
MATCHES
"NVHPC"
)
# this doesn't provide pkg-config
# suggest to assign BLAS_INCLUDE_DIRS on your own
if
(
"
${
NVHPC_VERSION
}
"
STREQUAL
""
)
message
(
WARNING
"Better to set NVHPC_VERSION"
)
else
()
set
(
DepBLAS_FOUND ON
)
set
(
DepBLAS_INCLUDE_DIRS
"/opt/nvidia/hpc_sdk/
${
CMAKE_SYSTEM_NAME
}
_
${
CMAKE_SYSTEM_PROCESSOR
}
/
${
NVHPC_VERSION
}
/math_libs/include"
)
endif
()
endif
()
if
(
DepBLAS_FOUND
)
set
(
BLAS_INCLUDE_DIRS
${
DepBLAS_INCLUDE_DIRS
}
)
else
()
message
(
WARNING
"BLAS_INCLUDE_DIRS neither been provided nor been automatically"
" detected by pkgconfig, trying to find cblas.h from possible paths..."
)
find_path
(
BLAS_INCLUDE_DIRS
NAMES cblas.h
HINTS
/usr/include
/usr/local/include
/usr/include/openblas
/opt/homebrew/opt/openblas/include
/usr/local/opt/openblas/include
/usr/include/x86_64-linux-gnu/openblas/include
)
endif
()
endif
()
message
(
STATUS
"BLAS found, Includes:
${
BLAS_INCLUDE_DIRS
}
"
)
add_compile_options
(
${
BLAS_LINKER_FLAGS
}
)
list
(
APPEND GGML_CDEF_PUBLIC GGML_USE_BLAS
)
if
(
${
BLAS_INCLUDE_DIRS
}
MATCHES
"mkl"
AND
(
${
GGML_BLAS_VENDOR
}
MATCHES
"Generic"
OR
${
GGML_BLAS_VENDOR
}
MATCHES
"Intel"
))
add_compile_definitions
(
GGML_BLAS_USE_MKL
)
endif
()
set
(
GGML_HEADERS_BLAS ../include/ggml-blas.h
)
set
(
GGML_SOURCES_BLAS ggml-blas.cpp
)
set
(
GGML_EXTRA_LIBS
${
GGML_EXTRA_LIBS
}
${
BLAS_LIBRARIES
}
)
set
(
GGML_EXTRA_INCLUDES
${
GGML_EXTRA_INCLUDES
}
${
BLAS_INCLUDE_DIRS
}
)
else
()
message
(
WARNING
"BLAS not found, please refer to "
"https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors"
" to set correct GGML_BLAS_VENDOR"
)
endif
()
endif
()
if
(
GGML_LLAMAFILE
)
message
(
STATUS
"Using llamafile"
)
add_compile_definitions
(
GGML_USE_LLAMAFILE
)
set
(
GGML_HEADERS_LLAMAFILE llamafile/sgemm.h
)
set
(
GGML_SOURCES_LLAMAFILE llamafile/sgemm.cpp
)
endif
()
if
(
GGML_CUDA
)
cmake_minimum_required
(
VERSION 3.18
)
# for CMAKE_CUDA_ARCHITECTURES
if
(
GGML_MUSA
)
list
(
APPEND CMAKE_MODULE_PATH
"/usr/local/musa/cmake/"
)
find_package
(
MUSAToolkit
)
set
(
CUDAToolkit_FOUND
${
MUSAToolkit_FOUND
}
)
else
()
find_package
(
CUDAToolkit
)
endif
()
if
(
CUDAToolkit_FOUND
)
message
(
STATUS
"CUDA found"
)
if
(
NOT DEFINED CMAKE_CUDA_ARCHITECTURES
)
# 52 == lowest CUDA 12 standard
# 60 == FP16 CUDA intrinsics
# 61 == integer CUDA intrinsics
# 70 == compute capability at which unrolling a loop in mul_mat_q kernels is faster
if
(
GGML_CUDA_F16 OR GGML_CUDA_DMMV_F16
)
set
(
CMAKE_CUDA_ARCHITECTURES
"60;61;70;75"
)
else
()
set
(
CMAKE_CUDA_ARCHITECTURES
"52;61;70;75"
)
#set(CMAKE_CUDA_ARCHITECTURES "OFF") # use this to compile much faster, but only F16 models work
endif
()
endif
()
message
(
STATUS
"Using CUDA architectures:
${
CMAKE_CUDA_ARCHITECTURES
}
"
)
if
(
GGML_MUSA
)
set
(
CMAKE_CUDA_COMPILER
${
MUSAToolkit_MCC_EXECUTABLE
}
)
else
()
enable_language
(
CUDA
)
endif
()
file
(
GLOB GGML_HEADERS_CUDA
"ggml-cuda/*.cuh"
)
list
(
APPEND GGML_HEADERS_CUDA
"../include/ggml-cuda.h"
)
file
(
GLOB GGML_SOURCES_CUDA
"ggml-cuda/*.cu"
)
list
(
APPEND GGML_SOURCES_CUDA
"ggml-cuda.cu"
)
file
(
GLOB SRCS
"ggml-cuda/template-instances/fattn-wmma*.cu"
)
list
(
APPEND GGML_SOURCES_CUDA
${
SRCS
}
)
file
(
GLOB SRCS
"ggml-cuda/template-instances/mmq*.cu"
)
list
(
APPEND GGML_SOURCES_CUDA
${
SRCS
}
)
if
(
GGML_CUDA_FA_ALL_QUANTS
)
file
(
GLOB SRCS
"ggml-cuda/template-instances/fattn-vec*.cu"
)
list
(
APPEND GGML_SOURCES_CUDA
${
SRCS
}
)
add_compile_definitions
(
GGML_CUDA_FA_ALL_QUANTS
)
else
()
file
(
GLOB SRCS
"ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu"
)
list
(
APPEND GGML_SOURCES_CUDA
${
SRCS
}
)
file
(
GLOB SRCS
"ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu"
)
list
(
APPEND GGML_SOURCES_CUDA
${
SRCS
}
)
file
(
GLOB SRCS
"ggml-cuda/template-instances/fattn-vec*f16-f16.cu"
)
list
(
APPEND GGML_SOURCES_CUDA
${
SRCS
}
)
endif
()
list
(
APPEND GGML_CDEF_PUBLIC GGML_USE_CUDA
)
add_compile_definitions
(
GGML_CUDA_DMMV_X=
${
GGML_CUDA_DMMV_X
}
)
add_compile_definitions
(
GGML_CUDA_MMV_Y=
${
GGML_CUDA_MMV_Y
}
)
add_compile_definitions
(
K_QUANTS_PER_ITERATION=
${
GGML_CUDA_KQUANTS_ITER
}
)
add_compile_definitions
(
GGML_CUDA_PEER_MAX_BATCH_SIZE=
${
GGML_CUDA_PEER_MAX_BATCH_SIZE
}
)
if
(
GGML_CUDA_USE_GRAPHS
)
add_compile_definitions
(
GGML_CUDA_USE_GRAPHS
)
endif
()
if
(
GGML_CUDA_FORCE_DMMV
)
add_compile_definitions
(
GGML_CUDA_FORCE_DMMV
)
endif
()
if
(
GGML_CUDA_FORCE_MMQ
)
add_compile_definitions
(
GGML_CUDA_FORCE_MMQ
)
endif
()
if
(
GGML_CUDA_FORCE_CUBLAS
)
add_compile_definitions
(
GGML_CUDA_FORCE_CUBLAS
)
endif
()
if
(
GGML_CUDA_NO_VMM
)
add_compile_definitions
(
GGML_CUDA_NO_VMM
)
endif
()
if
(
DEFINED GGML_CUDA_DMMV_Y
)
add_compile_definitions
(
GGML_CUDA_MMV_Y=
${
GGML_CUDA_DMMV_Y
}
)
# for backwards compatibility
endif
()
if
(
GGML_CUDA_F16 OR GGML_CUDA_DMMV_F16
)
add_compile_definitions
(
GGML_CUDA_F16
)
endif
()
if
(
GGML_CUDA_NO_PEER_COPY
)
add_compile_definitions
(
GGML_CUDA_NO_PEER_COPY
)
endif
()
if
(
GGML_MUSA
)
set_source_files_properties
(
${
GGML_SOURCES_CUDA
}
PROPERTIES LANGUAGE CXX
)
foreach
(
SOURCE
${
GGML_SOURCES_CUDA
}
)
set_property
(
SOURCE
${
SOURCE
}
PROPERTY COMPILE_FLAGS
"-x musa -mtgpu --cuda-gpu-arch=mp_22"
)
endforeach
()
endif
()
if
(
GGML_STATIC
)
if
(
WIN32
)
# As of 12.3.1 CUDA Toolkit for Windows does not offer a static cublas library
set
(
GGML_EXTRA_LIBS
${
GGML_EXTRA_LIBS
}
CUDA::cudart_static CUDA::cublas CUDA::cublasLt
)
else
()
if
(
GGML_MUSA
)
set
(
GGML_EXTRA_LIBS
${
GGML_EXTRA_LIBS
}
MUSA::musart_static MUSA::mublas_static
)
else
()
set
(
GGML_EXTRA_LIBS
${
GGML_EXTRA_LIBS
}
CUDA::cudart_static CUDA::cublas_static CUDA::cublasLt_static
)
endif
()
endif
()
else
()
if
(
GGML_MUSA
)
set
(
GGML_EXTRA_LIBS
${
GGML_EXTRA_LIBS
}
MUSA::musart MUSA::mublas
)
else
()
set
(
GGML_EXTRA_LIBS
${
GGML_EXTRA_LIBS
}
CUDA::cudart CUDA::cublas CUDA::cublasLt
)
endif
()
endif
()
if
(
GGML_CUDA_NO_VMM
)
# No VMM requested, no need to link directly with the cuda driver lib (libcuda.so)
else
()
if
(
GGML_MUSA
)
set
(
GGML_EXTRA_LIBS
${
GGML_EXTRA_LIBS
}
MUSA::musa_driver
)
# required by muDeviceGetAttribute(), muMemGetAllocationGranularity(...), ...
else
()
set
(
GGML_EXTRA_LIBS
${
GGML_EXTRA_LIBS
}
CUDA::cuda_driver
)
# required by cuDeviceGetAttribute(), cuMemGetAllocationGranularity(...), ...
endif
()
endif
()
else
()
message
(
WARNING
"CUDA not found"
)
endif
()
endif
()
if (GGML_HIPBLAS)
    if (NOT EXISTS $ENV{ROCM_PATH})
        if (NOT EXISTS /opt/rocm)
            set(ROCM_PATH /usr)
        else()
            set(ROCM_PATH /opt/rocm)
        endif()
    else()
        set(ROCM_PATH $ENV{ROCM_PATH})
    endif()

    list(APPEND CMAKE_PREFIX_PATH  ${ROCM_PATH})
    list(APPEND CMAKE_PREFIX_PATH "${ROCM_PATH}/lib64/cmake")

    # CMake on Windows doesn't support the HIP language yet
    if (WIN32)
        set(CXX_IS_HIPCC TRUE)
    else()
        string(REGEX MATCH "hipcc(\.bat)?$" CXX_IS_HIPCC "${CMAKE_CXX_COMPILER}")
    endif()

    if (CXX_IS_HIPCC)
        if (LINUX)
            if (NOT ${CMAKE_CXX_COMPILER_ID} MATCHES "Clang")
                message(WARNING "Only LLVM is supported for HIP, hint: CXX=/opt/rocm/llvm/bin/clang++")
            endif()

            message(WARNING "Setting hipcc as the C++ compiler is legacy behavior."
                            " Prefer setting the HIP compiler directly. See README for details.")
        endif()
    else()
        # Forward AMDGPU_TARGETS to CMAKE_HIP_ARCHITECTURES.
        if (AMDGPU_TARGETS AND NOT CMAKE_HIP_ARCHITECTURES)
            set(CMAKE_HIP_ARCHITECTURES ${AMDGPU_TARGETS})
        endif()
        cmake_minimum_required(VERSION 3.21)
        enable_language(HIP)
    endif()

    find_package(hip     REQUIRED)
    find_package(hipblas REQUIRED)
    find_package(rocblas REQUIRED)

    message(STATUS "HIP and hipBLAS found")

    file(GLOB   GGML_HEADERS_ROCM "ggml-cuda/*.cuh")
    list(APPEND GGML_HEADERS_ROCM "../include/ggml-cuda.h")

    file(GLOB   GGML_SOURCES_ROCM "ggml-cuda/*.cu")
    list(APPEND GGML_SOURCES_ROCM "ggml-cuda.cu")

    file(GLOB   SRCS "ggml-cuda/template-instances/fattn-wmma*.cu")
    list(APPEND GGML_SOURCES_ROCM ${SRCS})
    file(GLOB   SRCS "ggml-cuda/template-instances/mmq*.cu")
    list(APPEND GGML_SOURCES_ROCM ${SRCS})

    if (GGML_CUDA_FA_ALL_QUANTS)
        file(GLOB   SRCS "ggml-cuda/template-instances/fattn-vec*.cu")
        list(APPEND GGML_SOURCES_ROCM ${SRCS})
        add_compile_definitions(GGML_CUDA_FA_ALL_QUANTS)
    else()
        file(GLOB   SRCS "ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu")
        list(APPEND GGML_SOURCES_ROCM ${SRCS})
        file(GLOB   SRCS "ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu")
        list(APPEND GGML_SOURCES_ROCM ${SRCS})
        file(GLOB   SRCS "ggml-cuda/template-instances/fattn-vec*f16-f16.cu")
        list(APPEND GGML_SOURCES_ROCM ${SRCS})
    endif()

    list(APPEND GGML_CDEF_PUBLIC GGML_USE_CUDA)

    add_compile_definitions(GGML_USE_HIPBLAS)
    add_compile_definitions(GGML_CUDA_DMMV_X=${GGML_CUDA_DMMV_X})
    add_compile_definitions(GGML_CUDA_MMV_Y=${GGML_CUDA_MMV_Y})
    add_compile_definitions(K_QUANTS_PER_ITERATION=${GGML_CUDA_KQUANTS_ITER})

    if (GGML_HIP_UMA)
        add_compile_definitions(GGML_HIP_UMA)
    endif()

    if (GGML_CUDA_FORCE_DMMV)
        add_compile_definitions(GGML_CUDA_FORCE_DMMV)
    endif()

    if (GGML_CUDA_FORCE_MMQ)
        add_compile_definitions(GGML_CUDA_FORCE_MMQ)
    endif()

    if (GGML_CUDA_FORCE_CUBLAS)
        add_compile_definitions(GGML_CUDA_FORCE_CUBLAS)
    endif()

    if (GGML_CUDA_NO_PEER_COPY)
        add_compile_definitions(GGML_CUDA_NO_PEER_COPY)
    endif()

    if (CXX_IS_HIPCC)
        set_source_files_properties(${GGML_SOURCES_ROCM} PROPERTIES LANGUAGE CXX)
        set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} hip::device)
    else()
        set_source_files_properties(${GGML_SOURCES_ROCM} PROPERTIES LANGUAGE HIP)
    endif()

    if (GGML_STATIC)
        message(FATAL_ERROR "Static linking not supported for HIP/ROCm")
    endif()

    set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} PUBLIC hip::host roc::rocblas roc::hipblas)
endif()
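# Example (illustrative only): a HIP/ROCm configure line using the options referenced above might be
#   ROCM_PATH=/opt/rocm cmake -B build -DGGML_HIPBLAS=ON -DAMDGPU_TARGETS=gfx1030
# gfx1030 is just a placeholder GPU architecture; AMDGPU_TARGETS is forwarded to CMAKE_HIP_ARCHITECTURES,
# while setting CXX=hipcc instead is the legacy path warned about above.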
if (GGML_SYCL)
    if (NOT GGML_SYCL_TARGET MATCHES "^(INTEL|NVIDIA)$")
        message(FATAL_ERROR "Invalid backend chosen, supported options are INTEL or NVIDIA")
    endif()

    check_cxx_compiler_flag("-fsycl" SUPPORTS_SYCL)

    if (DEFINED ENV{ONEAPI_ROOT})
        message(STATUS "Using oneAPI Release SYCL compiler (icpx).")
    elseif(SUPPORTS_SYCL)
        message(WARNING "Using open-source SYCL compiler (clang++). Didn't detect ENV {ONEAPI_ROOT}.
            If you expected the oneAPI Release compiler, please install oneAPI & source it, like:
            source /opt/intel/oneapi/setvars.sh")
    else()
        message(FATAL_ERROR, "C++ compiler lacks SYCL support.")
    endif()
    message(STATUS "SYCL found")
    #todo: AOT

    list(APPEND GGML_CDEF_PUBLIC GGML_USE_SYCL)

    if (GGML_SYCL_F16)
        add_compile_definitions(GGML_SYCL_F16)
    endif()

    if (GGML_CUDA_FORCE_MMQ)
        add_compile_definitions(GGML_SYCL_FORCE_MMQ)
    endif()

    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing -fsycl")

    if (GGML_SYCL_TARGET STREQUAL "NVIDIA")
        add_compile_definitions(GGML_SYCL_WARP_SIZE=32)
    else()
        add_compile_definitions(GGML_SYCL_WARP_SIZE=16)
    endif()

    file(GLOB   GGML_HEADERS_SYCL "ggml-sycl/*.hpp")
    list(APPEND GGML_HEADERS_SYCL "../include/ggml-sycl.h")

    file(GLOB   GGML_SOURCES_SYCL "ggml-sycl/*.cpp")
    list(APPEND GGML_SOURCES_SYCL "ggml-sycl.cpp")

    if (WIN32)
        find_package(IntelSYCL REQUIRED)
        find_package(MKL       REQUIRED)
        set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} IntelSYCL::SYCL_CXX MKL::MKL MKL::MKL_SYCL)
    else()
        if (GGML_SYCL_TARGET STREQUAL "INTEL")
            set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} -fsycl OpenCL mkl_core pthread m dl mkl_sycl_blas mkl_intel_ilp64 mkl_tbb_thread)
        elseif (GGML_SYCL_TARGET STREQUAL "NVIDIA")
            set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-targets=nvptx64-nvidia-cuda")
            set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} -fsycl pthread m dl onemkl)
        endif()
    endif()
endif()
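# Example (illustrative only): a SYCL build with the oneAPI compiler, matching the checks above, might be
#   source /opt/intel/oneapi/setvars.sh
#   cmake -B build -DGGML_SYCL=ON -DGGML_SYCL_TARGET=INTEL -DCMAKE_CXX_COMPILER=icpx
# GGML_SYCL_TARGET accepts INTEL or NVIDIA; the NVIDIA target additionally appends
# -fsycl-targets=nvptx64-nvidia-cuda as shown above.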
if (GGML_RPC)
    message(STATUS "RPC found")

    list(APPEND GGML_CDEF_PUBLIC GGML_USE_RPC)

    if (WIN32)
        set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} ws2_32)
    endif()

    set(GGML_HEADERS_RPC ../include/ggml-rpc.h)
    set(GGML_SOURCES_RPC ggml-rpc.cpp)
endif()
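# Example (illustrative only): the RPC backend is a plain source/definition toggle, so
#   cmake -B build -DGGML_RPC=ON
# is typically all that is needed; on Windows it additionally links ws2_32 as set above.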
if (GGML_VULKAN)
    find_package(Vulkan COMPONENTS glslc REQUIRED)

    if (Vulkan_FOUND)
        message(STATUS "Vulkan found")

        list(APPEND GGML_CDEF_PUBLIC GGML_USE_VULKAN)

        # Workaround to the "can't dereference invalidated vector iterator" bug in clang-cl debug build
        # Posssibly relevant: https://stackoverflow.com/questions/74748276/visual-studio-no-displays-the-correct-length-of-stdvector
        if (MSVC AND CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
            add_compile_definitions(_ITERATOR_DEBUG_LEVEL=0)
        endif()

        if (GGML_VULKAN_CHECK_RESULTS)
            add_compile_definitions(GGML_VULKAN_CHECK_RESULTS)
        endif()

        if (GGML_VULKAN_DEBUG)
            add_compile_definitions(GGML_VULKAN_DEBUG)
        endif()

        if (GGML_VULKAN_MEMORY_DEBUG)
            add_compile_definitions(GGML_VULKAN_MEMORY_DEBUG)
        endif()

        if (GGML_VULKAN_VALIDATE)
            add_compile_definitions(GGML_VULKAN_VALIDATE)
        endif()

        if (GGML_VULKAN_RUN_TESTS)
            add_compile_definitions(GGML_VULKAN_RUN_TESTS)
        endif()

        add_subdirectory(vulkan-shaders)

        set(_ggml_vk_genshaders_cmd vulkan-shaders-gen)
        set(_ggml_vk_header         ${CMAKE_CURRENT_BINARY_DIR}/ggml-vulkan-shaders.hpp)
        set(_ggml_vk_source         ${CMAKE_CURRENT_BINARY_DIR}/ggml-vulkan-shaders.cpp)
        set(_ggml_vk_input_dir      ${CMAKE_CURRENT_SOURCE_DIR}/vulkan-shaders)
        set(_ggml_vk_output_dir     ${CMAKE_CURRENT_BINARY_DIR}/vulkan-shaders.spv)

        file(GLOB _ggml_vk_shader_deps "${_ggml_vk_input_dir}/*.comp")

        add_custom_command(
            OUTPUT ${_ggml_vk_header}
                   ${_ggml_vk_source}
            COMMAND ${_ggml_vk_genshaders_cmd}
                --glslc      ${Vulkan_GLSLC_EXECUTABLE}
                --input-dir  ${_ggml_vk_input_dir}
                --output-dir ${_ggml_vk_output_dir}
                --target-hpp ${_ggml_vk_header}
                --target-cpp ${_ggml_vk_source}
                --no-clean
            DEPENDS ${_ggml_vk_shader_deps}
            COMMENT "Generate vulkan shaders"
        )

        set(GGML_HEADERS_VULKAN ${CMAKE_CURRENT_SOURCE_DIR}/../include/ggml-vulkan.h ${_ggml_vk_header})
        set(GGML_SOURCES_VULKAN ggml-vulkan.cpp ${_ggml_vk_source})

        set(GGML_EXTRA_LIBS     ${GGML_EXTRA_LIBS}     Vulkan::Vulkan)
        set(GGML_EXTRA_INCLUDES ${GGML_EXTRA_INCLUDES} ${CMAKE_CURRENT_BINARY_DIR})
    else()
        message(WARNING "Vulkan not found")
    endif()
endif()
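# Example (illustrative only): the custom command above effectively invokes the shader generator as
#   vulkan-shaders-gen --glslc /usr/bin/glslc \
#       --input-dir  <src>/vulkan-shaders --output-dir <build>/vulkan-shaders.spv \
#       --target-hpp <build>/ggml-vulkan-shaders.hpp --target-cpp <build>/ggml-vulkan-shaders.cpp --no-clean
# The paths shown are placeholders; the real values come from CMAKE_CURRENT_SOURCE_DIR,
# CMAKE_CURRENT_BINARY_DIR and Vulkan_GLSLC_EXECUTABLE as used in the command.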
if (GGML_KOMPUTE)
    add_compile_definitions(VULKAN_HPP_DISPATCH_LOADER_DYNAMIC=1)

    find_package(Vulkan COMPONENTS glslc REQUIRED)
    find_program(glslc_executable NAMES glslc HINTS Vulkan::glslc)
    if (NOT glslc_executable)
        message(FATAL_ERROR "glslc not found")
    endif()

    function(compile_shader)
        set(options)
        set(oneValueArgs)
        set(multiValueArgs SOURCES)
        cmake_parse_arguments(compile_shader "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
        foreach(source ${compile_shader_SOURCES})
            get_filename_component(filename ${source} NAME)
            set(spv_file ${filename}.spv)
            add_custom_command(
                OUTPUT ${spv_file}
                DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/${source}
                    ${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/common.comp
                    ${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/op_getrows.comp
                    ${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/op_mul_mv_q_n_pre.comp
                    ${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/op_mul_mv_q_n.comp
                COMMAND ${glslc_executable} --target-env=vulkan1.2 -o ${spv_file} ${CMAKE_CURRENT_SOURCE_DIR}/${source}
                COMMENT "Compiling ${source} to ${spv_file}"
            )

            get_filename_component(RAW_FILE_NAME ${spv_file} NAME)
            set(FILE_NAME "shader${RAW_FILE_NAME}")
            string(REPLACE ".comp.spv" ".h" HEADER_FILE ${FILE_NAME})
            string(TOUPPER ${HEADER_FILE} HEADER_FILE_DEFINE)
            string(REPLACE "." "_" HEADER_FILE_DEFINE "${HEADER_FILE_DEFINE}")
            set(OUTPUT_HEADER_FILE "${HEADER_FILE}")
            message(STATUS "${HEADER_FILE} generating ${HEADER_FILE_DEFINE}")
            if(CMAKE_GENERATOR MATCHES "Visual Studio")
                add_custom_command(
                    OUTPUT ${OUTPUT_HEADER_FILE}
                    COMMAND ${CMAKE_COMMAND} -E echo "/*THIS FILE HAS BEEN AUTOMATICALLY GENERATED - DO NOT EDIT*/" > ${OUTPUT_HEADER_FILE}
                    COMMAND ${CMAKE_COMMAND} -E echo \"\#ifndef ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
                    COMMAND ${CMAKE_COMMAND} -E echo \"\#define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
                    COMMAND ${CMAKE_COMMAND} -E echo "namespace kp {" >> ${OUTPUT_HEADER_FILE}
                    COMMAND ${CMAKE_COMMAND} -E echo "namespace shader_data {" >> ${OUTPUT_HEADER_FILE}
                    COMMAND ${CMAKE_BINARY_DIR}/bin/$<CONFIG>/xxd -i ${RAW_FILE_NAME} >> ${OUTPUT_HEADER_FILE}
                    COMMAND ${CMAKE_COMMAND} -E echo "}}" >> ${OUTPUT_HEADER_FILE}
                    COMMAND ${CMAKE_COMMAND} -E echo \"\#endif // define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
                    DEPENDS ${spv_file} xxd
                    COMMENT "Converting to hpp: ${FILE_NAME} ${CMAKE_BINARY_DIR}/bin/$<CONFIG>/xxd"
                )
            else()
                add_custom_command(
                    OUTPUT ${OUTPUT_HEADER_FILE}
                    COMMAND ${CMAKE_COMMAND} -E echo "/*THIS FILE HAS BEEN AUTOMATICALLY GENERATED - DO NOT EDIT*/" > ${OUTPUT_HEADER_FILE}
                    COMMAND ${CMAKE_COMMAND} -E echo \"\#ifndef ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
                    COMMAND ${CMAKE_COMMAND} -E echo \"\#define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
                    COMMAND ${CMAKE_COMMAND} -E echo "namespace kp {" >> ${OUTPUT_HEADER_FILE}
                    COMMAND ${CMAKE_COMMAND} -E echo "namespace shader_data {" >> ${OUTPUT_HEADER_FILE}
                    COMMAND ${CMAKE_BINARY_DIR}/bin/xxd -i ${RAW_FILE_NAME} >> ${OUTPUT_HEADER_FILE}
                    COMMAND ${CMAKE_COMMAND} -E echo "}}" >> ${OUTPUT_HEADER_FILE}
                    COMMAND ${CMAKE_COMMAND} -E echo \"\#endif // define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE}
                    DEPENDS ${spv_file} xxd
                    COMMENT "Converting to hpp: ${FILE_NAME} ${CMAKE_BINARY_DIR}/bin/xxd"
                )
            endif()
        endforeach()
    endfunction()

    if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/kompute/CMakeLists.txt")
        message(STATUS "Kompute found")
        set(KOMPUTE_OPT_LOG_LEVEL Error CACHE STRING "Kompute log level")
        add_subdirectory(kompute)

        # Compile our shaders
        compile_shader(SOURCES
            kompute-shaders/op_scale.comp
            kompute-shaders/op_scale_8.comp
            kompute-shaders/op_add.comp
            kompute-shaders/op_addrow.comp
            kompute-shaders/op_mul.comp
            kompute-shaders/op_silu.comp
            kompute-shaders/op_relu.comp
            kompute-shaders/op_gelu.comp
            kompute-shaders/op_softmax.comp
            kompute-shaders/op_norm.comp
            kompute-shaders/op_rmsnorm.comp
            kompute-shaders/op_diagmask.comp
            kompute-shaders/op_mul_mat_mat_f32.comp
            kompute-shaders/op_mul_mat_f16.comp
            kompute-shaders/op_mul_mat_q8_0.comp
            kompute-shaders/op_mul_mat_q4_0.comp
            kompute-shaders/op_mul_mat_q4_1.comp
            kompute-shaders/op_mul_mat_q6_k.comp
            kompute-shaders/op_getrows_f32.comp
            kompute-shaders/op_getrows_f16.comp
            kompute-shaders/op_getrows_q4_0.comp
            kompute-shaders/op_getrows_q4_1.comp
            kompute-shaders/op_getrows_q6_k.comp
            kompute-shaders/op_rope_f16.comp
            kompute-shaders/op_rope_f32.comp
            kompute-shaders/op_cpy_f16_f16.comp
            kompute-shaders/op_cpy_f16_f32.comp
            kompute-shaders/op_cpy_f32_f16.comp
            kompute-shaders/op_cpy_f32_f32.comp
        )

        # Create a custom target for our generated shaders
        add_custom_target(generated_shaders DEPENDS
            shaderop_scale.h
            shaderop_scale_8.h
            shaderop_add.h
            shaderop_addrow.h
            shaderop_mul.h
            shaderop_silu.h
            shaderop_relu.h
            shaderop_gelu.h
            shaderop_softmax.h
            shaderop_norm.h
            shaderop_rmsnorm.h
            shaderop_diagmask.h
            shaderop_mul_mat_mat_f32.h
            shaderop_mul_mat_f16.h
            shaderop_mul_mat_q8_0.h
            shaderop_mul_mat_q4_0.h
            shaderop_mul_mat_q4_1.h
            shaderop_mul_mat_q6_k.h
            shaderop_getrows_f32.h
            shaderop_getrows_f16.h
            shaderop_getrows_q4_0.h
            shaderop_getrows_q4_1.h
            shaderop_getrows_q6_k.h
            shaderop_rope_f16.h
            shaderop_rope_f32.h
            shaderop_cpy_f16_f16.h
            shaderop_cpy_f16_f32.h
            shaderop_cpy_f32_f16.h
            shaderop_cpy_f32_f32.h
        )

        # Create a custom command that depends on the generated_shaders
        add_custom_command(
            OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp
            COMMAND ${CMAKE_COMMAND} -E touch ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp
            DEPENDS generated_shaders
            COMMENT "Ensuring shaders are generated before compiling ggml-kompute.cpp"
        )

        # Add the stamp to the main sources to ensure dependency tracking
        set(GGML_SOURCES_KOMPUTE ggml-kompute.cpp ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp)
        set(GGML_HEADERS_KOMPUTE ../include/ggml-kompute.h ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp)

        list(APPEND GGML_CDEF_PUBLIC GGML_USE_KOMPUTE)

        set(GGML_EXTRA_LIBS     ${GGML_EXTRA_LIBS}     kompute)
        set(GGML_EXTRA_INCLUDES ${GGML_EXTRA_INCLUDES} ${CMAKE_CURRENT_BINARY_DIR})
    else()
        message(WARNING "Kompute not found")
    endif()
endif()
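# Example (illustrative only): for a single shader, compile_shader() above boils down to roughly
#   glslc --target-env=vulkan1.2 -o op_scale.comp.spv <src>/kompute-shaders/op_scale.comp
#   xxd -i op_scale.comp.spv >> shaderop_scale.h    # wrapped in the kp::shader_data namespaces
# so each .comp source ends up embedded as a generated header consumed by ggml-kompute.cpp.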
if (GGML_CPU_HBM)
    find_library(memkind memkind REQUIRED)

    message(STATUS "Using memkind for CPU HBM")

    add_compile_definitions(GGML_USE_CPU_HBM)

    target_link_libraries(ggml PUBLIC memkind)
endif()
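# Example (illustrative only): this path requires libmemkind to be installed;
#   cmake -B build -DGGML_CPU_HBM=ON
# fails at configure time if find_library() cannot locate memkind.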
if (GGML_CANN)
    if ("cann${CANN_INSTALL_DIR}" STREQUAL "cann" AND DEFINED ENV{ASCEND_TOOLKIT_HOME})
        set(CANN_INSTALL_DIR $ENV{ASCEND_TOOLKIT_HOME})
        message(STATUS "CANN: updated CANN_INSTALL_DIR from ASCEND_TOOLKIT_HOME=$ENV{ASCEND_TOOLKIT_HOME}")
    endif()

    if (CANN_INSTALL_DIR)
        # Only Support Linux.
        if (GGML_CANN)
            if (NOT UNIX)
                set(GGML_CANN OFF)
                message(WARNING "CANN: CANN toolkit supports unix but not ${CMAKE_SYSTEM_NAME}. Turning off GGML_CANN")
            endif()
        endif()

        # Supported platforms: x86-64, arm64
        if (GGML_CANN)
            if (CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
            elseif (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "amd64")
            else()
                set(GGML_CANN OFF)
                message(WARNING "CANN: CANN toolkit supports x86-64 and arm64 but not ${CMAKE_SYSTEM_PROCESSOR}. Turning off GGML_CANN")
            endif()
        endif()

        # Set header and libs
        if (GGML_CANN)
            set(CANN_INCLUDE_DIRS
                ${CANN_INSTALL_DIR}/include
                ${CANN_INSTALL_DIR}/include/aclnn
                ${CANN_INSTALL_DIR}/acllib/include)

            add_subdirectory(ggml-cann/kernels)
            list(APPEND CANN_LIBRARIES
                ascendcl
                nnopbase
                opapi
                acl_op_compiler
                ascendc_kernels
            )

            set(GGML_HEADERS_CANN "../include/ggml-cann.h")
            file(GLOB GGML_SOURCES_CANN "ggml-cann/*.cpp")
            list(APPEND GGML_SOURCES_CANN "ggml-cann.cpp")

            message(STATUS "CANN: CANN_INCLUDE_DIRS = ${CANN_INCLUDE_DIRS}")
            message(STATUS "CANN: CANN_LIBRARIES = ${CANN_LIBRARIES}")

            set(GGML_EXTRA_LIBS     ${GGML_EXTRA_LIBS}     ${CANN_LIBRARIES})
            set(GGML_EXTRA_INCLUDES ${GGML_EXTRA_INCLUDES} ${CANN_INCLUDE_DIRS})
            set(GGML_EXTRA_LIBDIRS  ${GGML_EXTRA_LIBDIRS}  ${CANN_INSTALL_DIR}/lib64)

            list(APPEND GGML_CDEF_PUBLIC GGML_USE_CANN)
        endif()
    else()
        set(GGML_CANN OFF)
        message(WARNING "CANN: Can't find CANN_INSTALL_DIR, do you forget to source set_var.sh. Turning off GGML_CANN")
    endif()

    if (NOT GGML_CANN)
        message(WARNING "CANN: GGML_CANN is turned OFF, see above for details.")
    endif()
endif()
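# Example (illustrative only): CANN_INSTALL_DIR can be passed explicitly or, as above, derived from
# the ASCEND_TOOLKIT_HOME environment variable (exported when the toolkit's set_var.sh is sourced):
#   cmake -B build -DGGML_CANN=ON -DCANN_INSTALL_DIR=$ASCEND_TOOLKIT_HOME
# Headers are then expected under ${CANN_INSTALL_DIR}/include and libraries under ${CANN_INSTALL_DIR}/lib64.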
function(get_flags CCID CCVER)
    set(C_FLAGS   "")
    set(CXX_FLAGS "")
...
@@ -897,12 +41,6 @@ function(get_flags CCID CCVER)
    elseif (CCID STREQUAL "GNU")
        set(C_FLAGS   -Wdouble-promotion)
        set(CXX_FLAGS -Wno-array-bounds)

        if (NOT GGML_MUSA)
            if (CCVER VERSION_GREATER_EQUAL 7.1.0)
                list(APPEND CXX_FLAGS -Wno-format-truncation)
            endif()
        endif()
        if (CCVER VERSION_GREATER_EQUAL 8.1.0)
            list(APPEND CXX_FLAGS -Wextra-semi)
        endif()
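# Worked example (illustrative only): for CCID=GNU and CCVER=12.2.0 this branch produces
#   C_FLAGS   = -Wdouble-promotion
#   CXX_FLAGS = -Wno-array-bounds;-Wno-format-truncation;-Wextra-semi
# since 12.2.0 satisfies both the 7.1.0 and 8.1.0 version checks (assuming GGML_MUSA is off).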
...
@@ -942,54 +80,6 @@ if (GGML_ALL_WARNINGS)
    endif()
endif()

set(CUDA_CXX_FLAGS "")

if (GGML_CUDA)
    set(CUDA_FLAGS -use_fast_math)

    if (GGML_FATAL_WARNINGS)
        list(APPEND CUDA_FLAGS -Werror all-warnings)
    endif()

    if (GGML_ALL_WARNINGS AND NOT MSVC)
        set(NVCC_CMD ${CMAKE_CUDA_COMPILER} .c)
        if (NOT CMAKE_CUDA_HOST_COMPILER STREQUAL "")
            list(APPEND NVCC_CMD -ccbin ${CMAKE_CUDA_HOST_COMPILER})
        endif()

        execute_process(
            COMMAND ${NVCC_CMD} -Xcompiler --version
            OUTPUT_VARIABLE CUDA_CCFULLVER
            ERROR_QUIET
        )

        if (NOT CUDA_CCFULLVER MATCHES clang)
            set(CUDA_CCID "GNU")
            execute_process(
                COMMAND ${NVCC_CMD} -Xcompiler "-dumpfullversion -dumpversion"
                OUTPUT_VARIABLE CUDA_CCVER
                ERROR_QUIET
            )
        else()
            if (CUDA_CCFULLVER MATCHES Apple)
                set(CUDA_CCID "AppleClang")
            else()
                set(CUDA_CCID "Clang")
            endif()
            string(REGEX REPLACE "^.* version ([0-9.]*).*$" "\\1" CUDA_CCVER ${CUDA_CCFULLVER})
        endif()

        message("-- CUDA host compiler is ${CUDA_CCID} ${CUDA_CCVER}")

        get_flags(${CUDA_CCID} ${CUDA_CCVER})
        list(APPEND CUDA_CXX_FLAGS ${CXX_FLAGS} ${GF_CXX_FLAGS}) # This is passed to -Xcompiler later
    endif()

    if (NOT MSVC)
        list(APPEND CUDA_CXX_FLAGS -Wno-pedantic)
    endif()
endif()
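# Worked example (illustrative only): if `nvcc -Xcompiler --version` reports a GNU host compiler,
# CUDA_CCID becomes "GNU" and CUDA_CCVER comes from -dumpfullversion (e.g. 11.4.0); for a clang host
# the version is taken from the "... version X.Y.Z ..." banner by the regex above. The collected
# warning flags are then forwarded to nvcc through -Xcompiler further below.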
if (GGML_LTO)
    include(CheckIPOSupported)
    check_ipo_supported(RESULT result OUTPUT output)
...
@@ -1047,167 +137,6 @@ if (NOT MSVC)
    endif()
endif()

set(ARCH_FLAGS "")

if (CMAKE_OSX_ARCHITECTURES      STREQUAL "arm64" OR
    CMAKE_GENERATOR_PLATFORM_LWR STREQUAL "arm64" OR
    (NOT CMAKE_OSX_ARCHITECTURES AND
     NOT CMAKE_GENERATOR_PLATFORM_LWR AND
         CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm.*|ARM64)$"))

    message(STATUS "ARM detected")

    if (MSVC)
        add_compile_definitions(__aarch64__) # MSVC defines _M_ARM64 instead
        add_compile_definitions(__ARM_NEON)
        add_compile_definitions(__ARM_FEATURE_FMA)

        set(CMAKE_REQUIRED_FLAGS_PREV ${CMAKE_REQUIRED_FLAGS})
        string(JOIN " " CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS} "/arch:armv8.2")

        check_cxx_source_compiles("#include <arm_neon.h>\nint main() { int8x16_t _a, _b; int32x4_t _s = vdotq_s32(_s, _a, _b); return 0; }" GGML_COMPILER_SUPPORT_DOTPROD)
        if (GGML_COMPILER_SUPPORT_DOTPROD)
            add_compile_definitions(__ARM_FEATURE_DOTPROD)
        endif()

        check_cxx_source_compiles("#include <arm_neon.h>\nint main() { int8x16_t _a, _b; int32x4_t _s = vmlaq_f32(_s, _a, _b); return 0; }" GGML_COMPILER_SUPPORT_MATMUL_INT8)
        if (GGML_COMPILER_SUPPORT_MATMUL_INT8)
            add_compile_definitions(__ARM_FEATURE_MATMUL_INT8)
        endif()

        check_cxx_source_compiles("#include <arm_neon.h>\nint main() { float16_t _a; float16x8_t _s = vdupq_n_f16(_a); return 0; }" GGML_COMPILER_SUPPORT_FP16_VECTOR_ARITHMETIC)
        if (GGML_COMPILER_SUPPORT_FP16_VECTOR_ARITHMETIC)
            add_compile_definitions(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
        endif()

        set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_PREV})
    else()
        check_cxx_compiler_flag(-mfp16-format=ieee COMPILER_SUPPORTS_FP16_FORMAT_I3E)
        if (NOT "${COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "")
            list(APPEND ARCH_FLAGS -mfp16-format=ieee)
        endif()
        if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv6")
            # Raspberry Pi 1, Zero
            list(APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access)
        endif()
        if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv7")
            if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Android")
                # Android armeabi-v7a
                list(APPEND ARCH_FLAGS -mfpu=neon-vfpv4 -mno-unaligned-access -funsafe-math-optimizations)
            else()
                # Raspberry Pi 2
                list(APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access -funsafe-math-optimizations)
            endif()
        endif()
        if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv8")
            # Android arm64-v8a
            # Raspberry Pi 3, 4, Zero 2 (32-bit)
            list(APPEND ARCH_FLAGS -mno-unaligned-access)
        endif()
        if (GGML_SVE)
            list(APPEND ARCH_FLAGS -march=armv8.6-a+sve)
        endif()
    endif()
elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LWR MATCHES "^(x86_64|i686|amd64|x64|win32)$" OR
        (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND
         CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|i686|AMD64)$"))
    message(STATUS "x86 detected")
    if (MSVC)
        # instruction set detection for MSVC only
        if (GGML_NATIVE)
            # TODO: improve, should not reference files from the parent folder
            include(../cmake/FindSIMD.cmake)
        endif()
        if (GGML_AVX512)
            list(APPEND ARCH_FLAGS /arch:AVX512)
            # MSVC has no compile-time flags enabling specific
            # AVX512 extensions, neither it defines the
            # macros corresponding to the extensions.
            # Do it manually.
            if (GGML_AVX512_VBMI)
                add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512VBMI__>)
                add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512VBMI__>)
            endif()
            if (GGML_AVX512_VNNI)
                add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512VNNI__>)
                add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512VNNI__>)
            endif()
            if (GGML_AVX512_BF16)
                add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512BF16__>)
                add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512BF16__>)
            endif()
        elseif (GGML_AVX2)
            list(APPEND ARCH_FLAGS /arch:AVX2)
        elseif (GGML_AVX)
            list(APPEND ARCH_FLAGS /arch:AVX)
        endif()
    else()
        if (GGML_NATIVE)
            list(APPEND ARCH_FLAGS -march=native)
        endif()
        if (GGML_F16C)
            list(APPEND ARCH_FLAGS -mf16c)
        endif()
        if (GGML_FMA)
            list(APPEND ARCH_FLAGS -mfma)
        endif()
        if (GGML_AVX)
            list(APPEND ARCH_FLAGS -mavx)
        endif()
        if (GGML_AVX2)
            list(APPEND ARCH_FLAGS -mavx2)
        endif()
        if (GGML_AVX512)
            list(APPEND ARCH_FLAGS -mavx512f)
            list(APPEND ARCH_FLAGS -mavx512bw)
        endif()
        if (GGML_AVX512_VBMI)
            list(APPEND ARCH_FLAGS -mavx512vbmi)
        endif()
        if (GGML_AVX512_VNNI)
            list(APPEND ARCH_FLAGS -mavx512vnni)
        endif()
        if (GGML_AVX512_BF16)
            list(APPEND ARCH_FLAGS -mavx512bf16)
        endif()
    endif()
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64")
    message(STATUS "PowerPC detected")
    if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64le")
        list(APPEND ARCH_FLAGS -mcpu=powerpc64le)
    else()
        list(APPEND ARCH_FLAGS -mcpu=native -mtune=native)
        #TODO: Add targets for Power8/Power9 (Altivec/VSX) and Power10(MMA) and query for big endian systems (ppc64/le/be)
    endif()
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "loongarch64")
    message(STATUS "loongarch64 detected")

    list(APPEND ARCH_FLAGS -march=loongarch64)
    if (GGML_LASX)
        list(APPEND ARCH_FLAGS -mlasx)
    endif()
    if (GGML_LSX)
        list(APPEND ARCH_FLAGS -mlsx)
    endif()
else()
    message(STATUS "Unknown architecture")
endif()
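# Worked example (illustrative only): on a non-MSVC x86-64 build with GGML_NATIVE=OFF and
# GGML_F16C/GGML_FMA/GGML_AVX/GGML_AVX2 enabled, the chain above yields
#   ARCH_FLAGS = -mf16c;-mfma;-mavx;-mavx2
# while GGML_NATIVE=ON adds -march=native on top of whichever of these options are enabled.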
add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:${ARCH_FLAGS}>")
add_compile_options("$<$<COMPILE_LANGUAGE:C>:${ARCH_FLAGS}>")

if (GGML_CUDA)
    list(APPEND CUDA_CXX_FLAGS ${ARCH_FLAGS})
    list(JOIN   CUDA_CXX_FLAGS " " CUDA_CXX_FLAGS_JOINED) # pass host compiler flags as a single argument
    if (NOT CUDA_CXX_FLAGS_JOINED STREQUAL "")
        list(APPEND CUDA_FLAGS -Xcompiler ${CUDA_CXX_FLAGS_JOINED})
    endif()
    add_compile_options("$<$<COMPILE_LANGUAGE:CUDA>:${CUDA_FLAGS}>")
endif()
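# Worked example (illustrative only): with the flags collected above, nvcc might end up invoked as
#   nvcc -use_fast_math -Xcompiler "-Wno-array-bounds -mavx2 -mfma" ...
# i.e. the joined host-compiler flags travel through a single -Xcompiler argument.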
if (MINGW)
    # Target Windows 8 for PrefetchVirtualMemory
    add_compile_definitions(_WIN32_WINNT=${GGML_WIN_VER})
...
@@ -1221,19 +150,19 @@ endif()
# CLOCK_MONOTONIC came in POSIX.1-2001 / SUSv3 as optional
# posix_memalign came in POSIX.1-2001 / SUSv3
# M_PI is an XSI extension since POSIX.1-2001 / SUSv3, came in XPG1 (1985)
add_compile_definitions(_XOPEN_SOURCE=600)

# Somehow in OpenBSD whenever POSIX conformance is specified
# some string functions rely on locale_t availability,
# which was introduced in POSIX.1-2008, forcing us to go higher
if (CMAKE_SYSTEM_NAME MATCHES "OpenBSD")
    remove_definitions(-D_XOPEN_SOURCE=600)
    add_compile_definitions(_XOPEN_SOURCE=700)
else()
    add_compile_definitions(_XOPEN_SOURCE=600)
endif()

# Data types, macros and functions related to controlling CPU affinity and
# some memory allocation are available on Linux through GNU extensions in libc
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
if (CMAKE_SYSTEM_NAME MATCHES "Linux" OR CMAKE_SYSTEM_NAME MATCHES "Android")
    add_compile_definitions(_GNU_SOURCE)
endif()
...
@@ -1271,55 +200,89 @@ if (WIN32)
    endif()
endif()

#
# libraries
#

# ggml

add_library(ggml
add_library(ggml-base
            ../include/ggml.h
            ../include/ggml-alloc.h
            ../include/ggml-backend.h
            ../include/ggml-cpp.h
            ../include/ggml-opt.h
            ggml.c
            ggml-alloc.c
            ggml-backend.c
            ggml-backend.cpp
            ggml-opt.cpp
            ggml-threading.cpp
            ggml-threading.h
            ggml-quants.c
            ggml-quants.h
            ${GGML_SOURCES_CUDA}      ${GGML_HEADERS_CUDA}
            ${GGML_SOURCES_METAL}     ${GGML_HEADERS_METAL}
            ${GGML_SOURCES_RPC}       ${GGML_HEADERS_RPC}
            ${GGML_SOURCES_EXTRA}     ${GGML_HEADERS_EXTRA}
            ${GGML_SOURCES_SYCL}      ${GGML_HEADERS_SYCL}
            ${GGML_SOURCES_KOMPUTE}   ${GGML_HEADERS_KOMPUTE}
            ${GGML_SOURCES_VULKAN}    ${GGML_HEADERS_VULKAN}
            ${GGML_SOURCES_ROCM}      ${GGML_HEADERS_ROCM}
            ${GGML_SOURCES_BLAS}      ${GGML_HEADERS_BLAS}
            ${GGML_SOURCES_LLAMAFILE} ${GGML_HEADERS_LLAMAFILE}
            ${GGML_SOURCES_CANN}      ${GGML_HEADERS_CANN}
            ggml-aarch64.c ggml-aarch64.h
            )
            ggml-aarch64.c
            ggml-aarch64.h
            )

if (EMSCRIPTEN)
    set_target_properties(ggml PROPERTIES COMPILE_FLAGS "-msimd128")
endif()

target_include_directories(ggml-base PRIVATE .)

target_compile_definitions(ggml PUBLIC ${GGML_CDEF_PUBLIC})
target_include_directories(ggml PUBLIC ../include)
target_include_directories(ggml PRIVATE . ${GGML_EXTRA_INCLUDES})
target_link_directories   (ggml PRIVATE ${GGML_EXTRA_LIBDIRS})
target_compile_features   (ggml PRIVATE c_std_11) # don't bump

add_library(ggml
            ggml-backend-reg.cpp)

target_link_libraries(ggml PUBLIC ggml-base)

function(ggml_add_backend backend)
    string(TOUPPER "GGML_${backend}" backend_id)
    if (${backend_id})
        string(TOLOWER "ggml-${backend}" backend_target)
        add_subdirectory(${backend_target})
        # check again in case the backend disabled itself
        # note that this should NOT be the normal behavior, in case of errors the backend should fail the build
        # however, currently it is necessary for AMX, since it is enabled by default on llama.cpp
        if (${backend_id})
            message(STATUS "Including ${backend} backend")
            if (${BUILD_SHARED_LIBS})
                target_compile_definitions(${backend_target} PRIVATE GGML_BACKEND_BUILD)
                target_compile_definitions(${backend_target} PUBLIC  GGML_BACKEND_SHARED)
            endif()
            install(TARGETS ${backend_target} LIBRARY)
            target_link_libraries(ggml PUBLIC ${backend_target})
            string(TOUPPER "GGML_USE_${backend}" backend_use)
            target_compile_definitions(ggml PUBLIC ${backend_use})
        endif()
    endif()
endfunction()
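# Example (illustrative only): ggml_add_backend(CUDA) checks the GGML_CUDA option, adds the
# ggml-cuda subdirectory, links the ggml-cuda target into ggml and defines GGML_USE_CUDA on it;
# with BUILD_SHARED_LIBS it also sets GGML_BACKEND_BUILD / GGML_BACKEND_SHARED on the backend target.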
target_link_libraries(ggml PRIVATE Threads::Threads ${GGML_EXTRA_LIBS})

ggml_add_backend(CPU)
ggml_add_backend(AMX)
ggml_add_backend(BLAS)
ggml_add_backend(CANN)
ggml_add_backend(CUDA)
ggml_add_backend(HIP)
ggml_add_backend(Kompute)
ggml_add_backend(METAL)
ggml_add_backend(RPC)
ggml_add_backend(SYCL)
ggml_add_backend(Vulkan)
ggml_add_backend(MUSA)

foreach (target ggml-base ggml)
    target_include_directories(${target} PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include> $<INSTALL_INTERFACE:include>)
    target_compile_features   (${target} PRIVATE c_std_11) # don't bump
endforeach()

target_link_libraries(ggml-base PRIVATE Threads::Threads)

find_library(MATH_LIBRARY m)

if (MATH_LIBRARY)
    if (NOT WIN32 OR NOT GGML_SYCL)
        target_link_libraries(ggml PRIVATE ${MATH_LIBRARY})
    if (NOT WIN32 OR NOT DEFINED ENV{ONEAPI_ROOT})
        target_link_libraries(ggml-base PRIVATE m)
    endif()
endif()

if (CMAKE_SYSTEM_NAME MATCHES "Android")
    target_link_libraries(ggml-base PRIVATE dl)
endif()

if (BUILD_SHARED_LIBS)
    set_target_properties(ggml PROPERTIES POSITION_INDEPENDENT_CODE ON)
    target_compile_definitions(ggml PRIVATE GGML_SHARED GGML_BUILD)
    foreach (target ggml-base ggml)
        set_target_properties(${target} PROPERTIES POSITION_INDEPENDENT_CODE ON)
        target_compile_definitions(${target} PRIVATE GGML_BUILD)
        target_compile_definitions(${target} PUBLIC  GGML_SHARED)
    endforeach()
endif()
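# Example (illustrative only): a project that vendors this tree consumes the targets directly, e.g.
#   add_subdirectory(ggml)
#   target_link_libraries(my_app PRIVATE ggml)
# where my_app is a placeholder target; linking ggml pulls in ggml-base and any enabled backends
# through the PUBLIC link dependencies set above.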