OpenDAS / ollama · Commits

Commit 22885aea, authored Aug 12, 2023 by Jeffrey Morgan
Parent: ed969d2a

update `llama.cpp` to `f64d44a`

Showing 19 changed files with 2185 additions and 1061 deletions
Files changed:

  llm/ggml-alloc.c       +9     -1
  llm/ggml-alloc.h       +1     -1
  llm/ggml-cuda.cu       +1336  -694
  llm/ggml-cuda.h        +1     -1
  llm/ggml-metal.h       +1     -1
  llm/ggml-metal.m       +40    -19
  llm/ggml-metal.metal   +1     -1
  llm/ggml-mpi.c         +1     -1
  llm/ggml-mpi.h         +1     -1
  llm/ggml-opencl.cpp    +1     -1
  llm/ggml-opencl.h      +1     -1
  llm/ggml.c             +397   -151
  llm/ggml.h             +114   -33
  llm/k_quants.c         +1     -1
  llm/k_quants.h         +1     -1
  llm/llama-util.h       +42    -2
  llm/llama.cpp          +215   -147
  llm/llama.go           +3     -2
  llm/llama.h            +19    -2
llm/ggml-alloc.c

 /**
- * llama.cpp - git 8183159cf3def112f6d1fe94815fce70e1bffa12
+ * llama.cpp - git f64d44a9b9581cd58f7ec40f4fa1c3ca5ca18e1e
  *
  * MIT License
  *
...
...
@@ -420,6 +420,14 @@ static void allocate_node(struct ggml_allocr * alloc, struct ggml_tensor * node)
             if (parent == NULL) {
                 break;
             }
+
+            // if the node's data is external, then we cannot re-use it
+            if ((char *) parent->data < (char *) alloc->data ||
+                (char *) parent->data >= ((char *) alloc->data + alloc->size)) {
+                AT_PRINTF("not reusing parent %s for %s as %p is external\n", parent->name, node->name, parent->data);
+                continue;
+            }
+
             struct hash_node * p_hn = hash_get(ht, parent);
             if (parent->data != NULL && p_hn->n_children == 1 && p_hn->n_views == 0 && ggml_are_same_layout(node, parent)) {
                 if (ggml_is_view(parent)) {
...
...
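The added guard simply tests whether the parent tensor's buffer lies inside the allocator's own region before attempting reuse. A minimal standalone sketch of that address-range test (a hypothetical is_external helper, not part of the diff):

    #include <cstddef>

    struct region { char * data; size_t size; };

    // true when p lies outside [r->data, r->data + r->size), i.e. the
    // memory is "external" and must not be reused by the allocator
    static bool is_external(const region * r, const void * p) {
        const char * c = (const char *) p;
        return c < r->data || c >= r->data + r->size;
    }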
llm/ggml-alloc.h

 /**
- * llama.cpp - git 8183159cf3def112f6d1fe94815fce70e1bffa12
+ * llama.cpp - git f64d44a9b9581cd58f7ec40f4fa1c3ca5ca18e1e
  *
  * MIT License
  *
...
...
llm/ggml-cuda.cu

(diff collapsed; not shown in this view)
llm/ggml-cuda.h

 /**
- * llama.cpp - git 8183159cf3def112f6d1fe94815fce70e1bffa12
+ * llama.cpp - git f64d44a9b9581cd58f7ec40f4fa1c3ca5ca18e1e
  *
  * MIT License
  *
...
...
llm/ggml-metal.h

 //go:build darwin
 /**
- * llama.cpp - git 8183159cf3def112f6d1fe94815fce70e1bffa12
+ * llama.cpp - git f64d44a9b9581cd58f7ec40f4fa1c3ca5ca18e1e
  *
  * MIT License
  *
...
...
llm/ggml-metal.m

 //go:build darwin
 /**
- * llama.cpp - git 8183159cf3def112f6d1fe94815fce70e1bffa12
+ * llama.cpp - git f64d44a9b9581cd58f7ec40f4fa1c3ca5ca18e1e
  *
  * MIT License
  *
...
...
@@ -35,6 +35,11 @@
 #import <Metal/Metal.h>
 #import <MetalPerformanceShaders/MetalPerformanceShaders.h>

+#undef MIN
+#undef MAX
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+
 #ifdef GGML_METAL_NDEBUG
 #define metal_printf(...)
 #else
...
...
@@ -43,6 +48,8 @@
 #define UNUSED(x) (void)(x)

+#define GGML_MAX_CONCUR (2*GGML_MAX_NODES)
+
 struct ggml_metal_buffer {
     const char * name;
...
...
@@ -64,7 +71,7 @@ struct ggml_metal_context {
     int n_buffers;
     struct ggml_metal_buffer buffers[GGML_METAL_MAX_BUFFERS];

-    int concur_list[GGML_MAX_NODES];
+    int concur_list[GGML_MAX_CONCUR];
     int concur_list_len;

     // custom kernels
...
...
@@ -398,15 +405,15 @@ void ggml_metal_graph_find_concurrency(
         struct ggml_metal_context * ctx,
         struct ggml_cgraph * gf) {
     int search_depth = gf->n_nodes; // we only find concurrency in this range to avoid wasting too much time
-    int nodes_unused[GGML_MAX_NODES];
+    int nodes_unused[GGML_MAX_CONCUR];

-    for (int i = 0; i < GGML_MAX_NODES; i++) {ctx->concur_list[i] = 0;}
-    for (int i = 0; i < gf->n_nodes; i++) {nodes_unused[i] = 1;}
+    for (int i = 0; i < GGML_MAX_CONCUR; i++) { ctx->concur_list[i] = 0; }
+    for (int i = 0; i < gf->n_nodes;     i++) { nodes_unused[i]     = 1; }
     ctx->concur_list_len = 0;

     int n_left    = gf->n_nodes;
     int n_start   = 0;  // all nodes before n_start at nodes_unused array have been sorted and store back to ctx->concur_list
     int level_pos = 0;  // at ctx->concur_list, the last layer (level) ends at level_pos

     while (n_left > 0) {
         // number of nodes at a layer (that can be issued concurrently)
...
...
@@ -414,28 +421,40 @@ void ggml_metal_graph_find_concurrency(
         for (int i = n_start; i < ((n_start + search_depth > gf->n_nodes) ? gf->n_nodes : n_start + search_depth); i++) {
             if (nodes_unused[i]) {
                 // if the requirements for gf->nodes[i] are satisfied
                 int exe_flag = 1;

                 // scan all srcs
                 for (int src_ind = 0; src_ind < GGML_MAX_SRC; src_ind++) {
                     struct ggml_tensor * src_cur = gf->nodes[i]->src[src_ind];
                     if (src_cur) {
                         // if is leaf nodes it's satisfied.
-                        if (src_cur->op == GGML_OP_NONE && src_cur->grad == NULL) {continue;}
+                        // TODO: ggml_is_leaf()
+                        if (src_cur->op == GGML_OP_NONE && src_cur->grad == NULL) {
+                            continue;
+                        }

                         // otherwise this src should be the output from previous nodes.
                         int is_found = 0;

                         // scan 2*search_depth back because we inserted barrier.
-                        for (int j = ((level_pos - 2*search_depth) < 0 ? 0 : (level_pos - 2*search_depth)); j < level_pos; j++) {
-                            if (gf->nodes[ctx->concur_list[j]] == src_cur) {is_found = 1; break;}
+                        //for (int j = ((level_pos - 2*search_depth) < 0 ? 0 : (level_pos - 2*search_depth)); j < level_pos; j++) {
+                        for (int j = MAX(0, level_pos - 2*search_depth); j < level_pos; j++) {
+                            if (ctx->concur_list[j] >= 0 && gf->nodes[ctx->concur_list[j]] == src_cur) {
+                                is_found = 1;
+                                break;
+                            }
                         }
-                        if (is_found == 0) {exe_flag = 0; break;}
+                        if (is_found == 0) {
+                            exe_flag = 0;
+                            break;
+                        }
                     }
                 }
                 if (exe_flag) {
                     // check if nodes[i]'s data will be overwritten by a node before nodes[i].
                     // if node[5] and node[3] write to the same memory region, then we can't issue node[5] before node[3]
                     int64_t data_start = (int64_t) gf->nodes[i]->data;
                     int64_t length     = (int64_t) ggml_nbytes(gf->nodes[i]);
                     for (int j = n_start; j < i; j++) {
                         if (nodes_unused[j] && gf->nodes[j]->op != GGML_OP_RESHAPE \
                                             && gf->nodes[j]->op != GGML_OP_VIEW \
...
...
@@ -444,9 +463,9 @@ void ggml_metal_graph_find_concurrency(
                             if (((int64_t) gf->nodes[j]->data) >= data_start + length || \
                                 ((int64_t) gf->nodes[j]->data) + (int64_t) ggml_nbytes(gf->nodes[j]) <= data_start) {
                                 continue;
-                            } else {
-                                exe_flag = 0;
-                            }
+                            }
+
+                            exe_flag = 0;
                         }
                     }
                 }
...
...
@@ -463,11 +482,13 @@ void ggml_metal_graph_find_concurrency(
         ctx->concur_list[level_pos + concurrency] = -1;
         ctx->concur_list_len++;
         // jump all sorted nodes at nodes_bak
-        while (!nodes_unused[n_start]) {n_start++;}
+        while (!nodes_unused[n_start]) {
+            n_start++;
+        }
         level_pos += concurrency + 1;
     }

-    if (ctx->concur_list_len > GGML_MAX_NODES) {
+    if (ctx->concur_list_len > GGML_MAX_CONCUR) {
         fprintf(stderr, "%s: too many elements for metal ctx->concur_list!\n", __func__);
     }
 }
...
...
@@ -481,7 +502,7 @@ void ggml_metal_graph_compute(
     // else fallback to serial dispatch
     MTLComputePassDescriptor * edesc = MTLComputePassDescriptor.computePassDescriptor;

-    const bool has_concur = ctx->concur_list_len && ctx->concur_list_len <= GGML_MAX_NODES;
+    const bool has_concur = ctx->concur_list_len && ctx->concur_list_len <= GGML_MAX_CONCUR;

     const int n_nodes = has_concur ? ctx->concur_list_len : gf->n_nodes;
     edesc.dispatchType = has_concur ? MTLDispatchTypeConcurrent : MTLDispatchTypeSerial;
...
...
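The switch from GGML_MAX_NODES to GGML_MAX_CONCUR throughout this file reflects that concur_list interleaves -1 barrier markers between concurrency levels (see concur_list[level_pos + concurrency] = -1 above), so the list can hold up to twice as many entries as there are graph nodes. A hedged, standalone sketch of how such a list is consumed; the node indices here are illustrative only:

    #include <cstdio>

    int main() {
        // nodes 0 and 1 form one concurrent level, node 2 the next;
        // -1 entries are the barriers inserted between levels
        const int concur_list[] = { 0, 1, -1, 2, -1 };
        for (int idx : concur_list) {
            if (idx < 0) {
                printf("-- barrier: wait for previous level --\n");
                continue;
            }
            printf("dispatch node %d\n", idx);
        }
        return 0;
    }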
llm/ggml-metal.metal

 //go:build darwin
 /**
- * llama.cpp - git 8183159cf3def112f6d1fe94815fce70e1bffa12
+ * llama.cpp - git f64d44a9b9581cd58f7ec40f4fa1c3ca5ca18e1e
  *
  * MIT License
  *
...
...
llm/ggml-mpi.c

 //go:build mpi
 /**
- * llama.cpp - git 8183159cf3def112f6d1fe94815fce70e1bffa12
+ * llama.cpp - git f64d44a9b9581cd58f7ec40f4fa1c3ca5ca18e1e
  *
  * MIT License
  *
...
...
llm/ggml-mpi.h

 //go:build mpi
 /**
- * llama.cpp - git 8183159cf3def112f6d1fe94815fce70e1bffa12
+ * llama.cpp - git f64d44a9b9581cd58f7ec40f4fa1c3ca5ca18e1e
  *
  * MIT License
  *
...
...
llm/ggml-opencl.cpp

 //go:build opencl
 /**
- * llama.cpp - git 8183159cf3def112f6d1fe94815fce70e1bffa12
+ * llama.cpp - git f64d44a9b9581cd58f7ec40f4fa1c3ca5ca18e1e
  *
  * MIT License
  *
...
...
llm/ggml-opencl.h

 //go:build opencl
 /**
- * llama.cpp - git 8183159cf3def112f6d1fe94815fce70e1bffa12
+ * llama.cpp - git f64d44a9b9581cd58f7ec40f4fa1c3ca5ca18e1e
  *
  * MIT License
  *
...
...
llm/ggml.c

(diff collapsed; not shown in this view)
llm/ggml.h

 /**
- * llama.cpp - git 8183159cf3def112f6d1fe94815fce70e1bffa12
+ * llama.cpp - git f64d44a9b9581cd58f7ec40f4fa1c3ca5ca18e1e
  *
  * MIT License
  *
...
...
@@ -209,6 +209,15 @@
 #    define GGML_API
 #endif

+// TODO: support for clang
+#ifdef __GNUC__
+#    define GGML_DEPRECATED(func, hint) func __attribute__((deprecated(hint)))
+#elif defined(_MSC_VER)
+#    define GGML_DEPRECATED(func, hint) __declspec(deprecated(hint)) func
+#else
+#    define GGML_DEPRECATED(func, hint) func
+#endif
+
 #include <stdint.h>
 #include <stddef.h>
 #include <stdbool.h>
...
...
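GGML_DEPRECATED wraps a declaration so that GCC/Clang and MSVC emit a warning at every call site. A hedged illustration with a hypothetical old_fn, assuming the __GNUC__ branch of the macro:

    // expands to: int old_fn(int x) __attribute__((deprecated("use new_fn instead")));
    GGML_DEPRECATED(int old_fn(int x), "use new_fn instead");

    // calls such as old_fn(1) now trigger -Wdeprecated-declarations at compile time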
@@ -400,6 +409,10 @@ extern "C" {
         GGML_OP_MAP_UNARY,
         GGML_OP_MAP_BINARY,

+        GGML_OP_MAP_CUSTOM1_F32,
+        GGML_OP_MAP_CUSTOM2_F32,
+        GGML_OP_MAP_CUSTOM3_F32,
+
         GGML_OP_MAP_CUSTOM1,
         GGML_OP_MAP_CUSTOM2,
         GGML_OP_MAP_CUSTOM3,
...
...
@@ -596,6 +609,8 @@ extern "C" {
     GGML_API bool ggml_is_contiguous(const struct ggml_tensor * tensor);
+    GGML_API bool ggml_is_permuted  (const struct ggml_tensor * tensor);
+
     GGML_API bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1);

     // use this to compute the memory overhead of a tensor
     GGML_API size_t ggml_tensor_overhead(void);
...
...
@@ -1266,7 +1281,7 @@ extern "C" {
     // conv_1d with padding = half
     // alias for ggml_conv_1d(a, b, s, a->ne[0]/2, d)
     GGML_API struct ggml_tensor * ggml_conv_1d_ph(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
             struct ggml_tensor  * b,
...
...
@@ -1279,7 +1294,7 @@ extern "C" {
         GGML_OP_POOL_COUNT,
     };

     GGML_API struct ggml_tensor * ggml_pool_1d(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
             enum ggml_op_pool     op,
...
...
@@ -1287,7 +1302,7 @@ extern "C" {
             int                   s0, // stride
             int                   p0); // padding

     GGML_API struct ggml_tensor * ggml_pool_2d(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
             enum ggml_op_pool     op,
...
...
@@ -1341,15 +1356,6 @@ extern "C" {
             int                   h0,
             int                   w);

-    // custom operators
-
-    typedef void (*ggml_unary_op_f32_t) (const int, float *, const float *);
-    typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *);
-
-    typedef void (*ggml_custom1_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *);
-    typedef void (*ggml_custom2_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
-    typedef void (*ggml_custom3_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
-
     GGML_API struct ggml_tensor * ggml_unary(
             struct ggml_context * ctx,
             struct ggml_tensor * a,
...
...
@@ -1360,63 +1366,138 @@ extern "C" {
             struct ggml_tensor * a,
             enum ggml_unary_op op);

-    GGML_API struct ggml_tensor * ggml_map_unary_f32(
+    // custom operators
+
+    typedef void (*ggml_unary_op_f32_t) (const int, float *, const float *);
+    typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *);
+
+    typedef void (*ggml_custom1_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *);
+    typedef void (*ggml_custom2_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
+    typedef void (*ggml_custom3_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
+
+    GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_unary_f32(
             struct ggml_context * ctx,
             struct ggml_tensor * a,
-            ggml_unary_op_f32_t fun);
+            ggml_unary_op_f32_t fun),
+        "use ggml_map_custom1 instead");

-    GGML_API struct ggml_tensor * ggml_map_unary_inplace_f32(
+    GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_unary_inplace_f32(
             struct ggml_context * ctx,
             struct ggml_tensor * a,
-            ggml_unary_op_f32_t fun);
+            ggml_unary_op_f32_t fun),
+        "use ggml_map_custom1_inplace instead");

-    GGML_API struct ggml_tensor * ggml_map_binary_f32(
+    GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_binary_f32(
             struct ggml_context * ctx,
             struct ggml_tensor * a,
             struct ggml_tensor * b,
-            ggml_binary_op_f32_t fun);
+            ggml_binary_op_f32_t fun),
+        "use ggml_map_custom2 instead");

-    GGML_API struct ggml_tensor * ggml_map_binary_inplace_f32(
+    GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_binary_inplace_f32(
             struct ggml_context * ctx,
             struct ggml_tensor * a,
             struct ggml_tensor * b,
-            ggml_binary_op_f32_t fun);
+            ggml_binary_op_f32_t fun),
+        "use ggml_map_custom2_inplace instead");

-    GGML_API struct ggml_tensor * ggml_map_custom1_f32(
+    GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom1_f32(
             struct ggml_context * ctx,
             struct ggml_tensor * a,
-            ggml_custom1_op_f32_t fun);
+            ggml_custom1_op_f32_t fun),
+        "use ggml_map_custom1 instead");

-    GGML_API struct ggml_tensor * ggml_map_custom1_inplace_f32(
+    GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom1_inplace_f32(
             struct ggml_context * ctx,
             struct ggml_tensor * a,
-            ggml_custom1_op_f32_t fun);
+            ggml_custom1_op_f32_t fun),
+        "use ggml_map_custom1_inplace instead");

-    GGML_API struct ggml_tensor * ggml_map_custom2_f32(
+    GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom2_f32(
             struct ggml_context * ctx,
             struct ggml_tensor * a,
             struct ggml_tensor * b,
-            ggml_custom2_op_f32_t fun);
+            ggml_custom2_op_f32_t fun),
+        "use ggml_map_custom2 instead");

-    GGML_API struct ggml_tensor * ggml_map_custom2_inplace_f32(
+    GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom2_inplace_f32(
             struct ggml_context * ctx,
             struct ggml_tensor * a,
             struct ggml_tensor * b,
-            ggml_custom2_op_f32_t fun);
+            ggml_custom2_op_f32_t fun),
+        "use ggml_map_custom2_inplace instead");

-    GGML_API struct ggml_tensor * ggml_map_custom3_f32(
+    GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom3_f32(
             struct ggml_context * ctx,
             struct ggml_tensor * a,
             struct ggml_tensor * b,
             struct ggml_tensor * c,
-            ggml_custom3_op_f32_t fun);
+            ggml_custom3_op_f32_t fun),
+        "use ggml_map_custom3 instead");

-    GGML_API struct ggml_tensor * ggml_map_custom3_inplace_f32(
+    GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom3_inplace_f32(
             struct ggml_context * ctx,
             struct ggml_tensor * a,
             struct ggml_tensor * b,
             struct ggml_tensor * c,
-            ggml_custom3_op_f32_t fun);
+            ggml_custom3_op_f32_t fun),
+        "use ggml_map_custom3_inplace instead");

+    // custom operators v2
+
+    typedef void (*ggml_custom1_op_t)(struct ggml_tensor * dst, const struct ggml_tensor * a, int ith, int nth, void * userdata);
+    typedef void (*ggml_custom2_op_t)(struct ggml_tensor * dst, const struct ggml_tensor * a, const struct ggml_tensor * b, int ith, int nth, void * userdata);
+    typedef void (*ggml_custom3_op_t)(struct ggml_tensor * dst, const struct ggml_tensor * a, const struct ggml_tensor * b, const struct ggml_tensor * c, int ith, int nth, void * userdata);
+
+    #define GGML_N_TASKS_MAX -1
+
+    GGML_API struct ggml_tensor * ggml_map_custom1(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            ggml_custom1_op_t     fun,
+            int                   n_tasks,
+            void                * userdata);
+
+    GGML_API struct ggml_tensor * ggml_map_custom1_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            ggml_custom1_op_t     fun,
+            int                   n_tasks,
+            void                * userdata);
+
+    GGML_API struct ggml_tensor * ggml_map_custom2(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b,
+            ggml_custom2_op_t     fun,
+            int                   n_tasks,
+            void                * userdata);
+
+    GGML_API struct ggml_tensor * ggml_map_custom2_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b,
+            ggml_custom2_op_t     fun,
+            int                   n_tasks,
+            void                * userdata);
+
+    GGML_API struct ggml_tensor * ggml_map_custom3(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b,
+            struct ggml_tensor  * c,
+            ggml_custom3_op_t     fun,
+            int                   n_tasks,
+            void                * userdata);
+
+    GGML_API struct ggml_tensor * ggml_map_custom3_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b,
+            struct ggml_tensor  * c,
+            ggml_custom3_op_t     fun,
+            int                   n_tasks,
+            void                * userdata);
+
     // loss function
...
...
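The v2 interface replaces the typed f32 callbacks with a single dst/src signature plus an explicit thread index (ith of nth) and a userdata pointer. A hedged usage sketch; scale_op and make_scaled are hypothetical names, ctx and x come from the usual ggml_init / tensor-creation flow, and error handling is omitted:

    #include "ggml.h"

    // multiply every element of `a` by the factor passed through userdata
    static void scale_op(struct ggml_tensor * dst, const struct ggml_tensor * a,
                         int ith, int nth, void * userdata) {
        const float k = *(const float *) userdata;
        const int   n = (int) ggml_nelements(dst);
        // the work is split across nth threads; this invocation is thread ith
        for (int i = ith; i < n; i += nth) {
            ((float *) dst->data)[i] = ((const float *) a->data)[i] * k;
        }
    }

    // k must stay alive until the graph has been computed, since only
    // the pointer is stored with the node
    struct ggml_tensor * make_scaled(struct ggml_context * ctx, struct ggml_tensor * x, float * k) {
        // GGML_N_TASKS_MAX lets ggml choose the thread count
        return ggml_map_custom1(ctx, x, scale_op, GGML_N_TASKS_MAX, k);
    }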
llm/k_quants.c

 /**
- * llama.cpp - git 8183159cf3def112f6d1fe94815fce70e1bffa12
+ * llama.cpp - git f64d44a9b9581cd58f7ec40f4fa1c3ca5ca18e1e
  *
  * MIT License
  *
...
...
llm/k_quants.h

 /**
- * llama.cpp - git 8183159cf3def112f6d1fe94815fce70e1bffa12
+ * llama.cpp - git f64d44a9b9581cd58f7ec40f4fa1c3ca5ca18e1e
  *
  * MIT License
  *
...
...
llm/llama-util.h

 /**
- * llama.cpp - git 8183159cf3def112f6d1fe94815fce70e1bffa12
+ * llama.cpp - git f64d44a9b9581cd58f7ec40f4fa1c3ca5ca18e1e
  *
  * MIT License
  *
...
...
@@ -175,6 +175,46 @@ struct llama_file {
     }
 };

+// llama_context_data
+struct llama_data_context {
+    virtual void write(const void * src, size_t size) = 0;
+    virtual size_t get_size_written() = 0;
+    virtual ~llama_data_context() = default;
+};
+
+struct llama_data_buffer_context : llama_data_context {
+    uint8_t * ptr;
+    size_t size_written = 0;
+
+    llama_data_buffer_context(uint8_t * p) : ptr(p) {}
+
+    void write(const void * src, size_t size) override {
+        memcpy(ptr, src, size);
+        ptr += size;
+        size_written += size;
+    }
+
+    size_t get_size_written() override {
+        return size_written;
+    }
+};
+
+struct llama_data_file_context : llama_data_context {
+    llama_file * file;
+    size_t size_written = 0;
+
+    llama_data_file_context(llama_file * f) : file(f) {}
+
+    void write(const void * src, size_t size) override {
+        file->write_raw(src, size);
+        size_written += size;
+    }
+
+    size_t get_size_written() override {
+        return size_written;
+    }
+};
+
 #if defined(_WIN32)
 static std::string llama_format_win_err(DWORD err) {
     LPSTR buf;
...
...
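Both implementations share the llama_data_context interface, so state-saving code can target a memory buffer or a file through the same write() calls. A hedged sketch of the buffer-backed path; the payload is hypothetical and llama-util.h is assumed to be included:

    #include <cstdint>
    #include <vector>

    size_t demo_buffer_write() {
        std::vector<uint8_t> blob(1024);        // hypothetical destination buffer
        llama_data_buffer_context ctx(blob.data());

        const uint32_t magic = 0x6767676d;      // example payload only
        ctx.write(&magic, sizeof(magic));       // memcpy into blob, advance ptr

        return ctx.get_size_written();          // == sizeof(magic)
    }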
@@ -205,7 +245,7 @@ struct llama_mmap {
         // prefetch/readahead impairs performance on NUMA systems
         if (numa) { prefetch = 0; }
 #ifdef __linux__
-        if (prefetch) { flags |= MAP_POPULATE; }
+        if (prefetch >= file->size) { flags |= MAP_POPULATE; }
 #endif
         addr = mmap(NULL, file->size, PROT_READ, flags, fd, 0);
         if (addr == MAP_FAILED) {
...
...
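The prefetch parameter effectively becomes a byte count rather than a flag: MAP_POPULATE, which faults all pages in up front, is now requested only when the caller asks to prefetch at least the whole file. A hedged sketch of that flag selection as a standalone helper (hypothetical function; MAP_SHARED is an assumption about the surrounding call):

    #include <sys/mman.h>
    #include <cstddef>

    static int mmap_flags_for(size_t prefetch, size_t file_size) {
        int flags = MAP_SHARED;
    #ifdef __linux__
        if (prefetch >= file_size) {
            flags |= MAP_POPULATE;   // pre-fault every page before mmap returns
        }
    #endif
        return flags;
    }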
llm/llama.cpp

(diff collapsed; not shown in this view)
llm/llama.go

 package llm

 /*
-#cgo CPPFLAGS: -O3 -Wall -Wextra -Wno-unused-function -Wno-unused-variable -DNDEBUG -DGGML_USE_K_QUANTS
-#cgo CXXFLAGS: -std=gnu++11
+#cgo CFLAGS: -Ofast -std=c11 -fPIC
+#cgo CPPFLAGS: -Ofast -Wall -Wextra -Wno-unused-function -Wno-unused-variable -DNDEBUG -DGGML_USE_K_QUANTS
+#cgo CXXFLAGS: -std=c++11 -fPIC
 #cgo darwin CPPFLAGS: -DGGML_USE_ACCELERATE
 #cgo darwin,arm64 CPPFLAGS: -DGGML_USE_METAL -DGGML_METAL_NDEBUG
 #cgo darwin LDFLAGS: -framework Accelerate -framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders
...
...
llm/llama.h

 /**
- * llama.cpp - git 8183159cf3def112f6d1fe94815fce70e1bffa12
+ * llama.cpp - git f64d44a9b9581cd58f7ec40f4fa1c3ca5ca18e1e
  *
  * MIT License
  *
...
...
@@ -112,7 +112,20 @@ extern "C" {
     typedef void (*llama_progress_callback)(float progress, void * ctx);

-    struct llama_context_params {
+    enum llama_log_level {
+        LLAMA_LOG_LEVEL_ERROR = 2,
+        LLAMA_LOG_LEVEL_WARN  = 3,
+        LLAMA_LOG_LEVEL_INFO  = 4
+    };
+
+    // Signature for logging events
+    // Note that text includes the new line character at the end for most events.
+    // If your logging mechanism cannot handle that, check if the last character is '\n' and strip it
+    // if it exists.
+    // It might not exist for progress report where '.' is output repeatedly.
+    typedef void (*llama_log_callback)(enum llama_log_level level, const char * text, void * user_data);
+
+    struct llama_context_params {
         uint32_t seed;   // RNG seed, -1 for random
         int32_t n_ctx;   // text context
         int32_t n_batch; // prompt processing batch size
...
...
@@ -221,6 +234,10 @@ extern "C" {
         int32_t n_eval;
     };

+    // Set callback for all future logging events.
+    // If this is not called, or NULL is supplied, everything is output on stderr.
+    LLAMA_API void llama_log_set(llama_log_callback log_callback, void * user_data);
+
     LLAMA_API int llama_max_devices();

     LLAMA_API struct llama_context_params llama_context_default_params();
...
...
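With llama_log_set, an application can route or filter llama.cpp's log output instead of receiving everything on stderr. A hedged sketch that keeps only warnings and errors; quiet_log is a hypothetical callback and llama.h is assumed to be included:

    #include <cstdio>

    // level values grow with verbosity (ERROR=2, WARN=3, INFO=4)
    static void quiet_log(enum llama_log_level level, const char * text, void * user_data) {
        (void) user_data;
        if (level <= LLAMA_LOG_LEVEL_WARN) {
            fputs(text, stderr);   // text usually already ends in '\n'
        }
    }

    // during startup:
    //     llama_log_set(quiet_log, NULL);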