Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
1ffb1e28
Unverified
Commit
1ffb1e28
authored
Mar 09, 2024
by
Jeffrey Morgan
Committed by
GitHub
Mar 09, 2024
Browse files
update llama.cpp submodule to `77d1ac7` (#3030)
parent
0a784441
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
24 additions
and
66 deletions
+24
-66
llm/ext_server/ext_server.cpp
llm/ext_server/ext_server.cpp
+1
-1
llm/generate/gen_darwin.sh
llm/generate/gen_darwin.sh
+8
-1
llm/llama.cpp
llm/llama.cpp
+1
-1
llm/patches/02-cudaleaks.diff
llm/patches/02-cudaleaks.diff
+12
-11
llm/payload_common.go
llm/payload_common.go
+1
-51
llm/payload_darwin_amd64.go
llm/payload_darwin_amd64.go
+1
-1
No files found.
llm/ext_server/ext_server.cpp
View file @
1ffb1e28
...
...
@@ -125,7 +125,7 @@ void llama_server_init(ext_server_params *sparams, ext_server_resp_t *err) {
return
;
}
llama
->
init
ialize
();
llama
->
init
();
}
catch
(
std
::
exception
&
e
)
{
err
->
id
=
-
1
;
snprintf
(
err
->
msg
,
err
->
msg_len
,
"exception %s"
,
e
.
what
());
...
...
llm/generate/gen_darwin.sh
View file @
1ffb1e28
...
...
@@ -60,12 +60,19 @@ case "${GOARCH}" in
compress_libs
;;
"arm64"
)
CMAKE_DEFS
=
"
${
COMMON_DARWIN_DEFS
}
-DLLAMA_ACCELERATE=on -DCMAKE_SYSTEM_PROCESSOR=
${
ARCH
}
-DCMAKE_OSX_ARCHITECTURES=
${
ARCH
}
-DLLAMA_METAL=on
${
CMAKE_DEFS
}
"
# bundle ggml-common.h and ggml-metal.metal into a single file
grep
-v
'#include "ggml-common.h"'
"
${
LLAMACPP_DIR
}
/ggml-metal.metal"
|
grep
-v
'#pragma once'
>
"
${
LLAMACPP_DIR
}
/ggml-metal.metal.temp"
echo
'#define GGML_COMMON_IMPL_METAL'
>
"
${
LLAMACPP_DIR
}
/ggml-metal.metal"
cat
"
${
LLAMACPP_DIR
}
/ggml-common.h"
|
grep
-v
'#pragma once'
>>
"
${
LLAMACPP_DIR
}
/ggml-metal.metal"
cat
"
${
LLAMACPP_DIR
}
/ggml-metal.metal.temp"
>>
"
${
LLAMACPP_DIR
}
/ggml-metal.metal"
rm
"
${
LLAMACPP_DIR
}
/ggml-metal.metal.temp"
CMAKE_DEFS
=
"
${
COMMON_DARWIN_DEFS
}
-DLLAMA_METAL_EMBED_LIBRARY=on -DLLAMA_ACCELERATE=on -DCMAKE_SYSTEM_PROCESSOR=
${
ARCH
}
-DCMAKE_OSX_ARCHITECTURES=
${
ARCH
}
-DLLAMA_METAL=on
${
CMAKE_DEFS
}
"
BUILD_DIR
=
"
${
LLAMACPP_DIR
}
/build/darwin/
${
ARCH
}
/metal"
EXTRA_LIBS
=
"
${
EXTRA_LIBS
}
-framework Accelerate -framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders"
build
sign
${
LLAMACPP_DIR
}
/build/darwin/
${
ARCH
}
/metal/lib/libext_server.dylib
compress_libs
(
cd
${
LLAMACPP_DIR
}
&&
git checkout ggml-metal.metal
)
;;
*
)
echo
"GOARCH must be set"
...
...
llama.cpp
@
77d1ac7e
Compare
c2101a2e
...
77d1ac7e
Subproject commit
c2101a2e909ac7c08976d414e64e96c90ee5fa9e
Subproject commit
77d1ac7e00bf049b9f2bba1b5a310a78318c49c4
llm/patches/02-cudaleaks.diff
View file @
1ffb1e28
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index
f255ad76..5b83ac
b1 100644
index
b14cca61..02bfd4
b1 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -28,6 +28,10 @@
#include <thread>
@@ -29,6 +29,10 @@
#include <signal.h>
#include <memory>
+#ifdef GGML_USE_CUBLAS
+extern "C" GGML_CALL void ggml_free_cublas(void);
...
...
@@ -13,7 +13,7 @@ index f255ad76..5b83acb1 100644
using json = nlohmann::json;
bool server_verbose = false;
@@ -64
8
,6 +6
52
,10 @@
struct server_context {
@@ -6
6
4,6 +6
68
,10 @@
struct server_context {
llama_free_model(model);
model = nullptr;
}
...
...
@@ -24,7 +24,7 @@ index f255ad76..5b83acb1 100644
}
bool load_model(const gpt_params & params_) {
@@ -3
33
9,6 +3
34
7,7 @@
int main(int argc, char ** argv) {
@@ -3
49
9,6 +3
50
7,7 @@
int main(int argc, char ** argv) {
sigemptyset (&sigint_action.sa_mask);
sigint_action.sa_flags = 0;
sigaction(SIGINT, &sigint_action, NULL);
...
...
@@ -33,10 +33,10 @@ index f255ad76..5b83acb1 100644
auto console_ctrl_handler = +[](DWORD ctrl_type) -> BOOL {
return (ctrl_type == CTRL_C_EVENT) ? (signal_handler(SIGINT), true) : false;
diff --git a/ggml-cuda.cu b/ggml-cuda.cu
index
72bcec8c..50a45e3d
100644
index
c207ff87..945708a4
100644
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -4
3
,6 +4
3
,7 @@
@@ -4
6
,6 +4
6
,7 @@
#define __shfl_xor_sync(mask, var, laneMask, width) __shfl_xor(var, laneMask, width)
#define cublasComputeType_t hipblasDatatype_t //deprecated, new hipblasComputeType_t not in 5.6
#define cublasCreate hipblasCreate
...
...
@@ -44,7 +44,7 @@ index 72bcec8c..50a45e3d 100644
#define cublasGemmEx hipblasGemmEx
#define cublasGemmBatchedEx hipblasGemmBatchedEx
#define cublasGemmStridedBatchedEx hipblasGemmStridedBatchedEx
@@ -8
751
,10 +8
752
,10 @@
GGML_CALL bool ggml_cublas_loaded(void) {
@@ -8
014
,10 +8
015
,10 @@
GGML_CALL bool ggml_cublas_loaded(void) {
return g_cublas_loaded;
}
...
...
@@ -58,7 +58,7 @@ index 72bcec8c..50a45e3d 100644
#ifdef __HIP_PLATFORM_AMD__
// Workaround for a rocBLAS bug when using multiple graphics cards:
@@ -8
764
,7 +8
765
,7 @@
GGML_CALL void ggml_init_cublas() {
@@ -8
027
,7 +8
028
,7 @@
GGML_CALL void ggml_init_cublas() {
#endif
if (cudaGetDeviceCount(&g_device_count) != cudaSuccess) {
...
...
@@ -67,7 +67,7 @@ index 72bcec8c..50a45e3d 100644
g_cublas_loaded = false;
fprintf(stderr, "%s: no " GGML_CUDA_NAME " devices found, " GGML_CUDA_NAME " will be disabled\n", __func__);
return;
@@ -8
835
,7 +8
836
,7 @@
GGML_CALL void ggml_init_cublas() {
@@ -8
098
,7 +8
099
,7 @@
GGML_CALL void ggml_init_cublas() {
// configure logging to stdout
// CUBLAS_CHECK(cublasLoggerConfigure(1, 1, 0, nullptr));
...
...
@@ -76,11 +76,12 @@ index 72bcec8c..50a45e3d 100644
g_cublas_loaded = true;
}
}
@@ -1
2490
,3 +1
2491
,2
2
@@
GGML_CALL int ggml_backend_cuda_reg_devices() {
@@ -1
1753
,3 +1
1754
,2
3
@@
GGML_CALL int ggml_backend_cuda_reg_devices() {
}
return device_count;
}
+
+
+extern "C" GGML_CALL void ggml_free_cublas(void);
+GGML_CALL void ggml_free_cublas(void) {
+ for (int id = 0; id < g_device_count; ++id) {
...
...
llm/payload_common.go
View file @
1ffb1e28
...
...
@@ -108,20 +108,8 @@ func nativeInit() error {
if
err
!=
nil
{
return
err
}
slog
.
Info
(
fmt
.
Sprintf
(
"Extracting dynamic libraries to %s ..."
,
payloadsDir
))
if
runtime
.
GOOS
==
"darwin"
{
err
:=
extractPayloadFiles
(
payloadsDir
,
"llama.cpp/ggml-metal.metal"
)
if
err
!=
nil
{
if
err
==
payloadMissing
{
// TODO perhaps consider this a hard failure on arm macs?
slog
.
Info
(
"ggml-meta.metal payload missing"
)
return
nil
}
return
err
}
os
.
Setenv
(
"GGML_METAL_PATH_RESOURCES"
,
payloadsDir
)
}
slog
.
Info
(
fmt
.
Sprintf
(
"Extracting dynamic libraries to %s ..."
,
payloadsDir
))
libs
,
err
:=
extractDynamicLibs
(
payloadsDir
,
"llama.cpp/build/*/*/*/lib/*"
)
if
err
!=
nil
{
...
...
@@ -211,44 +199,6 @@ func extractDynamicLibs(payloadsDir, glob string) ([]string, error) {
return
libs
,
g
.
Wait
()
}
func
extractPayloadFiles
(
payloadsDir
,
glob
string
)
error
{
files
,
err
:=
fs
.
Glob
(
libEmbed
,
glob
)
if
err
!=
nil
||
len
(
files
)
==
0
{
return
payloadMissing
}
for
_
,
file
:=
range
files
{
srcFile
,
err
:=
libEmbed
.
Open
(
file
)
if
err
!=
nil
{
return
fmt
.
Errorf
(
"read payload %s: %v"
,
file
,
err
)
}
defer
srcFile
.
Close
()
if
err
:=
os
.
MkdirAll
(
payloadsDir
,
0
o755
);
err
!=
nil
{
return
fmt
.
Errorf
(
"create payload lib dir %s: %v"
,
payloadsDir
,
err
)
}
src
:=
io
.
Reader
(
srcFile
)
filename
:=
file
if
strings
.
HasSuffix
(
file
,
".gz"
)
{
src
,
err
=
gzip
.
NewReader
(
src
)
if
err
!=
nil
{
return
fmt
.
Errorf
(
"decompress payload %s: %v"
,
file
,
err
)
}
filename
=
strings
.
TrimSuffix
(
filename
,
".gz"
)
}
destFile
:=
filepath
.
Join
(
payloadsDir
,
filepath
.
Base
(
filename
))
destFp
,
err
:=
os
.
OpenFile
(
destFile
,
os
.
O_WRONLY
|
os
.
O_CREATE
|
os
.
O_TRUNC
,
0
o755
)
if
err
!=
nil
{
return
fmt
.
Errorf
(
"write payload %s: %v"
,
file
,
err
)
}
defer
destFp
.
Close
()
if
_
,
err
:=
io
.
Copy
(
destFp
,
src
);
err
!=
nil
{
return
fmt
.
Errorf
(
"copy payload %s: %v"
,
file
,
err
)
}
}
return
nil
}
func
verifyDriverAccess
()
error
{
if
runtime
.
GOOS
!=
"linux"
{
return
nil
...
...
llm/payload_darwin_amd64.go
View file @
1ffb1e28
...
...
@@ -4,5 +4,5 @@ import (
"embed"
)
//go:embed
llama.cpp/ggml-metal.metal
llama.cpp/build/darwin/x86_64/*/lib/*.dylib*
//go:embed llama.cpp/build/darwin/x86_64/*/lib/*.dylib*
var
libEmbed
embed
.
FS
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment