Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
df40b11d
Unverified
Commit
df40b11d
authored
Jan 18, 2024
by
Daniel Hiltgen
Committed by
GitHub
Jan 18, 2024
Browse files
Merge pull request #2007 from dhiltgen/cpu_fallback
Add multiple CPU variants for Intel Mac
parents
d5a73533
b992bf65
Changes
17
Hide whitespace changes
Inline
Side-by-side
Showing
17 changed files
with
321 additions
and
178 deletions
+321
-178
.github/workflows/test.yaml
.github/workflows/test.yaml
+4
-1
Dockerfile.build
Dockerfile.build
+8
-2
llm/dyn_ext_server.c
llm/dyn_ext_server.c
+3
-3
llm/dyn_ext_server.go
llm/dyn_ext_server.go
+1
-10
llm/ext_server/CMakeLists.txt
llm/ext_server/CMakeLists.txt
+10
-14
llm/generate/gen_common.sh
llm/generate/gen_common.sh
+53
-20
llm/generate/gen_darwin.sh
llm/generate/gen_darwin.sh
+42
-23
llm/generate/gen_linux.sh
llm/generate/gen_linux.sh
+40
-47
llm/generate/gen_windows.ps1
llm/generate/gen_windows.ps1
+55
-15
llm/payload_common.go
llm/payload_common.go
+59
-33
llm/payload_darwin_amd64.go
llm/payload_darwin_amd64.go
+8
-0
llm/payload_darwin_arm64.go
llm/payload_darwin_arm64.go
+8
-0
llm/payload_linux.go
llm/payload_linux.go
+1
-1
llm/payload_windows.go
llm/payload_windows.go
+1
-1
scripts/build_darwin.sh
scripts/build_darwin.sh
+23
-8
scripts/build_remote.py
scripts/build_remote.py
+4
-0
scripts/rh_linux_deps.sh
scripts/rh_linux_deps.sh
+1
-0
No files found.
.github/workflows/test.yaml
View file @
df40b11d
...
@@ -79,13 +79,16 @@ jobs:
...
@@ -79,13 +79,16 @@ jobs:
strategy
:
strategy
:
matrix
:
matrix
:
os
:
[
ubuntu-latest
,
macos-latest
,
windows-latest
]
os
:
[
ubuntu-latest
,
macos-latest
,
windows-latest
]
arch
:
[
amd64
,
arm64
]
arch
:
[
amd64
]
exclude
:
exclude
:
-
os
:
ubuntu-latest
-
os
:
ubuntu-latest
arch
:
arm64
arch
:
arm64
-
os
:
windows-latest
-
os
:
windows-latest
arch
:
arm64
arch
:
arm64
runs-on
:
${{ matrix.os }}
runs-on
:
${{ matrix.os }}
env
:
GOARCH
:
${{ matrix.arch }}
CGO_ENABLED
:
"
1"
steps
:
steps
:
-
uses
:
actions/checkout@v4
-
uses
:
actions/checkout@v4
with
:
with
:
...
...
Dockerfile.build
View file @
df40b11d
...
@@ -10,6 +10,7 @@ COPY llm llm
...
@@ -10,6 +10,7 @@ COPY llm llm
FROM --platform=linux/amd64 nvidia/cuda:$CUDA_VERSION-devel-centos7 AS cuda-build-amd64
FROM --platform=linux/amd64 nvidia/cuda:$CUDA_VERSION-devel-centos7 AS cuda-build-amd64
ARG CMAKE_VERSION
ARG CMAKE_VERSION
ARG CGO_CFLAGS
COPY ./scripts/rh_linux_deps.sh /
COPY ./scripts/rh_linux_deps.sh /
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
...
@@ -19,6 +20,7 @@ RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
...
@@ -19,6 +20,7 @@ RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION-devel-rockylinux8 AS cuda-build-arm64
FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION-devel-rockylinux8 AS cuda-build-arm64
ARG CMAKE_VERSION
ARG CMAKE_VERSION
ARG CGO_CFLAGS
COPY ./scripts/rh_linux_deps.sh /
COPY ./scripts/rh_linux_deps.sh /
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH
ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH
...
@@ -28,6 +30,7 @@ RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
...
@@ -28,6 +30,7 @@ RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
FROM --platform=linux/amd64 rocm/dev-centos-7:5.7.1-complete AS rocm-5-build-amd64
FROM --platform=linux/amd64 rocm/dev-centos-7:5.7.1-complete AS rocm-5-build-amd64
ARG CMAKE_VERSION
ARG CMAKE_VERSION
ARG CGO_CFLAGS
COPY ./scripts/rh_linux_deps.sh /
COPY ./scripts/rh_linux_deps.sh /
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
...
@@ -38,6 +41,7 @@ RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
...
@@ -38,6 +41,7 @@ RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
FROM --platform=linux/amd64 rocm/dev-centos-7:6.0-complete AS rocm-6-build-amd64
FROM --platform=linux/amd64 rocm/dev-centos-7:6.0-complete AS rocm-6-build-amd64
ARG CMAKE_VERSION
ARG CMAKE_VERSION
ARG CGO_CFLAGS
COPY ./scripts/rh_linux_deps.sh /
COPY ./scripts/rh_linux_deps.sh /
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
...
@@ -50,6 +54,7 @@ FROM --platform=linux/amd64 centos:7 AS cpu-build-amd64
...
@@ -50,6 +54,7 @@ FROM --platform=linux/amd64 centos:7 AS cpu-build-amd64
ARG CMAKE_VERSION
ARG CMAKE_VERSION
ARG GOLANG_VERSION
ARG GOLANG_VERSION
ARG OLLAMA_CUSTOM_CPU_DEFS
ARG OLLAMA_CUSTOM_CPU_DEFS
ARG CGO_CFLAGS
COPY ./scripts/rh_linux_deps.sh /
COPY ./scripts/rh_linux_deps.sh /
RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
...
@@ -61,6 +66,7 @@ FROM --platform=linux/arm64 centos:7 AS cpu-build-arm64
...
@@ -61,6 +66,7 @@ FROM --platform=linux/arm64 centos:7 AS cpu-build-arm64
ARG CMAKE_VERSION
ARG CMAKE_VERSION
ARG GOLANG_VERSION
ARG GOLANG_VERSION
ARG OLLAMA_CUSTOM_CPU_DEFS
ARG OLLAMA_CUSTOM_CPU_DEFS
ARG CGO_CFLAGS
COPY ./scripts/rh_linux_deps.sh /
COPY ./scripts/rh_linux_deps.sh /
RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
...
@@ -72,7 +78,7 @@ RUN sh gen_linux.sh
...
@@ -72,7 +78,7 @@ RUN sh gen_linux.sh
FROM --platform=linux/amd64 cpu-build-amd64 AS build-amd64
FROM --platform=linux/amd64 cpu-build-amd64 AS build-amd64
ENV CGO_ENABLED 1
ENV CGO_ENABLED 1
ARG GOFLAGS
ARG GOFLAGS
ARG CGO_FLAGS
ARG CGO_
C
FLAGS
WORKDIR /go/src/github.com/jmorganca/ollama
WORKDIR /go/src/github.com/jmorganca/ollama
COPY . .
COPY . .
COPY --from=cuda-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
COPY --from=cuda-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
...
@@ -84,7 +90,7 @@ FROM --platform=linux/arm64 cpu-build-arm64 AS build-arm64
...
@@ -84,7 +90,7 @@ FROM --platform=linux/arm64 cpu-build-arm64 AS build-arm64
ENV CGO_ENABLED 1
ENV CGO_ENABLED 1
ARG GOLANG_VERSION
ARG GOLANG_VERSION
ARG GOFLAGS
ARG GOFLAGS
ARG CGO_FLAGS
ARG CGO_
C
FLAGS
WORKDIR /go/src/github.com/jmorganca/ollama
WORKDIR /go/src/github.com/jmorganca/ollama
COPY . .
COPY . .
COPY --from=cuda-build-arm64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
COPY --from=cuda-build-arm64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
...
...
llm/dyn_ext_server.c
View file @
df40b11d
...
@@ -5,7 +5,7 @@
...
@@ -5,7 +5,7 @@
#ifdef __linux__
#ifdef __linux__
#include <dlfcn.h>
#include <dlfcn.h>
#define LOAD_LIBRARY(lib, flags) dlopen(lib, flags
| RTLD_DEEPBIND
)
#define LOAD_LIBRARY(lib, flags) dlopen(lib, flags)
#define LOAD_SYMBOL(handle, sym) dlsym(handle, sym)
#define LOAD_SYMBOL(handle, sym) dlsym(handle, sym)
#define LOAD_ERR() strdup(dlerror())
#define LOAD_ERR() strdup(dlerror())
#define UNLOAD_LIBRARY(handle) dlclose(handle)
#define UNLOAD_LIBRARY(handle) dlclose(handle)
...
@@ -58,8 +58,8 @@ void dyn_init(const char *libPath, struct dynamic_llama_server *s,
...
@@ -58,8 +58,8 @@ void dyn_init(const char *libPath, struct dynamic_llama_server *s,
{
""
,
NULL
},
{
""
,
NULL
},
};
};
printf
(
"loading
%s
library
\n
"
,
libPath
);
printf
(
"loading library
%s
\n
"
,
libPath
);
s
->
handle
=
LOAD_LIBRARY
(
libPath
,
RTLD_NOW
);
s
->
handle
=
LOAD_LIBRARY
(
libPath
,
RTLD_GLOBAL
|
RTLD_NOW
);
if
(
!
s
->
handle
)
{
if
(
!
s
->
handle
)
{
err
->
id
=
-
1
;
err
->
id
=
-
1
;
char
*
msg
=
LOAD_ERR
();
char
*
msg
=
LOAD_ERR
();
...
...
llm/dyn_ext_server.go
View file @
df40b11d
...
@@ -372,15 +372,6 @@ func updatePath(dir string) {
...
@@ -372,15 +372,6 @@ func updatePath(dir string) {
newPath
:=
strings
.
Join
(
append
([]
string
{
dir
},
pathComponents
...
),
";"
)
newPath
:=
strings
.
Join
(
append
([]
string
{
dir
},
pathComponents
...
),
";"
)
log
.
Printf
(
"Updating PATH to %s"
,
newPath
)
log
.
Printf
(
"Updating PATH to %s"
,
newPath
)
os
.
Setenv
(
"PATH"
,
newPath
)
os
.
Setenv
(
"PATH"
,
newPath
)
}
else
{
pathComponents
:=
strings
.
Split
(
os
.
Getenv
(
"LD_LIBRARY_PATH"
),
":"
)
for
_
,
comp
:=
range
pathComponents
{
if
comp
==
dir
{
return
}
}
newPath
:=
strings
.
Join
(
append
([]
string
{
dir
},
pathComponents
...
),
":"
)
log
.
Printf
(
"Updating LD_LIBRARY_PATH to %s"
,
newPath
)
os
.
Setenv
(
"LD_LIBRARY_PATH"
,
newPath
)
}
}
// linux and darwin rely on rpath
}
}
llm/ext_server/CMakeLists.txt
View file @
df40b11d
...
@@ -2,28 +2,24 @@
...
@@ -2,28 +2,24 @@
set
(
TARGET ext_server
)
set
(
TARGET ext_server
)
option
(
LLAMA_SERVER_VERBOSE
"Build verbose logging option for Server"
ON
)
option
(
LLAMA_SERVER_VERBOSE
"Build verbose logging option for Server"
ON
)
add_library
(
${
TARGET
}
STATIC ../../../ext_server/ext_server.cpp
)
if
(
WIN32
)
add_library
(
${
TARGET
}
SHARED ../../../ext_server/ext_server.cpp ../../llama.cpp
)
else
()
add_library
(
${
TARGET
}
STATIC ../../../ext_server/ext_server.cpp ../../llama.cpp
)
endif
()
target_include_directories
(
${
TARGET
}
PRIVATE ../../common
)
target_include_directories
(
${
TARGET
}
PRIVATE ../../common
)
target_include_directories
(
${
TARGET
}
PRIVATE ../..
)
target_include_directories
(
${
TARGET
}
PRIVATE ../..
)
target_include_directories
(
${
TARGET
}
PRIVATE ../../..
)
target_include_directories
(
${
TARGET
}
PRIVATE ../../..
)
target_compile_features
(
${
TARGET
}
PRIVATE cxx_std_11
)
target_compile_features
(
${
TARGET
}
PRIVATE cxx_std_11
)
target_compile_definitions
(
${
TARGET
}
PUBLIC LLAMA_SERVER_LIBRARY=1
)
target_compile_definitions
(
${
TARGET
}
PUBLIC LLAMA_SERVER_LIBRARY=1
)
target_link_libraries
(
${
TARGET
}
PRIVATE common llama llava
${
CMAKE_THREAD_LIBS_INIT
}
)
target_link_libraries
(
${
TARGET
}
PRIVATE ggml llava common
)
target_compile_definitions
(
${
TARGET
}
PRIVATE
set_target_properties
(
${
TARGET
}
PROPERTIES POSITION_INDEPENDENT_CODE ON
)
SERVER_VERBOSE=$<BOOL:
${
LLAMA_SERVER_VERBOSE
}
>
target_compile_definitions
(
${
TARGET
}
PRIVATE SERVER_VERBOSE=$<BOOL:
${
LLAMA_SERVER_VERBOSE
}
>
)
)
install
(
TARGETS ext_server LIBRARY
)
if
(
BUILD_SHARED_LIBS
)
set_target_properties
(
ext_server PROPERTIES POSITION_INDEPENDENT_CODE ON
)
target_compile_definitions
(
ext_server PRIVATE LLAMA_SHARED LLAMA_BUILD
)
add_library
(
ext_server_shared SHARED $<TARGET_OBJECTS:ext_server>
)
target_link_libraries
(
ext_server_shared PRIVATE ggml llama llava common
${
CMAKE_THREAD_LIBS_INIT
}
)
install
(
TARGETS ext_server_shared LIBRARY
)
endif
()
if
(
CUDAToolkit_FOUND
)
if
(
CUDAToolkit_FOUND
)
target_include_directories
(
${
TARGET
}
PRIVATE
${
CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES
}
)
target_include_directories
(
${
TARGET
}
PRIVATE
${
CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES
}
)
if
(
WIN32
)
if
(
WIN32
)
target_link_libraries
(
ext_server_shared
PRIVATE nvml
)
target_link_libraries
(
${
TARGET
}
PRIVATE nvml
)
endif
()
endif
()
endif
()
endif
()
\ No newline at end of file
llm/generate/gen_common.sh
View file @
df40b11d
# common logic accross linux and darwin
# common logic accross linux and darwin
init_vars
()
{
init_vars
()
{
case
"
${
GOARCH
}
"
in
"amd64"
)
ARCH
=
"x86_64"
;;
"arm64"
)
ARCH
=
"arm64"
;;
*
)
ARCH
=
$(
uname
-m
|
sed
-e
"s/aarch64/arm64/g"
)
esac
LLAMACPP_DIR
=
../llama.cpp
LLAMACPP_DIR
=
../llama.cpp
CMAKE_DEFS
=
""
CMAKE_DEFS
=
""
CMAKE_TARGETS
=
"--target
ggml --target ggml_static --target llama --target build_info --target common --target ext_server --target llava_static
"
CMAKE_TARGETS
=
"--target
ext_server
"
if
echo
"
${
CGO_CFLAGS
}
"
|
grep
--
'-g'
>
/dev/null
;
then
if
echo
"
${
CGO_CFLAGS
}
"
|
grep
--
'-g'
>
/dev/null
;
then
CMAKE_DEFS
=
"-DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_VERBOSE_MAKEFILE=on -DLLAMA_GPROF=on -DLLAMA_SERVER_VERBOSE=on"
CMAKE_DEFS
=
"-DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_VERBOSE_MAKEFILE=on -DLLAMA_GPROF=on -DLLAMA_SERVER_VERBOSE=on
${
CMAKE_DEFS
}
"
else
else
# TODO - add additional optimization flags...
# TODO - add additional optimization flags...
CMAKE_DEFS
=
"-DCMAKE_BUILD_TYPE=Release -DLLAMA_SERVER_VERBOSE=off"
CMAKE_DEFS
=
"-DCMAKE_BUILD_TYPE=Release -DLLAMA_SERVER_VERBOSE=off
${
CMAKE_DEFS
}
"
fi
fi
case
$(
uname
-s
)
in
"Darwin"
)
LIB_EXT
=
"dylib"
WHOLE_ARCHIVE
=
"-Wl,-force_load"
NO_WHOLE_ARCHIVE
=
""
GCC_ARCH
=
"-arch
${
ARCH
}
"
;;
"Linux"
)
LIB_EXT
=
"so"
WHOLE_ARCHIVE
=
"-Wl,--whole-archive"
NO_WHOLE_ARCHIVE
=
"-Wl,--no-whole-archive"
# Cross compiling not supported on linux - Use docker
GCC_ARCH
=
""
;;
*
)
;;
esac
}
}
git_module_setup
()
{
git_module_setup
()
{
...
@@ -40,25 +69,29 @@ apply_patches() {
...
@@ -40,25 +69,29 @@ apply_patches() {
build
()
{
build
()
{
cmake
-S
${
LLAMACPP_DIR
}
-B
${
BUILD_DIR
}
${
CMAKE_DEFS
}
cmake
-S
${
LLAMACPP_DIR
}
-B
${
BUILD_DIR
}
${
CMAKE_DEFS
}
cmake
--build
${
BUILD_DIR
}
${
CMAKE_TARGETS
}
-j8
cmake
--build
${
BUILD_DIR
}
${
CMAKE_TARGETS
}
-j8
mkdir
-p
${
BUILD_DIR
}
/lib/
g++
-fPIC
-g
-shared
-o
${
BUILD_DIR
}
/lib/libext_server.
${
LIB_EXT
}
\
${
GCC_ARCH
}
\
${
WHOLE_ARCHIVE
}
${
BUILD_DIR
}
/examples/server/libext_server.a
${
NO_WHOLE_ARCHIVE
}
\
${
BUILD_DIR
}
/common/libcommon.a
\
${
BUILD_DIR
}
/libllama.a
\
-Wl
,-rpath,
\$
ORIGIN
\
-lpthread
-ldl
-lm
\
${
EXTRA_LIBS
}
}
}
install
()
{
compress_libs
()
{
rm
-rf
${
BUILD_DIR
}
/lib
echo
"Compressing payloads to reduce overall binary size..."
mkdir
-p
${
BUILD_DIR
}
/lib
pids
=
""
cp
${
BUILD_DIR
}
/examples/server/libext_server.a
${
BUILD_DIR
}
/lib
for
lib
in
${
BUILD_DIR
}
/lib/
*
.
${
LIB_EXT
}*
;
do
cp
${
BUILD_DIR
}
/common/libcommon.a
${
BUILD_DIR
}
/lib
bzip2
-v9
${
lib
}
&
cp
${
BUILD_DIR
}
/libllama.a
${
BUILD_DIR
}
/lib
pids+
=
"
$!
"
cp
${
BUILD_DIR
}
/libggml_static.a
${
BUILD_DIR
}
/lib
done
}
echo
for
pid
in
${
pids
}
;
do
link_server_lib
()
{
wait
$pid
gcc
-fPIC
-g
-shared
-o
${
BUILD_DIR
}
/lib/libext_server.so
\
done
-Wl
,--whole-archive
\
echo
"Finished compression"
${
BUILD_DIR
}
/lib/libext_server.a
\
-Wl
,--no-whole-archive
\
${
BUILD_DIR
}
/lib/libcommon.a
\
${
BUILD_DIR
}
/lib/libllama.a
\
-lstdc
++
}
}
# Keep the local tree clean after we're done with the build
# Keep the local tree clean after we're done with the build
...
...
llm/generate/gen_darwin.sh
View file @
df40b11d
...
@@ -9,16 +9,52 @@ set -o pipefail
...
@@ -9,16 +9,52 @@ set -o pipefail
echo
"Starting darwin generate script"
echo
"Starting darwin generate script"
source
$(
dirname
$0
)
/gen_common.sh
source
$(
dirname
$0
)
/gen_common.sh
init_vars
init_vars
CMAKE_DEFS
=
"-DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 -DCMAKE_SYSTEM_NAME=Darwin -DLLAMA_ACCELERATE=on
${
CMAKE_DEFS
}
"
git_module_setup
BUILD_DIR
=
"
${
LLAMACPP_DIR
}
/build/darwin/metal"
apply_patches
COMMON_DARWIN_DEFS
=
"-DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 -DCMAKE_SYSTEM_NAME=Darwin -DLLAMA_ACCELERATE=off"
case
"
${
GOARCH
}
"
in
case
"
${
GOARCH
}
"
in
"amd64"
)
"amd64"
)
CMAKE_DEFS
=
"-DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DLLAMA_METAL=off -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off
${
CMAKE_DEFS
}
"
COMMON_CPU_DEFS
=
"
${
COMMON_DARWIN_DEFS
}
-DCMAKE_SYSTEM_PROCESSOR=
${
ARCH
}
-DCMAKE_OSX_ARCHITECTURES=
${
ARCH
}
-DLLAMA_METAL=off -DLLAMA_NATIVE=off"
ARCH
=
"x86_64"
#
# CPU first for the default library, set up as lowest common denominator for maximum compatibility (including Rosetta)
#
CMAKE_DEFS
=
"
${
COMMON_CPU_DEFS
}
-DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off
${
CMAKE_DEFS
}
"
BUILD_DIR
=
"
${
LLAMACPP_DIR
}
/build/darwin/
${
ARCH
}
/cpu"
echo
"Building LCD CPU"
build
compress_libs
#
# ~2011 CPU Dynamic library with more capabilities turned on to optimize performance
# Approximately 400% faster than LCD on same CPU
#
init_vars
CMAKE_DEFS
=
"
${
COMMON_CPU_DEFS
}
-DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off
${
CMAKE_DEFS
}
"
BUILD_DIR
=
"
${
LLAMACPP_DIR
}
/build/darwin/
${
ARCH
}
/cpu_avx"
echo
"Building AVX CPU"
build
compress_libs
#
# ~2013 CPU Dynamic library
# Approximately 10% faster than AVX on same CPU
#
init_vars
CMAKE_DEFS
=
"
${
COMMON_CPU_DEFS
}
-DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_AVX512=off -DLLAMA_FMA=on -DLLAMA_F16C=on
${
CMAKE_DEFS
}
"
BUILD_DIR
=
"
${
LLAMACPP_DIR
}
/build/darwin/
${
ARCH
}
/cpu_avx2"
echo
"Building AVX2 CPU"
build
compress_libs
;;
;;
"arm64"
)
"arm64"
)
CMAKE_DEFS
=
"-DCMAKE_SYSTEM_PROCESSOR=arm64 -DCMAKE_OSX_ARCHITECTURES=arm64 -DLLAMA_METAL=on
${
CMAKE_DEFS
}
"
CMAKE_DEFS
=
"
${
COMMON_DARWIN_DEFS
}
-DCMAKE_SYSTEM_PROCESSOR=
${
ARCH
}
-DCMAKE_OSX_ARCHITECTURES=
${
ARCH
}
-DLLAMA_METAL=on -DLLAMA_ACCELERATE=on
${
CMAKE_DEFS
}
"
ARCH
=
"arm64"
BUILD_DIR
=
"
${
LLAMACPP_DIR
}
/build/darwin/
${
ARCH
}
/metal"
EXTRA_LIBS
=
"
${
EXTRA_LIBS
}
-framework Accelerate -framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders"
build
compress_libs
;;
;;
*
)
*
)
echo
"GOARCH must be set"
echo
"GOARCH must be set"
...
@@ -27,21 +63,4 @@ case "${GOARCH}" in
...
@@ -27,21 +63,4 @@ case "${GOARCH}" in
;;
;;
esac
esac
git_module_setup
apply_patches
build
install
gcc
-fPIC
-g
-shared
-o
${
BUILD_DIR
}
/lib/libext_server.so
\
-arch
${
ARCH
}
\
-Wl
,-force_load
${
BUILD_DIR
}
/lib/libext_server.a
\
${
BUILD_DIR
}
/lib/libcommon.a
\
${
BUILD_DIR
}
/lib/libllama.a
\
${
BUILD_DIR
}
/lib/libggml_static.a
\
-lpthread
-ldl
-lm
-lc
++
\
-framework
Accelerate
\
-framework
Foundation
\
-framework
Metal
\
-framework
MetalKit
\
-framework
MetalPerformanceShaders
cleanup
cleanup
llm/generate/gen_linux.sh
View file @
df40b11d
...
@@ -2,16 +2,14 @@
...
@@ -2,16 +2,14 @@
# This script is intended to run inside the go generate
# This script is intended to run inside the go generate
# working directory must be llm/generate/
# working directory must be llm/generate/
# First we build our default built-in library which will be linked into the CGO
# First we build one or more CPU based LLM libraries
# binary as a normal dependency. This default build is CPU based.
#
#
# Then we build a CUDA dynamic library
(although statically linked with the CUDA
# Then
if we detect CUDA,
we build a CUDA dynamic library
, and carry the required
# library dependencies
for maximum portability)
# library dependencies
#
#
# Then if we detect ROCm, we build a dynamically loaded ROCm lib. ROCm is particularly
# Then if we detect ROCm, we build a dynamically loaded ROCm lib. The ROCM
# important to be a dynamic lib even if it's the only GPU library detected because
# libraries are quite large, and also dynamically load data files at runtime
# we can't redistribute the objectfiles but must rely on dynamic libraries at
# which in turn are large, so we don't attempt to cary them as payload
# runtime, which could lead the server not to start if not present.
set
-ex
set
-ex
set
-o
pipefail
set
-o
pipefail
...
@@ -59,11 +57,10 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
...
@@ -59,11 +57,10 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
if
[
-n
"
${
OLLAMA_CUSTOM_CPU_DEFS
}
"
]
;
then
if
[
-n
"
${
OLLAMA_CUSTOM_CPU_DEFS
}
"
]
;
then
echo
"OLLAMA_CUSTOM_CPU_DEFS=
\"
${
OLLAMA_CUSTOM_CPU_DEFS
}
\"
"
echo
"OLLAMA_CUSTOM_CPU_DEFS=
\"
${
OLLAMA_CUSTOM_CPU_DEFS
}
\"
"
CMAKE_DEFS
=
"
${
OLLAMA_CUSTOM_CPU_DEFS
}
-DCMAKE_POSITION_INDEPENDENT_CODE=on
${
CMAKE_DEFS
}
"
CMAKE_DEFS
=
"
${
OLLAMA_CUSTOM_CPU_DEFS
}
-DCMAKE_POSITION_INDEPENDENT_CODE=on
${
CMAKE_DEFS
}
"
BUILD_DIR
=
"
${
LLAMACPP_DIR
}
/build/linux/cpu"
BUILD_DIR
=
"
${
LLAMACPP_DIR
}
/build/linux/
${
ARCH
}
/
cpu"
echo
"Building custom CPU"
echo
"Building custom CPU"
build
build
install
compress_libs
link_server_lib
else
else
# Darwin Rosetta x86 emulation does NOT support AVX, AVX2, AVX512
# Darwin Rosetta x86 emulation does NOT support AVX, AVX2, AVX512
# -DLLAMA_AVX -- 2011 Intel Sandy Bridge & AMD Bulldozer
# -DLLAMA_AVX -- 2011 Intel Sandy Bridge & AMD Bulldozer
...
@@ -80,11 +77,10 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
...
@@ -80,11 +77,10 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
# CPU first for the default library, set up as lowest common denominator for maximum compatibility (including Rosetta)
# CPU first for the default library, set up as lowest common denominator for maximum compatibility (including Rosetta)
#
#
CMAKE_DEFS
=
"
${
COMMON_CPU_DEFS
}
-DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off
${
CMAKE_DEFS
}
"
CMAKE_DEFS
=
"
${
COMMON_CPU_DEFS
}
-DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off
${
CMAKE_DEFS
}
"
BUILD_DIR
=
"
${
LLAMACPP_DIR
}
/build/linux/cpu"
BUILD_DIR
=
"
${
LLAMACPP_DIR
}
/build/linux/
${
ARCH
}
/
cpu"
echo
"Building LCD CPU"
echo
"Building LCD CPU"
build
build
install
compress_libs
link_server_lib
#
#
# ~2011 CPU Dynamic library with more capabilities turned on to optimize performance
# ~2011 CPU Dynamic library with more capabilities turned on to optimize performance
...
@@ -92,11 +88,10 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
...
@@ -92,11 +88,10 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
#
#
init_vars
init_vars
CMAKE_DEFS
=
"
${
COMMON_CPU_DEFS
}
-DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off
${
CMAKE_DEFS
}
"
CMAKE_DEFS
=
"
${
COMMON_CPU_DEFS
}
-DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off
${
CMAKE_DEFS
}
"
BUILD_DIR
=
"
${
LLAMACPP_DIR
}
/build/linux/cpu_avx"
BUILD_DIR
=
"
${
LLAMACPP_DIR
}
/build/linux/
${
ARCH
}
/
cpu_avx"
echo
"Building AVX CPU"
echo
"Building AVX CPU"
build
build
install
compress_libs
link_server_lib
#
#
# ~2013 CPU Dynamic library
# ~2013 CPU Dynamic library
...
@@ -104,11 +99,10 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
...
@@ -104,11 +99,10 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
#
#
init_vars
init_vars
CMAKE_DEFS
=
"
${
COMMON_CPU_DEFS
}
-DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_AVX512=off -DLLAMA_FMA=on -DLLAMA_F16C=on
${
CMAKE_DEFS
}
"
CMAKE_DEFS
=
"
${
COMMON_CPU_DEFS
}
-DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_AVX512=off -DLLAMA_FMA=on -DLLAMA_F16C=on
${
CMAKE_DEFS
}
"
BUILD_DIR
=
"
${
LLAMACPP_DIR
}
/build/linux/cpu_avx2"
BUILD_DIR
=
"
${
LLAMACPP_DIR
}
/build/linux/
${
ARCH
}
/
cpu_avx2"
echo
"Building AVX2 CPU"
echo
"Building AVX2 CPU"
build
build
install
compress_libs
link_server_lib
fi
fi
else
else
echo
"Skipping CPU generation step as requested"
echo
"Skipping CPU generation step as requested"
...
@@ -127,22 +121,27 @@ if [ -d "${CUDA_LIB_DIR}" ]; then
...
@@ -127,22 +121,27 @@ if [ -d "${CUDA_LIB_DIR}" ]; then
CUDA_VARIANT
=
_v
${
CUDA_MAJOR
}
CUDA_VARIANT
=
_v
${
CUDA_MAJOR
}
fi
fi
CMAKE_DEFS
=
"-DLLAMA_CUBLAS=on
${
COMMON_CMAKE_DEFS
}
${
CMAKE_DEFS
}
"
CMAKE_DEFS
=
"-DLLAMA_CUBLAS=on
${
COMMON_CMAKE_DEFS
}
${
CMAKE_DEFS
}
"
BUILD_DIR
=
"
${
LLAMACPP_DIR
}
/build/linux/cuda
${
CUDA_VARIANT
}
"
BUILD_DIR
=
"
${
LLAMACPP_DIR
}
/build/linux/
${
ARCH
}
/cuda
${
CUDA_VARIANT
}
"
EXTRA_LIBS
=
"-L
${
CUDA_LIB_DIR
}
-lcudart -lcublas -lcublasLt -lcuda"
build
build
install
gcc
-fPIC
-g
-shared
-o
${
BUILD_DIR
}
/lib/libext_server.so
\
# Cary the CUDA libs as payloads to help reduce dependency burden on users
-Wl
,--whole-archive
\
#
${
BUILD_DIR
}
/lib/libext_server.a
\
# TODO - in the future we may shift to packaging these separately and conditionally
${
BUILD_DIR
}
/lib/libcommon.a
\
# downloading them in the install script.
${
BUILD_DIR
}
/lib/libllama.a
\
DEPS
=
"
$(
ldd
${
BUILD_DIR
}
/lib/libext_server.so
)
"
-Wl
,--no-whole-archive
\
for
lib
in
libcudart.so libcublas.so libcublasLt.so
;
do
${
CUDA_LIB_DIR
}
/libcudart_static.a
\
DEP
=
$(
echo
"
${
DEPS
}
"
|
grep
${
lib
}
|
cut
-f1
-d
' '
| xargs
||
true
)
${
CUDA_LIB_DIR
}
/libcublas_static.a
\
if
[
-n
"
${
DEP
}
"
-a
-e
"
${
CUDA_LIB_DIR
}
/
${
DEP
}
"
]
;
then
${
CUDA_LIB_DIR
}
/libcublasLt_static.a
\
cp
"
${
CUDA_LIB_DIR
}
/
${
DEP
}
"
"
${
BUILD_DIR
}
/lib/"
${
CUDA_LIB_DIR
}
/libcudadevrt.a
\
elif
[
-e
"
${
CUDA_LIB_DIR
}
/
${
lib
}
.
${
CUDA_MAJOR
}
"
]
;
then
${
CUDA_LIB_DIR
}
/libculibos.a
\
cp
"
${
CUDA_LIB_DIR
}
/
${
lib
}
.
${
CUDA_MAJOR
}
"
"
${
BUILD_DIR
}
/lib/"
-lcuda
\
else
-lrt
-lpthread
-ldl
-lstdc
++
-lm
cp
-d
"
${
CUDA_LIB_DIR
}
/
${
lib
}
*"
"
${
BUILD_DIR
}
/lib/"
fi
done
compress_libs
fi
fi
if
[
-z
"
${
ROCM_PATH
}
"
]
;
then
if
[
-z
"
${
ROCM_PATH
}
"
]
;
then
...
@@ -164,19 +163,13 @@ if [ -d "${ROCM_PATH}" ]; then
...
@@ -164,19 +163,13 @@ if [ -d "${ROCM_PATH}" ]; then
fi
fi
init_vars
init_vars
CMAKE_DEFS
=
"
${
COMMON_CMAKE_DEFS
}
${
CMAKE_DEFS
}
-DLLAMA_HIPBLAS=on -DCMAKE_C_COMPILER=
$ROCM_PATH
/llvm/bin/clang -DCMAKE_CXX_COMPILER=
$ROCM_PATH
/llvm/bin/clang++ -DAMDGPU_TARGETS=
$(
amdGPUs
)
-DGPU_TARGETS=
$(
amdGPUs
)
"
CMAKE_DEFS
=
"
${
COMMON_CMAKE_DEFS
}
${
CMAKE_DEFS
}
-DLLAMA_HIPBLAS=on -DCMAKE_C_COMPILER=
$ROCM_PATH
/llvm/bin/clang -DCMAKE_CXX_COMPILER=
$ROCM_PATH
/llvm/bin/clang++ -DAMDGPU_TARGETS=
$(
amdGPUs
)
-DGPU_TARGETS=
$(
amdGPUs
)
"
BUILD_DIR
=
"
${
LLAMACPP_DIR
}
/build/linux/rocm
${
ROCM_VARIANT
}
"
BUILD_DIR
=
"
${
LLAMACPP_DIR
}
/build/linux/
${
ARCH
}
/rocm
${
ROCM_VARIANT
}
"
EXTRA_LIBS
=
"-L
${
ROCM_PATH
}
/lib -L/opt/amdgpu/lib/x86_64-linux-gnu/ -Wl,-rpath,
${
ROCM_PATH
}
/lib,-rpath,/opt/amdgpu/lib/x86_64-linux-gnu/ -lhipblas -lrocblas -lamdhip64 -lrocsolver -lamd_comgr -lhsa-runtime64 -lrocsparse -ldrm -ldrm_amdgpu"
build
build
install
gcc
-fPIC
-g
-shared
-o
${
BUILD_DIR
}
/lib/libext_server.so
\
# Note: the ROCM libs and runtime library files are too large to embed, so we depend on
-Wl
,--whole-archive
\
# them being present at runtime on the host
${
BUILD_DIR
}
/lib/libext_server.a
\
compress_libs
${
BUILD_DIR
}
/lib/libcommon.a
\
${
BUILD_DIR
}
/lib/libllama.a
\
-Wl
,--no-whole-archive
\
-lrt
-lpthread
-ldl
-lstdc
++
-lm
\
-L
/opt/rocm/lib
-L
/opt/amdgpu/lib/x86_64-linux-gnu/
\
-Wl
,-rpath,/opt/rocm/lib,-rpath,/opt/amdgpu/lib/x86_64-linux-gnu/
\
-lhipblas
-lrocblas
-lamdhip64
-lrocsolver
-lamd_comgr
-lhsa-runtime64
-lrocsparse
-ldrm
-ldrm_amdgpu
fi
fi
cleanup
cleanup
llm/generate/gen_windows.ps1
View file @
df40b11d
...
@@ -5,7 +5,8 @@ $ErrorActionPreference = "Stop"
...
@@ -5,7 +5,8 @@ $ErrorActionPreference = "Stop"
function
init_vars
{
function
init_vars
{
$
script
:
llamacppDir
=
"../llama.cpp"
$
script
:
llamacppDir
=
"../llama.cpp"
$
script
:
cmakeDefs
=
@(
"-DBUILD_SHARED_LIBS=on"
,
"-DLLAMA_NATIVE=off"
,
"-A"
,
"x64"
)
$
script
:
cmakeDefs
=
@(
"-DBUILD_SHARED_LIBS=on"
,
"-DLLAMA_NATIVE=off"
,
"-A"
,
"x64"
)
$
script
:
cmakeTargets
=
@(
"ggml"
,
"ggml_static"
,
"llama"
,
"build_info"
,
"common"
,
"ext_server_shared"
,
"llava_static"
)
$
script
:
cmakeTargets
=
@(
"ext_server"
)
$
script
:
ARCH
=
"amd64"
# arm not yet supported.
if
(
$
env
:
CGO_CFLAGS
-contains
"-g"
)
{
if
(
$
env
:
CGO_CFLAGS
-contains
"-g"
)
{
$
script
:
cmakeDefs
+=
@(
"-DCMAKE_VERBOSE_MAKEFILE=on"
,
"-DLLAMA_SERVER_VERBOSE=on"
)
$
script
:
cmakeDefs
+=
@(
"-DCMAKE_VERBOSE_MAKEFILE=on"
,
"-DLLAMA_SERVER_VERBOSE=on"
)
$
script
:
config
=
"RelWithDebInfo"
$
script
:
config
=
"RelWithDebInfo"
...
@@ -13,6 +14,17 @@ function init_vars {
...
@@ -13,6 +14,17 @@ function init_vars {
$
script
:
cmakeDefs
+=
@(
"-DLLAMA_SERVER_VERBOSE=off"
)
$
script
:
cmakeDefs
+=
@(
"-DLLAMA_SERVER_VERBOSE=off"
)
$
script
:
config
=
"Release"
$
script
:
config
=
"Release"
}
}
# Try to find the CUDA dir
if
(
$
env
:
CUDA_LIB_DIR
-eq
$null
)
{
$d
=
(
get-command
-ea
'silentlycontinue'
nvcc
)
.
path
if
(
$d
-ne
$null
)
{
$
script
:
CUDA_LIB_DIR
=
(
$d
|
split-path
-parent
)
}
}
else
{
$
script
:
CUDA_LIB_DIR
=
$
env
:
CUDA_LIB_DIR
}
$
script
:
BZIP2
=
(
get-command
-ea
'silentlycontinue'
bzip2
)
.
path
$
script
:
DUMPBIN
=
(
get-command
-ea
'silentlycontinue'
dumpbin
)
.
path
}
}
function
git_module_setup
{
function
git_module_setup
{
...
@@ -47,11 +59,25 @@ function build {
...
@@ -47,11 +59,25 @@ function build {
function
install
{
function
install
{
rm
-ea
0
-recurse
-force
-path
"
${script:buildDir}
/lib"
rm
-ea
0
-recurse
-force
-path
"
${script:buildDir}
/lib"
md
"
${script:buildDir}
/lib"
-ea
0
>
$null
md
"
${script:buildDir}
/lib"
-ea
0
>
$null
cp
"
${script:buildDir}
/bin/
${script:config}
/ext_server
_shared
.dll"
"
${script:buildDir}
/lib"
cp
"
${script:buildDir}
/bin/
${script:config}
/ext_server.dll"
"
${script:buildDir}
/lib"
cp
"
${script:buildDir}
/bin/
${script:config}
/llama.dll"
"
${script:buildDir}
/lib"
cp
"
${script:buildDir}
/bin/
${script:config}
/llama.dll"
"
${script:buildDir}
/lib"
# Display the dll dependencies in the build log
# Display the dll dependencies in the build log
dumpbin
/dependents
"
${script:buildDir}
/bin/
${script:config}
/ext_server_shared.dll"
|
select-string
".dll"
if
(
$
script
:
DUMPBIN
-ne
$null
)
{
&
"
$
script
:
DUMPBIN
"
/dependents
"
${script:buildDir}
/bin/
${script:config}
/ext_server.dll"
|
select-string
".dll"
}
}
function
compress_libs
{
if
(
$
script
:
BZIP2
-eq
$null
)
{
write-host
"bzip2 not installed, not compressing files"
return
}
write-host
"Compressing dlls..."
$libs
=
dir
"
${script:buildDir}
/lib/*.dll"
foreach
(
$file
in
$libs
)
{
&
"
$
script
:
BZIP2
"
-v9
$file
}
}
}
function
cleanup
{
function
cleanup
{
...
@@ -71,33 +97,47 @@ apply_patches
...
@@ -71,33 +97,47 @@ apply_patches
$
script
:
commonCpuDefs
=
@(
"-DCMAKE_POSITION_INDEPENDENT_CODE=on"
,
"-DLLAMA_NATIVE=off"
)
$
script
:
commonCpuDefs
=
@(
"-DCMAKE_POSITION_INDEPENDENT_CODE=on"
,
"-DLLAMA_NATIVE=off"
)
$
script
:
cmakeDefs
=
$
script
:
commonCpuDefs
+
@(
"-DLLAMA_AVX=off"
,
"-DLLAMA_AVX2=off"
,
"-DLLAMA_AVX512=off"
,
"-DLLAMA_FMA=off"
,
"-DLLAMA_F16C=off"
)
+
$
script
:
cmakeDefs
$
script
:
cmakeDefs
=
$
script
:
commonCpuDefs
+
@(
"-DLLAMA_AVX=off"
,
"-DLLAMA_AVX2=off"
,
"-DLLAMA_AVX512=off"
,
"-DLLAMA_FMA=off"
,
"-DLLAMA_F16C=off"
)
+
$
script
:
cmakeDefs
$
script
:
buildDir
=
"
${script:llamacppDir}
/build/windows/cpu"
$
script
:
buildDir
=
"
${script:llamacppDir}
/build/windows/
${script:ARCH}
/
cpu"
write-host
"Building LCD CPU"
write-host
"Building LCD CPU"
build
build
install
install
compress_libs
$
script
:
cmakeDefs
=
$
script
:
commonCpuDefs
+
@(
"-DLLAMA_AVX=on"
,
"-DLLAMA_AVX2=off"
,
"-DLLAMA_AVX512=off"
,
"-DLLAMA_FMA=off"
,
"-DLLAMA_F16C=off"
)
+
$
script
:
cmakeDefs
$
script
:
cmakeDefs
=
$
script
:
commonCpuDefs
+
@(
"-DLLAMA_AVX=on"
,
"-DLLAMA_AVX2=off"
,
"-DLLAMA_AVX512=off"
,
"-DLLAMA_FMA=off"
,
"-DLLAMA_F16C=off"
)
+
$
script
:
cmakeDefs
$
script
:
buildDir
=
"
${script:llamacppDir}
/build/windows/cpu_avx"
$
script
:
buildDir
=
"
${script:llamacppDir}
/build/windows/
${script:ARCH}
/
cpu_avx"
write-host
"Building AVX CPU"
write-host
"Building AVX CPU"
build
build
install
install
compress_libs
$
script
:
cmakeDefs
=
$
script
:
commonCpuDefs
+
@(
"-DLLAMA_AVX=on"
,
"-DLLAMA_AVX2=on"
,
"-DLLAMA_AVX512=off"
,
"-DLLAMA_FMA=on"
,
"-DLLAMA_F16C=on"
)
+
$
script
:
cmakeDefs
$
script
:
cmakeDefs
=
$
script
:
commonCpuDefs
+
@(
"-DLLAMA_AVX=on"
,
"-DLLAMA_AVX2=on"
,
"-DLLAMA_AVX512=off"
,
"-DLLAMA_FMA=on"
,
"-DLLAMA_F16C=on"
)
+
$
script
:
cmakeDefs
$
script
:
buildDir
=
"
${script:llamacppDir}
/build/windows/cpu_avx2"
$
script
:
buildDir
=
"
${script:llamacppDir}
/build/windows/
${script:ARCH}
/
cpu_avx2"
write-host
"Building AVX2 CPU"
write-host
"Building AVX2 CPU"
build
build
install
install
compress_libs
# Then build cuda as a dynamically loaded library
if
(
$null
-ne
$
script
:
CUDA_LIB_DIR
)
{
# TODO figure out how to detect cuda version
# Then build cuda as a dynamically loaded library
init_vars
$nvcc
=
(
get-command
-ea
'silentlycontinue'
nvcc
)
$
script
:
buildDir
=
"
${script:llamacppDir}
/build/windows/cuda"
if
(
$null
-ne
$nvcc
)
{
$
script
:
cmakeDefs
+=
@(
"-DLLAMA_CUBLAS=ON"
,
"-DLLAMA_AVX=on"
)
$
script
:
CUDA_VERSION
=
(
get-item
(
$nvcc
|
split-path
|
split-path
))
.
Basename
build
}
install
if
(
$null
-ne
$
script
:
CUDA_VERSION
)
{
$
script
:
CUDA_VARIANT
=
"_"
+
$
script
:
CUDA_VERSION
}
init_vars
$
script
:
buildDir
=
"
${script:llamacppDir}
/build/windows/
${script:ARCH}
/cuda
$
script
:
CUDA_VARIANT
"
$
script
:
cmakeDefs
+=
@(
"-DLLAMA_CUBLAS=ON"
,
"-DLLAMA_AVX=on"
)
build
install
cp
"
${script:CUDA_LIB_DIR}
/cudart64_*.dll"
"
${script:buildDir}
/lib"
cp
"
${script:CUDA_LIB_DIR}
/cublas64_*.dll"
"
${script:buildDir}
/lib"
cp
"
${script:CUDA_LIB_DIR}
/cublasLt64_*.dll"
"
${script:buildDir}
/lib"
compress_libs
}
# TODO - actually implement ROCm support on windows
# TODO - actually implement ROCm support on windows
$
script
:
buildDir
=
"
${script:llamacppDir}
/build/windows/rocm"
$
script
:
buildDir
=
"
${script:llamacppDir}
/build/windows/
${script:ARCH}
/
rocm"
rm
-ea
0
-recurse
-force
-path
"
${script:buildDir}
/lib"
rm
-ea
0
-recurse
-force
-path
"
${script:buildDir}
/lib"
md
"
${script:buildDir}
/lib"
-ea
0
>
$null
md
"
${script:buildDir}
/lib"
-ea
0
>
$null
...
...
llm/payload_common.go
View file @
df40b11d
package
llm
package
llm
import
(
import
(
"compress/bzip2"
"errors"
"errors"
"fmt"
"fmt"
"golang.org/x/exp/slices"
"io"
"io"
"io/fs"
"io/fs"
"log"
"log"
...
@@ -12,6 +12,9 @@ import (
...
@@ -12,6 +12,9 @@ import (
"runtime"
"runtime"
"strings"
"strings"
"golang.org/x/exp/slices"
"golang.org/x/sync/errgroup"
"github.com/jmorganca/ollama/gpu"
"github.com/jmorganca/ollama/gpu"
)
)
...
@@ -20,7 +23,7 @@ import (
...
@@ -20,7 +23,7 @@ import (
// Any library without a variant is the lowest common denominator
// Any library without a variant is the lowest common denominator
var
availableDynLibs
=
map
[
string
]
string
{}
var
availableDynLibs
=
map
[
string
]
string
{}
const
pathComponentCount
=
6
const
pathComponentCount
=
7
// getDynLibs returns an ordered list of LLM libraries to try, starting with the best
// getDynLibs returns an ordered list of LLM libraries to try, starting with the best
func
getDynLibs
(
gpuInfo
gpu
.
GpuInfo
)
[]
string
{
func
getDynLibs
(
gpuInfo
gpu
.
GpuInfo
)
[]
string
{
...
@@ -100,6 +103,7 @@ func rocmDynLibPresent() bool {
...
@@ -100,6 +103,7 @@ func rocmDynLibPresent() bool {
}
}
func
nativeInit
(
workdir
string
)
error
{
func
nativeInit
(
workdir
string
)
error
{
log
.
Printf
(
"Extracting dynamic libraries..."
)
if
runtime
.
GOOS
==
"darwin"
{
if
runtime
.
GOOS
==
"darwin"
{
err
:=
extractPayloadFiles
(
workdir
,
"llama.cpp/ggml-metal.metal"
)
err
:=
extractPayloadFiles
(
workdir
,
"llama.cpp/ggml-metal.metal"
)
if
err
!=
nil
{
if
err
!=
nil
{
...
@@ -113,7 +117,7 @@ func nativeInit(workdir string) error {
...
@@ -113,7 +117,7 @@ func nativeInit(workdir string) error {
os
.
Setenv
(
"GGML_METAL_PATH_RESOURCES"
,
workdir
)
os
.
Setenv
(
"GGML_METAL_PATH_RESOURCES"
,
workdir
)
}
}
libs
,
err
:=
extractDynamicLibs
(
workdir
,
"llama.cpp/build/*/*/lib/*"
)
libs
,
err
:=
extractDynamicLibs
(
workdir
,
"llama.cpp/build/*/*/
*/
lib/*"
)
if
err
!=
nil
{
if
err
!=
nil
{
if
err
==
payloadMissing
{
if
err
==
payloadMissing
{
log
.
Printf
(
"%s"
,
payloadMissing
)
log
.
Printf
(
"%s"
,
payloadMissing
)
...
@@ -151,45 +155,61 @@ func extractDynamicLibs(workDir, glob string) ([]string, error) {
...
@@ -151,45 +155,61 @@ func extractDynamicLibs(workDir, glob string) ([]string, error) {
}
}
libs
:=
[]
string
{}
libs
:=
[]
string
{}
// TODO consider making this idempotent with some sort of persistent directory (where we store models probably)
// and tracking by version so we don't reexpand the files every time
// Also maybe consider lazy loading only what is needed
g
:=
new
(
errgroup
.
Group
)
for
_
,
file
:=
range
files
{
for
_
,
file
:=
range
files
{
pathComps
:=
strings
.
Split
(
file
,
"/"
)
pathComps
:=
strings
.
Split
(
file
,
"/"
)
if
len
(
pathComps
)
!=
pathComponentCount
{
if
len
(
pathComps
)
!=
pathComponentCount
{
log
.
Printf
(
"unexpected payload components: %v"
,
pathComps
)
log
.
Printf
(
"unexpected payload components: %v"
,
pathComps
)
continue
continue
}
}
// llama.cpp/build/$OS/$VARIANT/lib/$LIBRARY
// Include the variant in the path to avoid conflicts between multiple server libs
targetDir
:=
filepath
.
Join
(
workDir
,
pathComps
[
pathComponentCount
-
3
])
srcFile
,
err
:=
libEmbed
.
Open
(
file
)
if
err
!=
nil
{
return
nil
,
fmt
.
Errorf
(
"read payload %s: %v"
,
file
,
err
)
}
defer
srcFile
.
Close
()
if
err
:=
os
.
MkdirAll
(
targetDir
,
0
o755
);
err
!=
nil
{
return
nil
,
fmt
.
Errorf
(
"create payload temp dir %s: %v"
,
workDir
,
err
)
}
destFile
:=
filepath
.
Join
(
targetDir
,
filepath
.
Base
(
file
))
if
strings
.
Contains
(
destFile
,
"server"
)
{
libs
=
append
(
libs
,
destFile
)
}
_
,
err
=
os
.
Stat
(
destFile
)
file
:=
file
switch
{
g
.
Go
(
func
()
error
{
case
errors
.
Is
(
err
,
os
.
ErrNotExist
)
:
// llama.cpp/build/$OS/$GOARCH/$VARIANT/lib/$LIBRARY
destFile
,
err
:=
os
.
OpenFile
(
destFile
,
os
.
O_WRONLY
|
os
.
O_CREATE
|
os
.
O_TRUNC
,
0
o755
)
// Include the variant in the path to avoid conflicts between multiple server libs
targetDir
:=
filepath
.
Join
(
workDir
,
pathComps
[
pathComponentCount
-
3
])
srcFile
,
err
:=
libEmbed
.
Open
(
file
)
if
err
!=
nil
{
if
err
!=
nil
{
return
nil
,
fmt
.
Errorf
(
"
write
payload %s: %v"
,
file
,
err
)
return
fmt
.
Errorf
(
"
read
payload %s: %v"
,
file
,
err
)
}
}
defer
dest
File
.
Close
()
defer
src
File
.
Close
()
if
_
,
err
:=
io
.
Copy
(
destFile
,
srcFile
);
err
!=
nil
{
if
err
:=
os
.
MkdirAll
(
targetDir
,
0
o755
);
err
!=
nil
{
return
nil
,
fmt
.
Errorf
(
"c
opy
payload %s: %v"
,
file
,
err
)
return
fmt
.
Errorf
(
"c
reate
payload
temp dir
%s: %v"
,
workDir
,
err
)
}
}
case
err
!=
nil
:
src
:=
io
.
Reader
(
srcFile
)
return
nil
,
fmt
.
Errorf
(
"stat payload %s: %v"
,
file
,
err
)
filename
:=
file
}
if
strings
.
HasSuffix
(
file
,
".bz2"
)
{
src
=
bzip2
.
NewReader
(
src
)
filename
=
strings
.
TrimSuffix
(
filename
,
".bz2"
)
}
destFile
:=
filepath
.
Join
(
targetDir
,
filepath
.
Base
(
filename
))
if
strings
.
Contains
(
destFile
,
"server"
)
{
libs
=
append
(
libs
,
destFile
)
}
_
,
err
=
os
.
Stat
(
destFile
)
switch
{
case
errors
.
Is
(
err
,
os
.
ErrNotExist
)
:
destFile
,
err
:=
os
.
OpenFile
(
destFile
,
os
.
O_WRONLY
|
os
.
O_CREATE
|
os
.
O_TRUNC
,
0
o755
)
if
err
!=
nil
{
return
fmt
.
Errorf
(
"write payload %s: %v"
,
file
,
err
)
}
defer
destFile
.
Close
()
if
_
,
err
:=
io
.
Copy
(
destFile
,
src
);
err
!=
nil
{
return
fmt
.
Errorf
(
"copy payload %s: %v"
,
file
,
err
)
}
case
err
!=
nil
:
return
fmt
.
Errorf
(
"stat payload %s: %v"
,
file
,
err
)
}
return
nil
})
}
}
return
libs
,
nil
return
libs
,
g
.
Wait
()
}
}
func
extractPayloadFiles
(
workDir
,
glob
string
)
error
{
func
extractPayloadFiles
(
workDir
,
glob
string
)
error
{
...
@@ -207,8 +227,14 @@ func extractPayloadFiles(workDir, glob string) error {
...
@@ -207,8 +227,14 @@ func extractPayloadFiles(workDir, glob string) error {
if
err
:=
os
.
MkdirAll
(
workDir
,
0
o755
);
err
!=
nil
{
if
err
:=
os
.
MkdirAll
(
workDir
,
0
o755
);
err
!=
nil
{
return
fmt
.
Errorf
(
"create payload temp dir %s: %v"
,
workDir
,
err
)
return
fmt
.
Errorf
(
"create payload temp dir %s: %v"
,
workDir
,
err
)
}
}
src
:=
io
.
Reader
(
srcFile
)
filename
:=
file
if
strings
.
HasSuffix
(
file
,
".bz2"
)
{
src
=
bzip2
.
NewReader
(
src
)
filename
=
strings
.
TrimSuffix
(
filename
,
".bz2"
)
}
destFile
:=
filepath
.
Join
(
workDir
,
filepath
.
Base
(
file
))
destFile
:=
filepath
.
Join
(
workDir
,
filepath
.
Base
(
file
name
))
_
,
err
=
os
.
Stat
(
destFile
)
_
,
err
=
os
.
Stat
(
destFile
)
switch
{
switch
{
case
errors
.
Is
(
err
,
os
.
ErrNotExist
)
:
case
errors
.
Is
(
err
,
os
.
ErrNotExist
)
:
...
@@ -217,7 +243,7 @@ func extractPayloadFiles(workDir, glob string) error {
...
@@ -217,7 +243,7 @@ func extractPayloadFiles(workDir, glob string) error {
return
fmt
.
Errorf
(
"write payload %s: %v"
,
file
,
err
)
return
fmt
.
Errorf
(
"write payload %s: %v"
,
file
,
err
)
}
}
defer
destFile
.
Close
()
defer
destFile
.
Close
()
if
_
,
err
:=
io
.
Copy
(
destFile
,
src
File
);
err
!=
nil
{
if
_
,
err
:=
io
.
Copy
(
destFile
,
src
);
err
!=
nil
{
return
fmt
.
Errorf
(
"copy payload %s: %v"
,
file
,
err
)
return
fmt
.
Errorf
(
"copy payload %s: %v"
,
file
,
err
)
}
}
case
err
!=
nil
:
case
err
!=
nil
:
...
...
llm/payload_darwin_amd64.go
0 → 100644
View file @
df40b11d
package
llm
import
(
"embed"
)
//go:embed llama.cpp/ggml-metal.metal llama.cpp/build/darwin/x86_64/*/lib/*.dylib*
var
libEmbed
embed
.
FS
llm/payload_darwin.go
→
llm/payload_darwin
_arm64
.go
View file @
df40b11d
...
@@ -4,5 +4,5 @@ import (
...
@@ -4,5 +4,5 @@ import (
"embed"
"embed"
)
)
//go:embed llama.cpp/ggml-metal.metal llama.cpp/build/darwin/*/lib/*.
so
//go:embed llama.cpp/ggml-metal.metal llama.cpp/build/darwin/
arm64/
*/lib/*.
dylib*
var
libEmbed
embed
.
FS
var
libEmbed
embed
.
FS
llm/payload_linux.go
View file @
df40b11d
...
@@ -4,5 +4,5 @@ import (
...
@@ -4,5 +4,5 @@ import (
"embed"
"embed"
)
)
//go:embed llama.cpp/build/linux/*/lib/*.so
//go:embed llama.cpp/build/linux/*/
*/
lib/*.so
*
var
libEmbed
embed
.
FS
var
libEmbed
embed
.
FS
llm/payload_windows.go
View file @
df40b11d
...
@@ -4,5 +4,5 @@ import (
...
@@ -4,5 +4,5 @@ import (
"embed"
"embed"
)
)
//go:embed llama.cpp/build/windows/*/lib/*.dll
//go:embed llama.cpp/build/windows/*/
*/
lib/*.dll
*
var
libEmbed
embed
.
FS
var
libEmbed
embed
.
FS
scripts/build_darwin.sh
View file @
df40b11d
#!/bin/sh
#!/bin/sh
set
-e
u
set
-e
export
VERSION
=
${
VERSION
:-
0
.0.0
}
export
VERSION
=
${
VERSION
:-
0
.0.0
}
export
GOFLAGS
=
"'-ldflags=-w -s
\"
-X=github.com/jmorganca/ollama/version.Version=
$VERSION
\"
\"
-X=github.com/jmorganca/ollama/server.mode=release
\"
'"
export
GOFLAGS
=
"'-ldflags=-w -s
\"
-X=github.com/jmorganca/ollama/version.Version=
$VERSION
\"
\"
-X=github.com/jmorganca/ollama/server.mode=release
\"
'"
...
@@ -11,21 +11,36 @@ for TARGETARCH in arm64 amd64; do
...
@@ -11,21 +11,36 @@ for TARGETARCH in arm64 amd64; do
rm
-rf
llm/llama.cpp/build
rm
-rf
llm/llama.cpp/build
GOOS
=
darwin
GOARCH
=
$TARGETARCH
go generate ./...
GOOS
=
darwin
GOARCH
=
$TARGETARCH
go generate ./...
CGO_ENABLED
=
1
GOOS
=
darwin
GOARCH
=
$TARGETARCH
go build
-o
dist/ollama-darwin-
$TARGETARCH
CGO_ENABLED
=
1
GOOS
=
darwin
GOARCH
=
$TARGETARCH
go build
-o
dist/ollama-darwin-
$TARGETARCH
CGO_ENABLED
=
1
GOOS
=
darwin
GOARCH
=
$TARGETARCH
go build
-cover
-o
dist/ollama-darwin-
$TARGETARCH
-cov
done
done
lipo
-create
-output
dist/ollama dist/ollama-darwin-
*
lipo
-create
-output
dist/ollama dist/ollama-darwin-arm64 dist/ollama-darwin-amd64
rm
-f
dist/ollama-darwin-
*
rm
-f
dist/ollama-darwin-arm64 dist/ollama-darwin-amd64
codesign
--deep
--force
--options
=
runtime
--sign
"
$APPLE_IDENTITY
"
--timestamp
dist/ollama
if
[
-n
"
$APPLE_IDENTITY
"
]
;
then
codesign
--deep
--force
--options
=
runtime
--sign
"
$APPLE_IDENTITY
"
--timestamp
dist/ollama
else
echo
"Skipping code signing - set APPLE_IDENTITY"
fi
chmod
+x dist/ollama
chmod
+x dist/ollama
# build and sign the mac app
# build and
optionally
sign the mac app
npm
install
--prefix
app
npm
install
--prefix
app
npm run
--prefix
app make:sign
if
[
-n
"
$APPLE_IDENTITY
"
]
;
then
npm run
--prefix
app make:sign
else
npm run
--prefix
app make
fi
cp
app/out/make/zip/darwin/universal/Ollama-darwin-universal-
$VERSION
.zip dist/Ollama-darwin.zip
cp
app/out/make/zip/darwin/universal/Ollama-darwin-universal-
$VERSION
.zip dist/Ollama-darwin.zip
# sign the binary and rename it
# sign the binary and rename it
codesign
-f
--timestamp
-s
"
$APPLE_IDENTITY
"
--identifier
ai.ollama.ollama
--options
=
runtime dist/ollama
if
[
-n
"
$APPLE_IDENTITY
"
]
;
then
codesign
-f
--timestamp
-s
"
$APPLE_IDENTITY
"
--identifier
ai.ollama.ollama
--options
=
runtime dist/ollama
else
echo
"WARNING: Skipping code signing - set APPLE_IDENTITY"
fi
ditto
-c
-k
--keepParent
dist/ollama dist/temp.zip
ditto
-c
-k
--keepParent
dist/ollama dist/temp.zip
xcrun notarytool submit dist/temp.zip
--wait
--timeout
10m
--apple-id
$APPLE_ID
--password
$APPLE_PASSWORD
--team-id
$APPLE_TEAM_ID
if
[
-n
"
$APPLE_IDENTITY
"
]
;
then
xcrun notarytool submit dist/temp.zip
--wait
--timeout
10m
--apple-id
$APPLE_ID
--password
$APPLE_PASSWORD
--team-id
$APPLE_TEAM_ID
fi
mv
dist/ollama dist/ollama-darwin
mv
dist/ollama dist/ollama-darwin
rm
-f
dist/temp.zip
rm
-f
dist/temp.zip
scripts/build_remote.py
View file @
df40b11d
...
@@ -66,3 +66,7 @@ subprocess.check_call(['ssh', netloc, 'cd', path, ';', GoCmd, 'generate', './...
...
@@ -66,3 +66,7 @@ subprocess.check_call(['ssh', netloc, 'cd', path, ';', GoCmd, 'generate', './...
print
(
"Building"
)
print
(
"Building"
)
subprocess
.
check_call
([
'ssh'
,
netloc
,
'cd'
,
path
,
';'
,
GoCmd
,
'build'
,
'.'
])
subprocess
.
check_call
([
'ssh'
,
netloc
,
'cd'
,
path
,
';'
,
GoCmd
,
'build'
,
'.'
])
print
(
"Copying built result"
)
subprocess
.
check_call
([
'scp'
,
netloc
+
":"
+
path
+
"/ollama.exe"
,
'./dist/'
])
scripts/rh_linux_deps.sh
View file @
df40b11d
...
@@ -28,6 +28,7 @@ fi
...
@@ -28,6 +28,7 @@ fi
if
[
-n
"
${
CMAKE_VERSION
}
"
]
;
then
if
[
-n
"
${
CMAKE_VERSION
}
"
]
;
then
curl
-s
-L
https://github.com/Kitware/CMake/releases/download/v
${
CMAKE_VERSION
}
/cmake-
${
CMAKE_VERSION
}
-linux-
$(
uname
-m
)
.tar.gz |
tar
-zx
-C
/usr
--strip-components
1
curl
-s
-L
https://github.com/Kitware/CMake/releases/download/v
${
CMAKE_VERSION
}
/cmake-
${
CMAKE_VERSION
}
-linux-
$(
uname
-m
)
.tar.gz |
tar
-zx
-C
/usr
--strip-components
1
dnf
install
-y
bzip2
fi
fi
if
[
-n
"
${
GOLANG_VERSION
}
"
]
;
then
if
[
-n
"
${
GOLANG_VERSION
}
"
]
;
then
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment