"docs/vscode:/vscode.git/clone" did not exist on "3651b14cf43d501e3a30e8c56469134c39d91bea"
Commit 1b249748 authored by Daniel Hiltgen's avatar Daniel Hiltgen
Browse files

Add multiple CPU variants for Intel Mac

This also refines the build process for the ext_server build.
parent d5a73533
...@@ -86,6 +86,9 @@ jobs: ...@@ -86,6 +86,9 @@ jobs:
- os: windows-latest - os: windows-latest
arch: arm64 arch: arm64
runs-on: ${{ matrix.os }} runs-on: ${{ matrix.os }}
env:
GOARCH: ${{ matrix.arch }}
CGO_ENABLED: "1"
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
with: with:
......
...@@ -10,6 +10,7 @@ COPY llm llm ...@@ -10,6 +10,7 @@ COPY llm llm
FROM --platform=linux/amd64 nvidia/cuda:$CUDA_VERSION-devel-centos7 AS cuda-build-amd64 FROM --platform=linux/amd64 nvidia/cuda:$CUDA_VERSION-devel-centos7 AS cuda-build-amd64
ARG CMAKE_VERSION ARG CMAKE_VERSION
ARG CGO_CFLAGS
COPY ./scripts/rh_linux_deps.sh / COPY ./scripts/rh_linux_deps.sh /
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
...@@ -19,6 +20,7 @@ RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh ...@@ -19,6 +20,7 @@ RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION-devel-rockylinux8 AS cuda-build-arm64 FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION-devel-rockylinux8 AS cuda-build-arm64
ARG CMAKE_VERSION ARG CMAKE_VERSION
ARG CGO_CFLAGS
COPY ./scripts/rh_linux_deps.sh / COPY ./scripts/rh_linux_deps.sh /
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH
...@@ -28,6 +30,7 @@ RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh ...@@ -28,6 +30,7 @@ RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
FROM --platform=linux/amd64 rocm/dev-centos-7:5.7.1-complete AS rocm-5-build-amd64 FROM --platform=linux/amd64 rocm/dev-centos-7:5.7.1-complete AS rocm-5-build-amd64
ARG CMAKE_VERSION ARG CMAKE_VERSION
ARG CGO_CFLAGS
COPY ./scripts/rh_linux_deps.sh / COPY ./scripts/rh_linux_deps.sh /
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
...@@ -38,6 +41,7 @@ RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh ...@@ -38,6 +41,7 @@ RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
FROM --platform=linux/amd64 rocm/dev-centos-7:6.0-complete AS rocm-6-build-amd64 FROM --platform=linux/amd64 rocm/dev-centos-7:6.0-complete AS rocm-6-build-amd64
ARG CMAKE_VERSION ARG CMAKE_VERSION
ARG CGO_CFLAGS
COPY ./scripts/rh_linux_deps.sh / COPY ./scripts/rh_linux_deps.sh /
RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
...@@ -50,6 +54,7 @@ FROM --platform=linux/amd64 centos:7 AS cpu-build-amd64 ...@@ -50,6 +54,7 @@ FROM --platform=linux/amd64 centos:7 AS cpu-build-amd64
ARG CMAKE_VERSION ARG CMAKE_VERSION
ARG GOLANG_VERSION ARG GOLANG_VERSION
ARG OLLAMA_CUSTOM_CPU_DEFS ARG OLLAMA_CUSTOM_CPU_DEFS
ARG CGO_CFLAGS
COPY ./scripts/rh_linux_deps.sh / COPY ./scripts/rh_linux_deps.sh /
RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
...@@ -61,6 +66,7 @@ FROM --platform=linux/arm64 centos:7 AS cpu-build-arm64 ...@@ -61,6 +66,7 @@ FROM --platform=linux/arm64 centos:7 AS cpu-build-arm64
ARG CMAKE_VERSION ARG CMAKE_VERSION
ARG GOLANG_VERSION ARG GOLANG_VERSION
ARG OLLAMA_CUSTOM_CPU_DEFS ARG OLLAMA_CUSTOM_CPU_DEFS
ARG CGO_CFLAGS
COPY ./scripts/rh_linux_deps.sh / COPY ./scripts/rh_linux_deps.sh /
RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
...@@ -72,7 +78,7 @@ RUN sh gen_linux.sh ...@@ -72,7 +78,7 @@ RUN sh gen_linux.sh
FROM --platform=linux/amd64 cpu-build-amd64 AS build-amd64 FROM --platform=linux/amd64 cpu-build-amd64 AS build-amd64
ENV CGO_ENABLED 1 ENV CGO_ENABLED 1
ARG GOFLAGS ARG GOFLAGS
ARG CGO_FLAGS ARG CGO_CFLAGS
WORKDIR /go/src/github.com/jmorganca/ollama WORKDIR /go/src/github.com/jmorganca/ollama
COPY . . COPY . .
COPY --from=cuda-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/ COPY --from=cuda-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
...@@ -84,7 +90,7 @@ FROM --platform=linux/arm64 cpu-build-arm64 AS build-arm64 ...@@ -84,7 +90,7 @@ FROM --platform=linux/arm64 cpu-build-arm64 AS build-arm64
ENV CGO_ENABLED 1 ENV CGO_ENABLED 1
ARG GOLANG_VERSION ARG GOLANG_VERSION
ARG GOFLAGS ARG GOFLAGS
ARG CGO_FLAGS ARG CGO_CFLAGS
WORKDIR /go/src/github.com/jmorganca/ollama WORKDIR /go/src/github.com/jmorganca/ollama
COPY . . COPY . .
COPY --from=cuda-build-arm64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/ COPY --from=cuda-build-arm64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
......
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
#ifdef __linux__ #ifdef __linux__
#include <dlfcn.h> #include <dlfcn.h>
#define LOAD_LIBRARY(lib, flags) dlopen(lib, flags | RTLD_DEEPBIND) #define LOAD_LIBRARY(lib, flags) dlopen(lib, flags)
#define LOAD_SYMBOL(handle, sym) dlsym(handle, sym) #define LOAD_SYMBOL(handle, sym) dlsym(handle, sym)
#define LOAD_ERR() strdup(dlerror()) #define LOAD_ERR() strdup(dlerror())
#define UNLOAD_LIBRARY(handle) dlclose(handle) #define UNLOAD_LIBRARY(handle) dlclose(handle)
...@@ -58,8 +58,8 @@ void dyn_init(const char *libPath, struct dynamic_llama_server *s, ...@@ -58,8 +58,8 @@ void dyn_init(const char *libPath, struct dynamic_llama_server *s,
{"", NULL}, {"", NULL},
}; };
printf("loading %s library\n", libPath); printf("loading library %s\n", libPath);
s->handle = LOAD_LIBRARY(libPath, RTLD_NOW); s->handle = LOAD_LIBRARY(libPath, RTLD_GLOBAL|RTLD_NOW);
if (!s->handle) { if (!s->handle) {
err->id = -1; err->id = -1;
char *msg = LOAD_ERR(); char *msg = LOAD_ERR();
......
...@@ -372,15 +372,6 @@ func updatePath(dir string) { ...@@ -372,15 +372,6 @@ func updatePath(dir string) {
newPath := strings.Join(append([]string{dir}, pathComponents...), ";") newPath := strings.Join(append([]string{dir}, pathComponents...), ";")
log.Printf("Updating PATH to %s", newPath) log.Printf("Updating PATH to %s", newPath)
os.Setenv("PATH", newPath) os.Setenv("PATH", newPath)
} else {
pathComponents := strings.Split(os.Getenv("LD_LIBRARY_PATH"), ":")
for _, comp := range pathComponents {
if comp == dir {
return
}
}
newPath := strings.Join(append([]string{dir}, pathComponents...), ":")
log.Printf("Updating LD_LIBRARY_PATH to %s", newPath)
os.Setenv("LD_LIBRARY_PATH", newPath)
} }
// linux and darwin rely on rpath
} }
...@@ -2,28 +2,24 @@ ...@@ -2,28 +2,24 @@
set(TARGET ext_server) set(TARGET ext_server)
option(LLAMA_SERVER_VERBOSE "Build verbose logging option for Server" ON) option(LLAMA_SERVER_VERBOSE "Build verbose logging option for Server" ON)
add_library(${TARGET} STATIC ../../../ext_server/ext_server.cpp) if (WIN32)
add_library(${TARGET} SHARED ../../../ext_server/ext_server.cpp ../../llama.cpp)
else()
add_library(${TARGET} STATIC ../../../ext_server/ext_server.cpp ../../llama.cpp)
endif()
target_include_directories(${TARGET} PRIVATE ../../common) target_include_directories(${TARGET} PRIVATE ../../common)
target_include_directories(${TARGET} PRIVATE ../..) target_include_directories(${TARGET} PRIVATE ../..)
target_include_directories(${TARGET} PRIVATE ../../..) target_include_directories(${TARGET} PRIVATE ../../..)
target_compile_features(${TARGET} PRIVATE cxx_std_11) target_compile_features(${TARGET} PRIVATE cxx_std_11)
target_compile_definitions(${TARGET} PUBLIC LLAMA_SERVER_LIBRARY=1) target_compile_definitions(${TARGET} PUBLIC LLAMA_SERVER_LIBRARY=1)
target_link_libraries(${TARGET} PRIVATE common llama llava ${CMAKE_THREAD_LIBS_INIT}) target_link_libraries(${TARGET} PRIVATE ggml llava common )
target_compile_definitions(${TARGET} PRIVATE set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON)
SERVER_VERBOSE=$<BOOL:${LLAMA_SERVER_VERBOSE}> target_compile_definitions(${TARGET} PRIVATE SERVER_VERBOSE=$<BOOL:${LLAMA_SERVER_VERBOSE}>)
) install(TARGETS ext_server LIBRARY)
if (BUILD_SHARED_LIBS)
set_target_properties(ext_server PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_compile_definitions(ext_server PRIVATE LLAMA_SHARED LLAMA_BUILD)
add_library(ext_server_shared SHARED $<TARGET_OBJECTS:ext_server>)
target_link_libraries(ext_server_shared PRIVATE ggml llama llava common ${CMAKE_THREAD_LIBS_INIT})
install(TARGETS ext_server_shared LIBRARY)
endif()
if (CUDAToolkit_FOUND) if (CUDAToolkit_FOUND)
target_include_directories(${TARGET} PRIVATE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) target_include_directories(${TARGET} PRIVATE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
if (WIN32) if (WIN32)
target_link_libraries(ext_server_shared PRIVATE nvml) target_link_libraries(${TARGET} PRIVATE nvml)
endif() endif()
endif() endif()
\ No newline at end of file
# common logic accross linux and darwin # common logic accross linux and darwin
init_vars() { init_vars() {
case "${GOARCH}" in
"amd64")
ARCH="x86_64"
;;
"arm64")
ARCH="arm64"
;;
*)
ARCH=$(uname -m | sed -e "s/aarch64/arm64/g")
esac
LLAMACPP_DIR=../llama.cpp LLAMACPP_DIR=../llama.cpp
CMAKE_DEFS="" CMAKE_DEFS=""
CMAKE_TARGETS="--target ggml --target ggml_static --target llama --target build_info --target common --target ext_server --target llava_static" CMAKE_TARGETS="--target ext_server"
if echo "${CGO_CFLAGS}" | grep -- '-g' >/dev/null; then if echo "${CGO_CFLAGS}" | grep -- '-g' >/dev/null; then
CMAKE_DEFS="-DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_VERBOSE_MAKEFILE=on -DLLAMA_GPROF=on -DLLAMA_SERVER_VERBOSE=on" CMAKE_DEFS="-DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_VERBOSE_MAKEFILE=on -DLLAMA_GPROF=on -DLLAMA_SERVER_VERBOSE=on ${CMAKE_DEFS}"
else else
# TODO - add additional optimization flags... # TODO - add additional optimization flags...
CMAKE_DEFS="-DCMAKE_BUILD_TYPE=Release -DLLAMA_SERVER_VERBOSE=off" CMAKE_DEFS="-DCMAKE_BUILD_TYPE=Release -DLLAMA_SERVER_VERBOSE=off ${CMAKE_DEFS}"
fi fi
case $(uname -s) in
"Darwin")
LIB_EXT="dylib"
WHOLE_ARCHIVE="-Wl,-force_load"
NO_WHOLE_ARCHIVE=""
GCC_ARCH="-arch ${ARCH}"
;;
"Linux")
LIB_EXT="so"
WHOLE_ARCHIVE="-Wl,--whole-archive"
NO_WHOLE_ARCHIVE="-Wl,--no-whole-archive"
# Cross compiling not supported on linux - Use docker
GCC_ARCH=""
;;
*)
;;
esac
} }
git_module_setup() { git_module_setup() {
...@@ -40,25 +69,29 @@ apply_patches() { ...@@ -40,25 +69,29 @@ apply_patches() {
build() { build() {
cmake -S ${LLAMACPP_DIR} -B ${BUILD_DIR} ${CMAKE_DEFS} cmake -S ${LLAMACPP_DIR} -B ${BUILD_DIR} ${CMAKE_DEFS}
cmake --build ${BUILD_DIR} ${CMAKE_TARGETS} -j8 cmake --build ${BUILD_DIR} ${CMAKE_TARGETS} -j8
mkdir -p ${BUILD_DIR}/lib/
g++ -fPIC -g -shared -o ${BUILD_DIR}/lib/libext_server.${LIB_EXT} \
${GCC_ARCH} \
${WHOLE_ARCHIVE} ${BUILD_DIR}/examples/server/libext_server.a ${NO_WHOLE_ARCHIVE} \
${BUILD_DIR}/common/libcommon.a \
${BUILD_DIR}/libllama.a \
-Wl,-rpath,\$ORIGIN \
-lpthread -ldl -lm \
${EXTRA_LIBS}
} }
install() { compress_libs() {
rm -rf ${BUILD_DIR}/lib echo "Compressing payloads to reduce overall binary size..."
mkdir -p ${BUILD_DIR}/lib pids=""
cp ${BUILD_DIR}/examples/server/libext_server.a ${BUILD_DIR}/lib for lib in ${BUILD_DIR}/lib/*.${LIB_EXT}* ; do
cp ${BUILD_DIR}/common/libcommon.a ${BUILD_DIR}/lib bzip2 -v9 ${lib} &
cp ${BUILD_DIR}/libllama.a ${BUILD_DIR}/lib pids+=" $!"
cp ${BUILD_DIR}/libggml_static.a ${BUILD_DIR}/lib done
} echo
for pid in ${pids}; do
link_server_lib() { wait $pid
gcc -fPIC -g -shared -o ${BUILD_DIR}/lib/libext_server.so \ done
-Wl,--whole-archive \ echo "Finished compression"
${BUILD_DIR}/lib/libext_server.a \
-Wl,--no-whole-archive \
${BUILD_DIR}/lib/libcommon.a \
${BUILD_DIR}/lib/libllama.a \
-lstdc++
} }
# Keep the local tree clean after we're done with the build # Keep the local tree clean after we're done with the build
......
...@@ -9,16 +9,52 @@ set -o pipefail ...@@ -9,16 +9,52 @@ set -o pipefail
echo "Starting darwin generate script" echo "Starting darwin generate script"
source $(dirname $0)/gen_common.sh source $(dirname $0)/gen_common.sh
init_vars init_vars
CMAKE_DEFS="-DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 -DCMAKE_SYSTEM_NAME=Darwin -DLLAMA_ACCELERATE=on ${CMAKE_DEFS}" git_module_setup
BUILD_DIR="${LLAMACPP_DIR}/build/darwin/metal" apply_patches
COMMON_DARWIN_DEFS="-DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 -DCMAKE_SYSTEM_NAME=Darwin -DLLAMA_ACCELERATE=off"
case "${GOARCH}" in case "${GOARCH}" in
"amd64") "amd64")
CMAKE_DEFS="-DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DLLAMA_METAL=off -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}" COMMON_CPU_DEFS="${COMMON_DARWIN_DEFS} -DCMAKE_SYSTEM_PROCESSOR=${ARCH} -DCMAKE_OSX_ARCHITECTURES=${ARCH} -DLLAMA_METAL=off -DLLAMA_NATIVE=off"
ARCH="x86_64"
#
# CPU first for the default library, set up as lowest common denominator for maximum compatibility (including Rosetta)
#
CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
BUILD_DIR="${LLAMACPP_DIR}/build/darwin/${ARCH}/cpu"
echo "Building LCD CPU"
build
compress_libs
#
# ~2011 CPU Dynamic library with more capabilities turned on to optimize performance
# Approximately 400% faster than LCD on same CPU
#
init_vars
CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
BUILD_DIR="${LLAMACPP_DIR}/build/darwin/${ARCH}/cpu_avx"
echo "Building AVX CPU"
build
compress_libs
#
# ~2013 CPU Dynamic library
# Approximately 10% faster than AVX on same CPU
#
init_vars
CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_AVX512=off -DLLAMA_FMA=on -DLLAMA_F16C=on ${CMAKE_DEFS}"
BUILD_DIR="${LLAMACPP_DIR}/build/darwin/${ARCH}/cpu_avx2"
echo "Building AVX2 CPU"
build
compress_libs
;; ;;
"arm64") "arm64")
CMAKE_DEFS="-DCMAKE_SYSTEM_PROCESSOR=arm64 -DCMAKE_OSX_ARCHITECTURES=arm64 -DLLAMA_METAL=on ${CMAKE_DEFS}" CMAKE_DEFS="${COMMON_DARWIN_DEFS} -DCMAKE_SYSTEM_PROCESSOR=${ARCH} -DCMAKE_OSX_ARCHITECTURES=${ARCH} -DLLAMA_METAL=on -DLLAMA_ACCELERATE=on ${CMAKE_DEFS}"
ARCH="arm64" BUILD_DIR="${LLAMACPP_DIR}/build/darwin/${ARCH}/metal"
EXTRA_LIBS="${EXTRA_LIBS} -framework Accelerate -framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders"
build
compress_libs
;; ;;
*) *)
echo "GOARCH must be set" echo "GOARCH must be set"
...@@ -27,21 +63,4 @@ case "${GOARCH}" in ...@@ -27,21 +63,4 @@ case "${GOARCH}" in
;; ;;
esac esac
git_module_setup
apply_patches
build
install
gcc -fPIC -g -shared -o ${BUILD_DIR}/lib/libext_server.so \
-arch ${ARCH} \
-Wl,-force_load ${BUILD_DIR}/lib/libext_server.a \
${BUILD_DIR}/lib/libcommon.a \
${BUILD_DIR}/lib/libllama.a \
${BUILD_DIR}/lib/libggml_static.a \
-lpthread -ldl -lm -lc++ \
-framework Accelerate \
-framework Foundation \
-framework Metal \
-framework MetalKit \
-framework MetalPerformanceShaders
cleanup cleanup
...@@ -2,16 +2,14 @@ ...@@ -2,16 +2,14 @@
# This script is intended to run inside the go generate # This script is intended to run inside the go generate
# working directory must be llm/generate/ # working directory must be llm/generate/
# First we build our default built-in library which will be linked into the CGO # First we build one or more CPU based LLM libraries
# binary as a normal dependency. This default build is CPU based.
# #
# Then we build a CUDA dynamic library (although statically linked with the CUDA # Then if we detect CUDA, we build a CUDA dynamic library, and carry the required
# library dependencies for maximum portability) # library dependencies
# #
# Then if we detect ROCm, we build a dynamically loaded ROCm lib. ROCm is particularly # Then if we detect ROCm, we build a dynamically loaded ROCm lib. The ROCM
# important to be a dynamic lib even if it's the only GPU library detected because # libraries are quite large, and also dynamically load data files at runtime
# we can't redistribute the objectfiles but must rely on dynamic libraries at # which in turn are large, so we don't attempt to cary them as payload
# runtime, which could lead the server not to start if not present.
set -ex set -ex
set -o pipefail set -o pipefail
...@@ -59,11 +57,10 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then ...@@ -59,11 +57,10 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
if [ -n "${OLLAMA_CUSTOM_CPU_DEFS}" ]; then if [ -n "${OLLAMA_CUSTOM_CPU_DEFS}" ]; then
echo "OLLAMA_CUSTOM_CPU_DEFS=\"${OLLAMA_CUSTOM_CPU_DEFS}\"" echo "OLLAMA_CUSTOM_CPU_DEFS=\"${OLLAMA_CUSTOM_CPU_DEFS}\""
CMAKE_DEFS="${OLLAMA_CUSTOM_CPU_DEFS} -DCMAKE_POSITION_INDEPENDENT_CODE=on ${CMAKE_DEFS}" CMAKE_DEFS="${OLLAMA_CUSTOM_CPU_DEFS} -DCMAKE_POSITION_INDEPENDENT_CODE=on ${CMAKE_DEFS}"
BUILD_DIR="${LLAMACPP_DIR}/build/linux/cpu" BUILD_DIR="${LLAMACPP_DIR}/build/linux/${ARCH}/cpu"
echo "Building custom CPU" echo "Building custom CPU"
build build
install compress_libs
link_server_lib
else else
# Darwin Rosetta x86 emulation does NOT support AVX, AVX2, AVX512 # Darwin Rosetta x86 emulation does NOT support AVX, AVX2, AVX512
# -DLLAMA_AVX -- 2011 Intel Sandy Bridge & AMD Bulldozer # -DLLAMA_AVX -- 2011 Intel Sandy Bridge & AMD Bulldozer
...@@ -80,11 +77,10 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then ...@@ -80,11 +77,10 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
# CPU first for the default library, set up as lowest common denominator for maximum compatibility (including Rosetta) # CPU first for the default library, set up as lowest common denominator for maximum compatibility (including Rosetta)
# #
CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}" CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
BUILD_DIR="${LLAMACPP_DIR}/build/linux/cpu" BUILD_DIR="${LLAMACPP_DIR}/build/linux/${ARCH}/cpu"
echo "Building LCD CPU" echo "Building LCD CPU"
build build
install compress_libs
link_server_lib
# #
# ~2011 CPU Dynamic library with more capabilities turned on to optimize performance # ~2011 CPU Dynamic library with more capabilities turned on to optimize performance
...@@ -92,11 +88,10 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then ...@@ -92,11 +88,10 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
# #
init_vars init_vars
CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}" CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
BUILD_DIR="${LLAMACPP_DIR}/build/linux/cpu_avx" BUILD_DIR="${LLAMACPP_DIR}/build/linux/${ARCH}/cpu_avx"
echo "Building AVX CPU" echo "Building AVX CPU"
build build
install compress_libs
link_server_lib
# #
# ~2013 CPU Dynamic library # ~2013 CPU Dynamic library
...@@ -104,11 +99,10 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then ...@@ -104,11 +99,10 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
# #
init_vars init_vars
CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_AVX512=off -DLLAMA_FMA=on -DLLAMA_F16C=on ${CMAKE_DEFS}" CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_AVX512=off -DLLAMA_FMA=on -DLLAMA_F16C=on ${CMAKE_DEFS}"
BUILD_DIR="${LLAMACPP_DIR}/build/linux/cpu_avx2" BUILD_DIR="${LLAMACPP_DIR}/build/linux/${ARCH}/cpu_avx2"
echo "Building AVX2 CPU" echo "Building AVX2 CPU"
build build
install compress_libs
link_server_lib
fi fi
else else
echo "Skipping CPU generation step as requested" echo "Skipping CPU generation step as requested"
...@@ -127,22 +121,27 @@ if [ -d "${CUDA_LIB_DIR}" ]; then ...@@ -127,22 +121,27 @@ if [ -d "${CUDA_LIB_DIR}" ]; then
CUDA_VARIANT=_v${CUDA_MAJOR} CUDA_VARIANT=_v${CUDA_MAJOR}
fi fi
CMAKE_DEFS="-DLLAMA_CUBLAS=on ${COMMON_CMAKE_DEFS} ${CMAKE_DEFS}" CMAKE_DEFS="-DLLAMA_CUBLAS=on ${COMMON_CMAKE_DEFS} ${CMAKE_DEFS}"
BUILD_DIR="${LLAMACPP_DIR}/build/linux/cuda${CUDA_VARIANT}" BUILD_DIR="${LLAMACPP_DIR}/build/linux/${ARCH}/cuda${CUDA_VARIANT}"
EXTRA_LIBS="-L${CUDA_LIB_DIR} -lcudart -lcublas -lcublasLt -lcuda"
build build
install
gcc -fPIC -g -shared -o ${BUILD_DIR}/lib/libext_server.so \ # Cary the CUDA libs as payloads to help reduce dependency burden on users
-Wl,--whole-archive \ #
${BUILD_DIR}/lib/libext_server.a \ # TODO - in the future we may shift to packaging these separately and conditionally
${BUILD_DIR}/lib/libcommon.a \ # downloading them in the install script.
${BUILD_DIR}/lib/libllama.a \ DEPS="$(ldd ${BUILD_DIR}/lib/libext_server.so )"
-Wl,--no-whole-archive \ for lib in libcudart.so libcublas.so libcublasLt.so ; do
${CUDA_LIB_DIR}/libcudart_static.a \ DEP=$(echo "${DEPS}" | grep ${lib} | cut -f1 -d' ' | xargs || true)
${CUDA_LIB_DIR}/libcublas_static.a \ if [ -n "${DEP}" -a -e "${CUDA_LIB_DIR}/${DEP}" ]; then
${CUDA_LIB_DIR}/libcublasLt_static.a \ cp "${CUDA_LIB_DIR}/${DEP}" "${BUILD_DIR}/lib/"
${CUDA_LIB_DIR}/libcudadevrt.a \ elif [ -e "${CUDA_LIB_DIR}/${lib}.${CUDA_MAJOR}" ]; then
${CUDA_LIB_DIR}/libculibos.a \ cp "${CUDA_LIB_DIR}/${lib}.${CUDA_MAJOR}" "${BUILD_DIR}/lib/"
-lcuda \ else
-lrt -lpthread -ldl -lstdc++ -lm cp -d "${CUDA_LIB_DIR}/${lib}*" "${BUILD_DIR}/lib/"
fi
done
compress_libs
fi fi
if [ -z "${ROCM_PATH}" ]; then if [ -z "${ROCM_PATH}" ]; then
...@@ -164,19 +163,13 @@ if [ -d "${ROCM_PATH}" ]; then ...@@ -164,19 +163,13 @@ if [ -d "${ROCM_PATH}" ]; then
fi fi
init_vars init_vars
CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DLLAMA_HIPBLAS=on -DCMAKE_C_COMPILER=$ROCM_PATH/llvm/bin/clang -DCMAKE_CXX_COMPILER=$ROCM_PATH/llvm/bin/clang++ -DAMDGPU_TARGETS=$(amdGPUs) -DGPU_TARGETS=$(amdGPUs)" CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DLLAMA_HIPBLAS=on -DCMAKE_C_COMPILER=$ROCM_PATH/llvm/bin/clang -DCMAKE_CXX_COMPILER=$ROCM_PATH/llvm/bin/clang++ -DAMDGPU_TARGETS=$(amdGPUs) -DGPU_TARGETS=$(amdGPUs)"
BUILD_DIR="${LLAMACPP_DIR}/build/linux/rocm${ROCM_VARIANT}" BUILD_DIR="${LLAMACPP_DIR}/build/linux/${ARCH}/rocm${ROCM_VARIANT}"
EXTRA_LIBS="-L${ROCM_PATH}/lib -L/opt/amdgpu/lib/x86_64-linux-gnu/ -Wl,-rpath,${ROCM_PATH}/lib,-rpath,/opt/amdgpu/lib/x86_64-linux-gnu/ -lhipblas -lrocblas -lamdhip64 -lrocsolver -lamd_comgr -lhsa-runtime64 -lrocsparse -ldrm -ldrm_amdgpu"
build build
install
gcc -fPIC -g -shared -o ${BUILD_DIR}/lib/libext_server.so \ # Note: the ROCM libs and runtime library files are too large to embed, so we depend on
-Wl,--whole-archive \ # them being present at runtime on the host
${BUILD_DIR}/lib/libext_server.a \ compress_libs
${BUILD_DIR}/lib/libcommon.a \
${BUILD_DIR}/lib/libllama.a \
-Wl,--no-whole-archive \
-lrt -lpthread -ldl -lstdc++ -lm \
-L/opt/rocm/lib -L/opt/amdgpu/lib/x86_64-linux-gnu/ \
-Wl,-rpath,/opt/rocm/lib,-rpath,/opt/amdgpu/lib/x86_64-linux-gnu/ \
-lhipblas -lrocblas -lamdhip64 -lrocsolver -lamd_comgr -lhsa-runtime64 -lrocsparse -ldrm -ldrm_amdgpu
fi fi
cleanup cleanup
...@@ -5,7 +5,8 @@ $ErrorActionPreference = "Stop" ...@@ -5,7 +5,8 @@ $ErrorActionPreference = "Stop"
function init_vars { function init_vars {
$script:llamacppDir = "../llama.cpp" $script:llamacppDir = "../llama.cpp"
$script:cmakeDefs = @("-DBUILD_SHARED_LIBS=on", "-DLLAMA_NATIVE=off", "-A","x64") $script:cmakeDefs = @("-DBUILD_SHARED_LIBS=on", "-DLLAMA_NATIVE=off", "-A","x64")
$script:cmakeTargets = @("ggml", "ggml_static", "llama", "build_info", "common", "ext_server_shared", "llava_static") $script:cmakeTargets = @("ext_server")
$script:ARCH = "amd64" # arm not yet supported.
if ($env:CGO_CFLAGS -contains "-g") { if ($env:CGO_CFLAGS -contains "-g") {
$script:cmakeDefs += @("-DCMAKE_VERBOSE_MAKEFILE=on", "-DLLAMA_SERVER_VERBOSE=on") $script:cmakeDefs += @("-DCMAKE_VERBOSE_MAKEFILE=on", "-DLLAMA_SERVER_VERBOSE=on")
$script:config = "RelWithDebInfo" $script:config = "RelWithDebInfo"
...@@ -13,6 +14,17 @@ function init_vars { ...@@ -13,6 +14,17 @@ function init_vars {
$script:cmakeDefs += @("-DLLAMA_SERVER_VERBOSE=off") $script:cmakeDefs += @("-DLLAMA_SERVER_VERBOSE=off")
$script:config = "Release" $script:config = "Release"
} }
# Try to find the CUDA dir
if ($env:CUDA_LIB_DIR -eq $null) {
$d=(get-command -ea 'silentlycontinue' nvcc).path
if ($d -ne $null) {
$script:CUDA_LIB_DIR=($d| split-path -parent)
}
} else {
$script:CUDA_LIB_DIR=$env:CUDA_LIB_DIR
}
$script:BZIP2=(get-command -ea 'silentlycontinue' bzip2).path
$script:DUMPBIN=(get-command -ea 'silentlycontinue' dumpbin).path
} }
function git_module_setup { function git_module_setup {
...@@ -47,11 +59,25 @@ function build { ...@@ -47,11 +59,25 @@ function build {
function install { function install {
rm -ea 0 -recurse -force -path "${script:buildDir}/lib" rm -ea 0 -recurse -force -path "${script:buildDir}/lib"
md "${script:buildDir}/lib" -ea 0 > $null md "${script:buildDir}/lib" -ea 0 > $null
cp "${script:buildDir}/bin/${script:config}/ext_server_shared.dll" "${script:buildDir}/lib" cp "${script:buildDir}/bin/${script:config}/ext_server.dll" "${script:buildDir}/lib"
cp "${script:buildDir}/bin/${script:config}/llama.dll" "${script:buildDir}/lib" cp "${script:buildDir}/bin/${script:config}/llama.dll" "${script:buildDir}/lib"
# Display the dll dependencies in the build log # Display the dll dependencies in the build log
dumpbin /dependents "${script:buildDir}/bin/${script:config}/ext_server_shared.dll" | select-string ".dll" if ($script:DUMPBIN -ne $null) {
& "$script:DUMPBIN" /dependents "${script:buildDir}/bin/${script:config}/ext_server.dll" | select-string ".dll"
}
}
function compress_libs {
if ($script:BZIP2 -eq $null) {
write-host "bzip2 not installed, not compressing files"
return
}
write-host "Compressing dlls..."
$libs = dir "${script:buildDir}/lib/*.dll"
foreach ($file in $libs) {
& "$script:BZIP2" -v9 $file
}
} }
function cleanup { function cleanup {
...@@ -71,33 +97,47 @@ apply_patches ...@@ -71,33 +97,47 @@ apply_patches
$script:commonCpuDefs = @("-DCMAKE_POSITION_INDEPENDENT_CODE=on", "-DLLAMA_NATIVE=off") $script:commonCpuDefs = @("-DCMAKE_POSITION_INDEPENDENT_CODE=on", "-DLLAMA_NATIVE=off")
$script:cmakeDefs = $script:commonCpuDefs + @("-DLLAMA_AVX=off", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs $script:cmakeDefs = $script:commonCpuDefs + @("-DLLAMA_AVX=off", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
$script:buildDir="${script:llamacppDir}/build/windows/cpu" $script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/cpu"
write-host "Building LCD CPU" write-host "Building LCD CPU"
build build
install install
compress_libs
$script:cmakeDefs = $script:commonCpuDefs + @("-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs $script:cmakeDefs = $script:commonCpuDefs + @("-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
$script:buildDir="${script:llamacppDir}/build/windows/cpu_avx" $script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/cpu_avx"
write-host "Building AVX CPU" write-host "Building AVX CPU"
build build
install install
compress_libs
$script:cmakeDefs = $script:commonCpuDefs + @("-DLLAMA_AVX=on", "-DLLAMA_AVX2=on", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=on", "-DLLAMA_F16C=on") + $script:cmakeDefs $script:cmakeDefs = $script:commonCpuDefs + @("-DLLAMA_AVX=on", "-DLLAMA_AVX2=on", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=on", "-DLLAMA_F16C=on") + $script:cmakeDefs
$script:buildDir="${script:llamacppDir}/build/windows/cpu_avx2" $script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/cpu_avx2"
write-host "Building AVX2 CPU" write-host "Building AVX2 CPU"
build build
install install
compress_libs
# Then build cuda as a dynamically loaded library if ($null -ne $script:CUDA_LIB_DIR) {
# TODO figure out how to detect cuda version # Then build cuda as a dynamically loaded library
init_vars $nvcc = (get-command -ea 'silentlycontinue' nvcc)
$script:buildDir="${script:llamacppDir}/build/windows/cuda" if ($null -ne $nvcc) {
$script:cmakeDefs += @("-DLLAMA_CUBLAS=ON", "-DLLAMA_AVX=on") $script:CUDA_VERSION=(get-item ($nvcc | split-path | split-path)).Basename
build }
install if ($null -ne $script:CUDA_VERSION) {
$script:CUDA_VARIANT="_"+$script:CUDA_VERSION
}
init_vars
$script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/cuda$script:CUDA_VARIANT"
$script:cmakeDefs += @("-DLLAMA_CUBLAS=ON", "-DLLAMA_AVX=on")
build
install
cp "${script:CUDA_LIB_DIR}/cudart64_*.dll" "${script:buildDir}/lib"
cp "${script:CUDA_LIB_DIR}/cublas64_*.dll" "${script:buildDir}/lib"
cp "${script:CUDA_LIB_DIR}/cublasLt64_*.dll" "${script:buildDir}/lib"
compress_libs
}
# TODO - actually implement ROCm support on windows # TODO - actually implement ROCm support on windows
$script:buildDir="${script:llamacppDir}/build/windows/rocm" $script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/rocm"
rm -ea 0 -recurse -force -path "${script:buildDir}/lib" rm -ea 0 -recurse -force -path "${script:buildDir}/lib"
md "${script:buildDir}/lib" -ea 0 > $null md "${script:buildDir}/lib" -ea 0 > $null
......
package llm package llm
import ( import (
"compress/bzip2"
"errors" "errors"
"fmt" "fmt"
"golang.org/x/exp/slices"
"io" "io"
"io/fs" "io/fs"
"log" "log"
...@@ -12,6 +12,9 @@ import ( ...@@ -12,6 +12,9 @@ import (
"runtime" "runtime"
"strings" "strings"
"golang.org/x/exp/slices"
"golang.org/x/sync/errgroup"
"github.com/jmorganca/ollama/gpu" "github.com/jmorganca/ollama/gpu"
) )
...@@ -20,7 +23,7 @@ import ( ...@@ -20,7 +23,7 @@ import (
// Any library without a variant is the lowest common denominator // Any library without a variant is the lowest common denominator
var availableDynLibs = map[string]string{} var availableDynLibs = map[string]string{}
const pathComponentCount = 6 const pathComponentCount = 7
// getDynLibs returns an ordered list of LLM libraries to try, starting with the best // getDynLibs returns an ordered list of LLM libraries to try, starting with the best
func getDynLibs(gpuInfo gpu.GpuInfo) []string { func getDynLibs(gpuInfo gpu.GpuInfo) []string {
...@@ -100,6 +103,7 @@ func rocmDynLibPresent() bool { ...@@ -100,6 +103,7 @@ func rocmDynLibPresent() bool {
} }
func nativeInit(workdir string) error { func nativeInit(workdir string) error {
log.Printf("Extracting dynamic libraries...")
if runtime.GOOS == "darwin" { if runtime.GOOS == "darwin" {
err := extractPayloadFiles(workdir, "llama.cpp/ggml-metal.metal") err := extractPayloadFiles(workdir, "llama.cpp/ggml-metal.metal")
if err != nil { if err != nil {
...@@ -113,7 +117,7 @@ func nativeInit(workdir string) error { ...@@ -113,7 +117,7 @@ func nativeInit(workdir string) error {
os.Setenv("GGML_METAL_PATH_RESOURCES", workdir) os.Setenv("GGML_METAL_PATH_RESOURCES", workdir)
} }
libs, err := extractDynamicLibs(workdir, "llama.cpp/build/*/*/lib/*") libs, err := extractDynamicLibs(workdir, "llama.cpp/build/*/*/*/lib/*")
if err != nil { if err != nil {
if err == payloadMissing { if err == payloadMissing {
log.Printf("%s", payloadMissing) log.Printf("%s", payloadMissing)
...@@ -151,25 +155,39 @@ func extractDynamicLibs(workDir, glob string) ([]string, error) { ...@@ -151,25 +155,39 @@ func extractDynamicLibs(workDir, glob string) ([]string, error) {
} }
libs := []string{} libs := []string{}
// TODO consider making this idempotent with some sort of persistent directory (where we store models probably)
// and tracking by version so we don't reexpand the files every time
// Also maybe consider lazy loading only what is needed
g := new(errgroup.Group)
for _, file := range files { for _, file := range files {
pathComps := strings.Split(file, "/") pathComps := strings.Split(file, "/")
if len(pathComps) != pathComponentCount { if len(pathComps) != pathComponentCount {
log.Printf("unexpected payload components: %v", pathComps) log.Printf("unexpected payload components: %v", pathComps)
continue continue
} }
// llama.cpp/build/$OS/$VARIANT/lib/$LIBRARY
file := file
g.Go(func() error {
// llama.cpp/build/$OS/$GOARCH/$VARIANT/lib/$LIBRARY
// Include the variant in the path to avoid conflicts between multiple server libs // Include the variant in the path to avoid conflicts between multiple server libs
targetDir := filepath.Join(workDir, pathComps[pathComponentCount-3]) targetDir := filepath.Join(workDir, pathComps[pathComponentCount-3])
srcFile, err := libEmbed.Open(file) srcFile, err := libEmbed.Open(file)
if err != nil { if err != nil {
return nil, fmt.Errorf("read payload %s: %v", file, err) return fmt.Errorf("read payload %s: %v", file, err)
} }
defer srcFile.Close() defer srcFile.Close()
if err := os.MkdirAll(targetDir, 0o755); err != nil { if err := os.MkdirAll(targetDir, 0o755); err != nil {
return nil, fmt.Errorf("create payload temp dir %s: %v", workDir, err) return fmt.Errorf("create payload temp dir %s: %v", workDir, err)
}
src := io.Reader(srcFile)
filename := file
if strings.HasSuffix(file, ".bz2") {
src = bzip2.NewReader(src)
filename = strings.TrimSuffix(filename, ".bz2")
} }
destFile := filepath.Join(targetDir, filepath.Base(file)) destFile := filepath.Join(targetDir, filepath.Base(filename))
if strings.Contains(destFile, "server") { if strings.Contains(destFile, "server") {
libs = append(libs, destFile) libs = append(libs, destFile)
} }
...@@ -179,17 +197,19 @@ func extractDynamicLibs(workDir, glob string) ([]string, error) { ...@@ -179,17 +197,19 @@ func extractDynamicLibs(workDir, glob string) ([]string, error) {
case errors.Is(err, os.ErrNotExist): case errors.Is(err, os.ErrNotExist):
destFile, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755) destFile, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
if err != nil { if err != nil {
return nil, fmt.Errorf("write payload %s: %v", file, err) return fmt.Errorf("write payload %s: %v", file, err)
} }
defer destFile.Close() defer destFile.Close()
if _, err := io.Copy(destFile, srcFile); err != nil { if _, err := io.Copy(destFile, src); err != nil {
return nil, fmt.Errorf("copy payload %s: %v", file, err) return fmt.Errorf("copy payload %s: %v", file, err)
} }
case err != nil: case err != nil:
return nil, fmt.Errorf("stat payload %s: %v", file, err) return fmt.Errorf("stat payload %s: %v", file, err)
} }
return nil
})
} }
return libs, nil return libs, g.Wait()
} }
func extractPayloadFiles(workDir, glob string) error { func extractPayloadFiles(workDir, glob string) error {
...@@ -207,8 +227,14 @@ func extractPayloadFiles(workDir, glob string) error { ...@@ -207,8 +227,14 @@ func extractPayloadFiles(workDir, glob string) error {
if err := os.MkdirAll(workDir, 0o755); err != nil { if err := os.MkdirAll(workDir, 0o755); err != nil {
return fmt.Errorf("create payload temp dir %s: %v", workDir, err) return fmt.Errorf("create payload temp dir %s: %v", workDir, err)
} }
src := io.Reader(srcFile)
filename := file
if strings.HasSuffix(file, ".bz2") {
src = bzip2.NewReader(src)
filename = strings.TrimSuffix(filename, ".bz2")
}
destFile := filepath.Join(workDir, filepath.Base(file)) destFile := filepath.Join(workDir, filepath.Base(filename))
_, err = os.Stat(destFile) _, err = os.Stat(destFile)
switch { switch {
case errors.Is(err, os.ErrNotExist): case errors.Is(err, os.ErrNotExist):
...@@ -217,7 +243,7 @@ func extractPayloadFiles(workDir, glob string) error { ...@@ -217,7 +243,7 @@ func extractPayloadFiles(workDir, glob string) error {
return fmt.Errorf("write payload %s: %v", file, err) return fmt.Errorf("write payload %s: %v", file, err)
} }
defer destFile.Close() defer destFile.Close()
if _, err := io.Copy(destFile, srcFile); err != nil { if _, err := io.Copy(destFile, src); err != nil {
return fmt.Errorf("copy payload %s: %v", file, err) return fmt.Errorf("copy payload %s: %v", file, err)
} }
case err != nil: case err != nil:
......
package llm
import (
"embed"
)
//go:embed llama.cpp/ggml-metal.metal llama.cpp/build/darwin/x86_64/*/lib/*.dylib*
var libEmbed embed.FS
...@@ -4,5 +4,5 @@ import ( ...@@ -4,5 +4,5 @@ import (
"embed" "embed"
) )
//go:embed llama.cpp/ggml-metal.metal llama.cpp/build/darwin/*/lib/*.so //go:embed llama.cpp/ggml-metal.metal llama.cpp/build/darwin/arm64/*/lib/*.dylib*
var libEmbed embed.FS var libEmbed embed.FS
...@@ -4,5 +4,5 @@ import ( ...@@ -4,5 +4,5 @@ import (
"embed" "embed"
) )
//go:embed llama.cpp/build/linux/*/lib/*.so //go:embed llama.cpp/build/linux/*/*/lib/*.so*
var libEmbed embed.FS var libEmbed embed.FS
...@@ -4,5 +4,5 @@ import ( ...@@ -4,5 +4,5 @@ import (
"embed" "embed"
) )
//go:embed llama.cpp/build/windows/*/lib/*.dll //go:embed llama.cpp/build/windows/*/*/lib/*.dll*
var libEmbed embed.FS var libEmbed embed.FS
#!/bin/sh #!/bin/sh
set -eu set -e
export VERSION=${VERSION:-0.0.0} export VERSION=${VERSION:-0.0.0}
export GOFLAGS="'-ldflags=-w -s \"-X=github.com/jmorganca/ollama/version.Version=$VERSION\" \"-X=github.com/jmorganca/ollama/server.mode=release\"'" export GOFLAGS="'-ldflags=-w -s \"-X=github.com/jmorganca/ollama/version.Version=$VERSION\" \"-X=github.com/jmorganca/ollama/server.mode=release\"'"
...@@ -11,21 +11,36 @@ for TARGETARCH in arm64 amd64; do ...@@ -11,21 +11,36 @@ for TARGETARCH in arm64 amd64; do
rm -rf llm/llama.cpp/build rm -rf llm/llama.cpp/build
GOOS=darwin GOARCH=$TARGETARCH go generate ./... GOOS=darwin GOARCH=$TARGETARCH go generate ./...
CGO_ENABLED=1 GOOS=darwin GOARCH=$TARGETARCH go build -o dist/ollama-darwin-$TARGETARCH CGO_ENABLED=1 GOOS=darwin GOARCH=$TARGETARCH go build -o dist/ollama-darwin-$TARGETARCH
CGO_ENABLED=1 GOOS=darwin GOARCH=$TARGETARCH go build -cover -o dist/ollama-darwin-$TARGETARCH-cov
done done
lipo -create -output dist/ollama dist/ollama-darwin-* lipo -create -output dist/ollama dist/ollama-darwin-arm64 dist/ollama-darwin-amd64
rm -f dist/ollama-darwin-* rm -f dist/ollama-darwin-arm64 dist/ollama-darwin-amd64
codesign --deep --force --options=runtime --sign "$APPLE_IDENTITY" --timestamp dist/ollama if [ -n "$APPLE_IDENTITY" ]; then
codesign --deep --force --options=runtime --sign "$APPLE_IDENTITY" --timestamp dist/ollama
else
echo "Skipping code signing - set APPLE_IDENTITY"
fi
chmod +x dist/ollama chmod +x dist/ollama
# build and sign the mac app # build and optionally sign the mac app
npm install --prefix app npm install --prefix app
npm run --prefix app make:sign if [ -n "$APPLE_IDENTITY" ]; then
npm run --prefix app make:sign
else
npm run --prefix app make
fi
cp app/out/make/zip/darwin/universal/Ollama-darwin-universal-$VERSION.zip dist/Ollama-darwin.zip cp app/out/make/zip/darwin/universal/Ollama-darwin-universal-$VERSION.zip dist/Ollama-darwin.zip
# sign the binary and rename it # sign the binary and rename it
codesign -f --timestamp -s "$APPLE_IDENTITY" --identifier ai.ollama.ollama --options=runtime dist/ollama if [ -n "$APPLE_IDENTITY" ]; then
codesign -f --timestamp -s "$APPLE_IDENTITY" --identifier ai.ollama.ollama --options=runtime dist/ollama
else
echo "WARNING: Skipping code signing - set APPLE_IDENTITY"
fi
ditto -c -k --keepParent dist/ollama dist/temp.zip ditto -c -k --keepParent dist/ollama dist/temp.zip
xcrun notarytool submit dist/temp.zip --wait --timeout 10m --apple-id $APPLE_ID --password $APPLE_PASSWORD --team-id $APPLE_TEAM_ID if [ -n "$APPLE_IDENTITY" ]; then
xcrun notarytool submit dist/temp.zip --wait --timeout 10m --apple-id $APPLE_ID --password $APPLE_PASSWORD --team-id $APPLE_TEAM_ID
fi
mv dist/ollama dist/ollama-darwin mv dist/ollama dist/ollama-darwin
rm -f dist/temp.zip rm -f dist/temp.zip
...@@ -66,3 +66,7 @@ subprocess.check_call(['ssh', netloc, 'cd', path, ';', GoCmd, 'generate', './... ...@@ -66,3 +66,7 @@ subprocess.check_call(['ssh', netloc, 'cd', path, ';', GoCmd, 'generate', './...
print("Building") print("Building")
subprocess.check_call(['ssh', netloc, 'cd', path, ';', GoCmd, 'build', '.']) subprocess.check_call(['ssh', netloc, 'cd', path, ';', GoCmd, 'build', '.'])
print("Copying built result")
subprocess.check_call(['scp', netloc +":"+ path + "/ollama.exe", './dist/'])
...@@ -28,6 +28,7 @@ fi ...@@ -28,6 +28,7 @@ fi
if [ -n "${CMAKE_VERSION}" ]; then if [ -n "${CMAKE_VERSION}" ]; then
curl -s -L https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-$(uname -m).tar.gz | tar -zx -C /usr --strip-components 1 curl -s -L https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-$(uname -m).tar.gz | tar -zx -C /usr --strip-components 1
dnf install -y bzip2
fi fi
if [ -n "${GOLANG_VERSION}" ]; then if [ -n "${GOLANG_VERSION}" ]; then
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment