Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
77d96da9
Commit
77d96da9
authored
Jan 04, 2024
by
Daniel Hiltgen
Browse files
Code shuffle to clean up the llm dir
parent
b5939008
Changes
19
Hide whitespace changes
Inline
Side-by-side
Showing
19 changed files
with
54 additions
and
47 deletions
+54
-47
.dockerignore
.dockerignore
+1
-1
.gitmodules
.gitmodules
+1
-1
llm/ext_server/CMakeLists.txt
llm/ext_server/CMakeLists.txt
+1
-1
llm/ext_server/README.md
llm/ext_server/README.md
+4
-0
llm/ext_server/ext_server.cpp
llm/ext_server/ext_server.cpp
+0
-0
llm/ext_server/ext_server.h
llm/ext_server/ext_server.h
+0
-0
llm/ext_server_common.go
llm/ext_server_common.go
+9
-9
llm/generate/gen_common.sh
llm/generate/gen_common.sh
+7
-7
llm/generate/gen_darwin.sh
llm/generate/gen_darwin.sh
+2
-2
llm/generate/gen_linux.sh
llm/generate/gen_linux.sh
+4
-4
llm/generate/gen_windows.ps1
llm/generate/gen_windows.ps1
+12
-11
llm/generate/generate_darwin.go
llm/generate/generate_darwin.go
+1
-1
llm/generate/generate_linux.go
llm/generate/generate_linux.go
+1
-1
llm/generate/generate_windows.go
llm/generate/generate_windows.go
+1
-1
llm/shim_darwin.go
llm/shim_darwin.go
+2
-2
llm/shim_ext_server.go
llm/shim_ext_server.go
+6
-4
llm/shim_ext_server_linux.go
llm/shim_ext_server_linux.go
+1
-1
llm/shim_ext_server_windows.go
llm/shim_ext_server_windows.go
+1
-1
tmp
tmp
+0
-0
No files found.
.dockerignore
View file @
77d96da9
...
@@ -2,7 +2,7 @@
...
@@ -2,7 +2,7 @@
ollama
ollama
app
app
dist
dist
llm/llama.cpp
/gguf
llm/llama.cpp
.env
.env
.cache
.cache
test_data
test_data
\ No newline at end of file
.gitmodules
View file @
77d96da9
[submodule "llm/llama.cpp/gguf"]
[submodule "llm/llama.cpp/gguf"]
path = llm/llama.cpp/gguf
path = llm/llama.cpp/gguf
url = https://github.com/ggerganov/llama.cpp.git
url = https://github.com/ggerganov/llama.cpp.git
ignore = dirty
ignore = dirty
shallow = true
shallow = true
llm/
llama.cpp
/CMakeLists.txt
→
llm/
ext_server
/CMakeLists.txt
View file @
77d96da9
...
@@ -2,7 +2,7 @@
...
@@ -2,7 +2,7 @@
set
(
TARGET ext_server
)
set
(
TARGET ext_server
)
option
(
LLAMA_SERVER_VERBOSE
"Build verbose logging option for Server"
ON
)
option
(
LLAMA_SERVER_VERBOSE
"Build verbose logging option for Server"
ON
)
add_library
(
${
TARGET
}
STATIC ../../../ext_server.cpp
)
add_library
(
${
TARGET
}
STATIC ../../../ext_server
/ext_server
.cpp
)
target_include_directories
(
${
TARGET
}
PRIVATE ../../common
)
target_include_directories
(
${
TARGET
}
PRIVATE ../../common
)
target_include_directories
(
${
TARGET
}
PRIVATE ../..
)
target_include_directories
(
${
TARGET
}
PRIVATE ../..
)
target_include_directories
(
${
TARGET
}
PRIVATE ../../..
)
target_include_directories
(
${
TARGET
}
PRIVATE ../../..
)
...
...
llm/ext_server/README.md
0 → 100644
View file @
77d96da9
# Extern C Server
This directory contains a thin facade we layer on top of the llama.cpp server
to expose
`extern "C"`
interfaces, so that its functionality can be accessed through direct in-process API calls.
llm/
llama.cpp
/ext_server.cpp
→
llm/
ext_server
/ext_server.cpp
View file @
77d96da9
File moved
llm/
llama.cpp
/ext_server.h
→
llm/
ext_server
/ext_server.h
View file @
77d96da9
File moved
llm/ext_server_common.go
View file @
77d96da9
package
llm
package
llm
/*
/*
#cgo CFLAGS: -I${SRCDIR}/
llama.cpp
-I${SRCDIR}/llama.cpp
/gguf
-I${SRCDIR}/llama.cpp/
gguf/
common -I${SRCDIR}/llama.cpp/
gguf/
examples/server
#cgo CFLAGS: -I${SRCDIR}/
ext_server
-I${SRCDIR}/llama.cpp -I${SRCDIR}/llama.cpp/common -I${SRCDIR}/llama.cpp/examples/server
#cgo CFLAGS: -DNDEBUG -DLLAMA_SERVER_LIBRARY=1 -D_XOPEN_SOURCE=600 -DACCELERATE_NEW_LAPACK -DACCELERATE_LAPACK_ILP64
#cgo CFLAGS: -DNDEBUG -DLLAMA_SERVER_LIBRARY=1 -D_XOPEN_SOURCE=600 -DACCELERATE_NEW_LAPACK -DACCELERATE_LAPACK_ILP64
#cgo CFLAGS: -Wmissing-noreturn -Wall -Wextra -Wcast-qual -Wno-unused-function -Wno-array-bounds
#cgo CFLAGS: -Wmissing-noreturn -Wall -Wextra -Wcast-qual -Wno-unused-function -Wno-array-bounds
#cgo CPPFLAGS: -Ofast -Wall -Wextra -Wno-unused-function -Wno-unused-variable -Wno-deprecated-declarations -Wno-unused-but-set-variable
#cgo CPPFLAGS: -Ofast -Wall -Wextra -Wno-unused-function -Wno-unused-variable -Wno-deprecated-declarations -Wno-unused-but-set-variable
...
@@ -10,17 +10,17 @@ package llm
...
@@ -10,17 +10,17 @@ package llm
#cgo darwin CPPFLAGS: -DGGML_USE_METAL -DGGML_METAL_NDEBUG
#cgo darwin CPPFLAGS: -DGGML_USE_METAL -DGGML_METAL_NDEBUG
#cgo darwin LDFLAGS: -lc++ -framework Accelerate
#cgo darwin LDFLAGS: -lc++ -framework Accelerate
#cgo darwin LDFLAGS: -framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders
#cgo darwin LDFLAGS: -framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders
#cgo darwin LDFLAGS: ${SRCDIR}/llama.cpp/
gguf/
build/darwin/metal/lib/libcommon.a
#cgo darwin LDFLAGS: ${SRCDIR}/llama.cpp/build/darwin/metal/lib/libcommon.a
#cgo darwin LDFLAGS: ${SRCDIR}/llama.cpp/
gguf/
build/darwin/metal/lib/libext_server.a
#cgo darwin LDFLAGS: ${SRCDIR}/llama.cpp/build/darwin/metal/lib/libext_server.a
#cgo darwin LDFLAGS: ${SRCDIR}/llama.cpp/
gguf/
build/darwin/metal/lib/libllama.a
#cgo darwin LDFLAGS: ${SRCDIR}/llama.cpp/build/darwin/metal/lib/libllama.a
#cgo darwin LDFLAGS: ${SRCDIR}/llama.cpp/
gguf/
build/darwin/metal/lib/libggml_static.a
#cgo darwin LDFLAGS: ${SRCDIR}/llama.cpp/build/darwin/metal/lib/libggml_static.a
#cgo linux CFLAGS: -D_GNU_SOURCE
#cgo linux CFLAGS: -D_GNU_SOURCE
#cgo linux windows CFLAGS: -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_USE_CUBLAS
#cgo linux windows CFLAGS: -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_USE_CUBLAS
#cgo linux LDFLAGS: -L/usr/local/cuda/targets/x86_64-linux/lib -L/usr/local/cuda/lib64 -L/usr/local/cuda/targets/x86_64-linux/lib/stubs
#cgo linux LDFLAGS: -L/usr/local/cuda/targets/x86_64-linux/lib -L/usr/local/cuda/lib64 -L/usr/local/cuda/targets/x86_64-linux/lib/stubs
#cgo linux LDFLAGS: ${SRCDIR}/llama.cpp/
gguf/
build/linux/cpu/lib/libext_server.a
#cgo linux LDFLAGS: ${SRCDIR}/llama.cpp/build/linux/cpu/lib/libext_server.a
#cgo linux LDFLAGS: ${SRCDIR}/llama.cpp/
gguf/
build/linux/cpu/lib/libcommon.a
#cgo linux LDFLAGS: ${SRCDIR}/llama.cpp/build/linux/cpu/lib/libcommon.a
#cgo linux LDFLAGS: ${SRCDIR}/llama.cpp/
gguf/
build/linux/cpu/lib/libllama.a
#cgo linux LDFLAGS: ${SRCDIR}/llama.cpp/build/linux/cpu/lib/libllama.a
#cgo linux LDFLAGS: ${SRCDIR}/llama.cpp/
gguf/
build/linux/cpu/lib/libggml_static.a
#cgo linux LDFLAGS: ${SRCDIR}/llama.cpp/build/linux/cpu/lib/libggml_static.a
#cgo linux LDFLAGS: -lrt -ldl -lstdc++ -lm
#cgo linux LDFLAGS: -lrt -ldl -lstdc++ -lm
#cgo linux windows LDFLAGS: -lpthread
#cgo linux windows LDFLAGS: -lpthread
...
...
llm/
llama.cpp
/gen_common.sh
→
llm/
generate
/gen_common.sh
View file @
77d96da9
# common logic across linux and darwin
# common logic across linux and darwin
init_vars
()
{
init_vars
()
{
LLAMACPP_DIR
=
gguf
LLAMACPP_DIR
=
../llama.cpp
PATCHES
=
"0001-Expose-callable-API-for-server.patch"
PATCHES
=
"0001-Expose-callable-API-for-server.patch"
CMAKE_DEFS
=
""
CMAKE_DEFS
=
""
CMAKE_TARGETS
=
"--target ggml --target ggml_static --target llama --target build_info --target common --target ext_server --target llava_static"
CMAKE_TARGETS
=
"--target ggml --target ggml_static --target llama --target build_info --target common --target ext_server --target llava_static"
...
@@ -19,18 +19,18 @@ git_module_setup() {
...
@@ -19,18 +19,18 @@ git_module_setup() {
return
return
fi
fi
git submodule init
git submodule init
git submodule update
--force
gguf
git submodule update
--force
${
LLAMACPP_DIR
}
}
}
apply_patches
()
{
apply_patches
()
{
# Wire up our CMakefile
# Wire up our CMakefile
if
!
grep
ollama
gguf
/examples/server/CMakeLists.txt
;
then
if
!
grep
ollama
${
LLAMACPP_DIR
}
/examples/server/CMakeLists.txt
;
then
echo
'include (../../../CMakeLists.txt) # ollama'
>>
gguf
/examples/server/CMakeLists.txt
echo
'include (../../../
ext_server/
CMakeLists.txt) # ollama'
>>
${
LLAMACPP_DIR
}
/examples/server/CMakeLists.txt
fi
fi
# Avoid duplicate main symbols when we link into the cgo binary
# Avoid duplicate main symbols when we link into the cgo binary
sed
-e
's/int main(/int __main(/g'
<
./gguf
/examples/server/server.cpp
>
./gguf
/examples/server/server.cpp.tmp
&&
sed
-e
's/int main(/int __main(/g'
<
${
LLAMACPP_DIR
}
/examples/server/server.cpp
>
${
LLAMACPP_DIR
}
/examples/server/server.cpp.tmp
&&
mv
./gguf
/examples/server/server.cpp.tmp
./gguf
/examples/server/server.cpp
mv
${
LLAMACPP_DIR
}
/examples/server/server.cpp.tmp
${
LLAMACPP_DIR
}
/examples/server/server.cpp
}
}
build
()
{
build
()
{
...
@@ -49,5 +49,5 @@ install() {
...
@@ -49,5 +49,5 @@ install() {
# Keep the local tree clean after we're done with the build
# Keep the local tree clean after we're done with the build
cleanup
()
{
cleanup
()
{
(
cd
gguf
/examples/server/
&&
git checkout CMakeLists.txt server.cpp
)
(
cd
${
LLAMACPP_DIR
}
/examples/server/
&&
git checkout CMakeLists.txt server.cpp
)
}
}
llm/
llama.cpp
/gen_darwin.sh
→
llm/
generate
/gen_darwin.sh
View file @
77d96da9
#!/bin/bash
#!/bin/bash
# This script is intended to run inside the go generate
# This script is intended to run inside the go generate
# working directory must be
.
./llm/
llama.cpp
# working directory must be ./llm/
generate/
# TODO - add hardening to detect missing tools (cmake, etc.)
# TODO - add hardening to detect missing tools (cmake, etc.)
...
@@ -10,7 +10,7 @@ echo "Starting darwin generate script"
...
@@ -10,7 +10,7 @@ echo "Starting darwin generate script"
source
$(
dirname
$0
)
/gen_common.sh
source
$(
dirname
$0
)
/gen_common.sh
init_vars
init_vars
CMAKE_DEFS
=
"-DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 -DLLAMA_METAL=on -DLLAMA_ACCELERATE=on
${
CMAKE_DEFS
}
"
CMAKE_DEFS
=
"-DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 -DLLAMA_METAL=on -DLLAMA_ACCELERATE=on
${
CMAKE_DEFS
}
"
BUILD_DIR
=
"
gguf
/build/darwin/metal"
BUILD_DIR
=
"
${
LLAMACPP_DIR
}
/build/darwin/metal"
case
"
${
GOARCH
}
"
in
case
"
${
GOARCH
}
"
in
"amd64"
)
"amd64"
)
CMAKE_DEFS
=
"-DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off
${
CMAKE_DEFS
}
"
CMAKE_DEFS
=
"-DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off
${
CMAKE_DEFS
}
"
...
...
llm/
llama.cpp
/gen_linux.sh
→
llm/
generate
/gen_linux.sh
View file @
77d96da9
#!/bin/bash
#!/bin/bash
# This script is intended to run inside the go generate
# This script is intended to run inside the go generate
# working directory must be llm/
llama.cpp
# working directory must be llm/
generate/
# First we build our default built-in library which will be linked into the CGO
# First we build our default built-in library which will be linked into the CGO
# binary as a normal dependency. This default build is CPU based.
# binary as a normal dependency. This default build is CPU based.
...
@@ -52,7 +52,7 @@ apply_patches
...
@@ -52,7 +52,7 @@ apply_patches
# CPU first for the default library
# CPU first for the default library
#
#
CMAKE_DEFS
=
"
${
COMMON_CMAKE_DEFS
}
${
CMAKE_DEFS
}
"
CMAKE_DEFS
=
"
${
COMMON_CMAKE_DEFS
}
${
CMAKE_DEFS
}
"
BUILD_DIR
=
"
gguf
/build/linux/cpu"
BUILD_DIR
=
"
${
LLAMACPP_DIR
}
/build/linux/cpu"
build
build
install
install
...
@@ -64,7 +64,7 @@ if [ -d /usr/local/cuda/lib64/ ]; then
...
@@ -64,7 +64,7 @@ if [ -d /usr/local/cuda/lib64/ ]; then
echo
"CUDA libraries detected - building dynamic CUDA library"
echo
"CUDA libraries detected - building dynamic CUDA library"
init_vars
init_vars
CMAKE_DEFS
=
"-DLLAMA_CUBLAS=on
${
COMMON_CMAKE_DEFS
}
${
CMAKE_DEFS
}
"
CMAKE_DEFS
=
"-DLLAMA_CUBLAS=on
${
COMMON_CMAKE_DEFS
}
${
CMAKE_DEFS
}
"
BUILD_DIR
=
"
gguf
/build/linux/cuda"
BUILD_DIR
=
"
${
LLAMACPP_DIR
}
/build/linux/cuda"
CUDA_LIB_DIR
=
/usr/local/cuda/lib64
CUDA_LIB_DIR
=
/usr/local/cuda/lib64
build
build
install
install
...
@@ -98,7 +98,7 @@ if [ -d "${ROCM_PATH}" ]; then
...
@@ -98,7 +98,7 @@ if [ -d "${ROCM_PATH}" ]; then
echo
"ROCm libraries detected - building dynamic ROCm library"
echo
"ROCm libraries detected - building dynamic ROCm library"
init_vars
init_vars
CMAKE_DEFS
=
"
${
COMMON_CMAKE_DEFS
}
${
CMAKE_DEFS
}
-DLLAMA_HIPBLAS=on -DCMAKE_C_COMPILER=
$ROCM_PATH
/llvm/bin/clang -DCMAKE_CXX_COMPILER=
$ROCM_PATH
/llvm/bin/clang++ -DAMDGPU_TARGETS=
$(
amdGPUs
)
-DGPU_TARGETS=
$(
amdGPUs
)
"
CMAKE_DEFS
=
"
${
COMMON_CMAKE_DEFS
}
${
CMAKE_DEFS
}
-DLLAMA_HIPBLAS=on -DCMAKE_C_COMPILER=
$ROCM_PATH
/llvm/bin/clang -DCMAKE_CXX_COMPILER=
$ROCM_PATH
/llvm/bin/clang++ -DAMDGPU_TARGETS=
$(
amdGPUs
)
-DGPU_TARGETS=
$(
amdGPUs
)
"
BUILD_DIR
=
"
gguf
/build/linux/rocm"
BUILD_DIR
=
"
${
LLAMACPP_DIR
}
/build/linux/rocm"
build
build
install
install
gcc
-fPIC
-g
-shared
-o
${
BUILD_DIR
}
/lib/libext_server.so
\
gcc
-fPIC
-g
-shared
-o
${
BUILD_DIR
}
/lib/libext_server.so
\
...
...
llm/
llama.cpp
/gen_windows.ps1
→
llm/
generate
/gen_windows.ps1
View file @
77d96da9
...
@@ -3,6 +3,7 @@
...
@@ -3,6 +3,7 @@
$Error
ActionPreference
=
"Stop"
$Error
ActionPreference
=
"Stop"
function
init_vars
{
function
init_vars
{
$
script
:
llamacppDir
=
"../llama.cpp"
$
script
:
patches
=
@(
"0001-Expose-callable-API-for-server.patch"
)
$
script
:
patches
=
@(
"0001-Expose-callable-API-for-server.patch"
)
$
script
:
cmakeDefs
=
@(
"-DBUILD_SHARED_LIBS=on"
,
"-DLLAMA_NATIVE=off"
,
"-DLLAMA_F16C=off"
,
"-DLLAMA_FMA=off"
,
"-DLLAMA_AVX512=off"
,
"-DLLAMA_AVX2=off"
,
"-DLLAMA_AVX=on"
,
"-A"
,
"x64"
)
$
script
:
cmakeDefs
=
@(
"-DBUILD_SHARED_LIBS=on"
,
"-DLLAMA_NATIVE=off"
,
"-DLLAMA_F16C=off"
,
"-DLLAMA_FMA=off"
,
"-DLLAMA_AVX512=off"
,
"-DLLAMA_AVX2=off"
,
"-DLLAMA_AVX=on"
,
"-A"
,
"x64"
)
$
script
:
cmakeTargets
=
@(
"ggml"
,
"ggml_static"
,
"llama"
,
"build_info"
,
"common"
,
"ext_server_shared"
,
"llava_static"
)
$
script
:
cmakeTargets
=
@(
"ggml"
,
"ggml_static"
,
"llama"
,
"build_info"
,
"common"
,
"ext_server_shared"
,
"llava_static"
)
...
@@ -19,25 +20,25 @@ function git_module_setup {
...
@@ -19,25 +20,25 @@ function git_module_setup {
# TODO add flags to skip the init/patch logic to make it easier to mod llama.cpp code in-repo
# TODO add flags to skip the init/patch logic to make it easier to mod llama.cpp code in-repo
&
git
submodule
init
&
git
submodule
init
if
(
$LASTEXITCODE
-ne
0
)
{
exit
(
$LASTEXITCODE
)}
if
(
$LASTEXITCODE
-ne
0
)
{
exit
(
$LASTEXITCODE
)}
&
git
submodule
update
--force
gguf
&
git
submodule
update
--force
"
${script:llamacppDir}
"
if
(
$LASTEXITCODE
-ne
0
)
{
exit
(
$LASTEXITCODE
)}
if
(
$LASTEXITCODE
-ne
0
)
{
exit
(
$LASTEXITCODE
)}
}
}
function
apply_patches
{
function
apply_patches
{
# Wire up our CMakefile
# Wire up our CMakefile
if
(
!
(
Select-String
-Path
"
gguf
/examples/server/CMakeLists.txt"
-Pattern
'ollama'
))
{
if
(
!
(
Select-String
-Path
"
${script:llamacppDir}
/examples/server/CMakeLists.txt"
-Pattern
'ollama'
))
{
Add-Content
-Path
"
gguf
/examples/server/CMakeLists.txt"
-Value
'include (../../../CMakeLists.txt) # ollama'
Add-Content
-Path
"
${script:llamacppDir}
/examples/server/CMakeLists.txt"
-Value
'include (../../../
ext_server/
CMakeLists.txt) # ollama'
}
}
# Avoid duplicate main symbols when we link into the cgo binary
# Avoid duplicate main symbols when we link into the cgo binary
$content
=
Get-Content
-Path
"
./gguf
/examples/server/server.cpp"
$content
=
Get-Content
-Path
"
${script:llamacppDir}
/examples/server/server.cpp"
$content
=
$content
-replace
'int main\('
,
'int __main('
$content
=
$content
-replace
'int main\('
,
'int __main('
Set-Content
-Path
"
./gguf
/examples/server/server.cpp"
-Value
$content
Set-Content
-Path
"
${script:llamacppDir}
/examples/server/server.cpp"
-Value
$content
}
}
function
build
{
function
build
{
write-host
"generating config with: cmake -S
gguf
-B
$
script
:
buildDir
$
script
:
cmakeDefs
"
write-host
"generating config with: cmake -S
${script:llamacppDir}
-B
$
script
:
buildDir
$
script
:
cmakeDefs
"
&
cmake
--version
&
cmake
--version
&
cmake
-S
gguf
-B
$
script
:
buildDir
$
script
:
cmakeDefs
&
cmake
-S
"
${script:llamacppDir}
"
-B
$
script
:
buildDir
$
script
:
cmakeDefs
if
(
$LASTEXITCODE
-ne
0
)
{
exit
(
$LASTEXITCODE
)}
if
(
$LASTEXITCODE
-ne
0
)
{
exit
(
$LASTEXITCODE
)}
write-host
"building with: cmake --build
$
script
:
buildDir
--config
$
script
:
config
(
$
script
:
cmakeTargets
| ForEach-Object { "
--target
",
$_
})"
write-host
"building with: cmake --build
$
script
:
buildDir
--config
$
script
:
config
(
$
script
:
cmakeTargets
| ForEach-Object { "
--target
",
$_
})"
&
cmake
--build
$
script
:
buildDir
--config
$
script
:
config
(
$
script
:
cmakeTargets
|
ForEach-Object
{
"--target"
,
$_
})
&
cmake
--build
$
script
:
buildDir
--config
$
script
:
config
(
$
script
:
cmakeTargets
|
ForEach-Object
{
"--target"
,
$_
})
...
@@ -55,7 +56,7 @@ function install {
...
@@ -55,7 +56,7 @@ function install {
}
}
function
cleanup
{
function
cleanup
{
Set-Location
"
gguf
/examples/server"
Set-Location
"
${script:llamacppDir}
/examples/server"
git
checkout
CMakeLists.txt
server.cpp
git
checkout
CMakeLists.txt
server.cpp
}
}
...
@@ -64,20 +65,20 @@ git_module_setup
...
@@ -64,20 +65,20 @@ git_module_setup
apply_patches
apply_patches
# first build CPU based
# first build CPU based
$
script
:
buildDir
=
"
gguf
/build/windows/cpu"
$
script
:
buildDir
=
"
${script:llamacppDir}
/build/windows/cpu"
build
build
install
install
# Then build cuda as a dynamically loaded library
# Then build cuda as a dynamically loaded library
init_vars
init_vars
$
script
:
buildDir
=
"
gguf
/build/windows/cuda"
$
script
:
buildDir
=
"
${script:llamacppDir}
/build/windows/cuda"
$
script
:
cmakeDefs
+=
@(
"-DLLAMA_CUBLAS=ON"
)
$
script
:
cmakeDefs
+=
@(
"-DLLAMA_CUBLAS=ON"
)
build
build
install
install
# TODO - actually implement ROCm support on windows
# TODO - actually implement ROCm support on windows
$
script
:
buildDir
=
"
gguf
/build/windows/rocm"
$
script
:
buildDir
=
"
${script:llamacppDir}
/build/windows/rocm"
rm
-ea
0
-recurse
-force
-path
"
${script:buildDir}
/lib"
rm
-ea
0
-recurse
-force
-path
"
${script:buildDir}
/lib"
md
"
${script:buildDir}
/lib"
-ea
0
>
$null
md
"
${script:buildDir}
/lib"
-ea
0
>
$null
...
...
llm/
llama.cpp
/generate_darwin.go
→
llm/
generate
/generate_darwin.go
View file @
77d96da9
package
llm
package
generate
//go:generate sh ./gen_darwin.sh
//go:generate sh ./gen_darwin.sh
llm/
llama.cpp
/generate_linux.go
→
llm/
generate
/generate_linux.go
View file @
77d96da9
package
llm
package
generate
//go:generate bash ./gen_linux.sh
//go:generate bash ./gen_linux.sh
llm/
llama.cpp
/generate_windows.go
→
llm/
generate
/generate_windows.go
View file @
77d96da9
package
llm
package
generate
//go:generate powershell -ExecutionPolicy Bypass -File ./gen_windows.ps1
//go:generate powershell -ExecutionPolicy Bypass -File ./gen_windows.ps1
llm/shim_darwin.go
View file @
77d96da9
...
@@ -13,7 +13,7 @@ import (
...
@@ -13,7 +13,7 @@ import (
"github.com/jmorganca/ollama/api"
"github.com/jmorganca/ollama/api"
)
)
//go:embed llama.cpp/
gguf/
ggml-metal.metal
//go:embed llama.cpp/ggml-metal.metal
var
libEmbed
embed
.
FS
var
libEmbed
embed
.
FS
func
newDynamicShimExtServer
(
library
,
model
string
,
adapters
,
projectors
[]
string
,
numLayers
int64
,
opts
api
.
Options
)
(
extServer
,
error
)
{
func
newDynamicShimExtServer
(
library
,
model
string
,
adapters
,
projectors
[]
string
,
numLayers
int64
,
opts
api
.
Options
)
(
extServer
,
error
)
{
...
@@ -22,7 +22,7 @@ func newDynamicShimExtServer(library, model string, adapters, projectors []strin
...
@@ -22,7 +22,7 @@ func newDynamicShimExtServer(library, model string, adapters, projectors []strin
}
}
func
nativeInit
(
workdir
string
)
error
{
func
nativeInit
(
workdir
string
)
error
{
err
:=
extractPayloadFiles
(
workdir
,
"llama.cpp/
gguf/
ggml-metal.metal"
)
err
:=
extractPayloadFiles
(
workdir
,
"llama.cpp/ggml-metal.metal"
)
if
err
!=
nil
{
if
err
!=
nil
{
if
err
==
payloadMissing
{
if
err
==
payloadMissing
{
// TODO perhaps consider this a hard failure on arm macs?
// TODO perhaps consider this a hard failure on arm macs?
...
...
llm/shim_ext_server.go
View file @
77d96da9
...
@@ -34,6 +34,8 @@ type shimExtServer struct {
...
@@ -34,6 +34,8 @@ type shimExtServer struct {
var
shimMutex
sync
.
Mutex
var
shimMutex
sync
.
Mutex
var
llm
*
shimExtServer
var
llm
*
shimExtServer
const
pathComponentCount
=
6
func
(
llm
*
shimExtServer
)
llama_server_init
(
sparams
*
C
.
ext_server_params_t
,
err
*
C
.
ext_server_resp_t
)
{
func
(
llm
*
shimExtServer
)
llama_server_init
(
sparams
*
C
.
ext_server_params_t
,
err
*
C
.
ext_server_resp_t
)
{
C
.
dynamic_shim_llama_server_init
(
llm
.
s
,
sparams
,
err
)
C
.
dynamic_shim_llama_server_init
(
llm
.
s
,
sparams
,
err
)
}
}
...
@@ -112,7 +114,7 @@ func (llm *shimExtServer) Close() {
...
@@ -112,7 +114,7 @@ func (llm *shimExtServer) Close() {
}
}
func
nativeInit
(
workdir
string
)
error
{
func
nativeInit
(
workdir
string
)
error
{
libs
,
err
:=
extractDynamicLibs
(
workdir
,
"llama.cpp/
gguf/
build/*/*/lib/*"
)
libs
,
err
:=
extractDynamicLibs
(
workdir
,
"llama.cpp/build/*/*/lib/*"
)
if
err
!=
nil
{
if
err
!=
nil
{
if
err
==
payloadMissing
{
if
err
==
payloadMissing
{
log
.
Printf
(
"%s"
,
payloadMissing
)
log
.
Printf
(
"%s"
,
payloadMissing
)
...
@@ -151,13 +153,13 @@ func extractDynamicLibs(workDir, glob string) ([]string, error) {
...
@@ -151,13 +153,13 @@ func extractDynamicLibs(workDir, glob string) ([]string, error) {
for
_
,
file
:=
range
files
{
for
_
,
file
:=
range
files
{
pathComps
:=
strings
.
Split
(
file
,
"/"
)
pathComps
:=
strings
.
Split
(
file
,
"/"
)
if
len
(
pathComps
)
!=
7
{
if
len
(
pathComps
)
!=
pathComponentCount
{
log
.
Printf
(
"unexpected payload components: %v"
,
pathComps
)
log
.
Printf
(
"unexpected payload components: %v"
,
pathComps
)
continue
continue
}
}
// llama.cpp/
gguf/
build/$OS/$VARIANT/lib/$LIBRARY
// llama.cpp/build/$OS/$VARIANT/lib/$LIBRARY
// Include the variant in the path to avoid conflicts between multiple server libs
// Include the variant in the path to avoid conflicts between multiple server libs
targetDir
:=
filepath
.
Join
(
workDir
,
pathComps
[
4
])
targetDir
:=
filepath
.
Join
(
workDir
,
pathComps
[
pathComponentCount
-
3
])
srcFile
,
err
:=
libEmbed
.
Open
(
file
)
srcFile
,
err
:=
libEmbed
.
Open
(
file
)
if
err
!=
nil
{
if
err
!=
nil
{
return
nil
,
fmt
.
Errorf
(
"read payload %s: %v"
,
file
,
err
)
return
nil
,
fmt
.
Errorf
(
"read payload %s: %v"
,
file
,
err
)
...
...
llm/shim_ext_server_linux.go
View file @
77d96da9
...
@@ -10,7 +10,7 @@ import (
...
@@ -10,7 +10,7 @@ import (
"strings"
"strings"
)
)
//go:embed llama.cpp/
gguf/
build/*/*/lib/*.so
//go:embed llama.cpp/build/*/*/lib/*.so
var
libEmbed
embed
.
FS
var
libEmbed
embed
.
FS
func
updatePath
(
dir
string
)
{
func
updatePath
(
dir
string
)
{
...
...
llm/shim_ext_server_windows.go
View file @
77d96da9
...
@@ -8,7 +8,7 @@ import (
...
@@ -8,7 +8,7 @@ import (
"strings"
"strings"
)
)
//go:embed llama.cpp/
gguf/
build/windows/*/lib/*.dll
//go:embed llama.cpp/build/windows/*/lib/*.dll
var
libEmbed
embed
.
FS
var
libEmbed
embed
.
FS
func
updatePath
(
dir
string
)
{
func
updatePath
(
dir
string
)
{
...
...
tmp
@
328b83de
File moved
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment