Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
652c273f
Unverified
Commit
652c273f
authored
Aug 19, 2024
by
Daniel Hiltgen
Committed by
GitHub
Aug 19, 2024
Browse files
Merge pull request #5049 from dhiltgen/cuda_v12
Cuda v12
parents
88e77050
f9e31da9
Changes
23
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
346 additions
and
190 deletions
+346
-190
.github/workflows/release.yaml
.github/workflows/release.yaml
+15
-5
Dockerfile
Dockerfile
+103
-30
app/ollama.iss
app/ollama.iss
+6
-15
docs/linux.md
docs/linux.md
+4
-6
envconfig/config.go
envconfig/config.go
+5
-5
gpu/amd_common.go
gpu/amd_common.go
+1
-1
gpu/amd_windows.go
gpu/amd_windows.go
+1
-1
gpu/cuda_common.go
gpu/cuda_common.go
+43
-0
gpu/gpu.go
gpu/gpu.go
+49
-21
gpu/gpu_darwin.go
gpu/gpu_darwin.go
+2
-2
gpu/gpu_linux.go
gpu/gpu_linux.go
+1
-1
gpu/types.go
gpu/types.go
+8
-5
llm/ext_server/CMakeLists.txt
llm/ext_server/CMakeLists.txt
+2
-1
llm/generate/gen_common.sh
llm/generate/gen_common.sh
+24
-8
llm/generate/gen_darwin.sh
llm/generate/gen_darwin.sh
+2
-0
llm/generate/gen_linux.sh
llm/generate/gen_linux.sh
+41
-45
llm/generate/gen_windows.ps1
llm/generate/gen_windows.ps1
+26
-29
llm/payload.go
llm/payload.go
+2
-2
llm/server.go
llm/server.go
+5
-7
scripts/build_linux.sh
scripts/build_linux.sh
+6
-6
No files found.
.github/workflows/release.yaml
View file @
652c273f
...
...
@@ -187,6 +187,13 @@ jobs:
generate-windows-cuda
:
environment
:
release
runs-on
:
windows
strategy
:
matrix
:
cuda
:
-
version
:
"
11"
url
:
'
https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.89_win10.exe'
-
version
:
"
12"
url
:
'
https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_551.61_windows.exe'
env
:
KEY_CONTAINER
:
${{ vars.KEY_CONTAINER }}
steps
:
...
...
@@ -220,11 +227,11 @@ jobs:
with
:
go-version-file
:
go.mod
cache
:
true
-
name
:
'
Install
CUDA'
-
name
:
'
Install
CUDA
${{
matrix.cuda.version
}}
'
run
:
|
$ErrorActionPreference = "Stop"
write-host "downloading CUDA Installer"
Invoke-WebRequest -Uri "
https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.89_win10.exe
" -OutFile "${env:RUNNER_TEMP}\cuda-install.exe"
Invoke-WebRequest -Uri "
${{ matrix.cuda.url }}
" -OutFile "${env:RUNNER_TEMP}\cuda-install.exe"
write-host "Installing CUDA"
Start-Process "${env:RUNNER_TEMP}\cuda-install.exe" -ArgumentList '-s' -NoNewWindow -Wait
write-host "Completed CUDA"
...
...
@@ -256,7 +263,7 @@ jobs:
cp "${NVIDIA_DIR}\cublasLt64_*.dll" "dist\deps\"
-
uses
:
actions/upload-artifact@v4
with
:
name
:
generate-windows-cuda
name
:
generate-windows-cuda
-${{ matrix.cuda.version }}
path
:
|
llm/build/**/bin/*
dist/windows-amd64/**
...
...
@@ -265,6 +272,7 @@ jobs:
name
:
windows-cuda-deps
path
:
dist/deps/*
# Import the prior generation steps and build the final windows assets
build-windows
:
environment
:
release
...
...
@@ -314,7 +322,10 @@ jobs:
name
:
generate-windows-cpu
-
uses
:
actions/download-artifact@v4
with
:
name
:
generate-windows-cuda
name
:
generate-windows-cuda-11
-
uses
:
actions/download-artifact@v4
with
:
name
:
generate-windows-cuda-12
-
uses
:
actions/download-artifact@v4
with
:
name
:
windows-cuda-deps
...
...
@@ -363,7 +374,6 @@ jobs:
-
run
:
|
./scripts/build_linux.sh
./scripts/build_docker.sh
mv dist/deps/* dist/
-
uses
:
actions/upload-artifact@v4
with
:
name
:
dist-linux-amd64
...
...
Dockerfile
View file @
652c273f
ARG
GOLANG_VERSION=1.22.5
ARG
CMAKE_VERSION=3.22.1
# this CUDA_VERSION corresponds with the one specified in docs/gpu.md
ARG
CUDA_VERSION=11.3.1
ARG
CUDA_VERSION_11=11.3.1
ARG
CUDA_V11_ARCHITECTURES="50;52;53;60;61;62;70;72;75;80;86"
ARG
CUDA_VERSION_12=12.4.0
ARG
CUDA_V12_ARCHITECTURES="60;61;62;70;72;75;80;86;87;89;90;90a"
ARG
ROCM_VERSION=6.1.2
# Copy the minimal context we need to run the generate scripts
...
...
@@ -10,7 +12,7 @@ COPY .git .git
COPY
.gitmodules .gitmodules
COPY
llm llm
FROM
--platform=linux/amd64 nvidia/cuda:$CUDA_VERSION-devel-centos7 AS cuda-build-amd64
FROM
--platform=linux/amd64 nvidia/cuda:$CUDA_VERSION
_11
-devel-centos7 AS cuda-
11-
build-amd64
ARG
CMAKE_VERSION
COPY
./scripts/rh_linux_deps.sh /
RUN
CMAKE_VERSION
=
${
CMAKE_VERSION
}
sh /rh_linux_deps.sh
...
...
@@ -18,9 +20,34 @@ ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
COPY
--from=llm-code / /go/src/github.com/ollama/ollama/
WORKDIR
/go/src/github.com/ollama/ollama/llm/generate
ARG
CGO_CFLAGS
RUN
OLLAMA_SKIP_STATIC_GENERATE
=
1
OLLAMA_SKIP_CPU_GENERATE
=
1 sh gen_linux.sh
FROM
--platform=linux/arm64 nvidia/cuda:$CUDA_VERSION-devel-rockylinux8 AS cuda-build-arm64
ARG
CUDA_V11_ARCHITECTURES
ENV
GOARCH amd64
RUN
--mount
=
type
=
cache,target
=
/root/.ccache
\
OLLAMA_SKIP_STATIC_GENERATE
=
1
\
OLLAMA_SKIP_CPU_GENERATE
=
1
\
CMAKE_CUDA_ARCHITECTURES
=
"
${
CUDA_V11_ARCHITECTURES
}
"
\
CUDA_VARIANT
=
"_v11"
\
bash gen_linux.sh
FROM
--platform=linux/amd64 nvidia/cuda:$CUDA_VERSION_12-devel-centos7 AS cuda-12-build-amd64
ARG
CMAKE_VERSION
COPY
./scripts/rh_linux_deps.sh /
RUN
CMAKE_VERSION
=
${
CMAKE_VERSION
}
sh /rh_linux_deps.sh
ENV
PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
COPY
--from=llm-code / /go/src/github.com/ollama/ollama/
WORKDIR
/go/src/github.com/ollama/ollama/llm/generate
ARG
CGO_CFLAGS
ARG
CUDA_V12_ARCHITECTURES
ENV
GOARCH amd64
RUN
--mount
=
type
=
cache,target
=
/root/.ccache
\
OLLAMA_SKIP_STATIC_GENERATE
=
1
\
OLLAMA_SKIP_CPU_GENERATE
=
1
\
CMAKE_CUDA_ARCHITECTURES
=
"
${
CUDA_V12_ARCHITECTURES
}
"
\
CUDA_VARIANT
=
"_v12"
\
OLLAMA_CUSTOM_CUDA_DEFS
=
"-DGGML_CUDA_USE_GRAPHS=on"
\
bash gen_linux.sh
FROM
--platform=linux/arm64 nvidia/cuda:$CUDA_VERSION_11-devel-rockylinux8 AS cuda-11-build-server-arm64
ARG
CMAKE_VERSION
COPY
./scripts/rh_linux_deps.sh /
RUN
CMAKE_VERSION
=
${
CMAKE_VERSION
}
sh /rh_linux_deps.sh
...
...
@@ -28,7 +55,32 @@ ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH
COPY
--from=llm-code / /go/src/github.com/ollama/ollama/
WORKDIR
/go/src/github.com/ollama/ollama/llm/generate
ARG
CGO_CFLAGS
RUN
OLLAMA_SKIP_STATIC_GENERATE
=
1
OLLAMA_SKIP_CPU_GENERATE
=
1 sh gen_linux.sh
ARG
CUDA_V11_ARCHITECTURES
ENV
GOARCH arm64
RUN
OLLAMA_SKIP_STATIC_GENERATE
=
1
\
OLLAMA_SKIP_CPU_GENERATE
=
1
\
CMAKE_CUDA_ARCHITECTURES
=
"
${
CUDA_V11_ARCHITECTURES
}
"
\
CUDA_VARIANT
=
"_v11"
\
bash gen_linux.sh
FROM
--platform=linux/arm64 nvidia/cuda:$CUDA_VERSION_12-devel-rockylinux8 AS cuda-12-build-server-arm64
ARG
CMAKE_VERSION
COPY
./scripts/rh_linux_deps.sh /
RUN
CMAKE_VERSION
=
${
CMAKE_VERSION
}
sh /rh_linux_deps.sh
ENV
PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH
COPY
--from=llm-code / /go/src/github.com/ollama/ollama/
WORKDIR
/go/src/github.com/ollama/ollama/llm/generate
ARG
CGO_CFLAGS
ARG
CUDA_V12_ARCHITECTURES
ENV
GOARCH arm64
RUN
--mount
=
type
=
cache,target
=
/root/.ccache
\
OLLAMA_SKIP_STATIC_GENERATE
=
1
\
OLLAMA_SKIP_CPU_GENERATE
=
1
\
CMAKE_CUDA_ARCHITECTURES
=
"
${
CUDA_V12_ARCHITECTURES
}
"
\
CUDA_VARIANT
=
"_v12"
\
OLLAMA_CUSTOM_CUDA_DEFS
=
"-DGGML_CUDA_USE_GRAPHS=on"
\
bash gen_linux.sh
FROM
--platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete AS rocm-build-amd64
ARG
CMAKE_VERSION
...
...
@@ -40,15 +92,11 @@ COPY --from=llm-code / /go/src/github.com/ollama/ollama/
WORKDIR
/go/src/github.com/ollama/ollama/llm/generate
ARG
CGO_CFLAGS
ARG
AMDGPU_TARGETS
RUN
OLLAMA_SKIP_STATIC_GENERATE
=
1
OLLAMA_SKIP_CPU_GENERATE
=
1 sh gen_linux.sh
RUN
mkdir
/tmp/scratch
&&
\
for
dep
in
$(
zcat /go/src/github.com/ollama/ollama/llm/build/linux/x86_64/rocm
*
/bin/deps.txt.gz
)
;
do
\
cp
${
dep
}
/tmp/scratch/
||
exit
1
;
\
done
&&
\
(
cd
/opt/rocm/lib
&&
tar
cf - rocblas/library
)
|
(
cd
/tmp/scratch/
&&
tar
xf -
)
&&
\
mkdir
-p
/go/src/github.com/ollama/ollama/dist/deps/
&&
\
(
cd
/tmp/scratch/
&&
tar
czvf /go/src/github.com/ollama/ollama/dist/deps/ollama-linux-amd64-rocm.tgz
.
)
ENV
GOARCH amd64
RUN
--mount
=
type
=
cache,target
=
/root/.ccache
\
OLLAMA_SKIP_STATIC_GENERATE
=
1
OLLAMA_SKIP_CPU_GENERATE
=
1 bash gen_linux.sh
RUN
mkdir
-p
../../dist/linux-amd64/lib/ollama
&&
\
(
cd
/opt/rocm/lib
&&
tar
cf - rocblas/library
)
|
(
cd
../../dist/linux-amd64/lib/ollama
&&
tar
xf -
)
FROM
--platform=linux/amd64 centos:7 AS cpu-builder-amd64
ARG
CMAKE_VERSION
...
...
@@ -59,16 +107,21 @@ ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
COPY
--from=llm-code / /go/src/github.com/ollama/ollama/
ARG
OLLAMA_CUSTOM_CPU_DEFS
ARG
CGO_CFLAGS
ENV
GOARCH amd64
WORKDIR
/go/src/github.com/ollama/ollama/llm/generate
FROM
--platform=linux/amd64 cpu-builder-amd64 AS static-build-amd64
RUN
OLLAMA_CPU_TARGET
=
"static"
sh gen_linux.sh
RUN
--mount
=
type
=
cache,target
=
/root/.ccache
\
OLLAMA_CPU_TARGET
=
"static"
bash gen_linux.sh
FROM
--platform=linux/amd64 cpu-builder-amd64 AS cpu-build-amd64
RUN
OLLAMA_SKIP_STATIC_GENERATE
=
1
OLLAMA_CPU_TARGET
=
"cpu"
sh gen_linux.sh
RUN
--mount
=
type
=
cache,target
=
/root/.ccache
\
OLLAMA_SKIP_STATIC_GENERATE
=
1
OLLAMA_CPU_TARGET
=
"cpu"
bash gen_linux.sh
FROM
--platform=linux/amd64 cpu-builder-amd64 AS cpu_avx-build-amd64
RUN
OLLAMA_SKIP_STATIC_GENERATE
=
1
OLLAMA_CPU_TARGET
=
"cpu_avx"
sh gen_linux.sh
RUN
--mount
=
type
=
cache,target
=
/root/.ccache
\
OLLAMA_SKIP_STATIC_GENERATE
=
1
OLLAMA_CPU_TARGET
=
"cpu_avx"
bash gen_linux.sh
FROM
--platform=linux/amd64 cpu-builder-amd64 AS cpu_avx2-build-amd64
RUN
OLLAMA_SKIP_STATIC_GENERATE
=
1
OLLAMA_CPU_TARGET
=
"cpu_avx2"
sh gen_linux.sh
RUN
--mount
=
type
=
cache,target
=
/root/.ccache
\
OLLAMA_SKIP_STATIC_GENERATE
=
1
OLLAMA_CPU_TARGET
=
"cpu_avx2"
bash gen_linux.sh
FROM
--platform=linux/arm64 rockylinux:8 AS cpu-builder-arm64
ARG
CMAKE_VERSION
...
...
@@ -79,12 +132,15 @@ ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH
COPY
--from=llm-code / /go/src/github.com/ollama/ollama/
ARG
OLLAMA_CUSTOM_CPU_DEFS
ARG
CGO_CFLAGS
ENV
GOARCH arm64
WORKDIR
/go/src/github.com/ollama/ollama/llm/generate
FROM
--platform=linux/arm64 cpu-builder-arm64 AS static-build-arm64
RUN
OLLAMA_CPU_TARGET
=
"static"
sh gen_linux.sh
RUN
--mount
=
type
=
cache,target
=
/root/.ccache
\
OLLAMA_CPU_TARGET
=
"static"
bash gen_linux.sh
FROM
--platform=linux/arm64 cpu-builder-arm64 AS cpu-build-arm64
RUN
OLLAMA_SKIP_STATIC_GENERATE
=
1
OLLAMA_CPU_TARGET
=
"cpu"
sh gen_linux.sh
RUN
--mount
=
type
=
cache,target
=
/root/.ccache
\
OLLAMA_SKIP_STATIC_GENERATE
=
1
OLLAMA_CPU_TARGET
=
"cpu"
bash gen_linux.sh
# Intermediate stage used for ./scripts/build_linux.sh
...
...
@@ -95,12 +151,16 @@ COPY . .
COPY
--from=static-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY
--from=cpu_avx-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY
--from=cpu_avx2-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY
--from=cuda-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY
--from=cuda-11-build-amd64 /go/src/github.com/ollama/ollama/dist/ dist/
COPY
--from=cuda-11-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY
--from=cuda-12-build-amd64 /go/src/github.com/ollama/ollama/dist/ dist/
COPY
--from=cuda-12-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY
--from=rocm-build-amd64 /go/src/github.com/ollama/ollama/dist/ dist/
COPY
--from=rocm-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY
--from=rocm-build-amd64 /go/src/github.com/ollama/ollama/dist/deps/ ./dist/deps/
ARG
GOFLAGS
ARG
CGO_CFLAGS
RUN
go build
-trimpath
.
RUN
--mount
=
type
=
cache,target
=
/root/.ccache
\
go build
-trimpath
-o
dist/linux-amd64/bin/ollama .
# Intermediate stage used for ./scripts/build_linux.sh
FROM
--platform=linux/arm64 cpu-build-arm64 AS build-arm64
...
...
@@ -109,23 +169,36 @@ ARG GOLANG_VERSION
WORKDIR
/go/src/github.com/ollama/ollama
COPY
. .
COPY
--from=static-build-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY
--from=cuda-build-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY
--from=cuda-11-build-server-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
COPY
--from=cuda-11-build-server-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY
--from=cuda-12-build-server-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
COPY
--from=cuda-12-build-server-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
ARG
GOFLAGS
ARG
CGO_CFLAGS
RUN
go build
-trimpath
.
RUN
--mount
=
type
=
cache,target
=
/root/.ccache
\
go build
-trimpath
-o
dist/linux-arm64/bin/ollama .
# Strip out ROCm dependencies to keep the primary image lean
FROM
--platform=linux/amd64 ubuntu:22.04 as amd64-libs-without-rocm
COPY
--from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /scratch/
RUN
cd
/scratch/ollama/
&&
rm
-rf
rocblas libamd
*
libdrm
*
libroc
*
libhip
*
libhsa
*
# Runtime stages
FROM
--platform=linux/amd64 ubuntu:22.04 as runtime-amd64
COPY
--from=amd64-libs-without-rocm /scratch/ /lib/
RUN
apt-get update
&&
apt-get
install
-y
ca-certificates
COPY
--from=build-amd64 /go/src/github.com/ollama/ollama/ollama /bin/ollama
COPY
--from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/
FROM
--platform=linux/arm64 ubuntu:22.04 as runtime-arm64
COPY
--from=build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/
RUN
apt-get update
&&
apt-get
install
-y
ca-certificates
COPY
--from=build-arm64 /go/src/github.com/ollama/ollama/
ollama /bin/ollama
COPY
--from=build-arm64 /go/src/github.com/ollama/ollama/
dist/linux-arm64/bin/ /bin/
# Radeon images are much larger so we keep it distinct from the CPU/CUDA image
FROM
--platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete as runtime-rocm
RUN
update-pciids
COPY
--from=build-amd64 /go/src/github.com/ollama/ollama/ollama /bin/ollama
COPY
--from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/
RUN
ln
-s
/opt/rocm/lib /lib/ollama
EXPOSE
11434
ENV
OLLAMA_HOST 0.0.0.0
...
...
app/ollama.iss
View file @
652c273f
...
...
@@ -87,20 +87,11 @@ DialogFontSize=12
[Files]
Source: ".\app.exe"; DestDir: "{app}"; DestName: "{#MyAppExeName}" ; Flags: ignoreversion 64bit
Source: "..\ollama.exe"; DestDir: "{app}"; Flags: ignoreversion 64bit
Source: "..\dist\windows-{#ARCH}\ollama
_
runners\*"; DestDir: "{app}\ollama
_
runners"; Flags: ignoreversion 64bit recursesubdirs
Source: "..\ollama.exe"; DestDir: "{app}
\bin
"; Flags: ignoreversion 64bit
Source: "..\dist\windows-{#ARCH}\
lib\
ollama
\
runners\*"; DestDir: "{app}\
lib\
ollama
\
runners"; Flags: ignoreversion 64bit recursesubdirs
Source: "..\dist\ollama_welcome.ps1"; DestDir: "{app}"; Flags: ignoreversion
Source: ".\assets\app.ico"; DestDir: "{app}"; Flags: ignoreversion
#if DirExists("..\dist\windows-amd64\cuda")
Source: "..\dist\windows-amd64\cuda\*"; DestDir: "{app}\cuda\"; Flags: ignoreversion recursesubdirs
#endif
#if DirExists("..\dist\windows-amd64\oneapi")
Source: "..\dist\windows-amd64\oneapi\*"; DestDir: "{app}\oneapi\"; Flags: ignoreversion recursesubdirs
#endif
#if DirExists("..\dist\windows-amd64\rocm")
Source: "..\dist\windows-amd64\rocm\*"; DestDir: "{app}\rocm\"; Flags: ignoreversion recursesubdirs
#endif
Source: "..\dist\windows-amd64\lib\ollama\*"; DestDir: "{app}\lib\ollama\"; Flags: ignoreversion recursesubdirs
[Icons]
Name: "{group}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; IconFilename: "{app}\app.ico"
...
...
@@ -108,7 +99,7 @@ Name: "{userstartup}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; IconFilen
Name: "{userprograms}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; IconFilename: "{app}\app.ico"
[Run]
Filename: "{cmd}"; Parameters: "/C set PATH={app};%PATH% & ""{app}\{#MyAppExeName}"""; Flags: postinstall nowait runhidden
Filename: "{cmd}"; Parameters: "/C set PATH={app}
\bin
;%PATH% & ""{app}\{#MyAppExeName}"""; Flags: postinstall nowait runhidden
[UninstallRun]
; Filename: "{cmd}"; Parameters: "/C ""taskkill /im ''{#MyAppExeName}'' /f /t"; Flags: runhidden
...
...
@@ -143,8 +134,8 @@ SetupAppRunningError=Another Ollama installer is running.%n%nPlease cancel or fi
[Registry]
Root: HKCU; Subkey: "Environment"; \
ValueType: expandsz; ValueName: "Path"; ValueData: "{olddata};{app}"; \
Check: NeedsAddPath('{app}')
ValueType: expandsz; ValueName: "Path"; ValueData: "{olddata};{app}
\bin
"; \
Check: NeedsAddPath('{app}
\bin
')
[Code]
...
...
docs/linux.md
View file @
652c273f
...
...
@@ -20,13 +20,12 @@ GPU.
## Manual install
### Download
the
`ollama`
binary
### Download `ollama`
Ollama is distributed as a self-contained binary. Download it to a directory in your PATH
:
Download and extract the Linux package
:
```
bash
sudo
curl
-L
https://ollama.com/download/ollama-linux-amd64
-o
/usr/bin/ollama
sudo chmod
+x /usr/bin/ollama
curl
-fsSL
https://ollama.com/download/ollama-linux-amd64.tgz |
sudo tar
zx
-C
/usr
```
### Adding Ollama as a startup service (recommended)
...
...
@@ -96,8 +95,7 @@ curl -fsSL https://ollama.com/install.sh | sh
Or by downloading the ollama binary:
```
bash
sudo
curl
-L
https://ollama.com/download/ollama-linux-amd64
-o
/usr/bin/ollama
sudo chmod
+x /usr/bin/ollama
curl
-fsSL
https://ollama.com/download/ollama-linux-amd64.tgz |
sudo tar
zx
-C
/usr
```
## Installing specific versions
...
...
envconfig/config.go
View file @
652c273f
...
...
@@ -174,7 +174,7 @@ func RunnersDir() (p string) {
defer
func
()
{
if
p
==
""
{
slog
.
Error
(
"unable to locate llm runner directory. Set OLLAMA_RUNNERS_DIR to the location of 'ollama
_
runners'"
)
slog
.
Error
(
"unable to locate llm runner directory. Set OLLAMA_RUNNERS_DIR to the location of 'ollama
/
runners'"
)
}
}()
...
...
@@ -190,17 +190,17 @@ func RunnersDir() (p string) {
}
var
paths
[]
string
for
_
,
root
:=
range
[]
string
{
filepath
.
Dir
(
exe
),
cwd
}
{
for
_
,
root
:=
range
[]
string
{
filepath
.
Dir
(
exe
),
filepath
.
Join
(
filepath
.
Dir
(
exe
),
".."
),
cwd
}
{
paths
=
append
(
paths
,
root
,
filepath
.
Join
(
root
,
"windows
-"
+
runtime
.
GOARCH
),
filepath
.
Join
(
root
,
"dist"
,
"windows
-"
+
runtime
.
GOARCH
),
filepath
.
Join
(
root
,
runtime
.
GOOS
+
"
-"
+
runtime
.
GOARCH
),
filepath
.
Join
(
root
,
"dist"
,
runtime
.
GOOS
+
"
-"
+
runtime
.
GOARCH
),
)
}
// Try a few variations to improve developer experience when building from source in the local tree
for
_
,
path
:=
range
paths
{
candidate
:=
filepath
.
Join
(
path
,
"ollama
_
runners"
)
candidate
:=
filepath
.
Join
(
path
,
"lib"
,
"ollama
"
,
"
runners"
)
if
_
,
err
:=
os
.
Stat
(
candidate
);
err
==
nil
{
p
=
candidate
break
...
...
gpu/amd_common.go
View file @
652c273f
...
...
@@ -54,7 +54,7 @@ func commonAMDValidateLibDir() (string, error) {
// Installer payload location if we're running the installed binary
exe
,
err
:=
os
.
Executable
()
if
err
==
nil
{
rocmTargetDir
:=
filepath
.
Join
(
filepath
.
Dir
(
exe
),
"
rocm
"
)
rocmTargetDir
:=
filepath
.
Join
(
filepath
.
Dir
(
exe
),
"
.."
,
"lib"
,
"ollama
"
)
if
rocmLibUsable
(
rocmTargetDir
)
{
slog
.
Debug
(
"detected ROCM next to ollama executable "
+
rocmTargetDir
)
return
rocmTargetDir
,
nil
...
...
gpu/amd_windows.go
View file @
652c273f
...
...
@@ -153,7 +153,7 @@ func AMDValidateLibDir() (string, error) {
// Installer payload (if we're running from some other location)
localAppData
:=
os
.
Getenv
(
"LOCALAPPDATA"
)
appDir
:=
filepath
.
Join
(
localAppData
,
"Programs"
,
"Ollama"
)
rocmTargetDir
:=
filepath
.
Join
(
appDir
,
"
rocm
"
)
rocmTargetDir
:=
filepath
.
Join
(
appDir
,
"
.."
,
"lib"
,
"ollama
"
)
if
rocmLibUsable
(
rocmTargetDir
)
{
slog
.
Debug
(
"detected ollama installed ROCm at "
+
rocmTargetDir
)
return
rocmTargetDir
,
nil
...
...
gpu/cuda_common.go
View file @
652c273f
...
...
@@ -4,9 +4,17 @@ package gpu
import
(
"log/slog"
"os"
"regexp"
"runtime"
"strconv"
"strings"
)
// Jetson devices have JETSON_JETPACK="x.y.z" factory set to the Jetpack version installed.
// Included to drive logic for reducing Ollama-allocated overhead on L4T/Jetson devices.
var
CudaTegra
string
=
os
.
Getenv
(
"JETSON_JETPACK"
)
func
cudaGetVisibleDevicesEnv
(
gpuInfo
[]
GpuInfo
)
(
string
,
string
)
{
ids
:=
[]
string
{}
for
_
,
info
:=
range
gpuInfo
{
...
...
@@ -19,3 +27,38 @@ func cudaGetVisibleDevicesEnv(gpuInfo []GpuInfo) (string, string) {
}
return
"CUDA_VISIBLE_DEVICES"
,
strings
.
Join
(
ids
,
","
)
}
func
cudaVariant
(
gpuInfo
CudaGPUInfo
)
string
{
if
runtime
.
GOARCH
==
"arm64"
&&
runtime
.
GOOS
==
"linux"
{
if
CudaTegra
!=
""
{
ver
:=
strings
.
Split
(
CudaTegra
,
"."
)
if
len
(
ver
)
>
0
{
return
"jetpack"
+
ver
[
0
]
}
}
else
if
data
,
err
:=
os
.
ReadFile
(
"/etc/nv_tegra_release"
);
err
==
nil
{
r
:=
regexp
.
MustCompile
(
` R(\d+) `
)
m
:=
r
.
FindSubmatch
(
data
)
if
len
(
m
)
!=
2
{
slog
.
Info
(
"Unexpected format for /etc/nv_tegra_release. Set JETSON_JETPACK to select version"
)
}
else
{
if
l4t
,
err
:=
strconv
.
Atoi
(
string
(
m
[
1
]));
err
==
nil
{
// Note: mapping from L4t -> JP is inconsistent (can't just subtract 30)
// https://developer.nvidia.com/embedded/jetpack-archive
switch
l4t
{
case
35
:
return
"jetpack5"
case
36
:
return
"jetpack6"
default
:
slog
.
Info
(
"unsupported L4T version"
,
"nv_tegra_release"
,
string
(
data
))
}
}
}
}
}
if
gpuInfo
.
computeMajor
<
6
||
gpuInfo
.
DriverMajor
<
12
{
return
"v11"
}
return
"v12"
}
gpu/gpu.go
View file @
652c273f
...
...
@@ -64,10 +64,6 @@ var RocmComputeMin = 9
// TODO find a better way to detect iGPU instead of minimum memory
const
IGPUMemLimit
=
1
*
format
.
GibiByte
// 512G is what they typically report, so anything less than 1G must be iGPU
// Jetson devices have JETSON_JETPACK="x.y.z" factory set to the Jetpack version installed.
// Included to drive logic for reducing Ollama-allocated overhead on L4T/Jetson devices.
var
CudaTegra
string
=
os
.
Getenv
(
"JETSON_JETPACK"
)
// Note: gpuMutex must already be held
func
initCudaHandles
()
*
cudaHandles
{
// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing
...
...
@@ -215,7 +211,7 @@ func GetGPUInfo() GpuInfoList {
GpuInfo
:
GpuInfo
{
memInfo
:
mem
,
Library
:
"cpu"
,
Variant
:
cpuCapability
,
Variant
:
cpuCapability
.
String
()
,
ID
:
"0"
,
},
},
...
...
@@ -229,11 +225,7 @@ func GetGPUInfo() GpuInfoList {
return
GpuInfoList
{
cpus
[
0
]
.
GpuInfo
}
}
// On windows we bundle the nvidia library one level above the runner dir
depPath
:=
""
if
runtime
.
GOOS
==
"windows"
&&
envconfig
.
RunnersDir
()
!=
""
{
depPath
=
filepath
.
Join
(
filepath
.
Dir
(
envconfig
.
RunnersDir
()),
"cuda"
)
}
depPath
:=
LibraryDir
()
// Load ALL libraries
cHandles
=
initCudaHandles
()
...
...
@@ -269,11 +261,23 @@ func GetGPUInfo() GpuInfoList {
gpuInfo
.
FreeMemory
=
uint64
(
memInfo
.
free
)
gpuInfo
.
ID
=
C
.
GoString
(
&
memInfo
.
gpu_id
[
0
])
gpuInfo
.
Compute
=
fmt
.
Sprintf
(
"%d.%d"
,
memInfo
.
major
,
memInfo
.
minor
)
gpuInfo
.
computeMajor
=
int
(
memInfo
.
major
)
gpuInfo
.
computeMinor
=
int
(
memInfo
.
minor
)
gpuInfo
.
MinimumMemory
=
cudaMinimumMemory
variant
:=
cudaVariant
(
gpuInfo
)
if
depPath
!=
""
{
gpuInfo
.
DependencyPath
=
depPath
// Check for variant specific directory
if
variant
!=
""
{
if
_
,
err
:=
os
.
Stat
(
filepath
.
Join
(
depPath
,
"cuda_"
+
variant
));
err
==
nil
{
gpuInfo
.
DependencyPath
=
filepath
.
Join
(
depPath
,
"cuda_"
+
variant
)
}
}
}
gpuInfo
.
Name
=
C
.
GoString
(
&
memInfo
.
gpu_name
[
0
])
gpuInfo
.
DriverMajor
=
driverMajor
gpuInfo
.
DriverMinor
=
driverMinor
gpuInfo
.
Variant
=
variant
// query the management library as well so we can record any skew between the two
// which represents overhead on the GPU we must set aside on subsequent updates
...
...
@@ -306,13 +310,6 @@ func GetGPUInfo() GpuInfoList {
if
envconfig
.
IntelGPU
()
{
oHandles
=
initOneAPIHandles
()
if
oHandles
!=
nil
&&
oHandles
.
oneapi
!=
nil
{
// On windows we bundle the oneapi library one level above the runner dir
depPath
=
""
if
runtime
.
GOOS
==
"windows"
&&
envconfig
.
RunnersDir
()
!=
""
{
depPath
=
filepath
.
Join
(
filepath
.
Dir
(
envconfig
.
RunnersDir
()),
"oneapi"
)
}
for
d
:=
range
oHandles
.
oneapi
.
num_drivers
{
if
oHandles
.
oneapi
==
nil
{
// shouldn't happen
...
...
@@ -467,10 +464,12 @@ func GetGPUInfo() GpuInfoList {
func
FindGPULibs
(
baseLibName
string
,
defaultPatterns
[]
string
)
[]
string
{
// Multiple GPU libraries may exist, and some may not work, so keep trying until we exhaust them
var
ldPaths
[]
string
var
patterns
[]
string
gpuLibPaths
:=
[]
string
{}
slog
.
Debug
(
"Searching for GPU library"
,
"name"
,
baseLibName
)
// Start with our bundled libraries
patterns
:=
[]
string
{
filepath
.
Join
(
LibraryDir
(),
baseLibName
)}
switch
runtime
.
GOOS
{
case
"windows"
:
ldPaths
=
strings
.
Split
(
os
.
Getenv
(
"PATH"
),
";"
)
...
...
@@ -479,13 +478,14 @@ func FindGPULibs(baseLibName string, defaultPatterns []string) []string {
default
:
return
gpuLibPaths
}
// Start with whatever we find in the PATH/LD_LIBRARY_PATH
// Then with whatever we find in the PATH/LD_LIBRARY_PATH
for
_
,
ldPath
:=
range
ldPaths
{
d
,
err
:=
filepath
.
Abs
(
ldPath
)
if
err
!=
nil
{
continue
}
patterns
=
append
(
patterns
,
filepath
.
Join
(
d
,
baseLibName
+
"*"
))
patterns
=
append
(
patterns
,
filepath
.
Join
(
d
,
baseLibName
))
}
patterns
=
append
(
patterns
,
defaultPatterns
...
)
slog
.
Debug
(
"gpu library search"
,
"globs"
,
patterns
)
...
...
@@ -641,3 +641,31 @@ func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) {
return
""
,
""
}
}
func
LibraryDir
()
string
{
// On Windows/linux we bundle the dependencies at the same level as the executable
appExe
,
err
:=
os
.
Executable
()
if
err
!=
nil
{
slog
.
Warn
(
"failed to lookup executable path"
,
"error"
,
err
)
}
cwd
,
err
:=
os
.
Getwd
()
if
err
!=
nil
{
slog
.
Warn
(
"failed to lookup working directory"
,
"error"
,
err
)
}
// Scan for any of our dependeices, and pick first match
for
_
,
root
:=
range
[]
string
{
filepath
.
Dir
(
appExe
),
filepath
.
Join
(
filepath
.
Dir
(
appExe
),
".."
),
cwd
}
{
libDep
:=
filepath
.
Join
(
"lib"
,
"ollama"
)
if
_
,
err
:=
os
.
Stat
(
filepath
.
Join
(
root
,
libDep
));
err
==
nil
{
return
filepath
.
Join
(
root
,
libDep
)
}
// Developer mode, local build
if
_
,
err
:=
os
.
Stat
(
filepath
.
Join
(
root
,
runtime
.
GOOS
+
"-"
+
runtime
.
GOARCH
,
libDep
));
err
==
nil
{
return
filepath
.
Join
(
root
,
runtime
.
GOOS
+
"-"
+
runtime
.
GOARCH
,
libDep
)
}
if
_
,
err
:=
os
.
Stat
(
filepath
.
Join
(
root
,
"dist"
,
runtime
.
GOOS
+
"-"
+
runtime
.
GOARCH
,
libDep
));
err
==
nil
{
return
filepath
.
Join
(
root
,
"dist"
,
runtime
.
GOOS
+
"-"
+
runtime
.
GOARCH
,
libDep
)
}
}
slog
.
Warn
(
"unable to locate gpu dependency libraries"
)
return
""
}
gpu/gpu_darwin.go
View file @
652c273f
...
...
@@ -25,7 +25,7 @@ func GetGPUInfo() GpuInfoList {
return
[]
GpuInfo
{
{
Library
:
"cpu"
,
Variant
:
GetCPUCapability
(),
Variant
:
GetCPUCapability
()
.
String
()
,
memInfo
:
mem
,
},
}
...
...
@@ -48,7 +48,7 @@ func GetCPUInfo() GpuInfoList {
return
[]
GpuInfo
{
{
Library
:
"cpu"
,
Variant
:
GetCPUCapability
(),
Variant
:
GetCPUCapability
()
.
String
()
,
memInfo
:
mem
,
},
}
...
...
gpu/gpu_linux.go
View file @
652c273f
...
...
@@ -47,7 +47,7 @@ var (
CudartMgmtName
=
"libcudart.so*"
NvcudaMgmtName
=
"libcuda.so*"
NvmlMgmtName
=
""
// not currently wired on linux
OneapiMgmtName
=
"libze_intel_gpu.so"
OneapiMgmtName
=
"libze_intel_gpu.so
*
"
)
func
GetCPUMem
()
(
memInfo
,
error
)
{
...
...
gpu/types.go
View file @
652c273f
...
...
@@ -19,7 +19,7 @@ type GpuInfo struct {
Library
string
`json:"library,omitempty"`
// Optional variant to select (e.g. versions, cpu feature flags)
Variant
CPUCapability
`json:"variant"`
Variant
string
`json:"variant"`
// MinimumMemory represents the minimum memory required to use the GPU
MinimumMemory
uint64
`json:"-"`
...
...
@@ -55,6 +55,8 @@ type CudaGPUInfo struct {
GpuInfo
OSOverhead
uint64
// Memory overhead between the driver library and management library
index
int
//nolint:unused,nolintlint
computeMajor
int
//nolint:unused,nolintlint
computeMinor
int
//nolint:unused,nolintlint
}
type
CudaGPUInfoList
[]
CudaGPUInfo
...
...
@@ -81,8 +83,8 @@ func (l GpuInfoList) ByLibrary() []GpuInfoList {
for
_
,
info
:=
range
l
{
found
:=
false
requested
:=
info
.
Library
if
info
.
Variant
!=
CPUCapabilityNone
{
requested
+=
"_"
+
info
.
Variant
.
String
()
if
info
.
Variant
!=
CPUCapabilityNone
.
String
()
{
requested
+=
"_"
+
info
.
Variant
}
for
i
,
lib
:=
range
libs
{
if
lib
==
requested
{
...
...
@@ -105,6 +107,7 @@ func (l GpuInfoList) LogDetails() {
slog
.
Info
(
"inference compute"
,
"id"
,
g
.
ID
,
"library"
,
g
.
Library
,
"variant"
,
g
.
Variant
,
"compute"
,
g
.
Compute
,
"driver"
,
fmt
.
Sprintf
(
"%d.%d"
,
g
.
DriverMajor
,
g
.
DriverMinor
),
"name"
,
g
.
Name
,
...
...
llm/ext_server/CMakeLists.txt
View file @
652c273f
set
(
TARGET ollama_llama_server
)
option
(
LLAMA_SERVER_VERBOSE
"Build verbose logging option for Server"
ON
)
set
(
LLAMA_SERVER_LDFLAGS $ENV{LLAMA_SERVER_LDFLAGS}
)
include_directories
(
${
CMAKE_CURRENT_SOURCE_DIR
}
)
add_executable
(
${
TARGET
}
server.cpp utils.hpp json.hpp httplib.h
)
install
(
TARGETS
${
TARGET
}
RUNTIME
)
target_compile_definitions
(
${
TARGET
}
PRIVATE
SERVER_VERBOSE=$<BOOL:
${
LLAMA_SERVER_VERBOSE
}
>
)
target_link_libraries
(
${
TARGET
}
PRIVATE ggml llama common llava
${
CMAKE_THREAD_LIBS_INIT
}
)
target_link_libraries
(
${
TARGET
}
PRIVATE ggml llama common llava
${
CMAKE_THREAD_LIBS_INIT
}
${
LLAMA_SERVER_LDFLAGS
}
)
if
(
WIN32
)
TARGET_LINK_LIBRARIES
(
${
TARGET
}
PRIVATE ws2_32
)
endif
()
...
...
llm/generate/gen_common.sh
View file @
652c273f
...
...
@@ -9,11 +9,14 @@ init_vars() {
ARCH
=
"arm64"
;;
*
)
ARCH
=
$(
uname
-m
|
sed
-e
"s/aarch64/arm64/g"
)
echo
"GOARCH must be set"
echo
"this script is meant to be run from within go generate"
exit
1
;;
esac
LLAMACPP_DIR
=
../llama.cpp
CMAKE_DEFS
=
""
CMAKE_DEFS
=
"
-DCMAKE_SKIP_RPATH=on
"
CMAKE_TARGETS
=
"--target ollama_llama_server"
if
echo
"
${
CGO_CFLAGS
}
"
|
grep
--
'-g'
>
/dev/null
;
then
CMAKE_DEFS
=
"-DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_VERBOSE_MAKEFILE=on -DLLAMA_GPROF=on -DLLAMA_SERVER_VERBOSE=on
${
CMAKE_DEFS
}
"
...
...
@@ -27,6 +30,7 @@ init_vars() {
WHOLE_ARCHIVE
=
"-Wl,-force_load"
NO_WHOLE_ARCHIVE
=
""
GCC_ARCH
=
"-arch
${
ARCH
}
"
DIST_BASE
=
../../dist/darwin-
${
GOARCH
}
/
;;
"Linux"
)
LIB_EXT
=
"so"
...
...
@@ -35,6 +39,7 @@ init_vars() {
# Cross compiling not supported on linux - Use docker
GCC_ARCH
=
""
DIST_BASE
=
../../dist/linux-
${
GOARCH
}
/
;;
*
)
;;
...
...
@@ -42,6 +47,7 @@ init_vars() {
if
[
-z
"
${
CMAKE_CUDA_ARCHITECTURES
}
"
]
;
then
CMAKE_CUDA_ARCHITECTURES
=
"50;52;61;70;75;80"
fi
GZIP
=
$(
which pigz 2>/dev/null
||
echo
"gzip"
)
}
git_module_setup
()
{
...
...
@@ -85,26 +91,36 @@ build() {
compress
()
{
echo
"Compressing payloads to reduce overall binary size..."
pids
=
""
rm
-rf
${
BUILD_DIR
}
/bin/
*
.gz
for
f
in
${
BUILD_DIR
}
/bin/
*
;
do
gzip
-n
--best
-f
${
f
}
&
pids+
=
"
$!
"
${
GZIP
}
-n
--best
-f
${
f
}
&
compress_
pids+
=
"
$!
"
done
# check for lib directory
if
[
-d
${
BUILD_DIR
}
/lib
]
;
then
for
f
in
${
BUILD_DIR
}
/lib/
*
;
do
gzip
-n
--best
-f
${
f
}
&
pids+
=
"
$!
"
${
GZIP
}
-n
--best
-f
${
f
}
&
compress_
pids+
=
"
$!
"
done
fi
echo
for
pid
in
${
pids
}
;
do
}
wait_for_compress
()
{
for
pid
in
${
compress_pids
}
;
do
wait
$pid
done
echo
"Finished compression"
}
install
()
{
echo
"Installing libraries to bin dir
${
BUILD_DIR
}
/bin/"
for
lib
in
$(
find
${
BUILD_DIR
}
-name
\*
.
${
LIB_EXT
}
)
;
do
rm
-f
"
${
BUILD_DIR
}
/bin/
$(
basename
${
lib
}
)
"
cp
-af
"
${
lib
}
"
"
${
BUILD_DIR
}
/bin/"
done
}
# Keep the local tree clean after we're done with the build
cleanup
()
{
(
cd
${
LLAMACPP_DIR
}
/
&&
git checkout CMakeLists.txt
)
...
...
llm/generate/gen_darwin.sh
View file @
652c273f
...
...
@@ -6,6 +6,7 @@
set
-ex
set
-o
pipefail
compress_pids
=
""
echo
"Starting darwin generate script"
source
$(
dirname
$0
)
/gen_common.sh
init_vars
...
...
@@ -98,4 +99,5 @@ case "${GOARCH}" in
esac
cleanup
wait_for_compress
echo
"go generate completed. LLM runners:
$(
cd
${
BUILD_DIR
}
/..
;
echo
*
)
"
llm/generate/gen_linux.sh
View file @
652c273f
...
...
@@ -13,6 +13,7 @@
set
-ex
set
-o
pipefail
compress_pids
=
""
# See https://llvm.org/docs/AMDGPUUsage.html#processors for reference
amdGPUs
()
{
...
...
@@ -51,7 +52,7 @@ if [ -z "${CUDACXX}" ]; then
export
CUDACXX
=
$(
command
-v
nvcc
)
fi
fi
COMMON_CMAKE_DEFS
=
"-DBUILD_SHARED_LIBS=o
ff
-DCMAKE_POSITION_INDEPENDENT_CODE=on -DGGML_NATIVE=off -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_OPENMP=off"
COMMON_CMAKE_DEFS
=
"
-DCMAKE_SKIP_RPATH=on
-DBUILD_SHARED_LIBS=o
n
-DCMAKE_POSITION_INDEPENDENT_CODE=on -DGGML_NATIVE=off -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_OPENMP=off"
source
$(
dirname
$0
)
/gen_common.sh
init_vars
git_module_setup
...
...
@@ -77,10 +78,11 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
if
[
-n
"
${
OLLAMA_CUSTOM_CPU_DEFS
}
"
]
;
then
init_vars
echo
"OLLAMA_CUSTOM_CPU_DEFS=
\"
${
OLLAMA_CUSTOM_CPU_DEFS
}
\"
"
CMAKE_DEFS
=
"
${
OLLAMA_CUSTOM_CPU_DEFS
}
-DBUILD_SHARED_LIBS=o
ff
-DCMAKE_POSITION_INDEPENDENT_CODE=on
${
CMAKE_DEFS
}
"
CMAKE_DEFS
=
"
${
OLLAMA_CUSTOM_CPU_DEFS
}
-DBUILD_SHARED_LIBS=o
n
-DCMAKE_POSITION_INDEPENDENT_CODE=on
${
CMAKE_DEFS
}
"
BUILD_DIR
=
"../build/linux/
${
ARCH
}
/cpu"
echo
"Building custom CPU"
build
install
compress
else
# Darwin Rosetta x86 emulation does NOT support AVX, AVX2, AVX512
...
...
@@ -93,7 +95,7 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
# -DGGML_AVX512_VBMI -- 2018 Intel Cannon Lake
# -DGGML_AVX512_VNNI -- 2021 Intel Alder Lake
COMMON_CPU_DEFS
=
"-DBUILD_SHARED_LIBS=o
ff
-DCMAKE_POSITION_INDEPENDENT_CODE=on -DGGML_NATIVE=off -DGGML_OPENMP=off"
COMMON_CPU_DEFS
=
"-DBUILD_SHARED_LIBS=o
n
-DCMAKE_POSITION_INDEPENDENT_CODE=on -DGGML_NATIVE=off -DGGML_OPENMP=off"
if
[
-z
"
${
OLLAMA_CPU_TARGET
}
"
-o
"
${
OLLAMA_CPU_TARGET
}
"
=
"cpu"
]
;
then
#
# CPU first for the default library, set up as lowest common denominator for maximum compatibility (including Rosetta)
...
...
@@ -103,6 +105,7 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
BUILD_DIR
=
"../build/linux/
${
ARCH
}
/cpu"
echo
"Building LCD CPU"
build
install
compress
fi
...
...
@@ -120,6 +123,7 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
BUILD_DIR
=
"../build/linux/
${
ARCH
}
/cpu_avx"
echo
"Building AVX CPU"
build
install
compress
fi
...
...
@@ -133,6 +137,7 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
BUILD_DIR
=
"../build/linux/
${
ARCH
}
/cpu_avx2"
echo
"Building AVX2 CPU"
build
install
compress
fi
fi
...
...
@@ -160,7 +165,7 @@ if [ -z "${OLLAMA_SKIP_CUDA_GENERATE}" -a -d "${CUDA_LIB_DIR}" ]; then
echo
"CUDA libraries detected - building dynamic CUDA library"
init_vars
CUDA_MAJOR
=
$(
ls
"
${
CUDA_LIB_DIR
}
"
/libcudart.so.
*
|
head
-1
|
cut
-f3
-d
.
||
true
)
if
[
-n
"
${
CUDA_MAJOR
}
"
]
;
then
if
[
-n
"
${
CUDA_MAJOR
}
"
-a
-z
"
${
CUDA_VARIANT
}
"
]
;
then
CUDA_VARIANT
=
_v
${
CUDA_MAJOR
}
fi
if
[
"
${
ARCH
}
"
==
"arm64"
]
;
then
...
...
@@ -178,29 +183,19 @@ if [ -z "${OLLAMA_SKIP_CUDA_GENERATE}" -a -d "${CUDA_LIB_DIR}" ]; then
CMAKE_CUDA_DEFS
=
"-DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=
${
CMAKE_CUDA_ARCHITECTURES
}
${
OLLAMA_CUSTOM_CUDA_DEFS
}
"
echo
"Building custom CUDA GPU"
else
CMAKE_CUDA_DEFS
=
"-DGGML_CUDA=on
-DCMAKE_CUDA_FLAGS=-t8
-DCMAKE_CUDA_ARCHITECTURES=
${
CMAKE_CUDA_ARCHITECTURES
}
"
CMAKE_CUDA_DEFS
=
"-DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=
${
CMAKE_CUDA_ARCHITECTURES
}
"
fi
CMAKE_DEFS
=
"
${
COMMON_CMAKE_DEFS
}
${
CMAKE_DEFS
}
${
ARM64_DEFS
}
${
CMAKE_CUDA_DEFS
}
"
export
CUDAFLAGS
=
"-t8"
CMAKE_DEFS
=
"
${
COMMON_CMAKE_DEFS
}
${
CMAKE_DEFS
}
${
ARM64_DEFS
}
${
CMAKE_CUDA_DEFS
}
-DGGML_STATIC=off"
BUILD_DIR
=
"../build/linux/
${
ARCH
}
/cuda
${
CUDA_VARIANT
}
"
EXTRA_LIBS
=
"-L
${
CUDA_LIB_DIR
}
-lcudart -lcublas -lcublasLt -lcuda"
export
LLAMA_SERVER_LDFLAGS
=
"-L
${
CUDA_LIB_DIR
}
-lcudart -lcublas -lcublasLt -lcuda"
CUDA_DIST_DIR
=
"
${
CUDA_DIST_DIR
:-${
DIST_BASE
}
/lib/ollama
}
"
build
# Carry the CUDA libs as payloads to help reduce dependency burden on users
#
# TODO - in the future we may shift to packaging these separately and conditionally
# downloading them in the install script.
DEPS
=
"
$(
ldd
${
BUILD_DIR
}
/bin/ollama_llama_server
)
"
for
lib
in
libcudart.so libcublas.so libcublasLt.so
;
do
DEP
=
$(
echo
"
${
DEPS
}
"
|
grep
${
lib
}
|
cut
-f1
-d
' '
| xargs
||
true
)
if
[
-n
"
${
DEP
}
"
-a
-e
"
${
CUDA_LIB_DIR
}
/
${
DEP
}
"
]
;
then
cp
"
${
CUDA_LIB_DIR
}
/
${
DEP
}
"
"
${
BUILD_DIR
}
/bin/"
elif
[
-e
"
${
CUDA_LIB_DIR
}
/
${
lib
}
.
${
CUDA_MAJOR
}
"
]
;
then
cp
"
${
CUDA_LIB_DIR
}
/
${
lib
}
.
${
CUDA_MAJOR
}
"
"
${
BUILD_DIR
}
/bin/"
elif
[
-e
"
${
CUDART_LIB_DIR
}
/
${
lib
}
"
]
;
then
cp
-d
${
CUDART_LIB_DIR
}
/
${
lib
}*
"
${
BUILD_DIR
}
/bin/"
else
cp
-d
"
${
CUDA_LIB_DIR
}
/
${
lib
}
*"
"
${
BUILD_DIR
}
/bin/"
fi
install
echo
"Installing CUDA dependencies in
${
CUDA_DIST_DIR
}
"
mkdir
-p
"
${
CUDA_DIST_DIR
}
"
for
lib
in
${
CUDA_LIB_DIR
}
/libcudart.so
*
${
CUDA_LIB_DIR
}
/libcublas.so
*
${
CUDA_LIB_DIR
}
/libcublasLt.so
*
;
do
cp
-a
"
${
lib
}
"
"
${
CUDA_DIST_DIR
}
"
done
compress
...
...
@@ -218,21 +213,24 @@ if [ -z "${OLLAMA_SKIP_ONEAPI_GENERATE}" -a -d "${ONEAPI_ROOT}" ]; then
CC
=
icx
CMAKE_DEFS
=
"
${
COMMON_CMAKE_DEFS
}
${
CMAKE_DEFS
}
-DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL=ON -DGGML_SYCL_F16=OFF"
BUILD_DIR
=
"../build/linux/
${
ARCH
}
/oneapi"
EXTRA_LIBS
=
"-fsycl -Wl,-rpath,
${
ONEAPI_ROOT
}
/compiler/latest/lib,-rpath,
${
ONEAPI_ROOT
}
/mkl/latest/lib,-rpath,
${
ONEAPI_ROOT
}
/tbb/latest/lib,-rpath,
${
ONEAPI_ROOT
}
/compiler/latest/opt/oclfpga/linux64/lib -lOpenCL -lmkl_core -lmkl_sycl_blas -lmkl_intel_ilp64 -lmkl_tbb_thread -ltbb"
ONEAPI_DIST_DIR
=
"
${
DIST_BASE
}
/lib/ollama"
export
LLAMA_SERVER_LDFLAGS
=
"-fsycl -lOpenCL -lmkl_core -lmkl_sycl_blas -lmkl_intel_ilp64 -lmkl_tbb_thread -ltbb"
DEBUG_FLAGS
=
""
# icx compiles with -O0 if we pass -g, so we must remove it
build
# copy oneAPI dependencies
mkdir
-p
"
${
ONEAPI_DIST_DIR
}
"
for
dep
in
$(
ldd
"
${
BUILD_DIR
}
/bin/ollama_llama_server"
|
grep
"=>"
|
cut
-f2
-d
=
|
cut
-f2
-d
' '
|
grep
-e
sycl
-e
mkl
-e
tbb
)
;
do
cp
"
${
dep
}
"
"
${
BUILD_DIR
}
/bin/
"
cp
-a
"
${
dep
}
"
"
${
ONEAPI_DIST_DIR
}
"
done
cp
"
${
ONEAPI_ROOT
}
/compiler/latest/lib/libOpenCL.so"
"
${
BUILD_DIR
}
/bin/"
cp
"
${
ONEAPI_ROOT
}
/compiler/latest/lib/libimf.so"
"
${
BUILD_DIR
}
/bin/"
cp
"
${
ONEAPI_ROOT
}
/compiler/latest/lib/libintlc.so.5"
"
${
BUILD_DIR
}
/bin/"
cp
"
${
ONEAPI_ROOT
}
/compiler/latest/lib/libirng.so"
"
${
BUILD_DIR
}
/bin/"
cp
"
${
ONEAPI_ROOT
}
/compiler/latest/lib/libpi_level_zero.so"
"
${
BUILD_DIR
}
/bin/"
cp
"
${
ONEAPI_ROOT
}
/compiler/latest/lib/libsvml.so"
"
${
BUILD_DIR
}
/bin/"
cp
"
${
ONEAPI_ROOT
}
/compiler/latest/lib/libur_loader.so.0"
"
${
BUILD_DIR
}
/bin/"
cp
"
${
ONEAPI_ROOT
}
/compiler/latest/lib/libOpenCL.so"
"
${
ONEAPI_DIST_DIR
}
"
cp
"
${
ONEAPI_ROOT
}
/compiler/latest/lib/libimf.so"
"
${
ONEAPI_DIST_DIR
}
"
cp
"
${
ONEAPI_ROOT
}
/compiler/latest/lib/libintlc.so.5"
"
${
ONEAPI_DIST_DIR
}
"
cp
"
${
ONEAPI_ROOT
}
/compiler/latest/lib/libirng.so"
"
${
ONEAPI_DIST_DIR
}
"
cp
"
${
ONEAPI_ROOT
}
/compiler/latest/lib/libpi_level_zero.so"
"
${
ONEAPI_DIST_DIR
}
"
cp
"
${
ONEAPI_ROOT
}
/compiler/latest/lib/libsvml.so"
"
${
ONEAPI_DIST_DIR
}
"
cp
"
${
ONEAPI_ROOT
}
/compiler/latest/lib/libur_loader.so.0"
"
${
ONEAPI_DIST_DIR
}
"
install
compress
fi
...
...
@@ -262,23 +260,21 @@ if [ -z "${OLLAMA_SKIP_ROCM_GENERATE}" -a -d "${ROCM_PATH}" ]; then
echo
"Building custom ROCM GPU"
fi
BUILD_DIR
=
"../build/linux/
${
ARCH
}
/rocm
${
ROCM_VARIANT
}
"
EXTRA_LIBS
=
"-L
${
ROCM_PATH
}
/lib -L/opt/amdgpu/lib/x86_64-linux-gnu/ -Wl,-rpath,
\$
ORIGIN/../../rocm/ -lhipblas -lrocblas -lamdhip64 -lrocsolver -lamd_comgr -lhsa-runtime64 -lrocsparse -ldrm -ldrm_amdgpu"
ROCM_DIST_DIR
=
"
${
DIST_BASE
}
/lib/ollama"
# TODO figure out how to disable runpath (rpath)
# export CMAKE_HIP_FLAGS="-fno-rtlib-add-rpath" # doesn't work
export
LLAMA_SERVER_LDFLAGS
=
"-L
${
ROCM_PATH
}
/lib -L/opt/amdgpu/lib/x86_64-linux-gnu/ -lhipblas -lrocblas -lamdhip64 -lrocsolver -lamd_comgr -lhsa-runtime64 -lrocsparse -ldrm -ldrm_amdgpu"
build
# Record the ROCM dependencies
rm
-f
"
${
BUILD_DIR
}
/bin/deps.txt"
touch
"
${
BUILD_DIR
}
/bin/deps.txt"
for
dep
in
$(
ldd
"
${
BUILD_DIR
}
/bin/ollama_llama_server"
|
grep
"=>"
|
cut
-f2
-d
=
|
cut
-f2
-d
' '
|
grep
-e
rocm
-e
amdgpu
-e
libtinfo
)
;
do
echo
"
${
dep
}
"
>>
"
${
BUILD_DIR
}
/bin/deps.txt"
# copy the ROCM dependencies
mkdir
-p
"
${
ROCM_DIST_DIR
}
"
for
dep
in
$(
ldd
"
${
BUILD_DIR
}
/bin/ollama_llama_server"
|
grep
"=>"
|
cut
-f2
-d
=
|
cut
-f2
-d
' '
|
grep
-v
"
${
ARCH
}
/rocm
${
ROCM_VARIANT
}
"
|
grep
-e
rocm
-e
amdgpu
-e
libtinfo
)
;
do
cp
-a
"
${
dep
}
"
*
"
${
ROCM_DIST_DIR
}
"
done
# bomb out if for some reason we didn't get a few deps
if
[
$(
cat
"
${
BUILD_DIR
}
/bin/deps.txt"
|
wc
-l
)
-lt
8
]
;
then
cat
"
${
BUILD_DIR
}
/bin/deps.txt"
echo
"ERROR: deps file short"
exit
1
fi
install
compress
fi
cleanup
wait_for_compress
echo
"go generate completed. LLM runners:
$(
cd
${
BUILD_DIR
}
/..
;
echo
*
)
"
llm/generate/gen_windows.ps1
View file @
652c273f
...
...
@@ -35,7 +35,7 @@ function init_vars {
)
$
script
:
commonCpuDefs
=
@(
"-DCMAKE_POSITION_INDEPENDENT_CODE=on"
)
$
script
:
ARCH
=
$
Env
:
PROCESSOR_ARCHITECTURE
.
ToLower
()
$
script
:
DIST_BASE
=
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\ollama
_
runners"
$
script
:
DIST_BASE
=
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\
lib\
ollama
\
runners"
md
"
$
script
:
DIST_BASE
"
-ea
0
>
$null
if
(
$
env
:
CGO_CFLAGS
-contains
"-g"
)
{
$
script
:
cmakeDefs
+=
@(
"-DCMAKE_VERBOSE_MAKEFILE=on"
,
"-DLLAMA_SERVER_VERBOSE=on"
,
"-DCMAKE_BUILD_TYPE=RelWithDebInfo"
)
...
...
@@ -117,7 +117,7 @@ function build {
if
(
$cmakeDefs
-contains
"-G"
)
{
$extra
=
@(
"-j8"
)
}
else
{
$extra
=
@(
"--"
,
"/
p:CL_MPc
ount
=
8"
)
$extra
=
@(
"--"
,
"/
maxCpuC
ount
:
8"
)
}
write-host
"building with: cmake --build
$
script
:
buildDir
--config
$
script
:
config
$(
$
script
:
cmakeTargets
|
ForEach-Object
{
`
"--target
`"
,
$_
})
$extra
"
&
cmake
--build
$
script
:
buildDir
--config
$
script
:
config
(
$
script
:
cmakeTargets
|
ForEach-Object
{
"--target"
,
$_
}
)
$extra
...
...
@@ -261,7 +261,7 @@ function build_cuda() {
if ((-not "
${env:OLLAMA_SKIP_CUDA_GENERATE}
") -and ("
${script:CUDA_LIB_DIR}
")) {
# Then build cuda as a dynamically loaded library
$nvcc
= "
$
script
:
CUDA_LIB_DIR
\nvcc.exe
"
$
script
:
CUDA_VERSION
=(get-item (
$nvcc
| split-path | split-path)).Basename
$
script
:
CUDA_VERSION
=(
(
get-item (
$nvcc
| split-path | split-path)).Basename
-Split "
\.
")[0]
if (
$null
-ne
$
script
:
CUDA_VERSION
) {
$
script
:
CUDA_VARIANT
="
_
"+
$
script
:
CUDA_VERSION
}
...
...
@@ -273,9 +273,9 @@ function build_cuda() {
"
-DGGML_CUDA
=
ON
",
"
-DGGML_AVX
=
on
",
"
-DGGML_AVX2
=
off
",
"
-DC
UDAToolkit_INCLUDE_DIR
=
$
script
:
CUDA_INCLUDE_DIR
",
"
-DCMAKE_CUDA_
FLAGS
=
-t8
",
"
-DCMAKE_CUDA_
ARCHITECTURES
=
${script:CMAKE_CUDA_ARCHITECTURES}
"
"
-DC
MAKE_CUDA_FLAGS
=
-t6
",
"
-DCMAKE_CUDA_
ARCHITECTURES
=
${script:CMAKE_CUDA_ARCHITECTURES}
",
"
-DCMAKE_CUDA_
COMPILER_TOOLKIT_ROOT
=
$
env
:
CUDA_PATH
"
)
if (
$null
-ne
$
env
:
OLLAMA_CUSTOM_CUDA_DEFS
) {
write-host "
OLLAMA_CUSTOM_CUDA_DEFS
=
`
"
${env:OLLAMA_CUSTOM_CUDA_DEFS}
`"
"
...
...
@@ -286,12 +286,11 @@ function build_cuda() {
sign
install
rm
-ea
0
-recurse
-force
-path
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\cuda\"
md
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\cuda\"
-ea
0
>
$null
write-host
"copying CUDA dependencies to
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\cuda\"
cp
"
${script:CUDA_LIB_DIR}
\cudart64_*.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\cuda\"
cp
"
${script:CUDA_LIB_DIR}
\cublas64_*.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\cuda\"
cp
"
${script:CUDA_LIB_DIR}
\cublasLt64_*.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\cuda\"
md
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\lib\ollama\"
-ea
0
>
$null
write-host
"copying CUDA dependencies to
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\lib\ollama\"
cp
"
${script:CUDA_LIB_DIR}
\cudart64_*.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\lib\ollama\"
cp
"
${script:CUDA_LIB_DIR}
\cublas64_*.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\lib\ollama\"
cp
"
${script:CUDA_LIB_DIR}
\cublasLt64_*.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\lib\ollama\"
}
else
{
write-host
"Skipping CUDA generation step"
}
...
...
@@ -325,18 +324,17 @@ function build_oneapi() {
sign
install
rm
-ea
0
-recurse
-force
-path
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\oneapi\"
md
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\oneapi\"
-ea
0
>
$null
cp
"
${env:ONEAPI_ROOT}
\compiler\latest\bin\libirngmd.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\oneapi\"
cp
"
${env:ONEAPI_ROOT}
\compiler\latest\bin\libmmd.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\oneapi\"
cp
"
${env:ONEAPI_ROOT}
\compiler\latest\bin\pi_level_zero.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\oneapi\"
cp
"
${env:ONEAPI_ROOT}
\compiler\latest\bin\pi_unified_runtime.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\oneapi\"
cp
"
${env:ONEAPI_ROOT}
\compiler\latest\bin\pi_win_proxy_loader.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\oneapi\"
cp
"
${env:ONEAPI_ROOT}
\compiler\latest\bin\svml_dispmd.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\oneapi\"
cp
"
${env:ONEAPI_ROOT}
\compiler\latest\bin\sycl7.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\oneapi\"
cp
"
${env:ONEAPI_ROOT}
\mkl\latest\bin\mkl_core.2.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\oneapi\"
cp
"
${env:ONEAPI_ROOT}
\mkl\latest\bin\mkl_sycl_blas.4.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\oneapi\"
cp
"
${env:ONEAPI_ROOT}
\mkl\latest\bin\mkl_tbb_thread.2.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\oneapi\"
md
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\lib\ollama\"
-ea
0
>
$null
cp
"
${env:ONEAPI_ROOT}
\compiler\latest\bin\libirngmd.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\lib\ollama\"
cp
"
${env:ONEAPI_ROOT}
\compiler\latest\bin\libmmd.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\lib\ollama\"
cp
"
${env:ONEAPI_ROOT}
\compiler\latest\bin\pi_level_zero.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\lib\ollama\"
cp
"
${env:ONEAPI_ROOT}
\compiler\latest\bin\pi_unified_runtime.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\lib\ollama\"
cp
"
${env:ONEAPI_ROOT}
\compiler\latest\bin\pi_win_proxy_loader.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\lib\ollama\"
cp
"
${env:ONEAPI_ROOT}
\compiler\latest\bin\svml_dispmd.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\lib\ollama\"
cp
"
${env:ONEAPI_ROOT}
\compiler\latest\bin\sycl7.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\lib\ollama\"
cp
"
${env:ONEAPI_ROOT}
\mkl\latest\bin\mkl_core.2.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\lib\ollama\"
cp
"
${env:ONEAPI_ROOT}
\mkl\latest\bin\mkl_sycl_blas.4.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\lib\ollama\"
cp
"
${env:ONEAPI_ROOT}
\mkl\latest\bin\mkl_tbb_thread.2.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\lib\ollama\"
}
else
{
Write-Host
"Skipping oneAPI generation step"
}
...
...
@@ -386,12 +384,11 @@ function build_rocm() {
sign
install
rm -ea 0 -recurse -force -path "
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\rocm\
"
md "
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\rocm\rocblas\library\
" -ea 0 >
$null
cp "
${env:HIP_PATH}
\bin\hipblas.dll
" "
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\rocm\
"
cp "
${env:HIP_PATH}
\bin\rocblas.dll
" "
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\rocm\
"
md "
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\lib\ollama\rocblas\library\
" -ea 0 >
$null
cp "
${env:HIP_PATH}
\bin\hipblas.dll
" "
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\lib\ollama\
"
cp "
${env:HIP_PATH}
\bin\rocblas.dll
" "
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\lib\ollama\
"
# amdhip64.dll dependency comes from the driver and must be installed on the host to use AMD GPUs
cp "
${env:HIP_PATH}
\bin\rocblas\library\*
" "
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\
rocm
\rocblas\library\
"
cp "
${env:HIP_PATH}
\bin\rocblas\library\*
" "
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\
lib\ollama
\rocblas\library\
"
} else {
write-host "
Skipping
ROCm
generation
step
"
}
...
...
llm/payload.go
View file @
652c273f
...
...
@@ -82,8 +82,8 @@ func serversForGpu(info gpu.GpuInfo) []string {
// glob workDir for files that start with ollama_
availableServers
:=
getAvailableServers
()
requested
:=
info
.
Library
if
info
.
Variant
!=
gpu
.
CPUCapabilityNone
{
requested
+=
"_"
+
info
.
Variant
.
String
()
if
info
.
Variant
!=
gpu
.
CPUCapabilityNone
.
String
()
{
requested
+=
"_"
+
info
.
Variant
}
servers
:=
[]
string
{}
...
...
llm/server.go
View file @
652c273f
...
...
@@ -306,20 +306,18 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
if
runtime
.
GOOS
==
"windows"
{
pathEnv
=
"PATH"
}
//
prepend
the server directory
to
LD_LIBRARY_PATH/PATH
and the parent dir for common dependencies
libraryPaths
:=
[]
string
{
dir
,
filepath
.
Dir
(
dir
)
}
//
Start with
the server directory
for the
LD_LIBRARY_PATH/PATH
libraryPaths
:=
[]
string
{
dir
}
if
libraryPath
,
ok
:=
os
.
LookupEnv
(
pathEnv
);
ok
{
// Append our runner directory to the path
// This will favor system libraries over our bundled library dependencies
// favor our bundled library dependencies over system libraries
libraryPaths
=
append
(
libraryPaths
,
filepath
.
SplitList
(
libraryPath
)
...
)
}
// Note: we always put the dependency path first
// since this was the exact version we verified for AMD GPUs
// and we favor what the user had in their path
// since this was the exact version we compiled/linked against
if
gpus
[
0
]
.
DependencyPath
!=
""
{
//
TODO refine for multi-gpu support
//
assume gpus from the same library have the same dependency path
libraryPaths
=
append
([]
string
{
gpus
[
0
]
.
DependencyPath
},
libraryPaths
...
)
}
...
...
scripts/build_linux.sh
View file @
652c273f
...
...
@@ -4,6 +4,7 @@ set -eu
export
VERSION
=
${
VERSION
:-
$(
git describe
--tags
--first-parent
--abbrev
=
7
--long
--dirty
--always
|
sed
-e
"s/^v//g"
)
}
export
GOFLAGS
=
"'-ldflags=-w -s
\"
-X=github.com/ollama/ollama/version.Version=
$VERSION
\"
\"
-X=github.com/ollama/ollama/server.mode=release
\"
'"
GZIP
=
$(
which pigz 2>/dev/null
||
echo
"gzip"
)
BUILD_ARCH
=
${
BUILD_ARCH
:-
"amd64 arm64"
}
export
AMDGPU_TARGETS
=
${
AMDGPU_TARGETS
:
=
""
}
...
...
@@ -21,11 +22,10 @@ for TARGETARCH in ${BUILD_ARCH}; do
-t
builder:
$TARGETARCH
\
.
docker create
--platform
linux/
$TARGETARCH
--name
builder-
$TARGETARCH
builder:
$TARGETARCH
docker
cp
builder-
$TARGETARCH
:/go/src/github.com/ollama/ollama/ollama ./dist/ollama-linux-
$TARGETARCH
if
[
"
$TARGETARCH
"
=
"amd64"
]
;
then
docker
cp
builder-
$TARGETARCH
:/go/src/github.com/ollama/ollama/dist/deps/ ./dist/
fi
rm
-rf
./dist/linux-
$TARGETARCH
docker
cp
builder-
$TARGETARCH
:/go/src/github.com/ollama/ollama/dist/linux-
$TARGETARCH
./dist
docker
rm
builder-
$TARGETARCH
echo
"Compressing final linux bundle..."
rm
-f
./dist/ollama-linux-
$TARGETARCH
.tgz
(
cd
dist/linux-
$TARGETARCH
&&
tar
cf -
.
|
${
GZIP
}
--best
>
../ollama-linux-
$TARGETARCH
.tgz
)
done
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment