Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
652c273f
"docs/tutorials/MegatronGPT2Tutorial.md" did not exist on "766f030e722b28fcca66f077f9d89bcf502c3de8"
Unverified
Commit
652c273f
authored
Aug 19, 2024
by
Daniel Hiltgen
Committed by
GitHub
Aug 19, 2024
Browse files
Merge pull request #5049 from dhiltgen/cuda_v12
Cuda v12
parents
88e77050
f9e31da9
Changes
23
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
346 additions
and
190 deletions
+346
-190
.github/workflows/release.yaml
.github/workflows/release.yaml
+15
-5
Dockerfile
Dockerfile
+103
-30
app/ollama.iss
app/ollama.iss
+6
-15
docs/linux.md
docs/linux.md
+4
-6
envconfig/config.go
envconfig/config.go
+5
-5
gpu/amd_common.go
gpu/amd_common.go
+1
-1
gpu/amd_windows.go
gpu/amd_windows.go
+1
-1
gpu/cuda_common.go
gpu/cuda_common.go
+43
-0
gpu/gpu.go
gpu/gpu.go
+49
-21
gpu/gpu_darwin.go
gpu/gpu_darwin.go
+2
-2
gpu/gpu_linux.go
gpu/gpu_linux.go
+1
-1
gpu/types.go
gpu/types.go
+8
-5
llm/ext_server/CMakeLists.txt
llm/ext_server/CMakeLists.txt
+2
-1
llm/generate/gen_common.sh
llm/generate/gen_common.sh
+24
-8
llm/generate/gen_darwin.sh
llm/generate/gen_darwin.sh
+2
-0
llm/generate/gen_linux.sh
llm/generate/gen_linux.sh
+41
-45
llm/generate/gen_windows.ps1
llm/generate/gen_windows.ps1
+26
-29
llm/payload.go
llm/payload.go
+2
-2
llm/server.go
llm/server.go
+5
-7
scripts/build_linux.sh
scripts/build_linux.sh
+6
-6
No files found.
.github/workflows/release.yaml
View file @
652c273f
...
...
@@ -187,6 +187,13 @@ jobs:
generate-windows-cuda
:
environment
:
release
runs-on
:
windows
strategy
:
matrix
:
cuda
:
-
version
:
"
11"
url
:
'
https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.89_win10.exe'
-
version
:
"
12"
url
:
'
https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_551.61_windows.exe'
env
:
KEY_CONTAINER
:
${{ vars.KEY_CONTAINER }}
steps
:
...
...
@@ -220,11 +227,11 @@ jobs:
with
:
go-version-file
:
go.mod
cache
:
true
-
name
:
'
Install
CUDA'
-
name
:
'
Install
CUDA
${{
matrix.cuda.version
}}
'
run
:
|
$ErrorActionPreference = "Stop"
write-host "downloading CUDA Installer"
Invoke-WebRequest -Uri "
https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.89_win10.exe
" -OutFile "${env:RUNNER_TEMP}\cuda-install.exe"
Invoke-WebRequest -Uri "
${{ matrix.cuda.url }}
" -OutFile "${env:RUNNER_TEMP}\cuda-install.exe"
write-host "Installing CUDA"
Start-Process "${env:RUNNER_TEMP}\cuda-install.exe" -ArgumentList '-s' -NoNewWindow -Wait
write-host "Completed CUDA"
...
...
@@ -256,7 +263,7 @@ jobs:
cp "${NVIDIA_DIR}\cublasLt64_*.dll" "dist\deps\"
-
uses
:
actions/upload-artifact@v4
with
:
name
:
generate-windows-cuda
name
:
generate-windows-cuda
-${{ matrix.cuda.version }}
path
:
|
llm/build/**/bin/*
dist/windows-amd64/**
...
...
@@ -265,6 +272,7 @@ jobs:
name
:
windows-cuda-deps
path
:
dist/deps/*
# Import the prior generation steps and build the final windows assets
build-windows
:
environment
:
release
...
...
@@ -314,7 +322,10 @@ jobs:
name
:
generate-windows-cpu
-
uses
:
actions/download-artifact@v4
with
:
name
:
generate-windows-cuda
name
:
generate-windows-cuda-11
-
uses
:
actions/download-artifact@v4
with
:
name
:
generate-windows-cuda-12
-
uses
:
actions/download-artifact@v4
with
:
name
:
windows-cuda-deps
...
...
@@ -363,7 +374,6 @@ jobs:
-
run
:
|
./scripts/build_linux.sh
./scripts/build_docker.sh
mv dist/deps/* dist/
-
uses
:
actions/upload-artifact@v4
with
:
name
:
dist-linux-amd64
...
...
Dockerfile
View file @
652c273f
ARG
GOLANG_VERSION=1.22.5
ARG
CMAKE_VERSION=3.22.1
# this CUDA_VERSION corresponds with the one specified in docs/gpu.md
ARG
CUDA_VERSION=11.3.1
ARG
CUDA_VERSION_11=11.3.1
ARG
CUDA_V11_ARCHITECTURES="50;52;53;60;61;62;70;72;75;80;86"
ARG
CUDA_VERSION_12=12.4.0
ARG
CUDA_V12_ARCHITECTURES="60;61;62;70;72;75;80;86;87;89;90;90a"
ARG
ROCM_VERSION=6.1.2
# Copy the minimal context we need to run the generate scripts
...
...
@@ -10,7 +12,7 @@ COPY .git .git
COPY
.gitmodules .gitmodules
COPY
llm llm
FROM
--platform=linux/amd64 nvidia/cuda:$CUDA_VERSION-devel-centos7 AS cuda-build-amd64
FROM
--platform=linux/amd64 nvidia/cuda:$CUDA_VERSION
_11
-devel-centos7 AS cuda-
11-
build-amd64
ARG
CMAKE_VERSION
COPY
./scripts/rh_linux_deps.sh /
RUN
CMAKE_VERSION
=
${
CMAKE_VERSION
}
sh /rh_linux_deps.sh
...
...
@@ -18,9 +20,34 @@ ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
COPY
--from=llm-code / /go/src/github.com/ollama/ollama/
WORKDIR
/go/src/github.com/ollama/ollama/llm/generate
ARG
CGO_CFLAGS
RUN
OLLAMA_SKIP_STATIC_GENERATE
=
1
OLLAMA_SKIP_CPU_GENERATE
=
1 sh gen_linux.sh
FROM
--platform=linux/arm64 nvidia/cuda:$CUDA_VERSION-devel-rockylinux8 AS cuda-build-arm64
ARG
CUDA_V11_ARCHITECTURES
ENV
GOARCH amd64
RUN
--mount
=
type
=
cache,target
=
/root/.ccache
\
OLLAMA_SKIP_STATIC_GENERATE
=
1
\
OLLAMA_SKIP_CPU_GENERATE
=
1
\
CMAKE_CUDA_ARCHITECTURES
=
"
${
CUDA_V11_ARCHITECTURES
}
"
\
CUDA_VARIANT
=
"_v11"
\
bash gen_linux.sh
FROM
--platform=linux/amd64 nvidia/cuda:$CUDA_VERSION_12-devel-centos7 AS cuda-12-build-amd64
ARG
CMAKE_VERSION
COPY
./scripts/rh_linux_deps.sh /
RUN
CMAKE_VERSION
=
${
CMAKE_VERSION
}
sh /rh_linux_deps.sh
ENV
PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
COPY
--from=llm-code / /go/src/github.com/ollama/ollama/
WORKDIR
/go/src/github.com/ollama/ollama/llm/generate
ARG
CGO_CFLAGS
ARG
CUDA_V12_ARCHITECTURES
ENV
GOARCH amd64
RUN
--mount
=
type
=
cache,target
=
/root/.ccache
\
OLLAMA_SKIP_STATIC_GENERATE
=
1
\
OLLAMA_SKIP_CPU_GENERATE
=
1
\
CMAKE_CUDA_ARCHITECTURES
=
"
${
CUDA_V12_ARCHITECTURES
}
"
\
CUDA_VARIANT
=
"_v12"
\
OLLAMA_CUSTOM_CUDA_DEFS
=
"-DGGML_CUDA_USE_GRAPHS=on"
\
bash gen_linux.sh
FROM
--platform=linux/arm64 nvidia/cuda:$CUDA_VERSION_11-devel-rockylinux8 AS cuda-11-build-server-arm64
ARG
CMAKE_VERSION
COPY
./scripts/rh_linux_deps.sh /
RUN
CMAKE_VERSION
=
${
CMAKE_VERSION
}
sh /rh_linux_deps.sh
...
...
@@ -28,7 +55,32 @@ ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH
COPY
--from=llm-code / /go/src/github.com/ollama/ollama/
WORKDIR
/go/src/github.com/ollama/ollama/llm/generate
ARG
CGO_CFLAGS
RUN
OLLAMA_SKIP_STATIC_GENERATE
=
1
OLLAMA_SKIP_CPU_GENERATE
=
1 sh gen_linux.sh
ARG
CUDA_V11_ARCHITECTURES
ENV
GOARCH arm64
RUN
OLLAMA_SKIP_STATIC_GENERATE
=
1
\
OLLAMA_SKIP_CPU_GENERATE
=
1
\
CMAKE_CUDA_ARCHITECTURES
=
"
${
CUDA_V11_ARCHITECTURES
}
"
\
CUDA_VARIANT
=
"_v11"
\
bash gen_linux.sh
FROM
--platform=linux/arm64 nvidia/cuda:$CUDA_VERSION_12-devel-rockylinux8 AS cuda-12-build-server-arm64
ARG
CMAKE_VERSION
COPY
./scripts/rh_linux_deps.sh /
RUN
CMAKE_VERSION
=
${
CMAKE_VERSION
}
sh /rh_linux_deps.sh
ENV
PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH
COPY
--from=llm-code / /go/src/github.com/ollama/ollama/
WORKDIR
/go/src/github.com/ollama/ollama/llm/generate
ARG
CGO_CFLAGS
ARG
CUDA_V12_ARCHITECTURES
ENV
GOARCH arm64
RUN
--mount
=
type
=
cache,target
=
/root/.ccache
\
OLLAMA_SKIP_STATIC_GENERATE
=
1
\
OLLAMA_SKIP_CPU_GENERATE
=
1
\
CMAKE_CUDA_ARCHITECTURES
=
"
${
CUDA_V12_ARCHITECTURES
}
"
\
CUDA_VARIANT
=
"_v12"
\
OLLAMA_CUSTOM_CUDA_DEFS
=
"-DGGML_CUDA_USE_GRAPHS=on"
\
bash gen_linux.sh
FROM
--platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete AS rocm-build-amd64
ARG
CMAKE_VERSION
...
...
@@ -40,15 +92,11 @@ COPY --from=llm-code / /go/src/github.com/ollama/ollama/
WORKDIR
/go/src/github.com/ollama/ollama/llm/generate
ARG
CGO_CFLAGS
ARG
AMDGPU_TARGETS
RUN
OLLAMA_SKIP_STATIC_GENERATE
=
1
OLLAMA_SKIP_CPU_GENERATE
=
1 sh gen_linux.sh
RUN
mkdir
/tmp/scratch
&&
\
for
dep
in
$(
zcat /go/src/github.com/ollama/ollama/llm/build/linux/x86_64/rocm
*
/bin/deps.txt.gz
)
;
do
\
cp
${
dep
}
/tmp/scratch/
||
exit
1
;
\
done
&&
\
(
cd
/opt/rocm/lib
&&
tar
cf - rocblas/library
)
|
(
cd
/tmp/scratch/
&&
tar
xf -
)
&&
\
mkdir
-p
/go/src/github.com/ollama/ollama/dist/deps/
&&
\
(
cd
/tmp/scratch/
&&
tar
czvf /go/src/github.com/ollama/ollama/dist/deps/ollama-linux-amd64-rocm.tgz
.
)
ENV
GOARCH amd64
RUN
--mount
=
type
=
cache,target
=
/root/.ccache
\
OLLAMA_SKIP_STATIC_GENERATE
=
1
OLLAMA_SKIP_CPU_GENERATE
=
1 bash gen_linux.sh
RUN
mkdir
-p
../../dist/linux-amd64/lib/ollama
&&
\
(
cd
/opt/rocm/lib
&&
tar
cf - rocblas/library
)
|
(
cd
../../dist/linux-amd64/lib/ollama
&&
tar
xf -
)
FROM
--platform=linux/amd64 centos:7 AS cpu-builder-amd64
ARG
CMAKE_VERSION
...
...
@@ -59,16 +107,21 @@ ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
COPY
--from=llm-code / /go/src/github.com/ollama/ollama/
ARG
OLLAMA_CUSTOM_CPU_DEFS
ARG
CGO_CFLAGS
ENV
GOARCH amd64
WORKDIR
/go/src/github.com/ollama/ollama/llm/generate
FROM
--platform=linux/amd64 cpu-builder-amd64 AS static-build-amd64
RUN
OLLAMA_CPU_TARGET
=
"static"
sh gen_linux.sh
RUN
--mount
=
type
=
cache,target
=
/root/.ccache
\
OLLAMA_CPU_TARGET
=
"static"
bash gen_linux.sh
FROM
--platform=linux/amd64 cpu-builder-amd64 AS cpu-build-amd64
RUN
OLLAMA_SKIP_STATIC_GENERATE
=
1
OLLAMA_CPU_TARGET
=
"cpu"
sh gen_linux.sh
RUN
--mount
=
type
=
cache,target
=
/root/.ccache
\
OLLAMA_SKIP_STATIC_GENERATE
=
1
OLLAMA_CPU_TARGET
=
"cpu"
bash gen_linux.sh
FROM
--platform=linux/amd64 cpu-builder-amd64 AS cpu_avx-build-amd64
RUN
OLLAMA_SKIP_STATIC_GENERATE
=
1
OLLAMA_CPU_TARGET
=
"cpu_avx"
sh gen_linux.sh
RUN
--mount
=
type
=
cache,target
=
/root/.ccache
\
OLLAMA_SKIP_STATIC_GENERATE
=
1
OLLAMA_CPU_TARGET
=
"cpu_avx"
bash gen_linux.sh
FROM
--platform=linux/amd64 cpu-builder-amd64 AS cpu_avx2-build-amd64
RUN
OLLAMA_SKIP_STATIC_GENERATE
=
1
OLLAMA_CPU_TARGET
=
"cpu_avx2"
sh gen_linux.sh
RUN
--mount
=
type
=
cache,target
=
/root/.ccache
\
OLLAMA_SKIP_STATIC_GENERATE
=
1
OLLAMA_CPU_TARGET
=
"cpu_avx2"
bash gen_linux.sh
FROM
--platform=linux/arm64 rockylinux:8 AS cpu-builder-arm64
ARG
CMAKE_VERSION
...
...
@@ -79,12 +132,15 @@ ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH
COPY
--from=llm-code / /go/src/github.com/ollama/ollama/
ARG
OLLAMA_CUSTOM_CPU_DEFS
ARG
CGO_CFLAGS
ENV
GOARCH arm64
WORKDIR
/go/src/github.com/ollama/ollama/llm/generate
FROM
--platform=linux/arm64 cpu-builder-arm64 AS static-build-arm64
RUN
OLLAMA_CPU_TARGET
=
"static"
sh gen_linux.sh
RUN
--mount
=
type
=
cache,target
=
/root/.ccache
\
OLLAMA_CPU_TARGET
=
"static"
bash gen_linux.sh
FROM
--platform=linux/arm64 cpu-builder-arm64 AS cpu-build-arm64
RUN
OLLAMA_SKIP_STATIC_GENERATE
=
1
OLLAMA_CPU_TARGET
=
"cpu"
sh gen_linux.sh
RUN
--mount
=
type
=
cache,target
=
/root/.ccache
\
OLLAMA_SKIP_STATIC_GENERATE
=
1
OLLAMA_CPU_TARGET
=
"cpu"
bash gen_linux.sh
# Intermediate stage used for ./scripts/build_linux.sh
...
...
@@ -95,12 +151,16 @@ COPY . .
COPY
--from=static-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY
--from=cpu_avx-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY
--from=cpu_avx2-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY
--from=cuda-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY
--from=cuda-11-build-amd64 /go/src/github.com/ollama/ollama/dist/ dist/
COPY
--from=cuda-11-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY
--from=cuda-12-build-amd64 /go/src/github.com/ollama/ollama/dist/ dist/
COPY
--from=cuda-12-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY
--from=rocm-build-amd64 /go/src/github.com/ollama/ollama/dist/ dist/
COPY
--from=rocm-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY
--from=rocm-build-amd64 /go/src/github.com/ollama/ollama/dist/deps/ ./dist/deps/
ARG
GOFLAGS
ARG
CGO_CFLAGS
RUN
go build
-trimpath
.
RUN
--mount
=
type
=
cache,target
=
/root/.ccache
\
go build
-trimpath
-o
dist/linux-amd64/bin/ollama .
# Intermediate stage used for ./scripts/build_linux.sh
FROM
--platform=linux/arm64 cpu-build-arm64 AS build-arm64
...
...
@@ -109,23 +169,36 @@ ARG GOLANG_VERSION
WORKDIR
/go/src/github.com/ollama/ollama
COPY
. .
COPY
--from=static-build-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY
--from=cuda-build-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY
--from=cuda-11-build-server-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
COPY
--from=cuda-11-build-server-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY
--from=cuda-12-build-server-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
COPY
--from=cuda-12-build-server-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
ARG
GOFLAGS
ARG
CGO_CFLAGS
RUN
go build
-trimpath
.
RUN
--mount
=
type
=
cache,target
=
/root/.ccache
\
go build
-trimpath
-o
dist/linux-arm64/bin/ollama .
# Strip out ROCm dependencies to keep the primary image lean
FROM
--platform=linux/amd64 ubuntu:22.04 as amd64-libs-without-rocm
COPY
--from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /scratch/
RUN
cd
/scratch/ollama/
&&
rm
-rf
rocblas libamd
*
libdrm
*
libroc
*
libhip
*
libhsa
*
# Runtime stages
FROM
--platform=linux/amd64 ubuntu:22.04 as runtime-amd64
COPY
--from=amd64-libs-without-rocm /scratch/ /lib/
RUN
apt-get update
&&
apt-get
install
-y
ca-certificates
COPY
--from=build-amd64 /go/src/github.com/ollama/ollama/ollama /bin/ollama
COPY
--from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/
FROM
--platform=linux/arm64 ubuntu:22.04 as runtime-arm64
COPY
--from=build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/
RUN
apt-get update
&&
apt-get
install
-y
ca-certificates
COPY
--from=build-arm64 /go/src/github.com/ollama/ollama/
ollama /bin/ollama
COPY
--from=build-arm64 /go/src/github.com/ollama/ollama/
dist/linux-arm64/bin/ /bin/
# Radeon images are much larger so we keep it distinct from the CPU/CUDA image
FROM
--platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete as runtime-rocm
RUN
update-pciids
COPY
--from=build-amd64 /go/src/github.com/ollama/ollama/ollama /bin/ollama
COPY
--from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/
RUN
ln
-s
/opt/rocm/lib /lib/ollama
EXPOSE
11434
ENV
OLLAMA_HOST 0.0.0.0
...
...
app/ollama.iss
View file @
652c273f
...
...
@@ -87,20 +87,11 @@ DialogFontSize=12
[Files]
Source: ".\app.exe"; DestDir: "{app}"; DestName: "{#MyAppExeName}" ; Flags: ignoreversion 64bit
Source: "..\ollama.exe"; DestDir: "{app}"; Flags: ignoreversion 64bit
Source: "..\dist\windows-{#ARCH}\ollama
_
runners\*"; DestDir: "{app}\ollama
_
runners"; Flags: ignoreversion 64bit recursesubdirs
Source: "..\ollama.exe"; DestDir: "{app}
\bin
"; Flags: ignoreversion 64bit
Source: "..\dist\windows-{#ARCH}\
lib\
ollama
\
runners\*"; DestDir: "{app}\
lib\
ollama
\
runners"; Flags: ignoreversion 64bit recursesubdirs
Source: "..\dist\ollama_welcome.ps1"; DestDir: "{app}"; Flags: ignoreversion
Source: ".\assets\app.ico"; DestDir: "{app}"; Flags: ignoreversion
#if DirExists("..\dist\windows-amd64\cuda")
Source: "..\dist\windows-amd64\cuda\*"; DestDir: "{app}\cuda\"; Flags: ignoreversion recursesubdirs
#endif
#if DirExists("..\dist\windows-amd64\oneapi")
Source: "..\dist\windows-amd64\oneapi\*"; DestDir: "{app}\oneapi\"; Flags: ignoreversion recursesubdirs
#endif
#if DirExists("..\dist\windows-amd64\rocm")
Source: "..\dist\windows-amd64\rocm\*"; DestDir: "{app}\rocm\"; Flags: ignoreversion recursesubdirs
#endif
Source: "..\dist\windows-amd64\lib\ollama\*"; DestDir: "{app}\lib\ollama\"; Flags: ignoreversion recursesubdirs
[Icons]
Name: "{group}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; IconFilename: "{app}\app.ico"
...
...
@@ -108,7 +99,7 @@ Name: "{userstartup}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; IconFilen
Name: "{userprograms}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; IconFilename: "{app}\app.ico"
[Run]
Filename: "{cmd}"; Parameters: "/C set PATH={app};%PATH% & ""{app}\{#MyAppExeName}"""; Flags: postinstall nowait runhidden
Filename: "{cmd}"; Parameters: "/C set PATH={app}
\bin
;%PATH% & ""{app}\{#MyAppExeName}"""; Flags: postinstall nowait runhidden
[UninstallRun]
; Filename: "{cmd}"; Parameters: "/C ""taskkill /im ''{#MyAppExeName}'' /f /t"; Flags: runhidden
...
...
@@ -143,8 +134,8 @@ SetupAppRunningError=Another Ollama installer is running.%n%nPlease cancel or fi
[Registry]
Root: HKCU; Subkey: "Environment"; \
ValueType: expandsz; ValueName: "Path"; ValueData: "{olddata};{app}"; \
Check: NeedsAddPath('{app}')
ValueType: expandsz; ValueName: "Path"; ValueData: "{olddata};{app}
\bin
"; \
Check: NeedsAddPath('{app}
\bin
')
[Code]
...
...
docs/linux.md
View file @
652c273f
...
...
@@ -20,13 +20,12 @@ GPU.
## Manual install
### Download
the
`ollama`
binary
### Download `ollama`
Ollama is distributed as a self-contained binary. Download it to a directory in your PATH
:
Download and extract the Linux package
:
```
bash
sudo
curl
-L
https://ollama.com/download/ollama-linux-amd64
-o
/usr/bin/ollama
sudo chmod
+x /usr/bin/ollama
curl
-fsSL
https://ollama.com/download/ollama-linux-amd64.tgz |
sudo tar
zx
-C
/usr
```
### Adding Ollama as a startup service (recommended)
...
...
@@ -96,8 +95,7 @@ curl -fsSL https://ollama.com/install.sh | sh
Or by downloading the ollama binary:
```
bash
sudo
curl
-L
https://ollama.com/download/ollama-linux-amd64
-o
/usr/bin/ollama
sudo chmod
+x /usr/bin/ollama
curl
-fsSL
https://ollama.com/download/ollama-linux-amd64.tgz |
sudo tar
zx
-C
/usr
```
## Installing specific versions
...
...
envconfig/config.go
View file @
652c273f
...
...
@@ -174,7 +174,7 @@ func RunnersDir() (p string) {
defer
func
()
{
if
p
==
""
{
slog
.
Error
(
"unable to locate llm runner directory. Set OLLAMA_RUNNERS_DIR to the location of 'ollama
_
runners'"
)
slog
.
Error
(
"unable to locate llm runner directory. Set OLLAMA_RUNNERS_DIR to the location of 'ollama
/
runners'"
)
}
}()
...
...
@@ -190,17 +190,17 @@ func RunnersDir() (p string) {
}
var
paths
[]
string
for
_
,
root
:=
range
[]
string
{
filepath
.
Dir
(
exe
),
cwd
}
{
for
_
,
root
:=
range
[]
string
{
filepath
.
Dir
(
exe
),
filepath
.
Join
(
filepath
.
Dir
(
exe
),
".."
),
cwd
}
{
paths
=
append
(
paths
,
root
,
filepath
.
Join
(
root
,
"windows
-"
+
runtime
.
GOARCH
),
filepath
.
Join
(
root
,
"dist"
,
"windows
-"
+
runtime
.
GOARCH
),
filepath
.
Join
(
root
,
runtime
.
GOOS
+
"
-"
+
runtime
.
GOARCH
),
filepath
.
Join
(
root
,
"dist"
,
runtime
.
GOOS
+
"
-"
+
runtime
.
GOARCH
),
)
}
// Try a few variations to improve developer experience when building from source in the local tree
for
_
,
path
:=
range
paths
{
candidate
:=
filepath
.
Join
(
path
,
"ollama
_
runners"
)
candidate
:=
filepath
.
Join
(
path
,
"lib"
,
"ollama
"
,
"
runners"
)
if
_
,
err
:=
os
.
Stat
(
candidate
);
err
==
nil
{
p
=
candidate
break
...
...
gpu/amd_common.go
View file @
652c273f
...
...
@@ -54,7 +54,7 @@ func commonAMDValidateLibDir() (string, error) {
// Installer payload location if we're running the installed binary
exe
,
err
:=
os
.
Executable
()
if
err
==
nil
{
rocmTargetDir
:=
filepath
.
Join
(
filepath
.
Dir
(
exe
),
"
rocm
"
)
rocmTargetDir
:=
filepath
.
Join
(
filepath
.
Dir
(
exe
),
"
.."
,
"lib"
,
"ollama
"
)
if
rocmLibUsable
(
rocmTargetDir
)
{
slog
.
Debug
(
"detected ROCM next to ollama executable "
+
rocmTargetDir
)
return
rocmTargetDir
,
nil
...
...
gpu/amd_windows.go
View file @
652c273f
...
...
@@ -153,7 +153,7 @@ func AMDValidateLibDir() (string, error) {
// Installer payload (if we're running from some other location)
localAppData
:=
os
.
Getenv
(
"LOCALAPPDATA"
)
appDir
:=
filepath
.
Join
(
localAppData
,
"Programs"
,
"Ollama"
)
rocmTargetDir
:=
filepath
.
Join
(
appDir
,
"
rocm
"
)
rocmTargetDir
:=
filepath
.
Join
(
appDir
,
"
.."
,
"lib"
,
"ollama
"
)
if
rocmLibUsable
(
rocmTargetDir
)
{
slog
.
Debug
(
"detected ollama installed ROCm at "
+
rocmTargetDir
)
return
rocmTargetDir
,
nil
...
...
gpu/cuda_common.go
View file @
652c273f
...
...
@@ -4,9 +4,17 @@ package gpu
import
(
"log/slog"
"os"
"regexp"
"runtime"
"strconv"
"strings"
)
// Jetson devices have JETSON_JETPACK="x.y.z" factory set to the Jetpack version installed.
// Included to drive logic for reducing Ollama-allocated overhead on L4T/Jetson devices.
var
CudaTegra
string
=
os
.
Getenv
(
"JETSON_JETPACK"
)
func
cudaGetVisibleDevicesEnv
(
gpuInfo
[]
GpuInfo
)
(
string
,
string
)
{
ids
:=
[]
string
{}
for
_
,
info
:=
range
gpuInfo
{
...
...
@@ -19,3 +27,38 @@ func cudaGetVisibleDevicesEnv(gpuInfo []GpuInfo) (string, string) {
}
return
"CUDA_VISIBLE_DEVICES"
,
strings
.
Join
(
ids
,
","
)
}
func
cudaVariant
(
gpuInfo
CudaGPUInfo
)
string
{
if
runtime
.
GOARCH
==
"arm64"
&&
runtime
.
GOOS
==
"linux"
{
if
CudaTegra
!=
""
{
ver
:=
strings
.
Split
(
CudaTegra
,
"."
)
if
len
(
ver
)
>
0
{
return
"jetpack"
+
ver
[
0
]
}
}
else
if
data
,
err
:=
os
.
ReadFile
(
"/etc/nv_tegra_release"
);
err
==
nil
{
r
:=
regexp
.
MustCompile
(
` R(\d+) `
)
m
:=
r
.
FindSubmatch
(
data
)
if
len
(
m
)
!=
2
{
slog
.
Info
(
"Unexpected format for /etc/nv_tegra_release. Set JETSON_JETPACK to select version"
)
}
else
{
if
l4t
,
err
:=
strconv
.
Atoi
(
string
(
m
[
1
]));
err
==
nil
{
// Note: mapping from L4t -> JP is inconsistent (can't just subtract 30)
// https://developer.nvidia.com/embedded/jetpack-archive
switch
l4t
{
case
35
:
return
"jetpack5"
case
36
:
return
"jetpack6"
default
:
slog
.
Info
(
"unsupported L4T version"
,
"nv_tegra_release"
,
string
(
data
))
}
}
}
}
}
if
gpuInfo
.
computeMajor
<
6
||
gpuInfo
.
DriverMajor
<
12
{
return
"v11"
}
return
"v12"
}
gpu/gpu.go
View file @
652c273f
...
...
@@ -64,10 +64,6 @@ var RocmComputeMin = 9
// TODO find a better way to detect iGPU instead of minimum memory
const
IGPUMemLimit
=
1
*
format
.
GibiByte
// 512G is what they typically report, so anything less than 1G must be iGPU
// Jetson devices have JETSON_JETPACK="x.y.z" factory set to the Jetpack version installed.
// Included to drive logic for reducing Ollama-allocated overhead on L4T/Jetson devices.
var
CudaTegra
string
=
os
.
Getenv
(
"JETSON_JETPACK"
)
// Note: gpuMutex must already be held
func
initCudaHandles
()
*
cudaHandles
{
// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing
...
...
@@ -215,7 +211,7 @@ func GetGPUInfo() GpuInfoList {
GpuInfo
:
GpuInfo
{
memInfo
:
mem
,
Library
:
"cpu"
,
Variant
:
cpuCapability
,
Variant
:
cpuCapability
.
String
()
,
ID
:
"0"
,
},
},
...
...
@@ -229,11 +225,7 @@ func GetGPUInfo() GpuInfoList {
return
GpuInfoList
{
cpus
[
0
]
.
GpuInfo
}
}
// On windows we bundle the nvidia library one level above the runner dir
depPath
:=
""
if
runtime
.
GOOS
==
"windows"
&&
envconfig
.
RunnersDir
()
!=
""
{
depPath
=
filepath
.
Join
(
filepath
.
Dir
(
envconfig
.
RunnersDir
()),
"cuda"
)
}
depPath
:=
LibraryDir
()
// Load ALL libraries
cHandles
=
initCudaHandles
()
...
...
@@ -269,11 +261,23 @@ func GetGPUInfo() GpuInfoList {
gpuInfo
.
FreeMemory
=
uint64
(
memInfo
.
free
)
gpuInfo
.
ID
=
C
.
GoString
(
&
memInfo
.
gpu_id
[
0
])
gpuInfo
.
Compute
=
fmt
.
Sprintf
(
"%d.%d"
,
memInfo
.
major
,
memInfo
.
minor
)
gpuInfo
.
computeMajor
=
int
(
memInfo
.
major
)
gpuInfo
.
computeMinor
=
int
(
memInfo
.
minor
)
gpuInfo
.
MinimumMemory
=
cudaMinimumMemory
gpuInfo
.
DependencyPath
=
depPath
variant
:=
cudaVariant
(
gpuInfo
)
if
depPath
!=
""
{
gpuInfo
.
DependencyPath
=
depPath
// Check for variant specific directory
if
variant
!=
""
{
if
_
,
err
:=
os
.
Stat
(
filepath
.
Join
(
depPath
,
"cuda_"
+
variant
));
err
==
nil
{
gpuInfo
.
DependencyPath
=
filepath
.
Join
(
depPath
,
"cuda_"
+
variant
)
}
}
}
gpuInfo
.
Name
=
C
.
GoString
(
&
memInfo
.
gpu_name
[
0
])
gpuInfo
.
DriverMajor
=
driverMajor
gpuInfo
.
DriverMinor
=
driverMinor
gpuInfo
.
Variant
=
variant
// query the management library as well so we can record any skew between the two
// which represents overhead on the GPU we must set aside on subsequent updates
...
...
@@ -306,13 +310,6 @@ func GetGPUInfo() GpuInfoList {
if
envconfig
.
IntelGPU
()
{
oHandles
=
initOneAPIHandles
()
if
oHandles
!=
nil
&&
oHandles
.
oneapi
!=
nil
{
// On windows we bundle the oneapi library one level above the runner dir
depPath
=
""
if
runtime
.
GOOS
==
"windows"
&&
envconfig
.
RunnersDir
()
!=
""
{
depPath
=
filepath
.
Join
(
filepath
.
Dir
(
envconfig
.
RunnersDir
()),
"oneapi"
)
}
for
d
:=
range
oHandles
.
oneapi
.
num_drivers
{
if
oHandles
.
oneapi
==
nil
{
// shouldn't happen
...
...
@@ -467,10 +464,12 @@ func GetGPUInfo() GpuInfoList {
func
FindGPULibs
(
baseLibName
string
,
defaultPatterns
[]
string
)
[]
string
{
// Multiple GPU libraries may exist, and some may not work, so keep trying until we exhaust them
var
ldPaths
[]
string
var
patterns
[]
string
gpuLibPaths
:=
[]
string
{}
slog
.
Debug
(
"Searching for GPU library"
,
"name"
,
baseLibName
)
// Start with our bundled libraries
patterns
:=
[]
string
{
filepath
.
Join
(
LibraryDir
(),
baseLibName
)}
switch
runtime
.
GOOS
{
case
"windows"
:
ldPaths
=
strings
.
Split
(
os
.
Getenv
(
"PATH"
),
";"
)
...
...
@@ -479,13 +478,14 @@ func FindGPULibs(baseLibName string, defaultPatterns []string) []string {
default
:
return
gpuLibPaths
}
// Start with whatever we find in the PATH/LD_LIBRARY_PATH
// Then with whatever we find in the PATH/LD_LIBRARY_PATH
for
_
,
ldPath
:=
range
ldPaths
{
d
,
err
:=
filepath
.
Abs
(
ldPath
)
if
err
!=
nil
{
continue
}
patterns
=
append
(
patterns
,
filepath
.
Join
(
d
,
baseLibName
+
"*"
))
patterns
=
append
(
patterns
,
filepath
.
Join
(
d
,
baseLibName
))
}
patterns
=
append
(
patterns
,
defaultPatterns
...
)
slog
.
Debug
(
"gpu library search"
,
"globs"
,
patterns
)
...
...
@@ -641,3 +641,31 @@ func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) {
return
""
,
""
}
}
func
LibraryDir
()
string
{
// On Windows/linux we bundle the dependencies at the same level as the executable
appExe
,
err
:=
os
.
Executable
()
if
err
!=
nil
{
slog
.
Warn
(
"failed to lookup executable path"
,
"error"
,
err
)
}
cwd
,
err
:=
os
.
Getwd
()
if
err
!=
nil
{
slog
.
Warn
(
"failed to lookup working directory"
,
"error"
,
err
)
}
// Scan for any of our dependeices, and pick first match
for
_
,
root
:=
range
[]
string
{
filepath
.
Dir
(
appExe
),
filepath
.
Join
(
filepath
.
Dir
(
appExe
),
".."
),
cwd
}
{
libDep
:=
filepath
.
Join
(
"lib"
,
"ollama"
)
if
_
,
err
:=
os
.
Stat
(
filepath
.
Join
(
root
,
libDep
));
err
==
nil
{
return
filepath
.
Join
(
root
,
libDep
)
}
// Developer mode, local build
if
_
,
err
:=
os
.
Stat
(
filepath
.
Join
(
root
,
runtime
.
GOOS
+
"-"
+
runtime
.
GOARCH
,
libDep
));
err
==
nil
{
return
filepath
.
Join
(
root
,
runtime
.
GOOS
+
"-"
+
runtime
.
GOARCH
,
libDep
)
}
if
_
,
err
:=
os
.
Stat
(
filepath
.
Join
(
root
,
"dist"
,
runtime
.
GOOS
+
"-"
+
runtime
.
GOARCH
,
libDep
));
err
==
nil
{
return
filepath
.
Join
(
root
,
"dist"
,
runtime
.
GOOS
+
"-"
+
runtime
.
GOARCH
,
libDep
)
}
}
slog
.
Warn
(
"unable to locate gpu dependency libraries"
)
return
""
}
gpu/gpu_darwin.go
View file @
652c273f
...
...
@@ -25,7 +25,7 @@ func GetGPUInfo() GpuInfoList {
return
[]
GpuInfo
{
{
Library
:
"cpu"
,
Variant
:
GetCPUCapability
(),
Variant
:
GetCPUCapability
()
.
String
()
,
memInfo
:
mem
,
},
}
...
...
@@ -48,7 +48,7 @@ func GetCPUInfo() GpuInfoList {
return
[]
GpuInfo
{
{
Library
:
"cpu"
,
Variant
:
GetCPUCapability
(),
Variant
:
GetCPUCapability
()
.
String
()
,
memInfo
:
mem
,
},
}
...
...
gpu/gpu_linux.go
View file @
652c273f
...
...
@@ -47,7 +47,7 @@ var (
CudartMgmtName
=
"libcudart.so*"
NvcudaMgmtName
=
"libcuda.so*"
NvmlMgmtName
=
""
// not currently wired on linux
OneapiMgmtName
=
"libze_intel_gpu.so"
OneapiMgmtName
=
"libze_intel_gpu.so
*
"
)
func
GetCPUMem
()
(
memInfo
,
error
)
{
...
...
gpu/types.go
View file @
652c273f
...
...
@@ -19,7 +19,7 @@ type GpuInfo struct {
Library
string
`json:"library,omitempty"`
// Optional variant to select (e.g. versions, cpu feature flags)
Variant
CPUCapability
`json:"variant"`
Variant
string
`json:"variant"`
// MinimumMemory represents the minimum memory required to use the GPU
MinimumMemory
uint64
`json:"-"`
...
...
@@ -53,8 +53,10 @@ type CPUInfo struct {
type
CudaGPUInfo
struct
{
GpuInfo
OSOverhead
uint64
// Memory overhead between the driver library and management library
index
int
//nolint:unused,nolintlint
OSOverhead
uint64
// Memory overhead between the driver library and management library
index
int
//nolint:unused,nolintlint
computeMajor
int
//nolint:unused,nolintlint
computeMinor
int
//nolint:unused,nolintlint
}
type
CudaGPUInfoList
[]
CudaGPUInfo
...
...
@@ -81,8 +83,8 @@ func (l GpuInfoList) ByLibrary() []GpuInfoList {
for
_
,
info
:=
range
l
{
found
:=
false
requested
:=
info
.
Library
if
info
.
Variant
!=
CPUCapabilityNone
{
requested
+=
"_"
+
info
.
Variant
.
String
()
if
info
.
Variant
!=
CPUCapabilityNone
.
String
()
{
requested
+=
"_"
+
info
.
Variant
}
for
i
,
lib
:=
range
libs
{
if
lib
==
requested
{
...
...
@@ -105,6 +107,7 @@ func (l GpuInfoList) LogDetails() {
slog
.
Info
(
"inference compute"
,
"id"
,
g
.
ID
,
"library"
,
g
.
Library
,
"variant"
,
g
.
Variant
,
"compute"
,
g
.
Compute
,
"driver"
,
fmt
.
Sprintf
(
"%d.%d"
,
g
.
DriverMajor
,
g
.
DriverMinor
),
"name"
,
g
.
Name
,
...
...
llm/ext_server/CMakeLists.txt
View file @
652c273f
set
(
TARGET ollama_llama_server
)
option
(
LLAMA_SERVER_VERBOSE
"Build verbose logging option for Server"
ON
)
set
(
LLAMA_SERVER_LDFLAGS $ENV{LLAMA_SERVER_LDFLAGS}
)
include_directories
(
${
CMAKE_CURRENT_SOURCE_DIR
}
)
add_executable
(
${
TARGET
}
server.cpp utils.hpp json.hpp httplib.h
)
install
(
TARGETS
${
TARGET
}
RUNTIME
)
target_compile_definitions
(
${
TARGET
}
PRIVATE
SERVER_VERBOSE=$<BOOL:
${
LLAMA_SERVER_VERBOSE
}
>
)
target_link_libraries
(
${
TARGET
}
PRIVATE ggml llama common llava
${
CMAKE_THREAD_LIBS_INIT
}
)
target_link_libraries
(
${
TARGET
}
PRIVATE ggml llama common llava
${
CMAKE_THREAD_LIBS_INIT
}
${
LLAMA_SERVER_LDFLAGS
}
)
if
(
WIN32
)
TARGET_LINK_LIBRARIES
(
${
TARGET
}
PRIVATE ws2_32
)
endif
()
...
...
llm/generate/gen_common.sh
View file @
652c273f
...
...
@@ -9,11 +9,14 @@ init_vars() {
ARCH
=
"arm64"
;;
*
)
ARCH
=
$(
uname
-m
|
sed
-e
"s/aarch64/arm64/g"
)
echo
"GOARCH must be set"
echo
"this script is meant to be run from within go generate"
exit
1
;;
esac
LLAMACPP_DIR
=
../llama.cpp
CMAKE_DEFS
=
""
CMAKE_DEFS
=
"
-DCMAKE_SKIP_RPATH=on
"
CMAKE_TARGETS
=
"--target ollama_llama_server"
if
echo
"
${
CGO_CFLAGS
}
"
|
grep
--
'-g'
>
/dev/null
;
then
CMAKE_DEFS
=
"-DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_VERBOSE_MAKEFILE=on -DLLAMA_GPROF=on -DLLAMA_SERVER_VERBOSE=on
${
CMAKE_DEFS
}
"
...
...
@@ -27,6 +30,7 @@ init_vars() {
WHOLE_ARCHIVE
=
"-Wl,-force_load"
NO_WHOLE_ARCHIVE
=
""
GCC_ARCH
=
"-arch
${
ARCH
}
"
DIST_BASE
=
../../dist/darwin-
${
GOARCH
}
/
;;
"Linux"
)
LIB_EXT
=
"so"
...
...
@@ -35,6 +39,7 @@ init_vars() {
# Cross compiling not supported on linux - Use docker
GCC_ARCH
=
""
DIST_BASE
=
../../dist/linux-
${
GOARCH
}
/
;;
*
)
;;
...
...
@@ -42,6 +47,7 @@ init_vars() {
if
[
-z
"
${
CMAKE_CUDA_ARCHITECTURES
}
"
]
;
then
CMAKE_CUDA_ARCHITECTURES
=
"50;52;61;70;75;80"
fi
GZIP
=
$(
which pigz 2>/dev/null
||
echo
"gzip"
)
}
git_module_setup
()
{
...
...
@@ -85,26 +91,36 @@ build() {
compress
()
{
echo
"Compressing payloads to reduce overall binary size..."
pids
=
""
rm
-rf
${
BUILD_DIR
}
/bin/
*
.gz
for
f
in
${
BUILD_DIR
}
/bin/
*
;
do
gzip
-n
--best
-f
${
f
}
&
pids+
=
"
$!
"
${
GZIP
}
-n
--best
-f
${
f
}
&
compress_
pids+
=
"
$!
"
done
# check for lib directory
if
[
-d
${
BUILD_DIR
}
/lib
]
;
then
for
f
in
${
BUILD_DIR
}
/lib/
*
;
do
gzip
-n
--best
-f
${
f
}
&
pids+
=
"
$!
"
${
GZIP
}
-n
--best
-f
${
f
}
&
compress_
pids+
=
"
$!
"
done
fi
echo
for
pid
in
${
pids
}
;
do
}
wait_for_compress
()
{
for
pid
in
${
compress_pids
}
;
do
wait
$pid
done
echo
"Finished compression"
}
install
()
{
echo
"Installing libraries to bin dir
${
BUILD_DIR
}
/bin/"
for
lib
in
$(
find
${
BUILD_DIR
}
-name
\*
.
${
LIB_EXT
}
)
;
do
rm
-f
"
${
BUILD_DIR
}
/bin/
$(
basename
${
lib
}
)
"
cp
-af
"
${
lib
}
"
"
${
BUILD_DIR
}
/bin/"
done
}
# Keep the local tree clean after we're done with the build
cleanup
()
{
(
cd
${
LLAMACPP_DIR
}
/
&&
git checkout CMakeLists.txt
)
...
...
llm/generate/gen_darwin.sh
View file @
652c273f
...
...
@@ -6,6 +6,7 @@
set
-ex
set
-o
pipefail
compress_pids
=
""
echo
"Starting darwin generate script"
source
$(
dirname
$0
)
/gen_common.sh
init_vars
...
...
@@ -98,4 +99,5 @@ case "${GOARCH}" in
esac
cleanup
wait_for_compress
echo
"go generate completed. LLM runners:
$(
cd
${
BUILD_DIR
}
/..
;
echo
*
)
"
llm/generate/gen_linux.sh
View file @
652c273f
...
...
@@ -13,6 +13,7 @@
set
-ex
set
-o
pipefail
compress_pids
=
""
# See https://llvm.org/docs/AMDGPUUsage.html#processors for reference
amdGPUs
()
{
...
...
@@ -51,7 +52,7 @@ if [ -z "${CUDACXX}" ]; then
export
CUDACXX
=
$(
command
-v
nvcc
)
fi
fi
COMMON_CMAKE_DEFS
=
"-DBUILD_SHARED_LIBS=o
ff
-DCMAKE_POSITION_INDEPENDENT_CODE=on -DGGML_NATIVE=off -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_OPENMP=off"
COMMON_CMAKE_DEFS
=
"
-DCMAKE_SKIP_RPATH=on
-DBUILD_SHARED_LIBS=o
n
-DCMAKE_POSITION_INDEPENDENT_CODE=on -DGGML_NATIVE=off -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_OPENMP=off"
source
$(
dirname
$0
)
/gen_common.sh
init_vars
git_module_setup
...
...
@@ -77,10 +78,11 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
if
[
-n
"
${
OLLAMA_CUSTOM_CPU_DEFS
}
"
]
;
then
init_vars
echo
"OLLAMA_CUSTOM_CPU_DEFS=
\"
${
OLLAMA_CUSTOM_CPU_DEFS
}
\"
"
CMAKE_DEFS
=
"
${
OLLAMA_CUSTOM_CPU_DEFS
}
-DBUILD_SHARED_LIBS=o
ff
-DCMAKE_POSITION_INDEPENDENT_CODE=on
${
CMAKE_DEFS
}
"
CMAKE_DEFS
=
"
${
OLLAMA_CUSTOM_CPU_DEFS
}
-DBUILD_SHARED_LIBS=o
n
-DCMAKE_POSITION_INDEPENDENT_CODE=on
${
CMAKE_DEFS
}
"
BUILD_DIR
=
"../build/linux/
${
ARCH
}
/cpu"
echo
"Building custom CPU"
build
install
compress
else
# Darwin Rosetta x86 emulation does NOT support AVX, AVX2, AVX512
...
...
@@ -93,7 +95,7 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
# -DGGML_AVX512_VBMI -- 2018 Intel Cannon Lake
# -DGGML_AVX512_VNNI -- 2021 Intel Alder Lake
COMMON_CPU_DEFS
=
"-DBUILD_SHARED_LIBS=o
ff
-DCMAKE_POSITION_INDEPENDENT_CODE=on -DGGML_NATIVE=off -DGGML_OPENMP=off"
COMMON_CPU_DEFS
=
"-DBUILD_SHARED_LIBS=o
n
-DCMAKE_POSITION_INDEPENDENT_CODE=on -DGGML_NATIVE=off -DGGML_OPENMP=off"
if
[
-z
"
${
OLLAMA_CPU_TARGET
}
"
-o
"
${
OLLAMA_CPU_TARGET
}
"
=
"cpu"
]
;
then
#
# CPU first for the default library, set up as lowest common denominator for maximum compatibility (including Rosetta)
...
...
@@ -103,6 +105,7 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
BUILD_DIR
=
"../build/linux/
${
ARCH
}
/cpu"
echo
"Building LCD CPU"
build
install
compress
fi
...
...
@@ -120,6 +123,7 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
BUILD_DIR
=
"../build/linux/
${
ARCH
}
/cpu_avx"
echo
"Building AVX CPU"
build
install
compress
fi
...
...
@@ -133,6 +137,7 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
BUILD_DIR
=
"../build/linux/
${
ARCH
}
/cpu_avx2"
echo
"Building AVX2 CPU"
build
install
compress
fi
fi
...
...
@@ -160,7 +165,7 @@ if [ -z "${OLLAMA_SKIP_CUDA_GENERATE}" -a -d "${CUDA_LIB_DIR}" ]; then
echo
"CUDA libraries detected - building dynamic CUDA library"
init_vars
CUDA_MAJOR
=
$(
ls
"
${
CUDA_LIB_DIR
}
"
/libcudart.so.
*
|
head
-1
|
cut
-f3
-d
.
||
true
)
if
[
-n
"
${
CUDA_MAJOR
}
"
]
;
then
if
[
-n
"
${
CUDA_MAJOR
}
"
-a
-z
"
${
CUDA_VARIANT
}
"
]
;
then
CUDA_VARIANT
=
_v
${
CUDA_MAJOR
}
fi
if
[
"
${
ARCH
}
"
==
"arm64"
]
;
then
...
...
@@ -178,29 +183,19 @@ if [ -z "${OLLAMA_SKIP_CUDA_GENERATE}" -a -d "${CUDA_LIB_DIR}" ]; then
CMAKE_CUDA_DEFS
=
"-DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=
${
CMAKE_CUDA_ARCHITECTURES
}
${
OLLAMA_CUSTOM_CUDA_DEFS
}
"
echo
"Building custom CUDA GPU"
else
CMAKE_CUDA_DEFS
=
"-DGGML_CUDA=on
-DCMAKE_CUDA_FLAGS=-t8
-DCMAKE_CUDA_ARCHITECTURES=
${
CMAKE_CUDA_ARCHITECTURES
}
"
CMAKE_CUDA_DEFS
=
"-DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=
${
CMAKE_CUDA_ARCHITECTURES
}
"
fi
CMAKE_DEFS
=
"
${
COMMON_CMAKE_DEFS
}
${
CMAKE_DEFS
}
${
ARM64_DEFS
}
${
CMAKE_CUDA_DEFS
}
"
export
CUDAFLAGS
=
"-t8"
CMAKE_DEFS
=
"
${
COMMON_CMAKE_DEFS
}
${
CMAKE_DEFS
}
${
ARM64_DEFS
}
${
CMAKE_CUDA_DEFS
}
-DGGML_STATIC=off"
BUILD_DIR
=
"../build/linux/
${
ARCH
}
/cuda
${
CUDA_VARIANT
}
"
EXTRA_LIBS
=
"-L
${
CUDA_LIB_DIR
}
-lcudart -lcublas -lcublasLt -lcuda"
export
LLAMA_SERVER_LDFLAGS
=
"-L
${
CUDA_LIB_DIR
}
-lcudart -lcublas -lcublasLt -lcuda"
CUDA_DIST_DIR
=
"
${
CUDA_DIST_DIR
:-${
DIST_BASE
}
/lib/ollama
}
"
build
# Carry the CUDA libs as payloads to help reduce dependency burden on users
#
# TODO - in the future we may shift to packaging these separately and conditionally
# downloading them in the install script.
DEPS
=
"
$(
ldd
${
BUILD_DIR
}
/bin/ollama_llama_server
)
"
for
lib
in
libcudart.so libcublas.so libcublasLt.so
;
do
DEP
=
$(
echo
"
${
DEPS
}
"
|
grep
${
lib
}
|
cut
-f1
-d
' '
| xargs
||
true
)
if
[
-n
"
${
DEP
}
"
-a
-e
"
${
CUDA_LIB_DIR
}
/
${
DEP
}
"
]
;
then
cp
"
${
CUDA_LIB_DIR
}
/
${
DEP
}
"
"
${
BUILD_DIR
}
/bin/"
elif
[
-e
"
${
CUDA_LIB_DIR
}
/
${
lib
}
.
${
CUDA_MAJOR
}
"
]
;
then
cp
"
${
CUDA_LIB_DIR
}
/
${
lib
}
.
${
CUDA_MAJOR
}
"
"
${
BUILD_DIR
}
/bin/"
elif
[
-e
"
${
CUDART_LIB_DIR
}
/
${
lib
}
"
]
;
then
cp
-d
${
CUDART_LIB_DIR
}
/
${
lib
}*
"
${
BUILD_DIR
}
/bin/"
else
cp
-d
"
${
CUDA_LIB_DIR
}
/
${
lib
}
*"
"
${
BUILD_DIR
}
/bin/"
fi
install
echo
"Installing CUDA dependencies in
${
CUDA_DIST_DIR
}
"
mkdir
-p
"
${
CUDA_DIST_DIR
}
"
for
lib
in
${
CUDA_LIB_DIR
}
/libcudart.so
*
${
CUDA_LIB_DIR
}
/libcublas.so
*
${
CUDA_LIB_DIR
}
/libcublasLt.so
*
;
do
cp
-a
"
${
lib
}
"
"
${
CUDA_DIST_DIR
}
"
done
compress
...
...
@@ -218,21 +213,24 @@ if [ -z "${OLLAMA_SKIP_ONEAPI_GENERATE}" -a -d "${ONEAPI_ROOT}" ]; then
CC
=
icx
CMAKE_DEFS
=
"
${
COMMON_CMAKE_DEFS
}
${
CMAKE_DEFS
}
-DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL=ON -DGGML_SYCL_F16=OFF"
BUILD_DIR
=
"../build/linux/
${
ARCH
}
/oneapi"
EXTRA_LIBS
=
"-fsycl -Wl,-rpath,
${
ONEAPI_ROOT
}
/compiler/latest/lib,-rpath,
${
ONEAPI_ROOT
}
/mkl/latest/lib,-rpath,
${
ONEAPI_ROOT
}
/tbb/latest/lib,-rpath,
${
ONEAPI_ROOT
}
/compiler/latest/opt/oclfpga/linux64/lib -lOpenCL -lmkl_core -lmkl_sycl_blas -lmkl_intel_ilp64 -lmkl_tbb_thread -ltbb"
ONEAPI_DIST_DIR
=
"
${
DIST_BASE
}
/lib/ollama"
export
LLAMA_SERVER_LDFLAGS
=
"-fsycl -lOpenCL -lmkl_core -lmkl_sycl_blas -lmkl_intel_ilp64 -lmkl_tbb_thread -ltbb"
DEBUG_FLAGS
=
""
# icx compiles with -O0 if we pass -g, so we must remove it
build
# copy oneAPI dependencies
mkdir
-p
"
${
ONEAPI_DIST_DIR
}
"
for
dep
in
$(
ldd
"
${
BUILD_DIR
}
/bin/ollama_llama_server"
|
grep
"=>"
|
cut
-f2
-d
=
|
cut
-f2
-d
' '
|
grep
-e
sycl
-e
mkl
-e
tbb
)
;
do
cp
"
${
dep
}
"
"
${
BUILD_DIR
}
/bin/
"
cp
-a
"
${
dep
}
"
"
${
ONEAPI_DIST_DIR
}
"
done
cp
"
${
ONEAPI_ROOT
}
/compiler/latest/lib/libOpenCL.so"
"
${
BUILD_DIR
}
/bin/"
cp
"
${
ONEAPI_ROOT
}
/compiler/latest/lib/libimf.so"
"
${
BUILD_DIR
}
/bin/"
cp
"
${
ONEAPI_ROOT
}
/compiler/latest/lib/libintlc.so.5"
"
${
BUILD_DIR
}
/bin/"
cp
"
${
ONEAPI_ROOT
}
/compiler/latest/lib/libirng.so"
"
${
BUILD_DIR
}
/bin/"
cp
"
${
ONEAPI_ROOT
}
/compiler/latest/lib/libpi_level_zero.so"
"
${
BUILD_DIR
}
/bin/"
cp
"
${
ONEAPI_ROOT
}
/compiler/latest/lib/libsvml.so"
"
${
BUILD_DIR
}
/bin/"
cp
"
${
ONEAPI_ROOT
}
/compiler/latest/lib/libur_loader.so.0"
"
${
BUILD_DIR
}
/bin/"
cp
"
${
ONEAPI_ROOT
}
/compiler/latest/lib/libOpenCL.so"
"
${
ONEAPI_DIST_DIR
}
"
cp
"
${
ONEAPI_ROOT
}
/compiler/latest/lib/libimf.so"
"
${
ONEAPI_DIST_DIR
}
"
cp
"
${
ONEAPI_ROOT
}
/compiler/latest/lib/libintlc.so.5"
"
${
ONEAPI_DIST_DIR
}
"
cp
"
${
ONEAPI_ROOT
}
/compiler/latest/lib/libirng.so"
"
${
ONEAPI_DIST_DIR
}
"
cp
"
${
ONEAPI_ROOT
}
/compiler/latest/lib/libpi_level_zero.so"
"
${
ONEAPI_DIST_DIR
}
"
cp
"
${
ONEAPI_ROOT
}
/compiler/latest/lib/libsvml.so"
"
${
ONEAPI_DIST_DIR
}
"
cp
"
${
ONEAPI_ROOT
}
/compiler/latest/lib/libur_loader.so.0"
"
${
ONEAPI_DIST_DIR
}
"
install
compress
fi
...
...
@@ -262,23 +260,21 @@ if [ -z "${OLLAMA_SKIP_ROCM_GENERATE}" -a -d "${ROCM_PATH}" ]; then
echo
"Building custom ROCM GPU"
fi
BUILD_DIR
=
"../build/linux/
${
ARCH
}
/rocm
${
ROCM_VARIANT
}
"
EXTRA_LIBS
=
"-L
${
ROCM_PATH
}
/lib -L/opt/amdgpu/lib/x86_64-linux-gnu/ -Wl,-rpath,
\$
ORIGIN/../../rocm/ -lhipblas -lrocblas -lamdhip64 -lrocsolver -lamd_comgr -lhsa-runtime64 -lrocsparse -ldrm -ldrm_amdgpu"
ROCM_DIST_DIR
=
"
${
DIST_BASE
}
/lib/ollama"
# TODO figure out how to disable runpath (rpath)
# export CMAKE_HIP_FLAGS="-fno-rtlib-add-rpath" # doesn't work
export
LLAMA_SERVER_LDFLAGS
=
"-L
${
ROCM_PATH
}
/lib -L/opt/amdgpu/lib/x86_64-linux-gnu/ -lhipblas -lrocblas -lamdhip64 -lrocsolver -lamd_comgr -lhsa-runtime64 -lrocsparse -ldrm -ldrm_amdgpu"
build
# Record the ROCM dependencies
rm
-f
"
${
BUILD_DIR
}
/bin/deps.txt"
touch
"
${
BUILD_DIR
}
/bin/deps.txt"
for
dep
in
$(
ldd
"
${
BUILD_DIR
}
/bin/ollama_llama_server"
|
grep
"=>"
|
cut
-f2
-d
=
|
cut
-f2
-d
' '
|
grep
-e
rocm
-e
amdgpu
-e
libtinfo
)
;
do
echo
"
${
dep
}
"
>>
"
${
BUILD_DIR
}
/bin/deps.txt"
# copy the ROCM dependencies
mkdir
-p
"
${
ROCM_DIST_DIR
}
"
for
dep
in
$(
ldd
"
${
BUILD_DIR
}
/bin/ollama_llama_server"
|
grep
"=>"
|
cut
-f2
-d
=
|
cut
-f2
-d
' '
|
grep
-v
"
${
ARCH
}
/rocm
${
ROCM_VARIANT
}
"
|
grep
-e
rocm
-e
amdgpu
-e
libtinfo
)
;
do
cp
-a
"
${
dep
}
"
*
"
${
ROCM_DIST_DIR
}
"
done
# bomb out if for some reason we didn't get a few deps
if
[
$(
cat
"
${
BUILD_DIR
}
/bin/deps.txt"
|
wc
-l
)
-lt
8
]
;
then
cat
"
${
BUILD_DIR
}
/bin/deps.txt"
echo
"ERROR: deps file short"
exit
1
fi
install
compress
fi
cleanup
wait_for_compress
echo
"go generate completed. LLM runners:
$(
cd
${
BUILD_DIR
}
/..
;
echo
*
)
"
llm/generate/gen_windows.ps1
View file @
652c273f
...
...
@@ -35,7 +35,7 @@ function init_vars {
)
$
script
:
commonCpuDefs
=
@(
"-DCMAKE_POSITION_INDEPENDENT_CODE=on"
)
$
script
:
ARCH
=
$
Env
:
PROCESSOR_ARCHITECTURE
.
ToLower
()
$
script
:
DIST_BASE
=
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\ollama
_
runners"
$
script
:
DIST_BASE
=
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\
lib\
ollama
\
runners"
md
"
$
script
:
DIST_BASE
"
-ea
0
>
$null
if
(
$
env
:
CGO_CFLAGS
-contains
"-g"
)
{
$
script
:
cmakeDefs
+=
@(
"-DCMAKE_VERBOSE_MAKEFILE=on"
,
"-DLLAMA_SERVER_VERBOSE=on"
,
"-DCMAKE_BUILD_TYPE=RelWithDebInfo"
)
...
...
@@ -117,7 +117,7 @@ function build {
if
(
$cmakeDefs
-contains
"-G"
)
{
$extra
=
@(
"-j8"
)
}
else
{
$extra
=
@(
"--"
,
"/
p:CL_MPc
ount
=
8"
)
$extra
=
@(
"--"
,
"/
maxCpuC
ount
:
8"
)
}
write-host
"building with: cmake --build
$
script
:
buildDir
--config
$
script
:
config
$(
$
script
:
cmakeTargets
|
ForEach-Object
{
`
"--target
`"
,
$_
})
$extra
"
&
cmake
--build
$
script
:
buildDir
--config
$
script
:
config
(
$
script
:
cmakeTargets
|
ForEach-Object
{
"--target"
,
$_
}
)
$extra
...
...
@@ -261,7 +261,7 @@ function build_cuda() {
if ((-not "
${env:OLLAMA_SKIP_CUDA_GENERATE}
") -and ("
${script:CUDA_LIB_DIR}
")) {
# Then build cuda as a dynamically loaded library
$nvcc
= "
$
script
:
CUDA_LIB_DIR
\nvcc.exe
"
$
script
:
CUDA_VERSION
=(get-item (
$nvcc
| split-path | split-path)).Basename
$
script
:
CUDA_VERSION
=(
(
get-item (
$nvcc
| split-path | split-path)).Basename
-Split "
\.
")[0]
if (
$null
-ne
$
script
:
CUDA_VERSION
) {
$
script
:
CUDA_VARIANT
="
_
"+
$
script
:
CUDA_VERSION
}
...
...
@@ -273,9 +273,9 @@ function build_cuda() {
"
-DGGML_CUDA
=
ON
",
"
-DGGML_AVX
=
on
",
"
-DGGML_AVX2
=
off
",
"
-DC
UDAToolkit_INCLUDE_DIR
=
$
script
:
CUDA_INCLUDE_DIR
",
"
-DCMAKE_CUDA_
FLAGS
=
-t8
",
"
-DCMAKE_CUDA_
ARCHITECTURES
=
${script:CMAKE_CUDA_ARCHITECTURES}
"
"
-DC
MAKE_CUDA_FLAGS
=
-t6
",
"
-DCMAKE_CUDA_
ARCHITECTURES
=
${script:CMAKE_CUDA_ARCHITECTURES}
",
"
-DCMAKE_CUDA_
COMPILER_TOOLKIT_ROOT
=
$
env
:
CUDA_PATH
"
)
if (
$null
-ne
$
env
:
OLLAMA_CUSTOM_CUDA_DEFS
) {
write-host "
OLLAMA_CUSTOM_CUDA_DEFS
=
`
"
${env:OLLAMA_CUSTOM_CUDA_DEFS}
`"
"
...
...
@@ -286,12 +286,11 @@ function build_cuda() {
sign
install
rm
-ea
0
-recurse
-force
-path
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\cuda\"
md
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\cuda\"
-ea
0
>
$null
write-host
"copying CUDA dependencies to
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\cuda\"
cp
"
${script:CUDA_LIB_DIR}
\cudart64_*.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\cuda\"
cp
"
${script:CUDA_LIB_DIR}
\cublas64_*.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\cuda\"
cp
"
${script:CUDA_LIB_DIR}
\cublasLt64_*.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\cuda\"
md
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\lib\ollama\"
-ea
0
>
$null
write-host
"copying CUDA dependencies to
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\lib\ollama\"
cp
"
${script:CUDA_LIB_DIR}
\cudart64_*.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\lib\ollama\"
cp
"
${script:CUDA_LIB_DIR}
\cublas64_*.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\lib\ollama\"
cp
"
${script:CUDA_LIB_DIR}
\cublasLt64_*.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\lib\ollama\"
}
else
{
write-host
"Skipping CUDA generation step"
}
...
...
@@ -325,18 +324,17 @@ function build_oneapi() {
sign
install
rm
-ea
0
-recurse
-force
-path
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\oneapi\"
md
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\oneapi\"
-ea
0
>
$null
cp
"
${env:ONEAPI_ROOT}
\compiler\latest\bin\libirngmd.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\oneapi\"
cp
"
${env:ONEAPI_ROOT}
\compiler\latest\bin\libmmd.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\oneapi\"
cp
"
${env:ONEAPI_ROOT}
\compiler\latest\bin\pi_level_zero.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\oneapi\"
cp
"
${env:ONEAPI_ROOT}
\compiler\latest\bin\pi_unified_runtime.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\oneapi\"
cp
"
${env:ONEAPI_ROOT}
\compiler\latest\bin\pi_win_proxy_loader.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\oneapi\"
cp
"
${env:ONEAPI_ROOT}
\compiler\latest\bin\svml_dispmd.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\oneapi\"
cp
"
${env:ONEAPI_ROOT}
\compiler\latest\bin\sycl7.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\oneapi\"
cp
"
${env:ONEAPI_ROOT}
\mkl\latest\bin\mkl_core.2.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\oneapi\"
cp
"
${env:ONEAPI_ROOT}
\mkl\latest\bin\mkl_sycl_blas.4.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\oneapi\"
cp
"
${env:ONEAPI_ROOT}
\mkl\latest\bin\mkl_tbb_thread.2.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\oneapi\"
md
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\lib\ollama\"
-ea
0
>
$null
cp
"
${env:ONEAPI_ROOT}
\compiler\latest\bin\libirngmd.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\lib\ollama\"
cp
"
${env:ONEAPI_ROOT}
\compiler\latest\bin\libmmd.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\lib\ollama\"
cp
"
${env:ONEAPI_ROOT}
\compiler\latest\bin\pi_level_zero.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\lib\ollama\"
cp
"
${env:ONEAPI_ROOT}
\compiler\latest\bin\pi_unified_runtime.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\lib\ollama\"
cp
"
${env:ONEAPI_ROOT}
\compiler\latest\bin\pi_win_proxy_loader.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\lib\ollama\"
cp
"
${env:ONEAPI_ROOT}
\compiler\latest\bin\svml_dispmd.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\lib\ollama\"
cp
"
${env:ONEAPI_ROOT}
\compiler\latest\bin\sycl7.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\lib\ollama\"
cp
"
${env:ONEAPI_ROOT}
\mkl\latest\bin\mkl_core.2.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\lib\ollama\"
cp
"
${env:ONEAPI_ROOT}
\mkl\latest\bin\mkl_sycl_blas.4.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\lib\ollama\"
cp
"
${env:ONEAPI_ROOT}
\mkl\latest\bin\mkl_tbb_thread.2.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\lib\ollama\"
}
else
{
Write-Host
"Skipping oneAPI generation step"
}
...
...
@@ -386,12 +384,11 @@ function build_rocm() {
sign
install
rm -ea 0 -recurse -force -path "
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\rocm\
"
md "
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\rocm\rocblas\library\
" -ea 0 >
$null
cp "
${env:HIP_PATH}
\bin\hipblas.dll
" "
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\rocm\
"
cp "
${env:HIP_PATH}
\bin\rocblas.dll
" "
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\rocm\
"
md "
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\lib\ollama\rocblas\library\
" -ea 0 >
$null
cp "
${env:HIP_PATH}
\bin\hipblas.dll
" "
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\lib\ollama\
"
cp "
${env:HIP_PATH}
\bin\rocblas.dll
" "
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\lib\ollama\
"
# amdhip64.dll dependency comes from the driver and must be installed on the host to use AMD GPUs
cp "
${env:HIP_PATH}
\bin\rocblas\library\*
" "
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\
rocm
\rocblas\library\
"
cp "
${env:HIP_PATH}
\bin\rocblas\library\*
" "
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\
lib\ollama
\rocblas\library\
"
} else {
write-host "
Skipping
ROCm
generation
step
"
}
...
...
llm/payload.go
View file @
652c273f
...
...
@@ -82,8 +82,8 @@ func serversForGpu(info gpu.GpuInfo) []string {
// glob workDir for files that start with ollama_
availableServers
:=
getAvailableServers
()
requested
:=
info
.
Library
if
info
.
Variant
!=
gpu
.
CPUCapabilityNone
{
requested
+=
"_"
+
info
.
Variant
.
String
()
if
info
.
Variant
!=
gpu
.
CPUCapabilityNone
.
String
()
{
requested
+=
"_"
+
info
.
Variant
}
servers
:=
[]
string
{}
...
...
llm/server.go
View file @
652c273f
...
...
@@ -306,20 +306,18 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
if
runtime
.
GOOS
==
"windows"
{
pathEnv
=
"PATH"
}
//
prepend
the server directory
to
LD_LIBRARY_PATH/PATH
and the parent dir for common dependencies
libraryPaths
:=
[]
string
{
dir
,
filepath
.
Dir
(
dir
)
}
//
Start with
the server directory
for the
LD_LIBRARY_PATH/PATH
libraryPaths
:=
[]
string
{
dir
}
if
libraryPath
,
ok
:=
os
.
LookupEnv
(
pathEnv
);
ok
{
// Append our runner directory to the path
// This will favor system libraries over our bundled library dependencies
// favor our bundled library dependencies over system libraries
libraryPaths
=
append
(
libraryPaths
,
filepath
.
SplitList
(
libraryPath
)
...
)
}
// Note: we always put the dependency path first
// since this was the exact version we verified for AMD GPUs
// and we favor what the user had in their path
// since this was the exact version we compiled/linked against
if
gpus
[
0
]
.
DependencyPath
!=
""
{
//
TODO refine for multi-gpu support
//
assume gpus from the same library have the same dependency path
libraryPaths
=
append
([]
string
{
gpus
[
0
]
.
DependencyPath
},
libraryPaths
...
)
}
...
...
scripts/build_linux.sh
View file @
652c273f
...
...
@@ -4,6 +4,7 @@ set -eu
export
VERSION
=
${
VERSION
:-
$(
git describe
--tags
--first-parent
--abbrev
=
7
--long
--dirty
--always
|
sed
-e
"s/^v//g"
)
}
export
GOFLAGS
=
"'-ldflags=-w -s
\"
-X=github.com/ollama/ollama/version.Version=
$VERSION
\"
\"
-X=github.com/ollama/ollama/server.mode=release
\"
'"
GZIP
=
$(
which pigz 2>/dev/null
||
echo
"gzip"
)
BUILD_ARCH
=
${
BUILD_ARCH
:-
"amd64 arm64"
}
export
AMDGPU_TARGETS
=
${
AMDGPU_TARGETS
:
=
""
}
...
...
@@ -21,11 +22,10 @@ for TARGETARCH in ${BUILD_ARCH}; do
-t
builder:
$TARGETARCH
\
.
docker create
--platform
linux/
$TARGETARCH
--name
builder-
$TARGETARCH
builder:
$TARGETARCH
docker
cp
builder-
$TARGETARCH
:/go/src/github.com/ollama/ollama/ollama ./dist/ollama-linux-
$TARGETARCH
if
[
"
$TARGETARCH
"
=
"amd64"
]
;
then
docker
cp
builder-
$TARGETARCH
:/go/src/github.com/ollama/ollama/dist/deps/ ./dist/
fi
rm
-rf
./dist/linux-
$TARGETARCH
docker
cp
builder-
$TARGETARCH
:/go/src/github.com/ollama/ollama/dist/linux-
$TARGETARCH
./dist
docker
rm
builder-
$TARGETARCH
echo
"Compressing final linux bundle..."
rm
-f
./dist/ollama-linux-
$TARGETARCH
.tgz
(
cd
dist/linux-
$TARGETARCH
&&
tar
cf -
.
|
${
GZIP
}
--best
>
../ollama-linux-
$TARGETARCH
.tgz
)
done
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment