Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
652c273f
Unverified
Commit
652c273f
authored
Aug 19, 2024
by
Daniel Hiltgen
Committed by
GitHub
Aug 19, 2024
Browse files
Merge pull request #5049 from dhiltgen/cuda_v12
Cuda v12
parents
88e77050
f9e31da9
Changes
23
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
346 additions
and
190 deletions
+346
-190
.github/workflows/release.yaml
.github/workflows/release.yaml
+15
-5
Dockerfile
Dockerfile
+103
-30
app/ollama.iss
app/ollama.iss
+6
-15
docs/linux.md
docs/linux.md
+4
-6
envconfig/config.go
envconfig/config.go
+5
-5
gpu/amd_common.go
gpu/amd_common.go
+1
-1
gpu/amd_windows.go
gpu/amd_windows.go
+1
-1
gpu/cuda_common.go
gpu/cuda_common.go
+43
-0
gpu/gpu.go
gpu/gpu.go
+49
-21
gpu/gpu_darwin.go
gpu/gpu_darwin.go
+2
-2
gpu/gpu_linux.go
gpu/gpu_linux.go
+1
-1
gpu/types.go
gpu/types.go
+8
-5
llm/ext_server/CMakeLists.txt
llm/ext_server/CMakeLists.txt
+2
-1
llm/generate/gen_common.sh
llm/generate/gen_common.sh
+24
-8
llm/generate/gen_darwin.sh
llm/generate/gen_darwin.sh
+2
-0
llm/generate/gen_linux.sh
llm/generate/gen_linux.sh
+41
-45
llm/generate/gen_windows.ps1
llm/generate/gen_windows.ps1
+26
-29
llm/payload.go
llm/payload.go
+2
-2
llm/server.go
llm/server.go
+5
-7
scripts/build_linux.sh
scripts/build_linux.sh
+6
-6
No files found.
.github/workflows/release.yaml
View file @
652c273f
...
@@ -187,6 +187,13 @@ jobs:
...
@@ -187,6 +187,13 @@ jobs:
generate-windows-cuda
:
generate-windows-cuda
:
environment
:
release
environment
:
release
runs-on
:
windows
runs-on
:
windows
strategy
:
matrix
:
cuda
:
-
version
:
"
11"
url
:
'
https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.89_win10.exe'
-
version
:
"
12"
url
:
'
https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_551.61_windows.exe'
env
:
env
:
KEY_CONTAINER
:
${{ vars.KEY_CONTAINER }}
KEY_CONTAINER
:
${{ vars.KEY_CONTAINER }}
steps
:
steps
:
...
@@ -220,11 +227,11 @@ jobs:
...
@@ -220,11 +227,11 @@ jobs:
with
:
with
:
go-version-file
:
go.mod
go-version-file
:
go.mod
cache
:
true
cache
:
true
-
name
:
'
Install
CUDA'
-
name
:
'
Install
CUDA
${{
matrix.cuda.version
}}
'
run
:
|
run
:
|
$ErrorActionPreference = "Stop"
$ErrorActionPreference = "Stop"
write-host "downloading CUDA Installer"
write-host "downloading CUDA Installer"
Invoke-WebRequest -Uri "
https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.89_win10.exe
" -OutFile "${env:RUNNER_TEMP}\cuda-install.exe"
Invoke-WebRequest -Uri "
${{ matrix.cuda.url }}
" -OutFile "${env:RUNNER_TEMP}\cuda-install.exe"
write-host "Installing CUDA"
write-host "Installing CUDA"
Start-Process "${env:RUNNER_TEMP}\cuda-install.exe" -ArgumentList '-s' -NoNewWindow -Wait
Start-Process "${env:RUNNER_TEMP}\cuda-install.exe" -ArgumentList '-s' -NoNewWindow -Wait
write-host "Completed CUDA"
write-host "Completed CUDA"
...
@@ -256,7 +263,7 @@ jobs:
...
@@ -256,7 +263,7 @@ jobs:
cp "${NVIDIA_DIR}\cublasLt64_*.dll" "dist\deps\"
cp "${NVIDIA_DIR}\cublasLt64_*.dll" "dist\deps\"
-
uses
:
actions/upload-artifact@v4
-
uses
:
actions/upload-artifact@v4
with
:
with
:
name
:
generate-windows-cuda
name
:
generate-windows-cuda
-${{ matrix.cuda.version }}
path
:
|
path
:
|
llm/build/**/bin/*
llm/build/**/bin/*
dist/windows-amd64/**
dist/windows-amd64/**
...
@@ -265,6 +272,7 @@ jobs:
...
@@ -265,6 +272,7 @@ jobs:
name
:
windows-cuda-deps
name
:
windows-cuda-deps
path
:
dist/deps/*
path
:
dist/deps/*
# Import the prior generation steps and build the final windows assets
# Import the prior generation steps and build the final windows assets
build-windows
:
build-windows
:
environment
:
release
environment
:
release
...
@@ -314,7 +322,10 @@ jobs:
...
@@ -314,7 +322,10 @@ jobs:
name
:
generate-windows-cpu
name
:
generate-windows-cpu
-
uses
:
actions/download-artifact@v4
-
uses
:
actions/download-artifact@v4
with
:
with
:
name
:
generate-windows-cuda
name
:
generate-windows-cuda-11
-
uses
:
actions/download-artifact@v4
with
:
name
:
generate-windows-cuda-12
-
uses
:
actions/download-artifact@v4
-
uses
:
actions/download-artifact@v4
with
:
with
:
name
:
windows-cuda-deps
name
:
windows-cuda-deps
...
@@ -363,7 +374,6 @@ jobs:
...
@@ -363,7 +374,6 @@ jobs:
-
run
:
|
-
run
:
|
./scripts/build_linux.sh
./scripts/build_linux.sh
./scripts/build_docker.sh
./scripts/build_docker.sh
mv dist/deps/* dist/
-
uses
:
actions/upload-artifact@v4
-
uses
:
actions/upload-artifact@v4
with
:
with
:
name
:
dist-linux-amd64
name
:
dist-linux-amd64
...
...
Dockerfile
View file @
652c273f
ARG
GOLANG_VERSION=1.22.5
ARG
GOLANG_VERSION=1.22.5
ARG
CMAKE_VERSION=3.22.1
ARG
CMAKE_VERSION=3.22.1
# this CUDA_VERSION corresponds with the one specified in docs/gpu.md
ARG
CUDA_VERSION_11=11.3.1
ARG
CUDA_VERSION=11.3.1
ARG
CUDA_V11_ARCHITECTURES="50;52;53;60;61;62;70;72;75;80;86"
ARG
CUDA_VERSION_12=12.4.0
ARG
CUDA_V12_ARCHITECTURES="60;61;62;70;72;75;80;86;87;89;90;90a"
ARG
ROCM_VERSION=6.1.2
ARG
ROCM_VERSION=6.1.2
# Copy the minimal context we need to run the generate scripts
# Copy the minimal context we need to run the generate scripts
...
@@ -10,7 +12,7 @@ COPY .git .git
...
@@ -10,7 +12,7 @@ COPY .git .git
COPY
.gitmodules .gitmodules
COPY
.gitmodules .gitmodules
COPY
llm llm
COPY
llm llm
FROM
--platform=linux/amd64 nvidia/cuda:$CUDA_VERSION-devel-centos7 AS cuda-build-amd64
FROM
--platform=linux/amd64 nvidia/cuda:$CUDA_VERSION
_11
-devel-centos7 AS cuda-
11-
build-amd64
ARG
CMAKE_VERSION
ARG
CMAKE_VERSION
COPY
./scripts/rh_linux_deps.sh /
COPY
./scripts/rh_linux_deps.sh /
RUN
CMAKE_VERSION
=
${
CMAKE_VERSION
}
sh /rh_linux_deps.sh
RUN
CMAKE_VERSION
=
${
CMAKE_VERSION
}
sh /rh_linux_deps.sh
...
@@ -18,9 +20,34 @@ ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
...
@@ -18,9 +20,34 @@ ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
COPY
--from=llm-code / /go/src/github.com/ollama/ollama/
COPY
--from=llm-code / /go/src/github.com/ollama/ollama/
WORKDIR
/go/src/github.com/ollama/ollama/llm/generate
WORKDIR
/go/src/github.com/ollama/ollama/llm/generate
ARG
CGO_CFLAGS
ARG
CGO_CFLAGS
RUN
OLLAMA_SKIP_STATIC_GENERATE
=
1
OLLAMA_SKIP_CPU_GENERATE
=
1 sh gen_linux.sh
ARG
CUDA_V11_ARCHITECTURES
ENV
GOARCH amd64
FROM
--platform=linux/arm64 nvidia/cuda:$CUDA_VERSION-devel-rockylinux8 AS cuda-build-arm64
RUN
--mount
=
type
=
cache,target
=
/root/.ccache
\
OLLAMA_SKIP_STATIC_GENERATE
=
1
\
OLLAMA_SKIP_CPU_GENERATE
=
1
\
CMAKE_CUDA_ARCHITECTURES
=
"
${
CUDA_V11_ARCHITECTURES
}
"
\
CUDA_VARIANT
=
"_v11"
\
bash gen_linux.sh
FROM
--platform=linux/amd64 nvidia/cuda:$CUDA_VERSION_12-devel-centos7 AS cuda-12-build-amd64
ARG
CMAKE_VERSION
COPY
./scripts/rh_linux_deps.sh /
RUN
CMAKE_VERSION
=
${
CMAKE_VERSION
}
sh /rh_linux_deps.sh
ENV
PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
COPY
--from=llm-code / /go/src/github.com/ollama/ollama/
WORKDIR
/go/src/github.com/ollama/ollama/llm/generate
ARG
CGO_CFLAGS
ARG
CUDA_V12_ARCHITECTURES
ENV
GOARCH amd64
RUN
--mount
=
type
=
cache,target
=
/root/.ccache
\
OLLAMA_SKIP_STATIC_GENERATE
=
1
\
OLLAMA_SKIP_CPU_GENERATE
=
1
\
CMAKE_CUDA_ARCHITECTURES
=
"
${
CUDA_V12_ARCHITECTURES
}
"
\
CUDA_VARIANT
=
"_v12"
\
OLLAMA_CUSTOM_CUDA_DEFS
=
"-DGGML_CUDA_USE_GRAPHS=on"
\
bash gen_linux.sh
FROM
--platform=linux/arm64 nvidia/cuda:$CUDA_VERSION_11-devel-rockylinux8 AS cuda-11-build-server-arm64
ARG
CMAKE_VERSION
ARG
CMAKE_VERSION
COPY
./scripts/rh_linux_deps.sh /
COPY
./scripts/rh_linux_deps.sh /
RUN
CMAKE_VERSION
=
${
CMAKE_VERSION
}
sh /rh_linux_deps.sh
RUN
CMAKE_VERSION
=
${
CMAKE_VERSION
}
sh /rh_linux_deps.sh
...
@@ -28,7 +55,32 @@ ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH
...
@@ -28,7 +55,32 @@ ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH
COPY
--from=llm-code / /go/src/github.com/ollama/ollama/
COPY
--from=llm-code / /go/src/github.com/ollama/ollama/
WORKDIR
/go/src/github.com/ollama/ollama/llm/generate
WORKDIR
/go/src/github.com/ollama/ollama/llm/generate
ARG
CGO_CFLAGS
ARG
CGO_CFLAGS
RUN
OLLAMA_SKIP_STATIC_GENERATE
=
1
OLLAMA_SKIP_CPU_GENERATE
=
1 sh gen_linux.sh
ARG
CUDA_V11_ARCHITECTURES
ENV
GOARCH arm64
RUN
OLLAMA_SKIP_STATIC_GENERATE
=
1
\
OLLAMA_SKIP_CPU_GENERATE
=
1
\
CMAKE_CUDA_ARCHITECTURES
=
"
${
CUDA_V11_ARCHITECTURES
}
"
\
CUDA_VARIANT
=
"_v11"
\
bash gen_linux.sh
FROM
--platform=linux/arm64 nvidia/cuda:$CUDA_VERSION_12-devel-rockylinux8 AS cuda-12-build-server-arm64
ARG
CMAKE_VERSION
COPY
./scripts/rh_linux_deps.sh /
RUN
CMAKE_VERSION
=
${
CMAKE_VERSION
}
sh /rh_linux_deps.sh
ENV
PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH
COPY
--from=llm-code / /go/src/github.com/ollama/ollama/
WORKDIR
/go/src/github.com/ollama/ollama/llm/generate
ARG
CGO_CFLAGS
ARG
CUDA_V12_ARCHITECTURES
ENV
GOARCH arm64
RUN
--mount
=
type
=
cache,target
=
/root/.ccache
\
OLLAMA_SKIP_STATIC_GENERATE
=
1
\
OLLAMA_SKIP_CPU_GENERATE
=
1
\
CMAKE_CUDA_ARCHITECTURES
=
"
${
CUDA_V12_ARCHITECTURES
}
"
\
CUDA_VARIANT
=
"_v12"
\
OLLAMA_CUSTOM_CUDA_DEFS
=
"-DGGML_CUDA_USE_GRAPHS=on"
\
bash gen_linux.sh
FROM
--platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete AS rocm-build-amd64
FROM
--platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete AS rocm-build-amd64
ARG
CMAKE_VERSION
ARG
CMAKE_VERSION
...
@@ -40,15 +92,11 @@ COPY --from=llm-code / /go/src/github.com/ollama/ollama/
...
@@ -40,15 +92,11 @@ COPY --from=llm-code / /go/src/github.com/ollama/ollama/
WORKDIR
/go/src/github.com/ollama/ollama/llm/generate
WORKDIR
/go/src/github.com/ollama/ollama/llm/generate
ARG
CGO_CFLAGS
ARG
CGO_CFLAGS
ARG
AMDGPU_TARGETS
ARG
AMDGPU_TARGETS
RUN
OLLAMA_SKIP_STATIC_GENERATE
=
1
OLLAMA_SKIP_CPU_GENERATE
=
1 sh gen_linux.sh
ENV
GOARCH amd64
RUN
mkdir
/tmp/scratch
&&
\
RUN
--mount
=
type
=
cache,target
=
/root/.ccache
\
for
dep
in
$(
zcat /go/src/github.com/ollama/ollama/llm/build/linux/x86_64/rocm
*
/bin/deps.txt.gz
)
;
do
\
OLLAMA_SKIP_STATIC_GENERATE
=
1
OLLAMA_SKIP_CPU_GENERATE
=
1 bash gen_linux.sh
cp
${
dep
}
/tmp/scratch/
||
exit
1
;
\
RUN
mkdir
-p
../../dist/linux-amd64/lib/ollama
&&
\
done
&&
\
(
cd
/opt/rocm/lib
&&
tar
cf - rocblas/library
)
|
(
cd
../../dist/linux-amd64/lib/ollama
&&
tar
xf -
)
(
cd
/opt/rocm/lib
&&
tar
cf - rocblas/library
)
|
(
cd
/tmp/scratch/
&&
tar
xf -
)
&&
\
mkdir
-p
/go/src/github.com/ollama/ollama/dist/deps/
&&
\
(
cd
/tmp/scratch/
&&
tar
czvf /go/src/github.com/ollama/ollama/dist/deps/ollama-linux-amd64-rocm.tgz
.
)
FROM
--platform=linux/amd64 centos:7 AS cpu-builder-amd64
FROM
--platform=linux/amd64 centos:7 AS cpu-builder-amd64
ARG
CMAKE_VERSION
ARG
CMAKE_VERSION
...
@@ -59,16 +107,21 @@ ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
...
@@ -59,16 +107,21 @@ ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
COPY
--from=llm-code / /go/src/github.com/ollama/ollama/
COPY
--from=llm-code / /go/src/github.com/ollama/ollama/
ARG
OLLAMA_CUSTOM_CPU_DEFS
ARG
OLLAMA_CUSTOM_CPU_DEFS
ARG
CGO_CFLAGS
ARG
CGO_CFLAGS
ENV
GOARCH amd64
WORKDIR
/go/src/github.com/ollama/ollama/llm/generate
WORKDIR
/go/src/github.com/ollama/ollama/llm/generate
FROM
--platform=linux/amd64 cpu-builder-amd64 AS static-build-amd64
FROM
--platform=linux/amd64 cpu-builder-amd64 AS static-build-amd64
RUN
OLLAMA_CPU_TARGET
=
"static"
sh gen_linux.sh
RUN
--mount
=
type
=
cache,target
=
/root/.ccache
\
OLLAMA_CPU_TARGET
=
"static"
bash gen_linux.sh
FROM
--platform=linux/amd64 cpu-builder-amd64 AS cpu-build-amd64
FROM
--platform=linux/amd64 cpu-builder-amd64 AS cpu-build-amd64
RUN
OLLAMA_SKIP_STATIC_GENERATE
=
1
OLLAMA_CPU_TARGET
=
"cpu"
sh gen_linux.sh
RUN
--mount
=
type
=
cache,target
=
/root/.ccache
\
OLLAMA_SKIP_STATIC_GENERATE
=
1
OLLAMA_CPU_TARGET
=
"cpu"
bash gen_linux.sh
FROM
--platform=linux/amd64 cpu-builder-amd64 AS cpu_avx-build-amd64
FROM
--platform=linux/amd64 cpu-builder-amd64 AS cpu_avx-build-amd64
RUN
OLLAMA_SKIP_STATIC_GENERATE
=
1
OLLAMA_CPU_TARGET
=
"cpu_avx"
sh gen_linux.sh
RUN
--mount
=
type
=
cache,target
=
/root/.ccache
\
OLLAMA_SKIP_STATIC_GENERATE
=
1
OLLAMA_CPU_TARGET
=
"cpu_avx"
bash gen_linux.sh
FROM
--platform=linux/amd64 cpu-builder-amd64 AS cpu_avx2-build-amd64
FROM
--platform=linux/amd64 cpu-builder-amd64 AS cpu_avx2-build-amd64
RUN
OLLAMA_SKIP_STATIC_GENERATE
=
1
OLLAMA_CPU_TARGET
=
"cpu_avx2"
sh gen_linux.sh
RUN
--mount
=
type
=
cache,target
=
/root/.ccache
\
OLLAMA_SKIP_STATIC_GENERATE
=
1
OLLAMA_CPU_TARGET
=
"cpu_avx2"
bash gen_linux.sh
FROM
--platform=linux/arm64 rockylinux:8 AS cpu-builder-arm64
FROM
--platform=linux/arm64 rockylinux:8 AS cpu-builder-arm64
ARG
CMAKE_VERSION
ARG
CMAKE_VERSION
...
@@ -79,12 +132,15 @@ ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH
...
@@ -79,12 +132,15 @@ ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH
COPY
--from=llm-code / /go/src/github.com/ollama/ollama/
COPY
--from=llm-code / /go/src/github.com/ollama/ollama/
ARG
OLLAMA_CUSTOM_CPU_DEFS
ARG
OLLAMA_CUSTOM_CPU_DEFS
ARG
CGO_CFLAGS
ARG
CGO_CFLAGS
ENV
GOARCH arm64
WORKDIR
/go/src/github.com/ollama/ollama/llm/generate
WORKDIR
/go/src/github.com/ollama/ollama/llm/generate
FROM
--platform=linux/arm64 cpu-builder-arm64 AS static-build-arm64
FROM
--platform=linux/arm64 cpu-builder-arm64 AS static-build-arm64
RUN
OLLAMA_CPU_TARGET
=
"static"
sh gen_linux.sh
RUN
--mount
=
type
=
cache,target
=
/root/.ccache
\
OLLAMA_CPU_TARGET
=
"static"
bash gen_linux.sh
FROM
--platform=linux/arm64 cpu-builder-arm64 AS cpu-build-arm64
FROM
--platform=linux/arm64 cpu-builder-arm64 AS cpu-build-arm64
RUN
OLLAMA_SKIP_STATIC_GENERATE
=
1
OLLAMA_CPU_TARGET
=
"cpu"
sh gen_linux.sh
RUN
--mount
=
type
=
cache,target
=
/root/.ccache
\
OLLAMA_SKIP_STATIC_GENERATE
=
1
OLLAMA_CPU_TARGET
=
"cpu"
bash gen_linux.sh
# Intermediate stage used for ./scripts/build_linux.sh
# Intermediate stage used for ./scripts/build_linux.sh
...
@@ -95,12 +151,16 @@ COPY . .
...
@@ -95,12 +151,16 @@ COPY . .
COPY
--from=static-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY
--from=static-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY
--from=cpu_avx-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY
--from=cpu_avx-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY
--from=cpu_avx2-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY
--from=cpu_avx2-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY
--from=cuda-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY
--from=cuda-11-build-amd64 /go/src/github.com/ollama/ollama/dist/ dist/
COPY
--from=cuda-11-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY
--from=cuda-12-build-amd64 /go/src/github.com/ollama/ollama/dist/ dist/
COPY
--from=cuda-12-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY
--from=rocm-build-amd64 /go/src/github.com/ollama/ollama/dist/ dist/
COPY
--from=rocm-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY
--from=rocm-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY
--from=rocm-build-amd64 /go/src/github.com/ollama/ollama/dist/deps/ ./dist/deps/
ARG
GOFLAGS
ARG
GOFLAGS
ARG
CGO_CFLAGS
ARG
CGO_CFLAGS
RUN
go build
-trimpath
.
RUN
--mount
=
type
=
cache,target
=
/root/.ccache
\
go build
-trimpath
-o
dist/linux-amd64/bin/ollama .
# Intermediate stage used for ./scripts/build_linux.sh
# Intermediate stage used for ./scripts/build_linux.sh
FROM
--platform=linux/arm64 cpu-build-arm64 AS build-arm64
FROM
--platform=linux/arm64 cpu-build-arm64 AS build-arm64
...
@@ -109,23 +169,36 @@ ARG GOLANG_VERSION
...
@@ -109,23 +169,36 @@ ARG GOLANG_VERSION
WORKDIR
/go/src/github.com/ollama/ollama
WORKDIR
/go/src/github.com/ollama/ollama
COPY
. .
COPY
. .
COPY
--from=static-build-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY
--from=static-build-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY
--from=cuda-build-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY
--from=cuda-11-build-server-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
COPY
--from=cuda-11-build-server-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
COPY
--from=cuda-12-build-server-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
COPY
--from=cuda-12-build-server-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
ARG
GOFLAGS
ARG
GOFLAGS
ARG
CGO_CFLAGS
ARG
CGO_CFLAGS
RUN
go build
-trimpath
.
RUN
--mount
=
type
=
cache,target
=
/root/.ccache
\
go build
-trimpath
-o
dist/linux-arm64/bin/ollama .
# Strip out ROCm dependencies to keep the primary image lean
FROM
--platform=linux/amd64 ubuntu:22.04 as amd64-libs-without-rocm
COPY
--from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /scratch/
RUN
cd
/scratch/ollama/
&&
rm
-rf
rocblas libamd
*
libdrm
*
libroc
*
libhip
*
libhsa
*
# Runtime stages
# Runtime stages
FROM
--platform=linux/amd64 ubuntu:22.04 as runtime-amd64
FROM
--platform=linux/amd64 ubuntu:22.04 as runtime-amd64
COPY
--from=amd64-libs-without-rocm /scratch/ /lib/
RUN
apt-get update
&&
apt-get
install
-y
ca-certificates
RUN
apt-get update
&&
apt-get
install
-y
ca-certificates
COPY
--from=build-amd64 /go/src/github.com/ollama/ollama/ollama /bin/ollama
COPY
--from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/
FROM
--platform=linux/arm64 ubuntu:22.04 as runtime-arm64
FROM
--platform=linux/arm64 ubuntu:22.04 as runtime-arm64
COPY
--from=build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/
RUN
apt-get update
&&
apt-get
install
-y
ca-certificates
RUN
apt-get update
&&
apt-get
install
-y
ca-certificates
COPY
--from=build-arm64 /go/src/github.com/ollama/ollama/
ollama /bin/ollama
COPY
--from=build-arm64 /go/src/github.com/ollama/ollama/
dist/linux-arm64/bin/ /bin/
# Radeon images are much larger so we keep it distinct from the CPU/CUDA image
# Radeon images are much larger so we keep it distinct from the CPU/CUDA image
FROM
--platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete as runtime-rocm
FROM
--platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete as runtime-rocm
RUN
update-pciids
RUN
update-pciids
COPY
--from=build-amd64 /go/src/github.com/ollama/ollama/ollama /bin/ollama
COPY
--from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/
RUN
ln
-s
/opt/rocm/lib /lib/ollama
EXPOSE
11434
EXPOSE
11434
ENV
OLLAMA_HOST 0.0.0.0
ENV
OLLAMA_HOST 0.0.0.0
...
...
app/ollama.iss
View file @
652c273f
...
@@ -87,20 +87,11 @@ DialogFontSize=12
...
@@ -87,20 +87,11 @@ DialogFontSize=12
[Files]
[Files]
Source: ".\app.exe"; DestDir: "{app}"; DestName: "{#MyAppExeName}" ; Flags: ignoreversion 64bit
Source: ".\app.exe"; DestDir: "{app}"; DestName: "{#MyAppExeName}" ; Flags: ignoreversion 64bit
Source: "..\ollama.exe"; DestDir: "{app}"; Flags: ignoreversion 64bit
Source: "..\ollama.exe"; DestDir: "{app}
\bin
"; Flags: ignoreversion 64bit
Source: "..\dist\windows-{#ARCH}\ollama
_
runners\*"; DestDir: "{app}\ollama
_
runners"; Flags: ignoreversion 64bit recursesubdirs
Source: "..\dist\windows-{#ARCH}\
lib\
ollama
\
runners\*"; DestDir: "{app}\
lib\
ollama
\
runners"; Flags: ignoreversion 64bit recursesubdirs
Source: "..\dist\ollama_welcome.ps1"; DestDir: "{app}"; Flags: ignoreversion
Source: "..\dist\ollama_welcome.ps1"; DestDir: "{app}"; Flags: ignoreversion
Source: ".\assets\app.ico"; DestDir: "{app}"; Flags: ignoreversion
Source: ".\assets\app.ico"; DestDir: "{app}"; Flags: ignoreversion
#if DirExists("..\dist\windows-amd64\cuda")
Source: "..\dist\windows-amd64\lib\ollama\*"; DestDir: "{app}\lib\ollama\"; Flags: ignoreversion recursesubdirs
Source: "..\dist\windows-amd64\cuda\*"; DestDir: "{app}\cuda\"; Flags: ignoreversion recursesubdirs
#endif
#if DirExists("..\dist\windows-amd64\oneapi")
Source: "..\dist\windows-amd64\oneapi\*"; DestDir: "{app}\oneapi\"; Flags: ignoreversion recursesubdirs
#endif
#if DirExists("..\dist\windows-amd64\rocm")
Source: "..\dist\windows-amd64\rocm\*"; DestDir: "{app}\rocm\"; Flags: ignoreversion recursesubdirs
#endif
[Icons]
[Icons]
Name: "{group}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; IconFilename: "{app}\app.ico"
Name: "{group}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; IconFilename: "{app}\app.ico"
...
@@ -108,7 +99,7 @@ Name: "{userstartup}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; IconFilen
...
@@ -108,7 +99,7 @@ Name: "{userstartup}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; IconFilen
Name: "{userprograms}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; IconFilename: "{app}\app.ico"
Name: "{userprograms}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; IconFilename: "{app}\app.ico"
[Run]
[Run]
Filename: "{cmd}"; Parameters: "/C set PATH={app};%PATH% & ""{app}\{#MyAppExeName}"""; Flags: postinstall nowait runhidden
Filename: "{cmd}"; Parameters: "/C set PATH={app}
\bin
;%PATH% & ""{app}\{#MyAppExeName}"""; Flags: postinstall nowait runhidden
[UninstallRun]
[UninstallRun]
; Filename: "{cmd}"; Parameters: "/C ""taskkill /im ''{#MyAppExeName}'' /f /t"; Flags: runhidden
; Filename: "{cmd}"; Parameters: "/C ""taskkill /im ''{#MyAppExeName}'' /f /t"; Flags: runhidden
...
@@ -143,8 +134,8 @@ SetupAppRunningError=Another Ollama installer is running.%n%nPlease cancel or fi
...
@@ -143,8 +134,8 @@ SetupAppRunningError=Another Ollama installer is running.%n%nPlease cancel or fi
[Registry]
[Registry]
Root: HKCU; Subkey: "Environment"; \
Root: HKCU; Subkey: "Environment"; \
ValueType: expandsz; ValueName: "Path"; ValueData: "{olddata};{app}"; \
ValueType: expandsz; ValueName: "Path"; ValueData: "{olddata};{app}
\bin
"; \
Check: NeedsAddPath('{app}')
Check: NeedsAddPath('{app}
\bin
')
[Code]
[Code]
...
...
docs/linux.md
View file @
652c273f
...
@@ -20,13 +20,12 @@ GPU.
...
@@ -20,13 +20,12 @@ GPU.
## Manual install
## Manual install
### Download
the
`ollama`
binary
### Download `ollama`
Ollama is distributed as a self-contained binary. Download it to a directory in your PATH
:
Download and extract the Linux package
:
```
bash
```
bash
sudo
curl
-L
https://ollama.com/download/ollama-linux-amd64
-o
/usr/bin/ollama
curl
-fsSL
https://ollama.com/download/ollama-linux-amd64.tgz |
sudo tar
zx
-C
/usr
sudo chmod
+x /usr/bin/ollama
```
```
### Adding Ollama as a startup service (recommended)
### Adding Ollama as a startup service (recommended)
...
@@ -96,8 +95,7 @@ curl -fsSL https://ollama.com/install.sh | sh
...
@@ -96,8 +95,7 @@ curl -fsSL https://ollama.com/install.sh | sh
Or by downloading the ollama binary:
Or by downloading the ollama binary:
```
bash
```
bash
sudo
curl
-L
https://ollama.com/download/ollama-linux-amd64
-o
/usr/bin/ollama
curl
-fsSL
https://ollama.com/download/ollama-linux-amd64.tgz |
sudo tar
zx
-C
/usr
sudo chmod
+x /usr/bin/ollama
```
```
## Installing specific versions
## Installing specific versions
...
...
envconfig/config.go
View file @
652c273f
...
@@ -174,7 +174,7 @@ func RunnersDir() (p string) {
...
@@ -174,7 +174,7 @@ func RunnersDir() (p string) {
defer
func
()
{
defer
func
()
{
if
p
==
""
{
if
p
==
""
{
slog
.
Error
(
"unable to locate llm runner directory. Set OLLAMA_RUNNERS_DIR to the location of 'ollama
_
runners'"
)
slog
.
Error
(
"unable to locate llm runner directory. Set OLLAMA_RUNNERS_DIR to the location of 'ollama
/
runners'"
)
}
}
}()
}()
...
@@ -190,17 +190,17 @@ func RunnersDir() (p string) {
...
@@ -190,17 +190,17 @@ func RunnersDir() (p string) {
}
}
var
paths
[]
string
var
paths
[]
string
for
_
,
root
:=
range
[]
string
{
filepath
.
Dir
(
exe
),
cwd
}
{
for
_
,
root
:=
range
[]
string
{
filepath
.
Dir
(
exe
),
filepath
.
Join
(
filepath
.
Dir
(
exe
),
".."
),
cwd
}
{
paths
=
append
(
paths
,
paths
=
append
(
paths
,
root
,
root
,
filepath
.
Join
(
root
,
"windows
-"
+
runtime
.
GOARCH
),
filepath
.
Join
(
root
,
runtime
.
GOOS
+
"
-"
+
runtime
.
GOARCH
),
filepath
.
Join
(
root
,
"dist"
,
"windows
-"
+
runtime
.
GOARCH
),
filepath
.
Join
(
root
,
"dist"
,
runtime
.
GOOS
+
"
-"
+
runtime
.
GOARCH
),
)
)
}
}
// Try a few variations to improve developer experience when building from source in the local tree
// Try a few variations to improve developer experience when building from source in the local tree
for
_
,
path
:=
range
paths
{
for
_
,
path
:=
range
paths
{
candidate
:=
filepath
.
Join
(
path
,
"ollama
_
runners"
)
candidate
:=
filepath
.
Join
(
path
,
"lib"
,
"ollama
"
,
"
runners"
)
if
_
,
err
:=
os
.
Stat
(
candidate
);
err
==
nil
{
if
_
,
err
:=
os
.
Stat
(
candidate
);
err
==
nil
{
p
=
candidate
p
=
candidate
break
break
...
...
gpu/amd_common.go
View file @
652c273f
...
@@ -54,7 +54,7 @@ func commonAMDValidateLibDir() (string, error) {
...
@@ -54,7 +54,7 @@ func commonAMDValidateLibDir() (string, error) {
// Installer payload location if we're running the installed binary
// Installer payload location if we're running the installed binary
exe
,
err
:=
os
.
Executable
()
exe
,
err
:=
os
.
Executable
()
if
err
==
nil
{
if
err
==
nil
{
rocmTargetDir
:=
filepath
.
Join
(
filepath
.
Dir
(
exe
),
"
rocm
"
)
rocmTargetDir
:=
filepath
.
Join
(
filepath
.
Dir
(
exe
),
"
.."
,
"lib"
,
"ollama
"
)
if
rocmLibUsable
(
rocmTargetDir
)
{
if
rocmLibUsable
(
rocmTargetDir
)
{
slog
.
Debug
(
"detected ROCM next to ollama executable "
+
rocmTargetDir
)
slog
.
Debug
(
"detected ROCM next to ollama executable "
+
rocmTargetDir
)
return
rocmTargetDir
,
nil
return
rocmTargetDir
,
nil
...
...
gpu/amd_windows.go
View file @
652c273f
...
@@ -153,7 +153,7 @@ func AMDValidateLibDir() (string, error) {
...
@@ -153,7 +153,7 @@ func AMDValidateLibDir() (string, error) {
// Installer payload (if we're running from some other location)
// Installer payload (if we're running from some other location)
localAppData
:=
os
.
Getenv
(
"LOCALAPPDATA"
)
localAppData
:=
os
.
Getenv
(
"LOCALAPPDATA"
)
appDir
:=
filepath
.
Join
(
localAppData
,
"Programs"
,
"Ollama"
)
appDir
:=
filepath
.
Join
(
localAppData
,
"Programs"
,
"Ollama"
)
rocmTargetDir
:=
filepath
.
Join
(
appDir
,
"
rocm
"
)
rocmTargetDir
:=
filepath
.
Join
(
appDir
,
"
.."
,
"lib"
,
"ollama
"
)
if
rocmLibUsable
(
rocmTargetDir
)
{
if
rocmLibUsable
(
rocmTargetDir
)
{
slog
.
Debug
(
"detected ollama installed ROCm at "
+
rocmTargetDir
)
slog
.
Debug
(
"detected ollama installed ROCm at "
+
rocmTargetDir
)
return
rocmTargetDir
,
nil
return
rocmTargetDir
,
nil
...
...
gpu/cuda_common.go
View file @
652c273f
...
@@ -4,9 +4,17 @@ package gpu
...
@@ -4,9 +4,17 @@ package gpu
import
(
import
(
"log/slog"
"log/slog"
"os"
"regexp"
"runtime"
"strconv"
"strings"
"strings"
)
)
// Jetson devices have JETSON_JETPACK="x.y.z" factory set to the Jetpack version installed.
// Included to drive logic for reducing Ollama-allocated overhead on L4T/Jetson devices.
var
CudaTegra
string
=
os
.
Getenv
(
"JETSON_JETPACK"
)
func
cudaGetVisibleDevicesEnv
(
gpuInfo
[]
GpuInfo
)
(
string
,
string
)
{
func
cudaGetVisibleDevicesEnv
(
gpuInfo
[]
GpuInfo
)
(
string
,
string
)
{
ids
:=
[]
string
{}
ids
:=
[]
string
{}
for
_
,
info
:=
range
gpuInfo
{
for
_
,
info
:=
range
gpuInfo
{
...
@@ -19,3 +27,38 @@ func cudaGetVisibleDevicesEnv(gpuInfo []GpuInfo) (string, string) {
...
@@ -19,3 +27,38 @@ func cudaGetVisibleDevicesEnv(gpuInfo []GpuInfo) (string, string) {
}
}
return
"CUDA_VISIBLE_DEVICES"
,
strings
.
Join
(
ids
,
","
)
return
"CUDA_VISIBLE_DEVICES"
,
strings
.
Join
(
ids
,
","
)
}
}
func
cudaVariant
(
gpuInfo
CudaGPUInfo
)
string
{
if
runtime
.
GOARCH
==
"arm64"
&&
runtime
.
GOOS
==
"linux"
{
if
CudaTegra
!=
""
{
ver
:=
strings
.
Split
(
CudaTegra
,
"."
)
if
len
(
ver
)
>
0
{
return
"jetpack"
+
ver
[
0
]
}
}
else
if
data
,
err
:=
os
.
ReadFile
(
"/etc/nv_tegra_release"
);
err
==
nil
{
r
:=
regexp
.
MustCompile
(
` R(\d+) `
)
m
:=
r
.
FindSubmatch
(
data
)
if
len
(
m
)
!=
2
{
slog
.
Info
(
"Unexpected format for /etc/nv_tegra_release. Set JETSON_JETPACK to select version"
)
}
else
{
if
l4t
,
err
:=
strconv
.
Atoi
(
string
(
m
[
1
]));
err
==
nil
{
// Note: mapping from L4t -> JP is inconsistent (can't just subtract 30)
// https://developer.nvidia.com/embedded/jetpack-archive
switch
l4t
{
case
35
:
return
"jetpack5"
case
36
:
return
"jetpack6"
default
:
slog
.
Info
(
"unsupported L4T version"
,
"nv_tegra_release"
,
string
(
data
))
}
}
}
}
}
if
gpuInfo
.
computeMajor
<
6
||
gpuInfo
.
DriverMajor
<
12
{
return
"v11"
}
return
"v12"
}
gpu/gpu.go
View file @
652c273f
...
@@ -64,10 +64,6 @@ var RocmComputeMin = 9
...
@@ -64,10 +64,6 @@ var RocmComputeMin = 9
// TODO find a better way to detect iGPU instead of minimum memory
// TODO find a better way to detect iGPU instead of minimum memory
const
IGPUMemLimit
=
1
*
format
.
GibiByte
// 512G is what they typically report, so anything less than 1G must be iGPU
const
IGPUMemLimit
=
1
*
format
.
GibiByte
// 512G is what they typically report, so anything less than 1G must be iGPU
// Jetson devices have JETSON_JETPACK="x.y.z" factory set to the Jetpack version installed.
// Included to drive logic for reducing Ollama-allocated overhead on L4T/Jetson devices.
var
CudaTegra
string
=
os
.
Getenv
(
"JETSON_JETPACK"
)
// Note: gpuMutex must already be held
// Note: gpuMutex must already be held
func
initCudaHandles
()
*
cudaHandles
{
func
initCudaHandles
()
*
cudaHandles
{
// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing
// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing
...
@@ -215,7 +211,7 @@ func GetGPUInfo() GpuInfoList {
...
@@ -215,7 +211,7 @@ func GetGPUInfo() GpuInfoList {
GpuInfo
:
GpuInfo
{
GpuInfo
:
GpuInfo
{
memInfo
:
mem
,
memInfo
:
mem
,
Library
:
"cpu"
,
Library
:
"cpu"
,
Variant
:
cpuCapability
,
Variant
:
cpuCapability
.
String
()
,
ID
:
"0"
,
ID
:
"0"
,
},
},
},
},
...
@@ -229,11 +225,7 @@ func GetGPUInfo() GpuInfoList {
...
@@ -229,11 +225,7 @@ func GetGPUInfo() GpuInfoList {
return
GpuInfoList
{
cpus
[
0
]
.
GpuInfo
}
return
GpuInfoList
{
cpus
[
0
]
.
GpuInfo
}
}
}
// On windows we bundle the nvidia library one level above the runner dir
depPath
:=
LibraryDir
()
depPath
:=
""
if
runtime
.
GOOS
==
"windows"
&&
envconfig
.
RunnersDir
()
!=
""
{
depPath
=
filepath
.
Join
(
filepath
.
Dir
(
envconfig
.
RunnersDir
()),
"cuda"
)
}
// Load ALL libraries
// Load ALL libraries
cHandles
=
initCudaHandles
()
cHandles
=
initCudaHandles
()
...
@@ -269,11 +261,23 @@ func GetGPUInfo() GpuInfoList {
...
@@ -269,11 +261,23 @@ func GetGPUInfo() GpuInfoList {
gpuInfo
.
FreeMemory
=
uint64
(
memInfo
.
free
)
gpuInfo
.
FreeMemory
=
uint64
(
memInfo
.
free
)
gpuInfo
.
ID
=
C
.
GoString
(
&
memInfo
.
gpu_id
[
0
])
gpuInfo
.
ID
=
C
.
GoString
(
&
memInfo
.
gpu_id
[
0
])
gpuInfo
.
Compute
=
fmt
.
Sprintf
(
"%d.%d"
,
memInfo
.
major
,
memInfo
.
minor
)
gpuInfo
.
Compute
=
fmt
.
Sprintf
(
"%d.%d"
,
memInfo
.
major
,
memInfo
.
minor
)
gpuInfo
.
computeMajor
=
int
(
memInfo
.
major
)
gpuInfo
.
computeMinor
=
int
(
memInfo
.
minor
)
gpuInfo
.
MinimumMemory
=
cudaMinimumMemory
gpuInfo
.
MinimumMemory
=
cudaMinimumMemory
gpuInfo
.
DependencyPath
=
depPath
variant
:=
cudaVariant
(
gpuInfo
)
if
depPath
!=
""
{
gpuInfo
.
DependencyPath
=
depPath
// Check for variant specific directory
if
variant
!=
""
{
if
_
,
err
:=
os
.
Stat
(
filepath
.
Join
(
depPath
,
"cuda_"
+
variant
));
err
==
nil
{
gpuInfo
.
DependencyPath
=
filepath
.
Join
(
depPath
,
"cuda_"
+
variant
)
}
}
}
gpuInfo
.
Name
=
C
.
GoString
(
&
memInfo
.
gpu_name
[
0
])
gpuInfo
.
Name
=
C
.
GoString
(
&
memInfo
.
gpu_name
[
0
])
gpuInfo
.
DriverMajor
=
driverMajor
gpuInfo
.
DriverMajor
=
driverMajor
gpuInfo
.
DriverMinor
=
driverMinor
gpuInfo
.
DriverMinor
=
driverMinor
gpuInfo
.
Variant
=
variant
// query the management library as well so we can record any skew between the two
// query the management library as well so we can record any skew between the two
// which represents overhead on the GPU we must set aside on subsequent updates
// which represents overhead on the GPU we must set aside on subsequent updates
...
@@ -306,13 +310,6 @@ func GetGPUInfo() GpuInfoList {
...
@@ -306,13 +310,6 @@ func GetGPUInfo() GpuInfoList {
if
envconfig
.
IntelGPU
()
{
if
envconfig
.
IntelGPU
()
{
oHandles
=
initOneAPIHandles
()
oHandles
=
initOneAPIHandles
()
if
oHandles
!=
nil
&&
oHandles
.
oneapi
!=
nil
{
if
oHandles
!=
nil
&&
oHandles
.
oneapi
!=
nil
{
// On windows we bundle the oneapi library one level above the runner dir
depPath
=
""
if
runtime
.
GOOS
==
"windows"
&&
envconfig
.
RunnersDir
()
!=
""
{
depPath
=
filepath
.
Join
(
filepath
.
Dir
(
envconfig
.
RunnersDir
()),
"oneapi"
)
}
for
d
:=
range
oHandles
.
oneapi
.
num_drivers
{
for
d
:=
range
oHandles
.
oneapi
.
num_drivers
{
if
oHandles
.
oneapi
==
nil
{
if
oHandles
.
oneapi
==
nil
{
// shouldn't happen
// shouldn't happen
...
@@ -467,10 +464,12 @@ func GetGPUInfo() GpuInfoList {
...
@@ -467,10 +464,12 @@ func GetGPUInfo() GpuInfoList {
func
FindGPULibs
(
baseLibName
string
,
defaultPatterns
[]
string
)
[]
string
{
func
FindGPULibs
(
baseLibName
string
,
defaultPatterns
[]
string
)
[]
string
{
// Multiple GPU libraries may exist, and some may not work, so keep trying until we exhaust them
// Multiple GPU libraries may exist, and some may not work, so keep trying until we exhaust them
var
ldPaths
[]
string
var
ldPaths
[]
string
var
patterns
[]
string
gpuLibPaths
:=
[]
string
{}
gpuLibPaths
:=
[]
string
{}
slog
.
Debug
(
"Searching for GPU library"
,
"name"
,
baseLibName
)
slog
.
Debug
(
"Searching for GPU library"
,
"name"
,
baseLibName
)
// Start with our bundled libraries
patterns
:=
[]
string
{
filepath
.
Join
(
LibraryDir
(),
baseLibName
)}
switch
runtime
.
GOOS
{
switch
runtime
.
GOOS
{
case
"windows"
:
case
"windows"
:
ldPaths
=
strings
.
Split
(
os
.
Getenv
(
"PATH"
),
";"
)
ldPaths
=
strings
.
Split
(
os
.
Getenv
(
"PATH"
),
";"
)
...
@@ -479,13 +478,14 @@ func FindGPULibs(baseLibName string, defaultPatterns []string) []string {
...
@@ -479,13 +478,14 @@ func FindGPULibs(baseLibName string, defaultPatterns []string) []string {
default
:
default
:
return
gpuLibPaths
return
gpuLibPaths
}
}
// Start with whatever we find in the PATH/LD_LIBRARY_PATH
// Then with whatever we find in the PATH/LD_LIBRARY_PATH
for
_
,
ldPath
:=
range
ldPaths
{
for
_
,
ldPath
:=
range
ldPaths
{
d
,
err
:=
filepath
.
Abs
(
ldPath
)
d
,
err
:=
filepath
.
Abs
(
ldPath
)
if
err
!=
nil
{
if
err
!=
nil
{
continue
continue
}
}
patterns
=
append
(
patterns
,
filepath
.
Join
(
d
,
baseLibName
+
"*"
))
patterns
=
append
(
patterns
,
filepath
.
Join
(
d
,
baseLibName
))
}
}
patterns
=
append
(
patterns
,
defaultPatterns
...
)
patterns
=
append
(
patterns
,
defaultPatterns
...
)
slog
.
Debug
(
"gpu library search"
,
"globs"
,
patterns
)
slog
.
Debug
(
"gpu library search"
,
"globs"
,
patterns
)
...
@@ -641,3 +641,31 @@ func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) {
...
@@ -641,3 +641,31 @@ func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) {
return
""
,
""
return
""
,
""
}
}
}
}
// LibraryDir locates the directory holding Ollama's bundled GPU dependency
// libraries ("lib/ollama"). It probes, in order: the directory containing the
// running executable, that directory's parent, and the current working
// directory — checking each root for the packaged layout, then the developer
// build layouts (<root>/<GOOS>-<GOARCH>/lib/ollama and
// <root>/dist/<GOOS>-<GOARCH>/lib/ollama). The first existing match wins.
// Returns "" (after logging a warning) when no dependency directory is found.
func LibraryDir() string {
	// On Windows/linux we bundle the dependencies at the same level as the executable
	appExe, err := os.Executable()
	if err != nil {
		// Non-fatal: we fall through and still try the cwd-based roots.
		slog.Warn("failed to lookup executable path", "error", err)
	}
	cwd, err := os.Getwd()
	if err != nil {
		slog.Warn("failed to lookup working directory", "error", err)
	}
	// Scan for any of our dependencies, and pick first match
	for _, root := range []string{filepath.Dir(appExe), filepath.Join(filepath.Dir(appExe), ".."), cwd} {
		libDep := filepath.Join("lib", "ollama")
		if _, err := os.Stat(filepath.Join(root, libDep)); err == nil {
			return filepath.Join(root, libDep)
		}
		// Developer mode, local build
		if _, err := os.Stat(filepath.Join(root, runtime.GOOS+"-"+runtime.GOARCH, libDep)); err == nil {
			return filepath.Join(root, runtime.GOOS+"-"+runtime.GOARCH, libDep)
		}
		if _, err := os.Stat(filepath.Join(root, "dist", runtime.GOOS+"-"+runtime.GOARCH, libDep)); err == nil {
			return filepath.Join(root, "dist", runtime.GOOS+"-"+runtime.GOARCH, libDep)
		}
	}
	slog.Warn("unable to locate gpu dependency libraries")
	return ""
}
gpu/gpu_darwin.go
View file @
652c273f
...
@@ -25,7 +25,7 @@ func GetGPUInfo() GpuInfoList {
...
@@ -25,7 +25,7 @@ func GetGPUInfo() GpuInfoList {
return
[]
GpuInfo
{
return
[]
GpuInfo
{
{
{
Library
:
"cpu"
,
Library
:
"cpu"
,
Variant
:
GetCPUCapability
(),
Variant
:
GetCPUCapability
()
.
String
()
,
memInfo
:
mem
,
memInfo
:
mem
,
},
},
}
}
...
@@ -48,7 +48,7 @@ func GetCPUInfo() GpuInfoList {
...
@@ -48,7 +48,7 @@ func GetCPUInfo() GpuInfoList {
return
[]
GpuInfo
{
return
[]
GpuInfo
{
{
{
Library
:
"cpu"
,
Library
:
"cpu"
,
Variant
:
GetCPUCapability
(),
Variant
:
GetCPUCapability
()
.
String
()
,
memInfo
:
mem
,
memInfo
:
mem
,
},
},
}
}
...
...
gpu/gpu_linux.go
View file @
652c273f
...
@@ -47,7 +47,7 @@ var (
...
@@ -47,7 +47,7 @@ var (
CudartMgmtName
=
"libcudart.so*"
CudartMgmtName
=
"libcudart.so*"
NvcudaMgmtName
=
"libcuda.so*"
NvcudaMgmtName
=
"libcuda.so*"
NvmlMgmtName
=
""
// not currently wired on linux
NvmlMgmtName
=
""
// not currently wired on linux
OneapiMgmtName
=
"libze_intel_gpu.so"
OneapiMgmtName
=
"libze_intel_gpu.so
*
"
)
)
func
GetCPUMem
()
(
memInfo
,
error
)
{
func
GetCPUMem
()
(
memInfo
,
error
)
{
...
...
gpu/types.go
View file @
652c273f
...
@@ -19,7 +19,7 @@ type GpuInfo struct {
...
@@ -19,7 +19,7 @@ type GpuInfo struct {
Library
string
`json:"library,omitempty"`
Library
string
`json:"library,omitempty"`
// Optional variant to select (e.g. versions, cpu feature flags)
// Optional variant to select (e.g. versions, cpu feature flags)
Variant
CPUCapability
`json:"variant"`
Variant
string
`json:"variant"`
// MinimumMemory represents the minimum memory required to use the GPU
// MinimumMemory represents the minimum memory required to use the GPU
MinimumMemory
uint64
`json:"-"`
MinimumMemory
uint64
`json:"-"`
...
@@ -53,8 +53,10 @@ type CPUInfo struct {
...
@@ -53,8 +53,10 @@ type CPUInfo struct {
type
CudaGPUInfo
struct
{
type
CudaGPUInfo
struct
{
GpuInfo
GpuInfo
OSOverhead
uint64
// Memory overhead between the driver library and management library
OSOverhead
uint64
// Memory overhead between the driver library and management library
index
int
//nolint:unused,nolintlint
index
int
//nolint:unused,nolintlint
computeMajor
int
//nolint:unused,nolintlint
computeMinor
int
//nolint:unused,nolintlint
}
}
type
CudaGPUInfoList
[]
CudaGPUInfo
type
CudaGPUInfoList
[]
CudaGPUInfo
...
@@ -81,8 +83,8 @@ func (l GpuInfoList) ByLibrary() []GpuInfoList {
...
@@ -81,8 +83,8 @@ func (l GpuInfoList) ByLibrary() []GpuInfoList {
for
_
,
info
:=
range
l
{
for
_
,
info
:=
range
l
{
found
:=
false
found
:=
false
requested
:=
info
.
Library
requested
:=
info
.
Library
if
info
.
Variant
!=
CPUCapabilityNone
{
if
info
.
Variant
!=
CPUCapabilityNone
.
String
()
{
requested
+=
"_"
+
info
.
Variant
.
String
()
requested
+=
"_"
+
info
.
Variant
}
}
for
i
,
lib
:=
range
libs
{
for
i
,
lib
:=
range
libs
{
if
lib
==
requested
{
if
lib
==
requested
{
...
@@ -105,6 +107,7 @@ func (l GpuInfoList) LogDetails() {
...
@@ -105,6 +107,7 @@ func (l GpuInfoList) LogDetails() {
slog
.
Info
(
"inference compute"
,
slog
.
Info
(
"inference compute"
,
"id"
,
g
.
ID
,
"id"
,
g
.
ID
,
"library"
,
g
.
Library
,
"library"
,
g
.
Library
,
"variant"
,
g
.
Variant
,
"compute"
,
g
.
Compute
,
"compute"
,
g
.
Compute
,
"driver"
,
fmt
.
Sprintf
(
"%d.%d"
,
g
.
DriverMajor
,
g
.
DriverMinor
),
"driver"
,
fmt
.
Sprintf
(
"%d.%d"
,
g
.
DriverMajor
,
g
.
DriverMinor
),
"name"
,
g
.
Name
,
"name"
,
g
.
Name
,
...
...
llm/ext_server/CMakeLists.txt
View file @
652c273f
# Build configuration for the ollama_llama_server executable.
set(TARGET ollama_llama_server)
option(LLAMA_SERVER_VERBOSE "Build verbose logging option for Server" ON)

# Extra linker flags (e.g. GPU runtime libraries) are injected by the
# generate scripts via the LLAMA_SERVER_LDFLAGS environment variable.
set(LLAMA_SERVER_LDFLAGS $ENV{LLAMA_SERVER_LDFLAGS})

include_directories(${CMAKE_CURRENT_SOURCE_DIR})
add_executable(${TARGET} server.cpp utils.hpp json.hpp httplib.h)
install(TARGETS ${TARGET} RUNTIME)
target_compile_definitions(${TARGET} PRIVATE
    SERVER_VERBOSE=$<BOOL:${LLAMA_SERVER_VERBOSE}>
)
target_link_libraries(${TARGET} PRIVATE ggml llama common llava ${CMAKE_THREAD_LIBS_INIT} ${LLAMA_SERVER_LDFLAGS})
if (WIN32)
    # Winsock is required by httplib on Windows.
    TARGET_LINK_LIBRARIES(${TARGET} PRIVATE ws2_32)
endif()
...
...
llm/generate/gen_common.sh
View file @
652c273f
...
@@ -9,11 +9,14 @@ init_vars() {
...
@@ -9,11 +9,14 @@ init_vars() {
ARCH
=
"arm64"
ARCH
=
"arm64"
;;
;;
*
)
*
)
ARCH
=
$(
uname
-m
|
sed
-e
"s/aarch64/arm64/g"
)
echo
"GOARCH must be set"
echo
"this script is meant to be run from within go generate"
exit
1
;;
esac
esac
LLAMACPP_DIR
=
../llama.cpp
LLAMACPP_DIR
=
../llama.cpp
CMAKE_DEFS
=
""
CMAKE_DEFS
=
"
-DCMAKE_SKIP_RPATH=on
"
CMAKE_TARGETS
=
"--target ollama_llama_server"
CMAKE_TARGETS
=
"--target ollama_llama_server"
if
echo
"
${
CGO_CFLAGS
}
"
|
grep
--
'-g'
>
/dev/null
;
then
if
echo
"
${
CGO_CFLAGS
}
"
|
grep
--
'-g'
>
/dev/null
;
then
CMAKE_DEFS
=
"-DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_VERBOSE_MAKEFILE=on -DLLAMA_GPROF=on -DLLAMA_SERVER_VERBOSE=on
${
CMAKE_DEFS
}
"
CMAKE_DEFS
=
"-DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_VERBOSE_MAKEFILE=on -DLLAMA_GPROF=on -DLLAMA_SERVER_VERBOSE=on
${
CMAKE_DEFS
}
"
...
@@ -27,6 +30,7 @@ init_vars() {
...
@@ -27,6 +30,7 @@ init_vars() {
WHOLE_ARCHIVE
=
"-Wl,-force_load"
WHOLE_ARCHIVE
=
"-Wl,-force_load"
NO_WHOLE_ARCHIVE
=
""
NO_WHOLE_ARCHIVE
=
""
GCC_ARCH
=
"-arch
${
ARCH
}
"
GCC_ARCH
=
"-arch
${
ARCH
}
"
DIST_BASE
=
../../dist/darwin-
${
GOARCH
}
/
;;
;;
"Linux"
)
"Linux"
)
LIB_EXT
=
"so"
LIB_EXT
=
"so"
...
@@ -35,6 +39,7 @@ init_vars() {
...
@@ -35,6 +39,7 @@ init_vars() {
# Cross compiling not supported on linux - Use docker
# Cross compiling not supported on linux - Use docker
GCC_ARCH
=
""
GCC_ARCH
=
""
DIST_BASE
=
../../dist/linux-
${
GOARCH
}
/
;;
;;
*
)
*
)
;;
;;
...
@@ -42,6 +47,7 @@ init_vars() {
...
@@ -42,6 +47,7 @@ init_vars() {
if
[
-z
"
${
CMAKE_CUDA_ARCHITECTURES
}
"
]
;
then
if
[
-z
"
${
CMAKE_CUDA_ARCHITECTURES
}
"
]
;
then
CMAKE_CUDA_ARCHITECTURES
=
"50;52;61;70;75;80"
CMAKE_CUDA_ARCHITECTURES
=
"50;52;61;70;75;80"
fi
fi
GZIP
=
$(
which pigz 2>/dev/null
||
echo
"gzip"
)
}
}
git_module_setup
()
{
git_module_setup
()
{
...
@@ -85,26 +91,36 @@ build() {
...
@@ -85,26 +91,36 @@ build() {
# Gzip every runner payload (binaries, and libraries when present) in the
# background to shrink the final distribution. The PID of each gzip job is
# appended to ${compress_pids}; callers must run wait_for_compress before
# consuming the results.
# Requires: BUILD_DIR, GZIP (set by init_vars); caller initializes
# compress_pids="" once at script start.
compress() {
    echo "Compressing payloads to reduce overall binary size..."
    # ":?" aborts rather than expanding to "rm -rf /bin/*.gz" if BUILD_DIR is unset
    rm -rf "${BUILD_DIR:?}"/bin/*.gz
    for f in "${BUILD_DIR}"/bin/*; do
        [ -e "${f}" ] || continue   # empty dir leaves the literal glob behind
        ${GZIP} -n --best -f "${f}" &
        compress_pids+=" $!"
    done
    # check for lib directory - only some build flavors produce one
    if [ -d "${BUILD_DIR}/lib" ]; then
        for f in "${BUILD_DIR}"/lib/*; do
            [ -e "${f}" ] || continue
            ${GZIP} -n --best -f "${f}" &
            compress_pids+=" $!"
        done
    fi
    echo
}

# Block until every background compression job started by compress() has
# finished, then report. compress_pids is intentionally unquoted so the
# whitespace-separated PID list word-splits.
wait_for_compress() {
    for pid in ${compress_pids}; do
        wait "${pid}"
    done
    echo "Finished compression"
}

# Copy every built shared library found under BUILD_DIR into BUILD_DIR/bin
# so the payloads ship alongside the server binary.
# Requires: BUILD_DIR, LIB_EXT (set by init_vars).
install() {
    echo "Installing libraries to bin dir ${BUILD_DIR}/bin/"
    for lib in $(find "${BUILD_DIR}" -name \*."${LIB_EXT}"); do
        rm -f "${BUILD_DIR}/bin/$(basename "${lib}")"
        cp -af "${lib}" "${BUILD_DIR}/bin/"
    done
}
# Keep the local tree clean after we're done with the build
# Keep the local tree clean after we're done with the build
cleanup
()
{
cleanup
()
{
(
cd
${
LLAMACPP_DIR
}
/
&&
git checkout CMakeLists.txt
)
(
cd
${
LLAMACPP_DIR
}
/
&&
git checkout CMakeLists.txt
)
...
...
llm/generate/gen_darwin.sh
View file @
652c273f
...
@@ -6,6 +6,7 @@
...
@@ -6,6 +6,7 @@
set
-ex
set
-ex
set
-o
pipefail
set
-o
pipefail
compress_pids
=
""
echo
"Starting darwin generate script"
echo
"Starting darwin generate script"
source
$(
dirname
$0
)
/gen_common.sh
source
$(
dirname
$0
)
/gen_common.sh
init_vars
init_vars
...
@@ -98,4 +99,5 @@ case "${GOARCH}" in
...
@@ -98,4 +99,5 @@ case "${GOARCH}" in
esac
esac
cleanup
cleanup
wait_for_compress
echo
"go generate completed. LLM runners:
$(
cd
${
BUILD_DIR
}
/..
;
echo
*
)
"
echo
"go generate completed. LLM runners:
$(
cd
${
BUILD_DIR
}
/..
;
echo
*
)
"
llm/generate/gen_linux.sh
View file @
652c273f
...
@@ -13,6 +13,7 @@
...
@@ -13,6 +13,7 @@
set
-ex
set
-ex
set
-o
pipefail
set
-o
pipefail
compress_pids
=
""
# See https://llvm.org/docs/AMDGPUUsage.html#processors for reference
# See https://llvm.org/docs/AMDGPUUsage.html#processors for reference
amdGPUs
()
{
amdGPUs
()
{
...
@@ -51,7 +52,7 @@ if [ -z "${CUDACXX}" ]; then
...
@@ -51,7 +52,7 @@ if [ -z "${CUDACXX}" ]; then
export
CUDACXX
=
$(
command
-v
nvcc
)
export
CUDACXX
=
$(
command
-v
nvcc
)
fi
fi
fi
fi
COMMON_CMAKE_DEFS
=
"-DBUILD_SHARED_LIBS=o
ff
-DCMAKE_POSITION_INDEPENDENT_CODE=on -DGGML_NATIVE=off -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_OPENMP=off"
COMMON_CMAKE_DEFS
=
"
-DCMAKE_SKIP_RPATH=on
-DBUILD_SHARED_LIBS=o
n
-DCMAKE_POSITION_INDEPENDENT_CODE=on -DGGML_NATIVE=off -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_OPENMP=off"
source
$(
dirname
$0
)
/gen_common.sh
source
$(
dirname
$0
)
/gen_common.sh
init_vars
init_vars
git_module_setup
git_module_setup
...
@@ -77,10 +78,11 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
...
@@ -77,10 +78,11 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
if
[
-n
"
${
OLLAMA_CUSTOM_CPU_DEFS
}
"
]
;
then
if
[
-n
"
${
OLLAMA_CUSTOM_CPU_DEFS
}
"
]
;
then
init_vars
init_vars
echo
"OLLAMA_CUSTOM_CPU_DEFS=
\"
${
OLLAMA_CUSTOM_CPU_DEFS
}
\"
"
echo
"OLLAMA_CUSTOM_CPU_DEFS=
\"
${
OLLAMA_CUSTOM_CPU_DEFS
}
\"
"
CMAKE_DEFS
=
"
${
OLLAMA_CUSTOM_CPU_DEFS
}
-DBUILD_SHARED_LIBS=o
ff
-DCMAKE_POSITION_INDEPENDENT_CODE=on
${
CMAKE_DEFS
}
"
CMAKE_DEFS
=
"
${
OLLAMA_CUSTOM_CPU_DEFS
}
-DBUILD_SHARED_LIBS=o
n
-DCMAKE_POSITION_INDEPENDENT_CODE=on
${
CMAKE_DEFS
}
"
BUILD_DIR
=
"../build/linux/
${
ARCH
}
/cpu"
BUILD_DIR
=
"../build/linux/
${
ARCH
}
/cpu"
echo
"Building custom CPU"
echo
"Building custom CPU"
build
build
install
compress
compress
else
else
# Darwin Rosetta x86 emulation does NOT support AVX, AVX2, AVX512
# Darwin Rosetta x86 emulation does NOT support AVX, AVX2, AVX512
...
@@ -93,7 +95,7 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
...
@@ -93,7 +95,7 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
# -DGGML_AVX512_VBMI -- 2018 Intel Cannon Lake
# -DGGML_AVX512_VBMI -- 2018 Intel Cannon Lake
# -DGGML_AVX512_VNNI -- 2021 Intel Alder Lake
# -DGGML_AVX512_VNNI -- 2021 Intel Alder Lake
COMMON_CPU_DEFS
=
"-DBUILD_SHARED_LIBS=o
ff
-DCMAKE_POSITION_INDEPENDENT_CODE=on -DGGML_NATIVE=off -DGGML_OPENMP=off"
COMMON_CPU_DEFS
=
"-DBUILD_SHARED_LIBS=o
n
-DCMAKE_POSITION_INDEPENDENT_CODE=on -DGGML_NATIVE=off -DGGML_OPENMP=off"
if
[
-z
"
${
OLLAMA_CPU_TARGET
}
"
-o
"
${
OLLAMA_CPU_TARGET
}
"
=
"cpu"
]
;
then
if
[
-z
"
${
OLLAMA_CPU_TARGET
}
"
-o
"
${
OLLAMA_CPU_TARGET
}
"
=
"cpu"
]
;
then
#
#
# CPU first for the default library, set up as lowest common denominator for maximum compatibility (including Rosetta)
# CPU first for the default library, set up as lowest common denominator for maximum compatibility (including Rosetta)
...
@@ -103,6 +105,7 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
...
@@ -103,6 +105,7 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
BUILD_DIR
=
"../build/linux/
${
ARCH
}
/cpu"
BUILD_DIR
=
"../build/linux/
${
ARCH
}
/cpu"
echo
"Building LCD CPU"
echo
"Building LCD CPU"
build
build
install
compress
compress
fi
fi
...
@@ -120,6 +123,7 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
...
@@ -120,6 +123,7 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
BUILD_DIR
=
"../build/linux/
${
ARCH
}
/cpu_avx"
BUILD_DIR
=
"../build/linux/
${
ARCH
}
/cpu_avx"
echo
"Building AVX CPU"
echo
"Building AVX CPU"
build
build
install
compress
compress
fi
fi
...
@@ -133,6 +137,7 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
...
@@ -133,6 +137,7 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
BUILD_DIR
=
"../build/linux/
${
ARCH
}
/cpu_avx2"
BUILD_DIR
=
"../build/linux/
${
ARCH
}
/cpu_avx2"
echo
"Building AVX2 CPU"
echo
"Building AVX2 CPU"
build
build
install
compress
compress
fi
fi
fi
fi
...
@@ -160,7 +165,7 @@ if [ -z "${OLLAMA_SKIP_CUDA_GENERATE}" -a -d "${CUDA_LIB_DIR}" ]; then
...
@@ -160,7 +165,7 @@ if [ -z "${OLLAMA_SKIP_CUDA_GENERATE}" -a -d "${CUDA_LIB_DIR}" ]; then
echo
"CUDA libraries detected - building dynamic CUDA library"
echo
"CUDA libraries detected - building dynamic CUDA library"
init_vars
init_vars
CUDA_MAJOR
=
$(
ls
"
${
CUDA_LIB_DIR
}
"
/libcudart.so.
*
|
head
-1
|
cut
-f3
-d
.
||
true
)
CUDA_MAJOR
=
$(
ls
"
${
CUDA_LIB_DIR
}
"
/libcudart.so.
*
|
head
-1
|
cut
-f3
-d
.
||
true
)
if
[
-n
"
${
CUDA_MAJOR
}
"
]
;
then
if
[
-n
"
${
CUDA_MAJOR
}
"
-a
-z
"
${
CUDA_VARIANT
}
"
]
;
then
CUDA_VARIANT
=
_v
${
CUDA_MAJOR
}
CUDA_VARIANT
=
_v
${
CUDA_MAJOR
}
fi
fi
if
[
"
${
ARCH
}
"
==
"arm64"
]
;
then
if
[
"
${
ARCH
}
"
==
"arm64"
]
;
then
...
@@ -178,29 +183,19 @@ if [ -z "${OLLAMA_SKIP_CUDA_GENERATE}" -a -d "${CUDA_LIB_DIR}" ]; then
...
@@ -178,29 +183,19 @@ if [ -z "${OLLAMA_SKIP_CUDA_GENERATE}" -a -d "${CUDA_LIB_DIR}" ]; then
CMAKE_CUDA_DEFS
=
"-DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=
${
CMAKE_CUDA_ARCHITECTURES
}
${
OLLAMA_CUSTOM_CUDA_DEFS
}
"
CMAKE_CUDA_DEFS
=
"-DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=
${
CMAKE_CUDA_ARCHITECTURES
}
${
OLLAMA_CUSTOM_CUDA_DEFS
}
"
echo
"Building custom CUDA GPU"
echo
"Building custom CUDA GPU"
else
else
CMAKE_CUDA_DEFS
=
"-DGGML_CUDA=on
-DCMAKE_CUDA_FLAGS=-t8
-DCMAKE_CUDA_ARCHITECTURES=
${
CMAKE_CUDA_ARCHITECTURES
}
"
CMAKE_CUDA_DEFS
=
"-DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=
${
CMAKE_CUDA_ARCHITECTURES
}
"
fi
fi
CMAKE_DEFS
=
"
${
COMMON_CMAKE_DEFS
}
${
CMAKE_DEFS
}
${
ARM64_DEFS
}
${
CMAKE_CUDA_DEFS
}
"
export
CUDAFLAGS
=
"-t8"
CMAKE_DEFS
=
"
${
COMMON_CMAKE_DEFS
}
${
CMAKE_DEFS
}
${
ARM64_DEFS
}
${
CMAKE_CUDA_DEFS
}
-DGGML_STATIC=off"
BUILD_DIR
=
"../build/linux/
${
ARCH
}
/cuda
${
CUDA_VARIANT
}
"
BUILD_DIR
=
"../build/linux/
${
ARCH
}
/cuda
${
CUDA_VARIANT
}
"
EXTRA_LIBS
=
"-L
${
CUDA_LIB_DIR
}
-lcudart -lcublas -lcublasLt -lcuda"
export
LLAMA_SERVER_LDFLAGS
=
"-L
${
CUDA_LIB_DIR
}
-lcudart -lcublas -lcublasLt -lcuda"
CUDA_DIST_DIR
=
"
${
CUDA_DIST_DIR
:-${
DIST_BASE
}
/lib/ollama
}
"
build
build
install
# Carry the CUDA libs as payloads to help reduce dependency burden on users
echo
"Installing CUDA dependencies in
${
CUDA_DIST_DIR
}
"
#
mkdir
-p
"
${
CUDA_DIST_DIR
}
"
# TODO - in the future we may shift to packaging these separately and conditionally
for
lib
in
${
CUDA_LIB_DIR
}
/libcudart.so
*
${
CUDA_LIB_DIR
}
/libcublas.so
*
${
CUDA_LIB_DIR
}
/libcublasLt.so
*
;
do
# downloading them in the install script.
cp
-a
"
${
lib
}
"
"
${
CUDA_DIST_DIR
}
"
DEPS
=
"
$(
ldd
${
BUILD_DIR
}
/bin/ollama_llama_server
)
"
for
lib
in
libcudart.so libcublas.so libcublasLt.so
;
do
DEP
=
$(
echo
"
${
DEPS
}
"
|
grep
${
lib
}
|
cut
-f1
-d
' '
| xargs
||
true
)
if
[
-n
"
${
DEP
}
"
-a
-e
"
${
CUDA_LIB_DIR
}
/
${
DEP
}
"
]
;
then
cp
"
${
CUDA_LIB_DIR
}
/
${
DEP
}
"
"
${
BUILD_DIR
}
/bin/"
elif
[
-e
"
${
CUDA_LIB_DIR
}
/
${
lib
}
.
${
CUDA_MAJOR
}
"
]
;
then
cp
"
${
CUDA_LIB_DIR
}
/
${
lib
}
.
${
CUDA_MAJOR
}
"
"
${
BUILD_DIR
}
/bin/"
elif
[
-e
"
${
CUDART_LIB_DIR
}
/
${
lib
}
"
]
;
then
cp
-d
${
CUDART_LIB_DIR
}
/
${
lib
}*
"
${
BUILD_DIR
}
/bin/"
else
cp
-d
"
${
CUDA_LIB_DIR
}
/
${
lib
}
*"
"
${
BUILD_DIR
}
/bin/"
fi
done
done
compress
compress
...
@@ -218,21 +213,24 @@ if [ -z "${OLLAMA_SKIP_ONEAPI_GENERATE}" -a -d "${ONEAPI_ROOT}" ]; then
...
@@ -218,21 +213,24 @@ if [ -z "${OLLAMA_SKIP_ONEAPI_GENERATE}" -a -d "${ONEAPI_ROOT}" ]; then
CC
=
icx
CC
=
icx
CMAKE_DEFS
=
"
${
COMMON_CMAKE_DEFS
}
${
CMAKE_DEFS
}
-DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL=ON -DGGML_SYCL_F16=OFF"
CMAKE_DEFS
=
"
${
COMMON_CMAKE_DEFS
}
${
CMAKE_DEFS
}
-DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL=ON -DGGML_SYCL_F16=OFF"
BUILD_DIR
=
"../build/linux/
${
ARCH
}
/oneapi"
BUILD_DIR
=
"../build/linux/
${
ARCH
}
/oneapi"
EXTRA_LIBS
=
"-fsycl -Wl,-rpath,
${
ONEAPI_ROOT
}
/compiler/latest/lib,-rpath,
${
ONEAPI_ROOT
}
/mkl/latest/lib,-rpath,
${
ONEAPI_ROOT
}
/tbb/latest/lib,-rpath,
${
ONEAPI_ROOT
}
/compiler/latest/opt/oclfpga/linux64/lib -lOpenCL -lmkl_core -lmkl_sycl_blas -lmkl_intel_ilp64 -lmkl_tbb_thread -ltbb"
ONEAPI_DIST_DIR
=
"
${
DIST_BASE
}
/lib/ollama"
export
LLAMA_SERVER_LDFLAGS
=
"-fsycl -lOpenCL -lmkl_core -lmkl_sycl_blas -lmkl_intel_ilp64 -lmkl_tbb_thread -ltbb"
DEBUG_FLAGS
=
""
# icx compiles with -O0 if we pass -g, so we must remove it
DEBUG_FLAGS
=
""
# icx compiles with -O0 if we pass -g, so we must remove it
build
build
# copy oneAPI dependencies
# copy oneAPI dependencies
mkdir
-p
"
${
ONEAPI_DIST_DIR
}
"
for
dep
in
$(
ldd
"
${
BUILD_DIR
}
/bin/ollama_llama_server"
|
grep
"=>"
|
cut
-f2
-d
=
|
cut
-f2
-d
' '
|
grep
-e
sycl
-e
mkl
-e
tbb
)
;
do
for
dep
in
$(
ldd
"
${
BUILD_DIR
}
/bin/ollama_llama_server"
|
grep
"=>"
|
cut
-f2
-d
=
|
cut
-f2
-d
' '
|
grep
-e
sycl
-e
mkl
-e
tbb
)
;
do
cp
"
${
dep
}
"
"
${
BUILD_DIR
}
/bin/
"
cp
-a
"
${
dep
}
"
"
${
ONEAPI_DIST_DIR
}
"
done
done
cp
"
${
ONEAPI_ROOT
}
/compiler/latest/lib/libOpenCL.so"
"
${
BUILD_DIR
}
/bin/"
cp
"
${
ONEAPI_ROOT
}
/compiler/latest/lib/libOpenCL.so"
"
${
ONEAPI_DIST_DIR
}
"
cp
"
${
ONEAPI_ROOT
}
/compiler/latest/lib/libimf.so"
"
${
BUILD_DIR
}
/bin/"
cp
"
${
ONEAPI_ROOT
}
/compiler/latest/lib/libimf.so"
"
${
ONEAPI_DIST_DIR
}
"
cp
"
${
ONEAPI_ROOT
}
/compiler/latest/lib/libintlc.so.5"
"
${
BUILD_DIR
}
/bin/"
cp
"
${
ONEAPI_ROOT
}
/compiler/latest/lib/libintlc.so.5"
"
${
ONEAPI_DIST_DIR
}
"
cp
"
${
ONEAPI_ROOT
}
/compiler/latest/lib/libirng.so"
"
${
BUILD_DIR
}
/bin/"
cp
"
${
ONEAPI_ROOT
}
/compiler/latest/lib/libirng.so"
"
${
ONEAPI_DIST_DIR
}
"
cp
"
${
ONEAPI_ROOT
}
/compiler/latest/lib/libpi_level_zero.so"
"
${
BUILD_DIR
}
/bin/"
cp
"
${
ONEAPI_ROOT
}
/compiler/latest/lib/libpi_level_zero.so"
"
${
ONEAPI_DIST_DIR
}
"
cp
"
${
ONEAPI_ROOT
}
/compiler/latest/lib/libsvml.so"
"
${
BUILD_DIR
}
/bin/"
cp
"
${
ONEAPI_ROOT
}
/compiler/latest/lib/libsvml.so"
"
${
ONEAPI_DIST_DIR
}
"
cp
"
${
ONEAPI_ROOT
}
/compiler/latest/lib/libur_loader.so.0"
"
${
BUILD_DIR
}
/bin/"
cp
"
${
ONEAPI_ROOT
}
/compiler/latest/lib/libur_loader.so.0"
"
${
ONEAPI_DIST_DIR
}
"
install
compress
compress
fi
fi
...
@@ -262,23 +260,21 @@ if [ -z "${OLLAMA_SKIP_ROCM_GENERATE}" -a -d "${ROCM_PATH}" ]; then
...
@@ -262,23 +260,21 @@ if [ -z "${OLLAMA_SKIP_ROCM_GENERATE}" -a -d "${ROCM_PATH}" ]; then
echo
"Building custom ROCM GPU"
echo
"Building custom ROCM GPU"
fi
fi
BUILD_DIR
=
"../build/linux/
${
ARCH
}
/rocm
${
ROCM_VARIANT
}
"
BUILD_DIR
=
"../build/linux/
${
ARCH
}
/rocm
${
ROCM_VARIANT
}
"
EXTRA_LIBS
=
"-L
${
ROCM_PATH
}
/lib -L/opt/amdgpu/lib/x86_64-linux-gnu/ -Wl,-rpath,
\$
ORIGIN/../../rocm/ -lhipblas -lrocblas -lamdhip64 -lrocsolver -lamd_comgr -lhsa-runtime64 -lrocsparse -ldrm -ldrm_amdgpu"
ROCM_DIST_DIR
=
"
${
DIST_BASE
}
/lib/ollama"
# TODO figure out how to disable runpath (rpath)
# export CMAKE_HIP_FLAGS="-fno-rtlib-add-rpath" # doesn't work
export
LLAMA_SERVER_LDFLAGS
=
"-L
${
ROCM_PATH
}
/lib -L/opt/amdgpu/lib/x86_64-linux-gnu/ -lhipblas -lrocblas -lamdhip64 -lrocsolver -lamd_comgr -lhsa-runtime64 -lrocsparse -ldrm -ldrm_amdgpu"
build
build
# Record the ROCM dependencies
# copy the ROCM dependencies
rm
-f
"
${
BUILD_DIR
}
/bin/deps.txt"
mkdir
-p
"
${
ROCM_DIST_DIR
}
"
touch
"
${
BUILD_DIR
}
/bin/deps.txt"
for
dep
in
$(
ldd
"
${
BUILD_DIR
}
/bin/ollama_llama_server"
|
grep
"=>"
|
cut
-f2
-d
=
|
cut
-f2
-d
' '
|
grep
-v
"
${
ARCH
}
/rocm
${
ROCM_VARIANT
}
"
|
grep
-e
rocm
-e
amdgpu
-e
libtinfo
)
;
do
for
dep
in
$(
ldd
"
${
BUILD_DIR
}
/bin/ollama_llama_server"
|
grep
"=>"
|
cut
-f2
-d
=
|
cut
-f2
-d
' '
|
grep
-e
rocm
-e
amdgpu
-e
libtinfo
)
;
do
cp
-a
"
${
dep
}
"
*
"
${
ROCM_DIST_DIR
}
"
echo
"
${
dep
}
"
>>
"
${
BUILD_DIR
}
/bin/deps.txt"
done
done
# bomb out if for some reason we didn't get a few deps
install
if
[
$(
cat
"
${
BUILD_DIR
}
/bin/deps.txt"
|
wc
-l
)
-lt
8
]
;
then
cat
"
${
BUILD_DIR
}
/bin/deps.txt"
echo
"ERROR: deps file short"
exit
1
fi
compress
compress
fi
fi
cleanup
cleanup
wait_for_compress
echo
"go generate completed. LLM runners:
$(
cd
${
BUILD_DIR
}
/..
;
echo
*
)
"
echo
"go generate completed. LLM runners:
$(
cd
${
BUILD_DIR
}
/..
;
echo
*
)
"
llm/generate/gen_windows.ps1
View file @
652c273f
...
@@ -35,7 +35,7 @@ function init_vars {
...
@@ -35,7 +35,7 @@ function init_vars {
)
)
$
script
:
commonCpuDefs
=
@(
"-DCMAKE_POSITION_INDEPENDENT_CODE=on"
)
$
script
:
commonCpuDefs
=
@(
"-DCMAKE_POSITION_INDEPENDENT_CODE=on"
)
$
script
:
ARCH
=
$
Env
:
PROCESSOR_ARCHITECTURE
.
ToLower
()
$
script
:
ARCH
=
$
Env
:
PROCESSOR_ARCHITECTURE
.
ToLower
()
$
script
:
DIST_BASE
=
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\ollama
_
runners"
$
script
:
DIST_BASE
=
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\
lib\
ollama
\
runners"
md
"
$
script
:
DIST_BASE
"
-ea
0
>
$null
md
"
$
script
:
DIST_BASE
"
-ea
0
>
$null
if
(
$
env
:
CGO_CFLAGS
-contains
"-g"
)
{
if
(
$
env
:
CGO_CFLAGS
-contains
"-g"
)
{
$
script
:
cmakeDefs
+=
@(
"-DCMAKE_VERBOSE_MAKEFILE=on"
,
"-DLLAMA_SERVER_VERBOSE=on"
,
"-DCMAKE_BUILD_TYPE=RelWithDebInfo"
)
$
script
:
cmakeDefs
+=
@(
"-DCMAKE_VERBOSE_MAKEFILE=on"
,
"-DLLAMA_SERVER_VERBOSE=on"
,
"-DCMAKE_BUILD_TYPE=RelWithDebInfo"
)
...
@@ -117,7 +117,7 @@ function build {
...
@@ -117,7 +117,7 @@ function build {
if
(
$cmakeDefs
-contains
"-G"
)
{
if
(
$cmakeDefs
-contains
"-G"
)
{
$extra
=
@(
"-j8"
)
$extra
=
@(
"-j8"
)
}
else
{
}
else
{
$extra
=
@(
"--"
,
"/
p:CL_MPc
ount
=
8"
)
$extra
=
@(
"--"
,
"/
maxCpuC
ount
:
8"
)
}
}
write-host
"building with: cmake --build
$
script
:
buildDir
--config
$
script
:
config
$(
$
script
:
cmakeTargets
|
ForEach-Object
{
`
"--target
`"
,
$_
})
$extra
"
write-host
"building with: cmake --build
$
script
:
buildDir
--config
$
script
:
config
$(
$
script
:
cmakeTargets
|
ForEach-Object
{
`
"--target
`"
,
$_
})
$extra
"
&
cmake
--build
$
script
:
buildDir
--config
$
script
:
config
(
$
script
:
cmakeTargets
|
ForEach-Object
{
"--target"
,
$_
}
)
$extra
&
cmake
--build
$
script
:
buildDir
--config
$
script
:
config
(
$
script
:
cmakeTargets
|
ForEach-Object
{
"--target"
,
$_
}
)
$extra
...
@@ -261,7 +261,7 @@ function build_cuda() {
...
@@ -261,7 +261,7 @@ function build_cuda() {
if ((-not "
${env:OLLAMA_SKIP_CUDA_GENERATE}
") -and ("
${script:CUDA_LIB_DIR}
")) {
if ((-not "
${env:OLLAMA_SKIP_CUDA_GENERATE}
") -and ("
${script:CUDA_LIB_DIR}
")) {
# Then build cuda as a dynamically loaded library
# Then build cuda as a dynamically loaded library
$nvcc
= "
$
script
:
CUDA_LIB_DIR
\nvcc.exe
"
$nvcc
= "
$
script
:
CUDA_LIB_DIR
\nvcc.exe
"
$
script
:
CUDA_VERSION
=(get-item (
$nvcc
| split-path | split-path)).Basename
$
script
:
CUDA_VERSION
=(
(
get-item (
$nvcc
| split-path | split-path)).Basename
-Split "
\.
")[0]
if (
$null
-ne
$
script
:
CUDA_VERSION
) {
if (
$null
-ne
$
script
:
CUDA_VERSION
) {
$
script
:
CUDA_VARIANT
="
_
"+
$
script
:
CUDA_VERSION
$
script
:
CUDA_VARIANT
="
_
"+
$
script
:
CUDA_VERSION
}
}
...
@@ -273,9 +273,9 @@ function build_cuda() {
...
@@ -273,9 +273,9 @@ function build_cuda() {
"
-DGGML_CUDA
=
ON
",
"
-DGGML_CUDA
=
ON
",
"
-DGGML_AVX
=
on
",
"
-DGGML_AVX
=
on
",
"
-DGGML_AVX2
=
off
",
"
-DGGML_AVX2
=
off
",
"
-DC
UDAToolkit_INCLUDE_DIR
=
$
script
:
CUDA_INCLUDE_DIR
",
"
-DC
MAKE_CUDA_FLAGS
=
-t6
",
"
-DCMAKE_CUDA_
FLAGS
=
-t8
",
"
-DCMAKE_CUDA_
ARCHITECTURES
=
${script:CMAKE_CUDA_ARCHITECTURES}
",
"
-DCMAKE_CUDA_
ARCHITECTURES
=
${script:CMAKE_CUDA_ARCHITECTURES}
"
"
-DCMAKE_CUDA_
COMPILER_TOOLKIT_ROOT
=
$
env
:
CUDA_PATH
"
)
)
if (
$null
-ne
$
env
:
OLLAMA_CUSTOM_CUDA_DEFS
) {
if (
$null
-ne
$
env
:
OLLAMA_CUSTOM_CUDA_DEFS
) {
write-host "
OLLAMA_CUSTOM_CUDA_DEFS
=
`
"
${env:OLLAMA_CUSTOM_CUDA_DEFS}
`"
"
write-host "
OLLAMA_CUSTOM_CUDA_DEFS
=
`
"
${env:OLLAMA_CUSTOM_CUDA_DEFS}
`"
"
...
@@ -286,12 +286,11 @@ function build_cuda() {
...
@@ -286,12 +286,11 @@ function build_cuda() {
sign
sign
install
install
rm
-ea
0
-recurse
-force
-path
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\cuda\"
md
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\lib\ollama\"
-ea
0
>
$null
md
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\cuda\"
-ea
0
>
$null
write-host
"copying CUDA dependencies to
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\lib\ollama\"
write-host
"copying CUDA dependencies to
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\cuda\"
cp
"
${script:CUDA_LIB_DIR}
\cudart64_*.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\lib\ollama\"
cp
"
${script:CUDA_LIB_DIR}
\cudart64_*.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\cuda\"
cp
"
${script:CUDA_LIB_DIR}
\cublas64_*.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\lib\ollama\"
cp
"
${script:CUDA_LIB_DIR}
\cublas64_*.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\cuda\"
cp
"
${script:CUDA_LIB_DIR}
\cublasLt64_*.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\lib\ollama\"
cp
"
${script:CUDA_LIB_DIR}
\cublasLt64_*.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\cuda\"
}
else
{
}
else
{
write-host
"Skipping CUDA generation step"
write-host
"Skipping CUDA generation step"
}
}
...
@@ -325,18 +324,17 @@ function build_oneapi() {
...
@@ -325,18 +324,17 @@ function build_oneapi() {
sign
sign
install
install
rm
-ea
0
-recurse
-force
-path
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\oneapi\"
md
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\lib\ollama\"
-ea
0
>
$null
md
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\oneapi\"
-ea
0
>
$null
cp
"
${env:ONEAPI_ROOT}
\compiler\latest\bin\libirngmd.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\lib\ollama\"
cp
"
${env:ONEAPI_ROOT}
\compiler\latest\bin\libirngmd.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\oneapi\"
cp
"
${env:ONEAPI_ROOT}
\compiler\latest\bin\libmmd.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\lib\ollama\"
cp
"
${env:ONEAPI_ROOT}
\compiler\latest\bin\libmmd.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\oneapi\"
cp
"
${env:ONEAPI_ROOT}
\compiler\latest\bin\pi_level_zero.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\lib\ollama\"
cp
"
${env:ONEAPI_ROOT}
\compiler\latest\bin\pi_level_zero.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\oneapi\"
cp
"
${env:ONEAPI_ROOT}
\compiler\latest\bin\pi_unified_runtime.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\lib\ollama\"
cp
"
${env:ONEAPI_ROOT}
\compiler\latest\bin\pi_unified_runtime.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\oneapi\"
cp
"
${env:ONEAPI_ROOT}
\compiler\latest\bin\pi_win_proxy_loader.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\lib\ollama\"
cp
"
${env:ONEAPI_ROOT}
\compiler\latest\bin\pi_win_proxy_loader.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\oneapi\"
cp
"
${env:ONEAPI_ROOT}
\compiler\latest\bin\svml_dispmd.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\lib\ollama\"
cp
"
${env:ONEAPI_ROOT}
\compiler\latest\bin\svml_dispmd.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\oneapi\"
cp
"
${env:ONEAPI_ROOT}
\compiler\latest\bin\sycl7.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\lib\ollama\"
cp
"
${env:ONEAPI_ROOT}
\compiler\latest\bin\sycl7.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\oneapi\"
cp
"
${env:ONEAPI_ROOT}
\mkl\latest\bin\mkl_core.2.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\lib\ollama\"
cp
"
${env:ONEAPI_ROOT}
\mkl\latest\bin\mkl_core.2.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\oneapi\"
cp
"
${env:ONEAPI_ROOT}
\mkl\latest\bin\mkl_sycl_blas.4.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\lib\ollama\"
cp
"
${env:ONEAPI_ROOT}
\mkl\latest\bin\mkl_sycl_blas.4.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\oneapi\"
cp
"
${env:ONEAPI_ROOT}
\mkl\latest\bin\mkl_tbb_thread.2.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\lib\ollama\"
cp
"
${env:ONEAPI_ROOT}
\mkl\latest\bin\mkl_tbb_thread.2.dll"
"
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\oneapi\"
}
else
{
}
else
{
Write-Host
"Skipping oneAPI generation step"
Write-Host
"Skipping oneAPI generation step"
}
}
...
@@ -386,12 +384,11 @@ function build_rocm() {
...
@@ -386,12 +384,11 @@ function build_rocm() {
sign
sign
install
install
rm -ea 0 -recurse -force -path "
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\rocm\
"
md "
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\lib\ollama\rocblas\library\
" -ea 0 >
$null
md "
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\rocm\rocblas\library\
" -ea 0 >
$null
cp "
${env:HIP_PATH}
\bin\hipblas.dll
" "
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\lib\ollama\
"
cp "
${env:HIP_PATH}
\bin\hipblas.dll
" "
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\rocm\
"
cp "
${env:HIP_PATH}
\bin\rocblas.dll
" "
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\lib\ollama\
"
cp "
${env:HIP_PATH}
\bin\rocblas.dll
" "
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\rocm\
"
# amdhip64.dll dependency comes from the driver and must be installed on the host to use AMD GPUs
# amdhip64.dll dependency comes from the driver and must be installed on the host to use AMD GPUs
cp "
${env:HIP_PATH}
\bin\rocblas\library\*
" "
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\
rocm
\rocblas\library\
"
cp "
${env:HIP_PATH}
\bin\rocblas\library\*
" "
${script:SRC_DIR}
\dist\windows-
${script:ARCH}
\
lib\ollama
\rocblas\library\
"
} else {
} else {
write-host "
Skipping
ROCm
generation
step
"
write-host "
Skipping
ROCm
generation
step
"
}
}
...
...
llm/payload.go
View file @
652c273f
...
@@ -82,8 +82,8 @@ func serversForGpu(info gpu.GpuInfo) []string {
...
@@ -82,8 +82,8 @@ func serversForGpu(info gpu.GpuInfo) []string {
// glob workDir for files that start with ollama_
// glob workDir for files that start with ollama_
availableServers
:=
getAvailableServers
()
availableServers
:=
getAvailableServers
()
requested
:=
info
.
Library
requested
:=
info
.
Library
if
info
.
Variant
!=
gpu
.
CPUCapabilityNone
{
if
info
.
Variant
!=
gpu
.
CPUCapabilityNone
.
String
()
{
requested
+=
"_"
+
info
.
Variant
.
String
()
requested
+=
"_"
+
info
.
Variant
}
}
servers
:=
[]
string
{}
servers
:=
[]
string
{}
...
...
llm/server.go
View file @
652c273f
...
@@ -306,20 +306,18 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
...
@@ -306,20 +306,18 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
if
runtime
.
GOOS
==
"windows"
{
if
runtime
.
GOOS
==
"windows"
{
pathEnv
=
"PATH"
pathEnv
=
"PATH"
}
}
//
prepend
the server directory
to
LD_LIBRARY_PATH/PATH
and the parent dir for common dependencies
//
Start with
the server directory
for the
LD_LIBRARY_PATH/PATH
libraryPaths
:=
[]
string
{
dir
,
filepath
.
Dir
(
dir
)
}
libraryPaths
:=
[]
string
{
dir
}
if
libraryPath
,
ok
:=
os
.
LookupEnv
(
pathEnv
);
ok
{
if
libraryPath
,
ok
:=
os
.
LookupEnv
(
pathEnv
);
ok
{
// Append our runner directory to the path
// favor our bundled library dependencies over system libraries
// This will favor system libraries over our bundled library dependencies
libraryPaths
=
append
(
libraryPaths
,
filepath
.
SplitList
(
libraryPath
)
...
)
libraryPaths
=
append
(
libraryPaths
,
filepath
.
SplitList
(
libraryPath
)
...
)
}
}
// Note: we always put the dependency path first
// Note: we always put the dependency path first
// since this was the exact version we verified for AMD GPUs
// since this was the exact version we compiled/linked against
// and we favor what the user had in their path
if
gpus
[
0
]
.
DependencyPath
!=
""
{
if
gpus
[
0
]
.
DependencyPath
!=
""
{
//
TODO refine for multi-gpu support
//
assume gpus from the same library have the same dependency path
libraryPaths
=
append
([]
string
{
gpus
[
0
]
.
DependencyPath
},
libraryPaths
...
)
libraryPaths
=
append
([]
string
{
gpus
[
0
]
.
DependencyPath
},
libraryPaths
...
)
}
}
...
...
scripts/build_linux.sh
View file @
652c273f
...
@@ -4,6 +4,7 @@ set -eu
...
@@ -4,6 +4,7 @@ set -eu
export
VERSION
=
${
VERSION
:-
$(
git describe
--tags
--first-parent
--abbrev
=
7
--long
--dirty
--always
|
sed
-e
"s/^v//g"
)
}
export
VERSION
=
${
VERSION
:-
$(
git describe
--tags
--first-parent
--abbrev
=
7
--long
--dirty
--always
|
sed
-e
"s/^v//g"
)
}
export
GOFLAGS
=
"'-ldflags=-w -s
\"
-X=github.com/ollama/ollama/version.Version=
$VERSION
\"
\"
-X=github.com/ollama/ollama/server.mode=release
\"
'"
export
GOFLAGS
=
"'-ldflags=-w -s
\"
-X=github.com/ollama/ollama/version.Version=
$VERSION
\"
\"
-X=github.com/ollama/ollama/server.mode=release
\"
'"
GZIP
=
$(
which pigz 2>/dev/null
||
echo
"gzip"
)
BUILD_ARCH
=
${
BUILD_ARCH
:-
"amd64 arm64"
}
BUILD_ARCH
=
${
BUILD_ARCH
:-
"amd64 arm64"
}
export
AMDGPU_TARGETS
=
${
AMDGPU_TARGETS
:
=
""
}
export
AMDGPU_TARGETS
=
${
AMDGPU_TARGETS
:
=
""
}
...
@@ -21,11 +22,10 @@ for TARGETARCH in ${BUILD_ARCH}; do
...
@@ -21,11 +22,10 @@ for TARGETARCH in ${BUILD_ARCH}; do
-t
builder:
$TARGETARCH
\
-t
builder:
$TARGETARCH
\
.
.
docker create
--platform
linux/
$TARGETARCH
--name
builder-
$TARGETARCH
builder:
$TARGETARCH
docker create
--platform
linux/
$TARGETARCH
--name
builder-
$TARGETARCH
builder:
$TARGETARCH
docker
cp
builder-
$TARGETARCH
:/go/src/github.com/ollama/ollama/ollama ./dist/ollama-linux-
$TARGETARCH
rm
-rf
./dist/linux-
$TARGETARCH
docker
cp
builder-
$TARGETARCH
:/go/src/github.com/ollama/ollama/dist/linux-
$TARGETARCH
./dist
if
[
"
$TARGETARCH
"
=
"amd64"
]
;
then
docker
cp
builder-
$TARGETARCH
:/go/src/github.com/ollama/ollama/dist/deps/ ./dist/
fi
docker
rm
builder-
$TARGETARCH
docker
rm
builder-
$TARGETARCH
echo
"Compressing final linux bundle..."
rm
-f
./dist/ollama-linux-
$TARGETARCH
.tgz
(
cd
dist/linux-
$TARGETARCH
&&
tar
cf -
.
|
${
GZIP
}
--best
>
../ollama-linux-
$TARGETARCH
.tgz
)
done
done
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment