OpenDAS / ollama

Commit a3f7dd3e
Merge branch 'main' into drifkin/array-head-count-simple

Authored Jun 24, 2025 by Devon Rifkin
Parents: b2b270ad, c85c0ebf

Showing 14 changed files with 86 additions and 72 deletions (+86 -72)
.github/workflows/release.yaml                                        +6  -10
.github/workflows/test.yaml                                           +9   -6
CMakeLists.txt                                                        +7   -4
CMakePresets.json                                                     +0  -13
Dockerfile                                                            +7  -17
discover/cuda_common.go                                               +4   -0
discover/path.go                                                      +1   -1
docs/gpu.md                                                           +1   -1
docs/troubleshooting.md                                               +1   -1
llama/patches/0018-temporary-prevent-rocm-cuda-mixed-loading.patch   +32   -0
llm/server.go                                                         +8   -1
ml/backend/ggml/ggml/src/ggml-backend-reg.cpp                        +10   -2
scripts/build_windows.ps1                                             +0  -14
scripts/env.sh                                                        +0   -2
.github/workflows/release.yaml

@@ -103,11 +103,6 @@ jobs:
         arch: [amd64]
         preset: ['CPU']
         include:
-          - os: windows
-            arch: amd64
-            preset: 'CUDA 11'
-            install: https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.89_win10.exe
-            cuda-version: '11.3'
           - os: windows
             arch: amd64
             preset: 'CUDA 12'
@@ -160,6 +155,9 @@ jobs:
           echo "$hipPath\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
           echo "CC=$hipPath\bin\clang.exe" | Out-File -FilePath $env:GITHUB_ENV -Append
           echo "CXX=$hipPath\bin\clang++.exe" | Out-File -FilePath $env:GITHUB_ENV -Append
+          echo "HIPCXX=$hipPath\bin\clang++.exe" | Out-File -FilePath $env:GITHUB_ENV -Append
+          echo "HIP_PLATFORM=amd" | Out-File -FilePath $env:GITHUB_ENV -Append
+          echo "CMAKE_PREFIX_PATH=$hipPath" | Out-File -FilePath $env:GITHUB_ENV -Append
       - if: matrix.preset == 'CPU'
         run: |
           echo "CC=clang.exe" | Out-File -FilePath $env:GITHUB_ENV -Append
@@ -178,8 +176,8 @@ jobs:
           key: ccache-${{ matrix.os }}-${{ matrix.arch }}-${{ matrix.preset }}
       - name: Build target "${{ matrix.preset }}"
         run: |
-          Import-Module 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
-          Enter-VsDevShell -VsInstallPath 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise' -SkipAutomaticLocation -DevCmdArguments '-arch=x64 -no_logo'
+          Import-Module 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
+          Enter-VsDevShell -VsInstallPath 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise' -SkipAutomaticLocation -DevCmdArguments '-arch=x64 -no_logo'
           cmake --preset "${{ matrix.preset }}"
           cmake --build --parallel --preset "${{ matrix.preset }}"
           cmake --install build --component "${{ startsWith(matrix.preset, 'CUDA ') && 'CUDA' || startsWith(matrix.preset, 'ROCm ') && 'HIP' || 'CPU' }}" --strip --parallel 8
@@ -246,7 +244,7 @@ jobs:
           dist\${{ matrix.os }}-${{ matrix.arch }}-app.exe
   windows-sign:
-    runs-on: windows-2022
+    runs-on: windows
     environment: release
     needs: [windows-depends, windows-build]
     steps:
@@ -324,8 +322,6 @@ jobs:
             case "$COMPONENT" in
               bin/ollama) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
               lib/ollama/*.so) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
-              lib/ollama/cuda_v11) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
-              lib/ollama/cuda_v12) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
               lib/ollama/cuda_jetpack5) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}-jetpack5.tar.in ;;
               lib/ollama/cuda_jetpack6) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}-jetpack6.tar.in ;;
               lib/ollama/rocm) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}-rocm.tar.in ;;
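Note: the `cmake --install` step above selects its component from the preset name using GitHub Actions' `&&`/`||` expression idiom, which acts like a ternary. A minimal Go sketch of the same selection (illustrative only, not code from this repository):

```go
package main

import (
	"fmt"
	"strings"
)

// installComponent mirrors the workflow expression: presets starting with
// "CUDA " install the CUDA component, "ROCm " installs HIP, anything else CPU.
func installComponent(preset string) string {
	switch {
	case strings.HasPrefix(preset, "CUDA "):
		return "CUDA"
	case strings.HasPrefix(preset, "ROCm "):
		return "HIP"
	default:
		return "CPU"
	}
}

func main() {
	fmt.Println(installComponent("CUDA 12")) // CUDA
	fmt.Println(installComponent("ROCm 6"))  // HIP
	fmt.Println(installComponent("CPU"))     // CPU
}
```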
.github/workflows/test.yaml

@@ -36,7 +36,7 @@ jobs:
             | xargs python3 -c "import sys; from pathlib import Path; print(any(Path(x).match(glob) for x in sys.argv[1:] for glob in '$*'.split(' ')))"
           }
-          echo changed=$(changed 'llama/llama.cpp/**' 'ml/backend/ggml/ggml/**') | tee -a $GITHUB_OUTPUT
+          echo changed=$(changed 'llama/llama.cpp/**/*' 'ml/backend/ggml/ggml/**/*') | tee -a $GITHUB_OUTPUT
   linux:
     needs: [changes]
@@ -46,7 +46,7 @@ jobs:
         include:
           - preset: CPU
           - preset: CUDA
-            container: nvidia/cuda:11.8.0-devel-ubuntu22.04
+            container: nvidia/cuda:12.8.1-devel-ubuntu22.04
             flags: '-DCMAKE_CUDA_ARCHITECTURES=87'
           - preset: ROCm
             container: rocm/dev-ubuntu-22.04:6.1.2
@@ -78,7 +78,7 @@ jobs:
         include:
           - preset: CPU
           - preset: CUDA
-            install: https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.89_win10.exe
+            install: https://developer.download.nvidia.com/compute/cuda/12.8.0/local_installers/cuda_12.8.0_571.96_windows.exe
             flags: '-DCMAKE_CUDA_ARCHITECTURES=80'
           - preset: ROCm
             install: https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q4-WinSvr2022-For-HIP.exe
@@ -102,7 +102,7 @@ jobs:
           $ErrorActionPreference = "Stop"
           if ("${{ steps.cache-install.outputs.cache-hit }}" -ne 'true') {
             Invoke-WebRequest -Uri "${{ matrix.install }}" -OutFile "install.exe"
-            Start-Process -FilePath .\install.exe -ArgumentList (@("-s", "cudart_11.3", "nvcc_11.3", "cublas_11.3", "cublas_dev_11.3")) -NoNewWindow -Wait
+            Start-Process -FilePath .\install.exe -ArgumentList (@("-s", "cudart_12.8", "nvcc_12.8", "cublas_12.8", "cublas_dev_12.8")) -NoNewWindow -Wait
           }
           $cudaPath = (Resolve-Path "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\*").path
@@ -120,6 +120,9 @@ jobs:
           echo "$hipPath\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
           echo "CC=$hipPath\bin\clang.exe" | Out-File -FilePath $env:GITHUB_ENV -Append
           echo "CXX=$hipPath\bin\clang++.exe" | Out-File -FilePath $env:GITHUB_ENV -Append
+          echo "HIPCXX=$hipPath\bin\clang++.exe" | Out-File -FilePath $env:GITHUB_ENV -Append
+          echo "HIP_PLATFORM=amd" | Out-File -FilePath $env:GITHUB_ENV -Append
+          echo "CMAKE_PREFIX_PATH=$hipPath" | Out-File -FilePath $env:GITHUB_ENV -Append
       - if: ${{ !cancelled() && steps.cache-install.outputs.cache-hit != 'true' }}
         uses: actions/cache/save@v4
         with:
@@ -133,8 +136,8 @@ jobs:
           path: ${{ github.workspace }}\.ccache
           key: ccache-${{ runner.os }}-${{ runner.arch }}-${{ matrix.preset }}
       - run: |
-          Import-Module 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
-          Enter-VsDevShell -VsInstallPath 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise' -SkipAutomaticLocation -DevCmdArguments '-arch=x64 -no_logo'
+          Import-Module 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
+          Enter-VsDevShell -VsInstallPath 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise' -SkipAutomaticLocation -DevCmdArguments '-arch=x64 -no_logo'
           cmake --preset "${{ matrix.preset }}" ${{ matrix.flags }}
           cmake --build --parallel --preset "${{ matrix.preset }}"
         env:
CMakeLists.txt

@@ -78,14 +78,13 @@ if(CMAKE_CUDA_COMPILER)
    find_package(CUDAToolkit)
    add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-cuda)
-   set(OLLAMA_CUDA_INSTALL_DIR ${OLLAMA_INSTALL_DIR}/cuda_v${CUDAToolkit_VERSION_MAJOR})
    install(TARGETS ggml-cuda
        RUNTIME_DEPENDENCIES
            DIRECTORIES ${CUDAToolkit_BIN_DIR} ${CUDAToolkit_LIBRARY_DIR}
            PRE_INCLUDE_REGEXES cublas cublasLt cudart
            PRE_EXCLUDE_REGEXES ".*"
-       RUNTIME DESTINATION ${OLLAMA_CUDA_INSTALL_DIR} COMPONENT CUDA
-       LIBRARY DESTINATION ${OLLAMA_CUDA_INSTALL_DIR} COMPONENT CUDA
+       RUNTIME DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT CUDA
+       LIBRARY DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT CUDA
    )
 endif()
@@ -116,7 +115,11 @@ if(CMAKE_HIP_COMPILER)
        set(OLLAMA_HIP_INSTALL_DIR ${OLLAMA_INSTALL_DIR}/rocm)
        install(TARGETS ggml-hip
-           RUNTIME_DEPENDENCIES
+           RUNTIME_DEPENDENCY_SET rocm
+           RUNTIME DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT HIP
+           LIBRARY DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT HIP
+       )
+       install(RUNTIME_DEPENDENCY_SET rocm
            DIRECTORIES ${HIP_BIN_INSTALL_DIR} ${HIP_LIB_INSTALL_DIR}
            PRE_INCLUDE_REGEXES hipblas rocblas amdhip64 rocsolver amd_comgr hsa-runtime64 rocsparse tinfo rocprofiler-register drm drm_amdgpu numa elf
            PRE_EXCLUDE_REGEXES ".*"
CMakePresets.json

@@ -17,14 +17,6 @@
       "name": "CUDA",
       "inherits": [ "Default" ]
     },
-    {
-      "name": "CUDA 11",
-      "inherits": [ "CUDA" ],
-      "cacheVariables": {
-        "CMAKE_CUDA_ARCHITECTURES": "50;52;53;60;61;70;75;80;86",
-        "CMAKE_CUDA_FLAGS": "-Wno-deprecated-gpu-targets -t 2"
-      }
-    },
     {
       "name": "CUDA 12",
       "inherits": [ "CUDA" ],
@@ -79,11 +71,6 @@
       "configurePreset": "CUDA",
       "targets": [ "ggml-cuda" ]
     },
-    {
-      "name": "CUDA 11",
-      "inherits": [ "CUDA" ],
-      "configurePreset": "CUDA 11"
-    },
     {
       "name": "CUDA 12",
       "inherits": [ "CUDA" ],
Dockerfile

@@ -7,12 +7,13 @@ ARG JETPACK5VERSION=r35.4.1
 ARG JETPACK6VERSION=r36.4.0
 ARG CMAKEVERSION=3.31.2

-# CUDA v11 requires gcc v10.  v10.3 has regressions, so the rockylinux 8.5 AppStream has the latest compatible version
+# We require gcc v10 minimum.  v10.3 has regressions, so the rockylinux 8.5 AppStream has the latest compatible version
 FROM --platform=linux/amd64 rocm/dev-almalinux-8:${ROCMVERSION}-complete AS base-amd64
 RUN yum install -y yum-utils \
     && yum-config-manager --add-repo https://dl.rockylinux.org/vault/rocky/8.5/AppStream/\$basearch/os/ \
     && rpm --import https://dl.rockylinux.org/pub/rocky/RPM-GPG-KEY-Rocky-8 \
+    && dnf install -y yum-utils ccache gcc-toolset-10-gcc-10.2.1-8.2.el8 gcc-toolset-10-gcc-c++-10.2.1-8.2.el8 gcc-toolset-10-binutils-2.35-11.el8 \
     && dnf install -y ccache \
     && yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo
 ENV PATH=/opt/rh/gcc-toolset-10/root/usr/bin:$PATH
@@ -38,15 +39,6 @@ RUN --mount=type=cache,target=/root/.ccache \
     && cmake --build --parallel --preset 'CPU' \
     && cmake --install build --component CPU --strip --parallel 8

-FROM base AS cuda-11
-ARG CUDA11VERSION=11.3
-RUN dnf install -y cuda-toolkit-${CUDA11VERSION//./-}
-ENV PATH=/usr/local/cuda-11/bin:$PATH
-RUN --mount=type=cache,target=/root/.ccache \
-    cmake --preset 'CUDA 11' \
-    && cmake --build --parallel --preset 'CUDA 11' \
-    && cmake --install build --component CUDA --strip --parallel 8
-
 FROM base AS cuda-12
 ARG CUDA12VERSION=12.8
 RUN dnf install -y cuda-toolkit-${CUDA12VERSION//./-}
@@ -98,17 +90,15 @@ RUN --mount=type=cache,target=/root/.cache/go-build \
     go build -trimpath -buildmode=pie -o /bin/ollama .

 FROM --platform=linux/amd64 scratch AS amd64
-COPY --from=cuda-11 dist/lib/ollama/cuda_v11 /lib/ollama/cuda_v11
-COPY --from=cuda-12 dist/lib/ollama/cuda_v12 /lib/ollama/cuda_v12
+COPY --from=cuda-12 dist/lib/ollama /lib/ollama

 FROM --platform=linux/arm64 scratch AS arm64
-COPY --from=cuda-11 dist/lib/ollama/cuda_v11 /lib/ollama/cuda_v11
-COPY --from=cuda-12 dist/lib/ollama/cuda_v12 /lib/ollama/cuda_v12
-COPY --from=jetpack-5 dist/lib/ollama/cuda_v11 /lib/ollama/cuda_jetpack5
-COPY --from=jetpack-6 dist/lib/ollama/cuda_v12 /lib/ollama/cuda_jetpack6
+COPY --from=cuda-12 dist/lib/ollama /lib/ollama/cuda_sbsa
+COPY --from=jetpack-5 dist/lib/ollama /lib/ollama/cuda_jetpack5
+COPY --from=jetpack-6 dist/lib/ollama /lib/ollama/cuda_jetpack6

 FROM scratch AS rocm
-COPY --from=rocm-6 dist/lib/ollama/rocm /lib/ollama/rocm
+COPY --from=rocm-6 dist/lib/ollama /lib/ollama

 FROM ${FLAVOR} AS archive
 COPY --from=cpu dist/lib/ollama /lib/ollama
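Note: the `cuda-toolkit-${CUDA12VERSION//./-}` package name comes from bash parameter expansion, which replaces every `.` in the version with `-`. A minimal Go sketch of that transformation (the function name is made up for illustration):

```go
package main

import (
	"fmt"
	"strings"
)

// cudaToolkitPackage reproduces the ${CUDA12VERSION//./-} expansion used in
// the Dockerfile: "12.8" becomes the dnf package name "cuda-toolkit-12-8".
func cudaToolkitPackage(version string) string {
	return "cuda-toolkit-" + strings.ReplaceAll(version, ".", "-")
}

func main() {
	fmt.Println(cudaToolkitPackage("12.8")) // cuda-toolkit-12-8
}
```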
discover/cuda_common.go

@@ -3,6 +3,7 @@
 package discover

 import (
+	"fmt"
 	"log/slog"
 	"os"
 	"regexp"
@@ -55,10 +56,13 @@ func cudaVariant(gpuInfo CudaGPUInfo) string {
 			}
 		}
 		return "sbsa"
 	}

 	// driver 12.0 has problems with the cuda v12 library, so run v11 on those older drivers
 	if gpuInfo.DriverMajor < 12 || (gpuInfo.DriverMajor == 12 && gpuInfo.DriverMinor == 0) {
+		// The detected driver is older than Feb 2023
+		slog.Warn("old CUDA driver detected - please upgrade to a newer driver", "version", fmt.Sprintf("%d.%d", gpuInfo.DriverMajor, gpuInfo.DriverMinor))
 		return "v11"
 	}
 	return "v12"
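Note: the new warning fires on the same predicate that already selected the v11 runtime: any driver older than 12.1, including 12.0, which has problems with the CUDA v12 library. A self-contained Go sketch of that gate with made-up version numbers (not the actual discover package):

```go
package main

import "fmt"

// cudaGPUInfo carries just the two fields the check reads; field names
// follow the diff above, the type itself is illustrative.
type cudaGPUInfo struct {
	DriverMajor, DriverMinor int
}

// needsV11 mirrors the condition in cudaVariant: drivers below 12.1 fall
// back to the cuda_v11 runtime and now also trigger the upgrade warning.
func needsV11(g cudaGPUInfo) bool {
	return g.DriverMajor < 12 || (g.DriverMajor == 12 && g.DriverMinor == 0)
}

func main() {
	fmt.Println(needsV11(cudaGPUInfo{11, 4})) // true  -> "v11" plus warning
	fmt.Println(needsV11(cudaGPUInfo{12, 0})) // true  -> "v11" plus warning
	fmt.Println(needsV11(cudaGPUInfo{12, 4})) // false -> "v12"
}
```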
discover/path.go

@@ -12,7 +12,7 @@ import (
 // '../lib/ollama' on Linux and the executable's directory on macOS
 // note: distribution builds, additional GPU-specific libraries are
 // found in subdirectories of the returned path, such as
-// 'cuda_v11', 'cuda_v12', 'rocm', etc.
+// 'cuda_v12', 'rocm', etc.
 var LibOllamaPath string = func() string {
 	exe, err := os.Executable()
 	if err != nil {
docs/gpu.md

 # GPU

 ## Nvidia

-Ollama supports Nvidia GPUs with compute capability 5.0+.
+Ollama supports Nvidia GPUs with compute capability 5.0+ and driver version 531 and newer.

 Check your compute compatibility to see if your card is supported:
 [https://developer.nvidia.com/cuda-gpus](https://developer.nvidia.com/cuda-gpus)
docs/troubleshooting.md

@@ -43,7 +43,7 @@ Ollama includes multiple LLM libraries compiled for different GPUs and CPU vecto…
 In the server log, you will see a message that looks something like this (varies from release to release):

 ```
-Dynamic LLM libraries [rocm_v6 cpu cpu_avx cpu_avx2 cuda_v11 rocm_v5]
+Dynamic LLM libraries [rocm_v6 cpu cpu_avx cpu_avx2 cuda_v12 rocm_v5]
 ```

 **Experimental LLM Library Override**
llama/patches/0018-temporary-prevent-rocm-cuda-mixed-loading.patch (new file, mode 100644)

From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Daniel Hiltgen <daniel@ollama.com>
Date: Sun, 22 Jun 2025 09:22:05 -0700
Subject: [PATCH] temporary prevent rocm+cuda mixed loading

---
 ggml/src/ggml-backend-reg.cpp | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/ggml/src/ggml-backend-reg.cpp b/ggml/src/ggml-backend-reg.cpp
index 4e67d243..8f49f084 100644
--- a/ggml/src/ggml-backend-reg.cpp
+++ b/ggml/src/ggml-backend-reg.cpp
@@ -573,8 +573,16 @@
 void ggml_backend_load_all_from_path(const char * dir_path) {
     ggml_backend_load_best("blas", silent, dir_path);
     ggml_backend_load_best("cann", silent, dir_path);
-    ggml_backend_load_best("cuda", silent, dir_path);
-    ggml_backend_load_best("hip", silent, dir_path);
+
+    // Avoid mixed hip+cuda configurations
+    const char * hip_devices = std::getenv("HIP_VISIBLE_DEVICES");
+    const char * rocr_devices = std::getenv("ROCR_VISIBLE_DEVICES");
+    if (!hip_devices && !rocr_devices) {
+        ggml_backend_load_best("cuda", silent, dir_path);
+    } else {
+        ggml_backend_load_best("hip", silent, dir_path);
+    }
+
     ggml_backend_load_best("kompute", silent, dir_path);
     ggml_backend_load_best("metal", silent, dir_path);
     ggml_backend_load_best("rpc", silent, dir_path);
llm/server.go

@@ -139,6 +139,13 @@ func NewLlamaServer(gpus discover.GpuInfoList, modelPath string, f *ggml.GGML, a…
 		gpus = discover.GetCPUInfo()
 	}

+	// Verify the requested context size is <= the model training size
+	trainCtx := f.KV().ContextLength()
+	if opts.NumCtx/numParallel > int(trainCtx) && trainCtx > 0 {
+		slog.Warn("requested context size too large for model", "num_ctx", opts.NumCtx, "num_parallel", numParallel, "n_ctx_train", trainCtx)
+		opts.NumCtx = int(trainCtx) * numParallel
+	}
+
 	estimate := EstimateGPULayers(gpus, f, projectors, opts, numParallel)
 	if len(gpus) > 1 || gpus[0].Library != "cpu" {
 		switch {
@@ -311,7 +318,7 @@ func NewLlamaServer(gpus discover.GpuInfoList, modelPath string, f *ggml.GGML, a…
 		params = append(params, "--mmproj", projectors[0])
 	}

-	// iterate through compatible GPU libraries such as 'cuda_v12', 'cuda_v11', 'rocm', etc.
+	// iterate through compatible GPU libraries such as 'cuda_v12', 'rocm', etc.
 	// adding each library's respective path to the LD_LIBRARY_PATH, until finally running
 	// without any LD_LIBRARY_PATH flags
 	for {
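Note: the added block caps the per-slot context at the model's training context. With `num_parallel` slots, each slot gets `num_ctx / num_parallel` tokens, so the clamped total works out to `n_ctx_train * num_parallel`. A standalone Go sketch of that arithmetic with made-up numbers:

```go
package main

import "fmt"

// clampNumCtx reproduces the clamp added above: if the per-slot share of the
// requested context exceeds the model's training context, shrink the total
// so each slot gets exactly trainCtx tokens.
func clampNumCtx(numCtx, numParallel, trainCtx int) int {
	if trainCtx > 0 && numCtx/numParallel > trainCtx {
		return trainCtx * numParallel
	}
	return numCtx
}

func main() {
	// e.g. num_ctx=32768 across 4 parallel slots is 8192 per slot, but a
	// model trained with a 4096-token context clamps to 4096*4 = 16384.
	fmt.Println(clampNumCtx(32768, 4, 4096)) // 16384
}
```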
ml/backend/ggml/ggml/src/ggml-backend-reg.cpp

@@ -573,8 +573,16 @@ void ggml_backend_load_all_from_path(const char * dir_path) {
     ggml_backend_load_best("blas", silent, dir_path);
     ggml_backend_load_best("cann", silent, dir_path);
-    ggml_backend_load_best("cuda", silent, dir_path);
-    ggml_backend_load_best("hip", silent, dir_path);
+
+    // Avoid mixed hip+cuda configurations
+    const char * hip_devices = std::getenv("HIP_VISIBLE_DEVICES");
+    const char * rocr_devices = std::getenv("ROCR_VISIBLE_DEVICES");
+    if (!hip_devices && !rocr_devices) {
+        ggml_backend_load_best("cuda", silent, dir_path);
+    } else {
+        ggml_backend_load_best("hip", silent, dir_path);
+    }
+
     ggml_backend_load_best("kompute", silent, dir_path);
     ggml_backend_load_best("metal", silent, dir_path);
     ggml_backend_load_best("rpc", silent, dir_path);
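Note: this is the same change the 0018 patch carries into the vendored copy: when either `HIP_VISIBLE_DEVICES` or `ROCR_VISIBLE_DEVICES` is set, only the HIP backend is loaded; otherwise only CUDA is, so the two never load together. A Go rendering of the gate for illustration (the real check is the C++ above):

```go
package main

import (
	"fmt"
	"os"
)

// pickGPUBackend mirrors the env-var gate: scoping AMD GPUs via either
// variable selects the HIP backend, otherwise CUDA is preferred.
func pickGPUBackend() string {
	_, hip := os.LookupEnv("HIP_VISIBLE_DEVICES")
	_, rocr := os.LookupEnv("ROCR_VISIBLE_DEVICES")
	if hip || rocr {
		return "hip"
	}
	return "cuda"
}

func main() {
	fmt.Println(pickGPUBackend())
}
```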
scripts/build_windows.ps1

@@ -27,7 +27,6 @@ function checkEnv() {
     $env:VCToolsRedistDir=(get-item "${MSVC_INSTALL}\VC\Redist\MSVC\*")[0]
 }
 # Locate CUDA versions
-# Note: this assumes every version found will be built
 $cudaList=(get-item "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v*\bin\" -ea 'silentlycontinue')
 if ($cudaList.length -eq 0) {
     $d=(get-command -ea 'silentlycontinue' nvcc).path
@@ -94,19 +93,6 @@ function buildOllama() {
         $hashEnv = @{}
         Get-ChildItem env: | foreach { $hashEnv[$_.Name] = $_.Value }
-        if ("$script:CUDA_DIRS".Contains("v11")) {
-            $hashEnv.Keys | foreach { if ($_.Contains("CUDA_PATH_V11")) { $v11="$_" }}
-            $env:CUDAToolkit_ROOT=$hashEnv[$v11]
-            write-host "Building CUDA v11 backend libraries"
-            # Note: cuda v11 requires msvc 2019 so force the older generator
-            # to avoid 2022 (or newer) from being used as the default
-            & cmake --fresh --preset "CUDA 11" -G "Visual Studio 16 2019" --install-prefix $script:DIST_DIR
-            if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE) }
-            & cmake --build --preset "CUDA 11" --config Release --parallel $script:JOBS
-            if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE) }
-            & cmake --install build --component "CUDA" --strip
-            if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE) }
-        }
         if ("$script:CUDA_DIRS".Contains("v12")) {
             $hashEnv.Keys | foreach { if ($_.Contains("CUDA_PATH_V12")) { $v12="$_" }}
             $env:CUDAToolkit_ROOT=$hashEnv[$v12]
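Note: `buildOllama()` locates the toolkit by scanning the environment for a variable whose name contains `CUDA_PATH_V12` (the NVIDIA installer sets e.g. `CUDA_PATH_V12_8`) and exporting its value as `CUDAToolkit_ROOT`. A rough Go equivalent of that scan (illustrative only; the real logic lives in the PowerShell above):

```go
package main

import (
	"fmt"
	"os"
	"strings"
)

// findCUDAToolkitRoot walks KEY=VALUE pairs looking for a name that contains
// CUDA_PATH_V12 and returns its value, mirroring the $hashEnv scan.
func findCUDAToolkitRoot(environ []string) (string, bool) {
	for _, kv := range environ {
		if name, value, ok := strings.Cut(kv, "="); ok && strings.Contains(name, "CUDA_PATH_V12") {
			return value, true
		}
	}
	return "", false
}

func main() {
	if root, ok := findCUDAToolkitRoot(os.Environ()); ok {
		fmt.Println("CUDAToolkit_ROOT =", root)
	} else {
		fmt.Println("no CUDA v12 toolkit found")
	}
}
```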
scripts/env.sh

@@ -10,9 +10,7 @@ OLLAMA_COMMON_BUILD_ARGS="--build-arg=VERSION \
     --build-arg=GOFLAGS \
     --build-arg=OLLAMA_CUSTOM_CPU_DEFS \
     --build-arg=OLLAMA_SKIP_CUDA_GENERATE \
-    --build-arg=OLLAMA_SKIP_CUDA_11_GENERATE \
     --build-arg=OLLAMA_SKIP_CUDA_12_GENERATE \
-    --build-arg=CUDA_V11_ARCHITECTURES \
     --build-arg=CUDA_V12_ARCHITECTURES \
     --build-arg=OLLAMA_SKIP_ROCM_GENERATE \
     --build-arg=OLLAMA_FAST_BUILD \