Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
ac70ab67
Unverified
Commit
ac70ab67
authored
Jan 10, 2024
by
Daniel Hiltgen
Committed by
GitHub
Jan 10, 2024
Browse files
Merge pull request #1914 from dhiltgen/smarter_cuda_detection
Smarter GPU Management library detection
parents
224fbf27
3c49c3ab
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
169 additions
and
65 deletions
+169
-65
gpu/gpu.go
gpu/gpu.go
+153
-17
gpu/gpu_info_cuda.c
gpu/gpu_info_cuda.c
+5
-24
gpu/gpu_info_cuda.h
gpu/gpu_info_cuda.h
+1
-1
gpu/gpu_info_rocm.c
gpu/gpu_info_rocm.c
+7
-21
gpu/gpu_info_rocm.h
gpu/gpu_info_rocm.h
+1
-1
scripts/build_linux.sh
scripts/build_linux.sh
+2
-1
No files found.
gpu/gpu.go
View file @
ac70ab67
...
...
@@ -13,7 +13,10 @@ import "C"
import
(
"fmt"
"log"
"os"
"path/filepath"
"runtime"
"strings"
"sync"
"unsafe"
)
...
...
@@ -29,31 +32,79 @@ var gpuHandles *handles = nil
// CudaComputeMajorMin is the lowest CUDA compute capability major version
// that is accepted. With our current CUDA compile flags, 5.2 and older will
// not work properly.
const CudaComputeMajorMin = 6

// Glob patterns naming the places where the GPU management libraries may be
// installed, grouped by vendor and operating system.
var (
	// CudaLinuxGlobs lists possible locations for the nvidia-ml library on Linux.
	CudaLinuxGlobs = []string{
		"/usr/local/cuda/lib64/libnvidia-ml.so*",
		"/usr/lib/x86_64-linux-gnu/nvidia/current/libnvidia-ml.so*",
		"/usr/lib/x86_64-linux-gnu/libnvidia-ml.so*",
		"/usr/lib/wsl/lib/libnvidia-ml.so*",
		"/opt/cuda/lib64/libnvidia-ml.so*",
		"/usr/lib*/libnvidia-ml.so*",
		"/usr/local/lib*/libnvidia-ml.so*",
		"/usr/lib/aarch64-linux-gnu/nvidia/current/libnvidia-ml.so*",
		"/usr/lib/aarch64-linux-gnu/libnvidia-ml.so*",
	}

	// CudaWindowsGlobs lists possible locations for nvml.dll on Windows.
	CudaWindowsGlobs = []string{
		"c:\\Windows\\System32\\nvml.dll",
	}

	// RocmLinuxGlobs lists possible locations for the rocm_smi64 library on Linux.
	RocmLinuxGlobs = []string{
		"/opt/rocm*/lib*/librocm_smi64.so*",
	}

	// RocmWindowsGlobs lists possible locations for rocm_smi64.dll on Windows.
	RocmWindowsGlobs = []string{
		"c:\\Windows\\System32\\rocm_smi64.dll",
	}
)
// Note: gpuMutex must already be held
func
initGPUHandles
()
{
// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing
var
cudaMgmtName
string
var
cudaMgmtPatterns
[]
string
var
rocmMgmtName
string
var
rocmMgmtPatterns
[]
string
switch
runtime
.
GOOS
{
case
"windows"
:
cudaMgmtName
=
"nvml.dll"
cudaMgmtPatterns
=
make
([]
string
,
len
(
CudaWindowsGlobs
))
copy
(
cudaMgmtPatterns
,
CudaWindowsGlobs
)
rocmMgmtName
=
"rocm_smi64.dll"
rocmMgmtPatterns
=
make
([]
string
,
len
(
RocmWindowsGlobs
))
copy
(
rocmMgmtPatterns
,
RocmWindowsGlobs
)
case
"linux"
:
cudaMgmtName
=
"libnvidia-ml.so"
cudaMgmtPatterns
=
make
([]
string
,
len
(
CudaLinuxGlobs
))
copy
(
cudaMgmtPatterns
,
CudaLinuxGlobs
)
rocmMgmtName
=
"librocm_smi64.so"
rocmMgmtPatterns
=
make
([]
string
,
len
(
RocmLinuxGlobs
))
copy
(
rocmMgmtPatterns
,
RocmLinuxGlobs
)
default
:
return
}
log
.
Printf
(
"Detecting GPU type"
)
gpuHandles
=
&
handles
{
nil
,
nil
}
var
resp
C
.
cuda_init_resp_t
C
.
cuda_init
(
&
resp
)
if
resp
.
err
!=
nil
{
log
.
Printf
(
"CUDA not detected: %s"
,
C
.
GoString
(
resp
.
err
))
C
.
free
(
unsafe
.
Pointer
(
resp
.
err
))
cudaLibPaths
:=
FindGPULibs
(
cudaMgmtName
,
cudaMgmtPatterns
)
if
len
(
cudaLibPaths
)
>
0
{
cuda
:=
LoadCUDAMgmt
(
cudaLibPaths
)
if
cuda
!=
nil
{
log
.
Printf
(
"Nvidia GPU detected"
)
gpuHandles
.
cuda
=
cuda
return
}
}
var
resp
C
.
rocm_init_resp_t
C
.
rocm_init
(
&
resp
)
if
resp
.
err
!=
nil
{
log
.
Printf
(
"ROCm not detected: %s"
,
C
.
GoString
(
resp
.
err
))
C
.
free
(
unsafe
.
Pointer
(
resp
.
err
))
}
else
{
rocmLibPaths
:=
FindGPULibs
(
rocmMgmtName
,
rocmMgmtPatterns
)
if
len
(
rocmLibPaths
)
>
0
{
rocm
:=
LoadROCMMgmt
(
rocmLibPaths
)
if
rocm
!=
nil
{
log
.
Printf
(
"Radeon GPU detected"
)
rocm
:
=
r
esp
.
rh
gpuHandles
.
rocm
=
&
rocm
gpuHandles
.
rocm
=
r
ocm
return
}
}
else
{
log
.
Printf
(
"Nvidia GPU detected"
)
cuda
:=
resp
.
ch
gpuHandles
.
cuda
=
&
cuda
}
}
...
...
@@ -143,3 +194,88 @@ func CheckVRAM() (int64, error) {
return
0
,
fmt
.
Errorf
(
"no GPU detected"
)
// TODO - better handling of CPU based memory determination
}
// maxGPULibLinkHops bounds how many symlinks are followed while resolving a
// single library path, so that a cyclic link chain cannot hang discovery.
const maxGPULibLinkHops = 40

// resolveGPULibLink follows the symlink chain starting at path and returns
// the final target. Relative link targets are interpreted relative to the
// directory containing the link. The second result is false when the chain
// is longer than maxGPULibLinkHops (for example a symlink cycle).
func resolveGPULibLink(path string) (string, bool) {
	for hop := 0; hop <= maxGPULibLinkHops; hop++ {
		target, err := os.Readlink(path)
		if err != nil {
			// Not a symlink (or not readable as one): the chain ends here.
			return path, true
		}
		if !filepath.IsAbs(target) {
			target = filepath.Join(filepath.Dir(path), target)
		}
		path = target
	}
	return "", false
}

// FindGPULibs returns the candidate paths of the GPU management library
// whose file name starts with baseLibName.
//
// The directories listed in PATH (windows) or LD_LIBRARY_PATH (linux) are
// searched first, followed by the caller supplied glob patterns. Every match
// has its symlinks resolved and duplicates are dropped, so each library file
// is reported once, in discovery order. On any other operating system an
// empty slice is returned.
//
// The patterns slice is never modified. Empty search-path entries are
// ignored rather than being treated as the current working directory, and
// matches whose symlink chain cannot be resolved (e.g. a link cycle) are
// skipped.
func FindGPULibs(baseLibName string, patterns []string) []string {
	// Multiple GPU libraries may exist, and some may not work, so keep trying until we exhaust them
	gpuLibPaths := []string{}
	log.Printf("Searching for GPU management library %s", baseLibName)

	var ldPaths []string
	switch runtime.GOOS {
	case "windows":
		ldPaths = strings.Split(os.Getenv("PATH"), ";")
	case "linux":
		ldPaths = strings.Split(os.Getenv("LD_LIBRARY_PATH"), ":")
	default:
		return gpuLibPaths
	}

	// Start with whatever we find in the PATH/LD_LIBRARY_PATH, then fall back
	// to the well-known locations. A fresh slice is built so the caller's
	// backing array is never written to.
	globs := make([]string, 0, len(ldPaths)+len(patterns))
	for _, ldPath := range ldPaths {
		if ldPath == "" {
			// filepath.Abs("") yields the working directory; an unset or
			// empty variable must not cause the cwd to be searched.
			continue
		}
		d, err := filepath.Abs(ldPath)
		if err != nil {
			continue
		}
		globs = append(globs, filepath.Join(d, baseLibName+"*"))
	}
	globs = append(globs, patterns...)

	seen := make(map[string]struct{})
	for _, pattern := range globs {
		// Ignore glob discovery errors
		matches, _ := filepath.Glob(pattern)
		for _, match := range matches {
			// Resolve any links so we don't try the same lib multiple times
			// and weed out any dups across globs
			libPath, ok := resolveGPULibLink(match)
			if !ok {
				continue
			}
			if _, dup := seen[libPath]; dup {
				continue
			}
			seen[libPath] = struct{}{}
			gpuLibPaths = append(gpuLibPaths, libPath)
		}
	}
	log.Printf("Discovered GPU libraries: %v", gpuLibPaths)
	return gpuLibPaths
}
func
LoadCUDAMgmt
(
cudaLibPaths
[]
string
)
*
C
.
cuda_handle_t
{
var
resp
C
.
cuda_init_resp_t
for
_
,
libPath
:=
range
cudaLibPaths
{
lib
:=
C
.
CString
(
libPath
)
defer
C
.
free
(
unsafe
.
Pointer
(
lib
))
C
.
cuda_init
(
lib
,
&
resp
)
if
resp
.
err
!=
nil
{
log
.
Printf
(
"Unable to load CUDA management library %s: %s"
,
libPath
,
C
.
GoString
(
resp
.
err
))
C
.
free
(
unsafe
.
Pointer
(
resp
.
err
))
}
else
{
return
&
resp
.
ch
}
}
return
nil
}
func
LoadROCMMgmt
(
rocmLibPaths
[]
string
)
*
C
.
rocm_handle_t
{
var
resp
C
.
rocm_init_resp_t
for
_
,
libPath
:=
range
rocmLibPaths
{
lib
:=
C
.
CString
(
libPath
)
defer
C
.
free
(
unsafe
.
Pointer
(
lib
))
C
.
rocm_init
(
lib
,
&
resp
)
if
resp
.
err
!=
nil
{
log
.
Printf
(
"Unable to load ROCm management library %s: %s"
,
libPath
,
C
.
GoString
(
resp
.
err
))
C
.
free
(
unsafe
.
Pointer
(
resp
.
err
))
}
else
{
return
&
resp
.
rh
}
}
return
nil
}
gpu/gpu_info_cuda.c
View file @
ac70ab67
...
...
@@ -4,26 +4,9 @@
#include <string.h>
#ifndef _WIN32
const
char
*
cuda_lib_paths
[]
=
{
"libnvidia-ml.so"
,
"/usr/local/cuda/lib64/libnvidia-ml.so"
,
"/usr/lib/x86_64-linux-gnu/nvidia/current/libnvidia-ml.so"
,
"/usr/lib/x86_64-linux-gnu/libnvidia-ml.so.1"
,
"/usr/lib/wsl/lib/libnvidia-ml.so.1"
,
// TODO Maybe glob?
NULL
,
};
#else
const
char
*
cuda_lib_paths
[]
=
{
"nvml.dll"
,
""
,
NULL
,
};
#endif
#define CUDA_LOOKUP_SIZE 6
void
cuda_init
(
cuda_init_resp_t
*
resp
)
{
void
cuda_init
(
char
*
cuda_lib_path
,
cuda_init_resp_t
*
resp
)
{
nvmlReturn_t
ret
;
resp
->
err
=
NULL
;
const
int
buflen
=
256
;
...
...
@@ -42,16 +25,12 @@ void cuda_init(cuda_init_resp_t *resp) {
{
"nvmlDeviceGetCudaComputeCapability"
,
(
void
*
)
&
resp
->
ch
.
getComputeCapability
},
};
for
(
i
=
0
;
cuda_lib_paths
[
i
]
!=
NULL
&&
resp
->
ch
.
handle
==
NULL
;
i
++
)
{
resp
->
ch
.
handle
=
LOAD_LIBRARY
(
cuda_lib_paths
[
i
],
RTLD_LAZY
);
}
resp
->
ch
.
handle
=
LOAD_LIBRARY
(
cuda_lib_path
,
RTLD_LAZY
);
if
(
!
resp
->
ch
.
handle
)
{
// TODO improve error message, as the LOAD_ERR will typically have the
// final path that was checked which might be confusing.
char
*
msg
=
LOAD_ERR
();
snprintf
(
buf
,
buflen
,
"Unable to load %s library to query for Nvidia GPUs: %s"
,
cuda_lib_path
s
[
0
]
,
msg
);
cuda_lib_path
,
msg
);
free
(
msg
);
resp
->
err
=
strdup
(
buf
);
return
;
...
...
@@ -73,6 +52,8 @@ void cuda_init(cuda_init_resp_t *resp) {
ret
=
(
*
resp
->
ch
.
initFn
)();
if
(
ret
!=
NVML_SUCCESS
)
{
UNLOAD_LIBRARY
(
resp
->
ch
.
handle
);
resp
->
ch
.
handle
=
NULL
;
snprintf
(
buf
,
buflen
,
"nvml vram init failure: %d"
,
ret
);
resp
->
err
=
strdup
(
buf
);
}
...
...
gpu/gpu_info_cuda.h
View file @
ac70ab67
...
...
@@ -36,7 +36,7 @@ typedef struct cuda_compute_capability {
int
minor
;
}
cuda_compute_capability_t
;
void
cuda_init
(
cuda_init_resp_t
*
resp
);
void
cuda_init
(
char
*
cuda_lib_path
,
cuda_init_resp_t
*
resp
);
void
cuda_check_vram
(
cuda_handle_t
ch
,
mem_info_t
*
resp
);
void
cuda_compute_capability
(
cuda_handle_t
ch
,
cuda_compute_capability_t
*
cc
);
...
...
gpu/gpu_info_rocm.c
View file @
ac70ab67
...
...
@@ -4,22 +4,7 @@
#include <string.h>
#ifndef _WIN32
const
char
*
rocm_lib_paths
[]
=
{
"librocm_smi64.so"
,
"/opt/rocm/lib/librocm_smi64.so"
,
NULL
,
};
#else
// TODO untested
const
char
*
rocm_lib_paths
[]
=
{
"rocm_smi64.dll"
,
"/opt/rocm/lib/rocm_smi64.dll"
,
NULL
,
};
#endif
void
rocm_init
(
rocm_init_resp_t
*
resp
)
{
void
rocm_init
(
char
*
rocm_lib_path
,
rocm_init_resp_t
*
resp
)
{
rsmi_status_t
ret
;
resp
->
err
=
NULL
;
const
int
buflen
=
256
;
...
...
@@ -36,14 +21,12 @@ void rocm_init(rocm_init_resp_t *resp) {
// { "rsmi_dev_id_get", (void*)&resp->rh.getHandle },
};
for
(
i
=
0
;
rocm_lib_paths
[
i
]
!=
NULL
&&
resp
->
rh
.
handle
==
NULL
;
i
++
)
{
resp
->
rh
.
handle
=
LOAD_LIBRARY
(
rocm_lib_paths
[
i
],
RTLD_LAZY
);
}
resp
->
rh
.
handle
=
LOAD_LIBRARY
(
rocm_lib_path
,
RTLD_LAZY
);
if
(
!
resp
->
rh
.
handle
)
{
char
*
msg
=
LOAD_ERR
();
snprintf
(
buf
,
buflen
,
"Unable to load %s library to query for Radeon GPUs: %s
\n
"
,
rocm_lib_path
s
[
0
]
,
msg
);
rocm_lib_path
,
msg
);
free
(
msg
);
resp
->
err
=
strdup
(
buf
);
return
;
...
...
@@ -53,6 +36,7 @@ void rocm_init(rocm_init_resp_t *resp) {
*
l
[
i
].
p
=
LOAD_SYMBOL
(
resp
->
rh
.
handle
,
l
[
i
].
s
);
if
(
!
l
[
i
].
p
)
{
UNLOAD_LIBRARY
(
resp
->
rh
.
handle
);
resp
->
rh
.
handle
=
NULL
;
char
*
msg
=
LOAD_ERR
();
snprintf
(
buf
,
buflen
,
"symbol lookup for %s failed: %s"
,
l
[
i
].
s
,
msg
);
...
...
@@ -64,6 +48,8 @@ void rocm_init(rocm_init_resp_t *resp) {
ret
=
(
*
resp
->
rh
.
initFn
)(
0
);
if
(
ret
!=
RSMI_STATUS_SUCCESS
)
{
UNLOAD_LIBRARY
(
resp
->
rh
.
handle
);
resp
->
rh
.
handle
=
NULL
;
snprintf
(
buf
,
buflen
,
"rocm vram init failure: %d"
,
ret
);
resp
->
err
=
strdup
(
buf
);
}
...
...
@@ -83,7 +69,7 @@ void rocm_check_vram(rocm_handle_t h, mem_info_t *resp) {
int
i
;
if
(
h
.
handle
==
NULL
)
{
resp
->
err
=
strdup
(
"
nvml
handle
sn'
t initialized"
);
resp
->
err
=
strdup
(
"
rocm
handle
no
t initialized"
);
return
;
}
...
...
gpu/gpu_info_rocm.h
View file @
ac70ab67
...
...
@@ -29,7 +29,7 @@ typedef struct rocm_init_resp {
rocm_handle_t
rh
;
}
rocm_init_resp_t
;
void
rocm_init
(
rocm_init_resp_t
*
resp
);
void
rocm_init
(
char
*
rocm_lib_path
,
rocm_init_resp_t
*
resp
);
void
rocm_check_vram
(
rocm_handle_t
rh
,
mem_info_t
*
resp
);
#endif // __GPU_INFO_ROCM_H__
...
...
scripts/build_linux.sh
View file @
ac70ab67
...
...
@@ -5,9 +5,10 @@ set -eu
export
VERSION
=
${
VERSION
:-
0
.0.0
}
export
GOFLAGS
=
"'-ldflags=-w -s
\"
-X=github.com/jmorganca/ollama/version.Version=
$VERSION
\"
\"
-X=github.com/jmorganca/ollama/server.mode=release
\"
'"
BUILD_ARCH
=
${
BUILD_ARCH
:-
"amd64 arm64"
}
mkdir
-p
dist
for
TARGETARCH
in
amd64 arm64
;
do
for
TARGETARCH
in
${
BUILD_ARCH
}
;
do
docker build
--platform
=
linux/
$TARGETARCH
--build-arg
=
GOFLAGS
--build-arg
=
CGO_CFLAGS
-f
Dockerfile.build
-t
builder:
$TARGETARCH
.
docker create
--platform
linux/
$TARGETARCH
--name
builder-
$TARGETARCH
builder:
$TARGETARCH
docker
cp
builder-
$TARGETARCH
:/go/src/github.com/jmorganca/ollama/ollama ./dist/ollama-linux-
$TARGETARCH
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment