Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
dc18eee3
Unverified
Commit
dc18eee3
authored
May 09, 2024
by
Daniel Hiltgen
Committed by
GitHub
May 09, 2024
Browse files
Merge pull request #4238 from dhiltgen/gpu_info
Record more GPU information
parents
d0425f26
8727a9c1
Changes
10
Show whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
150 additions
and
96 deletions
+150
-96
gpu/amd_hip_windows.go
gpu/amd_hip_windows.go
+10
-5
gpu/amd_linux.go
gpu/amd_linux.go
+61
-21
gpu/amd_windows.go
gpu/amd_windows.go
+19
-47
gpu/gpu.go
gpu/gpu.go
+11
-5
gpu/gpu_info.h
gpu/gpu_info.h
+3
-0
gpu/gpu_info_cpu.c
gpu/gpu_info_cpu.c
+0
-4
gpu/gpu_info_nvcuda.c
gpu/gpu_info_nvcuda.c
+12
-8
gpu/gpu_info_nvcuda.h
gpu/gpu_info_nvcuda.h
+3
-0
gpu/types.go
gpu/types.go
+29
-5
server/routes.go
server/routes.go
+2
-1
No files found.
gpu/amd_hip_windows.go
View file @
dc18eee3
...
...
@@ -3,7 +3,6 @@ package gpu
import
(
"fmt"
"log/slog"
"strconv"
"syscall"
"unsafe"
...
...
@@ -74,16 +73,22 @@ func (hl *HipLib) Release() {
hl
.
dll
=
0
}
func
(
hl
*
HipLib
)
AMDDriverVersion
()
(
string
,
error
)
{
func
(
hl
*
HipLib
)
AMDDriverVersion
()
(
driverMajor
,
driverMinor
int
,
err
error
)
{
if
hl
.
dll
==
0
{
return
""
,
fmt
.
Errorf
(
"dll has been unloaded"
)
return
0
,
0
,
fmt
.
Errorf
(
"dll has been unloaded"
)
}
var
version
int
status
,
_
,
err
:=
syscall
.
SyscallN
(
hl
.
hipDriverGetVersion
,
uintptr
(
unsafe
.
Pointer
(
&
version
)))
if
status
!=
hipSuccess
{
return
""
,
fmt
.
Errorf
(
"failed call to hipDriverGetVersion: %d %s"
,
status
,
err
)
return
0
,
0
,
fmt
.
Errorf
(
"failed call to hipDriverGetVersion: %d %s"
,
status
,
err
)
}
return
strconv
.
Itoa
(
version
),
nil
slog
.
Debug
(
"hipDriverGetVersion"
,
"version"
,
version
)
// TODO - this isn't actually right, but the docs claim hipDriverGetVersion isn't accurate anyway...
driverMajor
=
version
/
1000
driverMinor
=
(
version
-
(
driverMajor
*
1000
))
/
10
return
driverMajor
,
driverMinor
,
nil
}
func
(
hl
*
HipLib
)
HipGetDeviceCount
()
int
{
...
...
gpu/amd_linux.go
View file @
dc18eee3
...
...
@@ -8,6 +8,7 @@ import (
"log/slog"
"os"
"path/filepath"
"regexp"
"slices"
"strconv"
"strings"
...
...
@@ -41,10 +42,8 @@ func AMDGetGPUInfo() []GpuInfo {
}
// Opportunistic logging of driver version to aid in troubleshooting
ver
,
err
:=
AMDDriverVersion
()
if
err
==
nil
{
slog
.
Info
(
"AMD Driver: "
+
ver
)
}
else
{
driverMajor
,
driverMinor
,
err
:=
AMDDriverVersion
()
if
err
!=
nil
{
// TODO - if we see users crash and burn with the upstreamed kernel this can be adjusted to hard-fail rocm support and fallback to CPU
slog
.
Warn
(
"ollama recommends running the https://www.amd.com/en/support/linux-drivers"
,
"error"
,
err
)
}
...
...
@@ -91,6 +90,7 @@ func AMDGetGPUInfo() []GpuInfo {
scanner
:=
bufio
.
NewScanner
(
fp
)
isCPU
:=
false
var
major
,
minor
,
patch
uint64
var
vendor
,
device
uint64
for
scanner
.
Scan
()
{
line
:=
strings
.
TrimSpace
(
scanner
.
Text
())
// Note: we could also use "cpu_cores_count X" where X is greater than zero to detect CPUs
...
...
@@ -118,6 +118,26 @@ func AMDGetGPUInfo() []GpuInfo {
slog
.
Debug
(
"malformed int "
+
line
)
continue
}
}
else
if
strings
.
HasPrefix
(
line
,
"vendor_id"
)
{
ver
:=
strings
.
Fields
(
line
)
if
len
(
ver
)
!=
2
{
slog
.
Debug
(
"malformed vendor_id"
,
"vendor_id"
,
line
)
continue
}
vendor
,
err
=
strconv
.
ParseUint
(
ver
[
1
],
10
,
32
)
if
err
!=
nil
{
slog
.
Debug
(
"malformed vendor_id"
+
line
)
}
}
else
if
strings
.
HasPrefix
(
line
,
"device_id"
)
{
ver
:=
strings
.
Fields
(
line
)
if
len
(
ver
)
!=
2
{
slog
.
Debug
(
"malformed device_id"
,
"device_id"
,
line
)
continue
}
device
,
err
=
strconv
.
ParseUint
(
ver
[
1
],
10
,
32
)
if
err
!=
nil
{
slog
.
Debug
(
"malformed device_id"
+
line
)
}
}
// TODO - any other properties we want to extract and record?
...
...
@@ -140,7 +160,7 @@ func AMDGetGPUInfo() []GpuInfo {
}
if
int
(
major
)
<
RocmComputeMin
{
slog
.
Warn
(
fmt
.
Sprintf
(
"amdgpu too old gfx%d%
d
%x"
,
major
,
minor
,
patch
),
"gpu"
,
gpuID
)
slog
.
Warn
(
fmt
.
Sprintf
(
"amdgpu too old gfx%d%
x
%x"
,
major
,
minor
,
patch
),
"gpu"
,
gpuID
)
continue
}
...
...
@@ -210,12 +230,17 @@ func AMDGetGPUInfo() []GpuInfo {
// iGPU detection, remove this check once we can support an iGPU variant of the rocm library
if
totalMemory
<
IGPUMemLimit
{
slog
.
Info
(
"
amdgpu appears to be an iGPU,
skipping"
,
"
gpu
"
,
gpuID
,
"total"
,
format
.
HumanBytes2
(
totalMemory
))
slog
.
Info
(
"
unsupported Radeon iGPU detected
skipping"
,
"
id
"
,
gpuID
,
"total"
,
format
.
HumanBytes2
(
totalMemory
))
continue
}
var
name
string
// TODO - PCI ID lookup
if
vendor
>
0
&&
device
>
0
{
name
=
fmt
.
Sprintf
(
"%04x:%04x"
,
vendor
,
device
)
}
slog
.
Info
(
"amdgpu memory"
,
"gpu"
,
gpuID
,
"total"
,
format
.
HumanBytes2
(
totalMemory
))
slog
.
Info
(
"amdgpu memory"
,
"gpu"
,
gpuID
,
"available"
,
format
.
HumanBytes2
(
totalMemory
-
usedMemory
))
slog
.
Debug
(
"amdgpu memory"
,
"gpu"
,
gpuID
,
"total"
,
format
.
HumanBytes2
(
totalMemory
))
slog
.
Debug
(
"amdgpu memory"
,
"gpu"
,
gpuID
,
"available"
,
format
.
HumanBytes2
(
totalMemory
-
usedMemory
))
gpuInfo
:=
GpuInfo
{
Library
:
"rocm"
,
memInfo
:
memInfo
{
...
...
@@ -223,11 +248,11 @@ func AMDGetGPUInfo() []GpuInfo {
FreeMemory
:
(
totalMemory
-
usedMemory
),
},
ID
:
fmt
.
Sprintf
(
"%d"
,
gpuID
),
// Name: not exposed in sysfs directly, would require pci device id lookup
Major
:
int
(
major
),
Minor
:
int
(
minor
),
Patch
:
int
(
patch
),
Name
:
name
,
Compute
:
fmt
.
Sprintf
(
"gfx%d%x%x"
,
major
,
minor
,
patch
),
MinimumMemory
:
rocmMinimumMemory
,
DriverMajor
:
driverMajor
,
DriverMinor
:
driverMinor
,
}
// If the user wants to filter to a subset of devices, filter out if we aren't a match
...
...
@@ -266,7 +291,7 @@ func AMDGetGPUInfo() []GpuInfo {
}
slog
.
Debug
(
"rocm supported GPUs"
,
"types"
,
supported
)
}
gfx
:=
fmt
.
Sprintf
(
"gfx%d%d%x"
,
gpuInfo
.
Major
,
gpuInfo
.
Minor
,
gpuInfo
.
Patch
)
gfx
:=
gpuInfo
.
Compute
if
!
slices
.
Contains
[[]
string
,
string
](
supported
,
gfx
)
{
slog
.
Warn
(
"amdgpu is not supported"
,
"gpu"
,
gpuInfo
.
ID
,
"gpu_type"
,
gfx
,
"library"
,
libDir
,
"supported_types"
,
supported
)
// TODO - consider discrete markdown just for ROCM troubleshooting?
...
...
@@ -276,7 +301,7 @@ func AMDGetGPUInfo() []GpuInfo {
slog
.
Info
(
"amdgpu is supported"
,
"gpu"
,
gpuInfo
.
ID
,
"gpu_type"
,
gfx
)
}
}
else
{
slog
.
Debug
(
"skipping rocm gfx compatibility check
with
HSA_OVERRIDE_GFX_VERSION
="
+
gfxOverride
)
slog
.
Info
(
"skipping rocm gfx compatibility check
"
,
"
HSA_OVERRIDE_GFX_VERSION
"
,
gfxOverride
)
}
// The GPU has passed all the verification steps and is supported
...
...
@@ -322,19 +347,34 @@ func AMDValidateLibDir() (string, error) {
return
""
,
fmt
.
Errorf
(
"no suitable rocm found, falling back to CPU"
)
}
func
AMDDriverVersion
()
(
string
,
error
)
{
_
,
err
:
=
os
.
Stat
(
DriverVersionFile
)
func
AMDDriverVersion
()
(
driverMajor
,
driverMinor
int
,
err
error
)
{
_
,
err
=
os
.
Stat
(
DriverVersionFile
)
if
err
!=
nil
{
return
""
,
fmt
.
Errorf
(
"amdgpu version file missing: %s %w"
,
DriverVersionFile
,
err
)
return
0
,
0
,
fmt
.
Errorf
(
"amdgpu version file missing: %s %w"
,
DriverVersionFile
,
err
)
}
fp
,
err
:=
os
.
Open
(
DriverVersionFile
)
if
err
!=
nil
{
return
""
,
err
return
0
,
0
,
err
}
defer
fp
.
Close
()
verString
,
err
:=
io
.
ReadAll
(
fp
)
if
err
!=
nil
{
return
""
,
err
return
0
,
0
,
err
}
pattern
:=
`\A(\d+)\.(\d+).*`
regex
:=
regexp
.
MustCompile
(
pattern
)
match
:=
regex
.
FindStringSubmatch
(
string
(
verString
))
if
len
(
match
)
<
2
{
return
0
,
0
,
fmt
.
Errorf
(
"malformed version string %s"
,
string
(
verString
))
}
driverMajor
,
err
=
strconv
.
Atoi
(
match
[
1
])
if
err
!=
nil
{
return
0
,
0
,
err
}
driverMinor
,
err
=
strconv
.
Atoi
(
match
[
2
])
if
err
!=
nil
{
return
0
,
0
,
err
}
return
strings
.
TrimSpace
(
string
(
verString
))
,
nil
return
driverMajor
,
driverMinor
,
nil
}
gpu/amd_windows.go
View file @
dc18eee3
...
...
@@ -7,7 +7,6 @@ import (
"os"
"path/filepath"
"slices"
"strconv"
"strings"
"github.com/ollama/ollama/format"
...
...
@@ -34,13 +33,12 @@ func AMDGetGPUInfo() []GpuInfo {
}
defer
hl
.
Release
()
ver
,
err
:=
hl
.
AMDDriverVersion
()
if
err
==
nil
{
slog
.
Info
(
"AMD Driver: "
+
ver
)
}
else
{
// For now this is benign, but we may eventually need to fail compatibility checks
slog
.
Debug
(
"error looking up amd driver version"
,
"error"
,
err
)
}
// TODO - this reports incorrect version information, so omitting for now
// driverMajor, driverMinor, err := hl.AMDDriverVersion()
// if err != nil {
// // For now this is benign, but we may eventually need to fail compatibility checks
// slog.Debug("error looking up amd driver version", "error", err)
// }
// Note: the HIP library automatically handles subsetting to any HIP_VISIBLE_DEVICES the user specified
count
:=
hl
.
HipGetDeviceCount
()
...
...
@@ -62,10 +60,10 @@ func AMDGetGPUInfo() []GpuInfo {
return
nil
}
}
else
{
slog
.
Debug
(
"skipping rocm gfx compatibility check
with
HSA_OVERRIDE_GFX_VERSION
="
+
gfxOverride
)
slog
.
Info
(
"skipping rocm gfx compatibility check
"
,
"
HSA_OVERRIDE_GFX_VERSION
"
,
gfxOverride
)
}
slog
.
Info
(
"detected hip devices"
,
"count"
,
count
)
slog
.
Debug
(
"detected hip devices"
,
"count"
,
count
)
// TODO how to determine the underlying device ID when visible devices is causing this to subset?
for
i
:=
0
;
i
<
count
;
i
++
{
err
=
hl
.
HipSetDevice
(
i
)
...
...
@@ -85,18 +83,11 @@ func AMDGetGPUInfo() []GpuInfo {
// Can luid be used on windows for setting visible devices (and is it actually set?)
n
=
bytes
.
IndexByte
(
props
.
GcnArchName
[
:
],
0
)
gfx
:=
string
(
props
.
GcnArchName
[
:
n
])
slog
.
Info
(
"hip device"
,
"id"
,
i
,
"name"
,
name
,
"gfx"
,
gfx
)
var
major
,
minor
,
patch
string
switch
len
(
gfx
)
{
case
6
:
major
,
minor
,
patch
=
gfx
[
3
:
4
],
gfx
[
4
:
5
],
gfx
[
5
:
]
case
7
:
major
,
minor
,
patch
=
gfx
[
3
:
5
],
gfx
[
5
:
6
],
gfx
[
6
:
]
}
slog
.
Debug
(
"hip device"
,
"id"
,
i
,
"name"
,
name
,
"gfx"
,
gfx
)
//slog.Info(fmt.Sprintf("[%d] Integrated: %d", i, props.iGPU)) // DOESN'T REPORT CORRECTLY! Always 0
// TODO Why isn't props.iGPU accurate!?
if
strings
.
EqualFold
(
name
,
iGPUName
)
{
slog
.
Info
(
"iGPU detected skipping"
,
"id"
,
i
)
slog
.
Info
(
"
unsupported Radeon
iGPU detected skipping"
,
"id"
,
i
,
"name"
,
name
,
"gfx"
,
gfx
)
continue
}
if
gfxOverride
==
""
{
...
...
@@ -106,7 +97,7 @@ func AMDGetGPUInfo() []GpuInfo {
slog
.
Warn
(
"See https://github.com/ollama/ollama/blob/main/docs/troubleshooting.md for HSA_OVERRIDE_GFX_VERSION usage"
)
continue
}
else
{
slog
.
Info
(
"amdgpu is supported"
,
"gpu"
,
i
,
"gpu_type"
,
gfx
)
slog
.
Debug
(
"amdgpu is supported"
,
"gpu"
,
i
,
"gpu_type"
,
gfx
)
}
}
...
...
@@ -124,8 +115,8 @@ func AMDGetGPUInfo() []GpuInfo {
// TODO revisit this once ROCm v6 is available on windows.
// v5.7 only reports VRAM used by this process, so it's completely wrong and unusable
slog
.
Info
(
"amdgpu memory"
,
"gpu"
,
i
,
"total"
,
format
.
HumanBytes2
(
totalMemory
))
slog
.
Info
(
"amdgpu memory"
,
"gpu"
,
i
,
"available"
,
format
.
HumanBytes2
(
freeMemory
))
slog
.
Debug
(
"amdgpu memory"
,
"gpu"
,
i
,
"total"
,
format
.
HumanBytes2
(
totalMemory
))
slog
.
Debug
(
"amdgpu memory"
,
"gpu"
,
i
,
"available"
,
format
.
HumanBytes2
(
freeMemory
))
gpuInfo
:=
GpuInfo
{
Library
:
"rocm"
,
memInfo
:
memInfo
{
...
...
@@ -135,31 +126,12 @@ func AMDGetGPUInfo() []GpuInfo {
ID
:
fmt
.
Sprintf
(
"%d"
,
i
),
// TODO this is probably wrong if we specify visible devices
DependencyPath
:
libDir
,
MinimumMemory
:
rocmMinimumMemory
,
}
if
major
!=
""
{
gpuInfo
.
Major
,
err
=
strconv
.
Atoi
(
major
)
if
err
!=
nil
{
slog
.
Info
(
"failed to parse version"
,
"version"
,
gfx
,
"error"
,
err
)
}
}
if
minor
!=
""
{
gpuInfo
.
Minor
,
err
=
strconv
.
Atoi
(
minor
)
if
err
!=
nil
{
slog
.
Info
(
"failed to parse version"
,
"version"
,
gfx
,
"error"
,
err
)
}
}
if
patch
!=
""
{
// Patch rev is hex; e.g. gfx90a
p
,
err
:=
strconv
.
ParseInt
(
patch
,
16
,
0
)
if
err
!=
nil
{
slog
.
Info
(
"failed to parse version"
,
"version"
,
gfx
,
"error"
,
err
)
}
else
{
gpuInfo
.
Patch
=
int
(
p
)
}
}
if
gpuInfo
.
Major
<
RocmComputeMin
{
slog
.
Warn
(
fmt
.
Sprintf
(
"amdgpu [%s] too old gfx%d%d%x"
,
gpuInfo
.
ID
,
gpuInfo
.
Major
,
gpuInfo
.
Minor
,
gpuInfo
.
Patch
))
continue
Name
:
name
,
Compute
:
gfx
,
// TODO - this information isn't accurate on windows, so don't report it until we find the right way to retrieve
// DriverMajor: driverMajor,
// DriverMinor: driverMinor,
}
resp
=
append
(
resp
,
gpuInfo
)
...
...
gpu/gpu.go
View file @
dc18eee3
...
...
@@ -119,12 +119,12 @@ func initGPUHandles() *handles {
return
gpuHandles
}
slog
.
Info
(
"Detecting GPUs"
)
slog
.
Debug
(
"Detecting GPUs"
)
nvcudaLibPaths
:=
FindGPULibs
(
nvcudaMgmtName
,
nvcudaMgmtPatterns
)
if
len
(
nvcudaLibPaths
)
>
0
{
deviceCount
,
nvcuda
,
libPath
:=
LoadNVCUDAMgmt
(
nvcudaLibPaths
)
if
nvcuda
!=
nil
{
slog
.
Info
(
"detected GPUs"
,
"count"
,
deviceCount
,
"library"
,
libPath
)
slog
.
Debug
(
"detected GPUs"
,
"count"
,
deviceCount
,
"library"
,
libPath
)
gpuHandles
.
nvcuda
=
nvcuda
gpuHandles
.
deviceCount
=
deviceCount
return
gpuHandles
...
...
@@ -135,7 +135,7 @@ func initGPUHandles() *handles {
if
len
(
cudartLibPaths
)
>
0
{
deviceCount
,
cudart
,
libPath
:=
LoadCUDARTMgmt
(
cudartLibPaths
)
if
cudart
!=
nil
{
slog
.
Info
(
"detected GPUs"
,
"library"
,
libPath
,
"count"
,
deviceCount
)
slog
.
Debug
(
"detected GPUs"
,
"library"
,
libPath
,
"count"
,
deviceCount
)
gpuHandles
.
cudart
=
cudart
gpuHandles
.
deviceCount
=
deviceCount
return
gpuHandles
...
...
@@ -184,10 +184,14 @@ func GetGPUInfo() GpuInfoList {
gpuInfo
:=
GpuInfo
{
Library
:
"cuda"
,
}
var
driverMajor
int
var
driverMinor
int
if
gpuHandles
.
cudart
!=
nil
{
C
.
cudart_check_vram
(
*
gpuHandles
.
cudart
,
C
.
int
(
i
),
&
memInfo
)
}
else
{
C
.
nvcuda_check_vram
(
*
gpuHandles
.
nvcuda
,
C
.
int
(
i
),
&
memInfo
)
driverMajor
=
int
(
gpuHandles
.
nvcuda
.
driver_major
)
driverMinor
=
int
(
gpuHandles
.
nvcuda
.
driver_minor
)
}
if
memInfo
.
err
!=
nil
{
slog
.
Info
(
"error looking up nvidia GPU memory"
,
"error"
,
C
.
GoString
(
memInfo
.
err
))
...
...
@@ -201,10 +205,12 @@ func GetGPUInfo() GpuInfoList {
gpuInfo
.
TotalMemory
=
uint64
(
memInfo
.
total
)
gpuInfo
.
FreeMemory
=
uint64
(
memInfo
.
free
)
gpuInfo
.
ID
=
C
.
GoString
(
&
memInfo
.
gpu_id
[
0
])
gpuInfo
.
Major
=
int
(
memInfo
.
major
)
gpuInfo
.
Minor
=
int
(
memInfo
.
minor
)
gpuInfo
.
Compute
=
fmt
.
Sprintf
(
"%d.%d"
,
memInfo
.
major
,
memInfo
.
minor
)
gpuInfo
.
MinimumMemory
=
cudaMinimumMemory
gpuInfo
.
DependencyPath
=
depPath
gpuInfo
.
Name
=
C
.
GoString
(
&
memInfo
.
gpu_name
[
0
])
gpuInfo
.
DriverMajor
=
int
(
driverMajor
)
gpuInfo
.
DriverMinor
=
int
(
driverMinor
)
// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
resp
=
append
(
resp
,
gpuInfo
)
...
...
gpu/gpu_info.h
View file @
dc18eee3
...
...
@@ -39,16 +39,19 @@ extern "C" {
#endif
#define GPU_ID_LEN 64
#define GPU_NAME_LEN 96
typedef
struct
mem_info
{
char
*
err
;
// If non-NULL, caller is responsible for freeing
char
gpu_id
[
GPU_ID_LEN
];
char
gpu_name
[
GPU_NAME_LEN
];
uint64_t
total
;
uint64_t
free
;
// Compute Capability
int
major
;
int
minor
;
int
patch
;
}
mem_info_t
;
void
cpu_check_ram
(
mem_info_t
*
resp
);
...
...
gpu/gpu_info_cpu.c
View file @
dc18eee3
...
...
@@ -10,8 +10,6 @@ void cpu_check_ram(mem_info_t *resp) {
if
(
GlobalMemoryStatusEx
(
&
info
)
!=
0
)
{
resp
->
total
=
info
.
ullTotalPhys
;
resp
->
free
=
info
.
ullAvailPhys
;
resp
->
major
=
0
;
resp
->
minor
=
0
;
snprintf
(
&
resp
->
gpu_id
[
0
],
GPU_ID_LEN
,
"0"
);
}
else
{
resp
->
err
=
LOAD_ERR
();
...
...
@@ -31,8 +29,6 @@ void cpu_check_ram(mem_info_t *resp) {
}
else
{
resp
->
total
=
info
.
totalram
*
info
.
mem_unit
;
resp
->
free
=
info
.
freeram
*
info
.
mem_unit
;
resp
->
major
=
0
;
resp
->
minor
=
0
;
snprintf
(
&
resp
->
gpu_id
[
0
],
GPU_ID_LEN
,
"0"
);
}
return
;
...
...
gpu/gpu_info_nvcuda.c
View file @
dc18eee3
...
...
@@ -22,6 +22,7 @@ void nvcuda_init(char *nvcuda_lib_path, nvcuda_init_resp_t *resp) {
{
"cuDeviceGet"
,
(
void
*
)
&
resp
->
ch
.
cuDeviceGet
},
{
"cuDeviceGetAttribute"
,
(
void
*
)
&
resp
->
ch
.
cuDeviceGetAttribute
},
{
"cuDeviceGetUuid"
,
(
void
*
)
&
resp
->
ch
.
cuDeviceGetUuid
},
{
"cuDeviceGetName"
,
(
void
*
)
&
resp
->
ch
.
cuDeviceGetName
},
{
"cuCtxCreate_v3"
,
(
void
*
)
&
resp
->
ch
.
cuCtxCreate_v3
},
{
"cuMemGetInfo_v2"
,
(
void
*
)
&
resp
->
ch
.
cuMemGetInfo_v2
},
{
"cuCtxDestroy"
,
(
void
*
)
&
resp
->
ch
.
cuCtxDestroy
},
...
...
@@ -70,18 +71,17 @@ void nvcuda_init(char *nvcuda_lib_path, nvcuda_init_resp_t *resp) {
}
int
version
=
0
;
nvcudaDriverVersion_t
driverVersion
;
driverVersion
.
major
=
0
;
driverVersion
.
minor
=
0
;
resp
->
ch
.
driver_major
=
0
;
resp
->
ch
.
driver_minor
=
0
;
// Report driver version if we're in verbose mode, ignore errors
ret
=
(
*
resp
->
ch
.
cuDriverGetVersion
)(
&
version
);
if
(
ret
!=
CUDA_SUCCESS
)
{
LOG
(
resp
->
ch
.
verbose
,
"cuDriverGetVersion failed: %d
\n
"
,
ret
);
}
else
{
driverVersion
.
major
=
version
/
1000
;
driverVersion
.
minor
=
(
version
-
(
driverVersion
.
major
*
1000
))
/
10
;
LOG
(
resp
->
ch
.
verbose
,
"CUDA driver version: %d
-
%d
\n
"
,
driverVersion
.
major
,
driverVersion
.
minor
);
resp
->
ch
.
driver_
major
=
version
/
1000
;
resp
->
ch
.
driver_
minor
=
(
version
-
(
resp
->
ch
.
driver_
major
*
1000
))
/
10
;
LOG
(
resp
->
ch
.
verbose
,
"CUDA driver version: %d
.
%d
\n
"
,
resp
->
ch
.
driver_major
,
resp
->
ch
.
driver_
minor
);
}
ret
=
(
*
resp
->
ch
.
cuDeviceGetCount
)(
&
resp
->
num_devices
);
...
...
@@ -117,8 +117,6 @@ void nvcuda_check_vram(nvcuda_handle_t h, int i, mem_info_t *resp) {
return
;
}
resp
->
major
=
0
;
resp
->
minor
=
0
;
int
major
=
0
;
int
minor
=
0
;
ret
=
(
*
h
.
cuDeviceGetAttribute
)(
&
major
,
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR
,
device
);
...
...
@@ -161,6 +159,12 @@ void nvcuda_check_vram(nvcuda_handle_t h, int i, mem_info_t *resp) {
);
}
ret
=
(
*
h
.
cuDeviceGetName
)(
&
resp
->
gpu_name
[
0
],
GPU_NAME_LEN
,
device
);
if
(
ret
!=
CUDA_SUCCESS
)
{
LOG
(
h
.
verbose
,
"[%d] device name lookup failure: %d
\n
"
,
i
,
ret
);
resp
->
gpu_name
[
0
]
=
'\0'
;
}
// To get memory we have to set (and release) a context
ret
=
(
*
h
.
cuCtxCreate_v3
)(
&
ctx
,
NULL
,
0
,
0
,
device
);
if
(
ret
!=
CUDA_SUCCESS
)
{
...
...
gpu/gpu_info_nvcuda.h
View file @
dc18eee3
...
...
@@ -44,12 +44,15 @@ typedef void* CUcontext;
typedef
struct
nvcuda_handle
{
void
*
handle
;
uint16_t
verbose
;
int
driver_major
;
int
driver_minor
;
CUresult
(
*
cuInit
)(
unsigned
int
Flags
);
CUresult
(
*
cuDriverGetVersion
)(
int
*
driverVersion
);
CUresult
(
*
cuDeviceGetCount
)(
int
*
);
CUresult
(
*
cuDeviceGet
)(
CUdevice
*
device
,
int
ordinal
);
CUresult
(
*
cuDeviceGetAttribute
)(
int
*
pi
,
CUdevice_attribute
attrib
,
CUdevice
dev
);
CUresult
(
*
cuDeviceGetUuid
)(
CUuuid
*
uuid
,
CUdevice
dev
);
// signature compatible with cuDeviceGetUuid_v2
CUresult
(
*
cuDeviceGetName
)(
char
*
name
,
int
len
,
CUdevice
dev
);
// Context specific aspects
CUresult
(
*
cuCtxCreate_v3
)(
CUcontext
*
pctx
,
void
*
params
,
int
len
,
unsigned
int
flags
,
CUdevice
dev
);
...
...
gpu/types.go
View file @
dc18eee3
package
gpu
import
(
"fmt"
"log/slog"
"github.com/ollama/ollama/format"
)
type
memInfo
struct
{
TotalMemory
uint64
`json:"total_memory,omitempty"`
FreeMemory
uint64
`json:"free_memory,omitempty"`
...
...
@@ -22,9 +29,11 @@ type GpuInfo struct {
// GPU information
ID
string
`json:"gpu_id"`
// string to use for selection of this specific GPU
Name
string
`json:"name"`
// user friendly name if available
Major
int
`json:"major,omitempty"`
// Major compatibility version (CC or gfx)
Minor
int
`json:"minor,omitempty"`
// Minor compatibility version (CC or gfx)
Patch
int
`json:"patch,omitempty"`
// Patch compatibility only matters on AMD
Compute
string
`json:"compute"`
// Compute Capability or gfx
// Driver Information - TODO no need to put this on each GPU
DriverMajor
int
`json:"driver_major,omitempty"`
DriverMinor
int
`json:"driver_minor,omitempty"`
// TODO other performance capability info to help in scheduling decisions
}
...
...
@@ -56,6 +65,21 @@ func (l GpuInfoList) ByLibrary() []GpuInfoList {
return
resp
}
// Report the GPU information into the log an Info level
func
(
l
GpuInfoList
)
LogDetails
()
{
for
_
,
g
:=
range
l
{
slog
.
Info
(
"inference compute"
,
"id"
,
g
.
ID
,
"library"
,
g
.
Library
,
"compute"
,
g
.
Compute
,
"driver"
,
fmt
.
Sprintf
(
"%d.%d"
,
g
.
DriverMajor
,
g
.
DriverMinor
),
"name"
,
g
.
Name
,
"total"
,
format
.
HumanBytes2
(
g
.
TotalMemory
),
"available"
,
format
.
HumanBytes2
(
g
.
FreeMemory
),
)
}
}
// Sort by Free Space
type
ByFreeMemory
[]
GpuInfo
...
...
server/routes.go
View file @
dc18eee3
...
...
@@ -1065,7 +1065,8 @@ func Serve(ln net.Listener) error {
// At startup we retrieve GPU information so we can get log messages before loading a model
// This will log warnings to the log in case we have problems with detected GPUs
_
=
gpu
.
GetGPUInfo
()
gpus
:=
gpu
.
GetGPUInfo
()
gpus
.
LogDetails
()
return
srvr
.
Serve
(
ln
)
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment