Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
dc18eee3
Unverified
Commit
dc18eee3
authored
May 09, 2024
by
Daniel Hiltgen
Committed by
GitHub
May 09, 2024
Browse files
Merge pull request #4238 from dhiltgen/gpu_info
Record more GPU information
parents
d0425f26
8727a9c1
Changes
10
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
150 additions
and
96 deletions
+150
-96
gpu/amd_hip_windows.go
gpu/amd_hip_windows.go
+10
-5
gpu/amd_linux.go
gpu/amd_linux.go
+61
-21
gpu/amd_windows.go
gpu/amd_windows.go
+19
-47
gpu/gpu.go
gpu/gpu.go
+11
-5
gpu/gpu_info.h
gpu/gpu_info.h
+3
-0
gpu/gpu_info_cpu.c
gpu/gpu_info_cpu.c
+0
-4
gpu/gpu_info_nvcuda.c
gpu/gpu_info_nvcuda.c
+12
-8
gpu/gpu_info_nvcuda.h
gpu/gpu_info_nvcuda.h
+3
-0
gpu/types.go
gpu/types.go
+29
-5
server/routes.go
server/routes.go
+2
-1
No files found.
gpu/amd_hip_windows.go
View file @
dc18eee3
...
@@ -3,7 +3,6 @@ package gpu
...
@@ -3,7 +3,6 @@ package gpu
import
(
import
(
"fmt"
"fmt"
"log/slog"
"log/slog"
"strconv"
"syscall"
"syscall"
"unsafe"
"unsafe"
...
@@ -74,16 +73,22 @@ func (hl *HipLib) Release() {
...
@@ -74,16 +73,22 @@ func (hl *HipLib) Release() {
hl
.
dll
=
0
hl
.
dll
=
0
}
}
func
(
hl
*
HipLib
)
AMDDriverVersion
()
(
string
,
error
)
{
func
(
hl
*
HipLib
)
AMDDriverVersion
()
(
driverMajor
,
driverMinor
int
,
err
error
)
{
if
hl
.
dll
==
0
{
if
hl
.
dll
==
0
{
return
""
,
fmt
.
Errorf
(
"dll has been unloaded"
)
return
0
,
0
,
fmt
.
Errorf
(
"dll has been unloaded"
)
}
}
var
version
int
var
version
int
status
,
_
,
err
:=
syscall
.
SyscallN
(
hl
.
hipDriverGetVersion
,
uintptr
(
unsafe
.
Pointer
(
&
version
)))
status
,
_
,
err
:=
syscall
.
SyscallN
(
hl
.
hipDriverGetVersion
,
uintptr
(
unsafe
.
Pointer
(
&
version
)))
if
status
!=
hipSuccess
{
if
status
!=
hipSuccess
{
return
""
,
fmt
.
Errorf
(
"failed call to hipDriverGetVersion: %d %s"
,
status
,
err
)
return
0
,
0
,
fmt
.
Errorf
(
"failed call to hipDriverGetVersion: %d %s"
,
status
,
err
)
}
}
return
strconv
.
Itoa
(
version
),
nil
slog
.
Debug
(
"hipDriverGetVersion"
,
"version"
,
version
)
// TODO - this isn't actually right, but the docs claim hipDriverGetVersion isn't accurate anyway...
driverMajor
=
version
/
1000
driverMinor
=
(
version
-
(
driverMajor
*
1000
))
/
10
return
driverMajor
,
driverMinor
,
nil
}
}
func
(
hl
*
HipLib
)
HipGetDeviceCount
()
int
{
func
(
hl
*
HipLib
)
HipGetDeviceCount
()
int
{
...
...
gpu/amd_linux.go
View file @
dc18eee3
...
@@ -8,6 +8,7 @@ import (
...
@@ -8,6 +8,7 @@ import (
"log/slog"
"log/slog"
"os"
"os"
"path/filepath"
"path/filepath"
"regexp"
"slices"
"slices"
"strconv"
"strconv"
"strings"
"strings"
...
@@ -41,10 +42,8 @@ func AMDGetGPUInfo() []GpuInfo {
...
@@ -41,10 +42,8 @@ func AMDGetGPUInfo() []GpuInfo {
}
}
// Opportunistic logging of driver version to aid in troubleshooting
// Opportunistic logging of driver version to aid in troubleshooting
ver
,
err
:=
AMDDriverVersion
()
driverMajor
,
driverMinor
,
err
:=
AMDDriverVersion
()
if
err
==
nil
{
if
err
!=
nil
{
slog
.
Info
(
"AMD Driver: "
+
ver
)
}
else
{
// TODO - if we see users crash and burn with the upstreamed kernel this can be adjusted to hard-fail rocm support and fallback to CPU
// TODO - if we see users crash and burn with the upstreamed kernel this can be adjusted to hard-fail rocm support and fallback to CPU
slog
.
Warn
(
"ollama recommends running the https://www.amd.com/en/support/linux-drivers"
,
"error"
,
err
)
slog
.
Warn
(
"ollama recommends running the https://www.amd.com/en/support/linux-drivers"
,
"error"
,
err
)
}
}
...
@@ -91,6 +90,7 @@ func AMDGetGPUInfo() []GpuInfo {
...
@@ -91,6 +90,7 @@ func AMDGetGPUInfo() []GpuInfo {
scanner
:=
bufio
.
NewScanner
(
fp
)
scanner
:=
bufio
.
NewScanner
(
fp
)
isCPU
:=
false
isCPU
:=
false
var
major
,
minor
,
patch
uint64
var
major
,
minor
,
patch
uint64
var
vendor
,
device
uint64
for
scanner
.
Scan
()
{
for
scanner
.
Scan
()
{
line
:=
strings
.
TrimSpace
(
scanner
.
Text
())
line
:=
strings
.
TrimSpace
(
scanner
.
Text
())
// Note: we could also use "cpu_cores_count X" where X is greater than zero to detect CPUs
// Note: we could also use "cpu_cores_count X" where X is greater than zero to detect CPUs
...
@@ -118,6 +118,26 @@ func AMDGetGPUInfo() []GpuInfo {
...
@@ -118,6 +118,26 @@ func AMDGetGPUInfo() []GpuInfo {
slog
.
Debug
(
"malformed int "
+
line
)
slog
.
Debug
(
"malformed int "
+
line
)
continue
continue
}
}
}
else
if
strings
.
HasPrefix
(
line
,
"vendor_id"
)
{
ver
:=
strings
.
Fields
(
line
)
if
len
(
ver
)
!=
2
{
slog
.
Debug
(
"malformed vendor_id"
,
"vendor_id"
,
line
)
continue
}
vendor
,
err
=
strconv
.
ParseUint
(
ver
[
1
],
10
,
32
)
if
err
!=
nil
{
slog
.
Debug
(
"malformed vendor_id"
+
line
)
}
}
else
if
strings
.
HasPrefix
(
line
,
"device_id"
)
{
ver
:=
strings
.
Fields
(
line
)
if
len
(
ver
)
!=
2
{
slog
.
Debug
(
"malformed device_id"
,
"device_id"
,
line
)
continue
}
device
,
err
=
strconv
.
ParseUint
(
ver
[
1
],
10
,
32
)
if
err
!=
nil
{
slog
.
Debug
(
"malformed device_id"
+
line
)
}
}
}
// TODO - any other properties we want to extract and record?
// TODO - any other properties we want to extract and record?
...
@@ -140,7 +160,7 @@ func AMDGetGPUInfo() []GpuInfo {
...
@@ -140,7 +160,7 @@ func AMDGetGPUInfo() []GpuInfo {
}
}
if
int
(
major
)
<
RocmComputeMin
{
if
int
(
major
)
<
RocmComputeMin
{
slog
.
Warn
(
fmt
.
Sprintf
(
"amdgpu too old gfx%d%
d
%x"
,
major
,
minor
,
patch
),
"gpu"
,
gpuID
)
slog
.
Warn
(
fmt
.
Sprintf
(
"amdgpu too old gfx%d%
x
%x"
,
major
,
minor
,
patch
),
"gpu"
,
gpuID
)
continue
continue
}
}
...
@@ -210,24 +230,29 @@ func AMDGetGPUInfo() []GpuInfo {
...
@@ -210,24 +230,29 @@ func AMDGetGPUInfo() []GpuInfo {
// iGPU detection, remove this check once we can support an iGPU variant of the rocm library
// iGPU detection, remove this check once we can support an iGPU variant of the rocm library
if
totalMemory
<
IGPUMemLimit
{
if
totalMemory
<
IGPUMemLimit
{
slog
.
Info
(
"
amdgpu appears to be an iGPU,
skipping"
,
"
gpu
"
,
gpuID
,
"total"
,
format
.
HumanBytes2
(
totalMemory
))
slog
.
Info
(
"
unsupported Radeon iGPU detected
skipping"
,
"
id
"
,
gpuID
,
"total"
,
format
.
HumanBytes2
(
totalMemory
))
continue
continue
}
}
var
name
string
// TODO - PCI ID lookup
if
vendor
>
0
&&
device
>
0
{
name
=
fmt
.
Sprintf
(
"%04x:%04x"
,
vendor
,
device
)
}
slog
.
Info
(
"amdgpu memory"
,
"gpu"
,
gpuID
,
"total"
,
format
.
HumanBytes2
(
totalMemory
))
slog
.
Debug
(
"amdgpu memory"
,
"gpu"
,
gpuID
,
"total"
,
format
.
HumanBytes2
(
totalMemory
))
slog
.
Info
(
"amdgpu memory"
,
"gpu"
,
gpuID
,
"available"
,
format
.
HumanBytes2
(
totalMemory
-
usedMemory
))
slog
.
Debug
(
"amdgpu memory"
,
"gpu"
,
gpuID
,
"available"
,
format
.
HumanBytes2
(
totalMemory
-
usedMemory
))
gpuInfo
:=
GpuInfo
{
gpuInfo
:=
GpuInfo
{
Library
:
"rocm"
,
Library
:
"rocm"
,
memInfo
:
memInfo
{
memInfo
:
memInfo
{
TotalMemory
:
totalMemory
,
TotalMemory
:
totalMemory
,
FreeMemory
:
(
totalMemory
-
usedMemory
),
FreeMemory
:
(
totalMemory
-
usedMemory
),
},
},
ID
:
fmt
.
Sprintf
(
"%d"
,
gpuID
),
ID
:
fmt
.
Sprintf
(
"%d"
,
gpuID
),
// Name: not exposed in sysfs directly, would require pci device id lookup
Name
:
name
,
Major
:
int
(
major
),
Compute
:
fmt
.
Sprintf
(
"gfx%d%x%x"
,
major
,
minor
,
patch
),
Minor
:
int
(
minor
),
Patch
:
int
(
patch
),
MinimumMemory
:
rocmMinimumMemory
,
MinimumMemory
:
rocmMinimumMemory
,
DriverMajor
:
driverMajor
,
DriverMinor
:
driverMinor
,
}
}
// If the user wants to filter to a subset of devices, filter out if we aren't a match
// If the user wants to filter to a subset of devices, filter out if we aren't a match
...
@@ -266,7 +291,7 @@ func AMDGetGPUInfo() []GpuInfo {
...
@@ -266,7 +291,7 @@ func AMDGetGPUInfo() []GpuInfo {
}
}
slog
.
Debug
(
"rocm supported GPUs"
,
"types"
,
supported
)
slog
.
Debug
(
"rocm supported GPUs"
,
"types"
,
supported
)
}
}
gfx
:=
fmt
.
Sprintf
(
"gfx%d%d%x"
,
gpuInfo
.
Major
,
gpuInfo
.
Minor
,
gpuInfo
.
Patch
)
gfx
:=
gpuInfo
.
Compute
if
!
slices
.
Contains
[[]
string
,
string
](
supported
,
gfx
)
{
if
!
slices
.
Contains
[[]
string
,
string
](
supported
,
gfx
)
{
slog
.
Warn
(
"amdgpu is not supported"
,
"gpu"
,
gpuInfo
.
ID
,
"gpu_type"
,
gfx
,
"library"
,
libDir
,
"supported_types"
,
supported
)
slog
.
Warn
(
"amdgpu is not supported"
,
"gpu"
,
gpuInfo
.
ID
,
"gpu_type"
,
gfx
,
"library"
,
libDir
,
"supported_types"
,
supported
)
// TODO - consider discrete markdown just for ROCM troubleshooting?
// TODO - consider discrete markdown just for ROCM troubleshooting?
...
@@ -276,7 +301,7 @@ func AMDGetGPUInfo() []GpuInfo {
...
@@ -276,7 +301,7 @@ func AMDGetGPUInfo() []GpuInfo {
slog
.
Info
(
"amdgpu is supported"
,
"gpu"
,
gpuInfo
.
ID
,
"gpu_type"
,
gfx
)
slog
.
Info
(
"amdgpu is supported"
,
"gpu"
,
gpuInfo
.
ID
,
"gpu_type"
,
gfx
)
}
}
}
else
{
}
else
{
slog
.
Debug
(
"skipping rocm gfx compatibility check
with
HSA_OVERRIDE_GFX_VERSION
="
+
gfxOverride
)
slog
.
Info
(
"skipping rocm gfx compatibility check
"
,
"
HSA_OVERRIDE_GFX_VERSION
"
,
gfxOverride
)
}
}
// The GPU has passed all the verification steps and is supported
// The GPU has passed all the verification steps and is supported
...
@@ -322,19 +347,34 @@ func AMDValidateLibDir() (string, error) {
...
@@ -322,19 +347,34 @@ func AMDValidateLibDir() (string, error) {
return
""
,
fmt
.
Errorf
(
"no suitable rocm found, falling back to CPU"
)
return
""
,
fmt
.
Errorf
(
"no suitable rocm found, falling back to CPU"
)
}
}
func
AMDDriverVersion
()
(
string
,
error
)
{
func
AMDDriverVersion
()
(
driverMajor
,
driverMinor
int
,
err
error
)
{
_
,
err
:
=
os
.
Stat
(
DriverVersionFile
)
_
,
err
=
os
.
Stat
(
DriverVersionFile
)
if
err
!=
nil
{
if
err
!=
nil
{
return
""
,
fmt
.
Errorf
(
"amdgpu version file missing: %s %w"
,
DriverVersionFile
,
err
)
return
0
,
0
,
fmt
.
Errorf
(
"amdgpu version file missing: %s %w"
,
DriverVersionFile
,
err
)
}
}
fp
,
err
:=
os
.
Open
(
DriverVersionFile
)
fp
,
err
:=
os
.
Open
(
DriverVersionFile
)
if
err
!=
nil
{
if
err
!=
nil
{
return
""
,
err
return
0
,
0
,
err
}
}
defer
fp
.
Close
()
defer
fp
.
Close
()
verString
,
err
:=
io
.
ReadAll
(
fp
)
verString
,
err
:=
io
.
ReadAll
(
fp
)
if
err
!=
nil
{
if
err
!=
nil
{
return
""
,
err
return
0
,
0
,
err
}
pattern
:=
`\A(\d+)\.(\d+).*`
regex
:=
regexp
.
MustCompile
(
pattern
)
match
:=
regex
.
FindStringSubmatch
(
string
(
verString
))
if
len
(
match
)
<
2
{
return
0
,
0
,
fmt
.
Errorf
(
"malformed version string %s"
,
string
(
verString
))
}
driverMajor
,
err
=
strconv
.
Atoi
(
match
[
1
])
if
err
!=
nil
{
return
0
,
0
,
err
}
driverMinor
,
err
=
strconv
.
Atoi
(
match
[
2
])
if
err
!=
nil
{
return
0
,
0
,
err
}
}
return
strings
.
TrimSpace
(
string
(
verString
))
,
nil
return
driverMajor
,
driverMinor
,
nil
}
}
gpu/amd_windows.go
View file @
dc18eee3
...
@@ -7,7 +7,6 @@ import (
...
@@ -7,7 +7,6 @@ import (
"os"
"os"
"path/filepath"
"path/filepath"
"slices"
"slices"
"strconv"
"strings"
"strings"
"github.com/ollama/ollama/format"
"github.com/ollama/ollama/format"
...
@@ -34,13 +33,12 @@ func AMDGetGPUInfo() []GpuInfo {
...
@@ -34,13 +33,12 @@ func AMDGetGPUInfo() []GpuInfo {
}
}
defer
hl
.
Release
()
defer
hl
.
Release
()
ver
,
err
:=
hl
.
AMDDriverVersion
()
// TODO - this reports incorrect version information, so omitting for now
if
err
==
nil
{
// driverMajor, driverMinor, err := hl.AMDDriverVersion()
slog
.
Info
(
"AMD Driver: "
+
ver
)
// if err != nil {
}
else
{
// // For now this is benign, but we may eventually need to fail compatibility checks
// For now this is benign, but we may eventually need to fail compatibility checks
// slog.Debug("error looking up amd driver version", "error", err)
slog
.
Debug
(
"error looking up amd driver version"
,
"error"
,
err
)
// }
}
// Note: the HIP library automatically handles subsetting to any HIP_VISIBLE_DEVICES the user specified
// Note: the HIP library automatically handles subsetting to any HIP_VISIBLE_DEVICES the user specified
count
:=
hl
.
HipGetDeviceCount
()
count
:=
hl
.
HipGetDeviceCount
()
...
@@ -62,10 +60,10 @@ func AMDGetGPUInfo() []GpuInfo {
...
@@ -62,10 +60,10 @@ func AMDGetGPUInfo() []GpuInfo {
return
nil
return
nil
}
}
}
else
{
}
else
{
slog
.
Debug
(
"skipping rocm gfx compatibility check
with
HSA_OVERRIDE_GFX_VERSION
="
+
gfxOverride
)
slog
.
Info
(
"skipping rocm gfx compatibility check
"
,
"
HSA_OVERRIDE_GFX_VERSION
"
,
gfxOverride
)
}
}
slog
.
Info
(
"detected hip devices"
,
"count"
,
count
)
slog
.
Debug
(
"detected hip devices"
,
"count"
,
count
)
// TODO how to determine the underlying device ID when visible devices is causing this to subset?
// TODO how to determine the underlying device ID when visible devices is causing this to subset?
for
i
:=
0
;
i
<
count
;
i
++
{
for
i
:=
0
;
i
<
count
;
i
++
{
err
=
hl
.
HipSetDevice
(
i
)
err
=
hl
.
HipSetDevice
(
i
)
...
@@ -85,18 +83,11 @@ func AMDGetGPUInfo() []GpuInfo {
...
@@ -85,18 +83,11 @@ func AMDGetGPUInfo() []GpuInfo {
// Can luid be used on windows for setting visible devices (and is it actually set?)
// Can luid be used on windows for setting visible devices (and is it actually set?)
n
=
bytes
.
IndexByte
(
props
.
GcnArchName
[
:
],
0
)
n
=
bytes
.
IndexByte
(
props
.
GcnArchName
[
:
],
0
)
gfx
:=
string
(
props
.
GcnArchName
[
:
n
])
gfx
:=
string
(
props
.
GcnArchName
[
:
n
])
slog
.
Info
(
"hip device"
,
"id"
,
i
,
"name"
,
name
,
"gfx"
,
gfx
)
slog
.
Debug
(
"hip device"
,
"id"
,
i
,
"name"
,
name
,
"gfx"
,
gfx
)
var
major
,
minor
,
patch
string
switch
len
(
gfx
)
{
case
6
:
major
,
minor
,
patch
=
gfx
[
3
:
4
],
gfx
[
4
:
5
],
gfx
[
5
:
]
case
7
:
major
,
minor
,
patch
=
gfx
[
3
:
5
],
gfx
[
5
:
6
],
gfx
[
6
:
]
}
//slog.Info(fmt.Sprintf("[%d] Integrated: %d", i, props.iGPU)) // DOESN'T REPORT CORRECTLY! Always 0
//slog.Info(fmt.Sprintf("[%d] Integrated: %d", i, props.iGPU)) // DOESN'T REPORT CORRECTLY! Always 0
// TODO Why isn't props.iGPU accurate!?
// TODO Why isn't props.iGPU accurate!?
if
strings
.
EqualFold
(
name
,
iGPUName
)
{
if
strings
.
EqualFold
(
name
,
iGPUName
)
{
slog
.
Info
(
"iGPU detected skipping"
,
"id"
,
i
)
slog
.
Info
(
"
unsupported Radeon
iGPU detected skipping"
,
"id"
,
i
,
"name"
,
name
,
"gfx"
,
gfx
)
continue
continue
}
}
if
gfxOverride
==
""
{
if
gfxOverride
==
""
{
...
@@ -106,7 +97,7 @@ func AMDGetGPUInfo() []GpuInfo {
...
@@ -106,7 +97,7 @@ func AMDGetGPUInfo() []GpuInfo {
slog
.
Warn
(
"See https://github.com/ollama/ollama/blob/main/docs/troubleshooting.md for HSA_OVERRIDE_GFX_VERSION usage"
)
slog
.
Warn
(
"See https://github.com/ollama/ollama/blob/main/docs/troubleshooting.md for HSA_OVERRIDE_GFX_VERSION usage"
)
continue
continue
}
else
{
}
else
{
slog
.
Info
(
"amdgpu is supported"
,
"gpu"
,
i
,
"gpu_type"
,
gfx
)
slog
.
Debug
(
"amdgpu is supported"
,
"gpu"
,
i
,
"gpu_type"
,
gfx
)
}
}
}
}
...
@@ -124,8 +115,8 @@ func AMDGetGPUInfo() []GpuInfo {
...
@@ -124,8 +115,8 @@ func AMDGetGPUInfo() []GpuInfo {
// TODO revisit this once ROCm v6 is available on windows.
// TODO revisit this once ROCm v6 is available on windows.
// v5.7 only reports VRAM used by this process, so it's completely wrong and unusable
// v5.7 only reports VRAM used by this process, so it's completely wrong and unusable
slog
.
Info
(
"amdgpu memory"
,
"gpu"
,
i
,
"total"
,
format
.
HumanBytes2
(
totalMemory
))
slog
.
Debug
(
"amdgpu memory"
,
"gpu"
,
i
,
"total"
,
format
.
HumanBytes2
(
totalMemory
))
slog
.
Info
(
"amdgpu memory"
,
"gpu"
,
i
,
"available"
,
format
.
HumanBytes2
(
freeMemory
))
slog
.
Debug
(
"amdgpu memory"
,
"gpu"
,
i
,
"available"
,
format
.
HumanBytes2
(
freeMemory
))
gpuInfo
:=
GpuInfo
{
gpuInfo
:=
GpuInfo
{
Library
:
"rocm"
,
Library
:
"rocm"
,
memInfo
:
memInfo
{
memInfo
:
memInfo
{
...
@@ -135,31 +126,12 @@ func AMDGetGPUInfo() []GpuInfo {
...
@@ -135,31 +126,12 @@ func AMDGetGPUInfo() []GpuInfo {
ID
:
fmt
.
Sprintf
(
"%d"
,
i
),
// TODO this is probably wrong if we specify visible devices
ID
:
fmt
.
Sprintf
(
"%d"
,
i
),
// TODO this is probably wrong if we specify visible devices
DependencyPath
:
libDir
,
DependencyPath
:
libDir
,
MinimumMemory
:
rocmMinimumMemory
,
MinimumMemory
:
rocmMinimumMemory
,
}
Name
:
name
,
if
major
!=
""
{
Compute
:
gfx
,
gpuInfo
.
Major
,
err
=
strconv
.
Atoi
(
major
)
if
err
!=
nil
{
// TODO - this information isn't accurate on windows, so don't report it until we find the right way to retrieve
slog
.
Info
(
"failed to parse version"
,
"version"
,
gfx
,
"error"
,
err
)
// DriverMajor: driverMajor,
}
// DriverMinor: driverMinor,
}
if
minor
!=
""
{
gpuInfo
.
Minor
,
err
=
strconv
.
Atoi
(
minor
)
if
err
!=
nil
{
slog
.
Info
(
"failed to parse version"
,
"version"
,
gfx
,
"error"
,
err
)
}
}
if
patch
!=
""
{
// Patch rev is hex; e.g. gfx90a
p
,
err
:=
strconv
.
ParseInt
(
patch
,
16
,
0
)
if
err
!=
nil
{
slog
.
Info
(
"failed to parse version"
,
"version"
,
gfx
,
"error"
,
err
)
}
else
{
gpuInfo
.
Patch
=
int
(
p
)
}
}
if
gpuInfo
.
Major
<
RocmComputeMin
{
slog
.
Warn
(
fmt
.
Sprintf
(
"amdgpu [%s] too old gfx%d%d%x"
,
gpuInfo
.
ID
,
gpuInfo
.
Major
,
gpuInfo
.
Minor
,
gpuInfo
.
Patch
))
continue
}
}
resp
=
append
(
resp
,
gpuInfo
)
resp
=
append
(
resp
,
gpuInfo
)
...
...
gpu/gpu.go
View file @
dc18eee3
...
@@ -119,12 +119,12 @@ func initGPUHandles() *handles {
...
@@ -119,12 +119,12 @@ func initGPUHandles() *handles {
return
gpuHandles
return
gpuHandles
}
}
slog
.
Info
(
"Detecting GPUs"
)
slog
.
Debug
(
"Detecting GPUs"
)
nvcudaLibPaths
:=
FindGPULibs
(
nvcudaMgmtName
,
nvcudaMgmtPatterns
)
nvcudaLibPaths
:=
FindGPULibs
(
nvcudaMgmtName
,
nvcudaMgmtPatterns
)
if
len
(
nvcudaLibPaths
)
>
0
{
if
len
(
nvcudaLibPaths
)
>
0
{
deviceCount
,
nvcuda
,
libPath
:=
LoadNVCUDAMgmt
(
nvcudaLibPaths
)
deviceCount
,
nvcuda
,
libPath
:=
LoadNVCUDAMgmt
(
nvcudaLibPaths
)
if
nvcuda
!=
nil
{
if
nvcuda
!=
nil
{
slog
.
Info
(
"detected GPUs"
,
"count"
,
deviceCount
,
"library"
,
libPath
)
slog
.
Debug
(
"detected GPUs"
,
"count"
,
deviceCount
,
"library"
,
libPath
)
gpuHandles
.
nvcuda
=
nvcuda
gpuHandles
.
nvcuda
=
nvcuda
gpuHandles
.
deviceCount
=
deviceCount
gpuHandles
.
deviceCount
=
deviceCount
return
gpuHandles
return
gpuHandles
...
@@ -135,7 +135,7 @@ func initGPUHandles() *handles {
...
@@ -135,7 +135,7 @@ func initGPUHandles() *handles {
if
len
(
cudartLibPaths
)
>
0
{
if
len
(
cudartLibPaths
)
>
0
{
deviceCount
,
cudart
,
libPath
:=
LoadCUDARTMgmt
(
cudartLibPaths
)
deviceCount
,
cudart
,
libPath
:=
LoadCUDARTMgmt
(
cudartLibPaths
)
if
cudart
!=
nil
{
if
cudart
!=
nil
{
slog
.
Info
(
"detected GPUs"
,
"library"
,
libPath
,
"count"
,
deviceCount
)
slog
.
Debug
(
"detected GPUs"
,
"library"
,
libPath
,
"count"
,
deviceCount
)
gpuHandles
.
cudart
=
cudart
gpuHandles
.
cudart
=
cudart
gpuHandles
.
deviceCount
=
deviceCount
gpuHandles
.
deviceCount
=
deviceCount
return
gpuHandles
return
gpuHandles
...
@@ -184,10 +184,14 @@ func GetGPUInfo() GpuInfoList {
...
@@ -184,10 +184,14 @@ func GetGPUInfo() GpuInfoList {
gpuInfo
:=
GpuInfo
{
gpuInfo
:=
GpuInfo
{
Library
:
"cuda"
,
Library
:
"cuda"
,
}
}
var
driverMajor
int
var
driverMinor
int
if
gpuHandles
.
cudart
!=
nil
{
if
gpuHandles
.
cudart
!=
nil
{
C
.
cudart_check_vram
(
*
gpuHandles
.
cudart
,
C
.
int
(
i
),
&
memInfo
)
C
.
cudart_check_vram
(
*
gpuHandles
.
cudart
,
C
.
int
(
i
),
&
memInfo
)
}
else
{
}
else
{
C
.
nvcuda_check_vram
(
*
gpuHandles
.
nvcuda
,
C
.
int
(
i
),
&
memInfo
)
C
.
nvcuda_check_vram
(
*
gpuHandles
.
nvcuda
,
C
.
int
(
i
),
&
memInfo
)
driverMajor
=
int
(
gpuHandles
.
nvcuda
.
driver_major
)
driverMinor
=
int
(
gpuHandles
.
nvcuda
.
driver_minor
)
}
}
if
memInfo
.
err
!=
nil
{
if
memInfo
.
err
!=
nil
{
slog
.
Info
(
"error looking up nvidia GPU memory"
,
"error"
,
C
.
GoString
(
memInfo
.
err
))
slog
.
Info
(
"error looking up nvidia GPU memory"
,
"error"
,
C
.
GoString
(
memInfo
.
err
))
...
@@ -201,10 +205,12 @@ func GetGPUInfo() GpuInfoList {
...
@@ -201,10 +205,12 @@ func GetGPUInfo() GpuInfoList {
gpuInfo
.
TotalMemory
=
uint64
(
memInfo
.
total
)
gpuInfo
.
TotalMemory
=
uint64
(
memInfo
.
total
)
gpuInfo
.
FreeMemory
=
uint64
(
memInfo
.
free
)
gpuInfo
.
FreeMemory
=
uint64
(
memInfo
.
free
)
gpuInfo
.
ID
=
C
.
GoString
(
&
memInfo
.
gpu_id
[
0
])
gpuInfo
.
ID
=
C
.
GoString
(
&
memInfo
.
gpu_id
[
0
])
gpuInfo
.
Major
=
int
(
memInfo
.
major
)
gpuInfo
.
Compute
=
fmt
.
Sprintf
(
"%d.%d"
,
memInfo
.
major
,
memInfo
.
minor
)
gpuInfo
.
Minor
=
int
(
memInfo
.
minor
)
gpuInfo
.
MinimumMemory
=
cudaMinimumMemory
gpuInfo
.
MinimumMemory
=
cudaMinimumMemory
gpuInfo
.
DependencyPath
=
depPath
gpuInfo
.
DependencyPath
=
depPath
gpuInfo
.
Name
=
C
.
GoString
(
&
memInfo
.
gpu_name
[
0
])
gpuInfo
.
DriverMajor
=
int
(
driverMajor
)
gpuInfo
.
DriverMinor
=
int
(
driverMinor
)
// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
resp
=
append
(
resp
,
gpuInfo
)
resp
=
append
(
resp
,
gpuInfo
)
...
...
gpu/gpu_info.h
View file @
dc18eee3
...
@@ -39,16 +39,19 @@ extern "C" {
...
@@ -39,16 +39,19 @@ extern "C" {
#endif
#endif
#define GPU_ID_LEN 64
#define GPU_ID_LEN 64
#define GPU_NAME_LEN 96
typedef
struct
mem_info
{
typedef
struct
mem_info
{
char
*
err
;
// If non-NULL, caller is responsible for freeing
char
*
err
;
// If non-NULL, caller is responsible for freeing
char
gpu_id
[
GPU_ID_LEN
];
char
gpu_id
[
GPU_ID_LEN
];
char
gpu_name
[
GPU_NAME_LEN
];
uint64_t
total
;
uint64_t
total
;
uint64_t
free
;
uint64_t
free
;
// Compute Capability
// Compute Capability
int
major
;
int
major
;
int
minor
;
int
minor
;
int
patch
;
}
mem_info_t
;
}
mem_info_t
;
void
cpu_check_ram
(
mem_info_t
*
resp
);
void
cpu_check_ram
(
mem_info_t
*
resp
);
...
...
gpu/gpu_info_cpu.c
View file @
dc18eee3
...
@@ -10,8 +10,6 @@ void cpu_check_ram(mem_info_t *resp) {
...
@@ -10,8 +10,6 @@ void cpu_check_ram(mem_info_t *resp) {
if
(
GlobalMemoryStatusEx
(
&
info
)
!=
0
)
{
if
(
GlobalMemoryStatusEx
(
&
info
)
!=
0
)
{
resp
->
total
=
info
.
ullTotalPhys
;
resp
->
total
=
info
.
ullTotalPhys
;
resp
->
free
=
info
.
ullAvailPhys
;
resp
->
free
=
info
.
ullAvailPhys
;
resp
->
major
=
0
;
resp
->
minor
=
0
;
snprintf
(
&
resp
->
gpu_id
[
0
],
GPU_ID_LEN
,
"0"
);
snprintf
(
&
resp
->
gpu_id
[
0
],
GPU_ID_LEN
,
"0"
);
}
else
{
}
else
{
resp
->
err
=
LOAD_ERR
();
resp
->
err
=
LOAD_ERR
();
...
@@ -31,8 +29,6 @@ void cpu_check_ram(mem_info_t *resp) {
...
@@ -31,8 +29,6 @@ void cpu_check_ram(mem_info_t *resp) {
}
else
{
}
else
{
resp
->
total
=
info
.
totalram
*
info
.
mem_unit
;
resp
->
total
=
info
.
totalram
*
info
.
mem_unit
;
resp
->
free
=
info
.
freeram
*
info
.
mem_unit
;
resp
->
free
=
info
.
freeram
*
info
.
mem_unit
;
resp
->
major
=
0
;
resp
->
minor
=
0
;
snprintf
(
&
resp
->
gpu_id
[
0
],
GPU_ID_LEN
,
"0"
);
snprintf
(
&
resp
->
gpu_id
[
0
],
GPU_ID_LEN
,
"0"
);
}
}
return
;
return
;
...
...
gpu/gpu_info_nvcuda.c
View file @
dc18eee3
...
@@ -22,6 +22,7 @@ void nvcuda_init(char *nvcuda_lib_path, nvcuda_init_resp_t *resp) {
...
@@ -22,6 +22,7 @@ void nvcuda_init(char *nvcuda_lib_path, nvcuda_init_resp_t *resp) {
{
"cuDeviceGet"
,
(
void
*
)
&
resp
->
ch
.
cuDeviceGet
},
{
"cuDeviceGet"
,
(
void
*
)
&
resp
->
ch
.
cuDeviceGet
},
{
"cuDeviceGetAttribute"
,
(
void
*
)
&
resp
->
ch
.
cuDeviceGetAttribute
},
{
"cuDeviceGetAttribute"
,
(
void
*
)
&
resp
->
ch
.
cuDeviceGetAttribute
},
{
"cuDeviceGetUuid"
,
(
void
*
)
&
resp
->
ch
.
cuDeviceGetUuid
},
{
"cuDeviceGetUuid"
,
(
void
*
)
&
resp
->
ch
.
cuDeviceGetUuid
},
{
"cuDeviceGetName"
,
(
void
*
)
&
resp
->
ch
.
cuDeviceGetName
},
{
"cuCtxCreate_v3"
,
(
void
*
)
&
resp
->
ch
.
cuCtxCreate_v3
},
{
"cuCtxCreate_v3"
,
(
void
*
)
&
resp
->
ch
.
cuCtxCreate_v3
},
{
"cuMemGetInfo_v2"
,
(
void
*
)
&
resp
->
ch
.
cuMemGetInfo_v2
},
{
"cuMemGetInfo_v2"
,
(
void
*
)
&
resp
->
ch
.
cuMemGetInfo_v2
},
{
"cuCtxDestroy"
,
(
void
*
)
&
resp
->
ch
.
cuCtxDestroy
},
{
"cuCtxDestroy"
,
(
void
*
)
&
resp
->
ch
.
cuCtxDestroy
},
...
@@ -70,18 +71,17 @@ void nvcuda_init(char *nvcuda_lib_path, nvcuda_init_resp_t *resp) {
...
@@ -70,18 +71,17 @@ void nvcuda_init(char *nvcuda_lib_path, nvcuda_init_resp_t *resp) {
}
}
int
version
=
0
;
int
version
=
0
;
nvcudaDriverVersion_t
driverVersion
;
resp
->
ch
.
driver_major
=
0
;
driverVersion
.
major
=
0
;
resp
->
ch
.
driver_minor
=
0
;
driverVersion
.
minor
=
0
;
// Report driver version if we're in verbose mode, ignore errors
// Report driver version if we're in verbose mode, ignore errors
ret
=
(
*
resp
->
ch
.
cuDriverGetVersion
)(
&
version
);
ret
=
(
*
resp
->
ch
.
cuDriverGetVersion
)(
&
version
);
if
(
ret
!=
CUDA_SUCCESS
)
{
if
(
ret
!=
CUDA_SUCCESS
)
{
LOG
(
resp
->
ch
.
verbose
,
"cuDriverGetVersion failed: %d
\n
"
,
ret
);
LOG
(
resp
->
ch
.
verbose
,
"cuDriverGetVersion failed: %d
\n
"
,
ret
);
}
else
{
}
else
{
driverVersion
.
major
=
version
/
1000
;
resp
->
ch
.
driver_
major
=
version
/
1000
;
driverVersion
.
minor
=
(
version
-
(
driverVersion
.
major
*
1000
))
/
10
;
resp
->
ch
.
driver_
minor
=
(
version
-
(
resp
->
ch
.
driver_
major
*
1000
))
/
10
;
LOG
(
resp
->
ch
.
verbose
,
"CUDA driver version: %d
-
%d
\n
"
,
driverVersion
.
major
,
driverVersion
.
minor
);
LOG
(
resp
->
ch
.
verbose
,
"CUDA driver version: %d
.
%d
\n
"
,
resp
->
ch
.
driver_major
,
resp
->
ch
.
driver_
minor
);
}
}
ret
=
(
*
resp
->
ch
.
cuDeviceGetCount
)(
&
resp
->
num_devices
);
ret
=
(
*
resp
->
ch
.
cuDeviceGetCount
)(
&
resp
->
num_devices
);
...
@@ -117,8 +117,6 @@ void nvcuda_check_vram(nvcuda_handle_t h, int i, mem_info_t *resp) {
...
@@ -117,8 +117,6 @@ void nvcuda_check_vram(nvcuda_handle_t h, int i, mem_info_t *resp) {
return
;
return
;
}
}
resp
->
major
=
0
;
resp
->
minor
=
0
;
int
major
=
0
;
int
major
=
0
;
int
minor
=
0
;
int
minor
=
0
;
ret
=
(
*
h
.
cuDeviceGetAttribute
)(
&
major
,
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR
,
device
);
ret
=
(
*
h
.
cuDeviceGetAttribute
)(
&
major
,
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR
,
device
);
...
@@ -161,6 +159,12 @@ void nvcuda_check_vram(nvcuda_handle_t h, int i, mem_info_t *resp) {
...
@@ -161,6 +159,12 @@ void nvcuda_check_vram(nvcuda_handle_t h, int i, mem_info_t *resp) {
);
);
}
}
ret
=
(
*
h
.
cuDeviceGetName
)(
&
resp
->
gpu_name
[
0
],
GPU_NAME_LEN
,
device
);
if
(
ret
!=
CUDA_SUCCESS
)
{
LOG
(
h
.
verbose
,
"[%d] device name lookup failure: %d
\n
"
,
i
,
ret
);
resp
->
gpu_name
[
0
]
=
'\0'
;
}
// To get memory we have to set (and release) a context
// To get memory we have to set (and release) a context
ret
=
(
*
h
.
cuCtxCreate_v3
)(
&
ctx
,
NULL
,
0
,
0
,
device
);
ret
=
(
*
h
.
cuCtxCreate_v3
)(
&
ctx
,
NULL
,
0
,
0
,
device
);
if
(
ret
!=
CUDA_SUCCESS
)
{
if
(
ret
!=
CUDA_SUCCESS
)
{
...
...
gpu/gpu_info_nvcuda.h
View file @
dc18eee3
...
@@ -44,12 +44,15 @@ typedef void* CUcontext;
...
@@ -44,12 +44,15 @@ typedef void* CUcontext;
typedef
struct
nvcuda_handle
{
typedef
struct
nvcuda_handle
{
void
*
handle
;
void
*
handle
;
uint16_t
verbose
;
uint16_t
verbose
;
int
driver_major
;
int
driver_minor
;
CUresult
(
*
cuInit
)(
unsigned
int
Flags
);
CUresult
(
*
cuInit
)(
unsigned
int
Flags
);
CUresult
(
*
cuDriverGetVersion
)(
int
*
driverVersion
);
CUresult
(
*
cuDriverGetVersion
)(
int
*
driverVersion
);
CUresult
(
*
cuDeviceGetCount
)(
int
*
);
CUresult
(
*
cuDeviceGetCount
)(
int
*
);
CUresult
(
*
cuDeviceGet
)(
CUdevice
*
device
,
int
ordinal
);
CUresult
(
*
cuDeviceGet
)(
CUdevice
*
device
,
int
ordinal
);
CUresult
(
*
cuDeviceGetAttribute
)(
int
*
pi
,
CUdevice_attribute
attrib
,
CUdevice
dev
);
CUresult
(
*
cuDeviceGetAttribute
)(
int
*
pi
,
CUdevice_attribute
attrib
,
CUdevice
dev
);
CUresult
(
*
cuDeviceGetUuid
)(
CUuuid
*
uuid
,
CUdevice
dev
);
// signature compatible with cuDeviceGetUuid_v2
CUresult
(
*
cuDeviceGetUuid
)(
CUuuid
*
uuid
,
CUdevice
dev
);
// signature compatible with cuDeviceGetUuid_v2
CUresult
(
*
cuDeviceGetName
)(
char
*
name
,
int
len
,
CUdevice
dev
);
// Context specific aspects
// Context specific aspects
CUresult
(
*
cuCtxCreate_v3
)(
CUcontext
*
pctx
,
void
*
params
,
int
len
,
unsigned
int
flags
,
CUdevice
dev
);
CUresult
(
*
cuCtxCreate_v3
)(
CUcontext
*
pctx
,
void
*
params
,
int
len
,
unsigned
int
flags
,
CUdevice
dev
);
...
...
gpu/types.go
View file @
dc18eee3
package
gpu
package
gpu
import
(
"fmt"
"log/slog"
"github.com/ollama/ollama/format"
)
type
memInfo
struct
{
type
memInfo
struct
{
TotalMemory
uint64
`json:"total_memory,omitempty"`
TotalMemory
uint64
`json:"total_memory,omitempty"`
FreeMemory
uint64
`json:"free_memory,omitempty"`
FreeMemory
uint64
`json:"free_memory,omitempty"`
...
@@ -20,11 +27,13 @@ type GpuInfo struct {
...
@@ -20,11 +27,13 @@ type GpuInfo struct {
DependencyPath
string
`json:"lib_path,omitempty"`
DependencyPath
string
`json:"lib_path,omitempty"`
// GPU information
// GPU information
ID
string
`json:"gpu_id"`
// string to use for selection of this specific GPU
ID
string
`json:"gpu_id"`
// string to use for selection of this specific GPU
Name
string
`json:"name"`
// user friendly name if available
Name
string
`json:"name"`
// user friendly name if available
Major
int
`json:"major,omitempty"`
// Major compatibility version (CC or gfx)
Compute
string
`json:"compute"`
// Compute Capability or gfx
Minor
int
`json:"minor,omitempty"`
// Minor compatibility version (CC or gfx)
Patch
int
`json:"patch,omitempty"`
// Patch compatibility only matters on AMD
// Driver Information - TODO no need to put this on each GPU
DriverMajor
int
`json:"driver_major,omitempty"`
DriverMinor
int
`json:"driver_minor,omitempty"`
// TODO other performance capability info to help in scheduling decisions
// TODO other performance capability info to help in scheduling decisions
}
}
...
@@ -56,6 +65,21 @@ func (l GpuInfoList) ByLibrary() []GpuInfoList {
...
@@ -56,6 +65,21 @@ func (l GpuInfoList) ByLibrary() []GpuInfoList {
return
resp
return
resp
}
}
// Report the GPU information into the log at Info level
func
(
l
GpuInfoList
)
LogDetails
()
{
for
_
,
g
:=
range
l
{
slog
.
Info
(
"inference compute"
,
"id"
,
g
.
ID
,
"library"
,
g
.
Library
,
"compute"
,
g
.
Compute
,
"driver"
,
fmt
.
Sprintf
(
"%d.%d"
,
g
.
DriverMajor
,
g
.
DriverMinor
),
"name"
,
g
.
Name
,
"total"
,
format
.
HumanBytes2
(
g
.
TotalMemory
),
"available"
,
format
.
HumanBytes2
(
g
.
FreeMemory
),
)
}
}
// Sort by Free Space
// Sort by Free Space
type
ByFreeMemory
[]
GpuInfo
type
ByFreeMemory
[]
GpuInfo
...
...
server/routes.go
View file @
dc18eee3
...
@@ -1065,7 +1065,8 @@ func Serve(ln net.Listener) error {
...
@@ -1065,7 +1065,8 @@ func Serve(ln net.Listener) error {
// At startup we retrieve GPU information so we can get log messages before loading a model
// At startup we retrieve GPU information so we can get log messages before loading a model
// This will log warnings to the log in case we have problems with detected GPUs
// This will log warnings to the log in case we have problems with detected GPUs
_
=
gpu
.
GetGPUInfo
()
gpus
:=
gpu
.
GetGPUInfo
()
gpus
.
LogDetails
()
return
srvr
.
Serve
(
ln
)
return
srvr
.
Serve
(
ln
)
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment