types.go 4.2 KB
Newer Older
1
2
package gpu

Daniel Hiltgen's avatar
Daniel Hiltgen committed
3
4
5
6
7
8
9
import (
	"fmt"
	"log/slog"

	"github.com/ollama/ollama/format"
)

10
// memInfo holds the memory figures shared by the device descriptions that
// embed it. Values are presumably in bytes (TotalMemory and FreeMemory are
// rendered with format.HumanBytes2 when logged) — TODO confirm against the
// discovery code that fills them in.
type memInfo struct {
	// TotalMemory is the total memory reported for the device.
	TotalMemory uint64 `json:"total_memory,omitempty"`
	// FreeMemory is the memory reported as currently free.
	FreeMemory uint64 `json:"free_memory,omitempty"`
	// FreeSwap is the swap space reported as currently free.
	FreeSwap uint64 `json:"free_swap,omitempty"`
}

// Beginning of an `ollama info` command
type GpuInfo struct {
	memInfo
	Library string `json:"library,omitempty"`
20

21
	// Optional variant to select (e.g. versions, cpu feature flags)
22
	Variant string `json:"variant"`
23

Michael Yang's avatar
Michael Yang committed
24
	// MinimumMemory represents the minimum memory required to use the GPU
Michael Yang's avatar
Michael Yang committed
25
	MinimumMemory uint64 `json:"-"`
Michael Yang's avatar
Michael Yang committed
26

Daniel Hiltgen's avatar
Daniel Hiltgen committed
27
28
29
	// Any extra PATH/LD_LIBRARY_PATH dependencies required for the Library to operate properly
	DependencyPath string `json:"lib_path,omitempty"`

Daniel Hiltgen's avatar
Daniel Hiltgen committed
30
31
32
	// Extra environment variables specific to the GPU as list of [key,value]
	EnvWorkarounds [][2]string `json:"envs,omitempty"`

33
34
35
36
37
	// Set to true if we can NOT reliably discover FreeMemory.  A value of true indicates
	// the FreeMemory is best effort, and may over or under report actual memory usage
	// False indicates FreeMemory can generally be trusted on this GPU
	UnreliableFreeMemory bool

Daniel Hiltgen's avatar
Daniel Hiltgen committed
38
	// GPU information
Daniel Hiltgen's avatar
Daniel Hiltgen committed
39
40
41
42
43
44
45
	ID      string `json:"gpu_id"`  // string to use for selection of this specific GPU
	Name    string `json:"name"`    // user friendly name if available
	Compute string `json:"compute"` // Compute Capability or gfx

	// Driver Information - TODO no need to put this on each GPU
	DriverMajor int `json:"driver_major,omitempty"`
	DriverMinor int `json:"driver_minor,omitempty"`
Daniel Hiltgen's avatar
Daniel Hiltgen committed
46
47

	// TODO other performance capability info to help in scheduling decisions
48
}
49

50
51
52
53
54
55
// CPUInfo describes the CPU using the same fields as a GPU by embedding
// GpuInfo; it adds no fields of its own.
type CPUInfo struct {
	GpuInfo
}

type CudaGPUInfo struct {
	GpuInfo
56
57
58
59
	OSOverhead   uint64 // Memory overhead between the driver library and management library
	index        int    //nolint:unused,nolintlint
	computeMajor int    //nolint:unused,nolintlint
	computeMinor int    //nolint:unused,nolintlint
60
61
62
63
64
}
type CudaGPUInfoList []CudaGPUInfo

type RocmGPUInfo struct {
	GpuInfo
Daniel Hiltgen's avatar
Daniel Hiltgen committed
65
66
	usedFilepath string //nolint:unused,nolintlint
	index        int    //nolint:unused,nolintlint
67
68
69
70
71
}
type RocmGPUInfoList []RocmGPUInfo

type OneapiGPUInfo struct {
	GpuInfo
Daniel Hiltgen's avatar
Daniel Hiltgen committed
72
73
	driverIndex int //nolint:unused,nolintlint
	gpuIndex    int //nolint:unused,nolintlint
74
75
76
}
type OneapiGPUInfoList []OneapiGPUInfo

Daniel Hiltgen's avatar
Daniel Hiltgen committed
77
78
// GpuInfoList is a slice of GpuInfo; the ByLibrary and LogDetails methods
// are defined on it.
type GpuInfoList []GpuInfo

79
80
81
82
83
// UnsupportedGPUInfo pairs a GpuInfo with a Reason string, presumably a
// human-readable explanation of why the device is not supported.
type UnsupportedGPUInfo struct {
	GpuInfo
	Reason string `json:"reason"`
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
84
85
86
87
88
89
90
// Split up the set of gpu info's by Library and variant
func (l GpuInfoList) ByLibrary() []GpuInfoList {
	resp := []GpuInfoList{}
	libs := []string{}
	for _, info := range l {
		found := false
		requested := info.Library
91
92
		if info.Variant != CPUCapabilityNone.String() {
			requested += "_" + info.Variant
Daniel Hiltgen's avatar
Daniel Hiltgen committed
93
94
95
96
97
98
99
100
101
		}
		for i, lib := range libs {
			if lib == requested {
				resp[i] = append(resp[i], info)
				found = true
				break
			}
		}
		if !found {
102
			libs = append(libs, requested)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
103
104
105
106
			resp = append(resp, []GpuInfo{info})
		}
	}
	return resp
107
}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
108

Daniel Hiltgen's avatar
Daniel Hiltgen committed
109
110
111
112
113
114
// Report the GPU information into the log an Info level
func (l GpuInfoList) LogDetails() {
	for _, g := range l {
		slog.Info("inference compute",
			"id", g.ID,
			"library", g.Library,
Daniel Hiltgen's avatar
Daniel Hiltgen committed
115
			"variant", g.Variant,
Daniel Hiltgen's avatar
Daniel Hiltgen committed
116
117
118
119
120
121
122
123
124
			"compute", g.Compute,
			"driver", fmt.Sprintf("%d.%d", g.DriverMajor, g.DriverMinor),
			"name", g.Name,
			"total", format.HumanBytes2(g.TotalMemory),
			"available", format.HumanBytes2(g.FreeMemory),
		)
	}
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
125
126
127
128
129
130
// ByFreeMemory orders a slice of GpuInfo by ascending FreeMemory. It provides
// the Len, Swap and Less methods required by sort.Interface.
type ByFreeMemory []GpuInfo

// Len returns the number of entries.
func (s ByFreeMemory) Len() int {
	return len(s)
}

// Swap exchanges the entries at positions i and j.
func (s ByFreeMemory) Swap(i, j int) {
	s[i], s[j] = s[j], s[i]
}

// Less reports whether entry i has strictly less free memory than entry j.
func (s ByFreeMemory) Less(i, j int) bool {
	left, right := s[i].FreeMemory, s[j].FreeMemory
	return left < right
}
131
132
133
134
135
136
137

// CPUCapability enumerates the CPU vector-extension levels distinguished by
// this package.
type CPUCapability uint32

// Override at build time when building base GPU runners
var GPURunnerCPUCapability = CPUCapabilityAVX

// The known capability levels, in increasing numeric order starting at zero.
const (
	CPUCapabilityNone CPUCapability = iota
	CPUCapabilityAVX
	CPUCapabilityAVX2
	// TODO AVX512
)

// String returns the short name of the capability: "avx" or "avx2". Every
// other value, including CPUCapabilityNone, yields "no vector extensions".
func (c CPUCapability) String() string {
	switch c {
	case CPUCapabilityAVX:
		return "avx"
	case CPUCapabilityAVX2:
		return "avx2"
	default:
		return "no vector extensions"
	}
}
154
155
156
157
158
159
160

// SystemInfo gathers everything this package describes into one value with
// JSON field names: the CPUInfo for the system, the list of GPUs, the GPUs
// recorded as unsupported (each carrying a Reason), and discovery errors as
// plain strings.
type SystemInfo struct {
	System          CPUInfo              `json:"system"`
	GPUs            []GpuInfo            `json:"gpus"`
	UnsupportedGPUs []UnsupportedGPUInfo `json:"unsupported_gpus"`
	DiscoveryErrors []string             `json:"discovery_errors"`
}