types.go 5.08 KB
Newer Older
1
package discover
2

Daniel Hiltgen's avatar
Daniel Hiltgen committed
3
4
5
6
7
8
9
import (
	"fmt"
	"log/slog"

	"github.com/ollama/ollama/format"
)

10
// memInfo groups the memory counters shared by every compute device record.
// Values are presumably in bytes - TODO confirm against the discovery code.
type memInfo struct {
	TotalMemory uint64 `json:"total_memory,omitempty"`
	FreeMemory  uint64 `json:"free_memory,omitempty"`
	FreeSwap    uint64 `json:"free_swap,omitempty"` // TODO split this out for system only
}

// Beginning of an `ollama info` command
17
type GpuInfo struct { // TODO better name maybe "InferenceProcessor"?
18
19
	memInfo
	Library string `json:"library,omitempty"`
20

21
	// Optional variant to select (e.g. versions, cpu feature flags)
22
	Variant string `json:"variant"`
23

Michael Yang's avatar
Michael Yang committed
24
	// MinimumMemory represents the minimum memory required to use the GPU
Michael Yang's avatar
Michael Yang committed
25
	MinimumMemory uint64 `json:"-"`
Michael Yang's avatar
Michael Yang committed
26

Daniel Hiltgen's avatar
Daniel Hiltgen committed
27
	// Any extra PATH/LD_LIBRARY_PATH dependencies required for the Library to operate properly
28
	DependencyPath []string `json:"lib_path,omitempty"`
Daniel Hiltgen's avatar
Daniel Hiltgen committed
29

30
31
	// Extra environment variables specific to the GPU as list of [key=value]
	EnvWorkarounds []string `json:"envs,omitempty"`
Daniel Hiltgen's avatar
Daniel Hiltgen committed
32

33
34
35
36
37
	// Set to true if we can NOT reliably discover FreeMemory.  A value of true indicates
	// the FreeMemory is best effort, and may over or under report actual memory usage
	// False indicates FreeMemory can generally be trusted on this GPU
	UnreliableFreeMemory bool

Daniel Hiltgen's avatar
Daniel Hiltgen committed
38
	// GPU information
39
40
41
42
	ID       string `json:"gpu_id"` // string to use for selection of this specific GPU
	filterID int    //nolint:unused,nolintlint // AMD Workaround: The numeric ID of the device used to filter out other devices
	Name     string `json:"name"`    // user friendly name if available
	Compute  string `json:"compute"` // Compute Capability or gfx
Daniel Hiltgen's avatar
Daniel Hiltgen committed
43
44
45
46

	// Driver Information - TODO no need to put this on each GPU
	DriverMajor int `json:"driver_major,omitempty"`
	DriverMinor int `json:"driver_minor,omitempty"`
Daniel Hiltgen's avatar
Daniel Hiltgen committed
47
48

	// TODO other performance capability info to help in scheduling decisions
49
}
50

51
52
53
54
55
56
57
// RunnerName returns the name identifying the runner for this device: the
// Library alone when no Variant is set, otherwise Library and Variant joined
// by an underscore.
func (gpu GpuInfo) RunnerName() string {
	name := gpu.Library
	if gpu.Variant != "" {
		name += "_" + gpu.Variant
	}
	return name
}

58
59
type CPUInfo struct {
	GpuInfo
60
61
62
63
64
65
66
67
68
69
70
	CPUs []CPU
}

// CPU represents a CPU Package occupying a socket.
//
// The cpuinfo struct tags name the source key for each tagged field
// (presumably /proc/cpuinfo entries - TODO confirm against the parser).
type CPU struct {
	ID                  string `cpuinfo:"processor"`
	VendorID            string `cpuinfo:"vendor_id"`
	ModelName           string `cpuinfo:"model name"`
	CoreCount           int
	EfficiencyCoreCount int // Performance = CoreCount - Efficiency
	ThreadCount         int
}

type CudaGPUInfo struct {
	GpuInfo
75
76
77
78
	OSOverhead   uint64 // Memory overhead between the driver library and management library
	index        int    //nolint:unused,nolintlint
	computeMajor int    //nolint:unused,nolintlint
	computeMinor int    //nolint:unused,nolintlint
79
80
81
82
83
}
type CudaGPUInfoList []CudaGPUInfo

type RocmGPUInfo struct {
	GpuInfo
Daniel Hiltgen's avatar
Daniel Hiltgen committed
84
85
	usedFilepath string //nolint:unused,nolintlint
	index        int    //nolint:unused,nolintlint
86
87
88
89
90
}
type RocmGPUInfoList []RocmGPUInfo

type OneapiGPUInfo struct {
	GpuInfo
Daniel Hiltgen's avatar
Daniel Hiltgen committed
91
92
	driverIndex int //nolint:unused,nolintlint
	gpuIndex    int //nolint:unused,nolintlint
93
94
95
}
type OneapiGPUInfoList []OneapiGPUInfo

Daniel Hiltgen's avatar
Daniel Hiltgen committed
96
97
// GpuInfoList is a list of GpuInfo entries; the grouping, logging and
// capability helpers in this file are defined on it.
type GpuInfoList []GpuInfo

98
99
100
101
102
// UnsupportedGPUInfo pairs a GpuInfo with a Reason string, serialized under
// the "reason" JSON key.
type UnsupportedGPUInfo struct {
	GpuInfo
	Reason string `json:"reason"`
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
103
104
105
106
107
108
109
// Split up the set of gpu info's by Library and variant
func (l GpuInfoList) ByLibrary() []GpuInfoList {
	resp := []GpuInfoList{}
	libs := []string{}
	for _, info := range l {
		found := false
		requested := info.Library
Michael Yang's avatar
Michael Yang committed
110
		if info.Variant != "" {
111
			requested += "_" + info.Variant
Daniel Hiltgen's avatar
Daniel Hiltgen committed
112
113
114
115
116
117
118
119
120
		}
		for i, lib := range libs {
			if lib == requested {
				resp[i] = append(resp[i], info)
				found = true
				break
			}
		}
		if !found {
121
			libs = append(libs, requested)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
122
123
124
125
			resp = append(resp, []GpuInfo{info})
		}
	}
	return resp
126
}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
127

Daniel Hiltgen's avatar
Daniel Hiltgen committed
128
129
130
131
132
133
// Report the GPU information into the log an Info level
func (l GpuInfoList) LogDetails() {
	for _, g := range l {
		slog.Info("inference compute",
			"id", g.ID,
			"library", g.Library,
Daniel Hiltgen's avatar
Daniel Hiltgen committed
134
			"variant", g.Variant,
Daniel Hiltgen's avatar
Daniel Hiltgen committed
135
136
137
138
139
140
141
142
143
			"compute", g.Compute,
			"driver", fmt.Sprintf("%d.%d", g.DriverMajor, g.DriverMinor),
			"name", g.Name,
			"total", format.HumanBytes2(g.TotalMemory),
			"available", format.HumanBytes2(g.FreeMemory),
		)
	}
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
144
145
146
147
148
149
// ByFreeMemory orders GPUs by ascending FreeMemory. Its Len, Swap and Less
// methods satisfy sort.Interface.
type ByFreeMemory []GpuInfo

// Len returns the number of GPUs in the collection.
func (a ByFreeMemory) Len() int {
	return len(a)
}

// Swap exchanges the GPUs at positions i and j.
func (a ByFreeMemory) Swap(i, j int) {
	a[j], a[i] = a[i], a[j]
}

// Less reports whether the GPU at i has less free memory than the GPU at j.
func (a ByFreeMemory) Less(i, j int) bool {
	return a[j].FreeMemory > a[i].FreeMemory
}
150

151
152
153
154
155
156
// SystemInfo aggregates the discovery results: the host system record, the
// usable GPUs, the GPUs that were found but are unsupported, and any
// discovery errors as strings.
type SystemInfo struct {
	System          CPUInfo              `json:"system"`
	GPUs            []GpuInfo            `json:"gpus"`
	UnsupportedGPUs []UnsupportedGPUInfo `json:"unsupported_gpus"`
	DiscoveryErrors []string             `json:"discovery_errors"`
}
157
158
159
160
161
162

// GetOptimalThreadCount returns the optimal number of threads to use for
// inference: the number of performance cores (CoreCount minus
// EfficiencyCoreCount) summed over every CPU package in si.System.CPUs.
// It returns 0 when no CPUs are recorded.
func (si SystemInfo) GetOptimalThreadCount() int {
	// Summing over an empty CPU list already yields 0, so no separate
	// empty-list guard is needed.
	perfCores := 0
	for _, c := range si.System.CPUs {
		perfCores += c.CoreCount - c.EfficiencyCoreCount
	}
	return perfCores
}
171
172
173
174

// FlashAttentionSupported reports whether every GPU in the list supports
// flash attention. It returns false as soon as one GPU does not, and true for
// an empty list.
//
// A GPU counts as supported when its Library is "cpu", "metal" or "rocm", or
// when it is "cuda" with DriverMajor >= 7.
//
// NOTE(review): the CUDA check compares DriverMajor, which GpuInfo documents
// as driver information rather than compute capability - confirm that the
// driver version is the intended gate.
func (l GpuInfoList) FlashAttentionSupported() bool {
	for _, gpu := range l {
		switch gpu.Library {
		case "cpu", "metal", "rocm":
			continue
		case "cuda":
			if gpu.DriverMajor >= 7 {
				continue
			}
		}
		// Unknown library, or a CUDA device below the threshold.
		return false
	}
	return true
}