types.go 5.82 KB
Newer Older
1
package discover
2

Daniel Hiltgen's avatar
Daniel Hiltgen committed
3
import (
4
	"context"
Daniel Hiltgen's avatar
Daniel Hiltgen committed
5
	"log/slog"
6
7
8
	"path/filepath"
	"runtime"
	"strings"
Daniel Hiltgen's avatar
Daniel Hiltgen committed
9
10

	"github.com/ollama/ollama/format"
11
	"github.com/ollama/ollama/ml"
Daniel Hiltgen's avatar
Daniel Hiltgen committed
12
13
)

14
// memInfo groups the memory counters reported for a device. It is embedded
// in GpuInfo, so these fields (and their JSON keys) appear directly on that
// type.
// NOTE(review): values are presumably byte counts — confirm against the code
// that populates them.
type memInfo struct {
	TotalMemory uint64 `json:"total_memory,omitempty"`
	FreeMemory  uint64 `json:"free_memory,omitempty"`
	FreeSwap    uint64 `json:"free_swap,omitempty"` // TODO split this out for system only
}

// Beginning of an `ollama info` command
21
type GpuInfo struct { // TODO better name maybe "InferenceProcessor"?
22
	ml.DeviceID
23
	memInfo
24

25
	// Optional variant to select (e.g. versions, cpu feature flags)
26
	Variant string `json:"variant"`
27

Michael Yang's avatar
Michael Yang committed
28
	// MinimumMemory represents the minimum memory required to use the GPU
Michael Yang's avatar
Michael Yang committed
29
	MinimumMemory uint64 `json:"-"`
Michael Yang's avatar
Michael Yang committed
30

Daniel Hiltgen's avatar
Daniel Hiltgen committed
31
	// Any extra PATH/LD_LIBRARY_PATH dependencies required for the Library to operate properly
32
	DependencyPath []string `json:"lib_path,omitempty"`
Daniel Hiltgen's avatar
Daniel Hiltgen committed
33

34
35
36
37
38
	// Set to true if we can NOT reliably discover FreeMemory.  A value of true indicates
	// the FreeMemory is best effort, and may over or under report actual memory usage
	// False indicates FreeMemory can generally be trusted on this GPU
	UnreliableFreeMemory bool

Daniel Hiltgen's avatar
Daniel Hiltgen committed
39
	// GPU information
40
41
42
43
	filterID     string // AMD Workaround: The numeric ID of the device used to filter out other devices
	Name         string `json:"name"`          // user friendly name if available
	ComputeMajor int    `json:"compute_major"` // Compute Capability or gfx
	ComputeMinor int    `json:"compute_minor"`
Daniel Hiltgen's avatar
Daniel Hiltgen committed
44
45
46
47

	// Driver Information - TODO no need to put this on each GPU
	DriverMajor int `json:"driver_major,omitempty"`
	DriverMinor int `json:"driver_minor,omitempty"`
Daniel Hiltgen's avatar
Daniel Hiltgen committed
48
49

	// TODO other performance capability info to help in scheduling decisions
50
}
51

52
53
54
55
56
57
58
// RunnerName returns the runner identifier for this device: the Library
// name alone when no Variant is set, otherwise "<Library>_<Variant>".
func (g GpuInfo) RunnerName() string {
	if g.Variant == "" {
		return g.Library
	}
	return g.Library + "_" + g.Variant
}

59
60
type CPUInfo struct {
	GpuInfo
61
62
63
64
65
66
67
68
69
70
71
	CPUs []CPU
}

// CPU represents a CPU Package occupying a socket.
//
// The `cpuinfo` struct tags name the keys these fields correspond to.
// NOTE(review): presumably the keys of /proc/cpuinfo entries — confirm
// against the parser that reads these tags.
type CPU struct {
	ID                  string `cpuinfo:"processor"`
	VendorID            string `cpuinfo:"vendor_id"`
	ModelName           string `cpuinfo:"model name"`
	CoreCount           int
	EfficiencyCoreCount int // Performance = CoreCount - Efficiency
	ThreadCount         int
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
74
75
76
77
78
79
80
81
// GpuInfoList is a slice of GpuInfo with helper methods for grouping devices
// and checking capabilities across the whole set.
type GpuInfoList []GpuInfo

func (l GpuInfoList) ByLibrary() []GpuInfoList {
	resp := []GpuInfoList{}
	libs := []string{}
	for _, info := range l {
		found := false
		requested := info.Library
Michael Yang's avatar
Michael Yang committed
82
		if info.Variant != "" {
83
			requested += "_" + info.Variant
Daniel Hiltgen's avatar
Daniel Hiltgen committed
84
85
86
87
88
89
90
91
92
		}
		for i, lib := range libs {
			if lib == requested {
				resp[i] = append(resp[i], info)
				found = true
				break
			}
		}
		if !found {
93
			libs = append(libs, requested)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
94
95
96
97
			resp = append(resp, []GpuInfo{info})
		}
	}
	return resp
98
}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
99

100
101
102
103
104
105
106
107
108
109
110
111
func LogDetails(devices []ml.DeviceInfo) {
	for _, dev := range devices {
		var libs []string
		for _, dir := range dev.LibraryPath {
			if strings.Contains(dir, filepath.Join("lib", "ollama")) {
				libs = append(libs, filepath.Base(dir))
			}
		}
		typeStr := "discrete"
		if dev.Integrated {
			typeStr = "iGPU"
		}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
112
		slog.Info("inference compute",
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
			"id", dev.ID,
			"library", dev.Library,
			"compute", dev.Compute(),
			"name", dev.Name,
			"description", dev.Description,
			"libdirs", strings.Join(libs, ","),
			"driver", dev.Driver(),
			"pci_id", dev.PCIID,
			"type", typeStr,
			"total", format.HumanBytes2(dev.TotalMemory),
			"available", format.HumanBytes2(dev.FreeMemory),
		)
	}
	// CPU inference
	if len(devices) == 0 {
		dev, _ := GetCPUMem()
		slog.Info("inference compute",
			"id", "cpu",
			"library", "cpu",
			"compute", "",
			"name", "cpu",
			"description", "cpu",
			"libdirs", "ollama",
			"driver", "",
			"pci_id", "",
			"type", "",
			"total", format.HumanBytes2(dev.TotalMemory),
			"available", format.HumanBytes2(dev.FreeMemory),
Daniel Hiltgen's avatar
Daniel Hiltgen committed
141
142
143
144
		)
	}
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
145
146
147
148
149
150
// ByFreeMemory orders a slice of GpuInfo by ascending FreeMemory. Its method
// set (Len, Swap, Less) matches what sort.Interface requires.
type ByFreeMemory []GpuInfo

// Len returns the number of devices in the slice.
func (s ByFreeMemory) Len() int {
	return len(s)
}

// Swap exchanges the devices at positions i and j.
func (s ByFreeMemory) Swap(i, j int) {
	s[i], s[j] = s[j], s[i]
}

// Less reports whether the device at i has less free memory than the one at j.
func (s ByFreeMemory) Less(i, j int) bool {
	return s[i].FreeMemory < s[j].FreeMemory
}
151

152
type SystemInfo struct {
153
154
	System CPUInfo   `json:"system"`
	GPUs   []GpuInfo `json:"gpus"`
155
}
156
157
158
159

// GetOptimalThreadCount returns the optimal number of threads to use for
// inference.
//
// The result is the sum, over every CPU package in si.System.CPUs, of
// CoreCount minus EfficiencyCoreCount (i.e. the performance cores). When no
// CPU packages are recorded it falls back to runtime.NumCPU().
func (si SystemInfo) GetOptimalThreadCount() int {
	if len(si.System.CPUs) == 0 {
		// Fall back to Go's num CPU
		return runtime.NumCPU()
	}

	perfCores := 0
	for _, c := range si.System.CPUs {
		perfCores += c.CoreCount - c.EfficiencyCoreCount
	}

	return perfCores
}
171
172
173
174

// FlashAttentionSupported reports whether every device in the list supports
// flash attention. It returns false as soon as one unsupported device is
// found, and true for an empty list.
//
// A device is treated as supporting flash attention when any of these hold:
//   - its Library is "cpu"
//   - its Name or Library is "Metal"
//   - its Library is "CUDA", DriverMajor is at least 7, and its compute
//     capability is not 7.2
//   - its Library is "ROCm"
func (l GpuInfoList) FlashAttentionSupported() bool {
	for _, gpu := range l {
		// NOTE(review): the CUDA gate checks DriverMajor while the exclusion
		// checks compute capability — confirm DriverMajor is the intended field.
		supportsFA := gpu.Library == "cpu" ||
			gpu.Name == "Metal" || gpu.Library == "Metal" ||
			(gpu.Library == "CUDA" && gpu.DriverMajor >= 7 && !(gpu.ComputeMajor == 7 && gpu.ComputeMinor == 2)) || // We don't have kernels for Jetson Xavier
			gpu.Library == "ROCm"

		if !supportsFA {
			return false
		}
	}
	return true
}
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213

// BaseRunner is the minimal view of a runner process: the port it listens on
// and whether it has exited.
type BaseRunner interface {
	// GetPort returns the localhost port number the runner is running on
	GetPort() int

	// HasExited indicates if the runner is no longer running.  This can be used during
	// bootstrap to detect if a given filtered device is incompatible and triggered an assert
	HasExited() bool
}

// RunnerDiscovery extends BaseRunner with the ability to query the runner
// for information about the devices it can see.
type RunnerDiscovery interface {
	BaseRunner

	// GetDeviceInfos will perform a query of the underlying device libraries
	// for device identification and free VRAM information
	// During bootstrap scenarios, this routine may take seconds to complete
	GetDeviceInfos(ctx context.Context) []ml.DeviceInfo
}

// FilteredRunnerDiscovery extends RunnerDiscovery with access to the set of
// device IDs the runner is actively using.
type FilteredRunnerDiscovery interface {
	RunnerDiscovery

	// GetActiveDeviceIDs returns the filtered set of devices actively in
	// use by this runner for running models.  If the runner is a bootstrap runner, no devices
	// will be active yet so no device IDs are returned.
	// This routine will not query the underlying device and will return immediately
	GetActiveDeviceIDs() []ml.DeviceID
}