package gpu

import (
	"fmt"
	"log/slog"

	"github.com/ollama/ollama/format"
)

// memInfo holds the total and free memory figures recorded for a device.
// It is embedded in GpuInfo, so both fields are promoted onto that type.
// NOTE(review): LogDetails formats these with format.HumanBytes2, so the
// values are presumably byte counts — confirm against the discovery code.
type memInfo struct {
	TotalMemory uint64 `json:"total_memory,omitempty"`
	FreeMemory  uint64 `json:"free_memory,omitempty"`
}

// Beginning of an `ollama info` command
type GpuInfo struct {
	memInfo
	Library string `json:"library,omitempty"`
19

20
	// Optional variant to select (e.g. versions, cpu feature flags)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
21
	Variant CPUCapability `json:"variant"`
22

Michael Yang's avatar
Michael Yang committed
23
	// MinimumMemory represents the minimum memory required to use the GPU
Michael Yang's avatar
Michael Yang committed
24
	MinimumMemory uint64 `json:"-"`
Michael Yang's avatar
Michael Yang committed
25

Daniel Hiltgen's avatar
Daniel Hiltgen committed
26
27
28
	// Any extra PATH/LD_LIBRARY_PATH dependencies required for the Library to operate properly
	DependencyPath string `json:"lib_path,omitempty"`

Daniel Hiltgen's avatar
Daniel Hiltgen committed
29
30
31
	// Extra environment variables specific to the GPU as list of [key,value]
	EnvWorkarounds [][2]string `json:"envs,omitempty"`

32
33
34
35
36
	// Set to true if we can NOT reliably discover FreeMemory.  A value of true indicates
	// the FreeMemory is best effort, and may over or under report actual memory usage
	// False indicates FreeMemory can generally be trusted on this GPU
	UnreliableFreeMemory bool

Daniel Hiltgen's avatar
Daniel Hiltgen committed
37
	// GPU information
Daniel Hiltgen's avatar
Daniel Hiltgen committed
38
39
40
41
42
43
44
	ID      string `json:"gpu_id"`  // string to use for selection of this specific GPU
	Name    string `json:"name"`    // user friendly name if available
	Compute string `json:"compute"` // Compute Capability or gfx

	// Driver Information - TODO no need to put this on each GPU
	DriverMajor int `json:"driver_major,omitempty"`
	DriverMinor int `json:"driver_minor,omitempty"`
Daniel Hiltgen's avatar
Daniel Hiltgen committed
45
46

	// TODO other performance capability info to help in scheduling decisions
47
}
// CPUInfo wraps GpuInfo without adding any fields of its own.
type CPUInfo struct {
	GpuInfo
}

type CudaGPUInfo struct {
	GpuInfo
Daniel Hiltgen's avatar
Daniel Hiltgen committed
55
	index int //nolint:unused,nolintlint
56
57
58
59
60
}
type CudaGPUInfoList []CudaGPUInfo

type RocmGPUInfo struct {
	GpuInfo
Daniel Hiltgen's avatar
Daniel Hiltgen committed
61
62
	usedFilepath string //nolint:unused,nolintlint
	index        int    //nolint:unused,nolintlint
63
64
65
66
67
}
type RocmGPUInfoList []RocmGPUInfo

type OneapiGPUInfo struct {
	GpuInfo
Daniel Hiltgen's avatar
Daniel Hiltgen committed
68
69
	driverIndex int //nolint:unused,nolintlint
	gpuIndex    int //nolint:unused,nolintlint
70
71
72
}
type OneapiGPUInfoList []OneapiGPUInfo

// GpuInfoList is a slice of GpuInfo values.
type GpuInfoList []GpuInfo

// Split up the set of gpu info's by Library and variant
func (l GpuInfoList) ByLibrary() []GpuInfoList {
	resp := []GpuInfoList{}
	libs := []string{}
	for _, info := range l {
		found := false
		requested := info.Library
Daniel Hiltgen's avatar
Daniel Hiltgen committed
82
83
		if info.Variant != CPUCapabilityNone {
			requested += "_" + info.Variant.String()
Daniel Hiltgen's avatar
Daniel Hiltgen committed
84
85
86
87
88
89
90
91
92
93
94
95
96
97
		}
		for i, lib := range libs {
			if lib == requested {
				resp[i] = append(resp[i], info)
				found = true
				break
			}
		}
		if !found {
			libs = append(libs, info.Library)
			resp = append(resp, []GpuInfo{info})
		}
	}
	return resp
98
}
// LogDetails writes one Info-level "inference compute" log record per entry
// in the list, carrying the entry's id, library, compute string, driver
// version (as "major.minor"), name, and its total and free memory formatted
// with format.HumanBytes2.
func (l GpuInfoList) LogDetails() {
	for i := range l {
		gpu := l[i]
		driver := fmt.Sprintf("%d.%d", gpu.DriverMajor, gpu.DriverMinor)
		total := format.HumanBytes2(gpu.TotalMemory)
		available := format.HumanBytes2(gpu.FreeMemory)
		slog.Info("inference compute",
			"id", gpu.ID,
			"library", gpu.Library,
			"compute", gpu.Compute,
			"driver", driver,
			"name", gpu.Name,
			"total", total,
			"available", available,
		)
	}
}

// ByFreeMemory orders a slice of GpuInfo by ascending FreeMemory. Its Len,
// Swap and Less methods satisfy sort.Interface.
type ByFreeMemory []GpuInfo

// Len reports the number of entries.
func (s ByFreeMemory) Len() int {
	return len(s)
}

// Swap exchanges the entries at positions i and j.
func (s ByFreeMemory) Swap(i, j int) {
	s[i], s[j] = s[j], s[i]
}

// Less reports whether entry i has strictly less FreeMemory than entry j.
func (s ByFreeMemory) Less(i, j int) bool {
	left, right := s[i].FreeMemory, s[j].FreeMemory
	return left < right
}

// CPUCapability enumerates the CPU vector-extension levels declared below.
type CPUCapability uint32

// GPURunnerCPUCapability defaults to CPUCapabilityAVX.
// Override at build time when building base GPU runners
var GPURunnerCPUCapability = CPUCapabilityAVX

// The CPUCapability values, numbered from zero in declaration order, so the
// zero value of the type is CPUCapabilityNone.
const (
	CPUCapabilityNone CPUCapability = iota
	CPUCapabilityAVX
	CPUCapabilityAVX2
	// TODO AVX512
)

func (c CPUCapability) String() string {
135
136
137
138
139
140
	switch c {
	case CPUCapabilityAVX:
		return "avx"
	case CPUCapabilityAVX2:
		return "avx2"
	default:
Daniel Hiltgen's avatar
Daniel Hiltgen committed
141
		return "no vector extensions"
142
143
	}
}