types.go 3.76 KB
Newer Older
mashun1's avatar
v1  
mashun1 committed
1
2
3
4
5
6
7
8
9
10
11
12
package gpu

import (
	"fmt"
	"log/slog"

	"github.com/ollama/ollama/format"
)

type memInfo struct {
	TotalMemory uint64 `json:"total_memory,omitempty"`
	FreeMemory  uint64 `json:"free_memory,omitempty"`
xuxzh1's avatar
init  
xuxzh1 committed
13
	FreeSwap    uint64 `json:"free_swap,omitempty"`
mashun1's avatar
v1  
mashun1 committed
14
15
16
17
18
19
20
21
}

// Beginning of an `ollama info` command
type GpuInfo struct {
	memInfo
	Library string `json:"library,omitempty"`

	// Optional variant to select (e.g. versions, cpu feature flags)
xuxzh1's avatar
init  
xuxzh1 committed
22
	Variant CPUCapability `json:"variant"`
mashun1's avatar
v1  
mashun1 committed
23
24
25
26
27
28
29

	// MinimumMemory represents the minimum memory required to use the GPU
	MinimumMemory uint64 `json:"-"`

	// Any extra PATH/LD_LIBRARY_PATH dependencies required for the Library to operate properly
	DependencyPath string `json:"lib_path,omitempty"`

xuxzh1's avatar
init  
xuxzh1 committed
30
31
32
33
34
35
36
37
	// Extra environment variables specific to the GPU as list of [key,value]
	EnvWorkarounds [][2]string `json:"envs,omitempty"`

	// Set to true if we can NOT reliably discover FreeMemory.  A value of true indicates
	// the FreeMemory is best effort, and may over or under report actual memory usage
	// False indicates FreeMemory can generally be trusted on this GPU
	UnreliableFreeMemory bool

mashun1's avatar
v1  
mashun1 committed
38
39
40
41
42
43
44
45
46
47
48
49
	// GPU information
	ID      string `json:"gpu_id"`  // string to use for selection of this specific GPU
	Name    string `json:"name"`    // user friendly name if available
	Compute string `json:"compute"` // Compute Capability or gfx

	// Driver Information - TODO no need to put this on each GPU
	DriverMajor int `json:"driver_major,omitempty"`
	DriverMinor int `json:"driver_minor,omitempty"`

	// TODO other performance capability info to help in scheduling decisions
}

xuxzh1's avatar
init  
xuxzh1 committed
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
type CPUInfo struct {
	GpuInfo
}

type CudaGPUInfo struct {
	GpuInfo
	OSOverhead uint64 // Memory overhead between the driver library and management library
	index      int    //nolint:unused,nolintlint
}
type CudaGPUInfoList []CudaGPUInfo

type RocmGPUInfo struct {
	GpuInfo
	usedFilepath string //nolint:unused,nolintlint
	index        int    //nolint:unused,nolintlint
}
type RocmGPUInfoList []RocmGPUInfo

type OneapiGPUInfo struct {
	GpuInfo
	driverIndex int //nolint:unused,nolintlint
	gpuIndex    int //nolint:unused,nolintlint
}
type OneapiGPUInfoList []OneapiGPUInfo

mashun1's avatar
v1  
mashun1 committed
75
76
77
78
79
80
81
82
83
type GpuInfoList []GpuInfo

// Split up the set of gpu info's by Library and variant
func (l GpuInfoList) ByLibrary() []GpuInfoList {
	resp := []GpuInfoList{}
	libs := []string{}
	for _, info := range l {
		found := false
		requested := info.Library
xuxzh1's avatar
init  
xuxzh1 committed
84
85
		if info.Variant != CPUCapabilityNone {
			requested += "_" + info.Variant.String()
mashun1's avatar
v1  
mashun1 committed
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
		}
		for i, lib := range libs {
			if lib == requested {
				resp[i] = append(resp[i], info)
				found = true
				break
			}
		}
		if !found {
			libs = append(libs, info.Library)
			resp = append(resp, []GpuInfo{info})
		}
	}
	return resp
}

// Report the GPU information into the log an Info level
func (l GpuInfoList) LogDetails() {
	for _, g := range l {
		slog.Info("inference compute",
			"id", g.ID,
			"library", g.Library,
			"compute", g.Compute,
			"driver", fmt.Sprintf("%d.%d", g.DriverMajor, g.DriverMinor),
			"name", g.Name,
			"total", format.HumanBytes2(g.TotalMemory),
			"available", format.HumanBytes2(g.FreeMemory),
		)
	}
}

// Sort by Free Space
type ByFreeMemory []GpuInfo

func (a ByFreeMemory) Len() int           { return len(a) }
func (a ByFreeMemory) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }
func (a ByFreeMemory) Less(i, j int) bool { return a[i].FreeMemory < a[j].FreeMemory }
xuxzh1's avatar
init  
xuxzh1 committed
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145

type CPUCapability uint32

// Override at build time when building base GPU runners
var GPURunnerCPUCapability = CPUCapabilityAVX

const (
	CPUCapabilityNone CPUCapability = iota
	CPUCapabilityAVX
	CPUCapabilityAVX2
	// TODO AVX512
)

func (c CPUCapability) String() string {
	switch c {
	case CPUCapabilityAVX:
		return "avx"
	case CPUCapabilityAVX2:
		return "avx2"
	default:
		return "no vector extensions"
	}
}