cpu_windows.go 6.36 KB
Newer Older
1
package discover
Daniel Hiltgen's avatar
Daniel Hiltgen committed
2
3
4

import (
	"fmt"
5
	"log/slog"
Daniel Hiltgen's avatar
Daniel Hiltgen committed
6
7
	"syscall"
	"unsafe"
8
9

	"github.com/ollama/ollama/logutil"
Daniel Hiltgen's avatar
Daniel Hiltgen committed
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
)

// MEMORYSTATUSEX is the buffer GetCPUMem hands to kernel32's
// GlobalMemoryStatusEx. The caller fills in length with the size of the struct
// before the call; the remaining fields are populated by the call.
//
// NOTE(review): field order and widths presumably mirror the Win32
// MEMORYSTATUSEX layout — do not reorder or resize fields without confirming.
type MEMORYSTATUSEX struct {
	length               uint32 // set to sizeofMemoryStatusEx before the call
	MemoryLoad           uint32
	TotalPhys            uint64 // reported by GetCPUMem as TotalMemory
	AvailPhys            uint64 // reported by GetCPUMem as FreeMemory
	TotalPageFile        uint64
	AvailPageFile        uint64 // reported by GetCPUMem as FreeSwap
	TotalVirtual         uint64
	AvailVirtual         uint64
	AvailExtendedVirtual uint64
}

var (
25
26
27
28
	k32                              = syscall.NewLazyDLL("kernel32.dll")
	globalMemoryStatusExProc         = k32.NewProc("GlobalMemoryStatusEx")
	sizeofMemoryStatusEx             = uint32(unsafe.Sizeof(MEMORYSTATUSEX{}))
	GetLogicalProcessorInformationEx = k32.NewProc("GetLogicalProcessorInformationEx")
Daniel Hiltgen's avatar
Daniel Hiltgen committed
29
30
31
32
33
34
35
36
)

func GetCPUMem() (memInfo, error) {
	memStatus := MEMORYSTATUSEX{length: sizeofMemoryStatusEx}
	r1, _, err := globalMemoryStatusExProc.Call(uintptr(unsafe.Pointer(&memStatus)))
	if r1 == 0 {
		return memInfo{}, fmt.Errorf("GlobalMemoryStatusEx failed: %w", err)
	}
37
	return memInfo{TotalMemory: memStatus.TotalPhys, FreeMemory: memStatus.AvailPhys, FreeSwap: memStatus.AvailPageFile}, nil
Daniel Hiltgen's avatar
Daniel Hiltgen committed
38
}
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103

// LOGICAL_PROCESSOR_RELATIONSHIP identifies the kind of a
// SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX record and is also the selector
// passed as the first argument to GetLogicalProcessorInformationEx.
type LOGICAL_PROCESSOR_RELATIONSHIP uint32

// Relationship kinds, numbered 0..7 by iota. The order is significant: the
// numeric values are compared against data returned by the OS, so entries must
// not be reordered. Only RelationProcessorCore and RelationProcessorPackage
// are inspected by the code in this file.
const (
	RelationProcessorCore LOGICAL_PROCESSOR_RELATIONSHIP = iota
	RelationNumaNode
	RelationCache
	RelationProcessorPackage
	RelationGroup
	RelationProcessorDie
	RelationNumaNodeEx
	RelationProcessorModule
)

// RelationAll is the selector used by getLogicalProcessorInformationEx to
// request records of every relationship kind in a single call.
const RelationAll LOGICAL_PROCESSOR_RELATIONSHIP = 0xffff

// GROUP_AFFINITY is one entry of PROCESSOR_RELATIONSHIP.GroupMask. It is read
// in place from the buffer returned by GetLogicalProcessorInformationEx, so
// the field layout must not be changed.
type GROUP_AFFINITY struct {
	Mask     uintptr // KAFFINITY; bitmask tested for overlap by IsMember
	Group    uint16  // presumably the processor group the mask applies to — confirm against the Win32 docs
	Reserved [3]uint16
}

// PROCESSOR_RELATIONSHIP is the payload of a
// SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX record whose Relationship is
// RelationProcessorCore or RelationProcessorPackage (the two kinds this file
// decodes). It is overlaid directly on OS-provided bytes, so the field layout
// must not be changed.
type PROCESSOR_RELATIONSHIP struct {
	Flags           byte // zero is counted as one thread per core, non-zero as two
	EfficiencyClass byte // compared across cores; cores below the maximum are counted as efficiency cores
	Reserved        [20]byte
	GroupCount      uint16            // number of valid GroupMask entries
	GroupMask       [1]GROUP_AFFINITY // len GroupCount; entries past [0] are reached with pointer arithmetic
}

// Omitted unused structs: NUMA_NODE_RELATIONSHIP CACHE_RELATIONSHIP GROUP_RELATIONSHIP

// SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX is the header of one variable-length
// record in the buffer filled by GetLogicalProcessorInformationEx. Records are
// packed back to back; the next record starts Size bytes after the start of
// the current one.
type SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX struct {
	Relationship LOGICAL_PROCESSOR_RELATIONSHIP // selects which payload U holds
	Size         uint32                         // total size of this record in bytes, used to step to the next record
	U            [1]byte                        // Union len Size; only its address is used, as the start of the payload
	// PROCESSOR_RELATIONSHIP
	// NUMA_NODE_RELATIONSHIP
	// CACHE_RELATIONSHIP
	// GROUP_RELATIONSHIP
}

// IsMember reports whether target shares at least one logical processor with
// group. Either side being nil yields false.
//
// A GROUP_AFFINITY mask is only meaningful relative to its processor group, so
// two affinities overlap only when they name the same Group AND their masks
// intersect. Comparing masks alone would make bit 0 of group 0 and bit 0 of
// group 1 look like the same processor on machines with more than one group.
func (group *GROUP_AFFINITY) IsMember(target *GROUP_AFFINITY) bool {
	if group == nil || target == nil {
		return false
	}
	if group.Group != target.Group {
		return false
	}
	return group.Mask&target.Mask != 0
}

// winPackage accumulates the counts for one processor package while the
// logical processor information buffer is being decoded.
type winPackage struct {
	groups              []*GROUP_AFFINITY // the package's group masks; these point into the decoded buffer
	coreCount           int               // performance cores = coreCount - efficiencyCoreCount
	efficiencyCoreCount int               // cores whose EfficiencyClass is below the highest class seen
	threadCount         int               // one per core, or two when the core's Flags is non-zero
}

// IsMember reports whether target overlaps any of the package's group
// affinities, as decided by GROUP_AFFINITY.IsMember. A package with no groups
// never matches.
func (pkg *winPackage) IsMember(target *GROUP_AFFINITY) bool {
	matched := false
	// Stop at the first overlapping group; later entries cannot change the answer.
	for i := 0; i < len(pkg.groups) && !matched; i++ {
		matched = pkg.groups[i].IsMember(target)
	}
	return matched
}

func getLogicalProcessorInformationEx() ([]byte, error) {
104
	buf := make([]byte, 1)
105
	bufSize := len(buf)
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
	ret, _, err := GetLogicalProcessorInformationEx.Call(
		uintptr(RelationAll),
		uintptr(unsafe.Pointer(&buf[0])),
		uintptr(unsafe.Pointer(&bufSize)),
	)
	if ret != 0 {
		logutil.Trace("failed to retrieve CPU payload size", "ret", ret, "size", bufSize, "error", err)
		return nil, fmt.Errorf("failed to determine size info ret:%d %w", ret, err)
	}

	buf = make([]byte, bufSize)
	ret, _, err = GetLogicalProcessorInformationEx.Call(
		uintptr(RelationAll),
		uintptr(unsafe.Pointer(&buf[0])),
		uintptr(unsafe.Pointer(&bufSize)),
	)
	if ret == 0 {
		logutil.Trace("failed to retrieve CPU information", "ret", ret, "size", len(buf), "new_size", bufSize, "error", err)
		return nil, fmt.Errorf("failed to gather processor information ret:%d buflen:%d %w", ret, bufSize, err)
125
	}
126
	return buf, nil
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
}

// processSystemLogicalProcessorInforationList decodes the packed
// SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX records in buf and returns one
// winPackage per RelationProcessorPackage record, in buffer order.
//
// Each RelationProcessorCore record is attributed to every package whose
// groups overlap the core's group mask. A core adds one thread when its Flags
// is zero and two otherwise, and is counted as an efficiency core when its
// EfficiencyClass is lower than the highest class found on any core.
//
// The returned packages hold pointers into buf. Malformed or truncated data is
// never read past the end of buf: decoding stops at the first record whose
// Size is unusable, and processor records too small for their GroupCount are
// skipped. The result is always non-nil.
func processSystemLogicalProcessorInforationList(buf []byte) []*winPackage {
	records := splitLogicalProcessorRecords(buf)

	// Find all the packages first
	packages := []*winPackage{}
	for _, slpi := range records {
		if slpi.Relationship != RelationProcessorPackage {
			continue
		}
		_, groups := processorGroupMasks(slpi)
		packages = append(packages, &winPackage{groups: groups})
	}

	slog.Info("packages", "count", len(packages))

	// To identify efficiency cores we have to compare the relative values
	// Larger values are "less efficient" (aka, more performant)
	var maxEfficiencyClass byte
	for _, slpi := range records {
		if slpi.Relationship != RelationProcessorCore {
			continue
		}
		pr := (*PROCESSOR_RELATIONSHIP)(unsafe.Pointer(&slpi.U[0]))
		if pr.EfficiencyClass > maxEfficiencyClass {
			maxEfficiencyClass = pr.EfficiencyClass
		}
	}
	if maxEfficiencyClass > 0 {
		slog.Info("efficiency cores detected", "maxEfficiencyClass", maxEfficiencyClass)
	}

	// then match up the Cores to the Packages, count up cores, threads and efficiency cores
	for _, slpi := range records {
		if slpi.Relationship != RelationProcessorCore {
			continue
		}
		pr, groups := processorGroupMasks(slpi)
		for _, gm := range groups {
			for _, pkg := range packages {
				if !pkg.IsMember(gm) {
					continue
				}
				pkg.coreCount++
				if pr.Flags == 0 {
					pkg.threadCount++
				} else {
					pkg.threadCount += 2
				}
				if pr.EfficiencyClass < maxEfficiencyClass {
					pkg.efficiencyCoreCount++
				}
			}
		}
	}

	// Summarize the results
	for i, pkg := range packages {
		slog.Info("", "package", i, "cores", pkg.coreCount, "efficiency", pkg.efficiencyCoreCount, "threads", pkg.threadCount)
	}

	return packages
}

// splitLogicalProcessorRecords returns a pointer to each well-formed record in
// buf, in order. It stops at the first record whose header does not fit or
// whose Size is smaller than the header or larger than the remaining bytes (a
// zero Size would otherwise never advance). Core and package records whose
// Size cannot hold GroupCount group masks are dropped.
func splitLogicalProcessorRecords(buf []byte) []*SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX {
	headerLen := int(unsafe.Offsetof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX{}.U))
	fixedLen := int(unsafe.Offsetof(PROCESSOR_RELATIONSHIP{}.GroupMask))
	groupLen := int(unsafe.Sizeof(GROUP_AFFINITY{}))

	var records []*SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX
	for offset := 0; offset < len(buf); {
		remaining := len(buf) - offset
		if remaining < headerLen {
			slog.Warn("truncated processor information record", "offset", offset, "remaining", remaining)
			break
		}
		slpi := (*SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)(unsafe.Pointer(&buf[offset]))
		size := int(slpi.Size)
		if size < headerLen || size > remaining {
			slog.Warn("malformed processor information record", "offset", offset, "size", slpi.Size, "remaining", remaining)
			break
		}
		offset += size

		if slpi.Relationship == RelationProcessorCore || slpi.Relationship == RelationProcessorPackage {
			payloadLen := size - headerLen
			if payloadLen < fixedLen {
				slog.Warn("processor relationship record too small", "size", slpi.Size)
				continue
			}
			// GroupCount lives inside the fixed part, so it is safe to read now.
			pr := (*PROCESSOR_RELATIONSHIP)(unsafe.Pointer(&slpi.U[0]))
			if int(pr.GroupCount) > (payloadLen-fixedLen)/groupLen {
				slog.Warn("processor relationship record too small for its groups", "size", slpi.Size, "groups", pr.GroupCount)
				continue
			}
		}
		records = append(records, slpi)
	}
	return records
}

// processorGroupMasks views slpi's payload as a PROCESSOR_RELATIONSHIP and
// returns it together with a pointer to each of its GroupCount group masks.
// The caller must have verified that the record is large enough.
func processorGroupMasks(slpi *SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX) (*PROCESSOR_RELATIONSHIP, []*GROUP_AFFINITY) {
	pr := (*PROCESSOR_RELATIONSHIP)(unsafe.Pointer(&slpi.U[0]))
	if pr.GroupCount == 0 {
		return pr, nil
	}
	groups := make([]*GROUP_AFFINITY, 0, pr.GroupCount)
	ga0 := unsafe.Pointer(&pr.GroupMask[0])
	for j := range pr.GroupCount {
		gm := (*GROUP_AFFINITY)(unsafe.Pointer(uintptr(ga0) + uintptr(j)*unsafe.Sizeof(GROUP_AFFINITY{})))
		groups = append(groups, gm)
	}
	return pr, groups
}

201
// GetCPUDetails returns one CPU entry per processor package found by
// getLogicalProcessorInformationEx, carrying that package's core, efficiency
// core and thread counts. If the processor information cannot be retrieved, a
// warning is logged and nil is returned.
func GetCPUDetails() []CPU {
	buf, err := getLogicalProcessorInformationEx()
	if err != nil {
		slog.Warn("failed to get CPU details", "error", err)
		return nil
	}
	packages := processSystemLogicalProcessorInforationList(buf)
	cpus := make([]CPU, len(packages))

	// Copy the per-package tallies across; cpus[i] corresponds to packages[i].
	for i, pkg := range packages {
		cpus[i].CoreCount = pkg.coreCount
		cpus[i].EfficiencyCoreCount = pkg.efficiencyCoreCount
		cpus[i].ThreadCount = pkg.threadCount
	}
	return cpus
}

// IsNUMA always reports false in this Windows implementation.
func IsNUMA() bool {
	// numa support in ggml is linux only
	return false
}