gpu_windows.go 6.49 KB
Newer Older
Daniel Hiltgen's avatar
Daniel Hiltgen committed
1
2
3
4
package gpu

import (
	"fmt"
5
	"log/slog"
Daniel Hiltgen's avatar
Daniel Hiltgen committed
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
	"syscall"
	"unsafe"
)

// MEMORYSTATUSEX is the buffer handed by pointer to the kernel32
// GlobalMemoryStatusEx call made in GetCPUMem. Field order and widths must stay
// exactly as declared because the OS fills the structure in place.
type MEMORYSTATUSEX struct {
	length               uint32 // size of this struct in bytes; GetCPUMem sets it from sizeofMemoryStatusEx before the call
	MemoryLoad           uint32
	TotalPhys            uint64 // returned by GetCPUMem as TotalMemory
	AvailPhys            uint64 // returned by GetCPUMem as FreeMemory
	TotalPageFile        uint64
	AvailPageFile        uint64 // returned by GetCPUMem as FreeSwap
	TotalVirtual         uint64
	AvailVirtual         uint64
	AvailExtendedVirtual uint64
}

var (
23
24
25
26
	k32                              = syscall.NewLazyDLL("kernel32.dll")
	globalMemoryStatusExProc         = k32.NewProc("GlobalMemoryStatusEx")
	sizeofMemoryStatusEx             = uint32(unsafe.Sizeof(MEMORYSTATUSEX{}))
	GetLogicalProcessorInformationEx = k32.NewProc("GetLogicalProcessorInformationEx")
Daniel Hiltgen's avatar
Daniel Hiltgen committed
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
)

// File-path glob patterns for vendor GPU libraries on Windows, one list per
// library. NOTE(review): the code that expands these patterns is not in this
// file; confirm how they are consumed before changing them.
var (
	// CudartGlobs matches the CUDA runtime DLL inside a CUDA toolkit install.
	CudartGlobs = []string{
		"c:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v*\\bin\\cudart64_*.dll",
	}

	// NvmlGlobs matches nvml.dll in System32.
	NvmlGlobs = []string{
		"c:\\Windows\\System32\\nvml.dll",
	}

	// NvcudaGlobs matches nvcuda.dll in any c:\windows\system* directory.
	NvcudaGlobs = []string{
		"c:\\windows\\system*\\nvcuda.dll",
	}

	// OneapiGlobs matches ze_intel_gpu64.dll inside the driver store.
	OneapiGlobs = []string{
		"c:\\Windows\\System32\\DriverStore\\FileRepository\\*\\ze_intel_gpu64.dll",
	}
)

Michael Yang's avatar
lint  
Michael Yang committed
45
46
47
48
49
50
// File names (or, for cudart, a name pattern) of the vendor management
// libraries. NOTE(review): the consumers of these names are not visible in
// this file; presumably they are matched against library search paths —
// confirm before changing.
var (
	CudartMgmtName = "cudart64_*.dll"
	NvcudaMgmtName = "nvcuda.dll"
	NvmlMgmtName   = "nvml.dll"
	OneapiMgmtName = "ze_intel_gpu64.dll"
)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
51
52
53
54
55
56
57

func GetCPUMem() (memInfo, error) {
	memStatus := MEMORYSTATUSEX{length: sizeofMemoryStatusEx}
	r1, _, err := globalMemoryStatusExProc.Call(uintptr(unsafe.Pointer(&memStatus)))
	if r1 == 0 {
		return memInfo{}, fmt.Errorf("GlobalMemoryStatusEx failed: %w", err)
	}
58
	return memInfo{TotalMemory: memStatus.TotalPhys, FreeMemory: memStatus.AvailPhys, FreeSwap: memStatus.AvailPageFile}, nil
Daniel Hiltgen's avatar
Daniel Hiltgen committed
59
}
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234

// LOGICAL_PROCESSOR_RELATIONSHIP tags each record returned by
// GetLogicalProcessorInformationEx with the kind of relationship it describes.
type LOGICAL_PROCESSOR_RELATIONSHIP uint32

// Relationship kinds, written out with their numeric values. RelationAll is
// the value this file passes to GetLogicalProcessorInformationEx when it asks
// for the record list.
const (
	RelationProcessorCore    LOGICAL_PROCESSOR_RELATIONSHIP = 0
	RelationNumaNode         LOGICAL_PROCESSOR_RELATIONSHIP = 1
	RelationCache            LOGICAL_PROCESSOR_RELATIONSHIP = 2
	RelationProcessorPackage LOGICAL_PROCESSOR_RELATIONSHIP = 3
	RelationGroup            LOGICAL_PROCESSOR_RELATIONSHIP = 4
	RelationProcessorDie     LOGICAL_PROCESSOR_RELATIONSHIP = 5
	RelationNumaNodeEx       LOGICAL_PROCESSOR_RELATIONSHIP = 6
	RelationProcessorModule  LOGICAL_PROCESSOR_RELATIONSHIP = 7

	RelationAll LOGICAL_PROCESSOR_RELATIONSHIP = 0xffff
)

// GROUP_AFFINITY is one entry of a PROCESSOR_RELATIONSHIP's GroupMask array.
// It is read in place from the buffer returned by the OS, so the field layout
// must stay exactly as declared.
type GROUP_AFFINITY struct {
	Mask     uintptr // KAFFINITY
	Group    uint16  // NOTE(review): presumably the processor group that Mask applies to — confirm against the Windows GROUP_AFFINITY definition
	Reserved [3]uint16
}

// PROCESSOR_RELATIONSHIP is the payload this file reads from records whose
// Relationship is RelationProcessorCore or RelationProcessorPackage. It is
// overlaid on the OS-provided buffer, so the layout must not change.
type PROCESSOR_RELATIONSHIP struct {
	Flags           byte // a non-zero value is counted as two threads for the core, zero as one
	EfficiencyClass byte // compared against the largest class seen to identify efficiency cores
	Reserved        [20]byte
	GroupCount      uint16
	GroupMask       [1]GROUP_AFFINITY // len GroupCount
}

// Omitted unused structs: NUMA_NODE_RELATIONSHIP CACHE_RELATIONSHIP GROUP_RELATIONSHIP

// SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX is the header of each
// variable-length record in the buffer filled by
// GetLogicalProcessorInformationEx. Records are laid out back to back; the
// parser advances by Size to reach the next one.
type SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX struct {
	Relationship LOGICAL_PROCESSOR_RELATIONSHIP
	Size         uint32  // byte length used to step from this record to the next
	U            [1]byte // Union len Size
	// PROCESSOR_RELATIONSHIP
	// NUMA_NODE_RELATIONSHIP
	// CACHE_RELATIONSHIP
	// GROUP_RELATIONSHIP
}

// IsMember reports whether target overlaps this affinity, i.e. both entries
// refer to the same processor group and share at least one set bit in Mask.
// It returns false when either side is nil.
func (group *GROUP_AFFINITY) IsMember(target *GROUP_AFFINITY) bool {
	if group == nil || target == nil {
		return false
	}
	// Mask bits are numbered relative to Group, so the same bit pattern in two
	// different groups names different logical processors. Comparing Mask alone
	// would attribute a core to every package whose mask happens to overlap.
	return group.Group == target.Group && group.Mask&target.Mask != 0
}

// winPackage collects what the parser learns about one processor package: the
// affinity entries that belong to it and the running core/thread tallies.
type winPackage struct {
	groups              []*GROUP_AFFINITY
	coreCount           int // performance cores = coreCount - efficiencyCoreCount
	efficiencyCoreCount int
	threadCount         int
}

// IsMember reports whether target is a member of at least one of the
// package's affinity entries, as decided by GROUP_AFFINITY.IsMember.
func (pkg *winPackage) IsMember(target *GROUP_AFFINITY) bool {
	for i := 0; i < len(pkg.groups); i++ {
		if pkg.groups[i].IsMember(target) {
			return true
		}
	}
	return false
}

// getLogicalProcessorInformationEx returns the raw record list produced by the
// kernel32 GetLogicalProcessorInformationEx call for RelationAll.
//
// The call is made twice. The first call passes a 1-byte probe buffer and is
// expected to fail with ERROR_INSUFFICIENT_BUFFER while reporting the required
// length; the second call passes a buffer of that length. Any other outcome of
// either call is returned as an error.
func getLogicalProcessorInformationEx() ([]byte, error) {
	probe := make([]byte, 1)
	// ReturnedLength is a DWORD, so back it with a 32-bit variable rather than
	// a Go int.
	bufSize := uint32(len(probe))
	ret, _, err := GetLogicalProcessorInformationEx.Call(
		uintptr(RelationAll),
		uintptr(unsafe.Pointer(&probe[0])),
		uintptr(unsafe.Pointer(&bufSize)),
	)
	if ret != 0 {
		return nil, fmt.Errorf("failed to determine size info ret:%d %w", ret, err)
	}
	// Only ERROR_INSUFFICIENT_BUFFER means bufSize now holds the required
	// length; anything else is a genuine failure of the probe call.
	if err != syscall.ERROR_INSUFFICIENT_BUFFER {
		return nil, fmt.Errorf("failed to determine size info: %w", err)
	}
	if bufSize == 0 {
		// Guard the &buf[0] below, which would panic on an empty slice.
		return nil, fmt.Errorf("failed to determine size info: zero length reported")
	}

	buf := make([]byte, bufSize)
	ret, _, err = GetLogicalProcessorInformationEx.Call(
		uintptr(RelationAll),
		uintptr(unsafe.Pointer(&buf[0])),
		uintptr(unsafe.Pointer(&bufSize)),
	)
	if ret == 0 {
		return nil, fmt.Errorf("failed to gather processor information ret:%d buflen:%d %w", ret, bufSize, err)
	}
	// Hand back only the bytes the call reports as written.
	if int(bufSize) < len(buf) {
		buf = buf[:bufSize]
	}
	return buf, nil
}

// processSystemLogicalProcessorInforationList parses the record list returned
// by getLogicalProcessorInformationEx and returns one winPackage per
// RelationProcessorPackage record, in buffer order.
//
// The buffer is walked three times: once to collect the packages and their
// affinity entries, once to find the largest EfficiencyClass among the
// RelationProcessorCore records, and once to attribute every core to the
// packages it is a member of while tallying cores, threads and efficiency
// cores. The returned packages keep pointers into buf.
//
// A record whose Size is zero, too small, or larger than the remaining buffer
// ends the walk instead of looping forever or reading past the buffer.
func processSystemLogicalProcessorInforationList(buf []byte) []*winPackage {
	// Find all the packages first
	packages := []*winPackage{}
	forEachProcessorRelationship(buf, RelationProcessorPackage, func(pr *PROCESSOR_RELATIONSHIP) {
		pkg := &winPackage{}
		masks := processorGroupMasks(pr)
		for i := range masks {
			pkg.groups = append(pkg.groups, &masks[i])
		}
		packages = append(packages, pkg)
	})

	slog.Info("packages", "count", len(packages))

	// To identify efficiency cores we have to compare the relative values
	// Larger values are "less efficient" (aka, more performant)
	var maxEfficiencyClass byte
	forEachProcessorRelationship(buf, RelationProcessorCore, func(pr *PROCESSOR_RELATIONSHIP) {
		if pr.EfficiencyClass > maxEfficiencyClass {
			maxEfficiencyClass = pr.EfficiencyClass
		}
	})
	if maxEfficiencyClass > 0 {
		slog.Info("efficiency cores detected", "maxEfficiencyClass", maxEfficiencyClass)
	}

	// then match up the Cores to the Packages, count up cores, threads and efficiency cores
	forEachProcessorRelationship(buf, RelationProcessorCore, func(pr *PROCESSOR_RELATIONSHIP) {
		masks := processorGroupMasks(pr)
		for i := range masks {
			for _, pkg := range packages {
				if !pkg.IsMember(&masks[i]) {
					continue
				}
				pkg.coreCount++
				// A non-zero Flags value is counted as two threads; this
				// assumes such cores expose exactly two logical processors.
				if pr.Flags == 0 {
					pkg.threadCount++
				} else {
					pkg.threadCount += 2
				}
				if pr.EfficiencyClass < maxEfficiencyClass {
					pkg.efficiencyCoreCount++
				}
			}
		}
	})

	// Summarize the results
	for i, pkg := range packages {
		slog.Info("", "package", i, "cores", pkg.coreCount, "efficiency", pkg.efficiencyCoreCount, "threads", pkg.threadCount)
	}

	return packages
}

// forEachProcessorRelationship calls visit, in buffer order, with the payload of
// every record in buf whose Relationship equals want. It stops at the first
// record whose Size cannot be valid for the bytes that remain.
func forEachProcessorRelationship(buf []byte, want LOGICAL_PROCESSOR_RELATIONSHIP, visit func(pr *PROCESSOR_RELATIONSHIP)) {
	const (
		headerLen = int(unsafe.Offsetof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX{}.U))
		fixedLen  = int(unsafe.Offsetof(PROCESSOR_RELATIONSHIP{}.GroupMask))
		groupLen  = int(unsafe.Sizeof(GROUP_AFFINITY{}))
	)
	for offset := 0; offset+headerLen <= len(buf); {
		slpi := (*SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)(unsafe.Pointer(&buf[offset]))
		size := int(slpi.Size)
		// Size drives the walk: zero would never advance, and a value beyond
		// the remaining bytes would send later reads past the buffer.
		if size < headerLen || size > len(buf)-offset {
			slog.Warn("malformed processor information record", "offset", offset, "size", size, "buflen", len(buf))
			return
		}
		if slpi.Relationship == want {
			payloadLen := size - headerLen
			pr := (*PROCESSOR_RELATIONSHIP)(unsafe.Pointer(&slpi.U[0]))
			// GroupCount is only read once the fixed part is known to fit.
			if payloadLen < fixedLen || payloadLen < fixedLen+int(pr.GroupCount)*groupLen {
				slog.Warn("truncated processor relationship record", "offset", offset, "size", size)
				return
			}
			visit(pr)
		}
		offset += size
	}
}

// processorGroupMasks exposes the GroupCount affinity entries that follow pr
// in memory as a slice backed by the original buffer.
func processorGroupMasks(pr *PROCESSOR_RELATIONSHIP) []GROUP_AFFINITY {
	return unsafe.Slice(&pr.GroupMask[0], int(pr.GroupCount))
}

// GetCPUDetails returns one CPU entry per processor package found in the
// GetLogicalProcessorInformationEx record list, carrying that package's core,
// efficiency-core and thread counts. An error from fetching the record list is
// returned unchanged with a nil slice.
func GetCPUDetails() ([]CPU, error) {
	raw, err := getLogicalProcessorInformationEx()
	if err != nil {
		return nil, err
	}

	pkgs := processSystemLogicalProcessorInforationList(raw)
	cpus := make([]CPU, 0, len(pkgs))
	for _, p := range pkgs {
		cpus = append(cpus, CPU{
			CoreCount:           p.coreCount,
			EfficiencyCoreCount: p.efficiencyCoreCount,
			ThreadCount:         p.threadCount,
		})
	}
	return cpus, nil
}