"git@developer.sourcefind.cn:liming6/dcu-process-montor.git" did not exist on "ee110e57ec065d475848823b9d1146da23e6ad50"
amd_linux.go 14 KB
Newer Older
Daniel Hiltgen's avatar
Daniel Hiltgen committed
1
2
3
4
5
6
7
8
9
10
package gpu

import (
	"bufio"
	"errors"
	"fmt"
	"io"
	"log/slog"
	"os"
	"path/filepath"
Daniel Hiltgen's avatar
Daniel Hiltgen committed
11
	"regexp"
Daniel Hiltgen's avatar
Daniel Hiltgen committed
12
13
14
	"slices"
	"strconv"
	"strings"
Daniel Hiltgen's avatar
Daniel Hiltgen committed
15

16
	"github.com/ollama/ollama/envconfig"
Daniel Hiltgen's avatar
Daniel Hiltgen committed
17
	"github.com/ollama/ollama/format"
Daniel Hiltgen's avatar
Daniel Hiltgen committed
18
19
20
21
22
23
24
25
26
27
28
)

// Discovery logic for AMD/ROCm GPUs

const (
	// sysfs locations exposed by the amdgpu kernel driver: the driver version
	// file, the KFD topology directory (one numbered sub-directory per node),
	// and a glob matching the properties file of every node.
	DriverVersionFile     = "/sys/module/amdgpu/version"
	AMDNodesSysfsDir      = "/sys/class/kfd/kfd/topology/nodes/"
	GPUPropertiesFileGlob = AMDNodesSysfsDir + "*/properties"

	// Prefix with the node dir
	GPUTotalMemoryFileGlob = "mem_banks/*/properties" // size_in_bytes line

	// Direct Rendering Manager sysfs location. The memory file names are
	// relative to a directory matched by DRMDeviceDirGlob.
	DRMDeviceDirGlob   = "/sys/class/drm/card*/device"
	DRMTotalMemoryFile = "mem_info_vram_total"
	DRMUsedMemoryFile  = "mem_info_vram_used"

	// Identification files, relative to a DRM device directory.
	// In hex; properties file is in decimal
	DRMUniqueIDFile = "unique_id"
	DRMVendorFile   = "vendor"
	DRMDeviceFile   = "device"
)

var (
	// ROCmLibGlobs lists file name patterns used to validate if the given ROCm lib is usable
	ROCmLibGlobs = []string{"libhipblas.so.2*", "rocblas"} // TODO - probably include more coverage of files here...

	// RocmStandardLocations lists well-known ROCm library directories.
	// NOTE(review): not referenced in this file; presumably consumed by the
	// library-directory validation helpers defined elsewhere - confirm.
	RocmStandardLocations = []string{"/opt/rocm/lib", "/usr/lib64"}
)

// Gather GPU information from the amdgpu driver if any supported GPUs are detected
48
49
func AMDGetGPUInfo() []RocmGPUInfo {
	resp := []RocmGPUInfo{}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
50
	if !AMDDetected() {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
51
		return resp
Daniel Hiltgen's avatar
Daniel Hiltgen committed
52
53
54
	}

	// Opportunistic logging of driver version to aid in troubleshooting
Daniel Hiltgen's avatar
Daniel Hiltgen committed
55
56
	driverMajor, driverMinor, err := AMDDriverVersion()
	if err != nil {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
57
		// TODO - if we see users crash and burn with the upstreamed kernel this can be adjusted to hard-fail rocm support and fallback to CPU
Daniel Hiltgen's avatar
Daniel Hiltgen committed
58
		slog.Warn("ollama recommends running the https://www.amd.com/en/support/linux-drivers", "error", err)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
59
60
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
61
62
	// Determine if the user has already pre-selected which GPUs to look at, then ignore the others
	var visibleDevices []string
Michael Yang's avatar
string  
Michael Yang committed
63
64
65
	hipVD := envconfig.HipVisibleDevices()   // zero based index only
	rocrVD := envconfig.RocrVisibleDevices() // zero based index or UUID, but consumer cards seem to not support UUID
	gpuDO := envconfig.GpuDeviceOrdinal()    // zero based index
Daniel Hiltgen's avatar
Daniel Hiltgen committed
66
67
68
69
70
71
72
73
74
75
	switch {
	// TODO is this priorty order right?
	case hipVD != "":
		visibleDevices = strings.Split(hipVD, ",")
	case rocrVD != "":
		visibleDevices = strings.Split(rocrVD, ",")
		// TODO - since we don't yet support UUIDs, consider detecting and reporting here
		// all our test systems show GPU-XX indicating UUID is not supported
	case gpuDO != "":
		visibleDevices = strings.Split(gpuDO, ",")
Daniel Hiltgen's avatar
Daniel Hiltgen committed
76
77
	}

Michael Yang's avatar
string  
Michael Yang committed
78
	gfxOverride := envconfig.HsaOverrideGfxVersion()
Daniel Hiltgen's avatar
Daniel Hiltgen committed
79
80
81
82
83
84
85
86
87
88
89
90
	var supported []string
	libDir := ""

	// The amdgpu driver always exposes the host CPU(s) first, but we have to skip them and subtract
	// from the other IDs to get alignment with the HIP libraries expectations (zero is the first GPU, not the CPU)
	matches, _ := filepath.Glob(GPUPropertiesFileGlob)
	cpuCount := 0
	for _, match := range matches {
		slog.Debug("evaluating amdgpu node " + match)
		fp, err := os.Open(match)
		if err != nil {
			slog.Debug("failed to open sysfs node", "file", match, "error", err)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
91
92
			continue
		}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
93
94
95
96
97
		defer fp.Close()
		nodeID, err := strconv.Atoi(filepath.Base(filepath.Dir(match)))
		if err != nil {
			slog.Debug("failed to parse node ID", "error", err)
			continue
Daniel Hiltgen's avatar
Daniel Hiltgen committed
98
99
		}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
100
101
102
		scanner := bufio.NewScanner(fp)
		isCPU := false
		var major, minor, patch uint64
103
		var vendor, device, uniqueID uint64
Daniel Hiltgen's avatar
Daniel Hiltgen committed
104
105
106
107
108
		for scanner.Scan() {
			line := strings.TrimSpace(scanner.Text())
			// Note: we could also use "cpu_cores_count X" where X is greater than zero to detect CPUs
			if strings.HasPrefix(line, "gfx_target_version") {
				ver := strings.Fields(line)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
109

Daniel Hiltgen's avatar
Daniel Hiltgen committed
110
111
112
113
114
115
				// Detect CPUs
				if len(ver) == 2 && ver[1] == "0" {
					slog.Debug("detected CPU " + match)
					isCPU = true
					break
				}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
116

Daniel Hiltgen's avatar
Daniel Hiltgen committed
117
118
119
120
121
122
123
124
125
126
127
128
129
130
				if len(ver) != 2 || len(ver[1]) < 5 {
					slog.Warn("malformed "+match, "gfx_target_version", line)
					// If this winds up being a CPU, our offsets may be wrong
					continue
				}
				l := len(ver[1])
				var err1, err2, err3 error
				patch, err1 = strconv.ParseUint(ver[1][l-2:l], 10, 32)
				minor, err2 = strconv.ParseUint(ver[1][l-4:l-2], 10, 32)
				major, err3 = strconv.ParseUint(ver[1][:l-4], 10, 32)
				if err1 != nil || err2 != nil || err3 != nil {
					slog.Debug("malformed int " + line)
					continue
				}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
131
132
133
			} else if strings.HasPrefix(line, "vendor_id") {
				ver := strings.Fields(line)
				if len(ver) != 2 {
134
					slog.Debug("malformed", "vendor_id", line)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
135
136
					continue
				}
137
				vendor, err = strconv.ParseUint(ver[1], 10, 64)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
138
				if err != nil {
139
					slog.Debug("malformed", "vendor_id", line, "error", err)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
140
141
142
143
				}
			} else if strings.HasPrefix(line, "device_id") {
				ver := strings.Fields(line)
				if len(ver) != 2 {
144
					slog.Debug("malformed", "device_id", line)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
145
146
					continue
				}
147
				device, err = strconv.ParseUint(ver[1], 10, 64)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
148
				if err != nil {
149
150
151
152
153
154
155
156
157
158
159
					slog.Debug("malformed", "device_id", line, "error", err)
				}
			} else if strings.HasPrefix(line, "unique_id") {
				ver := strings.Fields(line)
				if len(ver) != 2 {
					slog.Debug("malformed", "unique_id", line)
					continue
				}
				uniqueID, err = strconv.ParseUint(ver[1], 10, 64)
				if err != nil {
					slog.Debug("malformed", "unique_id", line, "error", err)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
160
				}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
161
162
163
164
			}
			// TODO - any other properties we want to extract and record?
			// vendor_id + device_id -> pci lookup for "Name"
			// Other metrics that may help us understand relative performance between multiple GPUs
Daniel Hiltgen's avatar
Daniel Hiltgen committed
165
166
		}

167
168
169
170
		// Note: while ./mem_banks/*/used_memory exists, it doesn't appear to take other VRAM consumers
		// into consideration, so we instead map the device over to the DRM driver sysfs nodes which
		// do reliably report VRAM usage.

Daniel Hiltgen's avatar
Daniel Hiltgen committed
171
172
173
		if isCPU {
			cpuCount++
			continue
Daniel Hiltgen's avatar
Daniel Hiltgen committed
174
175
		}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
176
177
		// CPUs are always first in the list
		gpuID := nodeID - cpuCount
Daniel Hiltgen's avatar
Daniel Hiltgen committed
178

Daniel Hiltgen's avatar
Daniel Hiltgen committed
179
180
181
		// Shouldn't happen, but just in case...
		if gpuID < 0 {
			slog.Error("unexpected amdgpu sysfs data resulted in negative GPU ID, please set OLLAMA_DEBUG=1 and report an issue")
Daniel Hiltgen's avatar
Daniel Hiltgen committed
182
			return nil
Daniel Hiltgen's avatar
Daniel Hiltgen committed
183
184
		}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
185
		if int(major) < RocmComputeMin {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
186
			slog.Warn(fmt.Sprintf("amdgpu too old gfx%d%x%x", major, minor, patch), "gpu", gpuID)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
187
188
			continue
		}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
189
190

		// Look up the memory for the current node
Daniel Hiltgen's avatar
Daniel Hiltgen committed
191
192
		totalMemory := uint64(0)
		usedMemory := uint64(0)
193
		var usedFile string
194
195
196
197
198
199
200
		mapping := []struct {
			id       uint64
			filename string
		}{
			{vendor, DRMVendorFile},
			{device, DRMDeviceFile},
			{uniqueID, DRMUniqueIDFile}, // Not all devices will report this
Daniel Hiltgen's avatar
Daniel Hiltgen committed
201
		}
202
203
204
205
206
207
208
		slog.Debug("mapping amdgpu to drm sysfs nodes", "amdgpu", match, "vendor", vendor, "device", device, "unique_id", uniqueID)
		// Map over to DRM location to find the total/free memory
		drmMatches, _ := filepath.Glob(DRMDeviceDirGlob)
		for _, devDir := range drmMatches {
			matched := true
			for _, m := range mapping {
				if m.id == 0 {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
209
					// Null ID means it didn't populate, so we can't use it to match
210
211
212
					continue
				}
				filename := filepath.Join(devDir, m.filename)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
213
				buf, err := os.ReadFile(filename)
214
215
216
217
218
				if err != nil {
					slog.Debug("failed to read sysfs node", "file", filename, "error", err)
					matched = false
					break
				}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
219
				// values here are in hex, strip off the lead 0x and parse so we can compare the numeric (decimal) values in amdgpu
220
221
222
223
224
225
226
227
228
229
230
231
				cmp, err := strconv.ParseUint(strings.TrimPrefix(strings.TrimSpace(string(buf)), "0x"), 16, 64)
				if err != nil {
					slog.Debug("failed to parse sysfs node", "file", filename, "error", err)
					matched = false
					break
				}
				if cmp != m.id {
					matched = false
					break
				}
			}
			if !matched {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
232
233
				continue
			}
234
235
236
237

			// Found the matching DRM directory
			slog.Debug("matched", "amdgpu", match, "drm", devDir)
			totalFile := filepath.Join(devDir, DRMTotalMemoryFile)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
238
			buf, err := os.ReadFile(totalFile)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
239
			if err != nil {
240
241
				slog.Debug("failed to read sysfs node", "file", totalFile, "error", err)
				break
Daniel Hiltgen's avatar
Daniel Hiltgen committed
242
			}
243
			totalMemory, err = strconv.ParseUint(strings.TrimSpace(string(buf)), 10, 64)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
244
			if err != nil {
245
246
247
248
				slog.Debug("failed to parse sysfs node", "file", totalFile, "error", err)
				break
			}

249
250
			usedFile = filepath.Join(devDir, DRMUsedMemoryFile)
			usedMemory, err = getFreeMemory(usedFile)
251
			if err != nil {
252
				slog.Debug("failed to update used memory", "error", err)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
253
			}
254
			break
Daniel Hiltgen's avatar
Daniel Hiltgen committed
255
		}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
256
257
258

		// iGPU detection, remove this check once we can support an iGPU variant of the rocm library
		if totalMemory < IGPUMemLimit {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
259
			slog.Info("unsupported Radeon iGPU detected skipping", "id", gpuID, "total", format.HumanBytes2(totalMemory))
Daniel Hiltgen's avatar
Daniel Hiltgen committed
260
261
			continue
		}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
262
263
264
265
266
		var name string
		// TODO - PCI ID lookup
		if vendor > 0 && device > 0 {
			name = fmt.Sprintf("%04x:%04x", vendor, device)
		}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
267

Daniel Hiltgen's avatar
Daniel Hiltgen committed
268
269
		slog.Debug("amdgpu memory", "gpu", gpuID, "total", format.HumanBytes2(totalMemory))
		slog.Debug("amdgpu memory", "gpu", gpuID, "available", format.HumanBytes2(totalMemory-usedMemory))
270
271
272
273
274
275
276
		gpuInfo := RocmGPUInfo{
			GpuInfo: GpuInfo{
				Library: "rocm",
				memInfo: memInfo{
					TotalMemory: totalMemory,
					FreeMemory:  (totalMemory - usedMemory),
				},
Daniel Hiltgen's avatar
Daniel Hiltgen committed
277
				ID:            strconv.Itoa(gpuID),
278
279
280
281
282
				Name:          name,
				Compute:       fmt.Sprintf("gfx%d%x%x", major, minor, patch),
				MinimumMemory: rocmMinimumMemory,
				DriverMajor:   driverMajor,
				DriverMinor:   driverMinor,
Daniel Hiltgen's avatar
Daniel Hiltgen committed
283
			},
284
			usedFilepath: usedFile,
Daniel Hiltgen's avatar
Daniel Hiltgen committed
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
		}

		// If the user wants to filter to a subset of devices, filter out if we aren't a match
		if len(visibleDevices) > 0 {
			include := false
			for _, visible := range visibleDevices {
				if visible == gpuInfo.ID {
					include = true
					break
				}
			}
			if !include {
				slog.Info("filtering out device per user request", "id", gpuInfo.ID, "visible_devices", visibleDevices)
				continue
			}
		}

		// Final validation is gfx compatibility - load the library if we haven't already loaded it
		// even if the user overrides, we still need to validate the library
		if libDir == "" {
			libDir, err = AMDValidateLibDir()
			if err != nil {
				slog.Warn("unable to verify rocm library, will use cpu", "error", err)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
308
				return nil
Daniel Hiltgen's avatar
Daniel Hiltgen committed
309
310
311
312
313
314
315
316
317
318
			}
		}
		gpuInfo.DependencyPath = libDir

		if gfxOverride == "" {
			// Only load supported list once
			if len(supported) == 0 {
				supported, err = GetSupportedGFX(libDir)
				if err != nil {
					slog.Warn("failed to lookup supported GFX types, falling back to CPU mode", "error", err)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
319
					return nil
Daniel Hiltgen's avatar
Daniel Hiltgen committed
320
321
322
				}
				slog.Debug("rocm supported GPUs", "types", supported)
			}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
323
			gfx := gpuInfo.Compute
Daniel Hiltgen's avatar
Daniel Hiltgen committed
324
325
326
327
328
329
330
331
332
			if !slices.Contains[[]string, string](supported, gfx) {
				slog.Warn("amdgpu is not supported", "gpu", gpuInfo.ID, "gpu_type", gfx, "library", libDir, "supported_types", supported)
				// TODO - consider discrete markdown just for ROCM troubleshooting?
				slog.Warn("See https://github.com/ollama/ollama/blob/main/docs/gpu.md#overrides for HSA_OVERRIDE_GFX_VERSION usage")
				continue
			} else {
				slog.Info("amdgpu is supported", "gpu", gpuInfo.ID, "gpu_type", gfx)
			}
		} else {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
333
			slog.Info("skipping rocm gfx compatibility check", "HSA_OVERRIDE_GFX_VERSION", gfxOverride)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
334
335
		}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
336
337
338
339
340
		// Check for env var workarounds
		if name == "1002:687f" { // Vega RX 56
			gpuInfo.EnvWorkarounds = append(gpuInfo.EnvWorkarounds, [2]string{"HSA_ENABLE_SDMA", "0"})
		}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
341
342
		// The GPU has passed all the verification steps and is supported
		resp = append(resp, gpuInfo)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
343
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
344
345
	if len(resp) == 0 {
		slog.Info("no compatible amdgpu devices detected")
Daniel Hiltgen's avatar
Daniel Hiltgen committed
346
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
347
	return resp
Daniel Hiltgen's avatar
Daniel Hiltgen committed
348
349
350
351
352
353
354
355
356
357
358
}

// Quick check for AMD driver so we can skip amdgpu discovery if not present
func AMDDetected() bool {
	// Some driver versions (older?) don't have a version file, so just lookup the parent dir
	sysfsDir := filepath.Dir(DriverVersionFile)
	_, err := os.Stat(sysfsDir)
	if errors.Is(err, os.ErrNotExist) {
		slog.Debug("amdgpu driver not detected " + sysfsDir)
		return false
	} else if err != nil {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
359
		slog.Debug("error looking up amd driver", "path", sysfsDir, "error", err)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
360
361
362
363
364
365
366
367
		return false
	}
	return true
}

// Prefer to use host installed ROCm, as long as it meets our minimum requirements
// failing that, tell the user how to download it on their own
func AMDValidateLibDir() (string, error) {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
368
	libDir, err := commonAMDValidateLibDir()
369
	if err == nil {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
370
		return libDir, nil
371
372
	}

373
374
375
	// Well known ollama installer path
	installedRocmDir := "/usr/share/ollama/lib/rocm"
	if rocmLibUsable(installedRocmDir) {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
376
		return installedRocmDir, nil
Daniel Hiltgen's avatar
Daniel Hiltgen committed
377
378
	}

379
380
	// If we still haven't found a usable rocm, the user will have to install it on their own
	slog.Warn("amdgpu detected, but no compatible rocm library found.  Either install rocm v6, or follow manual install instructions at https://github.com/ollama/ollama/blob/main/docs/linux.md#manual-install")
Daniel Hiltgen's avatar
Daniel Hiltgen committed
381
382
383
	return "", fmt.Errorf("no suitable rocm found, falling back to CPU")
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
384
385
func AMDDriverVersion() (driverMajor, driverMinor int, err error) {
	_, err = os.Stat(DriverVersionFile)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
386
	if err != nil {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
387
		return 0, 0, fmt.Errorf("amdgpu version file missing: %s %w", DriverVersionFile, err)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
388
389
390
	}
	fp, err := os.Open(DriverVersionFile)
	if err != nil {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
391
		return 0, 0, err
Daniel Hiltgen's avatar
Daniel Hiltgen committed
392
393
394
395
	}
	defer fp.Close()
	verString, err := io.ReadAll(fp)
	if err != nil {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
		return 0, 0, err
	}

	pattern := `\A(\d+)\.(\d+).*`
	regex := regexp.MustCompile(pattern)
	match := regex.FindStringSubmatch(string(verString))
	if len(match) < 2 {
		return 0, 0, fmt.Errorf("malformed version string %s", string(verString))
	}
	driverMajor, err = strconv.Atoi(match[1])
	if err != nil {
		return 0, 0, err
	}
	driverMinor, err = strconv.Atoi(match[2])
	if err != nil {
		return 0, 0, err
Daniel Hiltgen's avatar
Daniel Hiltgen committed
412
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
413
	return driverMajor, driverMinor, nil
Daniel Hiltgen's avatar
Daniel Hiltgen committed
414
}
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431

// RefreshFreeMemory re-reads each GPU's used-memory sysfs file and updates its
// FreeMemory to TotalMemory minus the used amount.
//
// It stops at the first GPU whose file cannot be read or parsed and returns
// that error; GPUs earlier in the list have already been updated by then.
// An empty list is a no-op.
func (gpus RocmGPUInfoList) RefreshFreeMemory() error {
	for i := range gpus {
		usedMemory, err := getFreeMemory(gpus[i].usedFilepath)
		if err != nil {
			return err
		}
		// Clamp instead of subtracting blindly: the values are unsigned, so a
		// used figure above the total would wrap around to a huge free amount.
		freeMemory := uint64(0)
		if usedMemory < gpus[i].TotalMemory {
			freeMemory = gpus[i].TotalMemory - usedMemory
		}
		slog.Debug("updating rocm free memory", "gpu", gpus[i].ID, "name", gpus[i].Name, "before", format.HumanBytes2(gpus[i].FreeMemory), "now", format.HumanBytes2(freeMemory))
		gpus[i].FreeMemory = freeMemory
	}
	return nil
}

// getFreeMemory reads the sysfs file usedFile and parses its contents as a
// base-10 unsigned integer, ignoring surrounding whitespace.
//
// NOTE: despite the name, the value returned is the amount of memory in *use*;
// callers subtract it from the total to obtain the free amount.
//
// The returned error wraps the underlying read or parse failure, so callers can
// inspect it with errors.Is / errors.As. Logging is left to the caller.
func getFreeMemory(usedFile string) (uint64, error) {
	buf, err := os.ReadFile(usedFile)
	if err != nil {
		return 0, fmt.Errorf("failed to read sysfs node %s %w", usedFile, err)
	}
	usedMemory, err := strconv.ParseUint(strings.TrimSpace(string(buf)), 10, 64)
	if err != nil {
		return 0, fmt.Errorf("failed to parse sysfs node %s %w", usedFile, err)
	}
	return usedMemory, nil
}