//go:build linux || windows

package gpu

/*
#cgo linux LDFLAGS: -lrt -lpthread -ldl -lstdc++ -lm
#cgo windows LDFLAGS: -lpthread

#include "gpu_info.h"

*/
import "C"
import (
	"bufio"
	"bytes"
	"fmt"
	"log/slog"
	"os"
	"path/filepath"
	"runtime"
	"strings"
	"sync"
	"unsafe"

	"github.com/ollama/ollama/envconfig"
	"github.com/ollama/ollama/format"
)

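// cudaHandles tracks which NVIDIA management libraries were loaded
// (cudart, nvcuda, and/or nvml) and the device count they reported.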
type cudaHandles struct {
	deviceCount int
	cudart      *C.cudart_handle_t
	nvcuda      *C.nvcuda_handle_t
	nvml        *C.nvml_handle_t
}

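// oneapiHandles tracks the loaded Intel oneAPI management library and the
// total device count it reported across all drivers.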
type oneapiHandles struct {
	oneapi      *C.oneapi_handle_t
	deviceCount int
}

const (
	cudaMinimumMemory = 457 * format.MebiByte
	rocmMinimumMemory = 457 * format.MebiByte
	// TODO OneAPI minimum memory
)

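// Discovery state cached across calls to GetGPUInfo/GetCPUInfo; gpuMutex guards all of it.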
var (
	gpuMutex      sync.Mutex
	bootstrapped  bool
	cpuCapability CPUCapability
	cpus          []CPUInfo
	cudaGPUs      []CudaGPUInfo
	nvcudaLibPath string
	cudartLibPath string
	oneapiLibPath string
	nvmlLibPath   string
	rocmGPUs      []RocmGPUInfo
	oneapiGPUs    []OneapiGPUInfo
)

// With our current CUDA compile flags, a compute capability older than 5.0 will not work properly
var CudaComputeMin = [2]C.int{5, 0}

var RocmComputeMin = 9

// TODO find a better way to detect iGPU instead of minimum memory
const IGPUMemLimit = 1 * format.GibiByte // 512M is what iGPUs typically report, so anything less than 1G must be iGPU

var CudartLinuxGlobs = []string{
	"/usr/local/cuda/lib64/libcudart.so*",
	"/usr/lib/x86_64-linux-gnu/nvidia/current/libcudart.so*",
	"/usr/lib/x86_64-linux-gnu/libcudart.so*",
	"/usr/lib/wsl/lib/libcudart.so*",
	"/usr/lib/wsl/drivers/*/libcudart.so*",
	"/opt/cuda/lib64/libcudart.so*",
	"/usr/local/cuda*/targets/aarch64-linux/lib/libcudart.so*",
	"/usr/lib/aarch64-linux-gnu/nvidia/current/libcudart.so*",
	"/usr/lib/aarch64-linux-gnu/libcudart.so*",
	"/usr/local/cuda/lib*/libcudart.so*",
	"/usr/lib*/libcudart.so*",
	"/usr/local/lib*/libcudart.so*",
}

var CudartWindowsGlobs = []string{
	"c:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v*\\bin\\cudart64_*.dll",
}

var NvmlWindowsGlobs = []string{
	"c:\\Windows\\System32\\nvml.dll",
}

var NvcudaLinuxGlobs = []string{
	"/usr/local/cuda*/targets/*/lib/libcuda.so*",
	"/usr/lib/*-linux-gnu/nvidia/current/libcuda.so*",
	"/usr/lib/*-linux-gnu/libcuda.so*",
	"/usr/lib/wsl/lib/libcuda.so*",
	"/usr/lib/wsl/drivers/*/libcuda.so*",
	"/opt/cuda/lib*/libcuda.so*",
	"/usr/local/cuda/lib*/libcuda.so*",
	"/usr/lib*/libcuda.so*",
	"/usr/local/lib*/libcuda.so*",
}

var NvcudaWindowsGlobs = []string{
	"c:\\windows\\system*\\nvcuda.dll",
}

var OneapiWindowsGlobs = []string{
	"c:\\Windows\\System32\\DriverStore\\FileRepository\\*\\ze_intel_gpu64.dll",
}

var OneapiLinuxGlobs = []string{
	"/usr/lib/x86_64-linux-gnu/libze_intel_gpu.so*",
	"/usr/lib*/libze_intel_gpu.so*",
}

// Jetson devices have JETSON_JETPACK="x.y.z" factory set to the Jetpack version installed.
// Included to drive logic for reducing Ollama-allocated overhead on L4T/Jetson devices.
var CudaTegra string = os.Getenv("JETSON_JETPACK")

// Note: gpuMutex must already be held
func initCudaHandles() *cudaHandles {

	// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing

	cHandles := &cudaHandles{}
	// Short Circuit if we already know which library to use
	if nvmlLibPath != "" {
		cHandles.nvml, _ = LoadNVMLMgmt([]string{nvmlLibPath})
		return cHandles
	}
	if nvcudaLibPath != "" {
		cHandles.deviceCount, cHandles.nvcuda, _ = LoadNVCUDAMgmt([]string{nvcudaLibPath})
		return cHandles
	}
	if cudartLibPath != "" {
		cHandles.deviceCount, cHandles.cudart, _ = LoadCUDARTMgmt([]string{cudartLibPath})
		return cHandles
	}

	slog.Debug("searching for GPU discovery libraries for NVIDIA")
	var cudartMgmtName string
	var cudartMgmtPatterns []string
	var nvcudaMgmtName string
	var nvcudaMgmtPatterns []string
	var nvmlMgmtName string
	var nvmlMgmtPatterns []string

	tmpDir, _ := PayloadsDir()
	switch runtime.GOOS {
	case "windows":
		cudartMgmtName = "cudart64_*.dll"
		localAppData := os.Getenv("LOCALAPPDATA")
		cudartMgmtPatterns = []string{filepath.Join(localAppData, "Programs", "Ollama", cudartMgmtName)}
		cudartMgmtPatterns = append(cudartMgmtPatterns, CudartWindowsGlobs...)
		// The driver library version is tied to the installed driver, so we can't carry it as a payload
		nvcudaMgmtName = "nvcuda.dll"
		nvcudaMgmtPatterns = NvcudaWindowsGlobs

		// Use nvml to refresh free memory on windows only
		nvmlMgmtName = "nvml.dll"
		nvmlMgmtPatterns = make([]string, len(NvmlWindowsGlobs))
		copy(nvmlMgmtPatterns, NvmlWindowsGlobs)

	case "linux":
		cudartMgmtName = "libcudart.so*"
		if tmpDir != "" {
			// TODO - add "payloads" for subprocess
			cudartMgmtPatterns = []string{filepath.Join(tmpDir, "cuda*", cudartMgmtName)}
		}
		cudartMgmtPatterns = append(cudartMgmtPatterns, CudartLinuxGlobs...)
		// The driver library version is tied to the installed driver, so we can't carry it as a payload
		nvcudaMgmtName = "libcuda.so*"
		nvcudaMgmtPatterns = NvcudaLinuxGlobs

		// nvml omitted on linux
	default:
		return cHandles
	}

	if len(nvmlMgmtPatterns) > 0 {
		nvmlLibPaths := FindGPULibs(nvmlMgmtName, nvmlMgmtPatterns)
		if len(nvmlLibPaths) > 0 {
			nvml, libPath := LoadNVMLMgmt(nvmlLibPaths)
			if nvml != nil {
				slog.Debug("nvidia-ml loaded", "library", libPath)
				cHandles.nvml = nvml
				nvmlLibPath = libPath
			}
		}
	}

	nvcudaLibPaths := FindGPULibs(nvcudaMgmtName, nvcudaMgmtPatterns)
	if len(nvcudaLibPaths) > 0 {
		deviceCount, nvcuda, libPath := LoadNVCUDAMgmt(nvcudaLibPaths)
		if nvcuda != nil {
			slog.Debug("detected GPUs", "count", deviceCount, "library", libPath)
			cHandles.nvcuda = nvcuda
			cHandles.deviceCount = deviceCount
			nvcudaLibPath = libPath
			return cHandles
		}
	}

	cudartLibPaths := FindGPULibs(cudartMgmtName, cudartMgmtPatterns)
	if len(cudartLibPaths) > 0 {
		deviceCount, cudart, libPath := LoadCUDARTMgmt(cudartLibPaths)
		if cudart != nil {
			slog.Debug("detected GPUs", "library", libPath, "count", deviceCount)
			cHandles.cudart = cudart
			cHandles.deviceCount = deviceCount
			cudartLibPath = libPath
			return cHandles
		}
	}

	return cHandles
}

// Note: gpuMutex must already be held
func initOneAPIHandles() *oneapiHandles {
	oHandles := &oneapiHandles{}
	var oneapiMgmtName string
	var oneapiMgmtPatterns []string

	// Short Circuit if we already know which library to use
	if oneapiLibPath != "" {
		oHandles.deviceCount, oHandles.oneapi, _ = LoadOneapiMgmt([]string{oneapiLibPath})
		return oHandles
	}

	switch runtime.GOOS {
	case "windows":
		oneapiMgmtName = "ze_intel_gpu64.dll"
		oneapiMgmtPatterns = OneapiWindowsGlobs
	case "linux":
		oneapiMgmtName = "libze_intel_gpu.so"
		oneapiMgmtPatterns = OneapiLinuxGlobs
	default:
		return oHandles
	}

	oneapiLibPaths := FindGPULibs(oneapiMgmtName, oneapiMgmtPatterns)
	if len(oneapiLibPaths) > 0 {
		oHandles.deviceCount, oHandles.oneapi, oneapiLibPath = LoadOneapiMgmt(oneapiLibPaths)
	}

	return oHandles
}

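// GetCPUInfo returns a single-entry GpuInfoList describing the host CPU,
// bootstrapping full GPU discovery first if it hasn't happened yet.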
func GetCPUInfo() GpuInfoList {
	gpuMutex.Lock()
	if !bootstrapped {
		gpuMutex.Unlock()
		GetGPUInfo()
	} else {
		gpuMutex.Unlock()
	}
	return GpuInfoList{cpus[0].GpuInfo}
}

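// GetGPUInfo discovers supported GPUs (CUDA, ROCm, oneAPI) on the first call,
// caches the results, and on later calls only refreshes each device's free
// memory. If no usable GPU is found, the CPU entry is returned instead.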
func GetGPUInfo() GpuInfoList {
	// TODO - consider exploring lspci (and equivalent on windows) to check for
	// GPUs so we can report warnings if we see Nvidia/AMD but fail to load the libraries
	gpuMutex.Lock()
	defer gpuMutex.Unlock()
	needRefresh := true
	var cHandles *cudaHandles
	var oHandles *oneapiHandles
	defer func() {
		if cHandles != nil {
			if cHandles.cudart != nil {
				C.cudart_release(*cHandles.cudart)
			}
			if cHandles.nvcuda != nil {
				C.nvcuda_release(*cHandles.nvcuda)
			}
			if cHandles.nvml != nil {
				C.nvml_release(*cHandles.nvml)
			}
		}
		if oHandles != nil {
			if oHandles.oneapi != nil {
				// TODO - is this needed?
				C.oneapi_release(*oHandles.oneapi)
			}
		}
	}()

	if !bootstrapped {
		slog.Debug("Detecting GPUs")
		needRefresh = false
		cpuCapability = getCPUCapability()
		var memInfo C.mem_info_t

		mem, err := GetCPUMem()
		if err != nil {
			slog.Warn("error looking up system memory", "error", err)
		}
		cpus = []CPUInfo{CPUInfo{
			GpuInfo: GpuInfo{
				memInfo: mem,
				Library: "cpu",
				Variant: cpuCapability.ToVariant(),
				ID:      "0",
			},
		}}

		// Fallback to CPU mode if we're lacking required vector extensions on x86
		if cpuCapability < GPURunnerCPUCapability && runtime.GOARCH == "amd64" {
			slog.Warn("CPU does not have minimum vector extensions, GPU inference disabled", "required", GPURunnerCPUCapability.ToString(), "detected", cpuCapability.ToString())
			bootstrapped = true
			// No need to do any GPU discovery, since we can't run on them
			return GpuInfoList{cpus[0].GpuInfo}
		}

		// On windows we bundle the nvidia library one level above the runner dir
		depPath := ""
		if runtime.GOOS == "windows" && envconfig.RunnersDir != "" {
			depPath = filepath.Dir(envconfig.RunnersDir)
		}

		// Load ALL libraries
		cHandles = initCudaHandles()

		// NVIDIA
		for i := range cHandles.deviceCount {
			if cHandles.cudart != nil || cHandles.nvcuda != nil {
				gpuInfo := CudaGPUInfo{
					GpuInfo: GpuInfo{
						Library: "cuda",
					},
					index: i,
				}
				var driverMajor int
				var driverMinor int
				if cHandles.cudart != nil {
					C.cudart_bootstrap(*cHandles.cudart, C.int(i), &memInfo)
				} else {
					C.nvcuda_bootstrap(*cHandles.nvcuda, C.int(i), &memInfo)
					driverMajor = int(cHandles.nvcuda.driver_major)
					driverMinor = int(cHandles.nvcuda.driver_minor)
				}
				if memInfo.err != nil {
					slog.Info("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
					C.free(unsafe.Pointer(memInfo.err))
					continue
				}
				if memInfo.major < CudaComputeMin[0] || (memInfo.major == CudaComputeMin[0] && memInfo.minor < CudaComputeMin[1]) {
					slog.Info(fmt.Sprintf("[%d] CUDA GPU is too old. Compute Capability detected: %d.%d", i, memInfo.major, memInfo.minor))
					continue
				}
				gpuInfo.TotalMemory = uint64(memInfo.total)
				gpuInfo.FreeMemory = uint64(memInfo.free)
				gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
				gpuInfo.Compute = fmt.Sprintf("%d.%d", memInfo.major, memInfo.minor)
				gpuInfo.MinimumMemory = cudaMinimumMemory
				gpuInfo.DependencyPath = depPath
				gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
				gpuInfo.DriverMajor = int(driverMajor)
				gpuInfo.DriverMinor = int(driverMinor)

				// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
				cudaGPUs = append(cudaGPUs, gpuInfo)
			}
		}

		// Intel
		oHandles = initOneAPIHandles()
		for d := 0; oHandles.oneapi != nil && d < int(oHandles.oneapi.num_drivers); d++ {
			if oHandles.oneapi == nil {
				// shouldn't happen
				slog.Warn("nil oneapi handle with driver count", "count", int(oHandles.oneapi.num_drivers))
				continue
			}
			devCount := C.oneapi_get_device_count(*oHandles.oneapi, C.int(d))
			for i := 0; i < int(devCount); i++ {
				gpuInfo := OneapiGPUInfo{
					GpuInfo: GpuInfo{
						Library: "oneapi",
					},
					driverIndex: d,
					gpuIndex:    i,
				}
				// TODO - split bootstrapping from updating free memory
				C.oneapi_check_vram(*oHandles.oneapi, C.int(d), C.int(i), &memInfo)
				// TODO - convert this to MinimumMemory based on testing...
				var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
				memInfo.free = C.uint64_t(totalFreeMem)
				gpuInfo.TotalMemory = uint64(memInfo.total)
				gpuInfo.FreeMemory = uint64(memInfo.free)
				gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
				gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
				// TODO dependency path?
				oneapiGPUs = append(oneapiGPUs, gpuInfo)
			}
		}

		rocmGPUs = AMDGetGPUInfo()
		bootstrapped = true
	}

	// For detected GPUs, load library if not loaded

	// Refresh free memory usage
	if needRefresh {
		mem, err := GetCPUMem()
		if err != nil {
			slog.Warn("error looking up system memory", "error", err)
		} else {
			slog.Debug("updating system memory data",
				slog.Group(
					"before",
					"total", format.HumanBytes2(cpus[0].TotalMemory),
					"free", format.HumanBytes2(cpus[0].FreeMemory),
				),
				slog.Group(
					"now",
					"total", format.HumanBytes2(mem.TotalMemory),
					"free", format.HumanBytes2(mem.FreeMemory),
				),
			)
			cpus[0].FreeMemory = mem.FreeMemory
		}

		var memInfo C.mem_info_t
		if cHandles == nil && len(cudaGPUs) > 0 {
			cHandles = initCudaHandles()
		}
		for i, gpu := range cudaGPUs {
			if cHandles.nvml != nil {
				C.nvml_get_free(*cHandles.nvml, C.int(gpu.index), &memInfo.free, &memInfo.total, &memInfo.used)
			} else if cHandles.cudart != nil {
				C.cudart_bootstrap(*cHandles.cudart, C.int(gpu.index), &memInfo)
			} else if cHandles.nvcuda != nil {
				C.nvcuda_get_free(*cHandles.nvcuda, C.int(gpu.index), &memInfo.free, &memInfo.total)
				memInfo.used = memInfo.total - memInfo.free
			} else {
				// shouldn't happen
				slog.Warn("no valid cuda library loaded to refresh vram usage")
				break
			}
			if memInfo.err != nil {
				slog.Warn("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
				C.free(unsafe.Pointer(memInfo.err))
				continue
			}
			if memInfo.free == 0 {
				slog.Warn("error looking up nvidia GPU memory")
				continue
			}
			slog.Debug("updating cuda memory data",
				"gpu", gpu.ID,
				"name", gpu.Name,
				slog.Group(
					"before",
					"total", format.HumanBytes2(gpu.TotalMemory),
					"free", format.HumanBytes2(gpu.FreeMemory),
				),
				slog.Group(
					"now",
					"total", format.HumanBytes2(uint64(memInfo.total)),
					"free", format.HumanBytes2(uint64(memInfo.free)),
					"used", format.HumanBytes2(uint64(memInfo.used)),
				),
			)
			cudaGPUs[i].FreeMemory = uint64(memInfo.free)
		}

		if oHandles == nil && len(oneapiGPUs) > 0 {
			oHandles = initOneAPIHandles()
		}
		for i, gpu := range oneapiGPUs {
			if oHandles.oneapi == nil {
				// shouldn't happen
				slog.Warn("nil oneapi handle with device count", "count", oHandles.deviceCount)
				continue
			}
			C.oneapi_check_vram(*oHandles.oneapi, C.int(gpu.driverIndex), C.int(gpu.gpuIndex), &memInfo)
			// TODO - convert this to MinimumMemory based on testing...
			var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
			memInfo.free = C.uint64_t(totalFreeMem)
			oneapiGPUs[i].FreeMemory = uint64(memInfo.free)
		}

		err = RocmGPUInfoList(rocmGPUs).RefreshFreeMemory()
		if err != nil {
			slog.Debug("problem refreshing ROCm free memory", "error", err)
		}
	}

	resp := []GpuInfo{}
	for _, gpu := range cudaGPUs {
		resp = append(resp, gpu.GpuInfo)
	}
	for _, gpu := range rocmGPUs {
		resp = append(resp, gpu.GpuInfo)
	}
	for _, gpu := range oneapiGPUs {
		resp = append(resp, gpu.GpuInfo)
	}
	if len(resp) == 0 {
		resp = append(resp, cpus[0].GpuInfo)
	}
	return resp
}

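// GetCPUMem reports total and free system memory, reading /proc/meminfo on
// Linux and falling back to the C helper on other platforms.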
func GetCPUMem() (memInfo, error) {
	if runtime.GOOS == "linux" {
		return GetLinuxMemInfo()
	}
	var ret memInfo
	var info C.mem_info_t
	C.cpu_check_ram(&info)
	if info.err != nil {
		defer C.free(unsafe.Pointer(info.err))
		return ret, fmt.Errorf(C.GoString(info.err))
	}
	ret.FreeMemory = uint64(info.free)
	ret.TotalMemory = uint64(info.total)
	return ret, nil
}

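// FindGPULibs returns candidate paths for baseLibName, searching PATH (windows)
// or LD_LIBRARY_PATH (linux) first and then the supplied glob patterns,
// resolving symlinks and dropping duplicates along the way.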
func FindGPULibs(baseLibName string, defaultPatterns []string) []string {
	// Multiple GPU libraries may exist, and some may not work, so keep trying until we exhaust them
	var ldPaths []string
	var patterns []string
	gpuLibPaths := []string{}
	slog.Debug("Searching for GPU library", "name", baseLibName)

	switch runtime.GOOS {
	case "windows":
		ldPaths = strings.Split(os.Getenv("PATH"), ";")
	case "linux":
		ldPaths = strings.Split(os.Getenv("LD_LIBRARY_PATH"), ":")
	default:
		return gpuLibPaths
	}
	// Start with whatever we find in the PATH/LD_LIBRARY_PATH
	for _, ldPath := range ldPaths {
		d, err := filepath.Abs(ldPath)
		if err != nil {
			continue
		}
		patterns = append(patterns, filepath.Join(d, baseLibName+"*"))
	}
	patterns = append(patterns, defaultPatterns...)
	slog.Debug("gpu library search", "globs", patterns)
	for _, pattern := range patterns {

		// Nvidia PhysX known to return bogus results
		if strings.Contains(pattern, "PhysX") {
			slog.Debug("skipping PhysX cuda library path", "path", pattern)
			continue
		}
		// Ignore glob discovery errors
		matches, _ := filepath.Glob(pattern)
		for _, match := range matches {
			// Resolve any links so we don't try the same lib multiple times
			// and weed out any dups across globs
			libPath := match
			tmp := match
			var err error
			for ; err == nil; tmp, err = os.Readlink(libPath) {
				if !filepath.IsAbs(tmp) {
					tmp = filepath.Join(filepath.Dir(libPath), tmp)
				}
				libPath = tmp
			}
			new := true
			for _, cmp := range gpuLibPaths {
				if cmp == libPath {
					new = false
					break
				}
			}
			if new {
				gpuLibPaths = append(gpuLibPaths, libPath)
			}
		}
	}
	slog.Debug("discovered GPU libraries", "paths", gpuLibPaths)
	return gpuLibPaths
}

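// LoadCUDARTMgmt tries each candidate cudart library in turn and returns the
// device count, handle, and path of the first one that initializes successfully.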
func LoadCUDARTMgmt(cudartLibPaths []string) (int, *C.cudart_handle_t, string) {
	var resp C.cudart_init_resp_t
	resp.ch.verbose = getVerboseState()
	for _, libPath := range cudartLibPaths {
		lib := C.CString(libPath)
		defer C.free(unsafe.Pointer(lib))
		C.cudart_init(lib, &resp)
		if resp.err != nil {
			slog.Debug("Unable to load cudart", "library", libPath, "error", C.GoString(resp.err))
			C.free(unsafe.Pointer(resp.err))
		} else {
			return int(resp.num_devices), &resp.ch, libPath
		}
	}
	return 0, nil, ""
}

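// LoadNVCUDAMgmt tries each candidate CUDA driver library (nvcuda) and returns
// the device count, handle, and path of the first one that initializes successfully.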
func LoadNVCUDAMgmt(nvcudaLibPaths []string) (int, *C.nvcuda_handle_t, string) {
	var resp C.nvcuda_init_resp_t
	resp.ch.verbose = getVerboseState()
	for _, libPath := range nvcudaLibPaths {
		lib := C.CString(libPath)
		defer C.free(unsafe.Pointer(lib))
		C.nvcuda_init(lib, &resp)
		if resp.err != nil {
			slog.Debug("Unable to load nvcuda", "library", libPath, "error", C.GoString(resp.err))
			C.free(unsafe.Pointer(resp.err))
		} else {
			return int(resp.num_devices), &resp.ch, libPath
		}
	}
	return 0, nil, ""
}

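// LoadNVMLMgmt tries each candidate NVML library and returns the handle and
// path of the first one that initializes successfully.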
func LoadNVMLMgmt(nvmlLibPaths []string) (*C.nvml_handle_t, string) {
	var resp C.nvml_init_resp_t
	resp.ch.verbose = getVerboseState()
	for _, libPath := range nvmlLibPaths {
		lib := C.CString(libPath)
		defer C.free(unsafe.Pointer(lib))
		C.nvml_init(lib, &resp)
		if resp.err != nil {
			slog.Info(fmt.Sprintf("Unable to load NVML management library %s: %s", libPath, C.GoString(resp.err)))
			C.free(unsafe.Pointer(resp.err))
		} else {
			return &resp.ch, libPath
		}
	}
	return nil, ""
}

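// LoadOneapiMgmt tries each candidate oneAPI library and returns the device
// count summed across all drivers, the handle, and the library path of the
// first one that initializes successfully.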
func LoadOneapiMgmt(oneapiLibPaths []string) (int, *C.oneapi_handle_t, string) {
	var resp C.oneapi_init_resp_t
	num_devices := 0
	resp.oh.verbose = getVerboseState()
	for _, libPath := range oneapiLibPaths {
		lib := C.CString(libPath)
		defer C.free(unsafe.Pointer(lib))
		C.oneapi_init(lib, &resp)
		if resp.err != nil {
			slog.Debug("Unable to load oneAPI management library", "library", libPath, "error", C.GoString(resp.err))
			C.free(unsafe.Pointer(resp.err))
		} else {
			for i := 0; i < int(resp.oh.num_drivers); i++ {
				num_devices += int(C.oneapi_get_device_count(resp.oh, C.int(i)))
			}
			return num_devices, &resp.oh, libPath
		}
	}
	return 0, nil, ""
}

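// getVerboseState maps the debug environment setting onto the C verbose flag
// used by the bootstrap helpers.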
func getVerboseState() C.uint16_t {
	if envconfig.Debug {
		return C.uint16_t(1)
	}
	return C.uint16_t(0)
}

// Given the list of GPUs this instantiation is targeted for,
// figure out the visible devices environment variable
//
// If different libraries are detected, the first one is what we use
func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) {
	if len(l) == 0 {
		return "", ""
	}
	switch l[0].Library {
	case "cuda":
		return cudaGetVisibleDevicesEnv(l)
	case "rocm":
		return rocmGetVisibleDevicesEnv(l)
	case "oneapi":
		return oneapiGetVisibleDevicesEnv(l)
	default:
		slog.Debug("no filter required for library " + l[0].Library)
		return "", ""
	}
}

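// GetLinuxMemInfo parses /proc/meminfo, preferring MemAvailable for free memory
// and falling back to MemFree+Buffers+Cached when it isn't present.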
func GetLinuxMemInfo() (memInfo, error) {
	var mem memInfo
	var total, available, free, buffers, cached uint64
	f, err := os.Open("/proc/meminfo")
	if err != nil {
		return mem, err
	}
	defer f.Close()
	s := bufio.NewScanner(f)
	for s.Scan() {
		switch {
		case bytes.HasPrefix(s.Bytes(), []byte(`MemTotal:`)):
			_, err = fmt.Sscanf(s.Text(), "MemTotal:%d", &total)
		case bytes.HasPrefix(s.Bytes(), []byte(`MemAvailable:`)):
			_, err = fmt.Sscanf(s.Text(), "MemAvailable:%d", &available)
		case bytes.HasPrefix(s.Bytes(), []byte(`MemFree:`)):
			_, err = fmt.Sscanf(s.Text(), "MemFree:%d", &free)
		case bytes.HasPrefix(s.Bytes(), []byte(`Buffers:`)):
			_, err = fmt.Sscanf(s.Text(), "Buffers:%d", &buffers)
		case bytes.HasPrefix(s.Bytes(), []byte(`Cached:`)):
			_, err = fmt.Sscanf(s.Text(), "Cached:%d", &cached)
		default:
			continue
		}
		if err != nil {
			return mem, err
		}

		if total > 0 && available > 0 {
			mem.TotalMemory = total * 1024
			mem.FreeMemory = available * 1024
			return mem, nil
		}
	}
	mem.TotalMemory = total * 1024
	mem.FreeMemory = (free + buffers + cached) * 1024
	return mem, nil
}