// gpu.go - GPU discovery and bootstrapping for linux and windows builds.
//go:build linux || windows

package gpu

/*
#cgo linux LDFLAGS: -lrt -lpthread -ldl -lstdc++ -lm
#cgo windows LDFLAGS: -lpthread

#include "gpu_info.h"

*/
import "C"
import (
	"errors"
	"fmt"
	"log/slog"
	"os"
	"path/filepath"
	"runtime"
	"strings"
	"sync"
	"unsafe"

	"github.com/ollama/ollama/envconfig"
	"github.com/ollama/ollama/format"
)

// cudaHandles aggregates the NVIDIA management library handles that may be
// loaded during discovery. At most one of cudart/nvcuda is used to bootstrap
// device info; nvml is loaded on windows only to refresh free VRAM.
type cudaHandles struct {
	deviceCount int
	cudart      *C.cudart_handle_t
	nvcuda      *C.nvcuda_handle_t
	nvml        *C.nvml_handle_t
}

// oneapiHandles holds the Intel oneAPI (Level Zero) management library handle
// and the total device count summed across all of its drivers.
type oneapiHandles struct {
	oneapi      *C.oneapi_handle_t
	deviceCount int
}

const (
	// Baseline VRAM assigned to each GPU's MinimumMemory field during
	// discovery (see GetGPUInfo); memory below this is not usable for models.
	cudaMinimumMemory = 457 * format.MebiByte
	rocmMinimumMemory = 457 * format.MebiByte
	// TODO OneAPI minimum memory
)

// Package-level discovery state. All of these are guarded by gpuMutex.
var (
	gpuMutex sync.Mutex
	// bootstrapped is set once full GPU discovery has completed; after that
	// GetGPUInfo only refreshes free-memory figures.
	bootstrapped  bool
	cpuCapability CPUCapability
	cpus          []CPUInfo
	cudaGPUs      []CudaGPUInfo
	// Cached library paths so repeat discovery skips the filesystem search.
	nvcudaLibPath string
	cudartLibPath string
	oneapiLibPath string
	nvmlLibPath   string
	rocmGPUs      []RocmGPUInfo
	oneapiGPUs    []OneapiGPUInfo
)
// With our current CUDA compile flags, older than 5.0 will not work properly
// ({major, minor} compute capability, compared in GetGPUInfo).
var CudaComputeMin = [2]C.int{5, 0}

// RocmComputeMin is the minimum ROCm GFX major version supported.
var RocmComputeMin = 9

// TODO find a better way to detect iGPU instead of minimum memory
const IGPUMemLimit = 1 * format.GibiByte // 512G is what they typically report, so anything less than 1G must be iGPU
// CudartLinuxGlobs lists the filesystem patterns searched for the CUDA
// runtime library on linux, tried in order by FindGPULibs.
var CudartLinuxGlobs = []string{
	"/usr/local/cuda/lib64/libcudart.so*",
	"/usr/lib/x86_64-linux-gnu/nvidia/current/libcudart.so*",
	"/usr/lib/x86_64-linux-gnu/libcudart.so*",
	"/usr/lib/wsl/lib/libcudart.so*",
	"/usr/lib/wsl/drivers/*/libcudart.so*",
	"/opt/cuda/lib64/libcudart.so*",
	"/usr/local/cuda*/targets/aarch64-linux/lib/libcudart.so*",
	"/usr/lib/aarch64-linux-gnu/nvidia/current/libcudart.so*",
	"/usr/lib/aarch64-linux-gnu/libcudart.so*",
	"/usr/local/cuda/lib*/libcudart.so*",
	"/usr/lib*/libcudart.so*",
	"/usr/local/lib*/libcudart.so*",
}

// CudartWindowsGlobs lists the CUDA runtime DLL search patterns on windows.
var CudartWindowsGlobs = []string{
	"c:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v*\\bin\\cudart64_*.dll",
}

// NvmlWindowsGlobs lists the NVML search patterns (windows only; NVML is
// omitted on linux - see initCudaHandles).
var NvmlWindowsGlobs = []string{
	"c:\\Windows\\System32\\nvml.dll",
}

// NvcudaLinuxGlobs lists the NVIDIA driver library (libcuda) patterns on linux.
var NvcudaLinuxGlobs = []string{
	"/usr/local/cuda*/targets/*/lib/libcuda.so*",
	"/usr/lib/*-linux-gnu/nvidia/current/libcuda.so*",
	"/usr/lib/*-linux-gnu/libcuda.so*",
	"/usr/lib/wsl/lib/libcuda.so*",
	"/usr/lib/wsl/drivers/*/libcuda.so*",
	"/opt/cuda/lib*/libcuda.so*",
	"/usr/local/cuda/lib*/libcuda.so*",
	"/usr/lib*/libcuda.so*",
	"/usr/local/lib*/libcuda.so*",
}

// NvcudaWindowsGlobs lists the NVIDIA driver DLL patterns on windows.
var NvcudaWindowsGlobs = []string{
	"c:\\windows\\system*\\nvcuda.dll",
}

// OneapiWindowsGlobs lists Intel Level Zero DLL patterns on windows.
var OneapiWindowsGlobs = []string{
	"c:\\Windows\\System32\\DriverStore\\FileRepository\\*\\ze_intel_gpu64.dll",
}

// OneapiLinuxGlobs lists Intel Level Zero library patterns on linux.
var OneapiLinuxGlobs = []string{
	"/usr/lib/x86_64-linux-gnu/libze_intel_gpu.so*",
	"/usr/lib*/libze_intel_gpu.so*",
}

// Jetson devices have JETSON_JETPACK="x.y.z" factory set to the Jetpack version installed.
// Included to drive logic for reducing Ollama-allocated overhead on L4T/Jetson devices.
var CudaTegra string = os.Getenv("JETSON_JETPACK")

// initCudaHandles locates and loads the NVIDIA management libraries, trying
// nvml (windows), then the driver library (nvcuda), then the CUDA runtime
// (cudart). Successful library paths are cached in package globals so later
// calls short-circuit the filesystem search.
// Note: gpuMutex must already be held
func initCudaHandles() *cudaHandles {
	// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing

	cHandles := &cudaHandles{}
	// Short Circuit if we already know which library to use
	if nvmlLibPath != "" {
		cHandles.nvml, _ = LoadNVMLMgmt([]string{nvmlLibPath})
		return cHandles
	}
	if nvcudaLibPath != "" {
		cHandles.deviceCount, cHandles.nvcuda, _ = LoadNVCUDAMgmt([]string{nvcudaLibPath})
		return cHandles
	}
	if cudartLibPath != "" {
		cHandles.deviceCount, cHandles.cudart, _ = LoadCUDARTMgmt([]string{cudartLibPath})
		return cHandles
	}

	slog.Debug("searching for GPU discovery libraries for NVIDIA")
	var cudartMgmtName string
	var cudartMgmtPatterns []string
	var nvcudaMgmtName string
	var nvcudaMgmtPatterns []string
	var nvmlMgmtName string
	var nvmlMgmtPatterns []string

	tmpDir, _ := PayloadsDir()
	switch runtime.GOOS {
	case "windows":
		// Prefer the cudart payload bundled with the installed app, then
		// system-wide locations.
		cudartMgmtName = "cudart64_*.dll"
		localAppData := os.Getenv("LOCALAPPDATA")
		cudartMgmtPatterns = []string{filepath.Join(localAppData, "Programs", "Ollama", cudartMgmtName)}
		cudartMgmtPatterns = append(cudartMgmtPatterns, CudartWindowsGlobs...)
		// Aligned with driver, we can't carry as payloads
		nvcudaMgmtName = "nvcuda.dll"
		nvcudaMgmtPatterns = NvcudaWindowsGlobs

		// Use nvml to refresh free memory on windows only
		nvmlMgmtName = "nvml.dll"
		nvmlMgmtPatterns = make([]string, len(NvmlWindowsGlobs))
		copy(nvmlMgmtPatterns, NvmlWindowsGlobs)

	case "linux":
		cudartMgmtName = "libcudart.so*"
		if tmpDir != "" {
			// TODO - add "payloads" for subprocess
			cudartMgmtPatterns = []string{filepath.Join(tmpDir, "cuda*", cudartMgmtName)}
		}
		cudartMgmtPatterns = append(cudartMgmtPatterns, CudartLinuxGlobs...)
		// Aligned with driver, we can't carry as payloads
		nvcudaMgmtName = "libcuda.so*"
		nvcudaMgmtPatterns = NvcudaLinuxGlobs

		// nvml omitted on linux
	default:
		return cHandles
	}

	// nvml does not return early: it only supplements free-memory refresh and
	// we still need nvcuda or cudart for device bootstrapping.
	if len(nvmlMgmtPatterns) > 0 {
		nvmlLibPaths := FindGPULibs(nvmlMgmtName, nvmlMgmtPatterns)
		if len(nvmlLibPaths) > 0 {
			nvml, libPath := LoadNVMLMgmt(nvmlLibPaths)
			if nvml != nil {
				slog.Debug("nvidia-ml loaded", "library", libPath)
				cHandles.nvml = nvml
				nvmlLibPath = libPath
			}
		}
	}

	nvcudaLibPaths := FindGPULibs(nvcudaMgmtName, nvcudaMgmtPatterns)
	if len(nvcudaLibPaths) > 0 {
		deviceCount, nvcuda, libPath := LoadNVCUDAMgmt(nvcudaLibPaths)
		if nvcuda != nil {
			slog.Debug("detected GPUs", "count", deviceCount, "library", libPath)
			cHandles.nvcuda = nvcuda
			cHandles.deviceCount = deviceCount
			nvcudaLibPath = libPath
			return cHandles
		}
	}

	// Fall back to the CUDA runtime only if the driver library failed.
	cudartLibPaths := FindGPULibs(cudartMgmtName, cudartMgmtPatterns)
	if len(cudartLibPaths) > 0 {
		deviceCount, cudart, libPath := LoadCUDARTMgmt(cudartLibPaths)
		if cudart != nil {
			slog.Debug("detected GPUs", "library", libPath, "count", deviceCount)
			cHandles.cudart = cudart
			cHandles.deviceCount = deviceCount
			cudartLibPath = libPath
			return cHandles
		}
	}

	return cHandles
}

// Note: gpuMutex must already be held
func initOneAPIHandles() *oneapiHandles {
	oHandles := &oneapiHandles{}
	var oneapiMgmtName string
	var oneapiMgmtPatterns []string

	// Short Circuit if we already know which library to use
	if oneapiLibPath != "" {
		oHandles.deviceCount, oHandles.oneapi, _ = LoadOneapiMgmt([]string{oneapiLibPath})
		return oHandles
	}

	switch runtime.GOOS {
	case "windows":
		oneapiMgmtName = "ze_intel_gpu64.dll"
		oneapiMgmtPatterns = OneapiWindowsGlobs
	case "linux":
		oneapiMgmtName = "libze_intel_gpu.so"
		oneapiMgmtPatterns = OneapiLinuxGlobs
	default:
		return oHandles
	}

241
242
	oneapiLibPaths := FindGPULibs(oneapiMgmtName, oneapiMgmtPatterns)
	if len(oneapiLibPaths) > 0 {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
243
		oHandles.deviceCount, oHandles.oneapi, oneapiLibPath = LoadOneapiMgmt(oneapiLibPaths)
244
245
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
246
	return oHandles
247
248
}

// GetGPUInfo returns the list of discovered accelerators, falling back to a
// single CPU entry when no usable GPU is found. The first call performs full
// discovery and caches the results; subsequent calls only refresh the
// free-memory figures for the already-discovered devices.
func GetGPUInfo() GpuInfoList {
	// TODO - consider exploring lspci (and equivalent on windows) to check for
	// GPUs so we can report warnings if we see Nvidia/AMD but fail to load the libraries
	gpuMutex.Lock()
	defer gpuMutex.Unlock()
	needRefresh := true
	var cHandles *cudaHandles
	var oHandles *oneapiHandles
	// Release any native library handles opened below before returning.
	defer func() {
		if cHandles != nil {
			if cHandles.cudart != nil {
				C.cudart_release(*cHandles.cudart)
			}
			if cHandles.nvcuda != nil {
				C.nvcuda_release(*cHandles.nvcuda)
			}
			if cHandles.nvml != nil {
				C.nvml_release(*cHandles.nvml)
			}
		}
		if oHandles != nil {
			if oHandles.oneapi != nil {
				// TODO - is this needed?
				C.oneapi_release(*oHandles.oneapi)
			}
		}
	}()

	if !bootstrapped {
		slog.Debug("Detecting GPUs")
		needRefresh = false
		cpuCapability = getCPUCapability()
		var memInfo C.mem_info_t
		C.cpu_check_ram(&memInfo)
		if memInfo.err != nil {
			slog.Info("error looking up CPU memory", "error", C.GoString(memInfo.err))
			C.free(unsafe.Pointer(memInfo.err))
			return []GpuInfo{}
		}
		cpuInfo := CPUInfo{
			GpuInfo: GpuInfo{
				Library: "cpu",
				Variant: cpuCapability.ToVariant(),
			},
		}
		cpuInfo.TotalMemory = uint64(memInfo.total)
		cpuInfo.FreeMemory = uint64(memInfo.free)
		cpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
		cpus = []CPUInfo{cpuInfo}

		// Fallback to CPU mode if we're lacking required vector extensions on x86
		if cpuCapability < GPURunnerCPUCapability && runtime.GOARCH == "amd64" {
			slog.Warn("CPU does not have minimum vector extensions, GPU inference disabled", "required", GPURunnerCPUCapability.ToString(), "detected", cpuCapability.ToString())
			bootstrapped = true
			// No need to do any GPU discovery, since we can't run on them
			return GpuInfoList{cpus[0].GpuInfo}
		}

		// On windows we bundle the nvidia library one level above the runner dir
		depPath := ""
		if runtime.GOOS == "windows" && envconfig.RunnersDir != "" {
			depPath = filepath.Dir(envconfig.RunnersDir)
		}

		// Load ALL libraries
		cHandles = initCudaHandles()

		// NVIDIA
		for i := range cHandles.deviceCount {
			if cHandles.cudart != nil || cHandles.nvcuda != nil {
				gpuInfo := CudaGPUInfo{
					GpuInfo: GpuInfo{
						Library: "cuda",
					},
					index: i,
				}
				var driverMajor int
				var driverMinor int
				// Bootstrap via cudart when available, otherwise the driver
				// library (which also exposes the driver version).
				if cHandles.cudart != nil {
					C.cudart_bootstrap(*cHandles.cudart, C.int(i), &memInfo)
				} else {
					C.nvcuda_bootstrap(*cHandles.nvcuda, C.int(i), &memInfo)
					driverMajor = int(cHandles.nvcuda.driver_major)
					driverMinor = int(cHandles.nvcuda.driver_minor)
				}
				if memInfo.err != nil {
					slog.Info("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
					C.free(unsafe.Pointer(memInfo.err))
					continue
				}
				// Skip GPUs below the minimum supported compute capability.
				if memInfo.major < CudaComputeMin[0] || (memInfo.major == CudaComputeMin[0] && memInfo.minor < CudaComputeMin[1]) {
					slog.Info(fmt.Sprintf("[%d] CUDA GPU is too old. Compute Capability detected: %d.%d", i, memInfo.major, memInfo.minor))
					continue
				}
				gpuInfo.TotalMemory = uint64(memInfo.total)
				gpuInfo.FreeMemory = uint64(memInfo.free)
				gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
				gpuInfo.Compute = fmt.Sprintf("%d.%d", memInfo.major, memInfo.minor)
				gpuInfo.MinimumMemory = cudaMinimumMemory
				gpuInfo.DependencyPath = depPath
				gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
				gpuInfo.DriverMajor = int(driverMajor)
				gpuInfo.DriverMinor = int(driverMinor)

				// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
				cudaGPUs = append(cudaGPUs, gpuInfo)
			}
		}

		// Intel
		oHandles = initOneAPIHandles()
		for d := 0; oHandles.oneapi != nil && d < int(oHandles.oneapi.num_drivers); d++ {
			if oHandles.oneapi == nil {
				// shouldn't happen
				slog.Warn("nil oneapi handle with driver count", "count", int(oHandles.oneapi.num_drivers))
				continue
			}
			devCount := C.oneapi_get_device_count(*oHandles.oneapi, C.int(d))
			for i := 0; i < int(devCount); i++ {
				gpuInfo := OneapiGPUInfo{
					GpuInfo: GpuInfo{
						Library: "oneapi",
					},
					driverIndex: d,
					gpuIndex:    i,
				}
				// TODO - split bootstrapping from updating free memory
				C.oneapi_check_vram(*oHandles.oneapi, C.int(d), C.int(i), &memInfo)
				// TODO - convert this to MinimumMemory based on testing...
				var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
				memInfo.free = C.uint64_t(totalFreeMem)
				gpuInfo.TotalMemory = uint64(memInfo.total)
				gpuInfo.FreeMemory = uint64(memInfo.free)
				gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
				gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
				// TODO dependency path?
				oneapiGPUs = append(oneapiGPUs, gpuInfo)
			}
		}

		rocmGPUs = AMDGetGPUInfo()
		bootstrapped = true
	}

	// For detected GPUs, load library if not loaded

	// Refresh free memory usage
	if needRefresh {
		// TODO - CPU system memory tracking/refresh
		var memInfo C.mem_info_t
		if cHandles == nil && len(cudaGPUs) > 0 {
			cHandles = initCudaHandles()
		}
		for i, gpu := range cudaGPUs {
			// Prefer nvml (windows), then cudart, then the driver library.
			if cHandles.nvml != nil {
				C.nvml_get_free(*cHandles.nvml, C.int(gpu.index), &memInfo.free, &memInfo.total, &memInfo.used)
			} else if cHandles.cudart != nil {
				C.cudart_bootstrap(*cHandles.cudart, C.int(gpu.index), &memInfo)
			} else if cHandles.nvcuda != nil {
				C.nvcuda_get_free(*cHandles.nvcuda, C.int(gpu.index), &memInfo.free, &memInfo.total)
				memInfo.used = memInfo.total - memInfo.free
			} else {
				// shouldn't happen
				slog.Warn("no valid cuda library loaded to refresh vram usage")
				break
			}
			if memInfo.err != nil {
				slog.Warn("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
				C.free(unsafe.Pointer(memInfo.err))
				continue
			}
			if memInfo.free == 0 {
				slog.Warn("error looking up nvidia GPU memory")
				continue
			}
			slog.Debug("updating cuda memory data",
				"gpu", gpu.ID,
				"name", gpu.Name,
				slog.Group(
					"before",
					"total", format.HumanBytes2(gpu.TotalMemory),
					"free", format.HumanBytes2(gpu.FreeMemory),
				),
				slog.Group(
					"now",
					"total", format.HumanBytes2(uint64(memInfo.total)),
					"free", format.HumanBytes2(uint64(memInfo.free)),
					"used", format.HumanBytes2(uint64(memInfo.used)),
				),
			)
			cudaGPUs[i].FreeMemory = uint64(memInfo.free)
		}

		if oHandles == nil && len(oneapiGPUs) > 0 {
			oHandles = initOneAPIHandles()
		}
		for i, gpu := range oneapiGPUs {
			if oHandles.oneapi == nil {
				// shouldn't happen
				slog.Warn("nil oneapi handle with device count", "count", oHandles.deviceCount)
				continue
			}
			C.oneapi_check_vram(*oHandles.oneapi, C.int(gpu.driverIndex), C.int(gpu.gpuIndex), &memInfo)
			// TODO - convert this to MinimumMemory based on testing...
			var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
			memInfo.free = C.uint64_t(totalFreeMem)
			oneapiGPUs[i].FreeMemory = uint64(memInfo.free)
		}

		err := RocmGPUInfoList(rocmGPUs).RefreshFreeMemory()
		if err != nil {
			slog.Debug("problem refreshing ROCm free memory", "error", err)
		}
	}

	// Flatten all discovered devices; CPU is only reported when no GPU is usable.
	resp := []GpuInfo{}
	for _, gpu := range cudaGPUs {
		resp = append(resp, gpu.GpuInfo)
	}
	for _, gpu := range rocmGPUs {
		resp = append(resp, gpu.GpuInfo)
	}
	for _, gpu := range oneapiGPUs {
		resp = append(resp, gpu.GpuInfo)
	}
	if len(resp) == 0 {
		resp = append(resp, cpus[0].GpuInfo)
	}
	return resp
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
480
func GetCPUMem() (memInfo, error) {
481
482
483
484
485
486
487
488
489
490
491
492
	var ret memInfo
	var info C.mem_info_t
	C.cpu_check_ram(&info)
	if info.err != nil {
		defer C.free(unsafe.Pointer(info.err))
		return ret, fmt.Errorf(C.GoString(info.err))
	}
	ret.FreeMemory = uint64(info.free)
	ret.TotalMemory = uint64(info.total)
	return ret, nil
}

// FindGPULibs returns the distinct real paths (symlinks resolved) of
// libraries matching baseLibName, searching the dynamic loader path first and
// then the supplied default glob patterns.
func FindGPULibs(baseLibName string, defaultPatterns []string) []string {
	// Multiple GPU libraries may exist, and some may not work, so keep trying until we exhaust them
	gpuLibPaths := []string{}
	slog.Debug("Searching for GPU library", "name", baseLibName)

	var searchDirs []string
	switch runtime.GOOS {
	case "windows":
		searchDirs = strings.Split(os.Getenv("PATH"), ";")
	case "linux":
		searchDirs = strings.Split(os.Getenv("LD_LIBRARY_PATH"), ":")
	default:
		return gpuLibPaths
	}

	// Loader path entries take precedence over the built-in default globs.
	patterns := make([]string, 0, len(searchDirs)+len(defaultPatterns))
	for _, dir := range searchDirs {
		abs, err := filepath.Abs(dir)
		if err != nil {
			continue
		}
		patterns = append(patterns, filepath.Join(abs, baseLibName+"*"))
	}
	patterns = append(patterns, defaultPatterns...)
	slog.Debug("gpu library search", "globs", patterns)

	for _, pattern := range patterns {
		// Nvidia PhysX known to return bogus results
		if strings.Contains(pattern, "PhysX") {
			slog.Debug("skipping PhysX cuda library path", "path", pattern)
			continue
		}
		// Ignore glob discovery errors
		matches, _ := filepath.Glob(pattern)
		for _, match := range matches {
			// Resolve any links so we don't try the same lib multiple times
			// and weed out any dups across globs.
			resolved := match
			for next := match; ; {
				if !filepath.IsAbs(next) {
					next = filepath.Join(filepath.Dir(resolved), next)
				}
				resolved = next
				target, err := os.Readlink(resolved)
				if err != nil {
					break
				}
				next = target
			}
			duplicate := false
			for _, existing := range gpuLibPaths {
				if existing == resolved {
					duplicate = true
					break
				}
			}
			if !duplicate {
				gpuLibPaths = append(gpuLibPaths, resolved)
			}
		}
	}
	slog.Debug("discovered GPU libraries", "paths", gpuLibPaths)
	return gpuLibPaths
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
555
func LoadCUDARTMgmt(cudartLibPaths []string) (int, *C.cudart_handle_t, string) {
556
	var resp C.cudart_init_resp_t
557
	resp.ch.verbose = getVerboseState()
558
	for _, libPath := range cudartLibPaths {
559
560
		lib := C.CString(libPath)
		defer C.free(unsafe.Pointer(lib))
561
		C.cudart_init(lib, &resp)
562
		if resp.err != nil {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
563
			slog.Debug("Unable to load cudart", "library", libPath, "error", C.GoString(resp.err))
564
565
			C.free(unsafe.Pointer(resp.err))
		} else {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
566
			return int(resp.num_devices), &resp.ch, libPath
567
568
		}
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
569
	return 0, nil, ""
570
571
}

572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
func LoadNVCUDAMgmt(nvcudaLibPaths []string) (int, *C.nvcuda_handle_t, string) {
	var resp C.nvcuda_init_resp_t
	resp.ch.verbose = getVerboseState()
	for _, libPath := range nvcudaLibPaths {
		lib := C.CString(libPath)
		defer C.free(unsafe.Pointer(lib))
		C.nvcuda_init(lib, &resp)
		if resp.err != nil {
			slog.Debug("Unable to load nvcuda", "library", libPath, "error", C.GoString(resp.err))
			C.free(unsafe.Pointer(resp.err))
		} else {
			return int(resp.num_devices), &resp.ch, libPath
		}
	}
	return 0, nil, ""
}

589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
func LoadNVMLMgmt(nvmlLibPaths []string) (*C.nvml_handle_t, string) {
	var resp C.nvml_init_resp_t
	resp.ch.verbose = getVerboseState()
	for _, libPath := range nvmlLibPaths {
		lib := C.CString(libPath)
		defer C.free(unsafe.Pointer(lib))
		C.nvml_init(lib, &resp)
		if resp.err != nil {
			slog.Info(fmt.Sprintf("Unable to load NVML management library %s: %s", libPath, C.GoString(resp.err)))
			C.free(unsafe.Pointer(resp.err))
		} else {
			return &resp.ch, libPath
		}
	}
	return nil, ""
}

Wang,Zhe's avatar
Wang,Zhe committed
606
607
func LoadOneapiMgmt(oneapiLibPaths []string) (int, *C.oneapi_handle_t, string) {
	var resp C.oneapi_init_resp_t
Daniel Hiltgen's avatar
Daniel Hiltgen committed
608
	num_devices := 0
Wang,Zhe's avatar
Wang,Zhe committed
609
610
611
612
613
614
615
616
617
	resp.oh.verbose = getVerboseState()
	for _, libPath := range oneapiLibPaths {
		lib := C.CString(libPath)
		defer C.free(unsafe.Pointer(lib))
		C.oneapi_init(lib, &resp)
		if resp.err != nil {
			slog.Debug("Unable to load oneAPI management library", "library", libPath, "error", C.GoString(resp.err))
			C.free(unsafe.Pointer(resp.err))
		} else {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
618
619
620
621
			for i := 0; i < int(resp.oh.num_drivers); i++ {
				num_devices += int(C.oneapi_get_device_count(resp.oh, C.int(i)))
			}
			return num_devices, &resp.oh, libPath
Wang,Zhe's avatar
Wang,Zhe committed
622
623
624
625
626
		}
	}
	return 0, nil, ""
}

627
func getVerboseState() C.uint16_t {
628
	if envconfig.Debug {
629
630
631
632
		return C.uint16_t(1)
	}
	return C.uint16_t(0)
}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
633
634
635
636
637
638
639
640
641
642
643
644
645
646

// Given the list of GPUs this instantiation is targeted for,
// figure out the visible devices environment variable
//
// If different libraries are detected, the first one is what we use
func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) {
	if len(l) == 0 {
		return "", ""
	}
	switch l[0].Library {
	case "cuda":
		return cudaGetVisibleDevicesEnv(l)
	case "rocm":
		return rocmGetVisibleDevicesEnv(l)
Wang,Zhe's avatar
Wang,Zhe committed
647
648
	case "oneapi":
		return oneapiGetVisibleDevicesEnv(l)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
649
650
651
652
653
	default:
		slog.Debug("no filter required for library " + l[0].Library)
		return "", ""
	}
}