gpu.go 17.7 KB
Newer Older
1
2
3
4
5
//go:build linux || windows

package gpu

/*
6
7
8
#cgo linux LDFLAGS: -lrt -lpthread -ldl -lstdc++ -lm
#cgo windows LDFLAGS: -lpthread

9
10
11
12
13
14
#include "gpu_info.h"

*/
import "C"
import (
	"fmt"
15
	"log/slog"
16
17
	"os"
	"path/filepath"
18
	"runtime"
19
	"strings"
20
21
	"sync"
	"unsafe"
Michael Yang's avatar
Michael Yang committed
22

23
	"github.com/ollama/ollama/envconfig"
24
	"github.com/ollama/ollama/format"
25
26
)

Daniel Hiltgen's avatar
Daniel Hiltgen committed
27
// cudaHandles bundles the optional NVIDIA management library handles
// discovered at bootstrap. At most one of cudart/nvcuda is used for device
// enumeration; nvml, when available, is tried first for free-memory
// refreshes (see GetGPUInfo).
type cudaHandles struct {
	deviceCount int
	cudart      *C.cudart_handle_t
	nvcuda      *C.nvcuda_handle_t
	nvml        *C.nvml_handle_t
}

// oneapiHandles holds the Intel oneAPI management library handle plus the
// total device count summed across all oneAPI drivers it reports.
type oneapiHandles struct {
	oneapi      *C.oneapi_handle_t
	deviceCount int
}

Michael Yang's avatar
Michael Yang committed
39
const (
	// Per-library minimum memory floor reported to schedulers via
	// GpuInfo.MinimumMemory (set during discovery in GetGPUInfo and the
	// AMD discovery code — confirm the rocm value's use against amd_*.go).
	cudaMinimumMemory = 457 * format.MebiByte
	rocmMinimumMemory = 457 * format.MebiByte
	// TODO OneAPI minimum memory
)

45
46
47
48
49
50
51
52
53
// Package-level discovery state. All of it is guarded by gpuMutex; the
// *LibPath strings cache the library that worked so re-initialization can
// short-circuit the filesystem search.
var (
	gpuMutex      sync.Mutex
	bootstrapped  bool // true once full discovery has run at least once
	cpuCapability CPUCapability
	cpus          []CPUInfo
	cudaGPUs      []CudaGPUInfo
	nvcudaLibPath string
	cudartLibPath string
	oneapiLibPath string
	nvmlLibPath   string
	rocmGPUs      []RocmGPUInfo
	oneapiGPUs    []OneapiGPUInfo
)
58

59
60
// With our current CUDA compile flags, older than 5.0 will not work properly
var CudaComputeMin = [2]C.int{5, 0}

// RocmComputeMin is the minimum supported ROCm compute level (presumably the
// gfx major version — confirm against the AMD discovery code).
var RocmComputeMin = 9

// TODO find a better way to detect iGPU instead of minimum memory
const IGPUMemLimit = 1 * format.GibiByte // 512G is what they typically report, so anything less than 1G must be iGPU

// Jetson devices have JETSON_JETPACK="x.y.z" factory set to the Jetpack version installed.
// Included to drive logic for reducing Ollama-allocated overhead on L4T/Jetson devices.
var CudaTegra string = os.Getenv("JETSON_JETPACK")

71
// Note: gpuMutex must already be held
Daniel Hiltgen's avatar
Daniel Hiltgen committed
72
func initCudaHandles() *cudaHandles {
73

74
	// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing
75

Daniel Hiltgen's avatar
Daniel Hiltgen committed
76
	cHandles := &cudaHandles{}
77
	// Short Circuit if we already know which library to use
78
79
80
81
	if nvmlLibPath != "" {
		cHandles.nvml, _ = LoadNVMLMgmt([]string{nvmlLibPath})
		return cHandles
	}
82
	if nvcudaLibPath != "" {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
83
84
		cHandles.deviceCount, cHandles.nvcuda, _ = LoadNVCUDAMgmt([]string{nvcudaLibPath})
		return cHandles
85
86
	}
	if cudartLibPath != "" {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
87
88
		cHandles.deviceCount, cHandles.cudart, _ = LoadCUDARTMgmt([]string{cudartLibPath})
		return cHandles
89
90
91
	}

	slog.Debug("searching for GPU discovery libraries for NVIDIA")
92
	var cudartMgmtPatterns []string
93

Daniel Hiltgen's avatar
Daniel Hiltgen committed
94
95
	// Aligned with driver, we can't carry as payloads
	nvcudaMgmtPatterns := NvcudaGlobs
96

Daniel Hiltgen's avatar
Daniel Hiltgen committed
97
98
99
100
101
102
103
104
	if runtime.GOOS == "windows" {
		localAppData := os.Getenv("LOCALAPPDATA")
		cudartMgmtPatterns = []string{filepath.Join(localAppData, "Programs", "Ollama", CudartMgmtName)}
	}
	tmpDir, _ := PayloadsDir()
	if tmpDir != "" {
		// TODO - add "payloads" for subprocess
		cudartMgmtPatterns = []string{filepath.Join(tmpDir, "cuda*", CudartMgmtName)}
105
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
106
	cudartMgmtPatterns = append(cudartMgmtPatterns, CudartGlobs...)
107

Daniel Hiltgen's avatar
Daniel Hiltgen committed
108
109
	if len(NvmlGlobs) > 0 {
		nvmlLibPaths := FindGPULibs(NvmlMgmtName, NvmlGlobs)
110
111
112
113
114
115
116
117
118
119
		if len(nvmlLibPaths) > 0 {
			nvml, libPath := LoadNVMLMgmt(nvmlLibPaths)
			if nvml != nil {
				slog.Debug("nvidia-ml loaded", "library", libPath)
				cHandles.nvml = nvml
				nvmlLibPath = libPath
			}
		}
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
120
	nvcudaLibPaths := FindGPULibs(NvcudaMgmtName, nvcudaMgmtPatterns)
121
122
123
	if len(nvcudaLibPaths) > 0 {
		deviceCount, nvcuda, libPath := LoadNVCUDAMgmt(nvcudaLibPaths)
		if nvcuda != nil {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
124
			slog.Debug("detected GPUs", "count", deviceCount, "library", libPath)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
125
126
			cHandles.nvcuda = nvcuda
			cHandles.deviceCount = deviceCount
127
			nvcudaLibPath = libPath
Daniel Hiltgen's avatar
Daniel Hiltgen committed
128
			return cHandles
129
130
131
		}
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
132
	cudartLibPaths := FindGPULibs(CudartMgmtName, cudartMgmtPatterns)
133
	if len(cudartLibPaths) > 0 {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
134
		deviceCount, cudart, libPath := LoadCUDARTMgmt(cudartLibPaths)
135
		if cudart != nil {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
136
			slog.Debug("detected GPUs", "library", libPath, "count", deviceCount)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
137
138
			cHandles.cudart = cudart
			cHandles.deviceCount = deviceCount
139
			cudartLibPath = libPath
Daniel Hiltgen's avatar
Daniel Hiltgen committed
140
			return cHandles
141
142
		}
	}
Wang,Zhe's avatar
Wang,Zhe committed
143

Daniel Hiltgen's avatar
Daniel Hiltgen committed
144
145
146
147
148
149
150
151
152
153
154
155
156
	return cHandles
}

// Note: gpuMutex must already be held
func initOneAPIHandles() *oneapiHandles {
	oHandles := &oneapiHandles{}

	// Short Circuit if we already know which library to use
	if oneapiLibPath != "" {
		oHandles.deviceCount, oHandles.oneapi, _ = LoadOneapiMgmt([]string{oneapiLibPath})
		return oHandles
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
157
	oneapiLibPaths := FindGPULibs(OneapiMgmtName, OneapiGlobs)
158
	if len(oneapiLibPaths) > 0 {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
159
		oHandles.deviceCount, oHandles.oneapi, oneapiLibPath = LoadOneapiMgmt(oneapiLibPaths)
160
161
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
162
	return oHandles
163
164
}

165
166
167
168
169
170
171
172
173
174
175
// GetCPUInfo returns the CPU entry as a single-element GpuInfoList,
// triggering full discovery first if it has not yet run.
func GetCPUInfo() GpuInfoList {
	gpuMutex.Lock()
	done := bootstrapped
	gpuMutex.Unlock()
	if !done {
		// GetGPUInfo takes gpuMutex itself, so it must run unlocked.
		GetGPUInfo()
	}
	return GpuInfoList{cpus[0].GpuInfo}
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
176
func GetGPUInfo() GpuInfoList {
177
178
179
180
	// TODO - consider exploring lspci (and equivalent on windows) to check for
	// GPUs so we can report warnings if we see Nvidia/AMD but fail to load the libraries
	gpuMutex.Lock()
	defer gpuMutex.Unlock()
181
	needRefresh := true
Daniel Hiltgen's avatar
Daniel Hiltgen committed
182
183
	var cHandles *cudaHandles
	var oHandles *oneapiHandles
184
	defer func() {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
185
186
187
188
189
190
191
		if cHandles != nil {
			if cHandles.cudart != nil {
				C.cudart_release(*cHandles.cudart)
			}
			if cHandles.nvcuda != nil {
				C.nvcuda_release(*cHandles.nvcuda)
			}
192
193
194
			if cHandles.nvml != nil {
				C.nvml_release(*cHandles.nvml)
			}
195
		}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
196
197
198
199
200
		if oHandles != nil {
			if oHandles.oneapi != nil {
				// TODO - is this needed?
				C.oneapi_release(*oHandles.oneapi)
			}
201
		}
202
	}()
203

204
	if !bootstrapped {
205
		slog.Info("looking for compatible GPUs")
206
		needRefresh = false
Daniel Hiltgen's avatar
Daniel Hiltgen committed
207
		cpuCapability = GetCPUCapability()
208
		var memInfo C.mem_info_t
209
210
211
212

		mem, err := GetCPUMem()
		if err != nil {
			slog.Warn("error looking up system memory", "error", err)
213
		}
214
		cpus = []CPUInfo{CPUInfo{
215
			GpuInfo: GpuInfo{
216
				memInfo: mem,
217
				Library: "cpu",
Daniel Hiltgen's avatar
Daniel Hiltgen committed
218
				Variant: cpuCapability,
219
				ID:      "0",
220
			},
221
		}}
222
223
224

		// Fallback to CPU mode if we're lacking required vector extensions on x86
		if cpuCapability < GPURunnerCPUCapability && runtime.GOARCH == "amd64" {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
225
			slog.Warn("CPU does not have minimum vector extensions, GPU inference disabled", "required", GPURunnerCPUCapability, "detected", cpuCapability)
226
227
228
229
			bootstrapped = true
			// No need to do any GPU discovery, since we can't run on them
			return GpuInfoList{cpus[0].GpuInfo}
		}
230

231
232
233
		// On windows we bundle the nvidia library one level above the runner dir
		depPath := ""
		if runtime.GOOS == "windows" && envconfig.RunnersDir != "" {
234
			depPath = filepath.Join(filepath.Dir(envconfig.RunnersDir), "cuda")
235
		}
236
237

		// Load ALL libraries
Daniel Hiltgen's avatar
Daniel Hiltgen committed
238
		cHandles = initCudaHandles()
239
240

		// NVIDIA
Daniel Hiltgen's avatar
Daniel Hiltgen committed
241
242
		for i := range cHandles.deviceCount {
			if cHandles.cudart != nil || cHandles.nvcuda != nil {
243
244
245
246
247
248
249
250
				gpuInfo := CudaGPUInfo{
					GpuInfo: GpuInfo{
						Library: "cuda",
					},
					index: i,
				}
				var driverMajor int
				var driverMinor int
Daniel Hiltgen's avatar
Daniel Hiltgen committed
251
252
				if cHandles.cudart != nil {
					C.cudart_bootstrap(*cHandles.cudart, C.int(i), &memInfo)
253
				} else {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
254
255
256
					C.nvcuda_bootstrap(*cHandles.nvcuda, C.int(i), &memInfo)
					driverMajor = int(cHandles.nvcuda.driver_major)
					driverMinor = int(cHandles.nvcuda.driver_minor)
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
				}
				if memInfo.err != nil {
					slog.Info("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
					C.free(unsafe.Pointer(memInfo.err))
					continue
				}
				if memInfo.major < CudaComputeMin[0] || (memInfo.major == CudaComputeMin[0] && memInfo.minor < CudaComputeMin[1]) {
					slog.Info(fmt.Sprintf("[%d] CUDA GPU is too old. Compute Capability detected: %d.%d", i, memInfo.major, memInfo.minor))
					continue
				}
				gpuInfo.TotalMemory = uint64(memInfo.total)
				gpuInfo.FreeMemory = uint64(memInfo.free)
				gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
				gpuInfo.Compute = fmt.Sprintf("%d.%d", memInfo.major, memInfo.minor)
				gpuInfo.MinimumMemory = cudaMinimumMemory
				gpuInfo.DependencyPath = depPath
				gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
Daniel Hiltgen's avatar
Daniel Hiltgen committed
274
275
				gpuInfo.DriverMajor = driverMajor
				gpuInfo.DriverMinor = driverMinor
276
277
278

				// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
				cudaGPUs = append(cudaGPUs, gpuInfo)
Wang,Zhe's avatar
Wang,Zhe committed
279
			}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
280
281
282
		}

		// Intel
283
284
		if envconfig.IntelGpu {
			oHandles = initOneAPIHandles()
285
286
287
288
289
290
			// On windows we bundle the oneapi library one level above the runner dir
			depPath = ""
			if runtime.GOOS == "windows" && envconfig.RunnersDir != "" {
				depPath = filepath.Join(filepath.Dir(envconfig.RunnersDir), "oneapi")
			}

291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
			for d := range oHandles.oneapi.num_drivers {
				if oHandles.oneapi == nil {
					// shouldn't happen
					slog.Warn("nil oneapi handle with driver count", "count", int(oHandles.oneapi.num_drivers))
					continue
				}
				devCount := C.oneapi_get_device_count(*oHandles.oneapi, C.int(d))
				for i := range devCount {
					gpuInfo := OneapiGPUInfo{
						GpuInfo: GpuInfo{
							Library: "oneapi",
						},
						driverIndex: int(d),
						gpuIndex:    int(i),
					}
					// TODO - split bootstrapping from updating free memory
					C.oneapi_check_vram(*oHandles.oneapi, C.int(d), i, &memInfo)
					// TODO - convert this to MinimumMemory based on testing...
					var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
					memInfo.free = C.uint64_t(totalFreeMem)
					gpuInfo.TotalMemory = uint64(memInfo.total)
					gpuInfo.FreeMemory = uint64(memInfo.free)
					gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
					gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
315
					gpuInfo.DependencyPath = depPath
316
					oneapiGPUs = append(oneapiGPUs, gpuInfo)
317
318
319
320
321
322
				}
			}
		}

		rocmGPUs = AMDGetGPUInfo()
		bootstrapped = true
323
324
325
		if len(cudaGPUs) == 0 && len(rocmGPUs) == 0 && len(oneapiGPUs) == 0 {
			slog.Info("no compatible GPUs were discovered")
		}
326
327
328
329
330
331
	}

	// For detected GPUs, load library if not loaded

	// Refresh free memory usage
	if needRefresh {
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
		mem, err := GetCPUMem()
		if err != nil {
			slog.Warn("error looking up system memory", "error", err)
		} else {
			slog.Debug("updating system memory data",
				slog.Group(
					"before",
					"total", format.HumanBytes2(cpus[0].TotalMemory),
					"free", format.HumanBytes2(cpus[0].FreeMemory),
				),
				slog.Group(
					"now",
					"total", format.HumanBytes2(mem.TotalMemory),
					"free", format.HumanBytes2(mem.FreeMemory),
				),
			)
			cpus[0].FreeMemory = mem.FreeMemory
		}

351
		var memInfo C.mem_info_t
Daniel Hiltgen's avatar
Daniel Hiltgen committed
352
353
		if cHandles == nil && len(cudaGPUs) > 0 {
			cHandles = initCudaHandles()
354
355
		}
		for i, gpu := range cudaGPUs {
356
357
358
			if cHandles.nvml != nil {
				C.nvml_get_free(*cHandles.nvml, C.int(gpu.index), &memInfo.free, &memInfo.total, &memInfo.used)
			} else if cHandles.cudart != nil {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
359
				C.cudart_bootstrap(*cHandles.cudart, C.int(gpu.index), &memInfo)
360
361
362
			} else if cHandles.nvcuda != nil {
				C.nvcuda_get_free(*cHandles.nvcuda, C.int(gpu.index), &memInfo.free, &memInfo.total)
				memInfo.used = memInfo.total - memInfo.free
Wang,Zhe's avatar
Wang,Zhe committed
363
			} else {
364
365
366
				// shouldn't happen
				slog.Warn("no valid cuda library loaded to refresh vram usage")
				break
Wang,Zhe's avatar
Wang,Zhe committed
367
368
			}
			if memInfo.err != nil {
369
				slog.Warn("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
Wang,Zhe's avatar
Wang,Zhe committed
370
371
372
				C.free(unsafe.Pointer(memInfo.err))
				continue
			}
373
374
			if memInfo.free == 0 {
				slog.Warn("error looking up nvidia GPU memory")
Wang,Zhe's avatar
Wang,Zhe committed
375
376
				continue
			}
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
			slog.Debug("updating cuda memory data",
				"gpu", gpu.ID,
				"name", gpu.Name,
				slog.Group(
					"before",
					"total", format.HumanBytes2(gpu.TotalMemory),
					"free", format.HumanBytes2(gpu.FreeMemory),
				),
				slog.Group(
					"now",
					"total", format.HumanBytes2(uint64(memInfo.total)),
					"free", format.HumanBytes2(uint64(memInfo.free)),
					"used", format.HumanBytes2(uint64(memInfo.used)),
				),
			)
392
			cudaGPUs[i].FreeMemory = uint64(memInfo.free)
393
		}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410

		if oHandles == nil && len(oneapiGPUs) > 0 {
			oHandles = initOneAPIHandles()
		}
		for i, gpu := range oneapiGPUs {
			if oHandles.oneapi == nil {
				// shouldn't happen
				slog.Warn("nil oneapi handle with device count", "count", oHandles.deviceCount)
				continue
			}
			C.oneapi_check_vram(*oHandles.oneapi, C.int(gpu.driverIndex), C.int(gpu.gpuIndex), &memInfo)
			// TODO - convert this to MinimumMemory based on testing...
			var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
			memInfo.free = C.uint64_t(totalFreeMem)
			oneapiGPUs[i].FreeMemory = uint64(memInfo.free)
		}

411
		err = RocmGPUInfoList(rocmGPUs).RefreshFreeMemory()
412
413
		if err != nil {
			slog.Debug("problem refreshing ROCm free memory", "error", err)
414
		}
415
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
416

417
418
419
420
421
422
423
	resp := []GpuInfo{}
	for _, gpu := range cudaGPUs {
		resp = append(resp, gpu.GpuInfo)
	}
	for _, gpu := range rocmGPUs {
		resp = append(resp, gpu.GpuInfo)
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
424
425
426
	for _, gpu := range oneapiGPUs {
		resp = append(resp, gpu.GpuInfo)
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
427
	if len(resp) == 0 {
428
		resp = append(resp, cpus[0].GpuInfo)
429
430
431
432
	}
	return resp
}

433
// FindGPULibs returns symlink-resolved, de-duplicated paths of candidate GPU
// libraries matching baseLibName, searching the platform's dynamic-loader
// path first and then the supplied default glob patterns. Unsupported
// platforms yield an empty (non-nil) list.
func FindGPULibs(baseLibName string, defaultPatterns []string) []string {
	// Multiple GPU libraries may exist, and some may not work, so keep trying until we exhaust them
	gpuLibPaths := []string{}
	slog.Debug("Searching for GPU library", "name", baseLibName)

	var ldPaths []string
	switch runtime.GOOS {
	case "windows":
		ldPaths = strings.Split(os.Getenv("PATH"), ";")
	case "linux":
		ldPaths = strings.Split(os.Getenv("LD_LIBRARY_PATH"), ":")
	default:
		return gpuLibPaths
	}

	// Start with whatever we find in the PATH/LD_LIBRARY_PATH
	patterns := make([]string, 0, len(ldPaths)+len(defaultPatterns))
	for _, ldPath := range ldPaths {
		dir, err := filepath.Abs(ldPath)
		if err != nil {
			continue
		}
		patterns = append(patterns, filepath.Join(dir, baseLibName+"*"))
	}
	patterns = append(patterns, defaultPatterns...)
	slog.Debug("gpu library search", "globs", patterns)

	seen := make(map[string]bool)
	for _, pattern := range patterns {
		// Nvidia PhysX known to return bogus results
		if strings.Contains(pattern, "PhysX") {
			slog.Debug("skipping PhysX cuda library path", "path", pattern)
			continue
		}
		// Ignore glob discovery errors
		matches, _ := filepath.Glob(pattern)
		for _, match := range matches {
			// Resolve any links so we don't try the same lib multiple times
			// and weed out any dups across globs
			resolved := match
			next := match
			for {
				if !filepath.IsAbs(next) {
					next = filepath.Join(filepath.Dir(resolved), next)
				}
				resolved = next
				target, err := os.Readlink(resolved)
				if err != nil {
					break
				}
				next = target
			}
			if !seen[resolved] {
				seen[resolved] = true
				gpuLibPaths = append(gpuLibPaths, resolved)
			}
		}
	}
	slog.Debug("discovered GPU libraries", "paths", gpuLibPaths)
	return gpuLibPaths
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
495
func LoadCUDARTMgmt(cudartLibPaths []string) (int, *C.cudart_handle_t, string) {
496
	var resp C.cudart_init_resp_t
497
	resp.ch.verbose = getVerboseState()
498
	for _, libPath := range cudartLibPaths {
499
500
		lib := C.CString(libPath)
		defer C.free(unsafe.Pointer(lib))
501
		C.cudart_init(lib, &resp)
502
		if resp.err != nil {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
503
			slog.Debug("Unable to load cudart", "library", libPath, "error", C.GoString(resp.err))
504
505
			C.free(unsafe.Pointer(resp.err))
		} else {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
506
			return int(resp.num_devices), &resp.ch, libPath
507
508
		}
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
509
	return 0, nil, ""
510
511
}

512
513
514
515
516
517
518
519
// LoadNVCUDAMgmt tries each candidate CUDA driver library path in order and
// returns the device count, handle, and path of the first one that
// initializes, or (0, nil, "") when none do. Load failures are logged at a
// level chosen from the CUDA error code.
func LoadNVCUDAMgmt(nvcudaLibPaths []string) (int, *C.nvcuda_handle_t, string) {
	var resp C.nvcuda_init_resp_t
	resp.ch.verbose = getVerboseState()
	for _, libPath := range nvcudaLibPaths {
		lib := C.CString(libPath)
		defer C.free(unsafe.Pointer(lib))
		C.nvcuda_init(lib, &resp)
		if resp.err == nil {
			return int(resp.num_devices), &resp.ch, libPath
		}
		// Decide what log level based on the type of error message to help users understand why
		msg := C.GoString(resp.err)
		switch resp.cudaErr {
		case C.CUDA_ERROR_INSUFFICIENT_DRIVER, C.CUDA_ERROR_SYSTEM_DRIVER_MISMATCH:
			slog.Warn("version mismatch between driver and cuda driver library - reboot or upgrade may be required", "library", libPath, "error", msg)
		case C.CUDA_ERROR_NO_DEVICE:
			slog.Info("no nvidia devices detected", "library", libPath)
		case C.CUDA_ERROR_UNKNOWN:
			slog.Warn("unknown error initializing cuda driver library", "library", libPath, "error", msg)
			slog.Warn("see https://github.com/ollama/ollama/blob/main/docs/troubleshooting.md for more information")
		default:
			if strings.Contains(msg, "wrong ELF class") {
				slog.Debug("skipping 32bit library", "library", libPath)
			} else {
				slog.Info("unable to load cuda driver library", "library", libPath, "error", msg)
			}
		}
		C.free(unsafe.Pointer(resp.err))
	}
	return 0, nil, ""
}

545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
// LoadNVMLMgmt tries each candidate NVML library path in order and returns
// the handle and path of the first one that initializes, or (nil, "").
func LoadNVMLMgmt(nvmlLibPaths []string) (*C.nvml_handle_t, string) {
	var resp C.nvml_init_resp_t
	resp.ch.verbose = getVerboseState()
	for _, libPath := range nvmlLibPaths {
		lib := C.CString(libPath)
		defer C.free(unsafe.Pointer(lib))
		C.nvml_init(lib, &resp)
		if resp.err == nil {
			return &resp.ch, libPath
		}
		slog.Info(fmt.Sprintf("Unable to load NVML management library %s: %s", libPath, C.GoString(resp.err)))
		C.free(unsafe.Pointer(resp.err))
	}
	return nil, ""
}

Wang,Zhe's avatar
Wang,Zhe committed
562
563
func LoadOneapiMgmt(oneapiLibPaths []string) (int, *C.oneapi_handle_t, string) {
	var resp C.oneapi_init_resp_t
Daniel Hiltgen's avatar
Daniel Hiltgen committed
564
	num_devices := 0
Wang,Zhe's avatar
Wang,Zhe committed
565
566
567
568
569
570
571
572
573
	resp.oh.verbose = getVerboseState()
	for _, libPath := range oneapiLibPaths {
		lib := C.CString(libPath)
		defer C.free(unsafe.Pointer(lib))
		C.oneapi_init(lib, &resp)
		if resp.err != nil {
			slog.Debug("Unable to load oneAPI management library", "library", libPath, "error", C.GoString(resp.err))
			C.free(unsafe.Pointer(resp.err))
		} else {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
574
			for i := range resp.oh.num_drivers {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
575
576
577
				num_devices += int(C.oneapi_get_device_count(resp.oh, C.int(i)))
			}
			return num_devices, &resp.oh, libPath
Wang,Zhe's avatar
Wang,Zhe committed
578
579
580
581
582
		}
	}
	return 0, nil, ""
}

583
// getVerboseState maps the envconfig.Debug flag onto the C-side verbosity
// flag (1 when debugging, 0 otherwise).
func getVerboseState() C.uint16_t {
	var verbose C.uint16_t
	if envconfig.Debug {
		verbose = 1
	}
	return verbose
}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
589
590
591
592
593
594
595
596
597
598
599
600
601
602

// Given the list of GPUs this instantiation is targeted for,
// figure out the visible devices environment variable
//
// If different libraries are detected, the first one is what we use
func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) {
	if len(l) == 0 {
		return "", ""
	}
	switch l[0].Library {
	case "cuda":
		return cudaGetVisibleDevicesEnv(l)
	case "rocm":
		return rocmGetVisibleDevicesEnv(l)
Wang,Zhe's avatar
Wang,Zhe committed
603
604
	case "oneapi":
		return oneapiGetVisibleDevicesEnv(l)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
605
606
607
608
609
	default:
		slog.Debug("no filter required for library " + l[0].Library)
		return "", ""
	}
}