gpu.go 22.5 KB
Newer Older
1
2
//go:build linux || windows

3
package discover
4
5

/*
6
7
8
#cgo linux LDFLAGS: -lrt -lpthread -ldl -lstdc++ -lm
#cgo windows LDFLAGS: -lpthread

9
10
11
#include "gpu_info.h"
*/
import "C"
Michael Yang's avatar
lint  
Michael Yang committed
12

13
14
import (
	"fmt"
15
	"log/slog"
16
17
	"os"
	"path/filepath"
18
	"runtime"
19
	"strconv"
20
	"strings"
21
22
	"sync"
	"unsafe"
Michael Yang's avatar
Michael Yang committed
23

24
	"github.com/ollama/ollama/envconfig"
25
	"github.com/ollama/ollama/format"
26
	"github.com/ollama/ollama/runners"
27
28
)

Daniel Hiltgen's avatar
Daniel Hiltgen committed
29
type cudaHandles struct {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
30
31
	deviceCount int
	cudart      *C.cudart_handle_t
32
	nvcuda      *C.nvcuda_handle_t
33
	nvml        *C.nvml_handle_t
Daniel Hiltgen's avatar
Daniel Hiltgen committed
34
35
36
}

type oneapiHandles struct {
Wang,Zhe's avatar
Wang,Zhe committed
37
	oneapi      *C.oneapi_handle_t
Daniel Hiltgen's avatar
Daniel Hiltgen committed
38
	deviceCount int
39
40
}

Michael Yang's avatar
Michael Yang committed
41
const (
Daniel Hiltgen's avatar
Daniel Hiltgen committed
42
43
	cudaMinimumMemory = 457 * format.MebiByte
	rocmMinimumMemory = 457 * format.MebiByte
Daniel Hiltgen's avatar
Daniel Hiltgen committed
44
	// TODO OneAPI minimum memory
Michael Yang's avatar
Michael Yang committed
45
46
)

47
48
49
50
51
52
53
54
var (
	gpuMutex      sync.Mutex
	bootstrapped  bool
	cpus          []CPUInfo
	cudaGPUs      []CudaGPUInfo
	nvcudaLibPath string
	cudartLibPath string
	oneapiLibPath string
55
	nvmlLibPath   string
56
57
	rocmGPUs      []RocmGPUInfo
	oneapiGPUs    []OneapiGPUInfo
58
59
60
61
62
63
64

	// If any discovered GPUs are incompatible, report why
	unsupportedGPUs []UnsupportedGPUInfo

	// Keep track of errors during bootstrapping so that if GPUs are missing
	// they expected to be present this may explain why
	bootstrapErrors []error
65
)
66

67
// Minimum supported CUDA compute capability (major.minor).
//
// With our current CUDA compile flags, older than 5.0 will not work properly.
// The values are strings, rather than ints, so that they can be overridden
// through ldflags at build time; they are parsed with strconv.Atoi during
// GPU discovery.
var (
	CudaComputeMajorMin = "5"
	CudaComputeMinorMin = "0"
)
73

74
// RocmComputeMajorMin is a string-typed major version floor for ROCm devices.
// NOTE(review): it is not consumed in this file; presumably it is a string
// for the same ldflags-override reason as the CUDA minimums - confirm.
var RocmComputeMajorMin = "9"
75

Daniel Hiltgen's avatar
Daniel Hiltgen committed
76
77
// IGPUMemLimit is the total-memory threshold below which a device is assumed
// to be an integrated GPU.
// TODO find a better way to detect iGPU instead of minimum memory
const IGPUMemLimit = 1 * format.GibiByte // 512M is what they typically report, so anything less than 1G must be iGPU
78

79
// Note: gpuMutex must already be held
Daniel Hiltgen's avatar
Daniel Hiltgen committed
80
func initCudaHandles() *cudaHandles {
81
	// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing
82

Daniel Hiltgen's avatar
Daniel Hiltgen committed
83
	cHandles := &cudaHandles{}
84
	// Short Circuit if we already know which library to use
85
	// ignore bootstrap errors in this case since we already recorded them
86
	if nvmlLibPath != "" {
87
		cHandles.nvml, _, _ = loadNVMLMgmt([]string{nvmlLibPath})
88
89
		return cHandles
	}
90
	if nvcudaLibPath != "" {
91
		cHandles.deviceCount, cHandles.nvcuda, _, _ = loadNVCUDAMgmt([]string{nvcudaLibPath})
Daniel Hiltgen's avatar
Daniel Hiltgen committed
92
		return cHandles
93
94
	}
	if cudartLibPath != "" {
95
		cHandles.deviceCount, cHandles.cudart, _, _ = loadCUDARTMgmt([]string{cudartLibPath})
Daniel Hiltgen's avatar
Daniel Hiltgen committed
96
		return cHandles
97
98
99
	}

	slog.Debug("searching for GPU discovery libraries for NVIDIA")
100
	var cudartMgmtPatterns []string
101

Daniel Hiltgen's avatar
Daniel Hiltgen committed
102
103
	// Aligned with driver, we can't carry as payloads
	nvcudaMgmtPatterns := NvcudaGlobs
104

Daniel Hiltgen's avatar
Daniel Hiltgen committed
105
106
107
108
	if runtime.GOOS == "windows" {
		localAppData := os.Getenv("LOCALAPPDATA")
		cudartMgmtPatterns = []string{filepath.Join(localAppData, "Programs", "Ollama", CudartMgmtName)}
	}
109
110
111
	libDirs := LibraryDirs()
	for _, d := range libDirs {
		cudartMgmtPatterns = append(cudartMgmtPatterns, filepath.Join(d, CudartMgmtName))
112
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
113
	cudartMgmtPatterns = append(cudartMgmtPatterns, CudartGlobs...)
114

Daniel Hiltgen's avatar
Daniel Hiltgen committed
115
116
	if len(NvmlGlobs) > 0 {
		nvmlLibPaths := FindGPULibs(NvmlMgmtName, NvmlGlobs)
117
		if len(nvmlLibPaths) > 0 {
118
			nvml, libPath, err := loadNVMLMgmt(nvmlLibPaths)
119
120
121
122
123
			if nvml != nil {
				slog.Debug("nvidia-ml loaded", "library", libPath)
				cHandles.nvml = nvml
				nvmlLibPath = libPath
			}
124
125
126
			if err != nil {
				bootstrapErrors = append(bootstrapErrors, err)
			}
127
128
129
		}
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
130
	nvcudaLibPaths := FindGPULibs(NvcudaMgmtName, nvcudaMgmtPatterns)
131
	if len(nvcudaLibPaths) > 0 {
132
		deviceCount, nvcuda, libPath, err := loadNVCUDAMgmt(nvcudaLibPaths)
133
		if nvcuda != nil {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
134
			slog.Debug("detected GPUs", "count", deviceCount, "library", libPath)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
135
136
			cHandles.nvcuda = nvcuda
			cHandles.deviceCount = deviceCount
137
			nvcudaLibPath = libPath
Daniel Hiltgen's avatar
Daniel Hiltgen committed
138
			return cHandles
139
		}
140
141
142
		if err != nil {
			bootstrapErrors = append(bootstrapErrors, err)
		}
143
144
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
145
	cudartLibPaths := FindGPULibs(CudartMgmtName, cudartMgmtPatterns)
146
	if len(cudartLibPaths) > 0 {
147
		deviceCount, cudart, libPath, err := loadCUDARTMgmt(cudartLibPaths)
148
		if cudart != nil {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
149
			slog.Debug("detected GPUs", "library", libPath, "count", deviceCount)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
150
151
			cHandles.cudart = cudart
			cHandles.deviceCount = deviceCount
152
			cudartLibPath = libPath
Daniel Hiltgen's avatar
Daniel Hiltgen committed
153
			return cHandles
154
		}
155
156
157
		if err != nil {
			bootstrapErrors = append(bootstrapErrors, err)
		}
158
	}
Wang,Zhe's avatar
Wang,Zhe committed
159

Daniel Hiltgen's avatar
Daniel Hiltgen committed
160
161
162
163
164
165
166
167
	return cHandles
}

// Note: gpuMutex must already be held
func initOneAPIHandles() *oneapiHandles {
	oHandles := &oneapiHandles{}

	// Short Circuit if we already know which library to use
168
	// ignore bootstrap errors in this case since we already recorded them
Daniel Hiltgen's avatar
Daniel Hiltgen committed
169
	if oneapiLibPath != "" {
170
		oHandles.deviceCount, oHandles.oneapi, _, _ = loadOneapiMgmt([]string{oneapiLibPath})
Daniel Hiltgen's avatar
Daniel Hiltgen committed
171
172
173
		return oHandles
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
174
	oneapiLibPaths := FindGPULibs(OneapiMgmtName, OneapiGlobs)
175
	if len(oneapiLibPaths) > 0 {
176
177
178
179
180
		var err error
		oHandles.deviceCount, oHandles.oneapi, oneapiLibPath, err = loadOneapiMgmt(oneapiLibPaths)
		if err != nil {
			bootstrapErrors = append(bootstrapErrors, err)
		}
181
182
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
183
	return oHandles
184
185
}

186
187
188
189
190
191
192
193
194
195
196
// GetCPUInfo returns a single-element list describing the CPU, running GPU
// discovery first if it has not happened yet.
func GetCPUInfo() GpuInfoList {
	gpuMutex.Lock()
	ready := bootstrapped
	gpuMutex.Unlock()

	if !ready {
		// GetGPUInfo acquires gpuMutex itself, so it must be called with the
		// lock released.
		GetGPUInfo()
	}

	// Read cpus under the lock: GetGPUInfo updates cpus[0] on refresh while
	// holding gpuMutex, so an unlocked read here would race with it.
	gpuMutex.Lock()
	defer gpuMutex.Unlock()
	return GpuInfoList{cpus[0].GpuInfo}
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
197
func GetGPUInfo() GpuInfoList {
198
199
200
201
	// TODO - consider exploring lspci (and equivalent on windows) to check for
	// GPUs so we can report warnings if we see Nvidia/AMD but fail to load the libraries
	gpuMutex.Lock()
	defer gpuMutex.Unlock()
202
	needRefresh := true
Daniel Hiltgen's avatar
Daniel Hiltgen committed
203
204
	var cHandles *cudaHandles
	var oHandles *oneapiHandles
205
	defer func() {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
206
207
208
209
210
211
212
		if cHandles != nil {
			if cHandles.cudart != nil {
				C.cudart_release(*cHandles.cudart)
			}
			if cHandles.nvcuda != nil {
				C.nvcuda_release(*cHandles.nvcuda)
			}
213
214
215
			if cHandles.nvml != nil {
				C.nvml_release(*cHandles.nvml)
			}
216
		}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
217
218
219
220
221
		if oHandles != nil {
			if oHandles.oneapi != nil {
				// TODO - is this needed?
				C.oneapi_release(*oHandles.oneapi)
			}
222
		}
223
	}()
224

225
	if !bootstrapped {
226
		slog.Info("looking for compatible GPUs")
227
228
229
230
231
232
233
234
		cudaComputeMajorMin, err := strconv.Atoi(CudaComputeMajorMin)
		if err != nil {
			slog.Error("invalid CudaComputeMajorMin setting", "value", CudaComputeMajorMin, "error", err)
		}
		cudaComputeMinorMin, err := strconv.Atoi(CudaComputeMinorMin)
		if err != nil {
			slog.Error("invalid CudaComputeMinorMin setting", "value", CudaComputeMinorMin, "error", err)
		}
235
		bootstrapErrors = []error{}
236
237
		needRefresh = false
		var memInfo C.mem_info_t
238
239
240
241

		mem, err := GetCPUMem()
		if err != nil {
			slog.Warn("error looking up system memory", "error", err)
242
		}
243
		depPaths := LibraryDirs()
244
245
246
247
		details, err := GetCPUDetails()
		if err != nil {
			slog.Warn("failed to lookup CPU details", "error", err)
		}
Michael Yang's avatar
lint  
Michael Yang committed
248
249
250
		cpus = []CPUInfo{
			{
				GpuInfo: GpuInfo{
251
252
					memInfo:        mem,
					Library:        "cpu",
253
					Variant:        runners.GetCPUCapability().String(),
254
					ID:             "0",
255
					DependencyPath: depPaths,
Michael Yang's avatar
lint  
Michael Yang committed
256
				},
257
				CPUs: details,
258
			},
Michael Yang's avatar
lint  
Michael Yang committed
259
		}
260
261

		// Load ALL libraries
Daniel Hiltgen's avatar
Daniel Hiltgen committed
262
		cHandles = initCudaHandles()
263
264

		// NVIDIA
Daniel Hiltgen's avatar
Daniel Hiltgen committed
265
266
		for i := range cHandles.deviceCount {
			if cHandles.cudart != nil || cHandles.nvcuda != nil {
267
268
269
270
271
272
273
274
				gpuInfo := CudaGPUInfo{
					GpuInfo: GpuInfo{
						Library: "cuda",
					},
					index: i,
				}
				var driverMajor int
				var driverMinor int
Daniel Hiltgen's avatar
Daniel Hiltgen committed
275
276
				if cHandles.cudart != nil {
					C.cudart_bootstrap(*cHandles.cudart, C.int(i), &memInfo)
277
				} else {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
278
279
280
					C.nvcuda_bootstrap(*cHandles.nvcuda, C.int(i), &memInfo)
					driverMajor = int(cHandles.nvcuda.driver_major)
					driverMinor = int(cHandles.nvcuda.driver_minor)
281
282
283
284
285
286
287
288
289
290
				}
				if memInfo.err != nil {
					slog.Info("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
					C.free(unsafe.Pointer(memInfo.err))
					continue
				}
				gpuInfo.TotalMemory = uint64(memInfo.total)
				gpuInfo.FreeMemory = uint64(memInfo.free)
				gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
				gpuInfo.Compute = fmt.Sprintf("%d.%d", memInfo.major, memInfo.minor)
291
292
				gpuInfo.computeMajor = int(memInfo.major)
				gpuInfo.computeMinor = int(memInfo.minor)
293
				gpuInfo.MinimumMemory = cudaMinimumMemory
294
295
				gpuInfo.DriverMajor = driverMajor
				gpuInfo.DriverMinor = driverMinor
Daniel Hiltgen's avatar
Daniel Hiltgen committed
296
				variant := cudaVariant(gpuInfo)
297
298
				if depPaths != nil {
					gpuInfo.DependencyPath = depPaths
299
					// Check for variant specific directory
Daniel Hiltgen's avatar
Daniel Hiltgen committed
300
					if variant != "" {
301
302
303
304
305
306
						for _, d := range depPaths {
							if _, err := os.Stat(filepath.Join(d, "cuda_"+variant)); err == nil {
								// Put the variant directory first in the search path to avoid runtime linking to the wrong library
								gpuInfo.DependencyPath = append([]string{filepath.Join(d, "cuda_"+variant)}, gpuInfo.DependencyPath...)
								break
							}
307
308
309
						}
					}
				}
310
				gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
Daniel Hiltgen's avatar
Daniel Hiltgen committed
311
				gpuInfo.Variant = variant
312

313
				if int(memInfo.major) < cudaComputeMajorMin || (int(memInfo.major) == cudaComputeMajorMin && int(memInfo.minor) < cudaComputeMinorMin) {
314
315
316
317
318
319
320
321
					unsupportedGPUs = append(unsupportedGPUs,
						UnsupportedGPUInfo{
							GpuInfo: gpuInfo.GpuInfo,
						})
					slog.Info(fmt.Sprintf("[%d] CUDA GPU is too old. Compute Capability detected: %d.%d", i, memInfo.major, memInfo.minor))
					continue
				}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
322
323
324
				// query the management library as well so we can record any skew between the two
				// which represents overhead on the GPU we must set aside on subsequent updates
				if cHandles.nvml != nil {
325
326
327
					uuid := C.CString(gpuInfo.ID)
					defer C.free(unsafe.Pointer(uuid))
					C.nvml_get_free(*cHandles.nvml, uuid, &memInfo.free, &memInfo.total, &memInfo.used)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
					if memInfo.err != nil {
						slog.Warn("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
						C.free(unsafe.Pointer(memInfo.err))
					} else {
						if memInfo.free != 0 && uint64(memInfo.free) > gpuInfo.FreeMemory {
							gpuInfo.OSOverhead = uint64(memInfo.free) - gpuInfo.FreeMemory
							slog.Info("detected OS VRAM overhead",
								"id", gpuInfo.ID,
								"library", gpuInfo.Library,
								"compute", gpuInfo.Compute,
								"driver", fmt.Sprintf("%d.%d", gpuInfo.DriverMajor, gpuInfo.DriverMinor),
								"name", gpuInfo.Name,
								"overhead", format.HumanBytes2(gpuInfo.OSOverhead),
							)
						}
					}
				}

346
347
				// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
				cudaGPUs = append(cudaGPUs, gpuInfo)
Wang,Zhe's avatar
Wang,Zhe committed
348
			}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
349
350
351
		}

		// Intel
Michael Yang's avatar
bool  
Michael Yang committed
352
		if envconfig.IntelGPU() {
353
			oHandles = initOneAPIHandles()
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
			if oHandles != nil && oHandles.oneapi != nil {
				for d := range oHandles.oneapi.num_drivers {
					if oHandles.oneapi == nil {
						// shouldn't happen
						slog.Warn("nil oneapi handle with driver count", "count", int(oHandles.oneapi.num_drivers))
						continue
					}
					devCount := C.oneapi_get_device_count(*oHandles.oneapi, C.int(d))
					for i := range devCount {
						gpuInfo := OneapiGPUInfo{
							GpuInfo: GpuInfo{
								Library: "oneapi",
							},
							driverIndex: int(d),
							gpuIndex:    int(i),
						}
						// TODO - split bootstrapping from updating free memory
						C.oneapi_check_vram(*oHandles.oneapi, C.int(d), i, &memInfo)
						// TODO - convert this to MinimumMemory based on testing...
						var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
						memInfo.free = C.uint64_t(totalFreeMem)
						gpuInfo.TotalMemory = uint64(memInfo.total)
						gpuInfo.FreeMemory = uint64(memInfo.free)
						gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
						gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
379
						gpuInfo.DependencyPath = depPaths
380
						oneapiGPUs = append(oneapiGPUs, gpuInfo)
381
					}
382
383
384
385
				}
			}
		}

386
387
388
389
		rocmGPUs, err = AMDGetGPUInfo()
		if err != nil {
			bootstrapErrors = append(bootstrapErrors, err)
		}
390
		bootstrapped = true
391
392
393
		if len(cudaGPUs) == 0 && len(rocmGPUs) == 0 && len(oneapiGPUs) == 0 {
			slog.Info("no compatible GPUs were discovered")
		}
394
395

		// TODO verify we have runners for the discovered GPUs, filter out any that aren't supported with good error messages
396
397
398
399
400
401
	}

	// For detected GPUs, load library if not loaded

	// Refresh free memory usage
	if needRefresh {
402
403
404
405
406
407
408
409
410
		mem, err := GetCPUMem()
		if err != nil {
			slog.Warn("error looking up system memory", "error", err)
		} else {
			slog.Debug("updating system memory data",
				slog.Group(
					"before",
					"total", format.HumanBytes2(cpus[0].TotalMemory),
					"free", format.HumanBytes2(cpus[0].FreeMemory),
411
					"free_swap", format.HumanBytes2(cpus[0].FreeSwap),
412
413
414
415
416
				),
				slog.Group(
					"now",
					"total", format.HumanBytes2(mem.TotalMemory),
					"free", format.HumanBytes2(mem.FreeMemory),
417
					"free_swap", format.HumanBytes2(mem.FreeSwap),
418
419
420
				),
			)
			cpus[0].FreeMemory = mem.FreeMemory
421
			cpus[0].FreeSwap = mem.FreeSwap
422
423
		}

424
		var memInfo C.mem_info_t
Daniel Hiltgen's avatar
Daniel Hiltgen committed
425
426
		if cHandles == nil && len(cudaGPUs) > 0 {
			cHandles = initCudaHandles()
427
428
		}
		for i, gpu := range cudaGPUs {
429
			if cHandles.nvml != nil {
430
431
432
				uuid := C.CString(gpu.ID)
				defer C.free(unsafe.Pointer(uuid))
				C.nvml_get_free(*cHandles.nvml, uuid, &memInfo.free, &memInfo.total, &memInfo.used)
433
			} else if cHandles.cudart != nil {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
434
				C.cudart_bootstrap(*cHandles.cudart, C.int(gpu.index), &memInfo)
435
436
437
			} else if cHandles.nvcuda != nil {
				C.nvcuda_get_free(*cHandles.nvcuda, C.int(gpu.index), &memInfo.free, &memInfo.total)
				memInfo.used = memInfo.total - memInfo.free
Wang,Zhe's avatar
Wang,Zhe committed
438
			} else {
439
440
441
				// shouldn't happen
				slog.Warn("no valid cuda library loaded to refresh vram usage")
				break
Wang,Zhe's avatar
Wang,Zhe committed
442
443
			}
			if memInfo.err != nil {
444
				slog.Warn("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
Wang,Zhe's avatar
Wang,Zhe committed
445
446
447
				C.free(unsafe.Pointer(memInfo.err))
				continue
			}
448
449
			if memInfo.free == 0 {
				slog.Warn("error looking up nvidia GPU memory")
Wang,Zhe's avatar
Wang,Zhe committed
450
451
				continue
			}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
452
453
454
455
			if cHandles.nvml != nil && gpu.OSOverhead > 0 {
				// When using the management library update based on recorded overhead
				memInfo.free -= C.uint64_t(gpu.OSOverhead)
			}
456
457
458
			slog.Debug("updating cuda memory data",
				"gpu", gpu.ID,
				"name", gpu.Name,
Daniel Hiltgen's avatar
Daniel Hiltgen committed
459
				"overhead", format.HumanBytes2(gpu.OSOverhead),
460
461
462
463
464
465
466
467
468
469
470
471
				slog.Group(
					"before",
					"total", format.HumanBytes2(gpu.TotalMemory),
					"free", format.HumanBytes2(gpu.FreeMemory),
				),
				slog.Group(
					"now",
					"total", format.HumanBytes2(uint64(memInfo.total)),
					"free", format.HumanBytes2(uint64(memInfo.free)),
					"used", format.HumanBytes2(uint64(memInfo.used)),
				),
			)
472
			cudaGPUs[i].FreeMemory = uint64(memInfo.free)
473
		}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490

		if oHandles == nil && len(oneapiGPUs) > 0 {
			oHandles = initOneAPIHandles()
		}
		for i, gpu := range oneapiGPUs {
			if oHandles.oneapi == nil {
				// shouldn't happen
				slog.Warn("nil oneapi handle with device count", "count", oHandles.deviceCount)
				continue
			}
			C.oneapi_check_vram(*oHandles.oneapi, C.int(gpu.driverIndex), C.int(gpu.gpuIndex), &memInfo)
			// TODO - convert this to MinimumMemory based on testing...
			var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
			memInfo.free = C.uint64_t(totalFreeMem)
			oneapiGPUs[i].FreeMemory = uint64(memInfo.free)
		}

491
		err = RocmGPUInfoList(rocmGPUs).RefreshFreeMemory()
492
493
		if err != nil {
			slog.Debug("problem refreshing ROCm free memory", "error", err)
494
		}
495
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
496

497
498
499
500
501
502
503
	resp := []GpuInfo{}
	for _, gpu := range cudaGPUs {
		resp = append(resp, gpu.GpuInfo)
	}
	for _, gpu := range rocmGPUs {
		resp = append(resp, gpu.GpuInfo)
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
504
505
506
	for _, gpu := range oneapiGPUs {
		resp = append(resp, gpu.GpuInfo)
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
507
	if len(resp) == 0 {
508
		resp = append(resp, cpus[0].GpuInfo)
509
510
511
512
	}
	return resp
}

513
func FindGPULibs(baseLibName string, defaultPatterns []string) []string {
514
515
516
	// Multiple GPU libraries may exist, and some may not work, so keep trying until we exhaust them
	var ldPaths []string
	gpuLibPaths := []string{}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
517
	slog.Debug("Searching for GPU library", "name", baseLibName)
518

Daniel Hiltgen's avatar
Daniel Hiltgen committed
519
	// Start with our bundled libraries
520
521
522
523
	patterns := []string{}
	for _, d := range LibraryDirs() {
		patterns = append(patterns, filepath.Join(d, baseLibName))
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
524

525
526
527
528
529
530
531
532
	switch runtime.GOOS {
	case "windows":
		ldPaths = strings.Split(os.Getenv("PATH"), ";")
	case "linux":
		ldPaths = strings.Split(os.Getenv("LD_LIBRARY_PATH"), ":")
	default:
		return gpuLibPaths
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
533
534

	// Then with whatever we find in the PATH/LD_LIBRARY_PATH
535
536
537
538
539
	for _, ldPath := range ldPaths {
		d, err := filepath.Abs(ldPath)
		if err != nil {
			continue
		}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
540
		patterns = append(patterns, filepath.Join(d, baseLibName))
541
	}
542
	patterns = append(patterns, defaultPatterns...)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
543
	slog.Debug("gpu library search", "globs", patterns)
544
	for _, pattern := range patterns {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
545
546
547
548

		// Nvidia PhysX known to return bogus results
		if strings.Contains(pattern, "PhysX") {
			slog.Debug("skipping PhysX cuda library path", "path", pattern)
549
			continue
Daniel Hiltgen's avatar
Daniel Hiltgen committed
550
		}
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
		// Ignore glob discovery errors
		matches, _ := filepath.Glob(pattern)
		for _, match := range matches {
			// Resolve any links so we don't try the same lib multiple times
			// and weed out any dups across globs
			libPath := match
			tmp := match
			var err error
			for ; err == nil; tmp, err = os.Readlink(libPath) {
				if !filepath.IsAbs(tmp) {
					tmp = filepath.Join(filepath.Dir(libPath), tmp)
				}
				libPath = tmp
			}
			new := true
			for _, cmp := range gpuLibPaths {
				if cmp == libPath {
					new = false
					break
				}
			}
			if new {
				gpuLibPaths = append(gpuLibPaths, libPath)
			}
		}
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
577
	slog.Debug("discovered GPU libraries", "paths", gpuLibPaths)
578
579
580
	return gpuLibPaths
}

581
582
583
// Bootstrap the runtime library
// Returns: num devices, handle, libPath, error
//
// Each path in cudartLibPaths is tried in order and the first one that
// initializes wins. If none does, the handle is nil and the error describes
// the last failure (nil when the list is empty).
func loadCUDARTMgmt(cudartLibPaths []string) (int, *C.cudart_handle_t, string, error) {
	var resp C.cudart_init_resp_t
	resp.ch.verbose = getVerboseState()
	var err error
	for _, libPath := range cudartLibPaths {
		// Free the C copy of the path as soon as init returns; a defer here
		// would hold every candidate's string until the function exits.
		lib := C.CString(libPath)
		C.cudart_init(lib, &resp)
		C.free(unsafe.Pointer(lib))

		if resp.err == nil {
			return int(resp.num_devices), &resp.ch, libPath, nil
		}
		err = fmt.Errorf("Unable to load cudart library %s: %s", libPath, C.GoString(resp.err))
		slog.Debug(err.Error())
		C.free(unsafe.Pointer(resp.err))
		// Don't carry a freed pointer into the next attempt.
		resp.err = nil
	}
	return 0, nil, "", err
}

603
604
605
// Bootstrap the driver library
// Returns: num devices, handle, libPath, error
//
// Each path in nvcudaLibPaths is tried in order and the first one that
// initializes wins. If none does, the handle is nil and the error describes
// the most recent reported failure. Libraries rejected with "wrong ELF class"
// are skipped without recording an error.
func loadNVCUDAMgmt(nvcudaLibPaths []string) (int, *C.nvcuda_handle_t, string, error) {
	var resp C.nvcuda_init_resp_t
	resp.ch.verbose = getVerboseState()
	var err error
	for _, libPath := range nvcudaLibPaths {
		// Free the C copy of the path as soon as init returns; a defer here
		// would hold every candidate's string until the function exits.
		lib := C.CString(libPath)
		C.nvcuda_init(lib, &resp)
		C.free(unsafe.Pointer(lib))

		if resp.err == nil {
			return int(resp.num_devices), &resp.ch, libPath, nil
		}

		// Decide what log level based on the type of error message to help users understand why
		switch resp.cudaErr {
		case C.CUDA_ERROR_INSUFFICIENT_DRIVER, C.CUDA_ERROR_SYSTEM_DRIVER_MISMATCH:
			err = fmt.Errorf("version mismatch between driver and cuda driver library - reboot or upgrade may be required: library %s", libPath)
			slog.Warn(err.Error())
		case C.CUDA_ERROR_NO_DEVICE:
			err = fmt.Errorf("no nvidia devices detected by library %s", libPath)
			slog.Info(err.Error())
		case C.CUDA_ERROR_UNKNOWN:
			err = fmt.Errorf("unknown error initializing cuda driver library %s: %s. see https://github.com/ollama/ollama/blob/main/docs/troubleshooting.md for more information", libPath, C.GoString(resp.err))
			slog.Warn(err.Error())
		default:
			msg := C.GoString(resp.err)
			if strings.Contains(msg, "wrong ELF class") {
				slog.Debug("skipping 32bit library", "library", libPath)
			} else {
				// This is the driver (nvcuda) library, not cudart.
				err = fmt.Errorf("Unable to load nvcuda library %s: %s", libPath, msg)
				slog.Info(err.Error())
			}
		}
		C.free(unsafe.Pointer(resp.err))
		// Don't carry a freed pointer into the next attempt.
		resp.err = nil
	}
	return 0, nil, "", err
}

643
644
645
// Bootstrap the management library
// Returns: handle, libPath, error
//
// Each path in nvmlLibPaths is tried in order and the first one that
// initializes wins. If none does, the handle is nil and the error describes
// the last failure (nil when the list is empty).
func loadNVMLMgmt(nvmlLibPaths []string) (*C.nvml_handle_t, string, error) {
	var resp C.nvml_init_resp_t
	resp.ch.verbose = getVerboseState()
	var err error
	for _, libPath := range nvmlLibPaths {
		// Free the C copy of the path as soon as init returns; a defer here
		// would hold every candidate's string until the function exits.
		lib := C.CString(libPath)
		C.nvml_init(lib, &resp)
		C.free(unsafe.Pointer(lib))

		if resp.err == nil {
			return &resp.ch, libPath, nil
		}
		err = fmt.Errorf("Unable to load NVML management library %s: %s", libPath, C.GoString(resp.err))
		slog.Info(err.Error())
		C.free(unsafe.Pointer(resp.err))
		// Don't carry a freed pointer into the next attempt.
		resp.err = nil
	}
	return nil, "", err
}

665
666
667
// bootstrap the Intel GPU library
// Returns: num devices, handle, libPath, error
func loadOneapiMgmt(oneapiLibPaths []string) (int, *C.oneapi_handle_t, string, error) {
Wang,Zhe's avatar
Wang,Zhe committed
668
	var resp C.oneapi_init_resp_t
Daniel Hiltgen's avatar
Daniel Hiltgen committed
669
	num_devices := 0
Wang,Zhe's avatar
Wang,Zhe committed
670
	resp.oh.verbose = getVerboseState()
671
	var err error
Wang,Zhe's avatar
Wang,Zhe committed
672
673
674
675
676
	for _, libPath := range oneapiLibPaths {
		lib := C.CString(libPath)
		defer C.free(unsafe.Pointer(lib))
		C.oneapi_init(lib, &resp)
		if resp.err != nil {
677
678
			err = fmt.Errorf("Unable to load oneAPI management library %s: %s", libPath, C.GoString(resp.err))
			slog.Debug(err.Error())
Wang,Zhe's avatar
Wang,Zhe committed
679
680
			C.free(unsafe.Pointer(resp.err))
		} else {
681
			err = nil
Daniel Hiltgen's avatar
Daniel Hiltgen committed
682
			for i := range resp.oh.num_drivers {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
683
684
				num_devices += int(C.oneapi_get_device_count(resp.oh, C.int(i)))
			}
685
			return num_devices, &resp.oh, libPath, err
Wang,Zhe's avatar
Wang,Zhe committed
686
687
		}
	}
688
	return 0, nil, "", err
Wang,Zhe's avatar
Wang,Zhe committed
689
690
}

691
func getVerboseState() C.uint16_t {
Michael Yang's avatar
Michael Yang committed
692
	if envconfig.Debug() {
693
694
695
696
		return C.uint16_t(1)
	}
	return C.uint16_t(0)
}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
697
698
699
700
701
702
703
704
705
706
707
708
709
710

// Given the list of GPUs this instantiation is targeted for,
// figure out the visible devices environment variable
//
// If different libraries are detected, the first one is what we use
func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) {
	if len(l) == 0 {
		return "", ""
	}
	switch l[0].Library {
	case "cuda":
		return cudaGetVisibleDevicesEnv(l)
	case "rocm":
		return rocmGetVisibleDevicesEnv(l)
Wang,Zhe's avatar
Wang,Zhe committed
711
712
	case "oneapi":
		return oneapiGetVisibleDevicesEnv(l)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
713
714
715
716
717
	default:
		slog.Debug("no filter required for library " + l[0].Library)
		return "", ""
	}
}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
718

719
720
721
722
func LibraryDirs() []string {
	// dependencies can exist wherever we found the runners (e.g. build tree for developers) and relative to the executable
	// This can be simplified once we no longer carry runners as payloads
	paths := []string{}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
723
724
725
	appExe, err := os.Executable()
	if err != nil {
		slog.Warn("failed to lookup executable path", "error", err)
726
727
728
729
730
	} else {
		appRelative := filepath.Join(filepath.Dir(appExe), envconfig.LibRelativeToExe(), "lib", "ollama")
		if _, err := os.Stat(appRelative); err == nil {
			paths = append(paths, appRelative)
		}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
731
	}
732
	rDir := runners.Locate()
Daniel Hiltgen's avatar
Daniel Hiltgen committed
733
	if err != nil {
734
735
736
		slog.Warn("unable to locate gpu dependency libraries", "error", err)
	} else {
		paths = append(paths, filepath.Dir(rDir))
Daniel Hiltgen's avatar
Daniel Hiltgen committed
737
	}
738
	return paths
Daniel Hiltgen's avatar
Daniel Hiltgen committed
739
}
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759

// GetSystemInfo runs (or refreshes) GPU discovery and returns a snapshot of
// the CPU, the usable GPUs, the GPUs rejected as unsupported, and the errors
// recorded while bootstrapping. When discovery fell back to the CPU-only
// entry, GPUs is reported as an empty list.
func GetSystemInfo() SystemInfo {
	// GetGPUInfo takes gpuMutex itself, so call it before locking here.
	gpus := GetGPUInfo()

	gpuMutex.Lock()
	defer gpuMutex.Unlock()

	discoveryErrors := make([]string, 0, len(bootstrapErrors))
	for i := range bootstrapErrors {
		discoveryErrors = append(discoveryErrors, bootstrapErrors[i].Error())
	}

	cpuOnly := len(gpus) == 1 && gpus[0].Library == "cpu"
	if cpuOnly {
		gpus = []GpuInfo{}
	}

	info := SystemInfo{
		System:          cpus[0],
		GPUs:            gpus,
		UnsupportedGPUs: unsupportedGPUs,
		DiscoveryErrors: discoveryErrors,
	}
	return info
}