gpu.go 21.7 KB
Newer Older
1
2
//go:build linux || windows

3
package discover
4
5

/*
6
7
8
#cgo linux LDFLAGS: -lrt -lpthread -ldl -lstdc++ -lm
#cgo windows LDFLAGS: -lpthread

9
10
11
#include "gpu_info.h"
*/
import "C"
Michael Yang's avatar
lint  
Michael Yang committed
12

13
14
import (
	"fmt"
15
	"log/slog"
16
17
	"os"
	"path/filepath"
18
	"runtime"
19
	"strconv"
20
	"strings"
21
22
	"sync"
	"unsafe"
Michael Yang's avatar
Michael Yang committed
23

24
	"github.com/ollama/ollama/envconfig"
25
	"github.com/ollama/ollama/format"
26
27
)

Daniel Hiltgen's avatar
Daniel Hiltgen committed
28
type cudaHandles struct {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
29
30
	deviceCount int
	cudart      *C.cudart_handle_t
31
	nvcuda      *C.nvcuda_handle_t
32
	nvml        *C.nvml_handle_t
Daniel Hiltgen's avatar
Daniel Hiltgen committed
33
34
35
}

type oneapiHandles struct {
Wang,Zhe's avatar
Wang,Zhe committed
36
	oneapi      *C.oneapi_handle_t
Daniel Hiltgen's avatar
Daniel Hiltgen committed
37
	deviceCount int
38
39
}

Michael Yang's avatar
Michael Yang committed
40
const (
Daniel Hiltgen's avatar
Daniel Hiltgen committed
41
42
	cudaMinimumMemory = 457 * format.MebiByte
	rocmMinimumMemory = 457 * format.MebiByte
Daniel Hiltgen's avatar
Daniel Hiltgen committed
43
	// TODO OneAPI minimum memory
Michael Yang's avatar
Michael Yang committed
44
45
)

46
47
48
49
50
51
52
53
var (
	gpuMutex      sync.Mutex
	bootstrapped  bool
	cpus          []CPUInfo
	cudaGPUs      []CudaGPUInfo
	nvcudaLibPath string
	cudartLibPath string
	oneapiLibPath string
54
	nvmlLibPath   string
55
56
	rocmGPUs      []RocmGPUInfo
	oneapiGPUs    []OneapiGPUInfo
57
58
59
60
61
62
63

	// If any discovered GPUs are incompatible, report why
	unsupportedGPUs []UnsupportedGPUInfo

	// Keep track of errors during bootstrapping so that if GPUs are missing
	// they expected to be present this may explain why
	bootstrapErrors []error
64
)
65

66
// Minimum supported CUDA compute capability. With our current CUDA compile
// flags, anything older than 5.0 will not work properly.
// (String values are used to allow ldflags overrides at build time; they are
// parsed with strconv.Atoi during GPU bootstrap.)
var (
	// CudaComputeMajorMin is the minimum compute capability major version.
	CudaComputeMajorMin = "5"
	// CudaComputeMinorMin is the minimum minor version when the major version
	// equals CudaComputeMajorMin.
	CudaComputeMinorMin = "0"
)
72

73
// RocmComputeMajorMin is the minimum ROCm compute major version, kept as a
// string (presumably so it can be overridden with ldflags like the CUDA
// minimums - confirm against the build scripts).
var RocmComputeMajorMin = "9"
74

Daniel Hiltgen's avatar
Daniel Hiltgen committed
75
76
// IGPUMemLimit is the memory threshold below which a GPU is assumed to be an
// integrated GPU: anything reporting less than 1G must be an iGPU.
// NOTE(review): the original comment said iGPUs typically report "512G",
// which is presumably a typo for 512M - confirm.
// TODO find a better way to detect iGPU instead of minimum memory
const IGPUMemLimit = 1 * format.GibiByte
77

78
// Note: gpuMutex must already be held
Daniel Hiltgen's avatar
Daniel Hiltgen committed
79
func initCudaHandles() *cudaHandles {
80
	// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing
81

Daniel Hiltgen's avatar
Daniel Hiltgen committed
82
	cHandles := &cudaHandles{}
83
	// Short Circuit if we already know which library to use
84
	// ignore bootstrap errors in this case since we already recorded them
85
	if nvmlLibPath != "" {
86
		cHandles.nvml, _, _ = loadNVMLMgmt([]string{nvmlLibPath})
87
88
		return cHandles
	}
89
	if nvcudaLibPath != "" {
90
		cHandles.deviceCount, cHandles.nvcuda, _, _ = loadNVCUDAMgmt([]string{nvcudaLibPath})
Daniel Hiltgen's avatar
Daniel Hiltgen committed
91
		return cHandles
92
93
	}
	if cudartLibPath != "" {
94
		cHandles.deviceCount, cHandles.cudart, _, _ = loadCUDARTMgmt([]string{cudartLibPath})
Daniel Hiltgen's avatar
Daniel Hiltgen committed
95
		return cHandles
96
97
98
	}

	slog.Debug("searching for GPU discovery libraries for NVIDIA")
99
	var cudartMgmtPatterns []string
100

Daniel Hiltgen's avatar
Daniel Hiltgen committed
101
102
	// Aligned with driver, we can't carry as payloads
	nvcudaMgmtPatterns := NvcudaGlobs
Michael Yang's avatar
Michael Yang committed
103
	cudartMgmtPatterns = append(cudartMgmtPatterns, filepath.Join(LibOllamaPath, "cuda_v*", CudartMgmtName))
Daniel Hiltgen's avatar
Daniel Hiltgen committed
104
	cudartMgmtPatterns = append(cudartMgmtPatterns, CudartGlobs...)
105

Daniel Hiltgen's avatar
Daniel Hiltgen committed
106
107
	if len(NvmlGlobs) > 0 {
		nvmlLibPaths := FindGPULibs(NvmlMgmtName, NvmlGlobs)
108
		if len(nvmlLibPaths) > 0 {
109
			nvml, libPath, err := loadNVMLMgmt(nvmlLibPaths)
110
111
112
113
114
			if nvml != nil {
				slog.Debug("nvidia-ml loaded", "library", libPath)
				cHandles.nvml = nvml
				nvmlLibPath = libPath
			}
115
116
117
			if err != nil {
				bootstrapErrors = append(bootstrapErrors, err)
			}
118
119
120
		}
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
121
	nvcudaLibPaths := FindGPULibs(NvcudaMgmtName, nvcudaMgmtPatterns)
122
	if len(nvcudaLibPaths) > 0 {
123
		deviceCount, nvcuda, libPath, err := loadNVCUDAMgmt(nvcudaLibPaths)
124
		if nvcuda != nil {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
125
			slog.Debug("detected GPUs", "count", deviceCount, "library", libPath)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
126
127
			cHandles.nvcuda = nvcuda
			cHandles.deviceCount = deviceCount
128
			nvcudaLibPath = libPath
Daniel Hiltgen's avatar
Daniel Hiltgen committed
129
			return cHandles
130
		}
131
132
133
		if err != nil {
			bootstrapErrors = append(bootstrapErrors, err)
		}
134
135
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
136
	cudartLibPaths := FindGPULibs(CudartMgmtName, cudartMgmtPatterns)
137
	if len(cudartLibPaths) > 0 {
138
		deviceCount, cudart, libPath, err := loadCUDARTMgmt(cudartLibPaths)
139
		if cudart != nil {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
140
			slog.Debug("detected GPUs", "library", libPath, "count", deviceCount)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
141
142
			cHandles.cudart = cudart
			cHandles.deviceCount = deviceCount
143
			cudartLibPath = libPath
Daniel Hiltgen's avatar
Daniel Hiltgen committed
144
			return cHandles
145
		}
146
147
148
		if err != nil {
			bootstrapErrors = append(bootstrapErrors, err)
		}
149
	}
Wang,Zhe's avatar
Wang,Zhe committed
150

Daniel Hiltgen's avatar
Daniel Hiltgen committed
151
152
153
154
155
156
157
158
	return cHandles
}

// Note: gpuMutex must already be held
func initOneAPIHandles() *oneapiHandles {
	oHandles := &oneapiHandles{}

	// Short Circuit if we already know which library to use
159
	// ignore bootstrap errors in this case since we already recorded them
Daniel Hiltgen's avatar
Daniel Hiltgen committed
160
	if oneapiLibPath != "" {
161
		oHandles.deviceCount, oHandles.oneapi, _, _ = loadOneapiMgmt([]string{oneapiLibPath})
Daniel Hiltgen's avatar
Daniel Hiltgen committed
162
163
164
		return oHandles
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
165
	oneapiLibPaths := FindGPULibs(OneapiMgmtName, OneapiGlobs)
166
	if len(oneapiLibPaths) > 0 {
167
168
169
170
171
		var err error
		oHandles.deviceCount, oHandles.oneapi, oneapiLibPath, err = loadOneapiMgmt(oneapiLibPaths)
		if err != nil {
			bootstrapErrors = append(bootstrapErrors, err)
		}
172
173
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
174
	return oHandles
175
176
}

177
178
179
180
181
182
183
184
185
186
187
// GetCPUInfo returns a single-element list describing the CPU, running the
// GPU bootstrap first if it has not happened yet (bootstrap is what populates
// the cpus slice).
func GetCPUInfo() GpuInfoList {
	gpuMutex.Lock()
	ready := bootstrapped
	gpuMutex.Unlock()

	if !ready {
		// GetGPUInfo takes gpuMutex itself, so it must be called unlocked.
		GetGPUInfo()
	}

	// Re-acquire the lock for the read: GetGPUInfo's refresh path updates
	// cpus[0] under gpuMutex, so an unlocked read here would be a data race.
	gpuMutex.Lock()
	defer gpuMutex.Unlock()
	return GpuInfoList{cpus[0].GpuInfo}
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
188
func GetGPUInfo() GpuInfoList {
189
190
191
192
	// TODO - consider exploring lspci (and equivalent on windows) to check for
	// GPUs so we can report warnings if we see Nvidia/AMD but fail to load the libraries
	gpuMutex.Lock()
	defer gpuMutex.Unlock()
193
	needRefresh := true
Daniel Hiltgen's avatar
Daniel Hiltgen committed
194
195
	var cHandles *cudaHandles
	var oHandles *oneapiHandles
196
	defer func() {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
197
198
199
200
201
202
203
		if cHandles != nil {
			if cHandles.cudart != nil {
				C.cudart_release(*cHandles.cudart)
			}
			if cHandles.nvcuda != nil {
				C.nvcuda_release(*cHandles.nvcuda)
			}
204
205
206
			if cHandles.nvml != nil {
				C.nvml_release(*cHandles.nvml)
			}
207
		}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
208
209
210
211
212
		if oHandles != nil {
			if oHandles.oneapi != nil {
				// TODO - is this needed?
				C.oneapi_release(*oHandles.oneapi)
			}
213
		}
214
	}()
215

216
	if !bootstrapped {
217
		slog.Info("looking for compatible GPUs")
218
219
220
221
222
223
224
225
		cudaComputeMajorMin, err := strconv.Atoi(CudaComputeMajorMin)
		if err != nil {
			slog.Error("invalid CudaComputeMajorMin setting", "value", CudaComputeMajorMin, "error", err)
		}
		cudaComputeMinorMin, err := strconv.Atoi(CudaComputeMinorMin)
		if err != nil {
			slog.Error("invalid CudaComputeMinorMin setting", "value", CudaComputeMinorMin, "error", err)
		}
226
		bootstrapErrors = []error{}
227
228
		needRefresh = false
		var memInfo C.mem_info_t
229
230
231
232

		mem, err := GetCPUMem()
		if err != nil {
			slog.Warn("error looking up system memory", "error", err)
233
		}
Michael Yang's avatar
Michael Yang committed
234

235
236
237
238
		details, err := GetCPUDetails()
		if err != nil {
			slog.Warn("failed to lookup CPU details", "error", err)
		}
Michael Yang's avatar
lint  
Michael Yang committed
239
240
241
		cpus = []CPUInfo{
			{
				GpuInfo: GpuInfo{
Michael Yang's avatar
Michael Yang committed
242
243
244
					memInfo: mem,
					Library: "cpu",
					ID:      "0",
Michael Yang's avatar
lint  
Michael Yang committed
245
				},
246
				CPUs: details,
247
			},
Michael Yang's avatar
lint  
Michael Yang committed
248
		}
249
250

		// Load ALL libraries
Daniel Hiltgen's avatar
Daniel Hiltgen committed
251
		cHandles = initCudaHandles()
252
253

		// NVIDIA
Daniel Hiltgen's avatar
Daniel Hiltgen committed
254
255
		for i := range cHandles.deviceCount {
			if cHandles.cudart != nil || cHandles.nvcuda != nil {
256
257
258
259
260
261
262
263
				gpuInfo := CudaGPUInfo{
					GpuInfo: GpuInfo{
						Library: "cuda",
					},
					index: i,
				}
				var driverMajor int
				var driverMinor int
Daniel Hiltgen's avatar
Daniel Hiltgen committed
264
265
				if cHandles.cudart != nil {
					C.cudart_bootstrap(*cHandles.cudart, C.int(i), &memInfo)
266
267
					driverMajor = int(cHandles.cudart.driver_major)
					driverMinor = int(cHandles.cudart.driver_minor)
268
				} else {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
269
270
271
					C.nvcuda_bootstrap(*cHandles.nvcuda, C.int(i), &memInfo)
					driverMajor = int(cHandles.nvcuda.driver_major)
					driverMinor = int(cHandles.nvcuda.driver_minor)
272
273
274
275
276
277
278
279
280
281
				}
				if memInfo.err != nil {
					slog.Info("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
					C.free(unsafe.Pointer(memInfo.err))
					continue
				}
				gpuInfo.TotalMemory = uint64(memInfo.total)
				gpuInfo.FreeMemory = uint64(memInfo.free)
				gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
				gpuInfo.Compute = fmt.Sprintf("%d.%d", memInfo.major, memInfo.minor)
282
283
				gpuInfo.computeMajor = int(memInfo.major)
				gpuInfo.computeMinor = int(memInfo.minor)
284
				gpuInfo.MinimumMemory = cudaMinimumMemory
285
286
				gpuInfo.DriverMajor = driverMajor
				gpuInfo.DriverMinor = driverMinor
287

288
289
				gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])

290
				if int(memInfo.major) < cudaComputeMajorMin || (int(memInfo.major) == cudaComputeMajorMin && int(memInfo.minor) < cudaComputeMinorMin) {
291
292
293
294
295
296
297
298
					unsupportedGPUs = append(unsupportedGPUs,
						UnsupportedGPUInfo{
							GpuInfo: gpuInfo.GpuInfo,
						})
					slog.Info(fmt.Sprintf("[%d] CUDA GPU is too old. Compute Capability detected: %d.%d", i, memInfo.major, memInfo.minor))
					continue
				}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
299
300
301
				// query the management library as well so we can record any skew between the two
				// which represents overhead on the GPU we must set aside on subsequent updates
				if cHandles.nvml != nil {
302
303
304
					uuid := C.CString(gpuInfo.ID)
					defer C.free(unsafe.Pointer(uuid))
					C.nvml_get_free(*cHandles.nvml, uuid, &memInfo.free, &memInfo.total, &memInfo.used)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
					if memInfo.err != nil {
						slog.Warn("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
						C.free(unsafe.Pointer(memInfo.err))
					} else {
						if memInfo.free != 0 && uint64(memInfo.free) > gpuInfo.FreeMemory {
							gpuInfo.OSOverhead = uint64(memInfo.free) - gpuInfo.FreeMemory
							slog.Info("detected OS VRAM overhead",
								"id", gpuInfo.ID,
								"library", gpuInfo.Library,
								"compute", gpuInfo.Compute,
								"driver", fmt.Sprintf("%d.%d", gpuInfo.DriverMajor, gpuInfo.DriverMinor),
								"name", gpuInfo.Name,
								"overhead", format.HumanBytes2(gpuInfo.OSOverhead),
							)
						}
					}
				}

323
324
				// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
				cudaGPUs = append(cudaGPUs, gpuInfo)
Wang,Zhe's avatar
Wang,Zhe committed
325
			}
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
			// Second pass on NVIDIA GPUs to set lowest common denominator variant and DependencyPaths
			variant := cudaVariant(cudaGPUs)
			var variantPath string
			// Start with our bundled libraries
			if variant != "" {
				variantPath = filepath.Join(LibOllamaPath, "cuda_"+variant)
				if _, err := os.Stat(variantPath); err != nil {
					variantPath = ""
				}
			}

			for i := range cudaGPUs {
				cudaGPUs[i].Variant = variant
				if variantPath != "" {
					// Put the variant directory first in the search path to avoid runtime linking to the wrong library
					cudaGPUs[i].DependencyPath = append([]string{variantPath}, cudaGPUs[i].DependencyPath...)
				}
			}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
344
345
346
		}

		// Intel
Michael Yang's avatar
bool  
Michael Yang committed
347
		if envconfig.IntelGPU() {
348
			oHandles = initOneAPIHandles()
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
			if oHandles != nil && oHandles.oneapi != nil {
				for d := range oHandles.oneapi.num_drivers {
					if oHandles.oneapi == nil {
						// shouldn't happen
						slog.Warn("nil oneapi handle with driver count", "count", int(oHandles.oneapi.num_drivers))
						continue
					}
					devCount := C.oneapi_get_device_count(*oHandles.oneapi, C.int(d))
					for i := range devCount {
						gpuInfo := OneapiGPUInfo{
							GpuInfo: GpuInfo{
								Library: "oneapi",
							},
							driverIndex: int(d),
							gpuIndex:    int(i),
						}
						// TODO - split bootstrapping from updating free memory
						C.oneapi_check_vram(*oHandles.oneapi, C.int(d), i, &memInfo)
						// TODO - convert this to MinimumMemory based on testing...
						var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
						memInfo.free = C.uint64_t(totalFreeMem)
						gpuInfo.TotalMemory = uint64(memInfo.total)
						gpuInfo.FreeMemory = uint64(memInfo.free)
						gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
						gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
Michael Yang's avatar
Michael Yang committed
374
						gpuInfo.DependencyPath = []string{LibOllamaPath}
375
						oneapiGPUs = append(oneapiGPUs, gpuInfo)
376
					}
377
378
379
380
				}
			}
		}

381
		rocmGPUs, err = AMDGetGPUInfo()
382
383
384
385
386
387
388
389
390

		// The ID field is used in context of the filtered set of GPUS
		// so we have to replace any of these numeric IDs with their
		// placement in this set of GPUs
		for i := range rocmGPUs {
			if _, err := strconv.Atoi(rocmGPUs[i].ID); err == nil {
				rocmGPUs[i].ID = strconv.Itoa(i)
			}
		}
391
392
393
		if err != nil {
			bootstrapErrors = append(bootstrapErrors, err)
		}
394
		bootstrapped = true
395
396
397
		if len(cudaGPUs) == 0 && len(rocmGPUs) == 0 && len(oneapiGPUs) == 0 {
			slog.Info("no compatible GPUs were discovered")
		}
398
399

		// TODO verify we have runners for the discovered GPUs, filter out any that aren't supported with good error messages
400
401
402
403
404
405
	}

	// For detected GPUs, load library if not loaded

	// Refresh free memory usage
	if needRefresh {
406
407
408
409
410
411
412
413
414
		mem, err := GetCPUMem()
		if err != nil {
			slog.Warn("error looking up system memory", "error", err)
		} else {
			slog.Debug("updating system memory data",
				slog.Group(
					"before",
					"total", format.HumanBytes2(cpus[0].TotalMemory),
					"free", format.HumanBytes2(cpus[0].FreeMemory),
415
					"free_swap", format.HumanBytes2(cpus[0].FreeSwap),
416
417
418
419
420
				),
				slog.Group(
					"now",
					"total", format.HumanBytes2(mem.TotalMemory),
					"free", format.HumanBytes2(mem.FreeMemory),
421
					"free_swap", format.HumanBytes2(mem.FreeSwap),
422
423
424
				),
			)
			cpus[0].FreeMemory = mem.FreeMemory
425
			cpus[0].FreeSwap = mem.FreeSwap
426
427
		}

428
		var memInfo C.mem_info_t
Daniel Hiltgen's avatar
Daniel Hiltgen committed
429
430
		if cHandles == nil && len(cudaGPUs) > 0 {
			cHandles = initCudaHandles()
431
432
		}
		for i, gpu := range cudaGPUs {
433
			if cHandles.nvml != nil {
434
435
436
				uuid := C.CString(gpu.ID)
				defer C.free(unsafe.Pointer(uuid))
				C.nvml_get_free(*cHandles.nvml, uuid, &memInfo.free, &memInfo.total, &memInfo.used)
437
			} else if cHandles.cudart != nil {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
438
				C.cudart_bootstrap(*cHandles.cudart, C.int(gpu.index), &memInfo)
439
440
441
			} else if cHandles.nvcuda != nil {
				C.nvcuda_get_free(*cHandles.nvcuda, C.int(gpu.index), &memInfo.free, &memInfo.total)
				memInfo.used = memInfo.total - memInfo.free
Wang,Zhe's avatar
Wang,Zhe committed
442
			} else {
443
444
445
				// shouldn't happen
				slog.Warn("no valid cuda library loaded to refresh vram usage")
				break
Wang,Zhe's avatar
Wang,Zhe committed
446
447
			}
			if memInfo.err != nil {
448
				slog.Warn("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
Wang,Zhe's avatar
Wang,Zhe committed
449
450
451
				C.free(unsafe.Pointer(memInfo.err))
				continue
			}
452
453
			if memInfo.free == 0 {
				slog.Warn("error looking up nvidia GPU memory")
Wang,Zhe's avatar
Wang,Zhe committed
454
455
				continue
			}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
456
457
458
459
			if cHandles.nvml != nil && gpu.OSOverhead > 0 {
				// When using the management library update based on recorded overhead
				memInfo.free -= C.uint64_t(gpu.OSOverhead)
			}
460
461
462
			slog.Debug("updating cuda memory data",
				"gpu", gpu.ID,
				"name", gpu.Name,
Daniel Hiltgen's avatar
Daniel Hiltgen committed
463
				"overhead", format.HumanBytes2(gpu.OSOverhead),
464
465
466
467
468
469
470
471
472
473
474
475
				slog.Group(
					"before",
					"total", format.HumanBytes2(gpu.TotalMemory),
					"free", format.HumanBytes2(gpu.FreeMemory),
				),
				slog.Group(
					"now",
					"total", format.HumanBytes2(uint64(memInfo.total)),
					"free", format.HumanBytes2(uint64(memInfo.free)),
					"used", format.HumanBytes2(uint64(memInfo.used)),
				),
			)
476
			cudaGPUs[i].FreeMemory = uint64(memInfo.free)
477
		}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494

		if oHandles == nil && len(oneapiGPUs) > 0 {
			oHandles = initOneAPIHandles()
		}
		for i, gpu := range oneapiGPUs {
			if oHandles.oneapi == nil {
				// shouldn't happen
				slog.Warn("nil oneapi handle with device count", "count", oHandles.deviceCount)
				continue
			}
			C.oneapi_check_vram(*oHandles.oneapi, C.int(gpu.driverIndex), C.int(gpu.gpuIndex), &memInfo)
			// TODO - convert this to MinimumMemory based on testing...
			var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
			memInfo.free = C.uint64_t(totalFreeMem)
			oneapiGPUs[i].FreeMemory = uint64(memInfo.free)
		}

495
		err = RocmGPUInfoList(rocmGPUs).RefreshFreeMemory()
496
497
		if err != nil {
			slog.Debug("problem refreshing ROCm free memory", "error", err)
498
		}
499
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
500

501
502
503
504
505
506
507
	resp := []GpuInfo{}
	for _, gpu := range cudaGPUs {
		resp = append(resp, gpu.GpuInfo)
	}
	for _, gpu := range rocmGPUs {
		resp = append(resp, gpu.GpuInfo)
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
508
509
510
	for _, gpu := range oneapiGPUs {
		resp = append(resp, gpu.GpuInfo)
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
511
	if len(resp) == 0 {
512
		resp = append(resp, cpus[0].GpuInfo)
513
514
515
516
	}
	return resp
}

517
func FindGPULibs(baseLibName string, defaultPatterns []string) []string {
518
519
	// Multiple GPU libraries may exist, and some may not work, so keep trying until we exhaust them
	gpuLibPaths := []string{}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
520
	slog.Debug("Searching for GPU library", "name", baseLibName)
521

Michael Yang's avatar
Michael Yang committed
522
523
	// search our bundled libraries first
	patterns := []string{filepath.Join(LibOllamaPath, baseLibName)}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
524

Michael Yang's avatar
Michael Yang committed
525
	var ldPaths []string
526
527
	switch runtime.GOOS {
	case "windows":
Michael Yang's avatar
Michael Yang committed
528
		ldPaths = strings.Split(os.Getenv("PATH"), string(os.PathListSeparator))
529
	case "linux":
Michael Yang's avatar
Michael Yang committed
530
		ldPaths = strings.Split(os.Getenv("LD_LIBRARY_PATH"), string(os.PathListSeparator))
531
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
532

Michael Yang's avatar
Michael Yang committed
533
534
535
	// then search the system's LD_LIBRARY_PATH
	for _, p := range ldPaths {
		p, err := filepath.Abs(p)
536
537
538
		if err != nil {
			continue
		}
Michael Yang's avatar
Michael Yang committed
539
		patterns = append(patterns, filepath.Join(p, baseLibName))
540
	}
Michael Yang's avatar
Michael Yang committed
541
542

	// finally, search the default patterns provided by the caller
543
	patterns = append(patterns, defaultPatterns...)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
544
	slog.Debug("gpu library search", "globs", patterns)
545
	for _, pattern := range patterns {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
546
547
548
		// Nvidia PhysX known to return bogus results
		if strings.Contains(pattern, "PhysX") {
			slog.Debug("skipping PhysX cuda library path", "path", pattern)
549
			continue
Daniel Hiltgen's avatar
Daniel Hiltgen committed
550
		}
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
		// Ignore glob discovery errors
		matches, _ := filepath.Glob(pattern)
		for _, match := range matches {
			// Resolve any links so we don't try the same lib multiple times
			// and weed out any dups across globs
			libPath := match
			tmp := match
			var err error
			for ; err == nil; tmp, err = os.Readlink(libPath) {
				if !filepath.IsAbs(tmp) {
					tmp = filepath.Join(filepath.Dir(libPath), tmp)
				}
				libPath = tmp
			}
			new := true
			for _, cmp := range gpuLibPaths {
				if cmp == libPath {
					new = false
					break
				}
			}
			if new {
				gpuLibPaths = append(gpuLibPaths, libPath)
			}
		}
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
577
	slog.Debug("discovered GPU libraries", "paths", gpuLibPaths)
578
579
580
	return gpuLibPaths
}

581
582
583
// loadCUDARTMgmt bootstraps the CUDA runtime library, trying each candidate
// path in order and stopping at the first one that initializes.
// Returns: num devices, handle, libPath, error. When no candidate loads the
// handle is nil and the error describes the last failure (nil if the
// candidate list was empty).
func loadCUDARTMgmt(cudartLibPaths []string) (int, *C.cudart_handle_t, string, error) {
	var (
		initResp C.cudart_init_resp_t
		lastErr  error
	)
	initResp.ch.verbose = getVerboseState()

	for _, candidate := range cudartLibPaths {
		cPath := C.CString(candidate)
		defer C.free(unsafe.Pointer(cPath))

		C.cudart_init(cPath, &initResp)
		if initResp.err == nil {
			return int(initResp.num_devices), &initResp.ch, candidate, nil
		}

		// The C side allocated the message; copy it out, then free it.
		lastErr = fmt.Errorf("Unable to load cudart library %s: %s", candidate, C.GoString(initResp.err))
		slog.Debug(lastErr.Error())
		C.free(unsafe.Pointer(initResp.err))
	}
	return 0, nil, "", lastErr
}

603
604
605
// loadNVCUDAMgmt bootstraps the CUDA driver library (nvcuda), trying each
// candidate path in order and stopping at the first one that initializes.
// Returns: num devices, handle, libPath, error. When no candidate loads the
// handle is nil and the error describes the most recent recorded failure.
// Libraries rejected as 32-bit ("wrong ELF class") are skipped without
// recording an error.
func loadNVCUDAMgmt(nvcudaLibPaths []string) (int, *C.nvcuda_handle_t, string, error) {
	var resp C.nvcuda_init_resp_t
	resp.ch.verbose = getVerboseState()
	var err error
	for _, libPath := range nvcudaLibPaths {
		lib := C.CString(libPath)
		defer C.free(unsafe.Pointer(lib))
		C.nvcuda_init(lib, &resp)
		if resp.err != nil {
			// Decide what log level based on the type of error message to help users understand why
			switch resp.cudaErr {
			case C.CUDA_ERROR_INSUFFICIENT_DRIVER, C.CUDA_ERROR_SYSTEM_DRIVER_MISMATCH:
				err = fmt.Errorf("version mismatch between driver and cuda driver library - reboot or upgrade may be required: library %s", libPath)
				slog.Warn(err.Error())
			case C.CUDA_ERROR_NO_DEVICE:
				err = fmt.Errorf("no nvidia devices detected by library %s", libPath)
				slog.Info(err.Error())
			case C.CUDA_ERROR_UNKNOWN:
				err = fmt.Errorf("unknown error initializing cuda driver library %s: %s. see https://github.com/ollama/ollama/blob/main/docs/troubleshooting.md for more information", libPath, C.GoString(resp.err))
				slog.Warn(err.Error())
			default:
				msg := C.GoString(resp.err)
				if strings.Contains(msg, "wrong ELF class") {
					slog.Debug("skipping 32bit library", "library", libPath)
				} else {
					// This loader handles the nvcuda driver library; the message
					// previously named cudart, which pointed at the wrong library.
					err = fmt.Errorf("Unable to load nvcuda library %s: %s", libPath, msg)
					slog.Info(err.Error())
				}
			}
			// The C side allocated the message; free it once it has been copied.
			C.free(unsafe.Pointer(resp.err))
		} else {
			return int(resp.num_devices), &resp.ch, libPath, nil
		}
	}
	return 0, nil, "", err
}

643
644
645
// loadNVMLMgmt bootstraps the NVIDIA management library (NVML), trying each
// candidate path in order and stopping at the first one that initializes.
// Returns: handle, libPath, error. When no candidate loads the handle is nil
// and the error describes the last failure (nil if the candidate list was
// empty).
func loadNVMLMgmt(nvmlLibPaths []string) (*C.nvml_handle_t, string, error) {
	var (
		initResp C.nvml_init_resp_t
		lastErr  error
	)
	initResp.ch.verbose = getVerboseState()

	for _, candidate := range nvmlLibPaths {
		cPath := C.CString(candidate)
		defer C.free(unsafe.Pointer(cPath))

		C.nvml_init(cPath, &initResp)
		if initResp.err == nil {
			return &initResp.ch, candidate, nil
		}

		// The C side allocated the message; copy it out, then free it.
		lastErr = fmt.Errorf("Unable to load NVML management library %s: %s", candidate, C.GoString(initResp.err))
		slog.Info(lastErr.Error())
		C.free(unsafe.Pointer(initResp.err))
	}
	return nil, "", lastErr
}

665
666
667
// bootstrap the Intel GPU library
// Returns: num devices, handle, libPath, error
func loadOneapiMgmt(oneapiLibPaths []string) (int, *C.oneapi_handle_t, string, error) {
Wang,Zhe's avatar
Wang,Zhe committed
668
	var resp C.oneapi_init_resp_t
Daniel Hiltgen's avatar
Daniel Hiltgen committed
669
	num_devices := 0
Wang,Zhe's avatar
Wang,Zhe committed
670
	resp.oh.verbose = getVerboseState()
671
	var err error
Wang,Zhe's avatar
Wang,Zhe committed
672
673
674
675
676
	for _, libPath := range oneapiLibPaths {
		lib := C.CString(libPath)
		defer C.free(unsafe.Pointer(lib))
		C.oneapi_init(lib, &resp)
		if resp.err != nil {
677
678
			err = fmt.Errorf("Unable to load oneAPI management library %s: %s", libPath, C.GoString(resp.err))
			slog.Debug(err.Error())
Wang,Zhe's avatar
Wang,Zhe committed
679
680
			C.free(unsafe.Pointer(resp.err))
		} else {
681
			err = nil
Daniel Hiltgen's avatar
Daniel Hiltgen committed
682
			for i := range resp.oh.num_drivers {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
683
684
				num_devices += int(C.oneapi_get_device_count(resp.oh, C.int(i)))
			}
685
			return num_devices, &resp.oh, libPath, err
Wang,Zhe's avatar
Wang,Zhe committed
686
687
		}
	}
688
	return 0, nil, "", err
Wang,Zhe's avatar
Wang,Zhe committed
689
690
}

691
// getVerboseState returns the verbosity flag handed to the C discovery
// libraries: 1 when the configured log level is below slog.LevelInfo (i.e.
// debug logging), 0 otherwise.
func getVerboseState() C.uint16_t {
	var verbose C.uint16_t
	if envconfig.LogLevel() < slog.LevelInfo {
		verbose = 1
	}
	return verbose
}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
697
698
699

// Given the list of GPUs this instantiation is targeted for,
// figure out the visible devices environment variable
700
func (l GpuInfoList) GetVisibleDevicesEnv() []string {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
701
	if len(l) == 0 {
702
		return nil
Daniel Hiltgen's avatar
Daniel Hiltgen committed
703
	}
704
705
706
707
	vd := []string{}
	// Only filter the AMD GPUs at this level, let all NVIDIA devices through
	if tmp := rocmGetVisibleDevicesEnv(l); tmp != "" {
		vd = append(vd, tmp)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
708
	}
709
	return vd
Daniel Hiltgen's avatar
Daniel Hiltgen committed
710
}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
711

712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
// GetSystemInfo returns a snapshot of the system: the CPU entry, the
// discovered GPUs (an empty list when GetGPUInfo fell back to the CPU entry),
// the GPUs rejected as unsupported, and the text of every error recorded
// during bootstrap.
func GetSystemInfo() SystemInfo {
	// GetGPUInfo takes gpuMutex itself, so call it before locking here.
	gpus := GetGPUInfo()

	gpuMutex.Lock()
	defer gpuMutex.Unlock()

	discoveryErrors := make([]string, 0, len(bootstrapErrors))
	for i := range bootstrapErrors {
		discoveryErrors = append(discoveryErrors, bootstrapErrors[i].Error())
	}

	// A lone "cpu" entry is GetGPUInfo's no-GPU fallback, not a GPU.
	cpuOnly := len(gpus) == 1 && gpus[0].Library == "cpu"
	if cpuOnly {
		gpus = []GpuInfo{}
	}

	info := SystemInfo{
		System:          cpus[0],
		GPUs:            gpus,
		UnsupportedGPUs: unsupportedGPUs,
		DiscoveryErrors: discoveryErrors,
	}
	return info
}