//go:build linux || windows

package discover

/*
#cgo CPPFLAGS: -O3
#cgo linux LDFLAGS: -lrt -lpthread -ldl -lstdc++ -lm
#cgo windows LDFLAGS: -lpthread

#include "gpu_info.h"
*/
import "C"

import (
	"fmt"
	"log/slog"
	"os"
	"path/filepath"
	"runtime"
	"strconv"
	"strings"
	"sync"
	"unsafe"

	"github.com/ollama/ollama/envconfig"
	"github.com/ollama/ollama/format"
)

Daniel Hiltgen's avatar
Daniel Hiltgen committed
29
type cudaHandles struct {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
30
31
	deviceCount int
	cudart      *C.cudart_handle_t
32
	nvcuda      *C.nvcuda_handle_t
33
	nvml        *C.nvml_handle_t
Daniel Hiltgen's avatar
Daniel Hiltgen committed
34
35
36
}

type oneapiHandles struct {
Wang,Zhe's avatar
Wang,Zhe committed
37
	oneapi      *C.oneapi_handle_t
Daniel Hiltgen's avatar
Daniel Hiltgen committed
38
	deviceCount int
39
40
}

Michael Yang's avatar
Michael Yang committed
41
const (
Daniel Hiltgen's avatar
Daniel Hiltgen committed
42
43
	cudaMinimumMemory = 457 * format.MebiByte
	rocmMinimumMemory = 457 * format.MebiByte
Daniel Hiltgen's avatar
Daniel Hiltgen committed
44
	// TODO OneAPI minimum memory
Michael Yang's avatar
Michael Yang committed
45
46
)

47
48
49
50
51
52
53
54
var (
	gpuMutex      sync.Mutex
	bootstrapped  bool
	cpus          []CPUInfo
	cudaGPUs      []CudaGPUInfo
	nvcudaLibPath string
	cudartLibPath string
	oneapiLibPath string
55
	nvmlLibPath   string
56
57
	rocmGPUs      []RocmGPUInfo
	oneapiGPUs    []OneapiGPUInfo
58
59
60
61
62
63
64

	// If any discovered GPUs are incompatible, report why
	unsupportedGPUs []UnsupportedGPUInfo

	// Keep track of errors during bootstrapping so that if GPUs are missing
	// they expected to be present this may explain why
	bootstrapErrors []error
65
)
66

67
// With our current CUDA compile flags, older than 5.0 will not work properly
// (string values used to allow ldflags overrides at build time)
//
// The values are parsed with strconv.Atoi during GPU bootstrap and compared
// against each device's compute capability.
var (
	CudaComputeMajorMin = "5"
	CudaComputeMinorMin = "0"
)
73

74
// RocmComputeMajorMin is held as a string, like the CUDA minimums in this file.
// NOTE(review): presumably the minimum supported ROCm compute major version,
// overridable via ldflags; it is not consumed in this file - confirm against
// the AMD discovery code.
var RocmComputeMajorMin = "9"
75

Daniel Hiltgen's avatar
Daniel Hiltgen committed
76
77
// IGPUMemLimit is the memory size below which a GPU is assumed to be an
// integrated GPU.
// TODO find a better way to detect iGPU instead of minimum memory
// NOTE(review): the original note said iGPUs typically report "512G"; that
// looks like a typo for 512M given the 1G threshold - confirm.
const IGPUMemLimit = 1 * format.GibiByte // anything less than 1G is treated as an iGPU
78

79
// Note: gpuMutex must already be held
Daniel Hiltgen's avatar
Daniel Hiltgen committed
80
func initCudaHandles() *cudaHandles {
81
	// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing
82

Daniel Hiltgen's avatar
Daniel Hiltgen committed
83
	cHandles := &cudaHandles{}
84
	// Short Circuit if we already know which library to use
85
	// ignore bootstrap errors in this case since we already recorded them
86
	if nvmlLibPath != "" {
87
		cHandles.nvml, _, _ = loadNVMLMgmt([]string{nvmlLibPath})
88
89
		return cHandles
	}
90
	if nvcudaLibPath != "" {
91
		cHandles.deviceCount, cHandles.nvcuda, _, _ = loadNVCUDAMgmt([]string{nvcudaLibPath})
Daniel Hiltgen's avatar
Daniel Hiltgen committed
92
		return cHandles
93
94
	}
	if cudartLibPath != "" {
95
		cHandles.deviceCount, cHandles.cudart, _, _ = loadCUDARTMgmt([]string{cudartLibPath})
Daniel Hiltgen's avatar
Daniel Hiltgen committed
96
		return cHandles
97
98
99
	}

	slog.Debug("searching for GPU discovery libraries for NVIDIA")
100
	var cudartMgmtPatterns []string
101

Daniel Hiltgen's avatar
Daniel Hiltgen committed
102
103
	// Aligned with driver, we can't carry as payloads
	nvcudaMgmtPatterns := NvcudaGlobs
Michael Yang's avatar
Michael Yang committed
104
	cudartMgmtPatterns = append(cudartMgmtPatterns, filepath.Join(LibOllamaPath, "cuda_v*", CudartMgmtName))
Daniel Hiltgen's avatar
Daniel Hiltgen committed
105
	cudartMgmtPatterns = append(cudartMgmtPatterns, CudartGlobs...)
106

Daniel Hiltgen's avatar
Daniel Hiltgen committed
107
108
	if len(NvmlGlobs) > 0 {
		nvmlLibPaths := FindGPULibs(NvmlMgmtName, NvmlGlobs)
109
		if len(nvmlLibPaths) > 0 {
110
			nvml, libPath, err := loadNVMLMgmt(nvmlLibPaths)
111
112
113
114
115
			if nvml != nil {
				slog.Debug("nvidia-ml loaded", "library", libPath)
				cHandles.nvml = nvml
				nvmlLibPath = libPath
			}
116
117
118
			if err != nil {
				bootstrapErrors = append(bootstrapErrors, err)
			}
119
120
121
		}
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
122
	nvcudaLibPaths := FindGPULibs(NvcudaMgmtName, nvcudaMgmtPatterns)
123
	if len(nvcudaLibPaths) > 0 {
124
		deviceCount, nvcuda, libPath, err := loadNVCUDAMgmt(nvcudaLibPaths)
125
		if nvcuda != nil {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
126
			slog.Debug("detected GPUs", "count", deviceCount, "library", libPath)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
127
128
			cHandles.nvcuda = nvcuda
			cHandles.deviceCount = deviceCount
129
			nvcudaLibPath = libPath
Daniel Hiltgen's avatar
Daniel Hiltgen committed
130
			return cHandles
131
		}
132
133
134
		if err != nil {
			bootstrapErrors = append(bootstrapErrors, err)
		}
135
136
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
137
	cudartLibPaths := FindGPULibs(CudartMgmtName, cudartMgmtPatterns)
138
	if len(cudartLibPaths) > 0 {
139
		deviceCount, cudart, libPath, err := loadCUDARTMgmt(cudartLibPaths)
140
		if cudart != nil {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
141
			slog.Debug("detected GPUs", "library", libPath, "count", deviceCount)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
142
143
			cHandles.cudart = cudart
			cHandles.deviceCount = deviceCount
144
			cudartLibPath = libPath
Daniel Hiltgen's avatar
Daniel Hiltgen committed
145
			return cHandles
146
		}
147
148
149
		if err != nil {
			bootstrapErrors = append(bootstrapErrors, err)
		}
150
	}
Wang,Zhe's avatar
Wang,Zhe committed
151

Daniel Hiltgen's avatar
Daniel Hiltgen committed
152
153
154
155
156
157
158
159
	return cHandles
}

// Note: gpuMutex must already be held
func initOneAPIHandles() *oneapiHandles {
	oHandles := &oneapiHandles{}

	// Short Circuit if we already know which library to use
160
	// ignore bootstrap errors in this case since we already recorded them
Daniel Hiltgen's avatar
Daniel Hiltgen committed
161
	if oneapiLibPath != "" {
162
		oHandles.deviceCount, oHandles.oneapi, _, _ = loadOneapiMgmt([]string{oneapiLibPath})
Daniel Hiltgen's avatar
Daniel Hiltgen committed
163
164
165
		return oHandles
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
166
	oneapiLibPaths := FindGPULibs(OneapiMgmtName, OneapiGlobs)
167
	if len(oneapiLibPaths) > 0 {
168
169
170
171
172
		var err error
		oHandles.deviceCount, oHandles.oneapi, oneapiLibPath, err = loadOneapiMgmt(oneapiLibPaths)
		if err != nil {
			bootstrapErrors = append(bootstrapErrors, err)
		}
173
174
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
175
	return oHandles
176
177
}

178
179
180
181
182
183
184
185
186
187
188
// GetCPUInfo returns a single-element list describing the CPU, running GPU
// discovery first if it has not happened yet.
func GetCPUInfo() GpuInfoList {
	gpuMutex.Lock()
	needBootstrap := !bootstrapped
	gpuMutex.Unlock()

	if needBootstrap {
		// GetGPUInfo acquires gpuMutex itself, so it must be called unlocked.
		GetGPUInfo()
	}

	// cpus[0] is updated under gpuMutex on every refresh, so copy it out
	// while holding the lock rather than racing with a concurrent refresh.
	gpuMutex.Lock()
	defer gpuMutex.Unlock()
	return GpuInfoList{cpus[0].GpuInfo}
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
189
func GetGPUInfo() GpuInfoList {
190
191
192
193
	// TODO - consider exploring lspci (and equivalent on windows) to check for
	// GPUs so we can report warnings if we see Nvidia/AMD but fail to load the libraries
	gpuMutex.Lock()
	defer gpuMutex.Unlock()
194
	needRefresh := true
Daniel Hiltgen's avatar
Daniel Hiltgen committed
195
196
	var cHandles *cudaHandles
	var oHandles *oneapiHandles
197
	defer func() {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
198
199
200
201
202
203
204
		if cHandles != nil {
			if cHandles.cudart != nil {
				C.cudart_release(*cHandles.cudart)
			}
			if cHandles.nvcuda != nil {
				C.nvcuda_release(*cHandles.nvcuda)
			}
205
206
207
			if cHandles.nvml != nil {
				C.nvml_release(*cHandles.nvml)
			}
208
		}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
209
210
211
212
213
		if oHandles != nil {
			if oHandles.oneapi != nil {
				// TODO - is this needed?
				C.oneapi_release(*oHandles.oneapi)
			}
214
		}
215
	}()
216

217
	if !bootstrapped {
218
		slog.Info("looking for compatible GPUs")
219
220
221
222
223
224
225
226
		cudaComputeMajorMin, err := strconv.Atoi(CudaComputeMajorMin)
		if err != nil {
			slog.Error("invalid CudaComputeMajorMin setting", "value", CudaComputeMajorMin, "error", err)
		}
		cudaComputeMinorMin, err := strconv.Atoi(CudaComputeMinorMin)
		if err != nil {
			slog.Error("invalid CudaComputeMinorMin setting", "value", CudaComputeMinorMin, "error", err)
		}
227
		bootstrapErrors = []error{}
228
229
		needRefresh = false
		var memInfo C.mem_info_t
230
231
232
233

		mem, err := GetCPUMem()
		if err != nil {
			slog.Warn("error looking up system memory", "error", err)
234
		}
Michael Yang's avatar
Michael Yang committed
235

236
237
238
239
		details, err := GetCPUDetails()
		if err != nil {
			slog.Warn("failed to lookup CPU details", "error", err)
		}
Michael Yang's avatar
lint  
Michael Yang committed
240
241
242
		cpus = []CPUInfo{
			{
				GpuInfo: GpuInfo{
Michael Yang's avatar
Michael Yang committed
243
244
245
					memInfo: mem,
					Library: "cpu",
					ID:      "0",
Michael Yang's avatar
lint  
Michael Yang committed
246
				},
247
				CPUs: details,
248
			},
Michael Yang's avatar
lint  
Michael Yang committed
249
		}
250
251

		// Load ALL libraries
Daniel Hiltgen's avatar
Daniel Hiltgen committed
252
		cHandles = initCudaHandles()
253
254

		// NVIDIA
Daniel Hiltgen's avatar
Daniel Hiltgen committed
255
256
		for i := range cHandles.deviceCount {
			if cHandles.cudart != nil || cHandles.nvcuda != nil {
257
258
259
260
261
262
263
264
				gpuInfo := CudaGPUInfo{
					GpuInfo: GpuInfo{
						Library: "cuda",
					},
					index: i,
				}
				var driverMajor int
				var driverMinor int
Daniel Hiltgen's avatar
Daniel Hiltgen committed
265
266
				if cHandles.cudart != nil {
					C.cudart_bootstrap(*cHandles.cudart, C.int(i), &memInfo)
267
				} else {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
268
269
270
					C.nvcuda_bootstrap(*cHandles.nvcuda, C.int(i), &memInfo)
					driverMajor = int(cHandles.nvcuda.driver_major)
					driverMinor = int(cHandles.nvcuda.driver_minor)
271
272
273
274
275
276
277
278
279
280
				}
				if memInfo.err != nil {
					slog.Info("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
					C.free(unsafe.Pointer(memInfo.err))
					continue
				}
				gpuInfo.TotalMemory = uint64(memInfo.total)
				gpuInfo.FreeMemory = uint64(memInfo.free)
				gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
				gpuInfo.Compute = fmt.Sprintf("%d.%d", memInfo.major, memInfo.minor)
281
282
				gpuInfo.computeMajor = int(memInfo.major)
				gpuInfo.computeMinor = int(memInfo.minor)
283
				gpuInfo.MinimumMemory = cudaMinimumMemory
284
285
				gpuInfo.DriverMajor = driverMajor
				gpuInfo.DriverMinor = driverMinor
Daniel Hiltgen's avatar
Daniel Hiltgen committed
286
				variant := cudaVariant(gpuInfo)
Michael Yang's avatar
Michael Yang committed
287
288
289
290
291
292
293

				// Start with our bundled libraries
				if variant != "" {
					variantPath := filepath.Join(LibOllamaPath, "cuda_"+variant)
					if _, err := os.Stat(variantPath); err == nil {
						// Put the variant directory first in the search path to avoid runtime linking to the wrong library
						gpuInfo.DependencyPath = append([]string{variantPath}, gpuInfo.DependencyPath...)
294
295
					}
				}
296
				gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
Daniel Hiltgen's avatar
Daniel Hiltgen committed
297
				gpuInfo.Variant = variant
298

299
				if int(memInfo.major) < cudaComputeMajorMin || (int(memInfo.major) == cudaComputeMajorMin && int(memInfo.minor) < cudaComputeMinorMin) {
300
301
302
303
304
305
306
307
					unsupportedGPUs = append(unsupportedGPUs,
						UnsupportedGPUInfo{
							GpuInfo: gpuInfo.GpuInfo,
						})
					slog.Info(fmt.Sprintf("[%d] CUDA GPU is too old. Compute Capability detected: %d.%d", i, memInfo.major, memInfo.minor))
					continue
				}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
308
309
310
				// query the management library as well so we can record any skew between the two
				// which represents overhead on the GPU we must set aside on subsequent updates
				if cHandles.nvml != nil {
311
312
313
					uuid := C.CString(gpuInfo.ID)
					defer C.free(unsafe.Pointer(uuid))
					C.nvml_get_free(*cHandles.nvml, uuid, &memInfo.free, &memInfo.total, &memInfo.used)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
					if memInfo.err != nil {
						slog.Warn("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
						C.free(unsafe.Pointer(memInfo.err))
					} else {
						if memInfo.free != 0 && uint64(memInfo.free) > gpuInfo.FreeMemory {
							gpuInfo.OSOverhead = uint64(memInfo.free) - gpuInfo.FreeMemory
							slog.Info("detected OS VRAM overhead",
								"id", gpuInfo.ID,
								"library", gpuInfo.Library,
								"compute", gpuInfo.Compute,
								"driver", fmt.Sprintf("%d.%d", gpuInfo.DriverMajor, gpuInfo.DriverMinor),
								"name", gpuInfo.Name,
								"overhead", format.HumanBytes2(gpuInfo.OSOverhead),
							)
						}
					}
				}

332
333
				// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
				cudaGPUs = append(cudaGPUs, gpuInfo)
Wang,Zhe's avatar
Wang,Zhe committed
334
			}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
335
336
337
		}

		// Intel
Michael Yang's avatar
bool  
Michael Yang committed
338
		if envconfig.IntelGPU() {
339
			oHandles = initOneAPIHandles()
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
			if oHandles != nil && oHandles.oneapi != nil {
				for d := range oHandles.oneapi.num_drivers {
					if oHandles.oneapi == nil {
						// shouldn't happen
						slog.Warn("nil oneapi handle with driver count", "count", int(oHandles.oneapi.num_drivers))
						continue
					}
					devCount := C.oneapi_get_device_count(*oHandles.oneapi, C.int(d))
					for i := range devCount {
						gpuInfo := OneapiGPUInfo{
							GpuInfo: GpuInfo{
								Library: "oneapi",
							},
							driverIndex: int(d),
							gpuIndex:    int(i),
						}
						// TODO - split bootstrapping from updating free memory
						C.oneapi_check_vram(*oHandles.oneapi, C.int(d), i, &memInfo)
						// TODO - convert this to MinimumMemory based on testing...
						var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
						memInfo.free = C.uint64_t(totalFreeMem)
						gpuInfo.TotalMemory = uint64(memInfo.total)
						gpuInfo.FreeMemory = uint64(memInfo.free)
						gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
						gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
Michael Yang's avatar
Michael Yang committed
365
						gpuInfo.DependencyPath = []string{LibOllamaPath}
366
						oneapiGPUs = append(oneapiGPUs, gpuInfo)
367
					}
368
369
370
371
				}
			}
		}

372
373
374
375
		rocmGPUs, err = AMDGetGPUInfo()
		if err != nil {
			bootstrapErrors = append(bootstrapErrors, err)
		}
376
		bootstrapped = true
377
378
379
		if len(cudaGPUs) == 0 && len(rocmGPUs) == 0 && len(oneapiGPUs) == 0 {
			slog.Info("no compatible GPUs were discovered")
		}
380
381

		// TODO verify we have runners for the discovered GPUs, filter out any that aren't supported with good error messages
382
383
384
385
386
387
	}

	// For detected GPUs, load library if not loaded

	// Refresh free memory usage
	if needRefresh {
388
389
390
391
392
393
394
395
396
		mem, err := GetCPUMem()
		if err != nil {
			slog.Warn("error looking up system memory", "error", err)
		} else {
			slog.Debug("updating system memory data",
				slog.Group(
					"before",
					"total", format.HumanBytes2(cpus[0].TotalMemory),
					"free", format.HumanBytes2(cpus[0].FreeMemory),
397
					"free_swap", format.HumanBytes2(cpus[0].FreeSwap),
398
399
400
401
402
				),
				slog.Group(
					"now",
					"total", format.HumanBytes2(mem.TotalMemory),
					"free", format.HumanBytes2(mem.FreeMemory),
403
					"free_swap", format.HumanBytes2(mem.FreeSwap),
404
405
406
				),
			)
			cpus[0].FreeMemory = mem.FreeMemory
407
			cpus[0].FreeSwap = mem.FreeSwap
408
409
		}

410
		var memInfo C.mem_info_t
Daniel Hiltgen's avatar
Daniel Hiltgen committed
411
412
		if cHandles == nil && len(cudaGPUs) > 0 {
			cHandles = initCudaHandles()
413
414
		}
		for i, gpu := range cudaGPUs {
415
			if cHandles.nvml != nil {
416
417
418
				uuid := C.CString(gpu.ID)
				defer C.free(unsafe.Pointer(uuid))
				C.nvml_get_free(*cHandles.nvml, uuid, &memInfo.free, &memInfo.total, &memInfo.used)
419
			} else if cHandles.cudart != nil {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
420
				C.cudart_bootstrap(*cHandles.cudart, C.int(gpu.index), &memInfo)
421
422
423
			} else if cHandles.nvcuda != nil {
				C.nvcuda_get_free(*cHandles.nvcuda, C.int(gpu.index), &memInfo.free, &memInfo.total)
				memInfo.used = memInfo.total - memInfo.free
Wang,Zhe's avatar
Wang,Zhe committed
424
			} else {
425
426
427
				// shouldn't happen
				slog.Warn("no valid cuda library loaded to refresh vram usage")
				break
Wang,Zhe's avatar
Wang,Zhe committed
428
429
			}
			if memInfo.err != nil {
430
				slog.Warn("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
Wang,Zhe's avatar
Wang,Zhe committed
431
432
433
				C.free(unsafe.Pointer(memInfo.err))
				continue
			}
434
435
			if memInfo.free == 0 {
				slog.Warn("error looking up nvidia GPU memory")
Wang,Zhe's avatar
Wang,Zhe committed
436
437
				continue
			}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
438
439
440
441
			if cHandles.nvml != nil && gpu.OSOverhead > 0 {
				// When using the management library update based on recorded overhead
				memInfo.free -= C.uint64_t(gpu.OSOverhead)
			}
442
443
444
			slog.Debug("updating cuda memory data",
				"gpu", gpu.ID,
				"name", gpu.Name,
Daniel Hiltgen's avatar
Daniel Hiltgen committed
445
				"overhead", format.HumanBytes2(gpu.OSOverhead),
446
447
448
449
450
451
452
453
454
455
456
457
				slog.Group(
					"before",
					"total", format.HumanBytes2(gpu.TotalMemory),
					"free", format.HumanBytes2(gpu.FreeMemory),
				),
				slog.Group(
					"now",
					"total", format.HumanBytes2(uint64(memInfo.total)),
					"free", format.HumanBytes2(uint64(memInfo.free)),
					"used", format.HumanBytes2(uint64(memInfo.used)),
				),
			)
458
			cudaGPUs[i].FreeMemory = uint64(memInfo.free)
459
		}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476

		if oHandles == nil && len(oneapiGPUs) > 0 {
			oHandles = initOneAPIHandles()
		}
		for i, gpu := range oneapiGPUs {
			if oHandles.oneapi == nil {
				// shouldn't happen
				slog.Warn("nil oneapi handle with device count", "count", oHandles.deviceCount)
				continue
			}
			C.oneapi_check_vram(*oHandles.oneapi, C.int(gpu.driverIndex), C.int(gpu.gpuIndex), &memInfo)
			// TODO - convert this to MinimumMemory based on testing...
			var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
			memInfo.free = C.uint64_t(totalFreeMem)
			oneapiGPUs[i].FreeMemory = uint64(memInfo.free)
		}

477
		err = RocmGPUInfoList(rocmGPUs).RefreshFreeMemory()
478
479
		if err != nil {
			slog.Debug("problem refreshing ROCm free memory", "error", err)
480
		}
481
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
482

483
484
485
486
487
488
489
	resp := []GpuInfo{}
	for _, gpu := range cudaGPUs {
		resp = append(resp, gpu.GpuInfo)
	}
	for _, gpu := range rocmGPUs {
		resp = append(resp, gpu.GpuInfo)
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
490
491
492
	for _, gpu := range oneapiGPUs {
		resp = append(resp, gpu.GpuInfo)
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
493
	if len(resp) == 0 {
494
		resp = append(resp, cpus[0].GpuInfo)
495
496
497
498
	}
	return resp
}

499
func FindGPULibs(baseLibName string, defaultPatterns []string) []string {
500
501
	// Multiple GPU libraries may exist, and some may not work, so keep trying until we exhaust them
	gpuLibPaths := []string{}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
502
	slog.Debug("Searching for GPU library", "name", baseLibName)
503

Michael Yang's avatar
Michael Yang committed
504
505
	// search our bundled libraries first
	patterns := []string{filepath.Join(LibOllamaPath, baseLibName)}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
506

Michael Yang's avatar
Michael Yang committed
507
	var ldPaths []string
508
509
	switch runtime.GOOS {
	case "windows":
Michael Yang's avatar
Michael Yang committed
510
		ldPaths = strings.Split(os.Getenv("PATH"), string(os.PathListSeparator))
511
	case "linux":
Michael Yang's avatar
Michael Yang committed
512
		ldPaths = strings.Split(os.Getenv("LD_LIBRARY_PATH"), string(os.PathListSeparator))
513
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
514

Michael Yang's avatar
Michael Yang committed
515
516
517
	// then search the system's LD_LIBRARY_PATH
	for _, p := range ldPaths {
		p, err := filepath.Abs(p)
518
519
520
		if err != nil {
			continue
		}
Michael Yang's avatar
Michael Yang committed
521
		patterns = append(patterns, filepath.Join(p, baseLibName))
522
	}
Michael Yang's avatar
Michael Yang committed
523
524

	// finally, search the default patterns provided by the caller
525
	patterns = append(patterns, defaultPatterns...)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
526
	slog.Debug("gpu library search", "globs", patterns)
527
	for _, pattern := range patterns {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
528
529
530
		// Nvidia PhysX known to return bogus results
		if strings.Contains(pattern, "PhysX") {
			slog.Debug("skipping PhysX cuda library path", "path", pattern)
531
			continue
Daniel Hiltgen's avatar
Daniel Hiltgen committed
532
		}
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
		// Ignore glob discovery errors
		matches, _ := filepath.Glob(pattern)
		for _, match := range matches {
			// Resolve any links so we don't try the same lib multiple times
			// and weed out any dups across globs
			libPath := match
			tmp := match
			var err error
			for ; err == nil; tmp, err = os.Readlink(libPath) {
				if !filepath.IsAbs(tmp) {
					tmp = filepath.Join(filepath.Dir(libPath), tmp)
				}
				libPath = tmp
			}
			new := true
			for _, cmp := range gpuLibPaths {
				if cmp == libPath {
					new = false
					break
				}
			}
			if new {
				gpuLibPaths = append(gpuLibPaths, libPath)
			}
		}
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
559
	slog.Debug("discovered GPU libraries", "paths", gpuLibPaths)
560
561
562
	return gpuLibPaths
}

563
564
565
// Bootstrap the runtime library
// Returns: num devices, handle, libPath, error
//
// Each path in cudartLibPaths is tried in order and the first library that
// initializes wins. When none loads, the handle is nil and the error
// describes the last failure (nil if the list was empty).
func loadCUDARTMgmt(cudartLibPaths []string) (int, *C.cudart_handle_t, string, error) {
	var resp C.cudart_init_resp_t
	resp.ch.verbose = getVerboseState()
	var err error
	for _, libPath := range cudartLibPaths {
		lib := C.CString(libPath)
		C.cudart_init(lib, &resp)
		// Free per iteration; a defer here would hold every path until return
		C.free(unsafe.Pointer(lib))
		if resp.err == nil {
			return int(resp.num_devices), &resp.ch, libPath, nil
		}
		err = fmt.Errorf("Unable to load cudart library %s: %s", libPath, C.GoString(resp.err))
		slog.Debug(err.Error())
		C.free(unsafe.Pointer(resp.err))
		// resp is reused for the next candidate; don't keep a dangling pointer
		resp.err = nil
	}
	return 0, nil, "", err
}

585
586
587
// Bootstrap the driver library
// Returns: num devices, handle, libPath, error
//
// Each path in nvcudaLibPaths is tried in order and the first library that
// initializes wins. When none loads, the handle is nil and the error
// describes the most recent failure that produced one; libraries skipped for
// a "wrong ELF class" message do not overwrite it.
func loadNVCUDAMgmt(nvcudaLibPaths []string) (int, *C.nvcuda_handle_t, string, error) {
	var resp C.nvcuda_init_resp_t
	resp.ch.verbose = getVerboseState()
	var err error
	for _, libPath := range nvcudaLibPaths {
		lib := C.CString(libPath)
		C.nvcuda_init(lib, &resp)
		// Free per iteration; a defer here would hold every path until return
		C.free(unsafe.Pointer(lib))
		if resp.err == nil {
			return int(resp.num_devices), &resp.ch, libPath, nil
		}

		// Decide what log level based on the type of error message to help users understand why
		switch resp.cudaErr {
		case C.CUDA_ERROR_INSUFFICIENT_DRIVER, C.CUDA_ERROR_SYSTEM_DRIVER_MISMATCH:
			err = fmt.Errorf("version mismatch between driver and cuda driver library - reboot or upgrade may be required: library %s", libPath)
			slog.Warn(err.Error())
		case C.CUDA_ERROR_NO_DEVICE:
			err = fmt.Errorf("no nvidia devices detected by library %s", libPath)
			slog.Info(err.Error())
		case C.CUDA_ERROR_UNKNOWN:
			err = fmt.Errorf("unknown error initializing cuda driver library %s: %s. see https://github.com/ollama/ollama/blob/main/docs/troubleshooting.md for more information", libPath, C.GoString(resp.err))
			slog.Warn(err.Error())
		default:
			msg := C.GoString(resp.err)
			if strings.Contains(msg, "wrong ELF class") {
				slog.Debug("skipping 32bit library", "library", libPath)
			} else {
				// This loader handles the driver (nvcuda) library, not cudart
				err = fmt.Errorf("Unable to load nvcuda library %s: %s", libPath, msg)
				slog.Info(err.Error())
			}
		}
		C.free(unsafe.Pointer(resp.err))
		// resp is reused for the next candidate; don't keep a dangling pointer
		resp.err = nil
	}
	return 0, nil, "", err
}

625
626
627
// Bootstrap the management library
// Returns: handle, libPath, error
//
// Each path in nvmlLibPaths is tried in order and the first library that
// initializes wins. When none loads, the handle is nil and the error
// describes the last failure (nil if the list was empty).
func loadNVMLMgmt(nvmlLibPaths []string) (*C.nvml_handle_t, string, error) {
	var resp C.nvml_init_resp_t
	resp.ch.verbose = getVerboseState()
	var err error
	for _, libPath := range nvmlLibPaths {
		lib := C.CString(libPath)
		C.nvml_init(lib, &resp)
		// Free per iteration; a defer here would hold every path until return
		C.free(unsafe.Pointer(lib))
		if resp.err == nil {
			return &resp.ch, libPath, nil
		}
		err = fmt.Errorf("Unable to load NVML management library %s: %s", libPath, C.GoString(resp.err))
		slog.Info(err.Error())
		C.free(unsafe.Pointer(resp.err))
		// resp is reused for the next candidate; don't keep a dangling pointer
		resp.err = nil
	}
	return nil, "", err
}

647
648
649
// bootstrap the Intel GPU library
// Returns: num devices, handle, libPath, error
func loadOneapiMgmt(oneapiLibPaths []string) (int, *C.oneapi_handle_t, string, error) {
Wang,Zhe's avatar
Wang,Zhe committed
650
	var resp C.oneapi_init_resp_t
Daniel Hiltgen's avatar
Daniel Hiltgen committed
651
	num_devices := 0
Wang,Zhe's avatar
Wang,Zhe committed
652
	resp.oh.verbose = getVerboseState()
653
	var err error
Wang,Zhe's avatar
Wang,Zhe committed
654
655
656
657
658
	for _, libPath := range oneapiLibPaths {
		lib := C.CString(libPath)
		defer C.free(unsafe.Pointer(lib))
		C.oneapi_init(lib, &resp)
		if resp.err != nil {
659
660
			err = fmt.Errorf("Unable to load oneAPI management library %s: %s", libPath, C.GoString(resp.err))
			slog.Debug(err.Error())
Wang,Zhe's avatar
Wang,Zhe committed
661
662
			C.free(unsafe.Pointer(resp.err))
		} else {
663
			err = nil
Daniel Hiltgen's avatar
Daniel Hiltgen committed
664
			for i := range resp.oh.num_drivers {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
665
666
				num_devices += int(C.oneapi_get_device_count(resp.oh, C.int(i)))
			}
667
			return num_devices, &resp.oh, libPath, err
Wang,Zhe's avatar
Wang,Zhe committed
668
669
		}
	}
670
	return 0, nil, "", err
Wang,Zhe's avatar
Wang,Zhe committed
671
672
}

673
func getVerboseState() C.uint16_t {
Michael Yang's avatar
Michael Yang committed
674
	if envconfig.Debug() {
675
676
677
678
		return C.uint16_t(1)
	}
	return C.uint16_t(0)
}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
679
680
681
682
683
684
685
686
687
688
689
690
691
692

// Given the list of GPUs this instantiation is targeted for,
// figure out the visible devices environment variable
//
// If different libraries are detected, the first one is what we use
func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) {
	if len(l) == 0 {
		return "", ""
	}
	switch l[0].Library {
	case "cuda":
		return cudaGetVisibleDevicesEnv(l)
	case "rocm":
		return rocmGetVisibleDevicesEnv(l)
Wang,Zhe's avatar
Wang,Zhe committed
693
694
	case "oneapi":
		return oneapiGetVisibleDevicesEnv(l)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
695
696
697
698
699
	default:
		slog.Debug("no filter required for library " + l[0].Library)
		return "", ""
	}
}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
700

701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
// GetSystemInfo reports the CPU entry, the discovered GPUs, any unsupported
// GPUs, and the text of every error recorded during bootstrap. When discovery
// yields only the CPU fallback entry, the GPU list is reported as empty.
func GetSystemInfo() SystemInfo {
	// GetGPUInfo takes gpuMutex on its own, so it has to run before we lock.
	detected := GetGPUInfo()

	gpuMutex.Lock()
	defer gpuMutex.Unlock()

	messages := []string{}
	for i := range bootstrapErrors {
		messages = append(messages, bootstrapErrors[i].Error())
	}

	// A lone "cpu" entry means no GPU was found; don't list it as a GPU.
	cpuOnly := len(detected) == 1 && detected[0].Library == "cpu"
	if cpuOnly {
		detected = []GpuInfo{}
	}

	info := SystemInfo{
		System:          cpus[0],
		GPUs:            detected,
		UnsupportedGPUs: unsupportedGPUs,
		DiscoveryErrors: messages,
	}
	return info
}