gpu.go 22.5 KB
Newer Older
1
2
//go:build linux || windows

3
package discover
4
5

/*
6
7
8
#cgo linux LDFLAGS: -lrt -lpthread -ldl -lstdc++ -lm
#cgo windows LDFLAGS: -lpthread

9
10
11
#include "gpu_info.h"
*/
import "C"
Michael Yang's avatar
lint  
Michael Yang committed
12

13
14
import (
	"fmt"
15
	"log/slog"
16
17
	"os"
	"path/filepath"
18
	"runtime"
19
	"strings"
20
21
	"sync"
	"unsafe"
Michael Yang's avatar
Michael Yang committed
22

23
	"github.com/ollama/ollama/envconfig"
24
	"github.com/ollama/ollama/format"
25
26
)

Daniel Hiltgen's avatar
Daniel Hiltgen committed
27
type cudaHandles struct {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
28
29
	deviceCount int
	cudart      *C.cudart_handle_t
30
	nvcuda      *C.nvcuda_handle_t
31
	nvml        *C.nvml_handle_t
Daniel Hiltgen's avatar
Daniel Hiltgen committed
32
33
34
}

type oneapiHandles struct {
Wang,Zhe's avatar
Wang,Zhe committed
35
	oneapi      *C.oneapi_handle_t
Daniel Hiltgen's avatar
Daniel Hiltgen committed
36
	deviceCount int
37
38
}

Michael Yang's avatar
Michael Yang committed
39
const (
Daniel Hiltgen's avatar
Daniel Hiltgen committed
40
41
	cudaMinimumMemory = 457 * format.MebiByte
	rocmMinimumMemory = 457 * format.MebiByte
Daniel Hiltgen's avatar
Daniel Hiltgen committed
42
	// TODO OneAPI minimum memory
Michael Yang's avatar
Michael Yang committed
43
44
)

45
46
47
48
49
50
51
52
53
var (
	gpuMutex      sync.Mutex
	bootstrapped  bool
	cpuCapability CPUCapability
	cpus          []CPUInfo
	cudaGPUs      []CudaGPUInfo
	nvcudaLibPath string
	cudartLibPath string
	oneapiLibPath string
54
	nvmlLibPath   string
55
56
	rocmGPUs      []RocmGPUInfo
	oneapiGPUs    []OneapiGPUInfo
57
58
59
60
61
62
63

	// If any discovered GPUs are incompatible, report why
	unsupportedGPUs []UnsupportedGPUInfo

	// Keep track of errors during bootstrapping so that if GPUs are missing
	// they expected to be present this may explain why
	bootstrapErrors []error
64
)
65

66
67
// CudaComputeMin is the minimum supported CUDA compute capability as
// {major, minor}. GPUs reporting a lower capability during discovery are
// recorded as unsupported instead of being returned.
// With our current CUDA compile flags, older than 5.0 will not work properly
var CudaComputeMin = [2]C.int{5, 0}
68

Daniel Hiltgen's avatar
Daniel Hiltgen committed
69
// RocmComputeMin is not referenced in this part of the file; presumably it is
// the minimum supported AMD GPU major version used by the ROCm discovery
// code — TODO confirm against the AMD-specific sources.
var RocmComputeMin = 9
70

Daniel Hiltgen's avatar
Daniel Hiltgen committed
71
72
// IGPUMemLimit is the memory size below which a device is assumed to be an
// integrated GPU.
// NOTE(review): the trailing comment says "512G"; given the 1G threshold this
// is presumably meant to be 512M — confirm before relying on it.
// TODO find a better way to detect iGPU instead of minimum memory
const IGPUMemLimit = 1 * format.GibiByte // 512G is what they typically report, so anything less than 1G must be iGPU
73

74
// Note: gpuMutex must already be held
Daniel Hiltgen's avatar
Daniel Hiltgen committed
75
func initCudaHandles() *cudaHandles {
76
	// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing
77

Daniel Hiltgen's avatar
Daniel Hiltgen committed
78
	cHandles := &cudaHandles{}
79
	// Short Circuit if we already know which library to use
80
	// ignore bootstrap errors in this case since we already recorded them
81
	if nvmlLibPath != "" {
82
		cHandles.nvml, _, _ = loadNVMLMgmt([]string{nvmlLibPath})
83
84
		return cHandles
	}
85
	if nvcudaLibPath != "" {
86
		cHandles.deviceCount, cHandles.nvcuda, _, _ = loadNVCUDAMgmt([]string{nvcudaLibPath})
Daniel Hiltgen's avatar
Daniel Hiltgen committed
87
		return cHandles
88
89
	}
	if cudartLibPath != "" {
90
		cHandles.deviceCount, cHandles.cudart, _, _ = loadCUDARTMgmt([]string{cudartLibPath})
Daniel Hiltgen's avatar
Daniel Hiltgen committed
91
		return cHandles
92
93
94
	}

	slog.Debug("searching for GPU discovery libraries for NVIDIA")
95
	var cudartMgmtPatterns []string
96

Daniel Hiltgen's avatar
Daniel Hiltgen committed
97
98
	// Aligned with driver, we can't carry as payloads
	nvcudaMgmtPatterns := NvcudaGlobs
99

Daniel Hiltgen's avatar
Daniel Hiltgen committed
100
101
102
103
	if runtime.GOOS == "windows" {
		localAppData := os.Getenv("LOCALAPPDATA")
		cudartMgmtPatterns = []string{filepath.Join(localAppData, "Programs", "Ollama", CudartMgmtName)}
	}
104
105
106
	libDir := LibraryDir()
	if libDir != "" {
		cudartMgmtPatterns = []string{filepath.Join(libDir, CudartMgmtName)}
107
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
108
	cudartMgmtPatterns = append(cudartMgmtPatterns, CudartGlobs...)
109

Daniel Hiltgen's avatar
Daniel Hiltgen committed
110
111
	if len(NvmlGlobs) > 0 {
		nvmlLibPaths := FindGPULibs(NvmlMgmtName, NvmlGlobs)
112
		if len(nvmlLibPaths) > 0 {
113
			nvml, libPath, err := loadNVMLMgmt(nvmlLibPaths)
114
115
116
117
118
			if nvml != nil {
				slog.Debug("nvidia-ml loaded", "library", libPath)
				cHandles.nvml = nvml
				nvmlLibPath = libPath
			}
119
120
121
			if err != nil {
				bootstrapErrors = append(bootstrapErrors, err)
			}
122
123
124
		}
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
125
	nvcudaLibPaths := FindGPULibs(NvcudaMgmtName, nvcudaMgmtPatterns)
126
	if len(nvcudaLibPaths) > 0 {
127
		deviceCount, nvcuda, libPath, err := loadNVCUDAMgmt(nvcudaLibPaths)
128
		if nvcuda != nil {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
129
			slog.Debug("detected GPUs", "count", deviceCount, "library", libPath)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
130
131
			cHandles.nvcuda = nvcuda
			cHandles.deviceCount = deviceCount
132
			nvcudaLibPath = libPath
Daniel Hiltgen's avatar
Daniel Hiltgen committed
133
			return cHandles
134
		}
135
136
137
		if err != nil {
			bootstrapErrors = append(bootstrapErrors, err)
		}
138
139
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
140
	cudartLibPaths := FindGPULibs(CudartMgmtName, cudartMgmtPatterns)
141
	if len(cudartLibPaths) > 0 {
142
		deviceCount, cudart, libPath, err := loadCUDARTMgmt(cudartLibPaths)
143
		if cudart != nil {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
144
			slog.Debug("detected GPUs", "library", libPath, "count", deviceCount)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
145
146
			cHandles.cudart = cudart
			cHandles.deviceCount = deviceCount
147
			cudartLibPath = libPath
Daniel Hiltgen's avatar
Daniel Hiltgen committed
148
			return cHandles
149
		}
150
151
152
		if err != nil {
			bootstrapErrors = append(bootstrapErrors, err)
		}
153
	}
Wang,Zhe's avatar
Wang,Zhe committed
154

Daniel Hiltgen's avatar
Daniel Hiltgen committed
155
156
157
158
159
160
161
162
	return cHandles
}

// Note: gpuMutex must already be held
func initOneAPIHandles() *oneapiHandles {
	oHandles := &oneapiHandles{}

	// Short Circuit if we already know which library to use
163
	// ignore bootstrap errors in this case since we already recorded them
Daniel Hiltgen's avatar
Daniel Hiltgen committed
164
	if oneapiLibPath != "" {
165
		oHandles.deviceCount, oHandles.oneapi, _, _ = loadOneapiMgmt([]string{oneapiLibPath})
Daniel Hiltgen's avatar
Daniel Hiltgen committed
166
167
168
		return oHandles
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
169
	oneapiLibPaths := FindGPULibs(OneapiMgmtName, OneapiGlobs)
170
	if len(oneapiLibPaths) > 0 {
171
172
173
174
175
		var err error
		oHandles.deviceCount, oHandles.oneapi, oneapiLibPath, err = loadOneapiMgmt(oneapiLibPaths)
		if err != nil {
			bootstrapErrors = append(bootstrapErrors, err)
		}
176
177
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
178
	return oHandles
179
180
}

181
182
183
184
185
186
187
188
189
190
191
// GetCPUInfo returns a single-element list describing the CPU, running the
// one-time discovery via GetGPUInfo first if it has not happened yet.
func GetCPUInfo() GpuInfoList {
	gpuMutex.Lock()
	if !bootstrapped {
		// GetGPUInfo takes gpuMutex itself, so release it around the call
		gpuMutex.Unlock()
		GetGPUInfo()
		gpuMutex.Lock()
	}
	// Read cpus under the lock: GetGPUInfo's refresh path updates cpus[0]
	// while holding gpuMutex, so an unlocked read here would be a data race.
	defer gpuMutex.Unlock()
	return GpuInfoList{cpus[0].GpuInfo}
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
192
func GetGPUInfo() GpuInfoList {
193
194
195
196
	// TODO - consider exploring lspci (and equivalent on windows) to check for
	// GPUs so we can report warnings if we see Nvidia/AMD but fail to load the libraries
	gpuMutex.Lock()
	defer gpuMutex.Unlock()
197
	needRefresh := true
Daniel Hiltgen's avatar
Daniel Hiltgen committed
198
199
	var cHandles *cudaHandles
	var oHandles *oneapiHandles
200
	defer func() {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
201
202
203
204
205
206
207
		if cHandles != nil {
			if cHandles.cudart != nil {
				C.cudart_release(*cHandles.cudart)
			}
			if cHandles.nvcuda != nil {
				C.nvcuda_release(*cHandles.nvcuda)
			}
208
209
210
			if cHandles.nvml != nil {
				C.nvml_release(*cHandles.nvml)
			}
211
		}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
212
213
214
215
216
		if oHandles != nil {
			if oHandles.oneapi != nil {
				// TODO - is this needed?
				C.oneapi_release(*oHandles.oneapi)
			}
217
		}
218
	}()
219

220
	if !bootstrapped {
221
		slog.Info("looking for compatible GPUs")
222
		bootstrapErrors = []error{}
223
		needRefresh = false
Daniel Hiltgen's avatar
Daniel Hiltgen committed
224
		cpuCapability = GetCPUCapability()
225
		var memInfo C.mem_info_t
226
227
228
229

		mem, err := GetCPUMem()
		if err != nil {
			slog.Warn("error looking up system memory", "error", err)
230
		}
231
		depPath := LibraryDir()
232
233
234
235
		details, err := GetCPUDetails()
		if err != nil {
			slog.Warn("failed to lookup CPU details", "error", err)
		}
Michael Yang's avatar
lint  
Michael Yang committed
236
237
238
		cpus = []CPUInfo{
			{
				GpuInfo: GpuInfo{
239
240
241
242
243
					memInfo:        mem,
					Library:        "cpu",
					Variant:        cpuCapability.String(),
					ID:             "0",
					DependencyPath: depPath,
Michael Yang's avatar
lint  
Michael Yang committed
244
				},
245
				CPUs: details,
246
			},
Michael Yang's avatar
lint  
Michael Yang committed
247
		}
248
249
250

		// Fallback to CPU mode if we're lacking required vector extensions on x86
		if cpuCapability < GPURunnerCPUCapability && runtime.GOARCH == "amd64" {
251
252
253
			err := fmt.Errorf("CPU does not have minimum vector extensions, GPU inference disabled.  Required:%s  Detected:%s", GPURunnerCPUCapability, cpuCapability)
			slog.Warn(err.Error())
			bootstrapErrors = append(bootstrapErrors, err)
254
255
256
257
			bootstrapped = true
			// No need to do any GPU discovery, since we can't run on them
			return GpuInfoList{cpus[0].GpuInfo}
		}
258

259
		// Load ALL libraries
Daniel Hiltgen's avatar
Daniel Hiltgen committed
260
		cHandles = initCudaHandles()
261
262

		// NVIDIA
Daniel Hiltgen's avatar
Daniel Hiltgen committed
263
264
		for i := range cHandles.deviceCount {
			if cHandles.cudart != nil || cHandles.nvcuda != nil {
265
266
267
268
269
270
271
272
				gpuInfo := CudaGPUInfo{
					GpuInfo: GpuInfo{
						Library: "cuda",
					},
					index: i,
				}
				var driverMajor int
				var driverMinor int
Daniel Hiltgen's avatar
Daniel Hiltgen committed
273
274
				if cHandles.cudart != nil {
					C.cudart_bootstrap(*cHandles.cudart, C.int(i), &memInfo)
275
				} else {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
276
277
278
					C.nvcuda_bootstrap(*cHandles.nvcuda, C.int(i), &memInfo)
					driverMajor = int(cHandles.nvcuda.driver_major)
					driverMinor = int(cHandles.nvcuda.driver_minor)
279
280
281
282
283
284
285
286
287
288
				}
				if memInfo.err != nil {
					slog.Info("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
					C.free(unsafe.Pointer(memInfo.err))
					continue
				}
				gpuInfo.TotalMemory = uint64(memInfo.total)
				gpuInfo.FreeMemory = uint64(memInfo.free)
				gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
				gpuInfo.Compute = fmt.Sprintf("%d.%d", memInfo.major, memInfo.minor)
289
290
				gpuInfo.computeMajor = int(memInfo.major)
				gpuInfo.computeMinor = int(memInfo.minor)
291
				gpuInfo.MinimumMemory = cudaMinimumMemory
292
293
				gpuInfo.DriverMajor = driverMajor
				gpuInfo.DriverMinor = driverMinor
Daniel Hiltgen's avatar
Daniel Hiltgen committed
294
				variant := cudaVariant(gpuInfo)
295
296
297
				if depPath != "" {
					gpuInfo.DependencyPath = depPath
					// Check for variant specific directory
Daniel Hiltgen's avatar
Daniel Hiltgen committed
298
299
300
					if variant != "" {
						if _, err := os.Stat(filepath.Join(depPath, "cuda_"+variant)); err == nil {
							gpuInfo.DependencyPath = filepath.Join(depPath, "cuda_"+variant)
301
302
303
						}
					}
				}
304
				gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
Daniel Hiltgen's avatar
Daniel Hiltgen committed
305
				gpuInfo.Variant = variant
306

307
308
309
310
311
312
313
314
315
				if memInfo.major < CudaComputeMin[0] || (memInfo.major == CudaComputeMin[0] && memInfo.minor < CudaComputeMin[1]) {
					unsupportedGPUs = append(unsupportedGPUs,
						UnsupportedGPUInfo{
							GpuInfo: gpuInfo.GpuInfo,
						})
					slog.Info(fmt.Sprintf("[%d] CUDA GPU is too old. Compute Capability detected: %d.%d", i, memInfo.major, memInfo.minor))
					continue
				}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
316
317
318
				// query the management library as well so we can record any skew between the two
				// which represents overhead on the GPU we must set aside on subsequent updates
				if cHandles.nvml != nil {
319
320
321
					uuid := C.CString(gpuInfo.ID)
					defer C.free(unsafe.Pointer(uuid))
					C.nvml_get_free(*cHandles.nvml, uuid, &memInfo.free, &memInfo.total, &memInfo.used)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
					if memInfo.err != nil {
						slog.Warn("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
						C.free(unsafe.Pointer(memInfo.err))
					} else {
						if memInfo.free != 0 && uint64(memInfo.free) > gpuInfo.FreeMemory {
							gpuInfo.OSOverhead = uint64(memInfo.free) - gpuInfo.FreeMemory
							slog.Info("detected OS VRAM overhead",
								"id", gpuInfo.ID,
								"library", gpuInfo.Library,
								"compute", gpuInfo.Compute,
								"driver", fmt.Sprintf("%d.%d", gpuInfo.DriverMajor, gpuInfo.DriverMinor),
								"name", gpuInfo.Name,
								"overhead", format.HumanBytes2(gpuInfo.OSOverhead),
							)
						}
					}
				}

340
341
				// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
				cudaGPUs = append(cudaGPUs, gpuInfo)
Wang,Zhe's avatar
Wang,Zhe committed
342
			}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
343
344
345
		}

		// Intel
Michael Yang's avatar
bool  
Michael Yang committed
346
		if envconfig.IntelGPU() {
347
			oHandles = initOneAPIHandles()
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
			if oHandles != nil && oHandles.oneapi != nil {
				for d := range oHandles.oneapi.num_drivers {
					if oHandles.oneapi == nil {
						// shouldn't happen
						slog.Warn("nil oneapi handle with driver count", "count", int(oHandles.oneapi.num_drivers))
						continue
					}
					devCount := C.oneapi_get_device_count(*oHandles.oneapi, C.int(d))
					for i := range devCount {
						gpuInfo := OneapiGPUInfo{
							GpuInfo: GpuInfo{
								Library: "oneapi",
							},
							driverIndex: int(d),
							gpuIndex:    int(i),
						}
						// TODO - split bootstrapping from updating free memory
						C.oneapi_check_vram(*oHandles.oneapi, C.int(d), i, &memInfo)
						// TODO - convert this to MinimumMemory based on testing...
						var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
						memInfo.free = C.uint64_t(totalFreeMem)
						gpuInfo.TotalMemory = uint64(memInfo.total)
						gpuInfo.FreeMemory = uint64(memInfo.free)
						gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
						gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
						gpuInfo.DependencyPath = depPath
						oneapiGPUs = append(oneapiGPUs, gpuInfo)
375
					}
376
377
378
379
				}
			}
		}

380
381
382
383
		rocmGPUs, err = AMDGetGPUInfo()
		if err != nil {
			bootstrapErrors = append(bootstrapErrors, err)
		}
384
		bootstrapped = true
385
386
387
		if len(cudaGPUs) == 0 && len(rocmGPUs) == 0 && len(oneapiGPUs) == 0 {
			slog.Info("no compatible GPUs were discovered")
		}
388
389
390
391
392
393
	}

	// For detected GPUs, load library if not loaded

	// Refresh free memory usage
	if needRefresh {
394
395
396
397
398
399
400
401
402
		mem, err := GetCPUMem()
		if err != nil {
			slog.Warn("error looking up system memory", "error", err)
		} else {
			slog.Debug("updating system memory data",
				slog.Group(
					"before",
					"total", format.HumanBytes2(cpus[0].TotalMemory),
					"free", format.HumanBytes2(cpus[0].FreeMemory),
403
					"free_swap", format.HumanBytes2(cpus[0].FreeSwap),
404
405
406
407
408
				),
				slog.Group(
					"now",
					"total", format.HumanBytes2(mem.TotalMemory),
					"free", format.HumanBytes2(mem.FreeMemory),
409
					"free_swap", format.HumanBytes2(mem.FreeSwap),
410
411
412
				),
			)
			cpus[0].FreeMemory = mem.FreeMemory
413
			cpus[0].FreeSwap = mem.FreeSwap
414
415
		}

416
		var memInfo C.mem_info_t
Daniel Hiltgen's avatar
Daniel Hiltgen committed
417
418
		if cHandles == nil && len(cudaGPUs) > 0 {
			cHandles = initCudaHandles()
419
420
		}
		for i, gpu := range cudaGPUs {
421
			if cHandles.nvml != nil {
422
423
424
				uuid := C.CString(gpu.ID)
				defer C.free(unsafe.Pointer(uuid))
				C.nvml_get_free(*cHandles.nvml, uuid, &memInfo.free, &memInfo.total, &memInfo.used)
425
			} else if cHandles.cudart != nil {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
426
				C.cudart_bootstrap(*cHandles.cudart, C.int(gpu.index), &memInfo)
427
428
429
			} else if cHandles.nvcuda != nil {
				C.nvcuda_get_free(*cHandles.nvcuda, C.int(gpu.index), &memInfo.free, &memInfo.total)
				memInfo.used = memInfo.total - memInfo.free
Wang,Zhe's avatar
Wang,Zhe committed
430
			} else {
431
432
433
				// shouldn't happen
				slog.Warn("no valid cuda library loaded to refresh vram usage")
				break
Wang,Zhe's avatar
Wang,Zhe committed
434
435
			}
			if memInfo.err != nil {
436
				slog.Warn("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
Wang,Zhe's avatar
Wang,Zhe committed
437
438
439
				C.free(unsafe.Pointer(memInfo.err))
				continue
			}
440
441
			if memInfo.free == 0 {
				slog.Warn("error looking up nvidia GPU memory")
Wang,Zhe's avatar
Wang,Zhe committed
442
443
				continue
			}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
444
445
446
447
			if cHandles.nvml != nil && gpu.OSOverhead > 0 {
				// When using the management library update based on recorded overhead
				memInfo.free -= C.uint64_t(gpu.OSOverhead)
			}
448
449
450
			slog.Debug("updating cuda memory data",
				"gpu", gpu.ID,
				"name", gpu.Name,
Daniel Hiltgen's avatar
Daniel Hiltgen committed
451
				"overhead", format.HumanBytes2(gpu.OSOverhead),
452
453
454
455
456
457
458
459
460
461
462
463
				slog.Group(
					"before",
					"total", format.HumanBytes2(gpu.TotalMemory),
					"free", format.HumanBytes2(gpu.FreeMemory),
				),
				slog.Group(
					"now",
					"total", format.HumanBytes2(uint64(memInfo.total)),
					"free", format.HumanBytes2(uint64(memInfo.free)),
					"used", format.HumanBytes2(uint64(memInfo.used)),
				),
			)
464
			cudaGPUs[i].FreeMemory = uint64(memInfo.free)
465
		}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482

		if oHandles == nil && len(oneapiGPUs) > 0 {
			oHandles = initOneAPIHandles()
		}
		for i, gpu := range oneapiGPUs {
			if oHandles.oneapi == nil {
				// shouldn't happen
				slog.Warn("nil oneapi handle with device count", "count", oHandles.deviceCount)
				continue
			}
			C.oneapi_check_vram(*oHandles.oneapi, C.int(gpu.driverIndex), C.int(gpu.gpuIndex), &memInfo)
			// TODO - convert this to MinimumMemory based on testing...
			var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
			memInfo.free = C.uint64_t(totalFreeMem)
			oneapiGPUs[i].FreeMemory = uint64(memInfo.free)
		}

483
		err = RocmGPUInfoList(rocmGPUs).RefreshFreeMemory()
484
485
		if err != nil {
			slog.Debug("problem refreshing ROCm free memory", "error", err)
486
		}
487
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
488

489
490
491
492
493
494
495
	resp := []GpuInfo{}
	for _, gpu := range cudaGPUs {
		resp = append(resp, gpu.GpuInfo)
	}
	for _, gpu := range rocmGPUs {
		resp = append(resp, gpu.GpuInfo)
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
496
497
498
	for _, gpu := range oneapiGPUs {
		resp = append(resp, gpu.GpuInfo)
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
499
	if len(resp) == 0 {
500
		resp = append(resp, cpus[0].GpuInfo)
501
502
503
504
	}
	return resp
}

505
func FindGPULibs(baseLibName string, defaultPatterns []string) []string {
506
507
508
	// Multiple GPU libraries may exist, and some may not work, so keep trying until we exhaust them
	var ldPaths []string
	gpuLibPaths := []string{}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
509
	slog.Debug("Searching for GPU library", "name", baseLibName)
510

Daniel Hiltgen's avatar
Daniel Hiltgen committed
511
	// Start with our bundled libraries
Daniel Hiltgen's avatar
Daniel Hiltgen committed
512
	patterns := []string{filepath.Join(LibraryDir(), baseLibName)}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
513

514
515
516
517
518
519
520
521
	switch runtime.GOOS {
	case "windows":
		ldPaths = strings.Split(os.Getenv("PATH"), ";")
	case "linux":
		ldPaths = strings.Split(os.Getenv("LD_LIBRARY_PATH"), ":")
	default:
		return gpuLibPaths
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
522
523

	// Then with whatever we find in the PATH/LD_LIBRARY_PATH
524
525
526
527
528
	for _, ldPath := range ldPaths {
		d, err := filepath.Abs(ldPath)
		if err != nil {
			continue
		}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
529
		patterns = append(patterns, filepath.Join(d, baseLibName))
530
	}
531
	patterns = append(patterns, defaultPatterns...)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
532
	slog.Debug("gpu library search", "globs", patterns)
533
	for _, pattern := range patterns {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
534
535
536
537

		// Nvidia PhysX known to return bogus results
		if strings.Contains(pattern, "PhysX") {
			slog.Debug("skipping PhysX cuda library path", "path", pattern)
538
			continue
Daniel Hiltgen's avatar
Daniel Hiltgen committed
539
		}
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
		// Ignore glob discovery errors
		matches, _ := filepath.Glob(pattern)
		for _, match := range matches {
			// Resolve any links so we don't try the same lib multiple times
			// and weed out any dups across globs
			libPath := match
			tmp := match
			var err error
			for ; err == nil; tmp, err = os.Readlink(libPath) {
				if !filepath.IsAbs(tmp) {
					tmp = filepath.Join(filepath.Dir(libPath), tmp)
				}
				libPath = tmp
			}
			new := true
			for _, cmp := range gpuLibPaths {
				if cmp == libPath {
					new = false
					break
				}
			}
			if new {
				gpuLibPaths = append(gpuLibPaths, libPath)
			}
		}
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
566
	slog.Debug("discovered GPU libraries", "paths", gpuLibPaths)
567
568
569
	return gpuLibPaths
}

570
571
572
// Bootstrap the runtime library
// Returns: num devices, handle, libPath, error
//
// Each path in cudartLibPaths is tried in order and the first one that
// initializes successfully wins. If none load, the handle is nil and the
// error describes the last failure (nil if the list was empty).
func loadCUDARTMgmt(cudartLibPaths []string) (int, *C.cudart_handle_t, string, error) {
	var resp C.cudart_init_resp_t
	resp.ch.verbose = getVerboseState()
	var err error
	for _, libPath := range cudartLibPaths {
		lib := C.CString(libPath)
		C.cudart_init(lib, &resp)
		// Free per iteration; a defer here would hold every path until return
		C.free(unsafe.Pointer(lib))
		if resp.err != nil {
			err = fmt.Errorf("Unable to load cudart library %s: %s", libPath, C.GoString(resp.err))
			slog.Debug(err.Error())
			C.free(unsafe.Pointer(resp.err))
			// Don't carry a dangling pointer into the next attempt
			resp.err = nil
			continue
		}
		return int(resp.num_devices), &resp.ch, libPath, nil
	}
	return 0, nil, "", err
}

592
593
594
// Bootstrap the driver library
// Returns: num devices, handle, libPath, error
//
// Each path in nvcudaLibPaths is tried in order and the first one that
// initializes successfully wins. Failures are logged at a level chosen from
// the reported CUDA error code. If none load, the handle is nil and the
// error describes the last recorded failure; libraries skipped for a
// "wrong ELF class" do not update the error.
func loadNVCUDAMgmt(nvcudaLibPaths []string) (int, *C.nvcuda_handle_t, string, error) {
	var resp C.nvcuda_init_resp_t
	resp.ch.verbose = getVerboseState()
	var err error
	for _, libPath := range nvcudaLibPaths {
		lib := C.CString(libPath)
		C.nvcuda_init(lib, &resp)
		// Free per iteration; a defer here would hold every path until return
		C.free(unsafe.Pointer(lib))
		if resp.err == nil {
			return int(resp.num_devices), &resp.ch, libPath, nil
		}

		// Decide what log level based on the type of error message to help users understand why
		switch resp.cudaErr {
		case C.CUDA_ERROR_INSUFFICIENT_DRIVER, C.CUDA_ERROR_SYSTEM_DRIVER_MISMATCH:
			err = fmt.Errorf("version mismatch between driver and cuda driver library - reboot or upgrade may be required: library %s", libPath)
			slog.Warn(err.Error())
		case C.CUDA_ERROR_NO_DEVICE:
			err = fmt.Errorf("no nvidia devices detected by library %s", libPath)
			slog.Info(err.Error())
		case C.CUDA_ERROR_UNKNOWN:
			err = fmt.Errorf("unknown error initializing cuda driver library %s: %s. see https://github.com/ollama/ollama/blob/main/docs/troubleshooting.md for more information", libPath, C.GoString(resp.err))
			slog.Warn(err.Error())
		default:
			msg := C.GoString(resp.err)
			if strings.Contains(msg, "wrong ELF class") {
				slog.Debug("skipping 32bit library", "library", libPath)
			} else {
				// This is the driver (nvcuda) library, not cudart
				err = fmt.Errorf("Unable to load nvcuda library %s: %s", libPath, msg)
				slog.Info(err.Error())
			}
		}
		C.free(unsafe.Pointer(resp.err))
		// Don't carry a dangling pointer into the next attempt
		resp.err = nil
	}
	return 0, nil, "", err
}

632
633
634
// Bootstrap the management library
// Returns: handle, libPath, error
//
// Each path in nvmlLibPaths is tried in order and the first one that
// initializes successfully wins. If none load, the handle is nil and the
// error describes the last failure (nil if the list was empty).
func loadNVMLMgmt(nvmlLibPaths []string) (*C.nvml_handle_t, string, error) {
	var resp C.nvml_init_resp_t
	resp.ch.verbose = getVerboseState()
	var err error
	for _, libPath := range nvmlLibPaths {
		lib := C.CString(libPath)
		C.nvml_init(lib, &resp)
		// Free per iteration; a defer here would hold every path until return
		C.free(unsafe.Pointer(lib))
		if resp.err != nil {
			err = fmt.Errorf("Unable to load NVML management library %s: %s", libPath, C.GoString(resp.err))
			slog.Info(err.Error())
			C.free(unsafe.Pointer(resp.err))
			// Don't carry a dangling pointer into the next attempt
			resp.err = nil
			continue
		}
		return &resp.ch, libPath, nil
	}
	return nil, "", err
}

654
655
656
// bootstrap the Intel GPU library
// Returns: num devices, handle, libPath, error
func loadOneapiMgmt(oneapiLibPaths []string) (int, *C.oneapi_handle_t, string, error) {
Wang,Zhe's avatar
Wang,Zhe committed
657
	var resp C.oneapi_init_resp_t
Daniel Hiltgen's avatar
Daniel Hiltgen committed
658
	num_devices := 0
Wang,Zhe's avatar
Wang,Zhe committed
659
	resp.oh.verbose = getVerboseState()
660
	var err error
Wang,Zhe's avatar
Wang,Zhe committed
661
662
663
664
665
	for _, libPath := range oneapiLibPaths {
		lib := C.CString(libPath)
		defer C.free(unsafe.Pointer(lib))
		C.oneapi_init(lib, &resp)
		if resp.err != nil {
666
667
			err = fmt.Errorf("Unable to load oneAPI management library %s: %s", libPath, C.GoString(resp.err))
			slog.Debug(err.Error())
Wang,Zhe's avatar
Wang,Zhe committed
668
669
			C.free(unsafe.Pointer(resp.err))
		} else {
670
			err = nil
Daniel Hiltgen's avatar
Daniel Hiltgen committed
671
			for i := range resp.oh.num_drivers {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
672
673
				num_devices += int(C.oneapi_get_device_count(resp.oh, C.int(i)))
			}
674
			return num_devices, &resp.oh, libPath, err
Wang,Zhe's avatar
Wang,Zhe committed
675
676
		}
	}
677
	return 0, nil, "", err
Wang,Zhe's avatar
Wang,Zhe committed
678
679
}

680
func getVerboseState() C.uint16_t {
Michael Yang's avatar
Michael Yang committed
681
	if envconfig.Debug() {
682
683
684
685
		return C.uint16_t(1)
	}
	return C.uint16_t(0)
}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
686
687
688
689
690
691
692
693
694
695
696
697
698
699

// Given the list of GPUs this instantiation is targeted for,
// figure out the visible devices environment variable
//
// If different libraries are detected, the first one is what we use
func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) {
	if len(l) == 0 {
		return "", ""
	}
	switch l[0].Library {
	case "cuda":
		return cudaGetVisibleDevicesEnv(l)
	case "rocm":
		return rocmGetVisibleDevicesEnv(l)
Wang,Zhe's avatar
Wang,Zhe committed
700
701
	case "oneapi":
		return oneapiGetVisibleDevicesEnv(l)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
702
703
704
705
706
	default:
		slog.Debug("no filter required for library " + l[0].Library)
		return "", ""
	}
}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
707

Daniel Hiltgen's avatar
Daniel Hiltgen committed
708
func LibraryDir() string {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
709
710
711
712
713
714
715
716
717
718
	// On Windows/linux we bundle the dependencies at the same level as the executable
	appExe, err := os.Executable()
	if err != nil {
		slog.Warn("failed to lookup executable path", "error", err)
	}
	cwd, err := os.Getwd()
	if err != nil {
		slog.Warn("failed to lookup working directory", "error", err)
	}
	// Scan for any of our dependeices, and pick first match
719
	for _, root := range []string{filepath.Dir(appExe), filepath.Join(filepath.Dir(appExe), envconfig.LibRelativeToExe()), cwd} {
720
		libDep := filepath.Join("lib", "ollama")
Daniel Hiltgen's avatar
Daniel Hiltgen committed
721
722
723
724
725
726
727
728
729
730
731
732
733
734
		if _, err := os.Stat(filepath.Join(root, libDep)); err == nil {
			return filepath.Join(root, libDep)
		}
		// Developer mode, local build
		if _, err := os.Stat(filepath.Join(root, runtime.GOOS+"-"+runtime.GOARCH, libDep)); err == nil {
			return filepath.Join(root, runtime.GOOS+"-"+runtime.GOARCH, libDep)
		}
		if _, err := os.Stat(filepath.Join(root, "dist", runtime.GOOS+"-"+runtime.GOARCH, libDep)); err == nil {
			return filepath.Join(root, "dist", runtime.GOOS+"-"+runtime.GOARCH, libDep)
		}
	}
	slog.Warn("unable to locate gpu dependency libraries")
	return ""
}
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754

// GetSystemInfo gathers a snapshot of the system: the CPU entry, the usable
// GPUs, any GPUs found to be unsupported, and the messages of errors recorded
// while bootstrapping discovery. When GetGPUInfo reports only the CPU
// fallback entry, the GPU list in the result is empty.
func GetSystemInfo() SystemInfo {
	// GetGPUInfo takes gpuMutex itself, so it must run before we lock
	gpus := GetGPUInfo()

	gpuMutex.Lock()
	defer gpuMutex.Unlock()

	discoveryErrors := make([]string, 0, len(bootstrapErrors))
	for i := range bootstrapErrors {
		discoveryErrors = append(discoveryErrors, bootstrapErrors[i].Error())
	}

	cpuOnly := len(gpus) == 1 && gpus[0].Library == "cpu"
	if cpuOnly {
		gpus = []GpuInfo{}
	}

	info := SystemInfo{
		System:          cpus[0],
		GPUs:            gpus,
		UnsupportedGPUs: unsupportedGPUs,
		DiscoveryErrors: discoveryErrors,
	}
	return info
}