gpu.go 20 KB
Newer Older
1
2
3
4
5
//go:build linux || windows

package gpu

/*
6
7
8
#cgo linux LDFLAGS: -lrt -lpthread -ldl -lstdc++ -lm
#cgo windows LDFLAGS: -lpthread

9
10
11
#include "gpu_info.h"
*/
import "C"
Michael Yang's avatar
lint  
Michael Yang committed
12

13
14
import (
	"fmt"
15
	"log/slog"
16
17
	"os"
	"path/filepath"
18
	"runtime"
19
	"strings"
20
21
	"sync"
	"unsafe"
Michael Yang's avatar
Michael Yang committed
22

23
	"github.com/ollama/ollama/envconfig"
24
	"github.com/ollama/ollama/format"
25
26
)

Daniel Hiltgen's avatar
Daniel Hiltgen committed
27
type cudaHandles struct {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
28
29
	deviceCount int
	cudart      *C.cudart_handle_t
30
	nvcuda      *C.nvcuda_handle_t
31
	nvml        *C.nvml_handle_t
Daniel Hiltgen's avatar
Daniel Hiltgen committed
32
33
34
}

type oneapiHandles struct {
Wang,Zhe's avatar
Wang,Zhe committed
35
	oneapi      *C.oneapi_handle_t
Daniel Hiltgen's avatar
Daniel Hiltgen committed
36
	deviceCount int
37
38
}

Michael Yang's avatar
Michael Yang committed
39
const (
Daniel Hiltgen's avatar
Daniel Hiltgen committed
40
41
	cudaMinimumMemory = 457 * format.MebiByte
	rocmMinimumMemory = 457 * format.MebiByte
Daniel Hiltgen's avatar
Daniel Hiltgen committed
42
	// TODO OneAPI minimum memory
Michael Yang's avatar
Michael Yang committed
43
44
)

45
46
47
48
49
50
51
52
53
var (
	gpuMutex      sync.Mutex
	bootstrapped  bool
	cpuCapability CPUCapability
	cpus          []CPUInfo
	cudaGPUs      []CudaGPUInfo
	nvcudaLibPath string
	cudartLibPath string
	oneapiLibPath string
54
	nvmlLibPath   string
55
56
57
	rocmGPUs      []RocmGPUInfo
	oneapiGPUs    []OneapiGPUInfo
)
58

59
60
// With our current CUDA compile flags, older than 5.0 will not work properly
var CudaComputeMin = [2]C.int{5, 0}
61

Daniel Hiltgen's avatar
Daniel Hiltgen committed
62
var RocmComputeMin = 9
63

Daniel Hiltgen's avatar
Daniel Hiltgen committed
64
65
// TODO find a better way to detect iGPU instead of minimum memory
const IGPUMemLimit = 1 * format.GibiByte // 512G is what they typically report, so anything less than 1G must be iGPU
66

67
// Note: gpuMutex must already be held
Daniel Hiltgen's avatar
Daniel Hiltgen committed
68
func initCudaHandles() *cudaHandles {
69
	// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing
70

Daniel Hiltgen's avatar
Daniel Hiltgen committed
71
	cHandles := &cudaHandles{}
72
	// Short Circuit if we already know which library to use
73
74
75
76
	if nvmlLibPath != "" {
		cHandles.nvml, _ = LoadNVMLMgmt([]string{nvmlLibPath})
		return cHandles
	}
77
	if nvcudaLibPath != "" {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
78
79
		cHandles.deviceCount, cHandles.nvcuda, _ = LoadNVCUDAMgmt([]string{nvcudaLibPath})
		return cHandles
80
81
	}
	if cudartLibPath != "" {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
82
83
		cHandles.deviceCount, cHandles.cudart, _ = LoadCUDARTMgmt([]string{cudartLibPath})
		return cHandles
84
85
86
	}

	slog.Debug("searching for GPU discovery libraries for NVIDIA")
87
	var cudartMgmtPatterns []string
88

Daniel Hiltgen's avatar
Daniel Hiltgen committed
89
90
	// Aligned with driver, we can't carry as payloads
	nvcudaMgmtPatterns := NvcudaGlobs
91

Daniel Hiltgen's avatar
Daniel Hiltgen committed
92
93
94
95
	if runtime.GOOS == "windows" {
		localAppData := os.Getenv("LOCALAPPDATA")
		cudartMgmtPatterns = []string{filepath.Join(localAppData, "Programs", "Ollama", CudartMgmtName)}
	}
96
97
98
	libDir := LibraryDir()
	if libDir != "" {
		cudartMgmtPatterns = []string{filepath.Join(libDir, CudartMgmtName)}
99
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
100
	cudartMgmtPatterns = append(cudartMgmtPatterns, CudartGlobs...)
101

Daniel Hiltgen's avatar
Daniel Hiltgen committed
102
103
	if len(NvmlGlobs) > 0 {
		nvmlLibPaths := FindGPULibs(NvmlMgmtName, NvmlGlobs)
104
105
106
107
108
109
110
111
112
113
		if len(nvmlLibPaths) > 0 {
			nvml, libPath := LoadNVMLMgmt(nvmlLibPaths)
			if nvml != nil {
				slog.Debug("nvidia-ml loaded", "library", libPath)
				cHandles.nvml = nvml
				nvmlLibPath = libPath
			}
		}
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
114
	nvcudaLibPaths := FindGPULibs(NvcudaMgmtName, nvcudaMgmtPatterns)
115
116
117
	if len(nvcudaLibPaths) > 0 {
		deviceCount, nvcuda, libPath := LoadNVCUDAMgmt(nvcudaLibPaths)
		if nvcuda != nil {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
118
			slog.Debug("detected GPUs", "count", deviceCount, "library", libPath)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
119
120
			cHandles.nvcuda = nvcuda
			cHandles.deviceCount = deviceCount
121
			nvcudaLibPath = libPath
Daniel Hiltgen's avatar
Daniel Hiltgen committed
122
			return cHandles
123
124
125
		}
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
126
	cudartLibPaths := FindGPULibs(CudartMgmtName, cudartMgmtPatterns)
127
	if len(cudartLibPaths) > 0 {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
128
		deviceCount, cudart, libPath := LoadCUDARTMgmt(cudartLibPaths)
129
		if cudart != nil {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
130
			slog.Debug("detected GPUs", "library", libPath, "count", deviceCount)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
131
132
			cHandles.cudart = cudart
			cHandles.deviceCount = deviceCount
133
			cudartLibPath = libPath
Daniel Hiltgen's avatar
Daniel Hiltgen committed
134
			return cHandles
135
136
		}
	}
Wang,Zhe's avatar
Wang,Zhe committed
137

Daniel Hiltgen's avatar
Daniel Hiltgen committed
138
139
140
141
142
143
144
145
146
147
148
149
150
	return cHandles
}

// Note: gpuMutex must already be held
func initOneAPIHandles() *oneapiHandles {
	oHandles := &oneapiHandles{}

	// Short Circuit if we already know which library to use
	if oneapiLibPath != "" {
		oHandles.deviceCount, oHandles.oneapi, _ = LoadOneapiMgmt([]string{oneapiLibPath})
		return oHandles
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
151
	oneapiLibPaths := FindGPULibs(OneapiMgmtName, OneapiGlobs)
152
	if len(oneapiLibPaths) > 0 {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
153
		oHandles.deviceCount, oHandles.oneapi, oneapiLibPath = LoadOneapiMgmt(oneapiLibPaths)
154
155
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
156
	return oHandles
157
158
}

159
160
161
162
163
164
165
166
167
168
169
// GetCPUInfo returns a single-element list describing the CPU. If discovery
// has not run yet it triggers GetGPUInfo first so that cpus is populated.
func GetCPUInfo() GpuInfoList {
	gpuMutex.Lock()
	if !bootstrapped {
		// GetGPUInfo takes gpuMutex itself, so it must be called unlocked.
		gpuMutex.Unlock()
		GetGPUInfo()
		gpuMutex.Lock()
	}
	// Copy the entry while holding the lock: GetGPUInfo's refresh path
	// updates cpus[0] under gpuMutex.
	defer gpuMutex.Unlock()
	return GpuInfoList{cpus[0].GpuInfo}
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
170
func GetGPUInfo() GpuInfoList {
171
172
173
174
	// TODO - consider exploring lspci (and equivalent on windows) to check for
	// GPUs so we can report warnings if we see Nvidia/AMD but fail to load the libraries
	gpuMutex.Lock()
	defer gpuMutex.Unlock()
175
	needRefresh := true
Daniel Hiltgen's avatar
Daniel Hiltgen committed
176
177
	var cHandles *cudaHandles
	var oHandles *oneapiHandles
178
	defer func() {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
179
180
181
182
183
184
185
		if cHandles != nil {
			if cHandles.cudart != nil {
				C.cudart_release(*cHandles.cudart)
			}
			if cHandles.nvcuda != nil {
				C.nvcuda_release(*cHandles.nvcuda)
			}
186
187
188
			if cHandles.nvml != nil {
				C.nvml_release(*cHandles.nvml)
			}
189
		}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
190
191
192
193
194
		if oHandles != nil {
			if oHandles.oneapi != nil {
				// TODO - is this needed?
				C.oneapi_release(*oHandles.oneapi)
			}
195
		}
196
	}()
197

198
	if !bootstrapped {
199
		slog.Info("looking for compatible GPUs")
200
		needRefresh = false
Daniel Hiltgen's avatar
Daniel Hiltgen committed
201
		cpuCapability = GetCPUCapability()
202
		var memInfo C.mem_info_t
203
204
205
206

		mem, err := GetCPUMem()
		if err != nil {
			slog.Warn("error looking up system memory", "error", err)
207
		}
Michael Yang's avatar
lint  
Michael Yang committed
208
209
210
211
212
		cpus = []CPUInfo{
			{
				GpuInfo: GpuInfo{
					memInfo: mem,
					Library: "cpu",
213
					Variant: cpuCapability.String(),
Michael Yang's avatar
lint  
Michael Yang committed
214
215
					ID:      "0",
				},
216
			},
Michael Yang's avatar
lint  
Michael Yang committed
217
		}
218
219
220

		// Fallback to CPU mode if we're lacking required vector extensions on x86
		if cpuCapability < GPURunnerCPUCapability && runtime.GOARCH == "amd64" {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
221
			slog.Warn("CPU does not have minimum vector extensions, GPU inference disabled", "required", GPURunnerCPUCapability, "detected", cpuCapability)
222
223
224
225
			bootstrapped = true
			// No need to do any GPU discovery, since we can't run on them
			return GpuInfoList{cpus[0].GpuInfo}
		}
226

Daniel Hiltgen's avatar
Daniel Hiltgen committed
227
		depPath := LibraryDir()
228
229

		// Load ALL libraries
Daniel Hiltgen's avatar
Daniel Hiltgen committed
230
		cHandles = initCudaHandles()
231
232

		// NVIDIA
Daniel Hiltgen's avatar
Daniel Hiltgen committed
233
234
		for i := range cHandles.deviceCount {
			if cHandles.cudart != nil || cHandles.nvcuda != nil {
235
236
237
238
239
240
241
242
				gpuInfo := CudaGPUInfo{
					GpuInfo: GpuInfo{
						Library: "cuda",
					},
					index: i,
				}
				var driverMajor int
				var driverMinor int
Daniel Hiltgen's avatar
Daniel Hiltgen committed
243
244
				if cHandles.cudart != nil {
					C.cudart_bootstrap(*cHandles.cudart, C.int(i), &memInfo)
245
				} else {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
246
247
248
					C.nvcuda_bootstrap(*cHandles.nvcuda, C.int(i), &memInfo)
					driverMajor = int(cHandles.nvcuda.driver_major)
					driverMinor = int(cHandles.nvcuda.driver_minor)
249
250
251
252
253
254
255
256
257
258
259
260
261
262
				}
				if memInfo.err != nil {
					slog.Info("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
					C.free(unsafe.Pointer(memInfo.err))
					continue
				}
				if memInfo.major < CudaComputeMin[0] || (memInfo.major == CudaComputeMin[0] && memInfo.minor < CudaComputeMin[1]) {
					slog.Info(fmt.Sprintf("[%d] CUDA GPU is too old. Compute Capability detected: %d.%d", i, memInfo.major, memInfo.minor))
					continue
				}
				gpuInfo.TotalMemory = uint64(memInfo.total)
				gpuInfo.FreeMemory = uint64(memInfo.free)
				gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
				gpuInfo.Compute = fmt.Sprintf("%d.%d", memInfo.major, memInfo.minor)
263
264
				gpuInfo.computeMajor = int(memInfo.major)
				gpuInfo.computeMinor = int(memInfo.minor)
265
				gpuInfo.MinimumMemory = cudaMinimumMemory
266
267
				gpuInfo.DriverMajor = driverMajor
				gpuInfo.DriverMinor = driverMinor
Daniel Hiltgen's avatar
Daniel Hiltgen committed
268
				variant := cudaVariant(gpuInfo)
269
270
271
				if depPath != "" {
					gpuInfo.DependencyPath = depPath
					// Check for variant specific directory
Daniel Hiltgen's avatar
Daniel Hiltgen committed
272
273
274
					if variant != "" {
						if _, err := os.Stat(filepath.Join(depPath, "cuda_"+variant)); err == nil {
							gpuInfo.DependencyPath = filepath.Join(depPath, "cuda_"+variant)
275
276
277
						}
					}
				}
278
				gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
Daniel Hiltgen's avatar
Daniel Hiltgen committed
279
				gpuInfo.Variant = variant
280

Daniel Hiltgen's avatar
Daniel Hiltgen committed
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
				// query the management library as well so we can record any skew between the two
				// which represents overhead on the GPU we must set aside on subsequent updates
				if cHandles.nvml != nil {
					C.nvml_get_free(*cHandles.nvml, C.int(gpuInfo.index), &memInfo.free, &memInfo.total, &memInfo.used)
					if memInfo.err != nil {
						slog.Warn("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
						C.free(unsafe.Pointer(memInfo.err))
					} else {
						if memInfo.free != 0 && uint64(memInfo.free) > gpuInfo.FreeMemory {
							gpuInfo.OSOverhead = uint64(memInfo.free) - gpuInfo.FreeMemory
							slog.Info("detected OS VRAM overhead",
								"id", gpuInfo.ID,
								"library", gpuInfo.Library,
								"compute", gpuInfo.Compute,
								"driver", fmt.Sprintf("%d.%d", gpuInfo.DriverMajor, gpuInfo.DriverMinor),
								"name", gpuInfo.Name,
								"overhead", format.HumanBytes2(gpuInfo.OSOverhead),
							)
						}
					}
				}

303
304
				// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
				cudaGPUs = append(cudaGPUs, gpuInfo)
Wang,Zhe's avatar
Wang,Zhe committed
305
			}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
306
307
308
		}

		// Intel
Michael Yang's avatar
bool  
Michael Yang committed
309
		if envconfig.IntelGPU() {
310
			oHandles = initOneAPIHandles()
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
			if oHandles != nil && oHandles.oneapi != nil {
				for d := range oHandles.oneapi.num_drivers {
					if oHandles.oneapi == nil {
						// shouldn't happen
						slog.Warn("nil oneapi handle with driver count", "count", int(oHandles.oneapi.num_drivers))
						continue
					}
					devCount := C.oneapi_get_device_count(*oHandles.oneapi, C.int(d))
					for i := range devCount {
						gpuInfo := OneapiGPUInfo{
							GpuInfo: GpuInfo{
								Library: "oneapi",
							},
							driverIndex: int(d),
							gpuIndex:    int(i),
						}
						// TODO - split bootstrapping from updating free memory
						C.oneapi_check_vram(*oHandles.oneapi, C.int(d), i, &memInfo)
						// TODO - convert this to MinimumMemory based on testing...
						var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
						memInfo.free = C.uint64_t(totalFreeMem)
						gpuInfo.TotalMemory = uint64(memInfo.total)
						gpuInfo.FreeMemory = uint64(memInfo.free)
						gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
						gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
						gpuInfo.DependencyPath = depPath
						oneapiGPUs = append(oneapiGPUs, gpuInfo)
338
					}
339
340
341
342
343
344
				}
			}
		}

		rocmGPUs = AMDGetGPUInfo()
		bootstrapped = true
345
346
347
		if len(cudaGPUs) == 0 && len(rocmGPUs) == 0 && len(oneapiGPUs) == 0 {
			slog.Info("no compatible GPUs were discovered")
		}
348
349
350
351
352
353
	}

	// For detected GPUs, load library if not loaded

	// Refresh free memory usage
	if needRefresh {
354
355
356
357
358
359
360
361
362
		mem, err := GetCPUMem()
		if err != nil {
			slog.Warn("error looking up system memory", "error", err)
		} else {
			slog.Debug("updating system memory data",
				slog.Group(
					"before",
					"total", format.HumanBytes2(cpus[0].TotalMemory),
					"free", format.HumanBytes2(cpus[0].FreeMemory),
363
					"free_swap", format.HumanBytes2(cpus[0].FreeSwap),
364
365
366
367
368
				),
				slog.Group(
					"now",
					"total", format.HumanBytes2(mem.TotalMemory),
					"free", format.HumanBytes2(mem.FreeMemory),
369
					"free_swap", format.HumanBytes2(mem.FreeSwap),
370
371
372
				),
			)
			cpus[0].FreeMemory = mem.FreeMemory
373
			cpus[0].FreeSwap = mem.FreeSwap
374
375
		}

376
		var memInfo C.mem_info_t
Daniel Hiltgen's avatar
Daniel Hiltgen committed
377
378
		if cHandles == nil && len(cudaGPUs) > 0 {
			cHandles = initCudaHandles()
379
380
		}
		for i, gpu := range cudaGPUs {
381
382
383
			if cHandles.nvml != nil {
				C.nvml_get_free(*cHandles.nvml, C.int(gpu.index), &memInfo.free, &memInfo.total, &memInfo.used)
			} else if cHandles.cudart != nil {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
384
				C.cudart_bootstrap(*cHandles.cudart, C.int(gpu.index), &memInfo)
385
386
387
			} else if cHandles.nvcuda != nil {
				C.nvcuda_get_free(*cHandles.nvcuda, C.int(gpu.index), &memInfo.free, &memInfo.total)
				memInfo.used = memInfo.total - memInfo.free
Wang,Zhe's avatar
Wang,Zhe committed
388
			} else {
389
390
391
				// shouldn't happen
				slog.Warn("no valid cuda library loaded to refresh vram usage")
				break
Wang,Zhe's avatar
Wang,Zhe committed
392
393
			}
			if memInfo.err != nil {
394
				slog.Warn("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
Wang,Zhe's avatar
Wang,Zhe committed
395
396
397
				C.free(unsafe.Pointer(memInfo.err))
				continue
			}
398
399
			if memInfo.free == 0 {
				slog.Warn("error looking up nvidia GPU memory")
Wang,Zhe's avatar
Wang,Zhe committed
400
401
				continue
			}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
402
403
404
405
			if cHandles.nvml != nil && gpu.OSOverhead > 0 {
				// When using the management library update based on recorded overhead
				memInfo.free -= C.uint64_t(gpu.OSOverhead)
			}
406
407
408
			slog.Debug("updating cuda memory data",
				"gpu", gpu.ID,
				"name", gpu.Name,
Daniel Hiltgen's avatar
Daniel Hiltgen committed
409
				"overhead", format.HumanBytes2(gpu.OSOverhead),
410
411
412
413
414
415
416
417
418
419
420
421
				slog.Group(
					"before",
					"total", format.HumanBytes2(gpu.TotalMemory),
					"free", format.HumanBytes2(gpu.FreeMemory),
				),
				slog.Group(
					"now",
					"total", format.HumanBytes2(uint64(memInfo.total)),
					"free", format.HumanBytes2(uint64(memInfo.free)),
					"used", format.HumanBytes2(uint64(memInfo.used)),
				),
			)
422
			cudaGPUs[i].FreeMemory = uint64(memInfo.free)
423
		}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440

		if oHandles == nil && len(oneapiGPUs) > 0 {
			oHandles = initOneAPIHandles()
		}
		for i, gpu := range oneapiGPUs {
			if oHandles.oneapi == nil {
				// shouldn't happen
				slog.Warn("nil oneapi handle with device count", "count", oHandles.deviceCount)
				continue
			}
			C.oneapi_check_vram(*oHandles.oneapi, C.int(gpu.driverIndex), C.int(gpu.gpuIndex), &memInfo)
			// TODO - convert this to MinimumMemory based on testing...
			var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
			memInfo.free = C.uint64_t(totalFreeMem)
			oneapiGPUs[i].FreeMemory = uint64(memInfo.free)
		}

441
		err = RocmGPUInfoList(rocmGPUs).RefreshFreeMemory()
442
443
		if err != nil {
			slog.Debug("problem refreshing ROCm free memory", "error", err)
444
		}
445
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
446

447
448
449
450
451
452
453
	resp := []GpuInfo{}
	for _, gpu := range cudaGPUs {
		resp = append(resp, gpu.GpuInfo)
	}
	for _, gpu := range rocmGPUs {
		resp = append(resp, gpu.GpuInfo)
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
454
455
456
	for _, gpu := range oneapiGPUs {
		resp = append(resp, gpu.GpuInfo)
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
457
	if len(resp) == 0 {
458
		resp = append(resp, cpus[0].GpuInfo)
459
460
461
462
	}
	return resp
}

463
func FindGPULibs(baseLibName string, defaultPatterns []string) []string {
464
465
466
	// Multiple GPU libraries may exist, and some may not work, so keep trying until we exhaust them
	var ldPaths []string
	gpuLibPaths := []string{}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
467
	slog.Debug("Searching for GPU library", "name", baseLibName)
468

Daniel Hiltgen's avatar
Daniel Hiltgen committed
469
	// Start with our bundled libraries
Daniel Hiltgen's avatar
Daniel Hiltgen committed
470
	patterns := []string{filepath.Join(LibraryDir(), baseLibName)}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
471

472
473
474
475
476
477
478
479
	switch runtime.GOOS {
	case "windows":
		ldPaths = strings.Split(os.Getenv("PATH"), ";")
	case "linux":
		ldPaths = strings.Split(os.Getenv("LD_LIBRARY_PATH"), ":")
	default:
		return gpuLibPaths
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
480
481

	// Then with whatever we find in the PATH/LD_LIBRARY_PATH
482
483
484
485
486
	for _, ldPath := range ldPaths {
		d, err := filepath.Abs(ldPath)
		if err != nil {
			continue
		}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
487
		patterns = append(patterns, filepath.Join(d, baseLibName))
488
	}
489
	patterns = append(patterns, defaultPatterns...)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
490
	slog.Debug("gpu library search", "globs", patterns)
491
	for _, pattern := range patterns {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
492
493
494
495

		// Nvidia PhysX known to return bogus results
		if strings.Contains(pattern, "PhysX") {
			slog.Debug("skipping PhysX cuda library path", "path", pattern)
496
			continue
Daniel Hiltgen's avatar
Daniel Hiltgen committed
497
		}
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
		// Ignore glob discovery errors
		matches, _ := filepath.Glob(pattern)
		for _, match := range matches {
			// Resolve any links so we don't try the same lib multiple times
			// and weed out any dups across globs
			libPath := match
			tmp := match
			var err error
			for ; err == nil; tmp, err = os.Readlink(libPath) {
				if !filepath.IsAbs(tmp) {
					tmp = filepath.Join(filepath.Dir(libPath), tmp)
				}
				libPath = tmp
			}
			new := true
			for _, cmp := range gpuLibPaths {
				if cmp == libPath {
					new = false
					break
				}
			}
			if new {
				gpuLibPaths = append(gpuLibPaths, libPath)
			}
		}
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
524
	slog.Debug("discovered GPU libraries", "paths", gpuLibPaths)
525
526
527
	return gpuLibPaths
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
528
func LoadCUDARTMgmt(cudartLibPaths []string) (int, *C.cudart_handle_t, string) {
529
	var resp C.cudart_init_resp_t
530
	resp.ch.verbose = getVerboseState()
531
	for _, libPath := range cudartLibPaths {
532
533
		lib := C.CString(libPath)
		defer C.free(unsafe.Pointer(lib))
534
		C.cudart_init(lib, &resp)
535
		if resp.err != nil {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
536
			slog.Debug("Unable to load cudart", "library", libPath, "error", C.GoString(resp.err))
537
538
			C.free(unsafe.Pointer(resp.err))
		} else {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
539
			return int(resp.num_devices), &resp.ch, libPath
540
541
		}
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
542
	return 0, nil, ""
543
544
}

545
546
547
548
549
550
551
552
// LoadNVCUDAMgmt tries each candidate CUDA driver library in order and returns
// the device count, handle and path of the first one that initializes. Load
// failures are logged at a level chosen from the reported CUDA error code. It
// returns (0, nil, "") when none of them load.
func LoadNVCUDAMgmt(nvcudaLibPaths []string) (int, *C.nvcuda_handle_t, string) {
	var resp C.nvcuda_init_resp_t
	resp.ch.verbose = getVerboseState()
	for _, libPath := range nvcudaLibPaths {
		lib := C.CString(libPath)
		C.nvcuda_init(lib, &resp)
		// Free per iteration; a defer here would hold every C string until
		// the function returns.
		C.free(unsafe.Pointer(lib))
		if resp.err == nil {
			return int(resp.num_devices), &resp.ch, libPath
		}

		// Decide what log level based on the type of error message to help users understand why
		msg := C.GoString(resp.err)
		switch resp.cudaErr {
		case C.CUDA_ERROR_INSUFFICIENT_DRIVER, C.CUDA_ERROR_SYSTEM_DRIVER_MISMATCH:
			slog.Warn("version mismatch between driver and cuda driver library - reboot or upgrade may be required", "library", libPath, "error", msg)
		case C.CUDA_ERROR_NO_DEVICE:
			slog.Info("no nvidia devices detected", "library", libPath)
		case C.CUDA_ERROR_UNKNOWN:
			slog.Warn("unknown error initializing cuda driver library", "library", libPath, "error", msg)
			slog.Warn("see https://github.com/ollama/ollama/blob/main/docs/troubleshooting.md for more information")
		default:
			if strings.Contains(msg, "wrong ELF class") {
				slog.Debug("skipping 32bit library", "library", libPath)
			} else {
				slog.Info("unable to load cuda driver library", "library", libPath, "error", msg)
			}
		}
		C.free(unsafe.Pointer(resp.err))
	}
	return 0, nil, ""
}

578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
// LoadNVMLMgmt tries each candidate NVML library in order and returns the
// handle and path of the first one that initializes, or (nil, "") when none
// of them load.
func LoadNVMLMgmt(nvmlLibPaths []string) (*C.nvml_handle_t, string) {
	var resp C.nvml_init_resp_t
	resp.ch.verbose = getVerboseState()
	for _, libPath := range nvmlLibPaths {
		lib := C.CString(libPath)
		C.nvml_init(lib, &resp)
		// Free per iteration; a defer here would hold every C string until
		// the function returns.
		C.free(unsafe.Pointer(lib))
		if resp.err == nil {
			return &resp.ch, libPath
		}
		slog.Info(fmt.Sprintf("Unable to load NVML management library %s: %s", libPath, C.GoString(resp.err)))
		C.free(unsafe.Pointer(resp.err))
	}
	return nil, ""
}

Wang,Zhe's avatar
Wang,Zhe committed
595
596
func LoadOneapiMgmt(oneapiLibPaths []string) (int, *C.oneapi_handle_t, string) {
	var resp C.oneapi_init_resp_t
Daniel Hiltgen's avatar
Daniel Hiltgen committed
597
	num_devices := 0
Wang,Zhe's avatar
Wang,Zhe committed
598
599
600
601
602
603
604
605
606
	resp.oh.verbose = getVerboseState()
	for _, libPath := range oneapiLibPaths {
		lib := C.CString(libPath)
		defer C.free(unsafe.Pointer(lib))
		C.oneapi_init(lib, &resp)
		if resp.err != nil {
			slog.Debug("Unable to load oneAPI management library", "library", libPath, "error", C.GoString(resp.err))
			C.free(unsafe.Pointer(resp.err))
		} else {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
607
			for i := range resp.oh.num_drivers {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
608
609
610
				num_devices += int(C.oneapi_get_device_count(resp.oh, C.int(i)))
			}
			return num_devices, &resp.oh, libPath
Wang,Zhe's avatar
Wang,Zhe committed
611
612
613
614
615
		}
	}
	return 0, nil, ""
}

616
func getVerboseState() C.uint16_t {
Michael Yang's avatar
Michael Yang committed
617
	if envconfig.Debug() {
618
619
620
621
		return C.uint16_t(1)
	}
	return C.uint16_t(0)
}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
622
623
624
625
626
627
628
629
630
631
632
633
634
635

// Given the list of GPUs this instantiation is targeted for,
// figure out the visible devices environment variable
//
// If different libraries are detected, the first one is what we use
func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) {
	if len(l) == 0 {
		return "", ""
	}
	switch l[0].Library {
	case "cuda":
		return cudaGetVisibleDevicesEnv(l)
	case "rocm":
		return rocmGetVisibleDevicesEnv(l)
Wang,Zhe's avatar
Wang,Zhe committed
636
637
	case "oneapi":
		return oneapiGetVisibleDevicesEnv(l)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
638
639
640
641
642
	default:
		slog.Debug("no filter required for library " + l[0].Library)
		return "", ""
	}
}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
643

Daniel Hiltgen's avatar
Daniel Hiltgen committed
644
func LibraryDir() string {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
645
646
647
648
649
650
651
652
653
654
	// On Windows/linux we bundle the dependencies at the same level as the executable
	appExe, err := os.Executable()
	if err != nil {
		slog.Warn("failed to lookup executable path", "error", err)
	}
	cwd, err := os.Getwd()
	if err != nil {
		slog.Warn("failed to lookup working directory", "error", err)
	}
	// Scan for any of our dependeices, and pick first match
655
	for _, root := range []string{filepath.Dir(appExe), filepath.Join(filepath.Dir(appExe), envconfig.LibRelativeToExe()), cwd} {
656
		libDep := filepath.Join("lib", "ollama")
Daniel Hiltgen's avatar
Daniel Hiltgen committed
657
658
659
660
661
662
663
664
665
666
667
668
669
670
		if _, err := os.Stat(filepath.Join(root, libDep)); err == nil {
			return filepath.Join(root, libDep)
		}
		// Developer mode, local build
		if _, err := os.Stat(filepath.Join(root, runtime.GOOS+"-"+runtime.GOARCH, libDep)); err == nil {
			return filepath.Join(root, runtime.GOOS+"-"+runtime.GOARCH, libDep)
		}
		if _, err := os.Stat(filepath.Join(root, "dist", runtime.GOOS+"-"+runtime.GOARCH, libDep)); err == nil {
			return filepath.Join(root, "dist", runtime.GOOS+"-"+runtime.GOARCH, libDep)
		}
	}
	slog.Warn("unable to locate gpu dependency libraries")
	return ""
}