gpu.go 19.8 KB
Newer Older
1
2
3
4
5
//go:build linux || windows

package gpu

/*
6
7
8
#cgo linux LDFLAGS: -lrt -lpthread -ldl -lstdc++ -lm
#cgo windows LDFLAGS: -lpthread

9
10
11
#include "gpu_info.h"
*/
import "C"
Michael Yang's avatar
lint  
Michael Yang committed
12

13
14
import (
	"fmt"
15
	"log/slog"
16
17
	"os"
	"path/filepath"
18
	"runtime"
19
	"strings"
20
21
	"sync"
	"unsafe"
Michael Yang's avatar
Michael Yang committed
22

23
	"github.com/ollama/ollama/envconfig"
24
	"github.com/ollama/ollama/format"
25
26
)

Daniel Hiltgen's avatar
Daniel Hiltgen committed
27
type cudaHandles struct {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
28
29
	deviceCount int
	cudart      *C.cudart_handle_t
30
	nvcuda      *C.nvcuda_handle_t
31
	nvml        *C.nvml_handle_t
Daniel Hiltgen's avatar
Daniel Hiltgen committed
32
33
34
}

type oneapiHandles struct {
Wang,Zhe's avatar
Wang,Zhe committed
35
	oneapi      *C.oneapi_handle_t
Daniel Hiltgen's avatar
Daniel Hiltgen committed
36
	deviceCount int
37
38
}

Michael Yang's avatar
Michael Yang committed
39
const (
Daniel Hiltgen's avatar
Daniel Hiltgen committed
40
41
	cudaMinimumMemory = 457 * format.MebiByte
	rocmMinimumMemory = 457 * format.MebiByte
Daniel Hiltgen's avatar
Daniel Hiltgen committed
42
	// TODO OneAPI minimum memory
Michael Yang's avatar
Michael Yang committed
43
44
)

45
46
47
48
49
50
51
52
53
var (
	gpuMutex      sync.Mutex
	bootstrapped  bool
	cpuCapability CPUCapability
	cpus          []CPUInfo
	cudaGPUs      []CudaGPUInfo
	nvcudaLibPath string
	cudartLibPath string
	oneapiLibPath string
54
	nvmlLibPath   string
55
56
57
	rocmGPUs      []RocmGPUInfo
	oneapiGPUs    []OneapiGPUInfo
)
58

59
60
// CudaComputeMin is the minimum supported CUDA compute capability as
// {major, minor}; GetGPUInfo skips devices that report anything lower.
// With our current CUDA compile flags, older than 5.0 will not work properly
var CudaComputeMin = [2]C.int{5, 0}
61

Daniel Hiltgen's avatar
Daniel Hiltgen committed
62
// RocmComputeMin is not referenced in this file; presumably the minimum
// supported AMD GPU major version — TODO confirm against the ROCm discovery code.
var RocmComputeMin = 9
63

Daniel Hiltgen's avatar
Daniel Hiltgen committed
64
65
// IGPUMemLimit is the total-memory threshold below which a device is treated as
// an integrated GPU.
// TODO find a better way to detect iGPU instead of minimum memory
const IGPUMemLimit = 1 * format.GibiByte // 512M is what they typically report, so anything less than 1G must be iGPU
66

67
68
69
70
// CudaTegra holds the value of the JETSON_JETPACK environment variable, read
// once at package initialization; it is empty when the variable is unset.
//
// Jetson devices have JETSON_JETPACK="x.y.z" factory set to the Jetpack version installed.
// Included to drive logic for reducing Ollama-allocated overhead on L4T/Jetson devices.
var CudaTegra string = os.Getenv("JETSON_JETPACK")

71
// Note: gpuMutex must already be held
Daniel Hiltgen's avatar
Daniel Hiltgen committed
72
func initCudaHandles() *cudaHandles {
73
	// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing
74

Daniel Hiltgen's avatar
Daniel Hiltgen committed
75
	cHandles := &cudaHandles{}
76
	// Short Circuit if we already know which library to use
77
78
79
80
	if nvmlLibPath != "" {
		cHandles.nvml, _ = LoadNVMLMgmt([]string{nvmlLibPath})
		return cHandles
	}
81
	if nvcudaLibPath != "" {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
82
83
		cHandles.deviceCount, cHandles.nvcuda, _ = LoadNVCUDAMgmt([]string{nvcudaLibPath})
		return cHandles
84
85
	}
	if cudartLibPath != "" {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
86
87
		cHandles.deviceCount, cHandles.cudart, _ = LoadCUDARTMgmt([]string{cudartLibPath})
		return cHandles
88
89
90
	}

	slog.Debug("searching for GPU discovery libraries for NVIDIA")
91
	var cudartMgmtPatterns []string
92

Daniel Hiltgen's avatar
Daniel Hiltgen committed
93
94
	// Aligned with driver, we can't carry as payloads
	nvcudaMgmtPatterns := NvcudaGlobs
95

Daniel Hiltgen's avatar
Daniel Hiltgen committed
96
97
98
99
100
101
102
103
	if runtime.GOOS == "windows" {
		localAppData := os.Getenv("LOCALAPPDATA")
		cudartMgmtPatterns = []string{filepath.Join(localAppData, "Programs", "Ollama", CudartMgmtName)}
	}
	tmpDir, _ := PayloadsDir()
	if tmpDir != "" {
		// TODO - add "payloads" for subprocess
		cudartMgmtPatterns = []string{filepath.Join(tmpDir, "cuda*", CudartMgmtName)}
104
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
105
	cudartMgmtPatterns = append(cudartMgmtPatterns, CudartGlobs...)
106

Daniel Hiltgen's avatar
Daniel Hiltgen committed
107
108
	if len(NvmlGlobs) > 0 {
		nvmlLibPaths := FindGPULibs(NvmlMgmtName, NvmlGlobs)
109
110
111
112
113
114
115
116
117
118
		if len(nvmlLibPaths) > 0 {
			nvml, libPath := LoadNVMLMgmt(nvmlLibPaths)
			if nvml != nil {
				slog.Debug("nvidia-ml loaded", "library", libPath)
				cHandles.nvml = nvml
				nvmlLibPath = libPath
			}
		}
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
119
	nvcudaLibPaths := FindGPULibs(NvcudaMgmtName, nvcudaMgmtPatterns)
120
121
122
	if len(nvcudaLibPaths) > 0 {
		deviceCount, nvcuda, libPath := LoadNVCUDAMgmt(nvcudaLibPaths)
		if nvcuda != nil {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
123
			slog.Debug("detected GPUs", "count", deviceCount, "library", libPath)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
124
125
			cHandles.nvcuda = nvcuda
			cHandles.deviceCount = deviceCount
126
			nvcudaLibPath = libPath
Daniel Hiltgen's avatar
Daniel Hiltgen committed
127
			return cHandles
128
129
130
		}
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
131
	cudartLibPaths := FindGPULibs(CudartMgmtName, cudartMgmtPatterns)
132
	if len(cudartLibPaths) > 0 {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
133
		deviceCount, cudart, libPath := LoadCUDARTMgmt(cudartLibPaths)
134
		if cudart != nil {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
135
			slog.Debug("detected GPUs", "library", libPath, "count", deviceCount)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
136
137
			cHandles.cudart = cudart
			cHandles.deviceCount = deviceCount
138
			cudartLibPath = libPath
Daniel Hiltgen's avatar
Daniel Hiltgen committed
139
			return cHandles
140
141
		}
	}
Wang,Zhe's avatar
Wang,Zhe committed
142

Daniel Hiltgen's avatar
Daniel Hiltgen committed
143
144
145
146
147
148
149
150
151
152
153
154
155
	return cHandles
}

// Note: gpuMutex must already be held
func initOneAPIHandles() *oneapiHandles {
	oHandles := &oneapiHandles{}

	// Short Circuit if we already know which library to use
	if oneapiLibPath != "" {
		oHandles.deviceCount, oHandles.oneapi, _ = LoadOneapiMgmt([]string{oneapiLibPath})
		return oHandles
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
156
	oneapiLibPaths := FindGPULibs(OneapiMgmtName, OneapiGlobs)
157
	if len(oneapiLibPaths) > 0 {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
158
		oHandles.deviceCount, oHandles.oneapi, oneapiLibPath = LoadOneapiMgmt(oneapiLibPaths)
159
160
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
161
	return oHandles
162
163
}

164
165
166
167
168
169
170
171
172
173
174
// GetCPUInfo returns a single-element list describing the CPU. If discovery
// has not run yet it triggers GetGPUInfo first so that cpus is populated.
func GetCPUInfo() GpuInfoList {
	gpuMutex.Lock()
	ready := bootstrapped
	gpuMutex.Unlock()

	if !ready {
		// GetGPUInfo acquires gpuMutex itself, so it must be called unlocked.
		GetGPUInfo()
	}

	// Read cpus under the lock: a concurrent GetGPUInfo refresh updates
	// cpus[0] while holding gpuMutex.
	gpuMutex.Lock()
	defer gpuMutex.Unlock()
	return GpuInfoList{cpus[0].GpuInfo}
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
175
func GetGPUInfo() GpuInfoList {
176
177
178
179
	// TODO - consider exploring lspci (and equivalent on windows) to check for
	// GPUs so we can report warnings if we see Nvidia/AMD but fail to load the libraries
	gpuMutex.Lock()
	defer gpuMutex.Unlock()
180
	needRefresh := true
Daniel Hiltgen's avatar
Daniel Hiltgen committed
181
182
	var cHandles *cudaHandles
	var oHandles *oneapiHandles
183
	defer func() {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
184
185
186
187
188
189
190
		if cHandles != nil {
			if cHandles.cudart != nil {
				C.cudart_release(*cHandles.cudart)
			}
			if cHandles.nvcuda != nil {
				C.nvcuda_release(*cHandles.nvcuda)
			}
191
192
193
			if cHandles.nvml != nil {
				C.nvml_release(*cHandles.nvml)
			}
194
		}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
195
196
197
198
199
		if oHandles != nil {
			if oHandles.oneapi != nil {
				// TODO - is this needed?
				C.oneapi_release(*oHandles.oneapi)
			}
200
		}
201
	}()
202

203
	if !bootstrapped {
204
		slog.Info("looking for compatible GPUs")
205
		needRefresh = false
Daniel Hiltgen's avatar
Daniel Hiltgen committed
206
		cpuCapability = GetCPUCapability()
207
		var memInfo C.mem_info_t
208
209
210
211

		mem, err := GetCPUMem()
		if err != nil {
			slog.Warn("error looking up system memory", "error", err)
212
		}
Michael Yang's avatar
lint  
Michael Yang committed
213
214
215
216
217
218
219
220
		cpus = []CPUInfo{
			{
				GpuInfo: GpuInfo{
					memInfo: mem,
					Library: "cpu",
					Variant: cpuCapability,
					ID:      "0",
				},
221
			},
Michael Yang's avatar
lint  
Michael Yang committed
222
		}
223
224
225

		// Fallback to CPU mode if we're lacking required vector extensions on x86
		if cpuCapability < GPURunnerCPUCapability && runtime.GOARCH == "amd64" {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
226
			slog.Warn("CPU does not have minimum vector extensions, GPU inference disabled", "required", GPURunnerCPUCapability, "detected", cpuCapability)
227
228
229
230
			bootstrapped = true
			// No need to do any GPU discovery, since we can't run on them
			return GpuInfoList{cpus[0].GpuInfo}
		}
231

Daniel Hiltgen's avatar
Daniel Hiltgen committed
232
		depPath := GetDepDir()
233
234

		// Load ALL libraries
Daniel Hiltgen's avatar
Daniel Hiltgen committed
235
		cHandles = initCudaHandles()
236
237

		// NVIDIA
Daniel Hiltgen's avatar
Daniel Hiltgen committed
238
239
		for i := range cHandles.deviceCount {
			if cHandles.cudart != nil || cHandles.nvcuda != nil {
240
241
242
243
244
245
246
247
				gpuInfo := CudaGPUInfo{
					GpuInfo: GpuInfo{
						Library: "cuda",
					},
					index: i,
				}
				var driverMajor int
				var driverMinor int
Daniel Hiltgen's avatar
Daniel Hiltgen committed
248
249
				if cHandles.cudart != nil {
					C.cudart_bootstrap(*cHandles.cudart, C.int(i), &memInfo)
250
				} else {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
251
252
253
					C.nvcuda_bootstrap(*cHandles.nvcuda, C.int(i), &memInfo)
					driverMajor = int(cHandles.nvcuda.driver_major)
					driverMinor = int(cHandles.nvcuda.driver_minor)
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
				}
				if memInfo.err != nil {
					slog.Info("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
					C.free(unsafe.Pointer(memInfo.err))
					continue
				}
				if memInfo.major < CudaComputeMin[0] || (memInfo.major == CudaComputeMin[0] && memInfo.minor < CudaComputeMin[1]) {
					slog.Info(fmt.Sprintf("[%d] CUDA GPU is too old. Compute Capability detected: %d.%d", i, memInfo.major, memInfo.minor))
					continue
				}
				gpuInfo.TotalMemory = uint64(memInfo.total)
				gpuInfo.FreeMemory = uint64(memInfo.free)
				gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
				gpuInfo.Compute = fmt.Sprintf("%d.%d", memInfo.major, memInfo.minor)
				gpuInfo.MinimumMemory = cudaMinimumMemory
				gpuInfo.DependencyPath = depPath
				gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
Daniel Hiltgen's avatar
Daniel Hiltgen committed
271
272
				gpuInfo.DriverMajor = driverMajor
				gpuInfo.DriverMinor = driverMinor
273

Daniel Hiltgen's avatar
Daniel Hiltgen committed
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
				// query the management library as well so we can record any skew between the two
				// which represents overhead on the GPU we must set aside on subsequent updates
				if cHandles.nvml != nil {
					C.nvml_get_free(*cHandles.nvml, C.int(gpuInfo.index), &memInfo.free, &memInfo.total, &memInfo.used)
					if memInfo.err != nil {
						slog.Warn("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
						C.free(unsafe.Pointer(memInfo.err))
					} else {
						if memInfo.free != 0 && uint64(memInfo.free) > gpuInfo.FreeMemory {
							gpuInfo.OSOverhead = uint64(memInfo.free) - gpuInfo.FreeMemory
							slog.Info("detected OS VRAM overhead",
								"id", gpuInfo.ID,
								"library", gpuInfo.Library,
								"compute", gpuInfo.Compute,
								"driver", fmt.Sprintf("%d.%d", gpuInfo.DriverMajor, gpuInfo.DriverMinor),
								"name", gpuInfo.Name,
								"overhead", format.HumanBytes2(gpuInfo.OSOverhead),
							)
						}
					}
				}

296
297
				// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
				cudaGPUs = append(cudaGPUs, gpuInfo)
Wang,Zhe's avatar
Wang,Zhe committed
298
			}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
299
300
301
		}

		// Intel
Michael Yang's avatar
bool  
Michael Yang committed
302
		if envconfig.IntelGPU() {
303
			oHandles = initOneAPIHandles()
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
			if oHandles != nil && oHandles.oneapi != nil {
				for d := range oHandles.oneapi.num_drivers {
					if oHandles.oneapi == nil {
						// shouldn't happen
						slog.Warn("nil oneapi handle with driver count", "count", int(oHandles.oneapi.num_drivers))
						continue
					}
					devCount := C.oneapi_get_device_count(*oHandles.oneapi, C.int(d))
					for i := range devCount {
						gpuInfo := OneapiGPUInfo{
							GpuInfo: GpuInfo{
								Library: "oneapi",
							},
							driverIndex: int(d),
							gpuIndex:    int(i),
						}
						// TODO - split bootstrapping from updating free memory
						C.oneapi_check_vram(*oHandles.oneapi, C.int(d), i, &memInfo)
						// TODO - convert this to MinimumMemory based on testing...
						var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
						memInfo.free = C.uint64_t(totalFreeMem)
						gpuInfo.TotalMemory = uint64(memInfo.total)
						gpuInfo.FreeMemory = uint64(memInfo.free)
						gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
						gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
						gpuInfo.DependencyPath = depPath
						oneapiGPUs = append(oneapiGPUs, gpuInfo)
331
					}
332
333
334
335
336
337
				}
			}
		}

		rocmGPUs = AMDGetGPUInfo()
		bootstrapped = true
338
339
340
		if len(cudaGPUs) == 0 && len(rocmGPUs) == 0 && len(oneapiGPUs) == 0 {
			slog.Info("no compatible GPUs were discovered")
		}
341
342
343
344
345
346
	}

	// For detected GPUs, load library if not loaded

	// Refresh free memory usage
	if needRefresh {
347
348
349
350
351
352
353
354
355
		mem, err := GetCPUMem()
		if err != nil {
			slog.Warn("error looking up system memory", "error", err)
		} else {
			slog.Debug("updating system memory data",
				slog.Group(
					"before",
					"total", format.HumanBytes2(cpus[0].TotalMemory),
					"free", format.HumanBytes2(cpus[0].FreeMemory),
356
					"free_swap", format.HumanBytes2(cpus[0].FreeSwap),
357
358
359
360
361
				),
				slog.Group(
					"now",
					"total", format.HumanBytes2(mem.TotalMemory),
					"free", format.HumanBytes2(mem.FreeMemory),
362
					"free_swap", format.HumanBytes2(mem.FreeSwap),
363
364
365
				),
			)
			cpus[0].FreeMemory = mem.FreeMemory
366
			cpus[0].FreeSwap = mem.FreeSwap
367
368
		}

369
		var memInfo C.mem_info_t
Daniel Hiltgen's avatar
Daniel Hiltgen committed
370
371
		if cHandles == nil && len(cudaGPUs) > 0 {
			cHandles = initCudaHandles()
372
373
		}
		for i, gpu := range cudaGPUs {
374
375
376
			if cHandles.nvml != nil {
				C.nvml_get_free(*cHandles.nvml, C.int(gpu.index), &memInfo.free, &memInfo.total, &memInfo.used)
			} else if cHandles.cudart != nil {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
377
				C.cudart_bootstrap(*cHandles.cudart, C.int(gpu.index), &memInfo)
378
379
380
			} else if cHandles.nvcuda != nil {
				C.nvcuda_get_free(*cHandles.nvcuda, C.int(gpu.index), &memInfo.free, &memInfo.total)
				memInfo.used = memInfo.total - memInfo.free
Wang,Zhe's avatar
Wang,Zhe committed
381
			} else {
382
383
384
				// shouldn't happen
				slog.Warn("no valid cuda library loaded to refresh vram usage")
				break
Wang,Zhe's avatar
Wang,Zhe committed
385
386
			}
			if memInfo.err != nil {
387
				slog.Warn("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
Wang,Zhe's avatar
Wang,Zhe committed
388
389
390
				C.free(unsafe.Pointer(memInfo.err))
				continue
			}
391
392
			if memInfo.free == 0 {
				slog.Warn("error looking up nvidia GPU memory")
Wang,Zhe's avatar
Wang,Zhe committed
393
394
				continue
			}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
395
396
397
398
			if cHandles.nvml != nil && gpu.OSOverhead > 0 {
				// When using the management library update based on recorded overhead
				memInfo.free -= C.uint64_t(gpu.OSOverhead)
			}
399
400
401
			slog.Debug("updating cuda memory data",
				"gpu", gpu.ID,
				"name", gpu.Name,
Daniel Hiltgen's avatar
Daniel Hiltgen committed
402
				"overhead", format.HumanBytes2(gpu.OSOverhead),
403
404
405
406
407
408
409
410
411
412
413
414
				slog.Group(
					"before",
					"total", format.HumanBytes2(gpu.TotalMemory),
					"free", format.HumanBytes2(gpu.FreeMemory),
				),
				slog.Group(
					"now",
					"total", format.HumanBytes2(uint64(memInfo.total)),
					"free", format.HumanBytes2(uint64(memInfo.free)),
					"used", format.HumanBytes2(uint64(memInfo.used)),
				),
			)
415
			cudaGPUs[i].FreeMemory = uint64(memInfo.free)
416
		}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433

		if oHandles == nil && len(oneapiGPUs) > 0 {
			oHandles = initOneAPIHandles()
		}
		for i, gpu := range oneapiGPUs {
			if oHandles.oneapi == nil {
				// shouldn't happen
				slog.Warn("nil oneapi handle with device count", "count", oHandles.deviceCount)
				continue
			}
			C.oneapi_check_vram(*oHandles.oneapi, C.int(gpu.driverIndex), C.int(gpu.gpuIndex), &memInfo)
			// TODO - convert this to MinimumMemory based on testing...
			var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
			memInfo.free = C.uint64_t(totalFreeMem)
			oneapiGPUs[i].FreeMemory = uint64(memInfo.free)
		}

434
		err = RocmGPUInfoList(rocmGPUs).RefreshFreeMemory()
435
436
		if err != nil {
			slog.Debug("problem refreshing ROCm free memory", "error", err)
437
		}
438
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
439

440
441
442
443
444
445
446
	resp := []GpuInfo{}
	for _, gpu := range cudaGPUs {
		resp = append(resp, gpu.GpuInfo)
	}
	for _, gpu := range rocmGPUs {
		resp = append(resp, gpu.GpuInfo)
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
447
448
449
	for _, gpu := range oneapiGPUs {
		resp = append(resp, gpu.GpuInfo)
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
450
	if len(resp) == 0 {
451
		resp = append(resp, cpus[0].GpuInfo)
452
453
454
455
	}
	return resp
}

456
func FindGPULibs(baseLibName string, defaultPatterns []string) []string {
457
458
459
	// Multiple GPU libraries may exist, and some may not work, so keep trying until we exhaust them
	var ldPaths []string
	gpuLibPaths := []string{}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
460
	slog.Debug("Searching for GPU library", "name", baseLibName)
461

Daniel Hiltgen's avatar
Daniel Hiltgen committed
462
463
464
	// Start with our bundled libraries
	patterns := []string{filepath.Join(GetDepDir(), baseLibName)}

465
466
467
468
469
470
471
472
	switch runtime.GOOS {
	case "windows":
		ldPaths = strings.Split(os.Getenv("PATH"), ";")
	case "linux":
		ldPaths = strings.Split(os.Getenv("LD_LIBRARY_PATH"), ":")
	default:
		return gpuLibPaths
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
473
474

	// Then with whatever we find in the PATH/LD_LIBRARY_PATH
475
476
477
478
479
	for _, ldPath := range ldPaths {
		d, err := filepath.Abs(ldPath)
		if err != nil {
			continue
		}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
480
		patterns = append(patterns, filepath.Join(d, baseLibName))
481
	}
482
	patterns = append(patterns, defaultPatterns...)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
483
	slog.Debug("gpu library search", "globs", patterns)
484
	for _, pattern := range patterns {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
485
486
487
488

		// Nvidia PhysX known to return bogus results
		if strings.Contains(pattern, "PhysX") {
			slog.Debug("skipping PhysX cuda library path", "path", pattern)
489
			continue
Daniel Hiltgen's avatar
Daniel Hiltgen committed
490
		}
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
		// Ignore glob discovery errors
		matches, _ := filepath.Glob(pattern)
		for _, match := range matches {
			// Resolve any links so we don't try the same lib multiple times
			// and weed out any dups across globs
			libPath := match
			tmp := match
			var err error
			for ; err == nil; tmp, err = os.Readlink(libPath) {
				if !filepath.IsAbs(tmp) {
					tmp = filepath.Join(filepath.Dir(libPath), tmp)
				}
				libPath = tmp
			}
			new := true
			for _, cmp := range gpuLibPaths {
				if cmp == libPath {
					new = false
					break
				}
			}
			if new {
				gpuLibPaths = append(gpuLibPaths, libPath)
			}
		}
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
517
	slog.Debug("discovered GPU libraries", "paths", gpuLibPaths)
518
519
520
	return gpuLibPaths
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
521
func LoadCUDARTMgmt(cudartLibPaths []string) (int, *C.cudart_handle_t, string) {
522
	var resp C.cudart_init_resp_t
523
	resp.ch.verbose = getVerboseState()
524
	for _, libPath := range cudartLibPaths {
525
526
		lib := C.CString(libPath)
		defer C.free(unsafe.Pointer(lib))
527
		C.cudart_init(lib, &resp)
528
		if resp.err != nil {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
529
			slog.Debug("Unable to load cudart", "library", libPath, "error", C.GoString(resp.err))
530
531
			C.free(unsafe.Pointer(resp.err))
		} else {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
532
			return int(resp.num_devices), &resp.ch, libPath
533
534
		}
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
535
	return 0, nil, ""
536
537
}

538
539
540
541
542
543
544
545
// LoadNVCUDAMgmt tries each path in nvcudaLibPaths in order and initializes the
// CUDA driver library from the first one that loads. It returns the device
// count reported by the library, its handle, and the path that was used, or
// (0, nil, "") when none of the candidates could be loaded. Load failures are
// logged at a level chosen from the CUDA error code.
func LoadNVCUDAMgmt(nvcudaLibPaths []string) (int, *C.nvcuda_handle_t, string) {
	var resp C.nvcuda_init_resp_t
	resp.ch.verbose = getVerboseState()
	for _, libPath := range nvcudaLibPaths {
		lib := C.CString(libPath)
		C.nvcuda_init(lib, &resp)
		// Free the C copy of the path right away; a defer here would keep
		// every candidate's string alive until the function returns.
		C.free(unsafe.Pointer(lib))
		if resp.err != nil {
			// Decide what log level based on the type of error message to help users understand why
			msg := C.GoString(resp.err)
			switch resp.cudaErr {
			case C.CUDA_ERROR_INSUFFICIENT_DRIVER, C.CUDA_ERROR_SYSTEM_DRIVER_MISMATCH:
				slog.Warn("version mismatch between driver and cuda driver library - reboot or upgrade may be required", "library", libPath, "error", msg)
			case C.CUDA_ERROR_NO_DEVICE:
				slog.Info("no nvidia devices detected", "library", libPath)
			case C.CUDA_ERROR_UNKNOWN:
				slog.Warn("unknown error initializing cuda driver library", "library", libPath, "error", msg)
				slog.Warn("see https://github.com/ollama/ollama/blob/main/docs/troubleshooting.md for more information")
			default:
				if strings.Contains(msg, "wrong ELF class") {
					slog.Debug("skipping 32bit library", "library", libPath)
				} else {
					slog.Info("unable to load cuda driver library", "library", libPath, "error", msg)
				}
			}
			C.free(unsafe.Pointer(resp.err))
			continue
		}
		return int(resp.num_devices), &resp.ch, libPath
	}
	return 0, nil, ""
}

571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
// LoadNVMLMgmt tries each path in nvmlLibPaths in order and initializes the
// NVML management library from the first one that loads. It returns the handle
// and the path that was used, or (nil, "") when none could be loaded.
func LoadNVMLMgmt(nvmlLibPaths []string) (*C.nvml_handle_t, string) {
	var resp C.nvml_init_resp_t
	resp.ch.verbose = getVerboseState()
	for _, libPath := range nvmlLibPaths {
		lib := C.CString(libPath)
		C.nvml_init(lib, &resp)
		// Free the C copy of the path right away; a defer here would keep
		// every candidate's string alive until the function returns.
		C.free(unsafe.Pointer(lib))
		if resp.err != nil {
			slog.Info(fmt.Sprintf("Unable to load NVML management library %s: %s", libPath, C.GoString(resp.err)))
			C.free(unsafe.Pointer(resp.err))
			continue
		}
		return &resp.ch, libPath
	}
	return nil, ""
}

Wang,Zhe's avatar
Wang,Zhe committed
588
589
func LoadOneapiMgmt(oneapiLibPaths []string) (int, *C.oneapi_handle_t, string) {
	var resp C.oneapi_init_resp_t
Daniel Hiltgen's avatar
Daniel Hiltgen committed
590
	num_devices := 0
Wang,Zhe's avatar
Wang,Zhe committed
591
592
593
594
595
596
597
598
599
	resp.oh.verbose = getVerboseState()
	for _, libPath := range oneapiLibPaths {
		lib := C.CString(libPath)
		defer C.free(unsafe.Pointer(lib))
		C.oneapi_init(lib, &resp)
		if resp.err != nil {
			slog.Debug("Unable to load oneAPI management library", "library", libPath, "error", C.GoString(resp.err))
			C.free(unsafe.Pointer(resp.err))
		} else {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
600
			for i := range resp.oh.num_drivers {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
601
602
603
				num_devices += int(C.oneapi_get_device_count(resp.oh, C.int(i)))
			}
			return num_devices, &resp.oh, libPath
Wang,Zhe's avatar
Wang,Zhe committed
604
605
606
607
608
		}
	}
	return 0, nil, ""
}

609
func getVerboseState() C.uint16_t {
Michael Yang's avatar
Michael Yang committed
610
	if envconfig.Debug() {
611
612
613
614
		return C.uint16_t(1)
	}
	return C.uint16_t(0)
}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
615
616
617
618
619
620
621
622
623
624
625
626
627
628

// Given the list of GPUs this instantiation is targeted for,
// figure out the visible devices environment variable
//
// If different libraries are detected, the first one is what we use
func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) {
	if len(l) == 0 {
		return "", ""
	}
	switch l[0].Library {
	case "cuda":
		return cudaGetVisibleDevicesEnv(l)
	case "rocm":
		return rocmGetVisibleDevicesEnv(l)
Wang,Zhe's avatar
Wang,Zhe committed
629
630
	case "oneapi":
		return oneapiGetVisibleDevicesEnv(l)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
631
632
633
634
635
	default:
		slog.Debug("no filter required for library " + l[0].Library)
		return "", ""
	}
}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663

// GetDepDir locates the directory holding the bundled GPU dependency
// libraries ("ollama_libs") and returns its path, or "" when none exists.
//
// The executable's directory is searched first, then the working directory.
// Under each root the candidates are, in order: <root>/ollama_libs,
// <root>/<GOOS>-<GOARCH>/ollama_libs and <root>/dist/<GOOS>-<GOARCH>/ollama_libs.
// Failures to determine either root are logged and the search continues.
func GetDepDir() string {
	const libDep = "ollama_libs"

	// On Windows/linux we bundle the dependencies at the same level as the executable
	appExe, err := os.Executable()
	if err != nil {
		slog.Warn("failed to lookup executable path", "error", err)
	}
	cwd, err := os.Getwd()
	if err != nil {
		slog.Warn("failed to lookup working directory", "error", err)
	}

	platform := runtime.GOOS + "-" + runtime.GOARCH

	// Scan for any of our dependencies, and pick first match
	for _, root := range []string{filepath.Dir(appExe), cwd} {
		candidates := [...]string{
			filepath.Join(root, libDep),
			// Developer mode, local build
			filepath.Join(root, platform, libDep),
			filepath.Join(root, "dist", platform, libDep),
		}
		for _, dir := range candidates {
			if _, statErr := os.Stat(dir); statErr == nil {
				return dir
			}
		}
	}
	slog.Warn("unable to locate gpu dependency libraries")
	return ""
}