gpu.go 21.1 KB
Newer Older
1
2
3
4
5
//go:build linux || windows

package gpu

/*
#cgo linux LDFLAGS: -lrt -lpthread -ldl -lstdc++ -lm
#cgo windows LDFLAGS: -lpthread

#include "gpu_info.h"
*/
import "C"
Michael Yang's avatar
lint  
Michael Yang committed
12

13
14
import (
	"fmt"
	"log/slog"
	"os"
	"path/filepath"
	"regexp"
	"runtime"
	"slices"
	"strconv"
	"strings"
	"sync"
	"unsafe"

	"github.com/ollama/ollama/envconfig"
	"github.com/ollama/ollama/format"
)

Daniel Hiltgen's avatar
Daniel Hiltgen committed
29
type cudaHandles struct {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
30
31
	deviceCount int
	cudart      *C.cudart_handle_t
32
	nvcuda      *C.nvcuda_handle_t
33
	nvml        *C.nvml_handle_t
Daniel Hiltgen's avatar
Daniel Hiltgen committed
34
35
36
}

type oneapiHandles struct {
Wang,Zhe's avatar
Wang,Zhe committed
37
	oneapi      *C.oneapi_handle_t
Daniel Hiltgen's avatar
Daniel Hiltgen committed
38
	deviceCount int
39
40
}

Michael Yang's avatar
Michael Yang committed
41
const (
Daniel Hiltgen's avatar
Daniel Hiltgen committed
42
43
	cudaMinimumMemory = 457 * format.MebiByte
	rocmMinimumMemory = 457 * format.MebiByte
Daniel Hiltgen's avatar
Daniel Hiltgen committed
44
	// TODO OneAPI minimum memory
Michael Yang's avatar
Michael Yang committed
45
46
)

47
48
49
50
51
52
53
54
55
var (
	gpuMutex      sync.Mutex
	bootstrapped  bool
	cpuCapability CPUCapability
	cpus          []CPUInfo
	cudaGPUs      []CudaGPUInfo
	nvcudaLibPath string
	cudartLibPath string
	oneapiLibPath string
56
	nvmlLibPath   string
57
58
59
	rocmGPUs      []RocmGPUInfo
	oneapiGPUs    []OneapiGPUInfo
)
60

61
62
// CudaComputeMin is the lowest CUDA compute capability accepted during
// discovery, expressed as {major, minor}.
// With our current CUDA compile flags, older than 5.0 will not work properly
var CudaComputeMin = [2]C.int{5, 0}
63

Daniel Hiltgen's avatar
Daniel Hiltgen committed
64
// RocmComputeMin is not referenced in this file; presumably the minimum
// supported AMD gfx major version — TODO confirm against the ROCm discovery code.
var RocmComputeMin = 9
65

Daniel Hiltgen's avatar
Daniel Hiltgen committed
66
67
// IGPUMemLimit is the memory size below which a device is treated as an
// integrated GPU.
// TODO find a better way to detect iGPU instead of minimum memory
const IGPUMemLimit = 1 * format.GibiByte // 512M is what they typically report, so anything less than 1G must be iGPU (NOTE(review): this comment originally said 512G, which is not below 1G — presumably a typo; confirm)
68

69
70
71
72
// CudaTegra holds the value of the JETSON_JETPACK environment variable as read
// at package initialisation. Jetson devices ship with JETSON_JETPACK="x.y.z"
// set to the installed Jetpack version; it drives the logic that reduces
// Ollama-allocated overhead on L4T/Jetson devices.
var CudaTegra = os.Getenv("JETSON_JETPACK")

73
// Note: gpuMutex must already be held
Daniel Hiltgen's avatar
Daniel Hiltgen committed
74
func initCudaHandles() *cudaHandles {
75
	// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing
76

Daniel Hiltgen's avatar
Daniel Hiltgen committed
77
	cHandles := &cudaHandles{}
78
	// Short Circuit if we already know which library to use
79
80
81
82
	if nvmlLibPath != "" {
		cHandles.nvml, _ = LoadNVMLMgmt([]string{nvmlLibPath})
		return cHandles
	}
83
	if nvcudaLibPath != "" {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
84
85
		cHandles.deviceCount, cHandles.nvcuda, _ = LoadNVCUDAMgmt([]string{nvcudaLibPath})
		return cHandles
86
87
	}
	if cudartLibPath != "" {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
88
89
		cHandles.deviceCount, cHandles.cudart, _ = LoadCUDARTMgmt([]string{cudartLibPath})
		return cHandles
90
91
92
	}

	slog.Debug("searching for GPU discovery libraries for NVIDIA")
93
	var cudartMgmtPatterns []string
94

Daniel Hiltgen's avatar
Daniel Hiltgen committed
95
96
	// Aligned with driver, we can't carry as payloads
	nvcudaMgmtPatterns := NvcudaGlobs
97

Daniel Hiltgen's avatar
Daniel Hiltgen committed
98
99
100
101
102
103
104
105
	if runtime.GOOS == "windows" {
		localAppData := os.Getenv("LOCALAPPDATA")
		cudartMgmtPatterns = []string{filepath.Join(localAppData, "Programs", "Ollama", CudartMgmtName)}
	}
	tmpDir, _ := PayloadsDir()
	if tmpDir != "" {
		// TODO - add "payloads" for subprocess
		cudartMgmtPatterns = []string{filepath.Join(tmpDir, "cuda*", CudartMgmtName)}
106
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
107
	cudartMgmtPatterns = append(cudartMgmtPatterns, CudartGlobs...)
108

Daniel Hiltgen's avatar
Daniel Hiltgen committed
109
110
	if len(NvmlGlobs) > 0 {
		nvmlLibPaths := FindGPULibs(NvmlMgmtName, NvmlGlobs)
111
112
113
114
115
116
117
118
119
120
		if len(nvmlLibPaths) > 0 {
			nvml, libPath := LoadNVMLMgmt(nvmlLibPaths)
			if nvml != nil {
				slog.Debug("nvidia-ml loaded", "library", libPath)
				cHandles.nvml = nvml
				nvmlLibPath = libPath
			}
		}
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
121
	nvcudaLibPaths := FindGPULibs(NvcudaMgmtName, nvcudaMgmtPatterns)
122
123
124
	if len(nvcudaLibPaths) > 0 {
		deviceCount, nvcuda, libPath := LoadNVCUDAMgmt(nvcudaLibPaths)
		if nvcuda != nil {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
125
			slog.Debug("detected GPUs", "count", deviceCount, "library", libPath)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
126
127
			cHandles.nvcuda = nvcuda
			cHandles.deviceCount = deviceCount
128
			nvcudaLibPath = libPath
Daniel Hiltgen's avatar
Daniel Hiltgen committed
129
			return cHandles
130
131
132
		}
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
133
	cudartLibPaths := FindGPULibs(CudartMgmtName, cudartMgmtPatterns)
134
	if len(cudartLibPaths) > 0 {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
135
		deviceCount, cudart, libPath := LoadCUDARTMgmt(cudartLibPaths)
136
		if cudart != nil {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
137
			slog.Debug("detected GPUs", "library", libPath, "count", deviceCount)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
138
139
			cHandles.cudart = cudart
			cHandles.deviceCount = deviceCount
140
			cudartLibPath = libPath
Daniel Hiltgen's avatar
Daniel Hiltgen committed
141
			return cHandles
142
143
		}
	}
Wang,Zhe's avatar
Wang,Zhe committed
144

Daniel Hiltgen's avatar
Daniel Hiltgen committed
145
146
147
148
149
150
151
152
153
154
155
156
157
	return cHandles
}

// Note: gpuMutex must already be held
func initOneAPIHandles() *oneapiHandles {
	oHandles := &oneapiHandles{}

	// Short Circuit if we already know which library to use
	if oneapiLibPath != "" {
		oHandles.deviceCount, oHandles.oneapi, _ = LoadOneapiMgmt([]string{oneapiLibPath})
		return oHandles
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
158
	oneapiLibPaths := FindGPULibs(OneapiMgmtName, OneapiGlobs)
159
	if len(oneapiLibPaths) > 0 {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
160
		oHandles.deviceCount, oHandles.oneapi, oneapiLibPath = LoadOneapiMgmt(oneapiLibPaths)
161
162
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
163
	return oHandles
164
165
}

166
167
168
169
170
171
172
173
174
175
176
// GetCPUInfo returns a single-element list describing the CPU. If discovery
// has not run yet it triggers GetGPUInfo first so that the CPU entry exists.
func GetCPUInfo() GpuInfoList {
	// Sample the flag under the lock, then release it before calling
	// GetGPUInfo, which acquires gpuMutex itself.
	gpuMutex.Lock()
	ready := bootstrapped
	gpuMutex.Unlock()

	if !ready {
		GetGPUInfo()
	}
	return GpuInfoList{cpus[0].GpuInfo}
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
177
func GetGPUInfo() GpuInfoList {
178
179
180
181
	// TODO - consider exploring lspci (and equivalent on windows) to check for
	// GPUs so we can report warnings if we see Nvidia/AMD but fail to load the libraries
	gpuMutex.Lock()
	defer gpuMutex.Unlock()
182
	needRefresh := true
Daniel Hiltgen's avatar
Daniel Hiltgen committed
183
184
	var cHandles *cudaHandles
	var oHandles *oneapiHandles
185
	defer func() {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
186
187
188
189
190
191
192
		if cHandles != nil {
			if cHandles.cudart != nil {
				C.cudart_release(*cHandles.cudart)
			}
			if cHandles.nvcuda != nil {
				C.nvcuda_release(*cHandles.nvcuda)
			}
193
194
195
			if cHandles.nvml != nil {
				C.nvml_release(*cHandles.nvml)
			}
196
		}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
197
198
199
200
201
		if oHandles != nil {
			if oHandles.oneapi != nil {
				// TODO - is this needed?
				C.oneapi_release(*oHandles.oneapi)
			}
202
		}
203
	}()
204

205
	if !bootstrapped {
206
		slog.Info("looking for compatible GPUs")
207
		needRefresh = false
Daniel Hiltgen's avatar
Daniel Hiltgen committed
208
		cpuCapability = GetCPUCapability()
209
		var memInfo C.mem_info_t
210
211
212
213

		mem, err := GetCPUMem()
		if err != nil {
			slog.Warn("error looking up system memory", "error", err)
214
		}
Michael Yang's avatar
lint  
Michael Yang committed
215
216
217
218
219
		cpus = []CPUInfo{
			{
				GpuInfo: GpuInfo{
					memInfo: mem,
					Library: "cpu",
220
					Variant: cpuCapability.String(),
Michael Yang's avatar
lint  
Michael Yang committed
221
222
					ID:      "0",
				},
223
			},
Michael Yang's avatar
lint  
Michael Yang committed
224
		}
225
226
227

		// Fallback to CPU mode if we're lacking required vector extensions on x86
		if cpuCapability < GPURunnerCPUCapability && runtime.GOARCH == "amd64" {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
228
			slog.Warn("CPU does not have minimum vector extensions, GPU inference disabled", "required", GPURunnerCPUCapability, "detected", cpuCapability)
229
230
231
232
			bootstrapped = true
			// No need to do any GPU discovery, since we can't run on them
			return GpuInfoList{cpus[0].GpuInfo}
		}
233

Daniel Hiltgen's avatar
Daniel Hiltgen committed
234
		depPath := GetDepDir()
235

236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
		var cudaVariant string
		if runtime.GOARCH == "arm64" && runtime.GOOS == "linux" {
			if CudaTegra != "" {
				ver := strings.Split(CudaTegra, ".")
				if len(ver) > 0 {
					cudaVariant = "jetpack" + ver[0]
				}
			} else if data, err := os.ReadFile("/etc/nv_tegra_release"); err == nil {
				r := regexp.MustCompile(` R(\d+) `)
				m := r.FindSubmatch(data)
				if len(m) != 2 {
					slog.Info("Unexpected format for /etc/nv_tegra_release.  Set JETSON_JETPACK to select version")
				} else {
					if l4t, err := strconv.Atoi(string(m[1])); err == nil {
						// Note: mapping from L4t -> JP is inconsistent (can't just subtract 30)
						// https://developer.nvidia.com/embedded/jetpack-archive
						switch l4t {
						case 35:
							cudaVariant = "jetpack5"
						case 36:
							cudaVariant = "jetpack6"
						default:
							slog.Info("unsupported L4T version", "nv_tegra_release", string(data))
						}
					}
				}
			}
		}

265
		// Load ALL libraries
Daniel Hiltgen's avatar
Daniel Hiltgen committed
266
		cHandles = initCudaHandles()
267
268

		// NVIDIA
Daniel Hiltgen's avatar
Daniel Hiltgen committed
269
270
		for i := range cHandles.deviceCount {
			if cHandles.cudart != nil || cHandles.nvcuda != nil {
271
272
273
				gpuInfo := CudaGPUInfo{
					GpuInfo: GpuInfo{
						Library: "cuda",
274
						Variant: cudaVariant,
275
276
277
278
279
					},
					index: i,
				}
				var driverMajor int
				var driverMinor int
Daniel Hiltgen's avatar
Daniel Hiltgen committed
280
281
				if cHandles.cudart != nil {
					C.cudart_bootstrap(*cHandles.cudart, C.int(i), &memInfo)
282
				} else {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
283
284
285
					C.nvcuda_bootstrap(*cHandles.nvcuda, C.int(i), &memInfo)
					driverMajor = int(cHandles.nvcuda.driver_major)
					driverMinor = int(cHandles.nvcuda.driver_minor)
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
				}
				if memInfo.err != nil {
					slog.Info("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
					C.free(unsafe.Pointer(memInfo.err))
					continue
				}
				if memInfo.major < CudaComputeMin[0] || (memInfo.major == CudaComputeMin[0] && memInfo.minor < CudaComputeMin[1]) {
					slog.Info(fmt.Sprintf("[%d] CUDA GPU is too old. Compute Capability detected: %d.%d", i, memInfo.major, memInfo.minor))
					continue
				}
				gpuInfo.TotalMemory = uint64(memInfo.total)
				gpuInfo.FreeMemory = uint64(memInfo.free)
				gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
				gpuInfo.Compute = fmt.Sprintf("%d.%d", memInfo.major, memInfo.minor)
				gpuInfo.MinimumMemory = cudaMinimumMemory
301
302
303
304
305
306
307
308
309
				if depPath != "" {
					gpuInfo.DependencyPath = depPath
					// Check for variant specific directory
					if cudaVariant != "" {
						if _, err := os.Stat(filepath.Join(depPath, "cuda_"+cudaVariant)); err == nil {
							gpuInfo.DependencyPath = filepath.Join(depPath, "cuda_"+cudaVariant)
						}
					}
				}
310
				gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
Daniel Hiltgen's avatar
Daniel Hiltgen committed
311
312
				gpuInfo.DriverMajor = driverMajor
				gpuInfo.DriverMinor = driverMinor
313

Daniel Hiltgen's avatar
Daniel Hiltgen committed
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
				// query the management library as well so we can record any skew between the two
				// which represents overhead on the GPU we must set aside on subsequent updates
				if cHandles.nvml != nil {
					C.nvml_get_free(*cHandles.nvml, C.int(gpuInfo.index), &memInfo.free, &memInfo.total, &memInfo.used)
					if memInfo.err != nil {
						slog.Warn("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
						C.free(unsafe.Pointer(memInfo.err))
					} else {
						if memInfo.free != 0 && uint64(memInfo.free) > gpuInfo.FreeMemory {
							gpuInfo.OSOverhead = uint64(memInfo.free) - gpuInfo.FreeMemory
							slog.Info("detected OS VRAM overhead",
								"id", gpuInfo.ID,
								"library", gpuInfo.Library,
								"compute", gpuInfo.Compute,
								"driver", fmt.Sprintf("%d.%d", gpuInfo.DriverMajor, gpuInfo.DriverMinor),
								"name", gpuInfo.Name,
								"overhead", format.HumanBytes2(gpuInfo.OSOverhead),
							)
						}
					}
				}

336
337
				// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
				cudaGPUs = append(cudaGPUs, gpuInfo)
Wang,Zhe's avatar
Wang,Zhe committed
338
			}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
339
340
341
		}

		// Intel
Michael Yang's avatar
bool  
Michael Yang committed
342
		if envconfig.IntelGPU() {
343
			oHandles = initOneAPIHandles()
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
			if oHandles != nil && oHandles.oneapi != nil {
				for d := range oHandles.oneapi.num_drivers {
					if oHandles.oneapi == nil {
						// shouldn't happen
						slog.Warn("nil oneapi handle with driver count", "count", int(oHandles.oneapi.num_drivers))
						continue
					}
					devCount := C.oneapi_get_device_count(*oHandles.oneapi, C.int(d))
					for i := range devCount {
						gpuInfo := OneapiGPUInfo{
							GpuInfo: GpuInfo{
								Library: "oneapi",
							},
							driverIndex: int(d),
							gpuIndex:    int(i),
						}
						// TODO - split bootstrapping from updating free memory
						C.oneapi_check_vram(*oHandles.oneapi, C.int(d), i, &memInfo)
						// TODO - convert this to MinimumMemory based on testing...
						var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
						memInfo.free = C.uint64_t(totalFreeMem)
						gpuInfo.TotalMemory = uint64(memInfo.total)
						gpuInfo.FreeMemory = uint64(memInfo.free)
						gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
						gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
						gpuInfo.DependencyPath = depPath
						oneapiGPUs = append(oneapiGPUs, gpuInfo)
371
					}
372
373
374
375
376
377
				}
			}
		}

		rocmGPUs = AMDGetGPUInfo()
		bootstrapped = true
378
379
380
		if len(cudaGPUs) == 0 && len(rocmGPUs) == 0 && len(oneapiGPUs) == 0 {
			slog.Info("no compatible GPUs were discovered")
		}
381
382
383
384
385
386
	}

	// For detected GPUs, load library if not loaded

	// Refresh free memory usage
	if needRefresh {
387
388
389
390
391
392
393
394
395
		mem, err := GetCPUMem()
		if err != nil {
			slog.Warn("error looking up system memory", "error", err)
		} else {
			slog.Debug("updating system memory data",
				slog.Group(
					"before",
					"total", format.HumanBytes2(cpus[0].TotalMemory),
					"free", format.HumanBytes2(cpus[0].FreeMemory),
396
					"free_swap", format.HumanBytes2(cpus[0].FreeSwap),
397
398
399
400
401
				),
				slog.Group(
					"now",
					"total", format.HumanBytes2(mem.TotalMemory),
					"free", format.HumanBytes2(mem.FreeMemory),
402
					"free_swap", format.HumanBytes2(mem.FreeSwap),
403
404
405
				),
			)
			cpus[0].FreeMemory = mem.FreeMemory
406
			cpus[0].FreeSwap = mem.FreeSwap
407
408
		}

409
		var memInfo C.mem_info_t
Daniel Hiltgen's avatar
Daniel Hiltgen committed
410
411
		if cHandles == nil && len(cudaGPUs) > 0 {
			cHandles = initCudaHandles()
412
413
		}
		for i, gpu := range cudaGPUs {
414
415
416
			if cHandles.nvml != nil {
				C.nvml_get_free(*cHandles.nvml, C.int(gpu.index), &memInfo.free, &memInfo.total, &memInfo.used)
			} else if cHandles.cudart != nil {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
417
				C.cudart_bootstrap(*cHandles.cudart, C.int(gpu.index), &memInfo)
418
419
420
			} else if cHandles.nvcuda != nil {
				C.nvcuda_get_free(*cHandles.nvcuda, C.int(gpu.index), &memInfo.free, &memInfo.total)
				memInfo.used = memInfo.total - memInfo.free
Wang,Zhe's avatar
Wang,Zhe committed
421
			} else {
422
423
424
				// shouldn't happen
				slog.Warn("no valid cuda library loaded to refresh vram usage")
				break
Wang,Zhe's avatar
Wang,Zhe committed
425
426
			}
			if memInfo.err != nil {
427
				slog.Warn("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
Wang,Zhe's avatar
Wang,Zhe committed
428
429
430
				C.free(unsafe.Pointer(memInfo.err))
				continue
			}
431
432
			if memInfo.free == 0 {
				slog.Warn("error looking up nvidia GPU memory")
Wang,Zhe's avatar
Wang,Zhe committed
433
434
				continue
			}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
435
436
437
438
			if cHandles.nvml != nil && gpu.OSOverhead > 0 {
				// When using the management library update based on recorded overhead
				memInfo.free -= C.uint64_t(gpu.OSOverhead)
			}
439
440
441
			slog.Debug("updating cuda memory data",
				"gpu", gpu.ID,
				"name", gpu.Name,
Daniel Hiltgen's avatar
Daniel Hiltgen committed
442
				"overhead", format.HumanBytes2(gpu.OSOverhead),
443
444
445
446
447
448
449
450
451
452
453
454
				slog.Group(
					"before",
					"total", format.HumanBytes2(gpu.TotalMemory),
					"free", format.HumanBytes2(gpu.FreeMemory),
				),
				slog.Group(
					"now",
					"total", format.HumanBytes2(uint64(memInfo.total)),
					"free", format.HumanBytes2(uint64(memInfo.free)),
					"used", format.HumanBytes2(uint64(memInfo.used)),
				),
			)
455
			cudaGPUs[i].FreeMemory = uint64(memInfo.free)
456
		}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473

		if oHandles == nil && len(oneapiGPUs) > 0 {
			oHandles = initOneAPIHandles()
		}
		for i, gpu := range oneapiGPUs {
			if oHandles.oneapi == nil {
				// shouldn't happen
				slog.Warn("nil oneapi handle with device count", "count", oHandles.deviceCount)
				continue
			}
			C.oneapi_check_vram(*oHandles.oneapi, C.int(gpu.driverIndex), C.int(gpu.gpuIndex), &memInfo)
			// TODO - convert this to MinimumMemory based on testing...
			var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
			memInfo.free = C.uint64_t(totalFreeMem)
			oneapiGPUs[i].FreeMemory = uint64(memInfo.free)
		}

474
		err = RocmGPUInfoList(rocmGPUs).RefreshFreeMemory()
475
476
		if err != nil {
			slog.Debug("problem refreshing ROCm free memory", "error", err)
477
		}
478
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
479

480
481
482
483
484
485
486
	resp := []GpuInfo{}
	for _, gpu := range cudaGPUs {
		resp = append(resp, gpu.GpuInfo)
	}
	for _, gpu := range rocmGPUs {
		resp = append(resp, gpu.GpuInfo)
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
487
488
489
	for _, gpu := range oneapiGPUs {
		resp = append(resp, gpu.GpuInfo)
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
490
	if len(resp) == 0 {
491
		resp = append(resp, cpus[0].GpuInfo)
492
493
494
495
	}
	return resp
}

496
func FindGPULibs(baseLibName string, defaultPatterns []string) []string {
497
498
499
	// Multiple GPU libraries may exist, and some may not work, so keep trying until we exhaust them
	var ldPaths []string
	gpuLibPaths := []string{}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
500
	slog.Debug("Searching for GPU library", "name", baseLibName)
501

Daniel Hiltgen's avatar
Daniel Hiltgen committed
502
503
504
	// Start with our bundled libraries
	patterns := []string{filepath.Join(GetDepDir(), baseLibName)}

505
506
507
508
509
510
511
512
	switch runtime.GOOS {
	case "windows":
		ldPaths = strings.Split(os.Getenv("PATH"), ";")
	case "linux":
		ldPaths = strings.Split(os.Getenv("LD_LIBRARY_PATH"), ":")
	default:
		return gpuLibPaths
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
513
514

	// Then with whatever we find in the PATH/LD_LIBRARY_PATH
515
516
517
518
519
	for _, ldPath := range ldPaths {
		d, err := filepath.Abs(ldPath)
		if err != nil {
			continue
		}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
520
		patterns = append(patterns, filepath.Join(d, baseLibName))
521
	}
522
	patterns = append(patterns, defaultPatterns...)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
523
	slog.Debug("gpu library search", "globs", patterns)
524
	for _, pattern := range patterns {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
525
526
527
528

		// Nvidia PhysX known to return bogus results
		if strings.Contains(pattern, "PhysX") {
			slog.Debug("skipping PhysX cuda library path", "path", pattern)
529
			continue
Daniel Hiltgen's avatar
Daniel Hiltgen committed
530
		}
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
		// Ignore glob discovery errors
		matches, _ := filepath.Glob(pattern)
		for _, match := range matches {
			// Resolve any links so we don't try the same lib multiple times
			// and weed out any dups across globs
			libPath := match
			tmp := match
			var err error
			for ; err == nil; tmp, err = os.Readlink(libPath) {
				if !filepath.IsAbs(tmp) {
					tmp = filepath.Join(filepath.Dir(libPath), tmp)
				}
				libPath = tmp
			}
			new := true
			for _, cmp := range gpuLibPaths {
				if cmp == libPath {
					new = false
					break
				}
			}
			if new {
				gpuLibPaths = append(gpuLibPaths, libPath)
			}
		}
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
557
	slog.Debug("discovered GPU libraries", "paths", gpuLibPaths)
558
559
560
	return gpuLibPaths
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
561
func LoadCUDARTMgmt(cudartLibPaths []string) (int, *C.cudart_handle_t, string) {
562
	var resp C.cudart_init_resp_t
563
	resp.ch.verbose = getVerboseState()
564
	for _, libPath := range cudartLibPaths {
565
566
		lib := C.CString(libPath)
		defer C.free(unsafe.Pointer(lib))
567
		C.cudart_init(lib, &resp)
568
		if resp.err != nil {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
569
			slog.Debug("Unable to load cudart", "library", libPath, "error", C.GoString(resp.err))
570
571
			C.free(unsafe.Pointer(resp.err))
		} else {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
572
			return int(resp.num_devices), &resp.ch, libPath
573
574
		}
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
575
	return 0, nil, ""
576
577
}

578
579
580
581
582
583
584
585
// LoadNVCUDAMgmt tries each path in nvcudaLibPaths in order and initialises
// the CUDA driver library from the first one that loads.
//
// It returns the reported device count, the library handle, and the path that
// loaded. When no path loads it returns 0, nil and "". The log level used for
// a failure depends on the CUDA error code, to help users understand why
// loading failed.
func LoadNVCUDAMgmt(nvcudaLibPaths []string) (int, *C.nvcuda_handle_t, string) {
	var resp C.nvcuda_init_resp_t
	resp.ch.verbose = getVerboseState()
	for _, libPath := range nvcudaLibPaths {
		lib := C.CString(libPath)
		C.nvcuda_init(lib, &resp)
		// Release the C copy of the path as soon as init returns; a defer here
		// would keep every candidate's copy alive until the function returns.
		C.free(unsafe.Pointer(lib))
		if resp.err != nil {
			// Decide what log level based on the type of error message to help users understand why
			msg := C.GoString(resp.err)
			switch resp.cudaErr {
			case C.CUDA_ERROR_INSUFFICIENT_DRIVER, C.CUDA_ERROR_SYSTEM_DRIVER_MISMATCH:
				slog.Warn("version mismatch between driver and cuda driver library - reboot or upgrade may be required", "library", libPath, "error", msg)
			case C.CUDA_ERROR_NO_DEVICE:
				slog.Info("no nvidia devices detected", "library", libPath)
			case C.CUDA_ERROR_UNKNOWN:
				slog.Warn("unknown error initializing cuda driver library", "library", libPath, "error", msg)
				slog.Warn("see https://github.com/ollama/ollama/blob/main/docs/troubleshooting.md for more information")
			default:
				if strings.Contains(msg, "wrong ELF class") {
					slog.Debug("skipping 32bit library", "library", libPath)
				} else {
					slog.Info("unable to load cuda driver library", "library", libPath, "error", msg)
				}
			}
			C.free(unsafe.Pointer(resp.err))
			continue
		}
		return int(resp.num_devices), &resp.ch, libPath
	}
	return 0, nil, ""
}

611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
// LoadNVMLMgmt tries each path in nvmlLibPaths in order and initialises the
// NVIDIA management library from the first one that loads.
//
// It returns the library handle and the path that loaded, or nil and "" when
// no path loads. Load failures are logged at info level.
func LoadNVMLMgmt(nvmlLibPaths []string) (*C.nvml_handle_t, string) {
	var resp C.nvml_init_resp_t
	resp.ch.verbose = getVerboseState()
	for _, libPath := range nvmlLibPaths {
		lib := C.CString(libPath)
		C.nvml_init(lib, &resp)
		// Release the C copy of the path as soon as init returns; a defer here
		// would keep every candidate's copy alive until the function returns.
		C.free(unsafe.Pointer(lib))
		if resp.err != nil {
			slog.Info(fmt.Sprintf("Unable to load NVML management library %s: %s", libPath, C.GoString(resp.err)))
			C.free(unsafe.Pointer(resp.err))
			continue
		}
		return &resp.ch, libPath
	}
	return nil, ""
}

Wang,Zhe's avatar
Wang,Zhe committed
628
629
func LoadOneapiMgmt(oneapiLibPaths []string) (int, *C.oneapi_handle_t, string) {
	var resp C.oneapi_init_resp_t
Daniel Hiltgen's avatar
Daniel Hiltgen committed
630
	num_devices := 0
Wang,Zhe's avatar
Wang,Zhe committed
631
632
633
634
635
636
637
638
639
	resp.oh.verbose = getVerboseState()
	for _, libPath := range oneapiLibPaths {
		lib := C.CString(libPath)
		defer C.free(unsafe.Pointer(lib))
		C.oneapi_init(lib, &resp)
		if resp.err != nil {
			slog.Debug("Unable to load oneAPI management library", "library", libPath, "error", C.GoString(resp.err))
			C.free(unsafe.Pointer(resp.err))
		} else {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
640
			for i := range resp.oh.num_drivers {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
641
642
643
				num_devices += int(C.oneapi_get_device_count(resp.oh, C.int(i)))
			}
			return num_devices, &resp.oh, libPath
Wang,Zhe's avatar
Wang,Zhe committed
644
645
646
647
648
		}
	}
	return 0, nil, ""
}

649
func getVerboseState() C.uint16_t {
Michael Yang's avatar
Michael Yang committed
650
	if envconfig.Debug() {
651
652
653
654
		return C.uint16_t(1)
	}
	return C.uint16_t(0)
}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
655
656
657
658
659
660
661
662
663
664
665
666
667
668

// Given the list of GPUs this instantiation is targeted for,
// figure out the visible devices environment variable
//
// If different libraries are detected, the first one is what we use
func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) {
	if len(l) == 0 {
		return "", ""
	}
	switch l[0].Library {
	case "cuda":
		return cudaGetVisibleDevicesEnv(l)
	case "rocm":
		return rocmGetVisibleDevicesEnv(l)
Wang,Zhe's avatar
Wang,Zhe committed
669
670
	case "oneapi":
		return oneapiGetVisibleDevicesEnv(l)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
671
672
673
674
675
	default:
		slog.Debug("no filter required for library " + l[0].Library)
		return "", ""
	}
}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703

// GetDepDir locates the directory holding the bundled GPU dependency
// libraries ("ollama_libs") and returns its path, or "" when none is found.
//
// Two roots are probed in order: the directory containing the running
// executable, then the current working directory. Under each root the first
// existing candidate wins:
//
//	<root>/ollama_libs
//	<root>/<GOOS>-<GOARCH>/ollama_libs        (developer mode, local build)
//	<root>/dist/<GOOS>-<GOARCH>/ollama_libs
func GetDepDir() string {
	const libDirName = "ollama_libs"
	platformDir := runtime.GOOS + "-" + runtime.GOARCH

	// On Windows/linux the dependencies are bundled next to the executable.
	exePath, exeErr := os.Executable()
	if exeErr != nil {
		slog.Warn("failed to lookup executable path", "error", exeErr)
	}
	workDir, wdErr := os.Getwd()
	if wdErr != nil {
		slog.Warn("failed to lookup working directory", "error", wdErr)
	}

	searchRoots := [...]string{filepath.Dir(exePath), workDir}
	for _, base := range searchRoots {
		candidates := [...]string{
			filepath.Join(base, libDirName),
			filepath.Join(base, platformDir, libDirName),
			filepath.Join(base, "dist", platformDir, libDirName),
		}
		for _, candidate := range candidates {
			if _, statErr := os.Stat(candidate); statErr == nil {
				return candidate
			}
		}
	}

	slog.Warn("unable to locate gpu dependency libraries")
	return ""
}