gpu.go 11.6 KB
Newer Older
1
2
3
4
5
//go:build linux || windows

package gpu

/*
6
7
8
#cgo linux LDFLAGS: -lrt -lpthread -ldl -lstdc++ -lm
#cgo windows LDFLAGS: -lpthread

9
10
11
12
13
14
#include "gpu_info.h"

*/
import "C"
import (
	"fmt"
15
	"log/slog"
16
17
	"os"
	"path/filepath"
18
	"runtime"
19
	"strings"
20
21
	"sync"
	"unsafe"
Michael Yang's avatar
Michael Yang committed
22

23
	"github.com/ollama/ollama/envconfig"
24
	"github.com/ollama/ollama/format"
25
26
27
)

type handles struct {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
28
29
	deviceCount int
	cudart      *C.cudart_handle_t
30
	nvcuda      *C.nvcuda_handle_t
Wang,Zhe's avatar
Wang,Zhe committed
31
	oneapi      *C.oneapi_handle_t
32
33
}

Michael Yang's avatar
Michael Yang committed
34
const (
Daniel Hiltgen's avatar
Daniel Hiltgen committed
35
36
	cudaMinimumMemory = 457 * format.MebiByte
	rocmMinimumMemory = 457 * format.MebiByte
Michael Yang's avatar
Michael Yang committed
37
38
)

39
40
// gpuMutex serializes GPU discovery: GetGPUInfo holds it for its whole run,
// and initGPUHandles expects its caller to already hold it.
var gpuMutex sync.Mutex

41
42
// CudaComputeMin is the minimum supported CUDA compute capability as {major, minor}.
// GetGPUInfo skips any NVIDIA device reporting a lower capability.
// With our current CUDA compile flags, older than 5.0 will not work properly
var CudaComputeMin = [2]C.int{5, 0}
43

Daniel Hiltgen's avatar
Daniel Hiltgen committed
44
// RocmComputeMin is the minimum supported value for ROCm devices.
// NOTE(review): presumably the minimum gfx major version; it is not referenced
// in this file view, so confirm against the AMD discovery code.
var RocmComputeMin = 9
45

Daniel Hiltgen's avatar
Daniel Hiltgen committed
46
47
// IGPUMemLimit is the memory threshold below which a device is assumed to be an
// integrated GPU.
// NOTE(review): the trailing note previously said "512G", which contradicts
// "less than 1G"; 512M is presumably what was meant.
// TODO find a better way to detect iGPU instead of minimum memory
const IGPUMemLimit = 1 * format.GibiByte // 512M is what they typically report, so anything less than 1G must be iGPU
48

49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
// CudartLinuxGlobs lists the default glob patterns used on Linux to locate the
// CUDA runtime library (libcudart). initGPUHandles appends them after the
// payload-directory pattern, so they are searched in the order given here.
var CudartLinuxGlobs = []string{
	"/usr/local/cuda/lib64/libcudart.so*",
	"/usr/lib/x86_64-linux-gnu/nvidia/current/libcudart.so*",
	"/usr/lib/x86_64-linux-gnu/libcudart.so*",
	"/usr/lib/wsl/lib/libcudart.so*",
	"/usr/lib/wsl/drivers/*/libcudart.so*",
	"/opt/cuda/lib64/libcudart.so*",
	"/usr/local/cuda*/targets/aarch64-linux/lib/libcudart.so*",
	"/usr/lib/aarch64-linux-gnu/nvidia/current/libcudart.so*",
	"/usr/lib/aarch64-linux-gnu/libcudart.so*",
	"/usr/local/cuda/lib*/libcudart.so*",
	"/usr/lib*/libcudart.so*",
	"/usr/local/lib*/libcudart.so*",
}

// CudartWindowsGlobs lists the default glob patterns used on Windows to locate
// the CUDA runtime DLL. initGPUHandles appends them after the pattern built
// from LOCALAPPDATA.
var CudartWindowsGlobs = []string{
	"c:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v*\\bin\\cudart64_*.dll",
}

68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
// NvcudaLinuxGlobs lists the default glob patterns used on Linux to locate the
// NVIDIA driver library (libcuda). initGPUHandles passes them to FindGPULibs
// unchanged.
var NvcudaLinuxGlobs = []string{
	"/usr/local/cuda*/targets/*/lib/libcuda.so*",
	"/usr/lib/*-linux-gnu/nvidia/current/libcuda.so*",
	"/usr/lib/*-linux-gnu/libcuda.so*",
	"/usr/lib/wsl/lib/libcuda.so*",
	"/usr/lib/wsl/drivers/*/libcuda.so*",
	"/opt/cuda/lib*/libcuda.so*",
	"/usr/local/cuda/lib*/libcuda.so*",
	"/usr/lib*/libcuda.so*",
	"/usr/local/lib*/libcuda.so*",
}

// NvcudaWindowsGlobs lists the default glob patterns used on Windows to locate
// nvcuda.dll. initGPUHandles passes them to FindGPULibs unchanged.
var NvcudaWindowsGlobs = []string{
	"c:\\windows\\system*\\nvcuda.dll",
}

Wang,Zhe's avatar
Wang,Zhe committed
84
85
86
87
88
89
90
91
92
// OneapiWindowsGlobs lists glob patterns for the Intel GPU library on Windows.
// NOTE(review): no consumer of this list is visible in this file view.
var OneapiWindowsGlobs = []string{
	"c:\\Windows\\System32\\DriverStore\\FileRepository\\*\\ze_intel_gpu64.dll",
}

// OneapiLinuxGlobs lists glob patterns for the Intel GPU library on Linux.
// NOTE(review): no consumer of this list is visible in this file view.
var OneapiLinuxGlobs = []string{
	"/usr/lib/x86_64-linux-gnu/libze_intel_gpu.so*",
	"/usr/lib*/libze_intel_gpu.so*",
}

93
94
95
96
// CudaTegra holds the value of the JETSON_JETPACK environment variable, read
// once at package initialization. Jetson devices have it factory set to the
// installed Jetpack version ("x.y.z"); it is empty elsewhere. It is included to
// drive logic for reducing Ollama-allocated overhead on L4T/Jetson devices.
var CudaTegra = os.Getenv("JETSON_JETPACK")

97
// Note: gpuMutex must already be held
98
func initGPUHandles() *handles {
99

100
	// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing
101

Daniel Hiltgen's avatar
Daniel Hiltgen committed
102
	gpuHandles := &handles{}
103
104
	var cudartMgmtName string
	var cudartMgmtPatterns []string
105
106
	var nvcudaMgmtName string
	var nvcudaMgmtPatterns []string
107
108

	tmpDir, _ := PayloadsDir()
109
110
	switch runtime.GOOS {
	case "windows":
111
112
113
114
		cudartMgmtName = "cudart64_*.dll"
		localAppData := os.Getenv("LOCALAPPDATA")
		cudartMgmtPatterns = []string{filepath.Join(localAppData, "Programs", "Ollama", cudartMgmtName)}
		cudartMgmtPatterns = append(cudartMgmtPatterns, CudartWindowsGlobs...)
115
116
117
		// Aligned with driver, we can't carry as payloads
		nvcudaMgmtName = "nvcuda.dll"
		nvcudaMgmtPatterns = NvcudaWindowsGlobs
118
	case "linux":
119
120
121
122
123
124
		cudartMgmtName = "libcudart.so*"
		if tmpDir != "" {
			// TODO - add "payloads" for subprocess
			cudartMgmtPatterns = []string{filepath.Join(tmpDir, "cuda*", cudartMgmtName)}
		}
		cudartMgmtPatterns = append(cudartMgmtPatterns, CudartLinuxGlobs...)
125
126
127
		// Aligned with driver, we can't carry as payloads
		nvcudaMgmtName = "libcuda.so*"
		nvcudaMgmtPatterns = NvcudaLinuxGlobs
128
	default:
129
		return gpuHandles
130
131
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
132
	slog.Debug("Detecting GPUs")
133
134
135
136
	nvcudaLibPaths := FindGPULibs(nvcudaMgmtName, nvcudaMgmtPatterns)
	if len(nvcudaLibPaths) > 0 {
		deviceCount, nvcuda, libPath := LoadNVCUDAMgmt(nvcudaLibPaths)
		if nvcuda != nil {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
137
			slog.Debug("detected GPUs", "count", deviceCount, "library", libPath)
138
139
140
141
142
143
			gpuHandles.nvcuda = nvcuda
			gpuHandles.deviceCount = deviceCount
			return gpuHandles
		}
	}

144
145
	cudartLibPaths := FindGPULibs(cudartMgmtName, cudartMgmtPatterns)
	if len(cudartLibPaths) > 0 {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
146
		deviceCount, cudart, libPath := LoadCUDARTMgmt(cudartLibPaths)
147
		if cudart != nil {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
148
			slog.Debug("detected GPUs", "library", libPath, "count", deviceCount)
149
			gpuHandles.cudart = cudart
Daniel Hiltgen's avatar
Daniel Hiltgen committed
150
			gpuHandles.deviceCount = deviceCount
151
			return gpuHandles
152
153
		}
	}
Wang,Zhe's avatar
Wang,Zhe committed
154

155
	return gpuHandles
156
157
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
158
func GetGPUInfo() GpuInfoList {
159
160
161
162
	// TODO - consider exploring lspci (and equivalent on windows) to check for
	// GPUs so we can report warnings if we see Nvidia/AMD but fail to load the libraries
	gpuMutex.Lock()
	defer gpuMutex.Unlock()
163
164
165
166
167
168

	gpuHandles := initGPUHandles()
	defer func() {
		if gpuHandles.cudart != nil {
			C.cudart_release(*gpuHandles.cudart)
		}
169
170
171
		if gpuHandles.nvcuda != nil {
			C.nvcuda_release(*gpuHandles.nvcuda)
		}
172
	}()
173

174
	// All our GPU builds on x86 have AVX enabled, so fallback to CPU if we don't detect at least AVX
175
	cpuVariant := GetCPUVariant()
176
	if cpuVariant == "" && runtime.GOARCH == "amd64" {
177
178
179
		slog.Warn("CPU does not have AVX or AVX2, disabling GPU support.")
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
180
181
182
183
184
185
	// On windows we bundle the nvidia library one level above the runner dir
	depPath := ""
	if runtime.GOOS == "windows" && envconfig.RunnersDir != "" {
		depPath = filepath.Dir(envconfig.RunnersDir)
	}

186
	var memInfo C.mem_info_t
Daniel Hiltgen's avatar
Daniel Hiltgen committed
187
188
189
	resp := []GpuInfo{}

	// NVIDIA first
Michael Yang's avatar
Michael Yang committed
190
	for i := range gpuHandles.deviceCount {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
191
192
193
		// TODO once we support CPU compilation variants of GPU libraries refine this...
		if cpuVariant == "" && runtime.GOARCH == "amd64" {
			continue
194
		}
Wang,Zhe's avatar
Wang,Zhe committed
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
		if gpuHandles.cudart != nil || gpuHandles.nvcuda != nil {
			gpuInfo := GpuInfo{
				Library: "cuda",
			}
			var driverMajor int
			var driverMinor int
			if gpuHandles.cudart != nil {
				C.cudart_check_vram(*gpuHandles.cudart, C.int(i), &memInfo)
			} else {
				C.nvcuda_check_vram(*gpuHandles.nvcuda, C.int(i), &memInfo)
				driverMajor = int(gpuHandles.nvcuda.driver_major)
				driverMinor = int(gpuHandles.nvcuda.driver_minor)
			}
			if memInfo.err != nil {
				slog.Info("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
				C.free(unsafe.Pointer(memInfo.err))
				continue
			}
			if memInfo.major < CudaComputeMin[0] || (memInfo.major == CudaComputeMin[0] && memInfo.minor < CudaComputeMin[1]) {
				slog.Info(fmt.Sprintf("[%d] CUDA GPU is too old. Compute Capability detected: %d.%d", i, memInfo.major, memInfo.minor))
				continue
			}
			gpuInfo.TotalMemory = uint64(memInfo.total)
			gpuInfo.FreeMemory = uint64(memInfo.free)
			gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
			gpuInfo.Compute = fmt.Sprintf("%d.%d", memInfo.major, memInfo.minor)
			gpuInfo.MinimumMemory = cudaMinimumMemory
			gpuInfo.DependencyPath = depPath
			gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
Michael Yang's avatar
Michael Yang committed
224
225
			gpuInfo.DriverMajor = driverMajor
			gpuInfo.DriverMinor = driverMinor
Wang,Zhe's avatar
Wang,Zhe committed
226
227
228

			// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
			resp = append(resp, gpuInfo)
229
230
		}
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
231
232
233
234
235

	// Then AMD
	resp = append(resp, AMDGetGPUInfo()...)

	if len(resp) == 0 {
236
		C.cpu_check_ram(&memInfo)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
237
238
239
240
241
242
243
244
245
246
247
248
249
250
		if memInfo.err != nil {
			slog.Info("error looking up CPU memory", "error", C.GoString(memInfo.err))
			C.free(unsafe.Pointer(memInfo.err))
			return resp
		}
		gpuInfo := GpuInfo{
			Library: "cpu",
			Variant: cpuVariant,
		}
		gpuInfo.TotalMemory = uint64(memInfo.total)
		gpuInfo.FreeMemory = uint64(memInfo.free)
		gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])

		resp = append(resp, gpuInfo)
251
	}
252

253
254
255
	return resp
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
256
// GetCPUMem queries system memory through the C helper cpu_check_ram and
// returns the free and total amounts. When the helper reports an error, the
// zero memInfo is returned together with an error carrying the helper's message.
func GetCPUMem() (memInfo, error) {
	var ret memInfo
	var info C.mem_info_t
	C.cpu_check_ram(&info)
	if info.err != nil {
		// The deferred free runs after the message has been copied into the Go error.
		defer C.free(unsafe.Pointer(info.err))
		// Pass the C text as an argument, never as the format string: a '%' in
		// the message would otherwise be interpreted as a formatting verb.
		return ret, fmt.Errorf("%s", C.GoString(info.err))
	}
	ret.FreeMemory = uint64(info.free)
	ret.TotalMemory = uint64(info.total)
	return ret, nil
}

269
// FindGPULibs returns candidate paths for a GPU management library.
//
// baseLibName is the library file name (it may contain glob metacharacters) and
// defaultPatterns are additional glob patterns to try. Directories listed in
// PATH (Windows) or LD_LIBRARY_PATH (Linux) are searched first, followed by
// defaultPatterns in order. Every match has its symlinks resolved and the
// result is de-duplicated, preserving discovery order. On any other OS an empty
// slice is returned. The returned slice is never nil.
//
// Empty entries in the search-path variable (including the variable being unset)
// are ignored so that the current working directory is never searched implicitly.
func FindGPULibs(baseLibName string, defaultPatterns []string) []string {
	// Multiple GPU libraries may exist, and some may not work, so keep trying until we exhaust them
	gpuLibPaths := []string{}
	slog.Debug("Searching for GPU library", "name", baseLibName)

	var searchPathEnv string
	switch runtime.GOOS {
	case "windows":
		searchPathEnv = "PATH"
	case "linux":
		searchPathEnv = "LD_LIBRARY_PATH"
	default:
		return gpuLibPaths
	}

	// Start with whatever we find in the PATH/LD_LIBRARY_PATH
	var patterns []string
	for _, ldPath := range filepath.SplitList(os.Getenv(searchPathEnv)) {
		if ldPath == "" {
			// filepath.Abs("") is the working directory; do not search it.
			continue
		}
		d, err := filepath.Abs(ldPath)
		if err != nil {
			continue
		}
		patterns = append(patterns, filepath.Join(d, baseLibName+"*"))
	}
	patterns = append(patterns, defaultPatterns...)
	slog.Debug("gpu library search", "globs", patterns)

	seen := make(map[string]struct{})
	for _, pattern := range patterns {
		// Nvidia PhysX known to return bogus results
		if strings.Contains(pattern, "PhysX") {
			slog.Debug("skipping PhysX cuda library path", "path", pattern)
			continue
		}
		// Ignore glob discovery errors
		matches, _ := filepath.Glob(pattern)
		for _, match := range matches {
			// Resolve any links so we don't try the same lib multiple times
			// and weed out any dups across globs
			libPath := resolveLibSymlinks(match)
			if _, dup := seen[libPath]; dup {
				continue
			}
			seen[libPath] = struct{}{}
			gpuLibPaths = append(gpuLibPaths, libPath)
		}
	}
	slog.Debug("discovered GPU libraries", "paths", gpuLibPaths)
	return gpuLibPaths
}

// resolveLibSymlinks follows the chain of symlinks starting at path and returns
// the final target. Relative link targets are interpreted relative to the
// directory of the link. The number of hops is bounded so a link cycle cannot
// hang discovery; when the bound is hit the path reached so far is returned.
func resolveLibSymlinks(path string) string {
	const maxHops = 40
	for hop := 0; hop < maxHops; hop++ {
		target, err := os.Readlink(path)
		if err != nil {
			// Not a symlink (or unreadable): path is as resolved as it gets.
			return path
		}
		if !filepath.IsAbs(target) {
			target = filepath.Join(filepath.Dir(path), target)
		}
		path = target
	}
	return path
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
331
func LoadCUDARTMgmt(cudartLibPaths []string) (int, *C.cudart_handle_t, string) {
332
	var resp C.cudart_init_resp_t
333
	resp.ch.verbose = getVerboseState()
334
	for _, libPath := range cudartLibPaths {
335
336
		lib := C.CString(libPath)
		defer C.free(unsafe.Pointer(lib))
337
		C.cudart_init(lib, &resp)
338
		if resp.err != nil {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
339
			slog.Debug("Unable to load cudart", "library", libPath, "error", C.GoString(resp.err))
340
341
			C.free(unsafe.Pointer(resp.err))
		} else {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
342
			return int(resp.num_devices), &resp.ch, libPath
343
344
		}
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
345
	return 0, nil, ""
346
347
}

348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
// LoadNVCUDAMgmt tries each candidate NVIDIA driver library path in order and
// stops at the first one that nvcuda_init loads without error.
//
// It returns the device count reported by the library, a handle to it, and the
// path that was loaded. If no candidate loads, it returns (0, nil, "").
func LoadNVCUDAMgmt(nvcudaLibPaths []string) (int, *C.nvcuda_handle_t, string) {
	var resp C.nvcuda_init_resp_t
	resp.ch.verbose = getVerboseState()
	for _, libPath := range nvcudaLibPaths {
		lib := C.CString(libPath)
		C.nvcuda_init(lib, &resp)
		// Free right away: a defer inside the loop would keep every path string
		// allocated until the function returns.
		C.free(unsafe.Pointer(lib))
		if resp.err == nil {
			return int(resp.num_devices), &resp.ch, libPath
		}
		slog.Debug("Unable to load nvcuda", "library", libPath, "error", C.GoString(resp.err))
		C.free(unsafe.Pointer(resp.err))
		// resp is reused for the next candidate; do not leave a dangling pointer in it.
		resp.err = nil
	}
	return 0, nil, ""
}

Wang,Zhe's avatar
Wang,Zhe committed
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
// LoadOneapiMgmt tries each candidate oneAPI management library path in order
// and stops at the first one that oneapi_init loads without error.
//
// It returns the device count reported by the library, a handle to it, and the
// path that was loaded. If no candidate loads, it returns (0, nil, "").
func LoadOneapiMgmt(oneapiLibPaths []string) (int, *C.oneapi_handle_t, string) {
	var resp C.oneapi_init_resp_t
	resp.oh.verbose = getVerboseState()
	for _, libPath := range oneapiLibPaths {
		lib := C.CString(libPath)
		C.oneapi_init(lib, &resp)
		// Free right away: a defer inside the loop would keep every path string
		// allocated until the function returns.
		C.free(unsafe.Pointer(lib))
		if resp.err == nil {
			return int(resp.num_devices), &resp.oh, libPath
		}
		slog.Debug("Unable to load oneAPI management library", "library", libPath, "error", C.GoString(resp.err))
		C.free(unsafe.Pointer(resp.err))
		// resp is reused for the next candidate; do not leave a dangling pointer in it.
		resp.err = nil
	}
	return 0, nil, ""
}

382
// getVerboseState maps envconfig.Debug onto the verbose flag handed to the C
// management library wrappers: 1 when debugging is enabled, 0 otherwise.
func getVerboseState() C.uint16_t {
	var verbose C.uint16_t
	if envconfig.Debug {
		verbose = 1
	}
	return verbose
}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
388
389
390
391
392
393
394
395
396
397
398
399
400
401

// Given the list of GPUs this instantiation is targeted for,
// figure out the visible devices environment variable
//
// If different libraries are detected, the first one is what we use
func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) {
	if len(l) == 0 {
		return "", ""
	}
	switch l[0].Library {
	case "cuda":
		return cudaGetVisibleDevicesEnv(l)
	case "rocm":
		return rocmGetVisibleDevicesEnv(l)
Wang,Zhe's avatar
Wang,Zhe committed
402
403
	case "oneapi":
		return oneapiGetVisibleDevicesEnv(l)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
404
405
406
407
408
	default:
		slog.Debug("no filter required for library " + l[0].Library)
		return "", ""
	}
}