"doc/vscode:/vscode.git/clone" did not exist on "3a3021502e7f52c0e8cd70a620e1b2b16f363cd6"
gpu.go 12.7 KB
Newer Older
1
2
3
4
5
//go:build linux || windows

package gpu

/*
6
7
8
#cgo linux LDFLAGS: -lrt -lpthread -ldl -lstdc++ -lm
#cgo windows LDFLAGS: -lpthread

9
10
11
12
13
14
#include "gpu_info.h"

*/
import "C"
import (
	"fmt"
15
	"log/slog"
16
17
	"os"
	"path/filepath"
18
	"runtime"
Wang,Zhe's avatar
Wang,Zhe committed
19
	"strconv"
20
	"strings"
21
22
	"sync"
	"unsafe"
Michael Yang's avatar
Michael Yang committed
23
24

	"github.com/ollama/ollama/format"
25
	"github.com/ollama/ollama/envconfig"
26
27
28
)

type handles struct {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
29
30
	deviceCount int
	cudart      *C.cudart_handle_t
31
	nvcuda      *C.nvcuda_handle_t
Wang,Zhe's avatar
Wang,Zhe committed
32
	oneapi      *C.oneapi_handle_t
33
34
}

Michael Yang's avatar
Michael Yang committed
35
const (
Daniel Hiltgen's avatar
Daniel Hiltgen committed
36
37
	cudaMinimumMemory = 457 * format.MebiByte
	rocmMinimumMemory = 457 * format.MebiByte
Michael Yang's avatar
Michael Yang committed
38
39
)

40
41
// gpuMutex serializes GPU discovery: GetGPUInfo holds it for its whole call,
// and initGPUHandles expects the caller to already hold it.
var gpuMutex sync.Mutex

42
43
// CudaComputeMin is the minimum CUDA compute capability as {major, minor}.
// GetGPUInfo skips any CUDA device that reports a lower capability.
// With our current CUDA compile flags, older than 5.0 will not work properly
var CudaComputeMin = [2]C.int{5, 0}
44

Daniel Hiltgen's avatar
Daniel Hiltgen committed
45
// RocmComputeMin is not referenced in this part of the file.
// NOTE(review): presumably the minimum supported AMD compute (gfx major)
// version used by the ROCm discovery code — confirm against its caller.
var RocmComputeMin = 9
46

Daniel Hiltgen's avatar
Daniel Hiltgen committed
47
48
// IGPUMemLimit is the memory size below which a device is assumed to be an
// integrated GPU.
// TODO find a better way to detect iGPU instead of minimum memory
const IGPUMemLimit = 1 * format.GibiByte // 512M is what they typically report, so anything less than 1G must be iGPU
49

50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
// CudartLinuxGlobs lists the default glob patterns searched for the CUDA
// runtime library (libcudart.so*) on linux. initGPUHandles appends them after
// the payload-directory pattern, and FindGPULibs tries them after any
// LD_LIBRARY_PATH entries.
var CudartLinuxGlobs = []string{
	"/usr/local/cuda/lib64/libcudart.so*",
	"/usr/lib/x86_64-linux-gnu/nvidia/current/libcudart.so*",
	"/usr/lib/x86_64-linux-gnu/libcudart.so*",
	"/usr/lib/wsl/lib/libcudart.so*",
	"/usr/lib/wsl/drivers/*/libcudart.so*",
	"/opt/cuda/lib64/libcudart.so*",
	"/usr/local/cuda*/targets/aarch64-linux/lib/libcudart.so*",
	"/usr/lib/aarch64-linux-gnu/nvidia/current/libcudart.so*",
	"/usr/lib/aarch64-linux-gnu/libcudart.so*",
	"/usr/local/cuda/lib*/libcudart.so*",
	"/usr/lib*/libcudart.so*",
	"/usr/local/lib*/libcudart.so*",
}

// CudartWindowsGlobs lists the default glob patterns searched for the CUDA
// runtime DLL on windows. initGPUHandles appends them after the
// %LOCALAPPDATA%\Programs\Ollama pattern.
var CudartWindowsGlobs = []string{
	"c:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v*\\bin\\cudart64_*.dll",
}

69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
// NvcudaLinuxGlobs lists the default glob patterns searched for the NVIDIA
// driver library (libcuda.so*) on linux. initGPUHandles uses this list
// unmodified as the nvcuda search patterns.
var NvcudaLinuxGlobs = []string{
	"/usr/local/cuda*/targets/*/lib/libcuda.so*",
	"/usr/lib/*-linux-gnu/nvidia/current/libcuda.so*",
	"/usr/lib/*-linux-gnu/libcuda.so*",
	"/usr/lib/wsl/lib/libcuda.so*",
	"/usr/lib/wsl/drivers/*/libcuda.so*",
	"/opt/cuda/lib*/libcuda.so*",
	"/usr/local/cuda/lib*/libcuda.so*",
	"/usr/lib*/libcuda.so*",
	"/usr/local/lib*/libcuda.so*",
}

// NvcudaWindowsGlobs lists the default glob patterns searched for the NVIDIA
// driver DLL (nvcuda.dll) on windows.
var NvcudaWindowsGlobs = []string{
	"c:\\windows\\system*\\nvcuda.dll",
}

Wang,Zhe's avatar
Wang,Zhe committed
85
86
87
88
89
90
91
92
93
// OneapiWindowsGlobs lists the default glob patterns searched for the Intel
// GPU library (ze_intel_gpu64.dll) on windows.
var OneapiWindowsGlobs = []string{
	"c:\\Windows\\System32\\DriverStore\\FileRepository\\*\\ze_intel_gpu64.dll",
}

// OneapiLinuxGlobs lists the default glob patterns searched for the Intel
// GPU library (libze_intel_gpu.so*) on linux.
var OneapiLinuxGlobs = []string{
	"/usr/lib/x86_64-linux-gnu/libze_intel_gpu.so*",
	"/usr/lib*/libze_intel_gpu.so*",
}

94
95
96
97
// CudaTegra holds the value of the JETSON_JETPACK environment variable, read
// once at package initialization; it is empty when the variable is unset.
//
// Jetson devices have JETSON_JETPACK="x.y.z" factory set to the Jetpack version installed.
// Included to drive logic for reducing Ollama-allocated overhead on L4T/Jetson devices.
var CudaTegra string = os.Getenv("JETSON_JETPACK")

98
// Note: gpuMutex must already be held
99
func initGPUHandles() *handles {
100

101
	// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing
102

Daniel Hiltgen's avatar
Daniel Hiltgen committed
103
	gpuHandles := &handles{}
104
105
	var cudartMgmtName string
	var cudartMgmtPatterns []string
106
107
	var nvcudaMgmtName string
	var nvcudaMgmtPatterns []string
Wang,Zhe's avatar
Wang,Zhe committed
108
109
	var oneapiMgmtName string
	var oneapiMgmtPatterns []string
110
111

	tmpDir, _ := PayloadsDir()
112
113
	switch runtime.GOOS {
	case "windows":
114
115
116
117
		cudartMgmtName = "cudart64_*.dll"
		localAppData := os.Getenv("LOCALAPPDATA")
		cudartMgmtPatterns = []string{filepath.Join(localAppData, "Programs", "Ollama", cudartMgmtName)}
		cudartMgmtPatterns = append(cudartMgmtPatterns, CudartWindowsGlobs...)
118
119
120
		// Aligned with driver, we can't carry as payloads
		nvcudaMgmtName = "nvcuda.dll"
		nvcudaMgmtPatterns = NvcudaWindowsGlobs
Wang,Zhe's avatar
Wang,Zhe committed
121
122
		oneapiMgmtName = "ze_intel_gpu64.dll"
		oneapiMgmtPatterns = OneapiWindowsGlobs
123
	case "linux":
124
125
126
127
128
129
		cudartMgmtName = "libcudart.so*"
		if tmpDir != "" {
			// TODO - add "payloads" for subprocess
			cudartMgmtPatterns = []string{filepath.Join(tmpDir, "cuda*", cudartMgmtName)}
		}
		cudartMgmtPatterns = append(cudartMgmtPatterns, CudartLinuxGlobs...)
130
131
132
		// Aligned with driver, we can't carry as payloads
		nvcudaMgmtName = "libcuda.so*"
		nvcudaMgmtPatterns = NvcudaLinuxGlobs
Wang,Zhe's avatar
Wang,Zhe committed
133
134
		oneapiMgmtName = "libze_intel_gpu.so"
		oneapiMgmtPatterns = OneapiLinuxGlobs
135
	default:
136
		return gpuHandles
137
138
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
139
	slog.Debug("Detecting GPUs")
140
141
142
143
	nvcudaLibPaths := FindGPULibs(nvcudaMgmtName, nvcudaMgmtPatterns)
	if len(nvcudaLibPaths) > 0 {
		deviceCount, nvcuda, libPath := LoadNVCUDAMgmt(nvcudaLibPaths)
		if nvcuda != nil {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
144
			slog.Debug("detected GPUs", "count", deviceCount, "library", libPath)
145
146
147
148
149
150
			gpuHandles.nvcuda = nvcuda
			gpuHandles.deviceCount = deviceCount
			return gpuHandles
		}
	}

151
152
	cudartLibPaths := FindGPULibs(cudartMgmtName, cudartMgmtPatterns)
	if len(cudartLibPaths) > 0 {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
153
		deviceCount, cudart, libPath := LoadCUDARTMgmt(cudartLibPaths)
154
		if cudart != nil {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
155
			slog.Debug("detected GPUs", "library", libPath, "count", deviceCount)
156
			gpuHandles.cudart = cudart
Daniel Hiltgen's avatar
Daniel Hiltgen committed
157
			gpuHandles.deviceCount = deviceCount
158
			return gpuHandles
159
160
		}
	}
Wang,Zhe's avatar
Wang,Zhe committed
161
162
163
164
165
166
167
168
169
170
171
172

	oneapiLibPaths := FindGPULibs(oneapiMgmtName, oneapiMgmtPatterns)
	if len(oneapiLibPaths) > 0 {
		deviceCount, oneapi, libPath := LoadOneapiMgmt(oneapiLibPaths)
		if oneapi != nil {
			slog.Debug("detected Intel GPUs", "library", libPath, "count", deviceCount)
			gpuHandles.oneapi = oneapi
			gpuHandles.deviceCount = deviceCount
			return gpuHandles
		}
	}

173
	return gpuHandles
174
175
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
176
func GetGPUInfo() GpuInfoList {
177
178
179
180
	// TODO - consider exploring lspci (and equivalent on windows) to check for
	// GPUs so we can report warnings if we see Nvidia/AMD but fail to load the libraries
	gpuMutex.Lock()
	defer gpuMutex.Unlock()
181
182
183
184
185
186

	gpuHandles := initGPUHandles()
	defer func() {
		if gpuHandles.cudart != nil {
			C.cudart_release(*gpuHandles.cudart)
		}
187
188
189
		if gpuHandles.nvcuda != nil {
			C.nvcuda_release(*gpuHandles.nvcuda)
		}
190
	}()
191

192
	// All our GPU builds on x86 have AVX enabled, so fallback to CPU if we don't detect at least AVX
193
	cpuVariant := GetCPUVariant()
194
	if cpuVariant == "" && runtime.GOARCH == "amd64" {
195
196
197
		slog.Warn("CPU does not have AVX or AVX2, disabling GPU support.")
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
198
199
200
201
202
203
	// On windows we bundle the nvidia library one level above the runner dir
	depPath := ""
	if runtime.GOOS == "windows" && envconfig.RunnersDir != "" {
		depPath = filepath.Dir(envconfig.RunnersDir)
	}

204
	var memInfo C.mem_info_t
Daniel Hiltgen's avatar
Daniel Hiltgen committed
205
206
207
208
209
210
211
	resp := []GpuInfo{}

	// NVIDIA first
	for i := 0; i < gpuHandles.deviceCount; i++ {
		// TODO once we support CPU compilation variants of GPU libraries refine this...
		if cpuVariant == "" && runtime.GOARCH == "amd64" {
			continue
212
		}
Wang,Zhe's avatar
Wang,Zhe committed
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
		if gpuHandles.cudart != nil || gpuHandles.nvcuda != nil {
			gpuInfo := GpuInfo{
				Library: "cuda",
			}
			var driverMajor int
			var driverMinor int
			if gpuHandles.cudart != nil {
				C.cudart_check_vram(*gpuHandles.cudart, C.int(i), &memInfo)
			} else {
				C.nvcuda_check_vram(*gpuHandles.nvcuda, C.int(i), &memInfo)
				driverMajor = int(gpuHandles.nvcuda.driver_major)
				driverMinor = int(gpuHandles.nvcuda.driver_minor)
			}
			if memInfo.err != nil {
				slog.Info("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
				C.free(unsafe.Pointer(memInfo.err))
				continue
			}
			if memInfo.major < CudaComputeMin[0] || (memInfo.major == CudaComputeMin[0] && memInfo.minor < CudaComputeMin[1]) {
				slog.Info(fmt.Sprintf("[%d] CUDA GPU is too old. Compute Capability detected: %d.%d", i, memInfo.major, memInfo.minor))
				continue
			}
			gpuInfo.TotalMemory = uint64(memInfo.total)
			gpuInfo.FreeMemory = uint64(memInfo.free)
			gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
			gpuInfo.Compute = fmt.Sprintf("%d.%d", memInfo.major, memInfo.minor)
			gpuInfo.MinimumMemory = cudaMinimumMemory
			gpuInfo.DependencyPath = depPath
			gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
			gpuInfo.DriverMajor = int(driverMajor)
			gpuInfo.DriverMinor = int(driverMinor)

			// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
			resp = append(resp, gpuInfo)
247
		}
Wang,Zhe's avatar
Wang,Zhe committed
248
249
250
251
252
253
254
255
256
257
258
		if gpuHandles.oneapi != nil {
			gpuInfo := GpuInfo{
				Library: "oneapi",
			}
			C.oneapi_check_vram(*gpuHandles.oneapi, &memInfo)
			var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
			memInfo.free = C.uint64_t(totalFreeMem)
			gpuInfo.TotalMemory = uint64(memInfo.total)
			gpuInfo.FreeMemory = uint64(memInfo.free)
			gpuInfo.ID = strconv.Itoa(i)
			resp = append(resp, gpuInfo)
259
260
		}
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
261
262
263
264
265

	// Then AMD
	resp = append(resp, AMDGetGPUInfo()...)

	if len(resp) == 0 {
266
		C.cpu_check_ram(&memInfo)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
267
268
269
270
271
272
273
274
275
276
277
278
279
280
		if memInfo.err != nil {
			slog.Info("error looking up CPU memory", "error", C.GoString(memInfo.err))
			C.free(unsafe.Pointer(memInfo.err))
			return resp
		}
		gpuInfo := GpuInfo{
			Library: "cpu",
			Variant: cpuVariant,
		}
		gpuInfo.TotalMemory = uint64(memInfo.total)
		gpuInfo.FreeMemory = uint64(memInfo.free)
		gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])

		resp = append(resp, gpuInfo)
281
	}
282

283
284
285
	return resp
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
286
// GetCPUMem returns the system's total and free memory as reported by
// cpu_check_ram. On failure it returns a zero memInfo and an error carrying
// the message produced by the C layer.
func GetCPUMem() (memInfo, error) {
	var ret memInfo
	var info C.mem_info_t
	C.cpu_check_ram(&info)
	if info.err != nil {
		// Copy the message into Go memory before releasing the C string.
		msg := C.GoString(info.err)
		C.free(unsafe.Pointer(info.err))
		// The message is passed as an argument, never as the format string,
		// so a '%' in the C text cannot be misread as a formatting verb.
		return ret, fmt.Errorf("%s", msg)
	}
	ret.FreeMemory = uint64(info.free)
	ret.TotalMemory = uint64(info.total)
	return ret, nil
}

299
// FindGPULibs returns candidate paths for a GPU library.
//
// baseLibName is the library file name (a glob is allowed). It is searched
// for, with a trailing "*", in every directory of PATH (windows) or
// LD_LIBRARY_PATH (linux). defaultPatterns are complete glob patterns tried
// after those. Any pattern containing "PhysX" is skipped. Each match has its
// symlinks followed to the final target, and duplicate targets are reported
// once, in discovery order. On any other OS the result is empty. The returned
// slice is never nil.
func FindGPULibs(baseLibName string, defaultPatterns []string) []string {
	// Upper bound on symlink hops followed for one match, so a cyclic link
	// cannot hang discovery.
	const maxSymlinkHops = 40

	// Multiple GPU libraries may exist, and some may not work, so keep trying until we exhaust them
	var ldPaths []string
	var patterns []string
	gpuLibPaths := []string{}
	slog.Debug("Searching for GPU library", "name", baseLibName)

	switch runtime.GOOS {
	case "windows":
		ldPaths = strings.Split(os.Getenv("PATH"), ";")
	case "linux":
		ldPaths = strings.Split(os.Getenv("LD_LIBRARY_PATH"), ":")
	default:
		return gpuLibPaths
	}
	// Start with whatever we find in the PATH/LD_LIBRARY_PATH
	for _, ldPath := range ldPaths {
		d, err := filepath.Abs(ldPath)
		if err != nil {
			continue
		}
		patterns = append(patterns, filepath.Join(d, baseLibName+"*"))
	}
	patterns = append(patterns, defaultPatterns...)
	slog.Debug("gpu library search", "globs", patterns)

	for _, pattern := range patterns {
		// Nvidia PhysX known to return bogus results
		if strings.Contains(pattern, "PhysX") {
			slog.Debug("skipping PhysX cuda library path", "path", pattern)
			continue
		}
		// Ignore glob discovery errors
		matches, _ := filepath.Glob(pattern)
		for _, match := range matches {
			// Resolve any links so we don't try the same lib multiple times
			// and weed out any dups across globs
			libPath := match
			resolved := true
			for hops := 0; ; hops++ {
				target, err := os.Readlink(libPath)
				if err != nil {
					// Not a symlink (or unreadable): libPath is final.
					break
				}
				if hops >= maxSymlinkHops {
					resolved = false
					break
				}
				// A relative link target is relative to the link's directory.
				if !filepath.IsAbs(target) {
					target = filepath.Join(filepath.Dir(libPath), target)
				}
				libPath = target
			}
			if !resolved {
				slog.Debug("skipping GPU library with too many symlink levels", "path", match)
				continue
			}

			seen := false
			for _, known := range gpuLibPaths {
				if known == libPath {
					seen = true
					break
				}
			}
			if !seen {
				gpuLibPaths = append(gpuLibPaths, libPath)
			}
		}
	}
	slog.Debug("discovered GPU libraries", "paths", gpuLibPaths)
	return gpuLibPaths
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
360
func LoadCUDARTMgmt(cudartLibPaths []string) (int, *C.cudart_handle_t, string) {
361
	var resp C.cudart_init_resp_t
362
	resp.ch.verbose = getVerboseState()
363
	for _, libPath := range cudartLibPaths {
364
365
		lib := C.CString(libPath)
		defer C.free(unsafe.Pointer(lib))
366
		C.cudart_init(lib, &resp)
367
		if resp.err != nil {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
368
			slog.Debug("Unable to load cudart", "library", libPath, "error", C.GoString(resp.err))
369
370
			C.free(unsafe.Pointer(resp.err))
		} else {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
371
			return int(resp.num_devices), &resp.ch, libPath
372
373
		}
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
374
	return 0, nil, ""
375
376
}

377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
// LoadNVCUDAMgmt tries each NVIDIA driver library path in order and returns
// the device count, handle and path of the first one nvcuda_init accepts.
// It returns (0, nil, "") when none load.
func LoadNVCUDAMgmt(nvcudaLibPaths []string) (int, *C.nvcuda_handle_t, string) {
	var resp C.nvcuda_init_resp_t
	resp.ch.verbose = getVerboseState()
	for _, libPath := range nvcudaLibPaths {
		lib := C.CString(libPath)
		C.nvcuda_init(lib, &resp)
		// Release the C copy of the path now instead of deferring inside the
		// loop, which would hold every candidate's string until return.
		// NOTE(review): assumes nvcuda_init does not keep the pointer.
		C.free(unsafe.Pointer(lib))
		if resp.err != nil {
			slog.Debug("Unable to load nvcuda", "library", libPath, "error", C.GoString(resp.err))
			C.free(unsafe.Pointer(resp.err))
			// Do not carry a freed pointer into the next attempt.
			resp.err = nil
			continue
		}
		return int(resp.num_devices), &resp.ch, libPath
	}
	return 0, nil, ""
}

Wang,Zhe's avatar
Wang,Zhe committed
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
// LoadOneapiMgmt tries each Intel oneAPI library path in order and returns
// the device count, handle and path of the first one oneapi_init accepts.
// It returns (0, nil, "") when none load.
func LoadOneapiMgmt(oneapiLibPaths []string) (int, *C.oneapi_handle_t, string) {
	var resp C.oneapi_init_resp_t
	resp.oh.verbose = getVerboseState()
	for _, libPath := range oneapiLibPaths {
		lib := C.CString(libPath)
		C.oneapi_init(lib, &resp)
		// Release the C copy of the path now instead of deferring inside the
		// loop, which would hold every candidate's string until return.
		// NOTE(review): assumes oneapi_init does not keep the pointer.
		C.free(unsafe.Pointer(lib))
		if resp.err != nil {
			slog.Debug("Unable to load oneAPI management library", "library", libPath, "error", C.GoString(resp.err))
			C.free(unsafe.Pointer(resp.err))
			// Do not carry a freed pointer into the next attempt.
			resp.err = nil
			continue
		}
		return int(resp.num_devices), &resp.oh, libPath
	}
	return 0, nil, ""
}

411
// getVerboseState maps envconfig.Debug onto the verbose flag stored in the C
// handle structs: 1 when debug is enabled, 0 otherwise.
func getVerboseState() C.uint16_t {
	var verbose C.uint16_t
	if envconfig.Debug {
		verbose = 1
	}
	return verbose
}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
417
418
419
420
421
422
423
424
425
426
427
428
429
430

// Given the list of GPUs this instantiation is targeted for,
// figure out the visible devices environment variable
//
// If different libraries are detected, the first one is what we use
func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) {
	if len(l) == 0 {
		return "", ""
	}
	switch l[0].Library {
	case "cuda":
		return cudaGetVisibleDevicesEnv(l)
	case "rocm":
		return rocmGetVisibleDevicesEnv(l)
Wang,Zhe's avatar
Wang,Zhe committed
431
432
	case "oneapi":
		return oneapiGetVisibleDevicesEnv(l)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
433
434
435
436
437
	default:
		slog.Debug("no filter required for library " + l[0].Library)
		return "", ""
	}
}