//go:build linux || windows

package gpu

/*
6
7
8
#cgo linux LDFLAGS: -lrt -lpthread -ldl -lstdc++ -lm
#cgo windows LDFLAGS: -lpthread

9
10
11
12
13
14
#include "gpu_info.h"

*/
import "C"
import (
	"fmt"
15
	"log/slog"
16
17
	"os"
	"path/filepath"
18
	"runtime"
19
	"strings"
20
21
	"sync"
	"unsafe"
Michael Yang's avatar
Michael Yang committed
22
23

	"github.com/ollama/ollama/format"
24
25
26
)

type handles struct {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
27
28
	deviceCount int
	cudart      *C.cudart_handle_t
29
	nvcuda      *C.nvcuda_handle_t
30
31
}

Michael Yang's avatar
Michael Yang committed
32
const (
33
34
	cudaMinimumMemory = 457 * format.MebiByte
	rocmMinimumMemory = 457 * format.MebiByte
Michael Yang's avatar
Michael Yang committed
35
36
)

37
38
// gpuMutex serializes GPU discovery: GetGPUInfo holds it for the whole time
// it loads the management libraries and queries devices.
var gpuMutex sync.Mutex

39
40
// CudaComputeMin is the minimum supported CUDA compute capability, stored as
// {major, minor}. GetGPUInfo skips devices that report a lower capability.
// With our current CUDA compile flags, older than 5.0 will not work properly
var CudaComputeMin = [2]C.int{5, 0}
41

Daniel Hiltgen's avatar
Daniel Hiltgen committed
42
// RocmComputeMin is presumably the minimum supported ROCm compute version.
// NOTE(review): it is not referenced in this part of the file - confirm its
// exact meaning against the AMD discovery code.
var RocmComputeMin = 9
43

Daniel Hiltgen's avatar
Daniel Hiltgen committed
44
45
// IGPUMemLimit is the memory size below which a device is assumed to be an
// integrated GPU (iGPU).
// TODO find a better way to detect iGPU instead of minimum memory
const IGPUMemLimit = 1 * format.GibiByte // 512M is what they typically report, so anything less than 1G must be iGPU
46

47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
// CudartLinuxGlobs lists the default glob patterns searched for libcudart on
// Linux. initGPUHandles appends them after the payload-directory pattern
// (when one exists) and passes the result to FindGPULibs.
var CudartLinuxGlobs = []string{
	"/usr/local/cuda/lib64/libcudart.so*",
	"/usr/lib/x86_64-linux-gnu/nvidia/current/libcudart.so*",
	"/usr/lib/x86_64-linux-gnu/libcudart.so*",
	"/usr/lib/wsl/lib/libcudart.so*",
	"/usr/lib/wsl/drivers/*/libcudart.so*",
	"/opt/cuda/lib64/libcudart.so*",
	"/usr/local/cuda*/targets/aarch64-linux/lib/libcudart.so*",
	"/usr/lib/aarch64-linux-gnu/nvidia/current/libcudart.so*",
	"/usr/lib/aarch64-linux-gnu/libcudart.so*",
	"/usr/local/cuda/lib*/libcudart.so*",
	"/usr/lib*/libcudart.so*",
	"/usr/local/lib*/libcudart.so*",
}

// CudartWindowsGlobs lists the default glob patterns searched for the cudart
// DLL on Windows. initGPUHandles appends them after the pattern for the
// Ollama install directory under LOCALAPPDATA.
var CudartWindowsGlobs = []string{
	"c:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v*\\bin\\cudart64_*.dll",
}

66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
// NvcudaLinuxGlobs lists the default glob patterns searched for libcuda on
// Linux. initGPUHandles passes them to FindGPULibs unchanged.
var NvcudaLinuxGlobs = []string{
	"/usr/local/cuda*/targets/*/lib/libcuda.so*",
	"/usr/lib/*-linux-gnu/nvidia/current/libcuda.so*",
	"/usr/lib/*-linux-gnu/libcuda.so*",
	"/usr/lib/wsl/lib/libcuda.so*",
	"/usr/lib/wsl/drivers/*/libcuda.so*",
	"/opt/cuda/lib*/libcuda.so*",
	"/usr/local/cuda/lib*/libcuda.so*",
	"/usr/lib*/libcuda.so*",
	"/usr/local/lib*/libcuda.so*",
}

// NvcudaWindowsGlobs lists the default glob patterns searched for nvcuda.dll
// on Windows. initGPUHandles passes them to FindGPULibs unchanged.
var NvcudaWindowsGlobs = []string{
	"c:\\windows\\system*\\nvcuda.dll",
}

82
83
84
85
// CudaTegra holds the value of the JETSON_JETPACK environment variable, read
// once at package initialization. Jetson devices ship with
// JETSON_JETPACK="x.y.z" preset to the installed Jetpack version; the value is
// kept to drive logic that reduces Ollama-allocated overhead on L4T/Jetson
// devices.
var CudaTegra = os.Getenv("JETSON_JETPACK")

86
// Note: gpuMutex must already be held
87
func initGPUHandles() *handles {
88

89
	// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing
90

Daniel Hiltgen's avatar
Daniel Hiltgen committed
91
	gpuHandles := &handles{}
92
93
	var cudartMgmtName string
	var cudartMgmtPatterns []string
94
95
	var nvcudaMgmtName string
	var nvcudaMgmtPatterns []string
96
97

	tmpDir, _ := PayloadsDir()
98
99
	switch runtime.GOOS {
	case "windows":
100
101
102
103
		cudartMgmtName = "cudart64_*.dll"
		localAppData := os.Getenv("LOCALAPPDATA")
		cudartMgmtPatterns = []string{filepath.Join(localAppData, "Programs", "Ollama", cudartMgmtName)}
		cudartMgmtPatterns = append(cudartMgmtPatterns, CudartWindowsGlobs...)
104
105
106
		// Aligned with driver, we can't carry as payloads
		nvcudaMgmtName = "nvcuda.dll"
		nvcudaMgmtPatterns = NvcudaWindowsGlobs
107
	case "linux":
108
109
110
111
112
113
		cudartMgmtName = "libcudart.so*"
		if tmpDir != "" {
			// TODO - add "payloads" for subprocess
			cudartMgmtPatterns = []string{filepath.Join(tmpDir, "cuda*", cudartMgmtName)}
		}
		cudartMgmtPatterns = append(cudartMgmtPatterns, CudartLinuxGlobs...)
114
115
116
		// Aligned with driver, we can't carry as payloads
		nvcudaMgmtName = "libcuda.so*"
		nvcudaMgmtPatterns = NvcudaLinuxGlobs
117
	default:
118
		return gpuHandles
119
120
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
121
	slog.Info("Detecting GPUs")
122
123
124
125
126
127
128
129
130
131
132
	nvcudaLibPaths := FindGPULibs(nvcudaMgmtName, nvcudaMgmtPatterns)
	if len(nvcudaLibPaths) > 0 {
		deviceCount, nvcuda, libPath := LoadNVCUDAMgmt(nvcudaLibPaths)
		if nvcuda != nil {
			slog.Info("detected GPUs", "count", deviceCount, "library", libPath)
			gpuHandles.nvcuda = nvcuda
			gpuHandles.deviceCount = deviceCount
			return gpuHandles
		}
	}

133
134
	cudartLibPaths := FindGPULibs(cudartMgmtName, cudartMgmtPatterns)
	if len(cudartLibPaths) > 0 {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
135
		deviceCount, cudart, libPath := LoadCUDARTMgmt(cudartLibPaths)
136
		if cudart != nil {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
137
			slog.Info("detected GPUs", "library", libPath, "count", deviceCount)
138
			gpuHandles.cudart = cudart
Daniel Hiltgen's avatar
Daniel Hiltgen committed
139
			gpuHandles.deviceCount = deviceCount
140
			return gpuHandles
141
142
		}
	}
143
	return gpuHandles
144
145
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
146
func GetGPUInfo() GpuInfoList {
147
148
149
150
	// TODO - consider exploring lspci (and equivalent on windows) to check for
	// GPUs so we can report warnings if we see Nvidia/AMD but fail to load the libraries
	gpuMutex.Lock()
	defer gpuMutex.Unlock()
151
152
153
154
155
156

	gpuHandles := initGPUHandles()
	defer func() {
		if gpuHandles.cudart != nil {
			C.cudart_release(*gpuHandles.cudart)
		}
157
158
159
		if gpuHandles.nvcuda != nil {
			C.nvcuda_release(*gpuHandles.nvcuda)
		}
160
	}()
161

162
	// All our GPU builds on x86 have AVX enabled, so fallback to CPU if we don't detect at least AVX
163
	cpuVariant := GetCPUVariant()
164
	if cpuVariant == "" && runtime.GOARCH == "amd64" {
165
166
167
		slog.Warn("CPU does not have AVX or AVX2, disabling GPU support.")
	}

168
	var memInfo C.mem_info_t
Daniel Hiltgen's avatar
Daniel Hiltgen committed
169
170
171
172
173
174
175
	resp := []GpuInfo{}

	// NVIDIA first
	for i := 0; i < gpuHandles.deviceCount; i++ {
		// TODO once we support CPU compilation variants of GPU libraries refine this...
		if cpuVariant == "" && runtime.GOARCH == "amd64" {
			continue
176
		}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
177
178
179
		gpuInfo := GpuInfo{
			Library: "cuda",
		}
180
181
182
183
184
		if gpuHandles.cudart != nil {
			C.cudart_check_vram(*gpuHandles.cudart, C.int(i), &memInfo)
		} else {
			C.nvcuda_check_vram(*gpuHandles.nvcuda, C.int(i), &memInfo)
		}
185
		if memInfo.err != nil {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
186
			slog.Info("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
187
			C.free(unsafe.Pointer(memInfo.err))
Daniel Hiltgen's avatar
Daniel Hiltgen committed
188
			continue
189
		}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
190
191
192
		if memInfo.major < CudaComputeMin[0] || (memInfo.major == CudaComputeMin[0] && memInfo.minor < CudaComputeMin[1]) {
			slog.Info(fmt.Sprintf("[%d] CUDA GPU is too old. Compute Capability detected: %d.%d", i, memInfo.major, memInfo.minor))
			continue
193
		}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
194
195
196
197
198
199
200
201
202
		gpuInfo.TotalMemory = uint64(memInfo.total)
		gpuInfo.FreeMemory = uint64(memInfo.free)
		gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
		gpuInfo.Major = int(memInfo.major)
		gpuInfo.Minor = int(memInfo.minor)
		gpuInfo.MinimumMemory = cudaMinimumMemory

		// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
		resp = append(resp, gpuInfo)
203
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
204
205
206
207
208

	// Then AMD
	resp = append(resp, AMDGetGPUInfo()...)

	if len(resp) == 0 {
209
		C.cpu_check_ram(&memInfo)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
210
211
212
213
214
215
216
217
218
219
220
221
222
223
		if memInfo.err != nil {
			slog.Info("error looking up CPU memory", "error", C.GoString(memInfo.err))
			C.free(unsafe.Pointer(memInfo.err))
			return resp
		}
		gpuInfo := GpuInfo{
			Library: "cpu",
			Variant: cpuVariant,
		}
		gpuInfo.TotalMemory = uint64(memInfo.total)
		gpuInfo.FreeMemory = uint64(memInfo.free)
		gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])

		resp = append(resp, gpuInfo)
224
	}
225

226
227
228
	return resp
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
229
// GetCPUMem returns the free and total system memory reported by the C helper
// cpu_check_ram. When the helper reports an error, its message is returned as
// the error and the memInfo result is left at its zero value.
func GetCPUMem() (memInfo, error) {
	var ret memInfo
	var info C.mem_info_t
	C.cpu_check_ram(&info)
	if info.err != nil {
		// Copy the message into Go memory before releasing the C string.
		msg := C.GoString(info.err)
		C.free(unsafe.Pointer(info.err))
		// The message is data, not a format string: a '%' in it must survive.
		return ret, fmt.Errorf("%s", msg)
	}
	ret.FreeMemory = uint64(info.free)
	ret.TotalMemory = uint64(info.total)
	return ret, nil
}

242
// FindGPULibs returns the candidate library files matching baseLibName.
//
// Directories from PATH (Windows) or LD_LIBRARY_PATH (Linux) are searched
// first, using baseLibName+"*" as the file pattern, followed by the glob
// patterns in defaultPatterns. Empty entries in the environment variable are
// ignored so that an unset variable does not turn into a search of the current
// working directory. A match that is a symbolic link is replaced by the file
// it ultimately points to, and each resulting path is reported once, in
// discovery order. Matches whose link chain does not end within
// maxSymlinkHops steps (for example a link cycle) are skipped.
//
// The result is never nil; it is empty when nothing matches or when the OS is
// neither Linux nor Windows.
func FindGPULibs(baseLibName string, defaultPatterns []string) []string {
	// Upper bound on symlink hops followed for one match.
	const maxSymlinkHops = 255

	// Multiple GPU libraries may exist, and some may not work, so keep trying until we exhaust them
	var ldPaths []string
	var patterns []string
	gpuLibPaths := []string{}
	slog.Debug("Searching for GPU library", "name", baseLibName)

	switch runtime.GOOS {
	case "windows":
		ldPaths = strings.Split(os.Getenv("PATH"), ";")
	case "linux":
		ldPaths = strings.Split(os.Getenv("LD_LIBRARY_PATH"), ":")
	default:
		return gpuLibPaths
	}
	// Start with whatever we find in the PATH/LD_LIBRARY_PATH
	for _, ldPath := range ldPaths {
		if ldPath == "" {
			// filepath.Abs("") is the working directory; never search it implicitly.
			continue
		}
		d, err := filepath.Abs(ldPath)
		if err != nil {
			continue
		}
		patterns = append(patterns, filepath.Join(d, baseLibName+"*"))
	}
	patterns = append(patterns, defaultPatterns...)
	slog.Debug("gpu library search", "globs", patterns)

	seen := make(map[string]struct{})
	for _, pattern := range patterns {
		// Ignore glob discovery errors
		matches, _ := filepath.Glob(pattern)
		for _, match := range matches {
			// Resolve any links so we don't try the same lib multiple times
			// and weed out any dups across globs
			libPath := match
			resolved := false
			for hops := 0; hops <= maxSymlinkHops; hops++ {
				target, err := os.Readlink(libPath)
				if err != nil {
					// Not a link (or unreadable): libPath is the final path.
					resolved = true
					break
				}
				if !filepath.IsAbs(target) {
					// Relative link targets are relative to the link's directory.
					target = filepath.Join(filepath.Dir(libPath), target)
				}
				libPath = target
			}
			if !resolved {
				slog.Debug("skipping GPU library with too many levels of symbolic links", "path", match)
				continue
			}
			if _, dup := seen[libPath]; dup {
				continue
			}
			seen[libPath] = struct{}{}
			gpuLibPaths = append(gpuLibPaths, libPath)
		}
	}
	slog.Debug("discovered GPU libraries", "paths", gpuLibPaths)
	return gpuLibPaths
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
298
func LoadCUDARTMgmt(cudartLibPaths []string) (int, *C.cudart_handle_t, string) {
299
	var resp C.cudart_init_resp_t
300
	resp.ch.verbose = getVerboseState()
301
	for _, libPath := range cudartLibPaths {
302
303
		lib := C.CString(libPath)
		defer C.free(unsafe.Pointer(lib))
304
		C.cudart_init(lib, &resp)
305
		if resp.err != nil {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
306
			slog.Debug("Unable to load cudart", "library", libPath, "error", C.GoString(resp.err))
307
308
			C.free(unsafe.Pointer(resp.err))
		} else {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
309
			return int(resp.num_devices), &resp.ch, libPath
310
311
		}
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
312
	return 0, nil, ""
313
314
}

315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
// LoadNVCUDAMgmt tries to initialize the nvcuda library from each path in
// nvcudaLibPaths, in order, and stops at the first one that loads.
//
// It returns the device count reported by that library, a handle to it, and
// the path it was loaded from. When no candidate loads it returns 0, nil and
// "". Load failures are logged at debug level.
func LoadNVCUDAMgmt(nvcudaLibPaths []string) (int, *C.nvcuda_handle_t, string) {
	var resp C.nvcuda_init_resp_t
	resp.ch.verbose = getVerboseState()
	for _, libPath := range nvcudaLibPaths {
		lib := C.CString(libPath)
		C.nvcuda_init(lib, &resp)
		// Release the C copy of the path now; a defer inside the loop would
		// keep every candidate's string allocated until the function returns.
		C.free(unsafe.Pointer(lib))
		if resp.err != nil {
			slog.Debug("Unable to load nvcuda", "library", libPath, "error", C.GoString(resp.err))
			C.free(unsafe.Pointer(resp.err))
			// resp is reused for the next candidate; drop the freed pointer.
			resp.err = nil
			continue
		}
		return int(resp.num_devices), &resp.ch, libPath
	}
	return 0, nil, ""
}

332
333
334
335
336
337
// getVerboseState reports, as a C flag value, whether verbose output is
// wanted: 1 when the OLLAMA_DEBUG environment variable is set to any
// non-empty value, 0 otherwise.
func getVerboseState() C.uint16_t {
	var verbose C.uint16_t
	if os.Getenv("OLLAMA_DEBUG") != "" {
		verbose = 1
	}
	return verbose
}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356

// GetVisibleDevicesEnv works out the visible-devices environment variable for
// the GPUs in the list, returning the pair produced by the library-specific
// helper.
//
// Only the first entry's library is consulted, so when the list mixes
// libraries the first one decides. An empty list, or a library other than
// "cuda" or "rocm", yields two empty strings.
func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) {
	if len(l) > 0 {
		switch library := l[0].Library; library {
		case "cuda":
			return cudaGetVisibleDevicesEnv(l)
		case "rocm":
			return rocmGetVisibleDevicesEnv(l)
		default:
			slog.Debug("no filter required for library " + library)
		}
	}
	return "", ""
}