gpu.go 10.7 KB
Newer Older
1
2
3
4
5
//go:build linux || windows

package gpu

/*
6
7
8
#cgo linux LDFLAGS: -lrt -lpthread -ldl -lstdc++ -lm
#cgo windows LDFLAGS: -lpthread

9
10
11
12
13
14
#include "gpu_info.h"

*/
import "C"
import (
	"fmt"
15
	"log/slog"
16
17
	"os"
	"path/filepath"
18
	"runtime"
19
	"strings"
20
21
	"sync"
	"unsafe"
Michael Yang's avatar
Michael Yang committed
22
23

	"github.com/ollama/ollama/format"
24
	"github.com/ollama/ollama/server/envconfig"
25
26
27
)

type handles struct {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
28
29
	deviceCount int
	cudart      *C.cudart_handle_t
30
	nvcuda      *C.nvcuda_handle_t
31
32
}

Michael Yang's avatar
Michael Yang committed
33
const (
Daniel Hiltgen's avatar
Daniel Hiltgen committed
34
35
	cudaMinimumMemory = 457 * format.MebiByte
	rocmMinimumMemory = 457 * format.MebiByte
Michael Yang's avatar
Michael Yang committed
36
37
)

38
39
var gpuMutex sync.Mutex

40
41
// With our current CUDA compile flags, older than 5.0 will not work properly
var CudaComputeMin = [2]C.int{5, 0}
42

Daniel Hiltgen's avatar
Daniel Hiltgen committed
43
var RocmComputeMin = 9
44

Daniel Hiltgen's avatar
Daniel Hiltgen committed
45
46
// TODO find a better way to detect iGPU instead of minimum memory
const IGPUMemLimit = 1 * format.GibiByte // 512G is what they typically report, so anything less than 1G must be iGPU
47

48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
var (
	// CudartLinuxGlobs lists the default glob patterns searched for the CUDA
	// runtime library (libcudart) on Linux. The order is the search order.
	CudartLinuxGlobs = []string{
		"/usr/local/cuda/lib64/libcudart.so*",
		"/usr/lib/x86_64-linux-gnu/nvidia/current/libcudart.so*",
		"/usr/lib/x86_64-linux-gnu/libcudart.so*",
		"/usr/lib/wsl/lib/libcudart.so*",
		"/usr/lib/wsl/drivers/*/libcudart.so*",
		"/opt/cuda/lib64/libcudart.so*",
		"/usr/local/cuda*/targets/aarch64-linux/lib/libcudart.so*",
		"/usr/lib/aarch64-linux-gnu/nvidia/current/libcudart.so*",
		"/usr/lib/aarch64-linux-gnu/libcudart.so*",
		"/usr/local/cuda/lib*/libcudart.so*",
		"/usr/lib*/libcudart.so*",
		"/usr/local/lib*/libcudart.so*",
	}

	// CudartWindowsGlobs lists the default glob patterns searched for the
	// CUDA runtime DLL on Windows.
	CudartWindowsGlobs = []string{
		"c:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v*\\bin\\cudart64_*.dll",
	}
)

67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
var (
	// NvcudaLinuxGlobs lists the default glob patterns searched for the CUDA
	// driver library (libcuda) on Linux. The order is the search order.
	NvcudaLinuxGlobs = []string{
		"/usr/local/cuda*/targets/*/lib/libcuda.so*",
		"/usr/lib/*-linux-gnu/nvidia/current/libcuda.so*",
		"/usr/lib/*-linux-gnu/libcuda.so*",
		"/usr/lib/wsl/lib/libcuda.so*",
		"/usr/lib/wsl/drivers/*/libcuda.so*",
		"/opt/cuda/lib*/libcuda.so*",
		"/usr/local/cuda/lib*/libcuda.so*",
		"/usr/lib*/libcuda.so*",
		"/usr/local/lib*/libcuda.so*",
	}

	// NvcudaWindowsGlobs lists the default glob patterns searched for the
	// CUDA driver DLL on Windows.
	NvcudaWindowsGlobs = []string{
		"c:\\windows\\system*\\nvcuda.dll",
	}
)

83
84
85
86
// CudaTegra is the value of the JETSON_JETPACK environment variable, read once
// at package initialization; it is empty when the variable is unset.
// Jetson devices have JETSON_JETPACK="x.y.z" factory set to the Jetpack version
// installed. Included to drive logic for reducing Ollama-allocated overhead on
// L4T/Jetson devices.
var CudaTegra = os.Getenv("JETSON_JETPACK")

87
// Note: gpuMutex must already be held
88
func initGPUHandles() *handles {
89

90
	// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing
91

Daniel Hiltgen's avatar
Daniel Hiltgen committed
92
	gpuHandles := &handles{}
93
94
	var cudartMgmtName string
	var cudartMgmtPatterns []string
95
96
	var nvcudaMgmtName string
	var nvcudaMgmtPatterns []string
97
98

	tmpDir, _ := PayloadsDir()
99
100
	switch runtime.GOOS {
	case "windows":
101
102
103
104
		cudartMgmtName = "cudart64_*.dll"
		localAppData := os.Getenv("LOCALAPPDATA")
		cudartMgmtPatterns = []string{filepath.Join(localAppData, "Programs", "Ollama", cudartMgmtName)}
		cudartMgmtPatterns = append(cudartMgmtPatterns, CudartWindowsGlobs...)
105
106
107
		// Aligned with driver, we can't carry as payloads
		nvcudaMgmtName = "nvcuda.dll"
		nvcudaMgmtPatterns = NvcudaWindowsGlobs
108
	case "linux":
109
110
111
112
113
114
		cudartMgmtName = "libcudart.so*"
		if tmpDir != "" {
			// TODO - add "payloads" for subprocess
			cudartMgmtPatterns = []string{filepath.Join(tmpDir, "cuda*", cudartMgmtName)}
		}
		cudartMgmtPatterns = append(cudartMgmtPatterns, CudartLinuxGlobs...)
115
116
117
		// Aligned with driver, we can't carry as payloads
		nvcudaMgmtName = "libcuda.so*"
		nvcudaMgmtPatterns = NvcudaLinuxGlobs
118
	default:
119
		return gpuHandles
120
121
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
122
	slog.Debug("Detecting GPUs")
123
124
125
126
	nvcudaLibPaths := FindGPULibs(nvcudaMgmtName, nvcudaMgmtPatterns)
	if len(nvcudaLibPaths) > 0 {
		deviceCount, nvcuda, libPath := LoadNVCUDAMgmt(nvcudaLibPaths)
		if nvcuda != nil {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
127
			slog.Debug("detected GPUs", "count", deviceCount, "library", libPath)
128
129
130
131
132
133
			gpuHandles.nvcuda = nvcuda
			gpuHandles.deviceCount = deviceCount
			return gpuHandles
		}
	}

134
135
	cudartLibPaths := FindGPULibs(cudartMgmtName, cudartMgmtPatterns)
	if len(cudartLibPaths) > 0 {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
136
		deviceCount, cudart, libPath := LoadCUDARTMgmt(cudartLibPaths)
137
		if cudart != nil {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
138
			slog.Debug("detected GPUs", "library", libPath, "count", deviceCount)
139
			gpuHandles.cudart = cudart
Daniel Hiltgen's avatar
Daniel Hiltgen committed
140
			gpuHandles.deviceCount = deviceCount
141
			return gpuHandles
142
143
		}
	}
144
	return gpuHandles
145
146
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
147
func GetGPUInfo() GpuInfoList {
148
149
150
151
	// TODO - consider exploring lspci (and equivalent on windows) to check for
	// GPUs so we can report warnings if we see Nvidia/AMD but fail to load the libraries
	gpuMutex.Lock()
	defer gpuMutex.Unlock()
152
153
154
155
156
157

	gpuHandles := initGPUHandles()
	defer func() {
		if gpuHandles.cudart != nil {
			C.cudart_release(*gpuHandles.cudart)
		}
158
159
160
		if gpuHandles.nvcuda != nil {
			C.nvcuda_release(*gpuHandles.nvcuda)
		}
161
	}()
162

163
	// All our GPU builds on x86 have AVX enabled, so fallback to CPU if we don't detect at least AVX
164
	cpuVariant := GetCPUVariant()
165
	if cpuVariant == "" && runtime.GOARCH == "amd64" {
166
167
168
		slog.Warn("CPU does not have AVX or AVX2, disabling GPU support.")
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
169
170
171
172
173
174
	// On windows we bundle the nvidia library one level above the runner dir
	depPath := ""
	if runtime.GOOS == "windows" && envconfig.RunnersDir != "" {
		depPath = filepath.Dir(envconfig.RunnersDir)
	}

175
	var memInfo C.mem_info_t
Daniel Hiltgen's avatar
Daniel Hiltgen committed
176
177
178
179
180
181
182
	resp := []GpuInfo{}

	// NVIDIA first
	for i := 0; i < gpuHandles.deviceCount; i++ {
		// TODO once we support CPU compilation variants of GPU libraries refine this...
		if cpuVariant == "" && runtime.GOARCH == "amd64" {
			continue
183
		}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
184
185
186
		gpuInfo := GpuInfo{
			Library: "cuda",
		}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
187
188
		var driverMajor int
		var driverMinor int
189
190
191
192
		if gpuHandles.cudart != nil {
			C.cudart_check_vram(*gpuHandles.cudart, C.int(i), &memInfo)
		} else {
			C.nvcuda_check_vram(*gpuHandles.nvcuda, C.int(i), &memInfo)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
193
194
			driverMajor = int(gpuHandles.nvcuda.driver_major)
			driverMinor = int(gpuHandles.nvcuda.driver_minor)
195
		}
196
		if memInfo.err != nil {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
197
			slog.Info("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
198
			C.free(unsafe.Pointer(memInfo.err))
Daniel Hiltgen's avatar
Daniel Hiltgen committed
199
			continue
200
		}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
201
202
203
		if memInfo.major < CudaComputeMin[0] || (memInfo.major == CudaComputeMin[0] && memInfo.minor < CudaComputeMin[1]) {
			slog.Info(fmt.Sprintf("[%d] CUDA GPU is too old. Compute Capability detected: %d.%d", i, memInfo.major, memInfo.minor))
			continue
204
		}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
205
206
207
		gpuInfo.TotalMemory = uint64(memInfo.total)
		gpuInfo.FreeMemory = uint64(memInfo.free)
		gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
Daniel Hiltgen's avatar
Daniel Hiltgen committed
208
		gpuInfo.Compute = fmt.Sprintf("%d.%d", memInfo.major, memInfo.minor)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
209
		gpuInfo.MinimumMemory = cudaMinimumMemory
Daniel Hiltgen's avatar
Daniel Hiltgen committed
210
		gpuInfo.DependencyPath = depPath
Daniel Hiltgen's avatar
Daniel Hiltgen committed
211
212
213
		gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
		gpuInfo.DriverMajor = int(driverMajor)
		gpuInfo.DriverMinor = int(driverMinor)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
214
215
216

		// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
		resp = append(resp, gpuInfo)
217
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
218
219
220
221
222

	// Then AMD
	resp = append(resp, AMDGetGPUInfo()...)

	if len(resp) == 0 {
223
		C.cpu_check_ram(&memInfo)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
224
225
226
227
228
229
230
231
232
233
234
235
236
237
		if memInfo.err != nil {
			slog.Info("error looking up CPU memory", "error", C.GoString(memInfo.err))
			C.free(unsafe.Pointer(memInfo.err))
			return resp
		}
		gpuInfo := GpuInfo{
			Library: "cpu",
			Variant: cpuVariant,
		}
		gpuInfo.TotalMemory = uint64(memInfo.total)
		gpuInfo.FreeMemory = uint64(memInfo.free)
		gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])

		resp = append(resp, gpuInfo)
238
	}
239

240
241
242
	return resp
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
243
// GetCPUMem returns the free and total system memory reported by the native
// cpu_check_ram helper. When the helper reports an error, its message is
// returned as the error and the memInfo result is the zero value.
func GetCPUMem() (memInfo, error) {
	var ret memInfo
	var info C.mem_info_t
	C.cpu_check_ram(&info)
	if info.err != nil {
		// Copy the message into Go memory before releasing the C string.
		msg := C.GoString(info.err)
		C.free(unsafe.Pointer(info.err))
		// The message comes from C and must not be used as the format string:
		// any '%' in it would be interpreted as a formatting verb.
		return ret, fmt.Errorf("%s", msg)
	}
	ret.FreeMemory = uint64(info.free)
	ret.TotalMemory = uint64(info.total)
	return ret, nil
}

256
// FindGPULibs returns candidate paths for the GPU library named baseLibName.
//
// Directories from PATH (windows) or LD_LIBRARY_PATH (linux) are searched
// first using baseLibName+"*", followed by the glob patterns in
// defaultPatterns. Every match is resolved through any chain of symbolic
// links, and each resolved path appears once, in discovery order. On any
// other operating system the result is empty. The returned slice is never nil.
//
// Patterns containing "PhysX" are skipped, empty search-path entries are
// ignored rather than being treated as the current working directory, and
// matches whose symlink chain does not terminate are dropped.
func FindGPULibs(baseLibName string, defaultPatterns []string) []string {
	// Multiple GPU libraries may exist, and some may not work, so keep trying until we exhaust them
	gpuLibPaths := []string{}
	slog.Debug("Searching for GPU library", "name", baseLibName)

	var ldPaths []string
	switch runtime.GOOS {
	case "windows":
		ldPaths = strings.Split(os.Getenv("PATH"), ";")
	case "linux":
		ldPaths = strings.Split(os.Getenv("LD_LIBRARY_PATH"), ":")
	default:
		return gpuLibPaths
	}

	// Start with whatever we find in the PATH/LD_LIBRARY_PATH
	patterns := make([]string, 0, len(ldPaths)+len(defaultPatterns))
	for _, ldPath := range ldPaths {
		if ldPath == "" {
			// filepath.Abs("") is the working directory; an unset or empty
			// variable must not pull libraries from wherever we were started.
			continue
		}
		d, err := filepath.Abs(ldPath)
		if err != nil {
			continue
		}
		patterns = append(patterns, filepath.Join(d, baseLibName+"*"))
	}
	patterns = append(patterns, defaultPatterns...)
	slog.Debug("gpu library search", "globs", patterns)

	seen := make(map[string]struct{})
	for _, pattern := range patterns {
		// Nvidia PhysX known to return bogus results
		if strings.Contains(pattern, "PhysX") {
			slog.Debug("skipping PhysX cuda library path", "path", pattern)
			continue
		}
		// Ignore glob discovery errors
		matches, _ := filepath.Glob(pattern)
		for _, match := range matches {
			// Resolve any links so we don't try the same lib multiple times
			// and weed out any dups across globs
			libPath, ok := resolveGPULibSymlinks(match)
			if !ok {
				slog.Debug("skipping GPU library with unresolvable symlink chain", "path", match)
				continue
			}
			if _, dup := seen[libPath]; dup {
				continue
			}
			seen[libPath] = struct{}{}
			gpuLibPaths = append(gpuLibPaths, libPath)
		}
	}
	slog.Debug("discovered GPU libraries", "paths", gpuLibPaths)
	return gpuLibPaths
}

// resolveGPULibSymlinks follows path through successive symbolic links and
// returns the final target. Relative link targets are interpreted relative to
// the directory containing the link. ok is false when the chain is still
// unresolved after maxHops links, which is treated as a link cycle.
func resolveGPULibSymlinks(path string) (resolved string, ok bool) {
	const maxHops = 64
	for hop := 0; hop < maxHops; hop++ {
		target, err := os.Readlink(path)
		if err != nil {
			// Not a link (or not readable as one): nothing further to follow.
			return path, true
		}
		if !filepath.IsAbs(target) {
			target = filepath.Join(filepath.Dir(path), target)
		}
		path = target
	}
	return "", false
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
317
func LoadCUDARTMgmt(cudartLibPaths []string) (int, *C.cudart_handle_t, string) {
318
	var resp C.cudart_init_resp_t
319
	resp.ch.verbose = getVerboseState()
320
	for _, libPath := range cudartLibPaths {
321
322
		lib := C.CString(libPath)
		defer C.free(unsafe.Pointer(lib))
323
		C.cudart_init(lib, &resp)
324
		if resp.err != nil {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
325
			slog.Debug("Unable to load cudart", "library", libPath, "error", C.GoString(resp.err))
326
327
			C.free(unsafe.Pointer(resp.err))
		} else {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
328
			return int(resp.num_devices), &resp.ch, libPath
329
330
		}
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
331
	return 0, nil, ""
332
333
}

334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
// LoadNVCUDAMgmt tries each path in nvcudaLibPaths in order and initializes
// the CUDA driver management library from the first one that loads.
//
// It returns the device count reported by the library, a handle to it, and the
// path that was loaded. If no library loads it returns 0, nil and "".
func LoadNVCUDAMgmt(nvcudaLibPaths []string) (int, *C.nvcuda_handle_t, string) {
	var resp C.nvcuda_init_resp_t
	resp.ch.verbose = getVerboseState()
	for _, libPath := range nvcudaLibPaths {
		lib := C.CString(libPath)
		C.nvcuda_init(lib, &resp)
		// Release the C copy of the path right away instead of deferring inside
		// the loop, which would keep one allocation per candidate alive until
		// return. The handle is not used again before this function returns, so
		// this is no less safe than freeing at return.
		C.free(unsafe.Pointer(lib))
		if resp.err != nil {
			slog.Debug("Unable to load nvcuda", "library", libPath, "error", C.GoString(resp.err))
			C.free(unsafe.Pointer(resp.err))
			// resp is reused for the next candidate; don't leave a dangling pointer in it.
			resp.err = nil
			continue
		}
		return int(resp.num_devices), &resp.ch, libPath
	}
	return 0, nil, ""
}

351
// getVerboseState converts envconfig.Debug into the verbose flag stored on the
// native library handles: 1 when debug is enabled, 0 otherwise.
func getVerboseState() C.uint16_t {
	var verbose C.uint16_t
	if envconfig.Debug {
		verbose = 1
	}
	return verbose
}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375

// GetVisibleDevicesEnv returns the visible-devices environment variable for
// the GPUs this instantiation is targeted at, as the pair of strings produced
// by the library-specific helper (presumably variable name and value).
//
// The library of the first entry decides which helper is used, so if different
// libraries are detected, the first one is what we use. An empty list, or a
// library other than "cuda" or "rocm", yields two empty strings.
func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) {
	if len(l) == 0 {
		return "", ""
	}

	library := l[0].Library
	if library == "cuda" {
		return cudaGetVisibleDevicesEnv(l)
	}
	if library == "rocm" {
		return rocmGetVisibleDevicesEnv(l)
	}

	slog.Debug("no filter required for library " + library)
	return "", ""
}