//go:build linux || windows

package gpu

/*
#cgo linux LDFLAGS: -lrt -lpthread -ldl -lstdc++ -lm
#cgo windows LDFLAGS: -lpthread

#include "gpu_info.h"

*/
import "C"
import (
	"fmt"
15
	"log/slog"
16
17
	"os"
	"path/filepath"
18
	"runtime"
19
	"strings"
20
21
	"sync"
	"unsafe"
Michael Yang's avatar
Michael Yang committed
22
23

	"github.com/ollama/ollama/format"
24
	"github.com/ollama/ollama/server/envconfig"
25
26
27
)

type handles struct {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
28
29
	deviceCount int
	cudart      *C.cudart_handle_t
30
	nvcuda      *C.nvcuda_handle_t
31
32
}

Michael Yang's avatar
Michael Yang committed
33
const (
34
35
	cudaMinimumMemory = 457 * format.MebiByte
	rocmMinimumMemory = 457 * format.MebiByte
Michael Yang's avatar
Michael Yang committed
36
37
)

38
39
// gpuMutex serializes GPU discovery: GetGPUInfo holds it for the whole call,
// and initGPUHandles expects its caller to already hold it.
var gpuMutex sync.Mutex

40
41
// CudaComputeMin is the lowest CUDA compute capability, as {major, minor},
// that GetGPUInfo accepts; devices reporting anything lower are skipped.
// With our current CUDA compile flags, older than 5.0 will not work properly
var CudaComputeMin = [2]C.int{5, 0}
42

Daniel Hiltgen's avatar
Daniel Hiltgen committed
43
// RocmComputeMin is presumably the minimum compute level accepted for ROCm
// devices.
// NOTE(review): it is not referenced in this part of the file; confirm its
// exact meaning against the AMD discovery code.
var RocmComputeMin = 9
44

Daniel Hiltgen's avatar
Daniel Hiltgen committed
45
46
// IGPUMemLimit is the memory size below which a GPU is assumed to be an
// integrated GPU (iGPU).
// NOTE(review): the trailing note used to read "512G", which contradicts the
// "less than 1G" reasoning; 512M is presumably what was meant - confirm.
// TODO find a better way to detect iGPU instead of minimum memory
const IGPUMemLimit = 1 * format.GibiByte // 512M is what they typically report, so anything less than 1G must be iGPU
47

48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
// CudartLinuxGlobs lists the default glob patterns used to locate the CUDA
// runtime library (libcudart) on Linux. initGPUHandles appends them after the
// bundled-payload pattern, and FindGPULibs evaluates them in the order given
// here, after any LD_LIBRARY_PATH entries.
var CudartLinuxGlobs = []string{
	"/usr/local/cuda/lib64/libcudart.so*",
	"/usr/lib/x86_64-linux-gnu/nvidia/current/libcudart.so*",
	"/usr/lib/x86_64-linux-gnu/libcudart.so*",
	"/usr/lib/wsl/lib/libcudart.so*",
	"/usr/lib/wsl/drivers/*/libcudart.so*",
	"/opt/cuda/lib64/libcudart.so*",
	"/usr/local/cuda*/targets/aarch64-linux/lib/libcudart.so*",
	"/usr/lib/aarch64-linux-gnu/nvidia/current/libcudart.so*",
	"/usr/lib/aarch64-linux-gnu/libcudart.so*",
	"/usr/local/cuda/lib*/libcudart.so*",
	"/usr/lib*/libcudart.so*",
	"/usr/local/lib*/libcudart.so*",
}

// CudartWindowsGlobs lists the default glob patterns used to locate the CUDA
// runtime DLL on Windows. initGPUHandles appends them after the per-user
// Ollama install location.
var CudartWindowsGlobs = []string{
	"c:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v*\\bin\\cudart64_*.dll",
}

67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
// NvcudaLinuxGlobs lists the default glob patterns used to locate the NVIDIA
// driver library (libcuda) on Linux. initGPUHandles passes them to
// FindGPULibs unchanged, which evaluates them in the order given here after
// any LD_LIBRARY_PATH entries.
var NvcudaLinuxGlobs = []string{
	"/usr/local/cuda*/targets/*/lib/libcuda.so*",
	"/usr/lib/*-linux-gnu/nvidia/current/libcuda.so*",
	"/usr/lib/*-linux-gnu/libcuda.so*",
	"/usr/lib/wsl/lib/libcuda.so*",
	"/usr/lib/wsl/drivers/*/libcuda.so*",
	"/opt/cuda/lib*/libcuda.so*",
	"/usr/local/cuda/lib*/libcuda.so*",
	"/usr/lib*/libcuda.so*",
	"/usr/local/lib*/libcuda.so*",
}

// NvcudaWindowsGlobs lists the default glob patterns used to locate the
// NVIDIA driver DLL (nvcuda.dll) on Windows. initGPUHandles passes them to
// FindGPULibs unchanged.
var NvcudaWindowsGlobs = []string{
	"c:\\windows\\system*\\nvcuda.dll",
}

83
84
85
86
// CudaTegra captures the JETSON_JETPACK environment variable once, at package
// initialization; it is empty when the variable is unset. Jetson devices have
// it factory set to the installed Jetpack version ("x.y.z"). It is included to
// drive logic for reducing Ollama-allocated overhead on L4T/Jetson devices.
var CudaTegra = os.Getenv("JETSON_JETPACK")

87
// Note: gpuMutex must already be held
88
func initGPUHandles() *handles {
89

90
	// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing
91

Daniel Hiltgen's avatar
Daniel Hiltgen committed
92
	gpuHandles := &handles{}
93
94
	var cudartMgmtName string
	var cudartMgmtPatterns []string
95
96
	var nvcudaMgmtName string
	var nvcudaMgmtPatterns []string
97
98

	tmpDir, _ := PayloadsDir()
99
100
	switch runtime.GOOS {
	case "windows":
101
102
103
104
		cudartMgmtName = "cudart64_*.dll"
		localAppData := os.Getenv("LOCALAPPDATA")
		cudartMgmtPatterns = []string{filepath.Join(localAppData, "Programs", "Ollama", cudartMgmtName)}
		cudartMgmtPatterns = append(cudartMgmtPatterns, CudartWindowsGlobs...)
105
106
107
		// Aligned with driver, we can't carry as payloads
		nvcudaMgmtName = "nvcuda.dll"
		nvcudaMgmtPatterns = NvcudaWindowsGlobs
108
	case "linux":
109
110
111
112
113
114
		cudartMgmtName = "libcudart.so*"
		if tmpDir != "" {
			// TODO - add "payloads" for subprocess
			cudartMgmtPatterns = []string{filepath.Join(tmpDir, "cuda*", cudartMgmtName)}
		}
		cudartMgmtPatterns = append(cudartMgmtPatterns, CudartLinuxGlobs...)
115
116
117
		// Aligned with driver, we can't carry as payloads
		nvcudaMgmtName = "libcuda.so*"
		nvcudaMgmtPatterns = NvcudaLinuxGlobs
118
	default:
119
		return gpuHandles
120
121
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
122
	slog.Info("Detecting GPUs")
123
124
125
126
127
128
129
130
131
132
133
	nvcudaLibPaths := FindGPULibs(nvcudaMgmtName, nvcudaMgmtPatterns)
	if len(nvcudaLibPaths) > 0 {
		deviceCount, nvcuda, libPath := LoadNVCUDAMgmt(nvcudaLibPaths)
		if nvcuda != nil {
			slog.Info("detected GPUs", "count", deviceCount, "library", libPath)
			gpuHandles.nvcuda = nvcuda
			gpuHandles.deviceCount = deviceCount
			return gpuHandles
		}
	}

134
135
	cudartLibPaths := FindGPULibs(cudartMgmtName, cudartMgmtPatterns)
	if len(cudartLibPaths) > 0 {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
136
		deviceCount, cudart, libPath := LoadCUDARTMgmt(cudartLibPaths)
137
		if cudart != nil {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
138
			slog.Info("detected GPUs", "library", libPath, "count", deviceCount)
139
			gpuHandles.cudart = cudart
Daniel Hiltgen's avatar
Daniel Hiltgen committed
140
			gpuHandles.deviceCount = deviceCount
141
			return gpuHandles
142
143
		}
	}
144
	return gpuHandles
145
146
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
147
func GetGPUInfo() GpuInfoList {
148
149
150
151
	// TODO - consider exploring lspci (and equivalent on windows) to check for
	// GPUs so we can report warnings if we see Nvidia/AMD but fail to load the libraries
	gpuMutex.Lock()
	defer gpuMutex.Unlock()
152
153
154
155
156
157

	gpuHandles := initGPUHandles()
	defer func() {
		if gpuHandles.cudart != nil {
			C.cudart_release(*gpuHandles.cudart)
		}
158
159
160
		if gpuHandles.nvcuda != nil {
			C.nvcuda_release(*gpuHandles.nvcuda)
		}
161
	}()
162

163
	// All our GPU builds on x86 have AVX enabled, so fallback to CPU if we don't detect at least AVX
164
	cpuVariant := GetCPUVariant()
165
	if cpuVariant == "" && runtime.GOARCH == "amd64" {
166
167
168
		slog.Warn("CPU does not have AVX or AVX2, disabling GPU support.")
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
169
170
171
172
173
174
	// On windows we bundle the nvidia library one level above the runner dir
	depPath := ""
	if runtime.GOOS == "windows" && envconfig.RunnersDir != "" {
		depPath = filepath.Dir(envconfig.RunnersDir)
	}

175
	var memInfo C.mem_info_t
Daniel Hiltgen's avatar
Daniel Hiltgen committed
176
177
178
179
180
181
182
	resp := []GpuInfo{}

	// NVIDIA first
	for i := 0; i < gpuHandles.deviceCount; i++ {
		// TODO once we support CPU compilation variants of GPU libraries refine this...
		if cpuVariant == "" && runtime.GOARCH == "amd64" {
			continue
183
		}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
184
185
186
		gpuInfo := GpuInfo{
			Library: "cuda",
		}
187
188
189
190
191
		if gpuHandles.cudart != nil {
			C.cudart_check_vram(*gpuHandles.cudart, C.int(i), &memInfo)
		} else {
			C.nvcuda_check_vram(*gpuHandles.nvcuda, C.int(i), &memInfo)
		}
192
		if memInfo.err != nil {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
193
			slog.Info("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
194
			C.free(unsafe.Pointer(memInfo.err))
Daniel Hiltgen's avatar
Daniel Hiltgen committed
195
			continue
196
		}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
197
198
199
		if memInfo.major < CudaComputeMin[0] || (memInfo.major == CudaComputeMin[0] && memInfo.minor < CudaComputeMin[1]) {
			slog.Info(fmt.Sprintf("[%d] CUDA GPU is too old. Compute Capability detected: %d.%d", i, memInfo.major, memInfo.minor))
			continue
200
		}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
201
202
203
204
205
206
		gpuInfo.TotalMemory = uint64(memInfo.total)
		gpuInfo.FreeMemory = uint64(memInfo.free)
		gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
		gpuInfo.Major = int(memInfo.major)
		gpuInfo.Minor = int(memInfo.minor)
		gpuInfo.MinimumMemory = cudaMinimumMemory
Daniel Hiltgen's avatar
Daniel Hiltgen committed
207
		gpuInfo.DependencyPath = depPath
Daniel Hiltgen's avatar
Daniel Hiltgen committed
208
209
210

		// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
		resp = append(resp, gpuInfo)
211
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
212
213
214
215
216

	// Then AMD
	resp = append(resp, AMDGetGPUInfo()...)

	if len(resp) == 0 {
217
		C.cpu_check_ram(&memInfo)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
218
219
220
221
222
223
224
225
226
227
228
229
230
231
		if memInfo.err != nil {
			slog.Info("error looking up CPU memory", "error", C.GoString(memInfo.err))
			C.free(unsafe.Pointer(memInfo.err))
			return resp
		}
		gpuInfo := GpuInfo{
			Library: "cpu",
			Variant: cpuVariant,
		}
		gpuInfo.TotalMemory = uint64(memInfo.total)
		gpuInfo.FreeMemory = uint64(memInfo.free)
		gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])

		resp = append(resp, gpuInfo)
232
	}
233

234
235
236
	return resp
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
237
// GetCPUMem reports the total and free system memory obtained from the native
// cpu_check_ram helper.
//
// When the native lookup reports an error, the zero memInfo is returned along
// with an error carrying the native message verbatim.
func GetCPUMem() (memInfo, error) {
	var ret memInfo
	var info C.mem_info_t
	C.cpu_check_ram(&info)
	if info.err != nil {
		// Copy the message into Go memory before releasing the C string.
		msg := C.GoString(info.err)
		C.free(unsafe.Pointer(info.err))
		// The message is data, not a format string: pass it through an
		// explicit verb so a '%' inside it is not interpreted by fmt.
		return ret, fmt.Errorf("%s", msg)
	}
	ret.FreeMemory = uint64(info.free)
	ret.TotalMemory = uint64(info.total)
	return ret, nil
}

250
// FindGPULibs returns the candidate library files for baseLibName, in the
// order they should be tried.
//
// Directories from PATH (windows) or LD_LIBRARY_PATH (linux) are searched
// first using baseLibName+"*", followed by defaultPatterns. Patterns that
// mention "PhysX" are skipped entirely. Every match has its symlinks resolved
// so the same file reached through different names or patterns is reported
// only once; a match whose symlink chain exceeds maxSymlinkHops (for example a
// cycle) is dropped. On any other OS the result is empty.
func FindGPULibs(baseLibName string, defaultPatterns []string) []string {
	// Multiple GPU libraries may exist, and some may not work, so keep trying until we exhaust them

	// Upper bound on symlink hops followed per match, so that a symlink cycle
	// cannot hang discovery.
	const maxSymlinkHops = 40

	var ldPaths []string
	var patterns []string
	gpuLibPaths := []string{}
	slog.Debug("Searching for GPU library", "name", baseLibName)

	switch runtime.GOOS {
	case "windows":
		ldPaths = strings.Split(os.Getenv("PATH"), ";")
	case "linux":
		ldPaths = strings.Split(os.Getenv("LD_LIBRARY_PATH"), ":")
	default:
		return gpuLibPaths
	}
	// Start with whatever we find in the PATH/LD_LIBRARY_PATH
	for _, ldPath := range ldPaths {
		d, err := filepath.Abs(ldPath)
		if err != nil {
			continue
		}
		patterns = append(patterns, filepath.Join(d, baseLibName+"*"))
	}
	patterns = append(patterns, defaultPatterns...)
	slog.Debug("gpu library search", "globs", patterns)
	for _, pattern := range patterns {
		// Nvidia PhysX known to return bogus results
		if strings.Contains(pattern, "PhysX") {
			slog.Debug("skipping PhysX cuda library path", "path", pattern)
			continue
		}
		// Ignore glob discovery errors
		matches, _ := filepath.Glob(pattern)
		for _, match := range matches {
			// Resolve any links so we don't try the same lib multiple times
			// and weed out any dups across globs
			libPath := match
			resolved := true
			for hops := 0; ; hops++ {
				target, err := os.Readlink(libPath)
				if err != nil {
					// Not a symlink (or unreadable): libPath is final.
					break
				}
				if hops == maxSymlinkHops {
					resolved = false
					break
				}
				// Relative link targets are relative to the link's directory.
				if !filepath.IsAbs(target) {
					target = filepath.Join(filepath.Dir(libPath), target)
				}
				libPath = target
			}
			if !resolved {
				slog.Debug("skipping GPU library with too many levels of symlinks", "path", match)
				continue
			}

			known := false
			for _, seen := range gpuLibPaths {
				if seen == libPath {
					known = true
					break
				}
			}
			if !known {
				gpuLibPaths = append(gpuLibPaths, libPath)
			}
		}
	}
	slog.Debug("discovered GPU libraries", "paths", gpuLibPaths)
	return gpuLibPaths
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
311
func LoadCUDARTMgmt(cudartLibPaths []string) (int, *C.cudart_handle_t, string) {
312
	var resp C.cudart_init_resp_t
313
	resp.ch.verbose = getVerboseState()
314
	for _, libPath := range cudartLibPaths {
315
316
		lib := C.CString(libPath)
		defer C.free(unsafe.Pointer(lib))
317
		C.cudart_init(lib, &resp)
318
		if resp.err != nil {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
319
			slog.Debug("Unable to load cudart", "library", libPath, "error", C.GoString(resp.err))
320
321
			C.free(unsafe.Pointer(resp.err))
		} else {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
322
			return int(resp.num_devices), &resp.ch, libPath
323
324
		}
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
325
	return 0, nil, ""
326
327
}

328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
// LoadNVCUDAMgmt tries each path in nvcudaLibPaths, in order, with the native
// nvcuda_init and stops at the first one that initializes without error.
//
// It returns the device count reported by that library, a pointer to the
// initialized handle, and the path that was loaded. When no path can be
// loaded it returns 0, nil and "". Load failures are logged at debug level.
func LoadNVCUDAMgmt(nvcudaLibPaths []string) (int, *C.nvcuda_handle_t, string) {
	var resp C.nvcuda_init_resp_t
	resp.ch.verbose = getVerboseState()
	for _, libPath := range nvcudaLibPaths {
		lib := C.CString(libPath)
		C.nvcuda_init(lib, &resp)
		// Free the C copy of the path as soon as the call returns rather than
		// deferring inside the loop, which would keep every candidate path
		// allocated until the function exits.
		C.free(unsafe.Pointer(lib))
		if resp.err != nil {
			slog.Debug("Unable to load nvcuda", "library", libPath, "error", C.GoString(resp.err))
			C.free(unsafe.Pointer(resp.err))
			continue
		}
		return int(resp.num_devices), &resp.ch, libPath
	}
	return 0, nil, ""
}

345
// getVerboseState converts envconfig.Debug into the verbosity flag stored in
// the native library handles: 1 when debug is enabled, 0 otherwise.
func getVerboseState() C.uint16_t {
	if envconfig.Debug {
		return C.uint16_t(1)
	}
	return C.uint16_t(0)
}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369

// GetVisibleDevicesEnv works out the visible-devices environment variable for
// the GPUs this instantiation is targeted for, returning the pair of strings
// produced by the library-specific helper.
//
// Only the first entry's library is consulted, so when the list mixes
// libraries the first one wins. An empty list, or a library other than "cuda"
// or "rocm", yields two empty strings.
func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) {
	if len(l) == 0 {
		return "", ""
	}

	library := l[0].Library
	if library == "cuda" {
		return cudaGetVisibleDevicesEnv(l)
	}
	if library == "rocm" {
		return rocmGetVisibleDevicesEnv(l)
	}

	slog.Debug("no filter required for library " + library)
	return "", ""
}