gpu.go 10 KB
Newer Older
1
2
3
4
5
//go:build linux || windows

package gpu

/*
6
7
8
#cgo linux LDFLAGS: -lrt -lpthread -ldl -lstdc++ -lm
#cgo windows LDFLAGS: -lpthread

9
10
11
12
13
14
#include "gpu_info.h"

*/
import "C"
import (
	"fmt"
15
	"log/slog"
16
17
	"os"
	"path/filepath"
18
	"runtime"
19
	"strings"
20
21
	"sync"
	"unsafe"
Michael Yang's avatar
Michael Yang committed
22
23

	"github.com/ollama/ollama/format"
24
	"github.com/ollama/ollama/server/envconfig"
25
26
27
)

type handles struct {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
28
29
	deviceCount int
	cudart      *C.cudart_handle_t
30
	nvcuda      *C.nvcuda_handle_t
31
32
}

Michael Yang's avatar
Michael Yang committed
33
const (
34
35
	cudaMinimumMemory = 457 * format.MebiByte
	rocmMinimumMemory = 457 * format.MebiByte
Michael Yang's avatar
Michael Yang committed
36
37
)

38
39
// gpuMutex serializes GPU discovery: GetGPUInfo holds it for its entire
// duration, and initGPUHandles expects its caller to already hold it.
var gpuMutex sync.Mutex

40
41
// CudaComputeMin is the lowest supported CUDA compute capability, stored as
// {major, minor}. GetGPUInfo skips any device reporting a lower value.
// With our current CUDA compile flags, older than 5.0 will not work properly
var CudaComputeMin = [2]C.int{5, 0}
42

Daniel Hiltgen's avatar
Daniel Hiltgen committed
43
// RocmComputeMin is not referenced in this section of the file.
// NOTE(review): presumably the minimum supported AMD gfx major version,
// mirroring CudaComputeMin — confirm against the AMD discovery code.
var RocmComputeMin = 9
44

Daniel Hiltgen's avatar
Daniel Hiltgen committed
45
46
// TODO find a better way to detect iGPU instead of minimum memory
// NOTE(review): presumably compared against a device's total memory by the
// AMD discovery code, which is not shown in this section — confirm.
const IGPUMemLimit = 1 * format.GibiByte // 512M is what they typically report, so anything less than 1G must be iGPU
47

48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
// CudartLinuxGlobs lists the default glob patterns searched for the CUDA
// runtime library (libcudart) on Linux. initGPUHandles appends them after the
// bundled-payload pattern, and FindGPULibs evaluates patterns in slice order,
// so earlier entries are tried first.
var CudartLinuxGlobs = []string{
	"/usr/local/cuda/lib64/libcudart.so*",
	"/usr/lib/x86_64-linux-gnu/nvidia/current/libcudart.so*",
	"/usr/lib/x86_64-linux-gnu/libcudart.so*",
	"/usr/lib/wsl/lib/libcudart.so*",
	"/usr/lib/wsl/drivers/*/libcudart.so*",
	"/opt/cuda/lib64/libcudart.so*",
	"/usr/local/cuda*/targets/aarch64-linux/lib/libcudart.so*",
	"/usr/lib/aarch64-linux-gnu/nvidia/current/libcudart.so*",
	"/usr/lib/aarch64-linux-gnu/libcudart.so*",
	"/usr/local/cuda/lib*/libcudart.so*",
	"/usr/lib*/libcudart.so*",
	"/usr/local/lib*/libcudart.so*",
}

// CudartWindowsGlobs lists the default glob patterns searched for the CUDA
// runtime DLL on Windows. initGPUHandles appends them after the per-user
// Ollama install directory pattern.
var CudartWindowsGlobs = []string{
	"c:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v*\\bin\\cudart64_*.dll",
}

67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
// NvcudaLinuxGlobs lists the default glob patterns searched for the NVIDIA
// driver library (libcuda) on Linux. initGPUHandles passes them to
// FindGPULibs unchanged; patterns are evaluated in slice order.
var NvcudaLinuxGlobs = []string{
	"/usr/local/cuda*/targets/*/lib/libcuda.so*",
	"/usr/lib/*-linux-gnu/nvidia/current/libcuda.so*",
	"/usr/lib/*-linux-gnu/libcuda.so*",
	"/usr/lib/wsl/lib/libcuda.so*",
	"/usr/lib/wsl/drivers/*/libcuda.so*",
	"/opt/cuda/lib*/libcuda.so*",
	"/usr/local/cuda/lib*/libcuda.so*",
	"/usr/lib*/libcuda.so*",
	"/usr/local/lib*/libcuda.so*",
}

// NvcudaWindowsGlobs lists the default glob patterns searched for the NVIDIA
// driver DLL (nvcuda.dll) on Windows. initGPUHandles passes them to
// FindGPULibs unchanged.
var NvcudaWindowsGlobs = []string{
	"c:\\windows\\system*\\nvcuda.dll",
}

83
84
85
86
// CudaTegra captures the JETSON_JETPACK environment variable once, at package
// initialization. Jetson devices ship with it factory-set to the installed
// Jetpack version ("x.y.z"); it is the empty string when the variable is unset.
// It exists to drive logic that reduces Ollama-allocated overhead on
// L4T/Jetson devices.
var CudaTegra = os.Getenv("JETSON_JETPACK")

87
// Note: gpuMutex must already be held
88
func initGPUHandles() *handles {
89

90
	// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing
91

Daniel Hiltgen's avatar
Daniel Hiltgen committed
92
	gpuHandles := &handles{}
93
94
	var cudartMgmtName string
	var cudartMgmtPatterns []string
95
96
	var nvcudaMgmtName string
	var nvcudaMgmtPatterns []string
97
98

	tmpDir, _ := PayloadsDir()
99
100
	switch runtime.GOOS {
	case "windows":
101
102
103
104
		cudartMgmtName = "cudart64_*.dll"
		localAppData := os.Getenv("LOCALAPPDATA")
		cudartMgmtPatterns = []string{filepath.Join(localAppData, "Programs", "Ollama", cudartMgmtName)}
		cudartMgmtPatterns = append(cudartMgmtPatterns, CudartWindowsGlobs...)
105
106
107
		// Aligned with driver, we can't carry as payloads
		nvcudaMgmtName = "nvcuda.dll"
		nvcudaMgmtPatterns = NvcudaWindowsGlobs
108
	case "linux":
109
110
111
112
113
114
		cudartMgmtName = "libcudart.so*"
		if tmpDir != "" {
			// TODO - add "payloads" for subprocess
			cudartMgmtPatterns = []string{filepath.Join(tmpDir, "cuda*", cudartMgmtName)}
		}
		cudartMgmtPatterns = append(cudartMgmtPatterns, CudartLinuxGlobs...)
115
116
117
		// Aligned with driver, we can't carry as payloads
		nvcudaMgmtName = "libcuda.so*"
		nvcudaMgmtPatterns = NvcudaLinuxGlobs
118
	default:
119
		return gpuHandles
120
121
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
122
	slog.Info("Detecting GPUs")
123
124
125
126
127
128
129
130
131
132
133
	nvcudaLibPaths := FindGPULibs(nvcudaMgmtName, nvcudaMgmtPatterns)
	if len(nvcudaLibPaths) > 0 {
		deviceCount, nvcuda, libPath := LoadNVCUDAMgmt(nvcudaLibPaths)
		if nvcuda != nil {
			slog.Info("detected GPUs", "count", deviceCount, "library", libPath)
			gpuHandles.nvcuda = nvcuda
			gpuHandles.deviceCount = deviceCount
			return gpuHandles
		}
	}

134
135
	cudartLibPaths := FindGPULibs(cudartMgmtName, cudartMgmtPatterns)
	if len(cudartLibPaths) > 0 {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
136
		deviceCount, cudart, libPath := LoadCUDARTMgmt(cudartLibPaths)
137
		if cudart != nil {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
138
			slog.Info("detected GPUs", "library", libPath, "count", deviceCount)
139
			gpuHandles.cudart = cudart
Daniel Hiltgen's avatar
Daniel Hiltgen committed
140
			gpuHandles.deviceCount = deviceCount
141
			return gpuHandles
142
143
		}
	}
144
	return gpuHandles
145
146
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
147
func GetGPUInfo() GpuInfoList {
148
149
150
151
	// TODO - consider exploring lspci (and equivalent on windows) to check for
	// GPUs so we can report warnings if we see Nvidia/AMD but fail to load the libraries
	gpuMutex.Lock()
	defer gpuMutex.Unlock()
152
153
154
155
156
157

	gpuHandles := initGPUHandles()
	defer func() {
		if gpuHandles.cudart != nil {
			C.cudart_release(*gpuHandles.cudart)
		}
158
159
160
		if gpuHandles.nvcuda != nil {
			C.nvcuda_release(*gpuHandles.nvcuda)
		}
161
	}()
162

163
	// All our GPU builds on x86 have AVX enabled, so fallback to CPU if we don't detect at least AVX
164
	cpuVariant := GetCPUVariant()
165
	if cpuVariant == "" && runtime.GOARCH == "amd64" {
166
167
168
		slog.Warn("CPU does not have AVX or AVX2, disabling GPU support.")
	}

169
	var memInfo C.mem_info_t
Daniel Hiltgen's avatar
Daniel Hiltgen committed
170
171
172
173
174
175
176
	resp := []GpuInfo{}

	// NVIDIA first
	for i := 0; i < gpuHandles.deviceCount; i++ {
		// TODO once we support CPU compilation variants of GPU libraries refine this...
		if cpuVariant == "" && runtime.GOARCH == "amd64" {
			continue
177
		}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
178
179
180
		gpuInfo := GpuInfo{
			Library: "cuda",
		}
181
182
183
184
185
		if gpuHandles.cudart != nil {
			C.cudart_check_vram(*gpuHandles.cudart, C.int(i), &memInfo)
		} else {
			C.nvcuda_check_vram(*gpuHandles.nvcuda, C.int(i), &memInfo)
		}
186
		if memInfo.err != nil {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
187
			slog.Info("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
188
			C.free(unsafe.Pointer(memInfo.err))
Daniel Hiltgen's avatar
Daniel Hiltgen committed
189
			continue
190
		}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
191
192
193
		if memInfo.major < CudaComputeMin[0] || (memInfo.major == CudaComputeMin[0] && memInfo.minor < CudaComputeMin[1]) {
			slog.Info(fmt.Sprintf("[%d] CUDA GPU is too old. Compute Capability detected: %d.%d", i, memInfo.major, memInfo.minor))
			continue
194
		}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
195
196
197
198
199
200
201
202
203
		gpuInfo.TotalMemory = uint64(memInfo.total)
		gpuInfo.FreeMemory = uint64(memInfo.free)
		gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
		gpuInfo.Major = int(memInfo.major)
		gpuInfo.Minor = int(memInfo.minor)
		gpuInfo.MinimumMemory = cudaMinimumMemory

		// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
		resp = append(resp, gpuInfo)
204
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
205
206
207
208
209

	// Then AMD
	resp = append(resp, AMDGetGPUInfo()...)

	if len(resp) == 0 {
210
		C.cpu_check_ram(&memInfo)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
211
212
213
214
215
216
217
218
219
220
221
222
223
224
		if memInfo.err != nil {
			slog.Info("error looking up CPU memory", "error", C.GoString(memInfo.err))
			C.free(unsafe.Pointer(memInfo.err))
			return resp
		}
		gpuInfo := GpuInfo{
			Library: "cpu",
			Variant: cpuVariant,
		}
		gpuInfo.TotalMemory = uint64(memInfo.total)
		gpuInfo.FreeMemory = uint64(memInfo.free)
		gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])

		resp = append(resp, gpuInfo)
225
	}
226

227
228
229
	return resp
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
230
// GetCPUMem queries system memory through the C helper cpu_check_ram and
// returns the free and total amounts it reports. When the helper reports an
// error, that message is returned as the error and the memInfo is left at
// its zero value.
func GetCPUMem() (memInfo, error) {
	var ret memInfo
	var info C.mem_info_t
	C.cpu_check_ram(&info)
	if info.err != nil {
		// Copy the message into Go memory before releasing the C string.
		msg := C.GoString(info.err)
		C.free(unsafe.Pointer(info.err))
		// Pass the message as an argument, never as the format string, so a
		// '%' in the C-provided text cannot be misread as a formatting verb.
		return ret, fmt.Errorf("%s", msg)
	}
	ret.FreeMemory = uint64(info.free)
	ret.TotalMemory = uint64(info.total)
	return ret, nil
}

243
// FindGPULibs returns candidate paths for the GPU library named by
// baseLibName, which may itself be a glob such as "libcuda.so*".
//
// Directories listed in PATH (windows) or LD_LIBRARY_PATH (linux) are
// searched first, followed by defaultPatterns in the order given. Empty
// entries in the environment variable are skipped so that an unset or empty
// variable does not cause the current working directory to be searched.
// Every match has its symlinks resolved and duplicates are dropped, so each
// underlying file appears once, in discovery order. On any other OS the
// result is an empty, non-nil slice.
func FindGPULibs(baseLibName string, defaultPatterns []string) []string {
	// Upper bound on symlink hops per match; guards against link cycles,
	// which would otherwise spin forever.
	const maxSymlinkHops = 40

	// Multiple GPU libraries may exist, and some may not work, so keep trying until we exhaust them
	var ldPaths []string
	var patterns []string
	gpuLibPaths := []string{}
	slog.Debug("Searching for GPU library", "name", baseLibName)

	switch runtime.GOOS {
	case "windows":
		ldPaths = strings.Split(os.Getenv("PATH"), ";")
	case "linux":
		ldPaths = strings.Split(os.Getenv("LD_LIBRARY_PATH"), ":")
	default:
		return gpuLibPaths
	}
	// Start with whatever we find in the PATH/LD_LIBRARY_PATH
	for _, ldPath := range ldPaths {
		if ldPath == "" {
			// filepath.Abs("") yields the working directory; never treat an
			// empty entry as a request to search it.
			continue
		}
		d, err := filepath.Abs(ldPath)
		if err != nil {
			continue
		}
		patterns = append(patterns, filepath.Join(d, baseLibName+"*"))
	}
	patterns = append(patterns, defaultPatterns...)
	slog.Debug("gpu library search", "globs", patterns)
	for _, pattern := range patterns {
		// Ignore glob discovery errors
		matches, _ := filepath.Glob(pattern)
	nextMatch:
		for _, match := range matches {
			// Resolve any links so we don't try the same lib multiple times
			// and weed out any dups across globs
			libPath := match
			for hops := 0; ; hops++ {
				target, err := os.Readlink(libPath)
				if err != nil {
					// Not a symlink (or unreadable): libPath is final.
					break
				}
				if hops >= maxSymlinkHops {
					// Link cycle or absurdly deep chain; it cannot be loaded.
					continue nextMatch
				}
				if !filepath.IsAbs(target) {
					// Relative link targets are relative to the link's directory.
					target = filepath.Join(filepath.Dir(libPath), target)
				}
				libPath = target
			}

			seen := false
			for _, known := range gpuLibPaths {
				if known == libPath {
					seen = true
					break
				}
			}
			if !seen {
				gpuLibPaths = append(gpuLibPaths, libPath)
			}
		}
	}
	slog.Debug("discovered GPU libraries", "paths", gpuLibPaths)
	return gpuLibPaths
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
299
func LoadCUDARTMgmt(cudartLibPaths []string) (int, *C.cudart_handle_t, string) {
300
	var resp C.cudart_init_resp_t
301
	resp.ch.verbose = getVerboseState()
302
	for _, libPath := range cudartLibPaths {
303
304
		lib := C.CString(libPath)
		defer C.free(unsafe.Pointer(lib))
305
		C.cudart_init(lib, &resp)
306
		if resp.err != nil {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
307
			slog.Debug("Unable to load cudart", "library", libPath, "error", C.GoString(resp.err))
308
309
			C.free(unsafe.Pointer(resp.err))
		} else {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
310
			return int(resp.num_devices), &resp.ch, libPath
311
312
		}
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
313
	return 0, nil, ""
314
315
}

316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
// LoadNVCUDAMgmt tries each path in nvcudaLibPaths, in order, with the C
// helper nvcuda_init and stops at the first one that initializes without
// error. It returns the device count reported by that library, a pointer to
// its handle, and the path that was loaded. When no library loads it returns
// 0, nil and the empty string; individual failures are logged at debug level.
func LoadNVCUDAMgmt(nvcudaLibPaths []string) (int, *C.nvcuda_handle_t, string) {
	var resp C.nvcuda_init_resp_t
	resp.ch.verbose = getVerboseState()
	for _, libPath := range nvcudaLibPaths {
		lib := C.CString(libPath)
		C.nvcuda_init(lib, &resp)
		// Free the C copy of the path as soon as the call returns instead of
		// deferring inside the loop, which would keep every path allocated
		// until the function exits.
		// NOTE(review): assumes nvcuda_init does not retain the path pointer
		// beyond the call — confirm against gpu_info.h.
		C.free(unsafe.Pointer(lib))
		if resp.err != nil {
			slog.Debug("Unable to load nvcuda", "library", libPath, "error", C.GoString(resp.err))
			C.free(unsafe.Pointer(resp.err))
			continue
		}
		return int(resp.num_devices), &resp.ch, libPath
	}
	return 0, nil, ""
}

333
// getVerboseState maps envconfig.Debug onto the verbose flag handed to the C
// management helpers: 1 when debug is enabled, 0 otherwise.
func getVerboseState() C.uint16_t {
	if envconfig.Debug {
		return C.uint16_t(1)
	}
	return C.uint16_t(0)
}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357

// GetVisibleDevicesEnv works out the "visible devices" environment variable
// for the GPUs in l, returning whatever pair of strings the library-specific
// helper produces.
//
// Only the first entry's Library is consulted, so when the list mixes
// libraries the first one wins. An empty list, or a library other than
// "cuda" or "rocm", yields two empty strings.
func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) {
	if len(l) == 0 {
		return "", ""
	}

	library := l[0].Library
	if library == "cuda" {
		return cudaGetVisibleDevicesEnv(l)
	}
	if library == "rocm" {
		return rocmGetVisibleDevicesEnv(l)
	}

	slog.Debug("no filter required for library " + library)
	return "", ""
}