//go:build linux || windows

package gpu

/*
#cgo linux LDFLAGS: -lrt -lpthread -ldl -lstdc++ -lm
#cgo windows LDFLAGS: -lpthread

#include "gpu_info.h"

*/
import "C"
import (
	"fmt"
15
	"log/slog"
16
17
	"os"
	"path/filepath"
18
	"runtime"
19
	"strings"
20
21
	"sync"
	"unsafe"
Michael Yang's avatar
Michael Yang committed
22
23

	"github.com/ollama/ollama/format"
24
25
26
)

type handles struct {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
27
28
	deviceCount int
	cudart      *C.cudart_handle_t
29
30
}

Michael Yang's avatar
Michael Yang committed
31
const (
32
33
	cudaMinimumMemory = 457 * format.MebiByte
	rocmMinimumMemory = 457 * format.MebiByte
Michael Yang's avatar
Michael Yang committed
34
35
)

36
37
// gpuMutex serializes GPU discovery: GetGPUInfo holds it for its entire run,
// and initGPUHandles expects it to be held by its caller.
var gpuMutex sync.Mutex

38
39
// CudaComputeMin is the minimum CUDA compute capability, as {major, minor},
// that GetGPUInfo accepts; older devices are skipped.
// With our current CUDA compile flags, older than 5.0 will not work properly
var CudaComputeMin = [2]C.int{5, 0}
40

Daniel Hiltgen's avatar
Daniel Hiltgen committed
41
// RocmComputeMin is the minimum compute level accepted for ROCm devices.
// NOTE(review): presumably the gfx major version; it is not referenced in the
// code visible here - confirm against the AMD discovery code.
var RocmComputeMin = 9
42

Daniel Hiltgen's avatar
Daniel Hiltgen committed
43
44
// IGPUMemLimit is the memory size below which a device is assumed to be an
// integrated GPU: iGPUs typically report around 512M, so anything less than
// 1G must be an iGPU.
// NOTE(review): the original note said "512G", presumably a typo for 512M.
// TODO find a better way to detect iGPU instead of minimum memory
const IGPUMemLimit = 1 * format.GibiByte
45

46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
// CudartLinuxGlobs lists the glob patterns searched for the CUDA runtime
// library (libcudart) on Linux. initGPUHandles appends them after the
// bundled payload location, so they are tried in the order listed here.
var CudartLinuxGlobs = []string{
	"/usr/local/cuda/lib64/libcudart.so*",
	"/usr/lib/x86_64-linux-gnu/nvidia/current/libcudart.so*",
	"/usr/lib/x86_64-linux-gnu/libcudart.so*",
	"/usr/lib/wsl/lib/libcudart.so*",
	"/usr/lib/wsl/drivers/*/libcudart.so*",
	"/opt/cuda/lib64/libcudart.so*",
	"/usr/local/cuda*/targets/aarch64-linux/lib/libcudart.so*",
	"/usr/lib/aarch64-linux-gnu/nvidia/current/libcudart.so*",
	"/usr/lib/aarch64-linux-gnu/libcudart.so*",
	"/usr/local/cuda/lib*/libcudart.so*",
	"/usr/lib*/libcudart.so*",
	"/usr/local/lib*/libcudart.so*",
}

// CudartWindowsGlobs lists the glob patterns searched for the CUDA runtime
// DLL on Windows. initGPUHandles appends them after the per-user Ollama
// install location under LOCALAPPDATA.
var CudartWindowsGlobs = []string{
	"c:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v*\\bin\\cudart64_*.dll",
}

// CudaTegra holds the value of the JETSON_JETPACK environment variable as
// read at package initialization. Jetson devices ship with it factory set to
// the installed Jetpack version ("x.y.z"); it is exposed to drive logic that
// reduces Ollama-allocated overhead on L4T/Jetson devices.
var CudaTegra = os.Getenv("JETSON_JETPACK")

69
// Note: gpuMutex must already be held
70
func initGPUHandles() *handles {
71

72
	// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing
73

Daniel Hiltgen's avatar
Daniel Hiltgen committed
74
	gpuHandles := &handles{}
75
76
77
78
	var cudartMgmtName string
	var cudartMgmtPatterns []string

	tmpDir, _ := PayloadsDir()
79
80
	switch runtime.GOOS {
	case "windows":
81
82
83
84
		cudartMgmtName = "cudart64_*.dll"
		localAppData := os.Getenv("LOCALAPPDATA")
		cudartMgmtPatterns = []string{filepath.Join(localAppData, "Programs", "Ollama", cudartMgmtName)}
		cudartMgmtPatterns = append(cudartMgmtPatterns, CudartWindowsGlobs...)
85
	case "linux":
86
87
88
89
90
91
		cudartMgmtName = "libcudart.so*"
		if tmpDir != "" {
			// TODO - add "payloads" for subprocess
			cudartMgmtPatterns = []string{filepath.Join(tmpDir, "cuda*", cudartMgmtName)}
		}
		cudartMgmtPatterns = append(cudartMgmtPatterns, CudartLinuxGlobs...)
92
	default:
93
		return gpuHandles
94
95
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
96
	slog.Info("Detecting GPUs")
97
98
	cudartLibPaths := FindGPULibs(cudartMgmtName, cudartMgmtPatterns)
	if len(cudartLibPaths) > 0 {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
99
		deviceCount, cudart, libPath := LoadCUDARTMgmt(cudartLibPaths)
100
		if cudart != nil {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
101
			slog.Info("detected GPUs", "library", libPath, "count", deviceCount)
102
			gpuHandles.cudart = cudart
Daniel Hiltgen's avatar
Daniel Hiltgen committed
103
			gpuHandles.deviceCount = deviceCount
104
			return gpuHandles
105
106
		}
	}
107
	return gpuHandles
108
109
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
110
func GetGPUInfo() GpuInfoList {
111
112
113
114
	// TODO - consider exploring lspci (and equivalent on windows) to check for
	// GPUs so we can report warnings if we see Nvidia/AMD but fail to load the libraries
	gpuMutex.Lock()
	defer gpuMutex.Unlock()
115
116
117
118
119
120
121

	gpuHandles := initGPUHandles()
	defer func() {
		if gpuHandles.cudart != nil {
			C.cudart_release(*gpuHandles.cudart)
		}
	}()
122

123
	// All our GPU builds on x86 have AVX enabled, so fallback to CPU if we don't detect at least AVX
124
	cpuVariant := GetCPUVariant()
125
	if cpuVariant == "" && runtime.GOARCH == "amd64" {
126
127
128
		slog.Warn("CPU does not have AVX or AVX2, disabling GPU support.")
	}

129
	var memInfo C.mem_info_t
Daniel Hiltgen's avatar
Daniel Hiltgen committed
130
131
132
133
134
135
136
	resp := []GpuInfo{}

	// NVIDIA first
	for i := 0; i < gpuHandles.deviceCount; i++ {
		// TODO once we support CPU compilation variants of GPU libraries refine this...
		if cpuVariant == "" && runtime.GOARCH == "amd64" {
			continue
137
		}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
138
139
140
141
		gpuInfo := GpuInfo{
			Library: "cuda",
		}
		C.cudart_check_vram(*gpuHandles.cudart, C.int(i), &memInfo)
142
		if memInfo.err != nil {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
143
			slog.Info("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
144
			C.free(unsafe.Pointer(memInfo.err))
Daniel Hiltgen's avatar
Daniel Hiltgen committed
145
			continue
146
		}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
147
148
149
		if memInfo.major < CudaComputeMin[0] || (memInfo.major == CudaComputeMin[0] && memInfo.minor < CudaComputeMin[1]) {
			slog.Info(fmt.Sprintf("[%d] CUDA GPU is too old. Compute Capability detected: %d.%d", i, memInfo.major, memInfo.minor))
			continue
150
		}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
151
152
153
154
155
156
157
158
159
		gpuInfo.TotalMemory = uint64(memInfo.total)
		gpuInfo.FreeMemory = uint64(memInfo.free)
		gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
		gpuInfo.Major = int(memInfo.major)
		gpuInfo.Minor = int(memInfo.minor)
		gpuInfo.MinimumMemory = cudaMinimumMemory

		// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
		resp = append(resp, gpuInfo)
160
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
161
162
163
164
165

	// Then AMD
	resp = append(resp, AMDGetGPUInfo()...)

	if len(resp) == 0 {
166
		C.cpu_check_ram(&memInfo)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
167
168
169
170
171
172
173
174
175
176
177
178
179
180
		if memInfo.err != nil {
			slog.Info("error looking up CPU memory", "error", C.GoString(memInfo.err))
			C.free(unsafe.Pointer(memInfo.err))
			return resp
		}
		gpuInfo := GpuInfo{
			Library: "cpu",
			Variant: cpuVariant,
		}
		gpuInfo.TotalMemory = uint64(memInfo.total)
		gpuInfo.FreeMemory = uint64(memInfo.free)
		gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])

		resp = append(resp, gpuInfo)
181
	}
182

183
184
185
	return resp
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
186
// GetCPUMem reports total and free system memory as returned by the native
// cpu_check_ram helper. On failure it returns a zero memInfo and an error
// carrying the helper's message.
func GetCPUMem() (memInfo, error) {
	var ret memInfo
	var info C.mem_info_t
	C.cpu_check_ram(&info)
	if info.err != nil {
		defer C.free(unsafe.Pointer(info.err))
		// Pass the message as an argument, never as the format string: a
		// literal '%' in the native message would otherwise be misread as a verb.
		return ret, fmt.Errorf("%s", C.GoString(info.err))
	}
	ret.FreeMemory = uint64(info.free)
	ret.TotalMemory = uint64(info.total)
	return ret, nil
}

199
200
201
202
// FindGPULibs returns the de-duplicated list of library files matching the
// given glob patterns, followed by matches for baseLibName in each directory
// of PATH (windows) or LD_LIBRARY_PATH (linux). Symlinks are resolved so the
// same file is reported only once. On any other OS the result is empty.
//
// The result is never nil, and the caller's patterns slice is not modified.
func FindGPULibs(baseLibName string, patterns []string) []string {
	// Multiple GPU libraries may exist, and some may not work, so keep trying until we exhaust them
	var ldPaths []string
	gpuLibPaths := []string{}
	slog.Debug("Searching for GPU library", "name", baseLibName)

	switch runtime.GOOS {
	case "windows":
		ldPaths = strings.Split(os.Getenv("PATH"), ";")
	case "linux":
		ldPaths = strings.Split(os.Getenv("LD_LIBRARY_PATH"), ":")
	default:
		return gpuLibPaths
	}

	// Work on a copy so appending never writes into the caller's backing array.
	globs := make([]string, 0, len(patterns)+len(ldPaths))
	globs = append(globs, patterns...)

	// The explicit patterns are searched first; whatever is found through
	// PATH/LD_LIBRARY_PATH comes after them.
	for _, ldPath := range ldPaths {
		// An unset or empty variable splits into a single empty entry, and
		// filepath.Abs("") is the working directory; never search that
		// implicitly.
		if ldPath == "" {
			continue
		}
		d, err := filepath.Abs(ldPath)
		if err != nil {
			continue
		}
		globs = append(globs, filepath.Join(d, baseLibName+"*"))
	}
	slog.Debug("gpu library search", "globs", globs)

	seen := make(map[string]struct{})
	for _, pattern := range globs {
		// Ignore glob discovery errors
		matches, _ := filepath.Glob(pattern)
		for _, match := range matches {
			// Resolve any links so we don't try the same lib multiple times
			// and weed out any dups across globs
			libPath, ok := resolveGPULibLink(match)
			if !ok {
				slog.Debug("skipping GPU library with unresolvable symlink chain", "path", match)
				continue
			}
			if _, dup := seen[libPath]; dup {
				continue
			}
			seen[libPath] = struct{}{}
			gpuLibPaths = append(gpuLibPaths, libPath)
		}
	}
	slog.Debug("discovered GPU libraries", "paths", gpuLibPaths)
	return gpuLibPaths
}

// resolveGPULibLink follows the symlink chain starting at path and returns
// the final target. Relative link targets are interpreted relative to the
// directory containing the link. It reports false when the chain does not
// terminate within a fixed number of hops, which indicates a symlink cycle.
func resolveGPULibLink(path string) (string, bool) {
	const maxHops = 255
	for hop := 0; hop < maxHops; hop++ {
		target, err := os.Readlink(path)
		if err != nil {
			// Not a symlink (or unreadable): this is the end of the chain.
			return path, true
		}
		if !filepath.IsAbs(target) {
			target = filepath.Join(filepath.Dir(path), target)
		}
		path = target
	}
	return "", false
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
253
func LoadCUDARTMgmt(cudartLibPaths []string) (int, *C.cudart_handle_t, string) {
254
	var resp C.cudart_init_resp_t
255
	resp.ch.verbose = getVerboseState()
256
	for _, libPath := range cudartLibPaths {
257
258
		lib := C.CString(libPath)
		defer C.free(unsafe.Pointer(lib))
259
		C.cudart_init(lib, &resp)
260
		if resp.err != nil {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
261
			slog.Debug("Unable to load cudart", "library", libPath, "error", C.GoString(resp.err))
262
263
			C.free(unsafe.Pointer(resp.err))
		} else {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
264
			return int(resp.num_devices), &resp.ch, libPath
265
266
		}
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
267
	return 0, nil, ""
268
269
}

270
271
272
273
274
275
// getVerboseState returns 1 when the OLLAMA_DEBUG environment variable is
// set to any non-empty value and 0 otherwise; the result is handed to the
// native cudart layer as its verbose flag.
func getVerboseState() C.uint16_t {
	var verbose C.uint16_t
	if os.Getenv("OLLAMA_DEBUG") != "" {
		verbose = 1
	}
	return verbose
}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294

// GetVisibleDevicesEnv returns the pair of strings produced by the
// library-specific helper for the GPUs in this list: cudaGetVisibleDevicesEnv
// for "cuda" and rocmGetVisibleDevicesEnv for "rocm".
//
// Only the first entry's library is consulted, so a list mixing libraries is
// handled according to its first element. An empty list, or any other
// library, yields two empty strings.
func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) {
	if len(l) == 0 {
		return "", ""
	}
	library := l[0].Library
	if library == "cuda" {
		return cudaGetVisibleDevicesEnv(l)
	}
	if library == "rocm" {
		return rocmGetVisibleDevicesEnv(l)
	}
	slog.Debug("no filter required for library " + library)
	return "", ""
}