//go:build linux || windows

package gpu

/*
#cgo linux LDFLAGS: -lrt -lpthread -ldl -lstdc++ -lm
#cgo windows LDFLAGS: -lpthread

#include "gpu_info.h"

*/
import "C"
import (
	"fmt"
	"log/slog"
	"os"
	"path/filepath"
	"runtime"
	"strings"
	"sync"
	"unsafe"

	"github.com/ollama/ollama/format"
	"github.com/ollama/ollama/server/envconfig"
)

type handles struct {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
28
29
	deviceCount int
	cudart      *C.cudart_handle_t
30
31
}

Michael Yang's avatar
Michael Yang committed
32
const (
33
34
	cudaMinimumMemory = 457 * format.MebiByte
	rocmMinimumMemory = 457 * format.MebiByte
Michael Yang's avatar
Michael Yang committed
35
36
)

37
38
// gpuMutex serializes GPU discovery: GetGPUInfo holds it for its whole
// duration, which covers initGPUHandles and the release of the cudart handle.
var gpuMutex sync.Mutex

39
40
// CudaComputeMin is the minimum supported CUDA compute capability, stored as
// {major, minor}. GetGPUInfo skips any device reporting a lower capability.
// With our current CUDA compile flags, older than 5.0 will not work properly
var CudaComputeMin = [2]C.int{5, 0}
41

Daniel Hiltgen's avatar
Daniel Hiltgen committed
42
// RocmComputeMin is the minimum supported compute level for AMD (ROCm) GPUs.
// NOTE(review): not referenced in this file; presumably compared against the
// gfx major version by the AMD discovery code - confirm there.
var RocmComputeMin = 9
43

Daniel Hiltgen's avatar
Daniel Hiltgen committed
44
45
// IGPUMemLimit is the memory size below which a GPU is assumed to be an
// integrated GPU.
// TODO find a better way to detect iGPU instead of minimum memory
const IGPUMemLimit = 1 * format.GibiByte // 512M is what they typically report, so anything less than 1G must be iGPU
46

47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
// CudartLinuxGlobs lists the glob patterns searched for the CUDA runtime
// library (libcudart) on Linux. initGPUHandles hands them to FindGPULibs after
// the pattern for the payload directory, when that directory is known.
var CudartLinuxGlobs = []string{
	"/usr/local/cuda/lib64/libcudart.so*",
	"/usr/lib/x86_64-linux-gnu/nvidia/current/libcudart.so*",
	"/usr/lib/x86_64-linux-gnu/libcudart.so*",
	"/usr/lib/wsl/lib/libcudart.so*",
	"/usr/lib/wsl/drivers/*/libcudart.so*",
	"/opt/cuda/lib64/libcudart.so*",
	"/usr/local/cuda*/targets/aarch64-linux/lib/libcudart.so*",
	"/usr/lib/aarch64-linux-gnu/nvidia/current/libcudart.so*",
	"/usr/lib/aarch64-linux-gnu/libcudart.so*",
	"/usr/local/cuda/lib*/libcudart.so*",
	"/usr/lib*/libcudart.so*",
	"/usr/local/lib*/libcudart.so*",
}

// CudartWindowsGlobs lists the glob patterns searched for the CUDA runtime
// DLL on Windows. initGPUHandles hands them to FindGPULibs after the pattern
// built from LOCALAPPDATA.
var CudartWindowsGlobs = []string{
	"c:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v*\\bin\\cudart64_*.dll",
}

// CudaTegra holds the value of the JETSON_JETPACK environment variable, read
// once at package initialization. Jetson devices have JETSON_JETPACK="x.y.z"
// factory set to the Jetpack version installed; it is empty elsewhere.
// Included to drive logic for reducing Ollama-allocated overhead on L4T/Jetson devices.
var CudaTegra = os.Getenv("JETSON_JETPACK")

70
// Note: gpuMutex must already be held
71
func initGPUHandles() *handles {
72

73
	// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing
74

Daniel Hiltgen's avatar
Daniel Hiltgen committed
75
	gpuHandles := &handles{}
76
77
78
79
	var cudartMgmtName string
	var cudartMgmtPatterns []string

	tmpDir, _ := PayloadsDir()
80
81
	switch runtime.GOOS {
	case "windows":
82
83
84
85
		cudartMgmtName = "cudart64_*.dll"
		localAppData := os.Getenv("LOCALAPPDATA")
		cudartMgmtPatterns = []string{filepath.Join(localAppData, "Programs", "Ollama", cudartMgmtName)}
		cudartMgmtPatterns = append(cudartMgmtPatterns, CudartWindowsGlobs...)
86
	case "linux":
87
88
89
90
91
92
		cudartMgmtName = "libcudart.so*"
		if tmpDir != "" {
			// TODO - add "payloads" for subprocess
			cudartMgmtPatterns = []string{filepath.Join(tmpDir, "cuda*", cudartMgmtName)}
		}
		cudartMgmtPatterns = append(cudartMgmtPatterns, CudartLinuxGlobs...)
93
	default:
94
		return gpuHandles
95
96
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
97
	slog.Info("Detecting GPUs")
98
99
	cudartLibPaths := FindGPULibs(cudartMgmtName, cudartMgmtPatterns)
	if len(cudartLibPaths) > 0 {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
100
		deviceCount, cudart, libPath := LoadCUDARTMgmt(cudartLibPaths)
101
		if cudart != nil {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
102
			slog.Info("detected GPUs", "library", libPath, "count", deviceCount)
103
			gpuHandles.cudart = cudart
Daniel Hiltgen's avatar
Daniel Hiltgen committed
104
			gpuHandles.deviceCount = deviceCount
105
			return gpuHandles
106
107
		}
	}
108
	return gpuHandles
109
110
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
111
func GetGPUInfo() GpuInfoList {
112
113
114
115
	// TODO - consider exploring lspci (and equivalent on windows) to check for
	// GPUs so we can report warnings if we see Nvidia/AMD but fail to load the libraries
	gpuMutex.Lock()
	defer gpuMutex.Unlock()
116
117
118
119
120
121
122

	gpuHandles := initGPUHandles()
	defer func() {
		if gpuHandles.cudart != nil {
			C.cudart_release(*gpuHandles.cudart)
		}
	}()
123

124
	// All our GPU builds on x86 have AVX enabled, so fallback to CPU if we don't detect at least AVX
125
	cpuVariant := GetCPUVariant()
126
	if cpuVariant == "" && runtime.GOARCH == "amd64" {
127
128
129
		slog.Warn("CPU does not have AVX or AVX2, disabling GPU support.")
	}

130
	var memInfo C.mem_info_t
Daniel Hiltgen's avatar
Daniel Hiltgen committed
131
132
133
134
135
136
137
	resp := []GpuInfo{}

	// NVIDIA first
	for i := 0; i < gpuHandles.deviceCount; i++ {
		// TODO once we support CPU compilation variants of GPU libraries refine this...
		if cpuVariant == "" && runtime.GOARCH == "amd64" {
			continue
138
		}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
139
140
141
142
		gpuInfo := GpuInfo{
			Library: "cuda",
		}
		C.cudart_check_vram(*gpuHandles.cudart, C.int(i), &memInfo)
143
		if memInfo.err != nil {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
144
			slog.Info("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
145
			C.free(unsafe.Pointer(memInfo.err))
Daniel Hiltgen's avatar
Daniel Hiltgen committed
146
			continue
147
		}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
148
149
150
		if memInfo.major < CudaComputeMin[0] || (memInfo.major == CudaComputeMin[0] && memInfo.minor < CudaComputeMin[1]) {
			slog.Info(fmt.Sprintf("[%d] CUDA GPU is too old. Compute Capability detected: %d.%d", i, memInfo.major, memInfo.minor))
			continue
151
		}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
152
153
154
155
156
157
158
159
160
		gpuInfo.TotalMemory = uint64(memInfo.total)
		gpuInfo.FreeMemory = uint64(memInfo.free)
		gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
		gpuInfo.Major = int(memInfo.major)
		gpuInfo.Minor = int(memInfo.minor)
		gpuInfo.MinimumMemory = cudaMinimumMemory

		// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
		resp = append(resp, gpuInfo)
161
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
162
163
164
165
166

	// Then AMD
	resp = append(resp, AMDGetGPUInfo()...)

	if len(resp) == 0 {
167
		C.cpu_check_ram(&memInfo)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
168
169
170
171
172
173
174
175
176
177
178
179
180
181
		if memInfo.err != nil {
			slog.Info("error looking up CPU memory", "error", C.GoString(memInfo.err))
			C.free(unsafe.Pointer(memInfo.err))
			return resp
		}
		gpuInfo := GpuInfo{
			Library: "cpu",
			Variant: cpuVariant,
		}
		gpuInfo.TotalMemory = uint64(memInfo.total)
		gpuInfo.FreeMemory = uint64(memInfo.free)
		gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])

		resp = append(resp, gpuInfo)
182
	}
183

184
185
186
	return resp
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
187
// GetCPUMem returns the total and free system memory reported by the native
// cpu_check_ram helper. On failure it returns a zero memInfo and an error
// carrying the native error message.
func GetCPUMem() (memInfo, error) {
	var ret memInfo
	var info C.mem_info_t
	C.cpu_check_ram(&info)
	if info.err != nil {
		// The message is allocated on the C side and must be released here.
		defer C.free(unsafe.Pointer(info.err))
		// Pass the native text as an argument rather than as the format
		// string, so a '%' inside the message is reported verbatim.
		return ret, fmt.Errorf("%s", C.GoString(info.err))
	}
	ret.FreeMemory = uint64(info.free)
	ret.TotalMemory = uint64(info.total)
	return ret, nil
}

200
201
202
203
// FindGPULibs returns the candidate library files matching the given glob
// patterns, followed by matches for baseLibName in every directory listed in
// PATH (Windows) or LD_LIBRARY_PATH (Linux). Each match has its symlink chain
// resolved, and duplicates are dropped so the same file is never tried twice.
//
// The result is always non-nil; it is empty on unsupported operating systems
// or when nothing matches. The caller's patterns slice is never modified.
func FindGPULibs(baseLibName string, patterns []string) []string {
	// Multiple GPU libraries may exist, and some may not work, so keep trying until we exhaust them
	gpuLibPaths := []string{}
	slog.Debug("Searching for GPU library", "name", baseLibName)

	var ldPaths []string
	switch runtime.GOOS {
	case "windows":
		ldPaths = strings.Split(os.Getenv("PATH"), ";")
	case "linux":
		ldPaths = strings.Split(os.Getenv("LD_LIBRARY_PATH"), ":")
	default:
		return gpuLibPaths
	}

	// Work on a copy so appending can never write into spare capacity of the
	// caller's slice.
	globs := make([]string, 0, len(patterns)+len(ldPaths))
	globs = append(globs, patterns...)

	// The caller's patterns are tried first, then whatever we find in the
	// PATH/LD_LIBRARY_PATH.
	for _, ldPath := range ldPaths {
		if ldPath == "" {
			// An unset variable or an empty entry would resolve to the current
			// working directory; do not pick up libraries from there.
			continue
		}
		d, err := filepath.Abs(ldPath)
		if err != nil {
			continue
		}
		globs = append(globs, filepath.Join(d, baseLibName+"*"))
	}
	slog.Debug("gpu library search", "globs", globs)

	seen := make(map[string]struct{})
	for _, pattern := range globs {
		// Ignore glob discovery errors
		matches, _ := filepath.Glob(pattern)
		for _, match := range matches {
			// Resolve any links so we don't try the same lib multiple times
			// and weed out any dups across globs
			libPath, ok := resolveGPULibLinks(match)
			if !ok {
				continue
			}
			if _, dup := seen[libPath]; dup {
				continue
			}
			seen[libPath] = struct{}{}
			gpuLibPaths = append(gpuLibPaths, libPath)
		}
	}
	slog.Debug("discovered GPU libraries", "paths", gpuLibPaths)
	return gpuLibPaths
}

// resolveGPULibLinks follows the symlink chain starting at path and returns the
// first entry that is not a symlink (or cannot be read as one). Relative link
// targets are interpreted relative to the directory holding the link. It
// reports false when the chain is longer than a fixed hop limit, which is how a
// symlink cycle shows up.
func resolveGPULibLinks(path string) (string, bool) {
	const maxHops = 40
	for hop := 0; hop <= maxHops; hop++ {
		target, err := os.Readlink(path)
		if err != nil {
			// Not a symlink (or unreadable): this is the final path.
			return path, true
		}
		if !filepath.IsAbs(target) {
			target = filepath.Join(filepath.Dir(path), target)
		}
		path = target
	}
	return "", false
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
254
func LoadCUDARTMgmt(cudartLibPaths []string) (int, *C.cudart_handle_t, string) {
255
	var resp C.cudart_init_resp_t
256
	resp.ch.verbose = getVerboseState()
257
	for _, libPath := range cudartLibPaths {
258
259
		lib := C.CString(libPath)
		defer C.free(unsafe.Pointer(lib))
260
		C.cudart_init(lib, &resp)
261
		if resp.err != nil {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
262
			slog.Debug("Unable to load cudart", "library", libPath, "error", C.GoString(resp.err))
263
264
			C.free(unsafe.Pointer(resp.err))
		} else {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
265
			return int(resp.num_devices), &resp.ch, libPath
266
267
		}
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
268
	return 0, nil, ""
269
270
}

271
// getVerboseState maps envconfig.Debug onto the verbose flag handed to the
// native cudart code: 1 when debugging is enabled, 0 otherwise.
func getVerboseState() C.uint16_t {
	verbose := C.uint16_t(0)
	if envconfig.Debug {
		verbose = C.uint16_t(1)
	}
	return verbose
}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295

// GetVisibleDevicesEnv returns the pair of strings produced by the
// library-specific helper for the GPUs in l (cudaGetVisibleDevicesEnv for
// "cuda", rocmGetVisibleDevicesEnv for "rocm"), i.e. the visible devices
// environment variable for this instantiation.
//
// The library is taken from the first entry only, so if different libraries
// are detected, the first one is what we use. An empty list, or any other
// library, yields two empty strings.
func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) {
	if len(l) == 0 {
		return "", ""
	}

	library := l[0].Library
	if library == "cuda" {
		return cudaGetVisibleDevicesEnv(l)
	}
	if library == "rocm" {
		return rocmGetVisibleDevicesEnv(l)
	}

	slog.Debug("no filter required for library " + library)
	return "", ""
}