gpu.go 5.08 KB
Newer Older
1
package discover
2
3

import (
4
	"context"
5
	"log/slog"
6
7
	"os"
	"path/filepath"
8
	"regexp"
9
	"runtime"
10
	"strconv"
11
	"strings"
Michael Yang's avatar
Michael Yang committed
12

13
	"github.com/ollama/ollama/format"
14
	"github.com/ollama/ollama/ml"
15
16
)

17
18
19
// CudaTegra holds the value of the JETSON_JETPACK environment variable, read
// once when the package is initialized. Jetson devices ship with
// JETSON_JETPACK="x.y.z" factory set to the installed Jetpack version; it is
// used to drive logic for reducing Ollama-allocated overhead on L4T/Jetson
// devices.
var CudaTegra = os.Getenv("JETSON_JETPACK")
20

21
22
23
24
func GetCPUInfo() GpuInfo {
	mem, err := GetCPUMem()
	if err != nil {
		slog.Warn("error looking up system memory", "error", err)
25
26
	}

27
28
29
30
31
32
	return GpuInfo{
		memInfo: mem,
		DeviceID: ml.DeviceID{
			Library: "cpu",
			ID:      "0",
		},
33
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
34
35
}

36
37
38
// GetGPUInfo looks up devices through GPUDevices using the supplied context
// and runners, and returns them converted to a GpuInfoList by
// devInfoToInfoList.
func GetGPUInfo(ctx context.Context, runners []FilteredRunnerDiscovery) GpuInfoList {
	return devInfoToInfoList(GPUDevices(ctx, runners))
}

41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
func devInfoToInfoList(devs []ml.DeviceInfo) GpuInfoList {
	resp := []GpuInfo{}
	// Our current packaging model places ggml-hip in the main directory
	// but keeps rocm in an isolated directory.  We have to add it to
	// the [LD_LIBRARY_]PATH so ggml-hip will load properly
	rocmDir := filepath.Join(LibOllamaPath, "rocm")
	if _, err := os.Stat(rocmDir); err != nil {
		rocmDir = ""
	}

	for _, dev := range devs {
		info := GpuInfo{
			DeviceID: dev.DeviceID,
			filterID: dev.FilteredID,
			Name:     dev.Description,
			memInfo: memInfo{
				TotalMemory: dev.TotalMemory,
				FreeMemory:  dev.FreeMemory,
59
			},
60
61
62
63
			// TODO can we avoid variant
			DependencyPath: dev.LibraryPath,
			DriverMajor:    dev.DriverMajor,
			DriverMinor:    dev.DriverMinor,
64
65
			ComputeMajor:   dev.ComputeMajor,
			ComputeMinor:   dev.ComputeMinor,
Michael Yang's avatar
lint  
Michael Yang committed
66
		}
67
68
		if dev.Library == "CUDA" || dev.Library == "ROCm" {
			info.MinimumMemory = 457 * format.MebiByte
Daniel Hiltgen's avatar
Daniel Hiltgen committed
69
		}
70
71
		if dev.Library == "ROCm" && rocmDir != "" {
			info.DependencyPath = append(info.DependencyPath, rocmDir)
72
		}
73
		// TODO any special processing of Vulkan devices?
74
		resp = append(resp, info)
75
	}
76
	if len(resp) == 0 {
77
78
79
		mem, err := GetCPUMem()
		if err != nil {
			slog.Warn("error looking up system memory", "error", err)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
80
81
		}

82
83
84
85
86
87
88
		resp = append(resp, GpuInfo{
			memInfo: mem,
			DeviceID: ml.DeviceID{
				Library: "cpu",
				ID:      "0",
			},
		})
89
90
91
92
	}
	return resp
}

93
94
95
96
97
98
99
// GetVisibleDevicesEnv returns the visible-devices environment variable
// assignments for the list of GPUs this instantiation is targeted for.
//
// Each element has the form "NAME=id[,id...]". One entry is produced per
// library that has a selector here and at least one device in the list:
// ROCm first, then Vulkan. Libraries are not mutually exclusive, so a mixed
// list yields an entry for each of them.
//
// An empty list returns nil. A non-empty list that contains no ROCm or Vulkan
// devices returns an empty, non-nil slice.
func (l GpuInfoList) GetVisibleDevicesEnv() []string {
	if len(l) == 0 {
		return nil
	}

	res := []string{}
	candidates := [...]string{
		rocmGetVisibleDevicesEnv(l),
		vkGetVisibleDevicesEnv(l),
	}
	for _, envVar := range candidates {
		// Helpers return "" when the list holds no device for their library.
		if envVar != "" {
			res = append(res, envVar)
		}
	}
	return res
}

113
114
115
116
117
func rocmGetVisibleDevicesEnv(gpuInfo []GpuInfo) string {
	ids := []string{}
	for _, info := range gpuInfo {
		if info.Library != "ROCm" {
			continue
118
		}
119
120
121
		// If the devices requires a numeric ID, for filtering purposes, we use the unfiltered ID number
		if info.filterID != "" {
			ids = append(ids, info.filterID)
Wang,Zhe's avatar
Wang,Zhe committed
122
		} else {
123
			ids = append(ids, info.ID)
Wang,Zhe's avatar
Wang,Zhe committed
124
125
		}
	}
126
127
	if len(ids) == 0 {
		return ""
128
	}
129
130
131
	envVar := "ROCR_VISIBLE_DEVICES="
	if runtime.GOOS != "linux" {
		envVar = "HIP_VISIBLE_DEVICES="
Daniel Hiltgen's avatar
Daniel Hiltgen committed
132
	}
133
134
135
136
137
	// There are 3 potential env vars to use to select GPUs.
	// ROCR_VISIBLE_DEVICES supports UUID or numeric but does not work on Windows
	// HIP_VISIBLE_DEVICES supports numeric IDs only
	// GPU_DEVICE_ORDINAL supports numeric IDs only
	return envVar + strings.Join(ids, ",")
Daniel Hiltgen's avatar
Daniel Hiltgen committed
138
}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
139

140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
func vkGetVisibleDevicesEnv(gpuInfo []GpuInfo) string {
	ids := []string{}
	for _, info := range gpuInfo {
		if info.Library != "Vulkan" {
			continue
		}
		if info.filterID != "" {
			ids = append(ids, info.filterID)
		} else {
			ids = append(ids, info.ID)
		}
	}
	if len(ids) == 0 {
		return ""
	}
	envVar := "GGML_VK_VISIBLE_DEVICES="
	return envVar + strings.Join(ids, ",")
}

159
// GetSystemInfo returns the last cached state of the GPUs on the system
160
func GetSystemInfo() SystemInfo {
161
162
163
	deviceMu.Lock()
	defer deviceMu.Unlock()
	gpus := devInfoToInfoList(devices)
164
165
166
167
168
	if len(gpus) == 1 && gpus[0].Library == "cpu" {
		gpus = []GpuInfo{}
	}

	return SystemInfo{
169
170
171
172
173
		System: CPUInfo{
			CPUs:    GetCPUDetails(),
			GpuInfo: GetCPUInfo(),
		},
		GPUs: gpus,
174
175
	}
}
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207

// tegraReleaseRE captures the L4T release number (the digits of " R<n> ") from
// the contents of /etc/nv_tegra_release. It is compiled once at package scope
// rather than on every cudaJetpack call.
var tegraReleaseRE = regexp.MustCompile(` R(\d+) `)

// cudaJetpack returns the Jetpack variant name ("jetpack<major>") for the
// current system, or "" when none can be determined.
//
// Only linux/arm64 is considered. If CudaTegra (JETSON_JETPACK) is set, its
// leading dot-separated component is used as the major version. Otherwise the
// L4T release is read from /etc/nv_tegra_release and mapped to a Jetpack
// version: 35 -> "jetpack5", 36 -> "jetpack6". Any other release, an
// unreadable file, or an unexpected file format yields "".
func cudaJetpack() string {
	if runtime.GOARCH != "arm64" || runtime.GOOS != "linux" {
		return ""
	}

	if CudaTegra != "" {
		// "x.y.z" -> "x"; a value without dots is used whole.
		major, _, _ := strings.Cut(CudaTegra, ".")
		return "jetpack" + major
	}

	data, err := os.ReadFile("/etc/nv_tegra_release")
	if err != nil {
		// Release file is absent or unreadable: no Jetpack variant to report.
		return ""
	}

	m := tegraReleaseRE.FindSubmatch(data)
	if len(m) != 2 {
		slog.Info("Unexpected format for /etc/nv_tegra_release.  Set JETSON_JETPACK to select version")
		return ""
	}

	l4t, err := strconv.Atoi(string(m[1]))
	if err != nil {
		// The pattern only admits digits, so this is an out-of-range number.
		slog.Debug("unable to parse L4T version", "version", string(m[1]), "error", err)
		return ""
	}

	// Note: mapping from L4t -> JP is inconsistent (can't just subtract 30)
	// https://developer.nvidia.com/embedded/jetpack-archive
	switch l4t {
	case 35:
		return "jetpack5"
	case 36:
		return "jetpack6"
	default:
		// Newer Jetson systems use the SBSU runtime
		slog.Debug("unrecognized L4T version", "nv_tegra_release", string(data))
	}
	return ""
}