package discover

import (
	"context"
	"log/slog"
	"os"
	"path/filepath"
	"regexp"
	"runtime"
	"strconv"
	"strings"

	"github.com/ollama/ollama/format"
	"github.com/ollama/ollama/ml"
)

// Jetson devices have JETSON_JETPACK="x.y.z" factory set to the Jetpack version installed.
// Included to drive logic for reducing Ollama-allocated overhead on L4T/Jetson devices.
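// For example, JETSON_JETPACK="6.0" would lead cudaJetpack() below to report the "jetpack6" variant (illustrative value).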
var CudaTegra string = os.Getenv("JETSON_JETPACK")

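// GetCPUInfo returns a GpuInfo entry describing the host CPU, populated with the system memory totals.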
func GetCPUInfo() GpuInfo {
	mem, err := GetCPUMem()
	if err != nil {
		slog.Warn("error looking up system memory", "error", err)
	}

	return GpuInfo{
		memInfo: mem,
		DeviceID: ml.DeviceID{
			Library: "cpu",
			ID:      "0",
		},
	}
}

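// GetGPUInfo enumerates the devices visible to the given runners and converts them into a GpuInfoList.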
func GetGPUInfo(ctx context.Context, runners []FilteredRunnerDiscovery) GpuInfoList {
	devs := GPUDevices(ctx, runners)
	return devInfoToInfoList(devs)
}

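// devInfoToInfoList converts low-level ml.DeviceInfo entries into GpuInfos,
// falling back to a single CPU entry when no GPUs were discovered.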
func devInfoToInfoList(devs []ml.DeviceInfo) GpuInfoList {
	resp := []GpuInfo{}
	// Our current packaging model places ggml-hip in the main directory
	// but keeps rocm in an isolated directory.  We have to add it to
	// the [LD_LIBRARY_]PATH so ggml-hip will load properly
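	// When present, rocmDir is appended to each ROCm device's DependencyPath below.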
	rocmDir := filepath.Join(LibOllamaPath, "rocm")
	if _, err := os.Stat(rocmDir); err != nil {
		rocmDir = ""
	}

	for _, dev := range devs {
		info := GpuInfo{
			DeviceID: dev.DeviceID,
			filterID: dev.FilteredID,
			Name:     dev.Description,
			memInfo: memInfo{
				TotalMemory: dev.TotalMemory,
				FreeMemory:  dev.FreeMemory,
			},
			// TODO can we avoid variant
			DependencyPath: dev.LibraryPath,
			DriverMajor:    dev.DriverMajor,
			DriverMinor:    dev.DriverMinor,
			ComputeMajor:   dev.ComputeMajor,
			ComputeMinor:   dev.ComputeMinor,
		}
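		// CUDA and ROCm devices get a fixed minimum VRAM reservation.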
		if dev.Library == "CUDA" || dev.Library == "ROCm" {
			info.MinimumMemory = 457 * format.MebiByte
		}
		if dev.Library == "ROCm" && rocmDir != "" {
			info.DependencyPath = append(info.DependencyPath, rocmDir)
		}
		resp = append(resp, info)
	}
	if len(resp) == 0 {
		mem, err := GetCPUMem()
		if err != nil {
			slog.Warn("error looking up system memory", "error", err)
		}

		resp = append(resp, GpuInfo{
			memInfo: mem,
			DeviceID: ml.DeviceID{
				Library: "cpu",
				ID:      "0",
			},
		})
	}
	return resp
}

// Given the list of GPUs this instantiation is targeted for,
// figure out the visible devices environment variable
//
// If different libraries are detected, the first one is what we use
func (l GpuInfoList) GetVisibleDevicesEnv() []string {
	if len(l) == 0 {
		return nil
	}
	return []string{rocmGetVisibleDevicesEnv(l)}
}

func rocmGetVisibleDevicesEnv(gpuInfo []GpuInfo) string {
	ids := []string{}
	for _, info := range gpuInfo {
		if info.Library != "ROCm" {
			continue
		}
		// If the device requires a numeric ID for filtering purposes, use the unfiltered ID number
		if info.filterID != "" {
			ids = append(ids, info.filterID)
		} else {
			ids = append(ids, info.ID)
		}
	}
	if len(ids) == 0 {
		return ""
	}
	envVar := "ROCR_VISIBLE_DEVICES="
	if runtime.GOOS != "linux" {
		envVar = "HIP_VISIBLE_DEVICES="
	}
	// There are 3 potential env vars to use to select GPUs.
	// ROCR_VISIBLE_DEVICES supports UUID or numeric but does not work on Windows
	// HIP_VISIBLE_DEVICES supports numeric IDs only
	// GPU_DEVICE_ORDINAL supports numeric IDs only
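	// e.g. two ROCm devices with IDs "0" and "1" yield "ROCR_VISIBLE_DEVICES=0,1" on Linux
	// or "HIP_VISIBLE_DEVICES=0,1" elsewhere (illustrative IDs).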
	return envVar + strings.Join(ids, ",")
}

// GetSystemInfo returns the last cached state of the GPUs on the system
func GetSystemInfo() SystemInfo {
	deviceMu.Lock()
	defer deviceMu.Unlock()
	gpus := devInfoToInfoList(devices)
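	// devInfoToInfoList falls back to a lone CPU entry when no GPUs are found; drop it
	// here so GPUs only lists real accelerators.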
	if len(gpus) == 1 && gpus[0].Library == "cpu" {
		gpus = []GpuInfo{}
	}

	return SystemInfo{
		System: CPUInfo{
			CPUs:    GetCPUDetails(),
			GpuInfo: GetCPUInfo(),
		},
		GPUs: gpus,
	}
}
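
// cudaJetpack reports which Jetpack CUDA variant ("jetpack5" or "jetpack6") this system
// needs, or "" when it is not a recognized Jetson/L4T device.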
func cudaJetpack() string {
	if runtime.GOARCH == "arm64" && runtime.GOOS == "linux" {
		if CudaTegra != "" {
			ver := strings.Split(CudaTegra, ".")
			if len(ver) > 0 {
				return "jetpack" + ver[0]
			}
		} else if data, err := os.ReadFile("/etc/nv_tegra_release"); err == nil {
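			// /etc/nv_tegra_release typically begins with something like "# R36 (release), ..."
			// (format not guaranteed), so capture the L4T major release number.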
			r := regexp.MustCompile(` R(\d+) `)
			m := r.FindSubmatch(data)
			if len(m) != 2 {
				slog.Info("Unexpected format for /etc/nv_tegra_release.  Set JETSON_JETPACK to select version")
			} else {
				if l4t, err := strconv.Atoi(string(m[1])); err == nil {
					// Note: the mapping from L4T -> JetPack is inconsistent (can't just subtract 30)
					// https://developer.nvidia.com/embedded/jetpack-archive
					switch l4t {
					case 35:
						return "jetpack5"
					case 36:
						return "jetpack6"
					default:
						// Newer Jetson systems use the SBSA runtime
						slog.Debug("unrecognized L4T version", "nv_tegra_release", string(data))
					}
				}
			}
		}
	}
	return ""
}