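// Package runners locates the runner executables available next to the
// running binary and selects the ones suited to the host CPU or to a
// requested GPU library.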
package runners

import (
	"log/slog"
	"os"
	"path/filepath"
	"runtime"
	"slices"
	"strings"
	"sync"

12
	"golang.org/x/sys/cpu"
13
14
15
16
17
18

	"github.com/ollama/ollama/envconfig"
)

var (
	// runnersDir is the directory in which runners were found. It is
	// assigned by locateRunnersOnce and stays empty when only the built-in
	// runner is available.
	runnersDir string

	// once ensures locateRunnersOnce runs a single time.
	once sync.Once
)

22
// CPUCapability identifies the level of vector extensions a CPU supports.
// The defined levels are the CPUCapability* constants.
type CPUCapability uint32
23

24
25
// Override at build time when building base GPU runners
// var GPURunnerCPUCapability = CPUCapabilityAVX
26

27
28
29
30
31
32
// Known CPU capability levels, numbered from zero in declaration order.
const (
	// CPUCapabilityNone means no vector extensions; it is the zero value.
	CPUCapabilityNone CPUCapability = iota
	// CPUCapabilityAVX means AVX is available.
	CPUCapabilityAVX
	// CPUCapabilityAVX2 means AVX2 is available.
	CPUCapabilityAVX2
	// TODO AVX512
)
33

34
35
36
37
38
39
40
41
// String returns the short name of the capability: "avx" or "avx2" for the
// corresponding levels, and "no vector extensions" for any other value.
func (c CPUCapability) String() string {
	if c == CPUCapabilityAVX {
		return "avx"
	}
	if c == CPUCapabilityAVX2 {
		return "avx2"
	}
	return "no vector extensions"
}

45
46
47
// GetCPUCapability returns the highest capability level indicated by the
// cpu.X86 feature flags, checking AVX2 before AVX.
func GetCPUCapability() CPUCapability {
	switch {
	case cpu.X86.HasAVX2:
		return CPUCapabilityAVX2
	case cpu.X86.HasAVX:
		return CPUCapabilityAVX
	default:
		// Neither flag is set: lowest common denominator.
		return CPUCapabilityNone
	}
}

56
57
58
59
60
// Locate returns the directory in which runners were found, running the
// search on first use. An empty string indicates that only the builtin
// runner is present.
func Locate() string {
	once.Do(locateRunnersOnce)
	return runnersDir
}

63
64
65
66
67
// searches for runners in a prioritized set of locations
// 1. local build, with executable at the top of the tree
// 2. lib directory relative to executable
func locateRunnersOnce() {
	exe, err := os.Executable()
68
	if err != nil {
69
		slog.Debug("runner locate", "error", err)
70
71
	}

72
73
74
	paths := []string{
		filepath.Join(filepath.Dir(exe), "llama", "build", runtime.GOOS+"-"+runtime.GOARCH, "runners"),
		filepath.Join(filepath.Dir(exe), envconfig.LibRelativeToExe(), "lib", "ollama", "runners"),
75
	}
76
77
78
79
80
	for _, path := range paths {
		if _, err := os.Stat(path); err == nil {
			runnersDir = path
			slog.Debug("runners located", "dir", runnersDir)
			return
81
82
		}
	}
83
84
85
	// Fall back to built-in
	slog.Debug("no dynamic runners detected, using only built-in")
	runnersDir = ""
86
87
}

88
89
90
91
// BuiltinName returns the well-known name of the builtin runner for the
// platform this binary was built for: "metal" on darwin/arm64, "cpu"
// everywhere else.
func BuiltinName() string {
	darwinARM64 := runtime.GOOS == "darwin" && runtime.GOARCH == "arm64"
	if !darwinARM64 {
		return "cpu"
	}
	return "metal"
}

// directory names are the name of the runner and may contain an optional
// variant prefixed with '_' as the separator. For example, "cuda_v11" and
// "cuda_v12" or "cpu" and "cpu_avx2". Any library without a variant is the
// lowest common denominator
100
101
102
103
104
105
106
107
108
109
110
func GetAvailableServers() map[string]string {
	once.Do(locateRunnersOnce)

	servers := make(map[string]string)
	exe, err := os.Executable()
	if err == nil {
		servers[BuiltinName()] = exe
	}

	if runnersDir == "" {
		return servers
111
112
	}

113
114
	// glob runnersDir for files that start with ollama_
	pattern := filepath.Join(runnersDir, "*", "ollama_*")
115
116
117
118
119
120
121
122
123

	files, err := filepath.Glob(pattern)
	if err != nil {
		slog.Debug("could not glob", "pattern", pattern, "error", err)
		return nil
	}

	for _, file := range files {
		slog.Debug("availableServers : found", "file", file)
124
125
126
127
128
129
130
131
132
133
		runnerName := filepath.Base(filepath.Dir(file))
		// Special case for our GPU runners - if compiled with standard AVX flag
		// detect incompatible system
		// Custom builds will omit this and its up to the user to ensure compatibility
		parsed := strings.Split(runnerName, "_")
		if len(parsed) == 3 && parsed[2] == "avx" && !cpu.X86.HasAVX {
			slog.Info("GPU runner incompatible with host system, CPU does not have AVX", "runner", runnerName)
			continue
		}
		servers[runnerName] = file
134
135
136
137
138
	}

	return servers
}

139
140
// serversForGpu returns a list of compatible servers give the provided GPU library/variant
func ServersForGpu(requested string) []string {
141
	// glob workDir for files that start with ollama_
142
143
144
145
146
	availableServers := GetAvailableServers()

	// Short circuit if the only option is built-in
	if _, ok := availableServers[BuiltinName()]; ok && len(availableServers) == 1 {
		return []string{BuiltinName()}
147
148
	}

149
150
	bestCPUVariant := GetCPUCapability()
	requestedLib := strings.Split(requested, "_")[0]
151
152
153
154
	servers := []string{}

	// exact match first
	for a := range availableServers {
155
156
157
158
159
160
161
		short := a
		parsed := strings.Split(a, "_")
		if len(parsed) == 3 {
			// Strip off optional _avx for comparison
			short = parsed[0] + "_" + parsed[1]
		}
		if a == requested || short == requested {
162
163
164
165
			servers = []string{a}
		}
	}

166
167
168
	// If no exact match, then try without variant
	if len(servers) == 0 {
		alt := []string{}
169
		for a := range availableServers {
170
			if requestedLib == strings.Split(a, "_")[0] && a != requested {
171
172
173
174
175
176
177
				alt = append(alt, a)
			}
		}
		slices.Sort(alt)
		servers = append(servers, alt...)
	}

178
179
180
181
182
183
	// Finally append the best CPU option if found, then builtin
	if bestCPUVariant != CPUCapabilityNone {
		for cmp := range availableServers {
			if cmp == "cpu_"+bestCPUVariant.String() {
				servers = append(servers, cmp)
				break
184
185
186
			}
		}
	}
187
	servers = append(servers, BuiltinName())
188
189
190
191
192
193
	return servers
}

// Return the optimal server for this CPU architecture
func ServerForCpu() string {
	if runtime.GOOS == "darwin" && runtime.GOARCH == "arm64" {
194
		return BuiltinName()
195
	}
196
197
198
	variant := GetCPUCapability()
	availableServers := GetAvailableServers()
	if variant != CPUCapabilityNone {
199
200
201
202
203
204
		for cmp := range availableServers {
			if cmp == "cpu_"+variant.String() {
				return cmp
			}
		}
	}
205
	return BuiltinName()
206
}