"vscode:/vscode.git/clone" did not exist on "b5633c27a0334fb16852f3b266dafbf3634b6da9"
payload_common.go 7.62 KB
Newer Older
1
2
3
package llm

import (
4
	"compress/gzip"
5
6
7
8
	"errors"
	"fmt"
	"io"
	"io/fs"
9
	"log/slog"
10
11
12
13
14
	"os"
	"path/filepath"
	"runtime"
	"strings"

15
16
17
	"golang.org/x/exp/slices"
	"golang.org/x/sync/errgroup"

18
19
20
	"github.com/jmorganca/ollama/gpu"
)

21
// availableDynLibs maps the name of each extracted dynamic LLM library to the
// path it was extracted to. It is filled in by nativeInit.
//
// Library names may contain an optional variant separated by '_', for example
// "rocm_v6" and "rocm_v5", or "cpu" and "cpu_avx2". Any library without a
// variant is the lowest common denominator.
var availableDynLibs = make(map[string]string)

// pathComponentCount is the number of '/'-separated components expected in an
// embedded library path: llama.cpp/build/$OS/$GOARCH/$VARIANT/lib/$LIBRARY
const pathComponentCount = 7
27

28
29
// getDynLibs returns an ordered list of LLM libraries to try, starting with the best
func getDynLibs(gpuInfo gpu.GpuInfo) []string {
30
31
32
33
	// Short circuit if we know we're using the default built-in (darwin only)
	if gpuInfo.Library == "default" {
		return []string{"default"}
	}
34
35
36
37
38
39
40
	// TODO - temporary until we have multiple CPU variations for Darwin
	// Short circuit on darwin with metal only
	if len(availableDynLibs) == 1 {
		if _, onlyMetal := availableDynLibs["metal"]; onlyMetal {
			return []string{availableDynLibs["metal"]}
		}
	}
41

42
	exactMatch := ""
43
44
	dynLibs := []string{}
	altDynLibs := []string{}
45
46
47
48
	requested := gpuInfo.Library
	if gpuInfo.Variant != "" {
		requested += "_" + gpuInfo.Variant
	}
49
	// Try to find an exact match
50
	for cmp := range availableDynLibs {
51
52
		if requested == cmp {
			exactMatch = cmp
53
			dynLibs = []string{availableDynLibs[cmp]}
54
55
56
			break
		}
	}
57
	// Then for GPUs load alternates and sort the list for consistent load ordering
58
	if gpuInfo.Library != "cpu" {
59
		for cmp := range availableDynLibs {
60
			if gpuInfo.Library == strings.Split(cmp, "_")[0] && cmp != exactMatch {
61
				altDynLibs = append(altDynLibs, cmp)
62
63
			}
		}
64
65
66
		slices.Sort(altDynLibs)
		for _, altDynLib := range altDynLibs {
			dynLibs = append(dynLibs, availableDynLibs[altDynLib])
67
68
		}
	}
69
70
71
72
73
74
75
76
77

	// Load up the best CPU variant if not primary requested
	if gpuInfo.Library != "cpu" {
		variant := gpu.GetCPUVariant()
		// If no variant, then we fall back to default
		// If we have a variant, try that if we find an exact match
		// Attempting to run the wrong CPU instructions will panic the
		// process
		if variant != "" {
78
			for cmp := range availableDynLibs {
79
				if cmp == "cpu_"+variant {
80
					dynLibs = append(dynLibs, availableDynLibs[cmp])
81
82
83
84
					break
				}
			}
		} else {
85
			dynLibs = append(dynLibs, availableDynLibs["cpu"])
86
87
88
		}
	}

Michael Yang's avatar
Michael Yang committed
89
	// Finally, if we didn't find any matches, LCD CPU FTW
90
91
	if len(dynLibs) == 0 {
		dynLibs = []string{availableDynLibs["cpu"]}
92
	}
93
	slog.Debug(fmt.Sprintf("ordered list of LLM libraries to try %v", dynLibs))
94
	return dynLibs
95
96
}

97
98
99
// rocmDynLibPresent reports whether any library name in availableDynLibs
// starts with "rocm".
func rocmDynLibPresent() bool {
	found := false
	for name := range availableDynLibs {
		if found = strings.HasPrefix(name, "rocm"); found {
			break
		}
	}
	return found
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
106
func nativeInit() error {
107
	slog.Info("Extracting dynamic libraries...")
Daniel Hiltgen's avatar
Daniel Hiltgen committed
108
109
110
111
	assetsDir, err := gpu.AssetsDir()
	if err != nil {
		return err
	}
112
	if runtime.GOOS == "darwin" {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
113
		err := extractPayloadFiles(assetsDir, "llama.cpp/ggml-metal.metal")
114
115
116
		if err != nil {
			if err == payloadMissing {
				// TODO perhaps consider this a hard failure on arm macs?
117
				slog.Info("ggml-meta.metal payload missing")
118
119
120
121
				return nil
			}
			return err
		}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
122
		os.Setenv("GGML_METAL_PATH_RESOURCES", assetsDir)
123
124
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
125
	libs, err := extractDynamicLibs(assetsDir, "llama.cpp/build/*/*/*/lib/*")
126
127
	if err != nil {
		if err == payloadMissing {
128
			slog.Info(fmt.Sprintf("%s", payloadMissing))
129
130
131
132
133
134
135
			return nil
		}
		return err
	}
	for _, lib := range libs {
		// The last dir component is the variant name
		variant := filepath.Base(filepath.Dir(lib))
136
		availableDynLibs[variant] = lib
137
138
139
140
141
142
143
	}

	if err := verifyDriverAccess(); err != nil {
		return err
	}

	// Report which dynamic libraries we have loaded to assist troubleshooting
144
	variants := make([]string, len(availableDynLibs))
145
	i := 0
146
	for variant := range availableDynLibs {
147
148
149
		variants[i] = variant
		i++
	}
150
151
	slog.Info(fmt.Sprintf("Dynamic LLM libraries %v", variants))
	slog.Debug("Override detection logic by setting OLLAMA_LLM_LIBRARY")
152
153
154
155

	return nil
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
156
func extractDynamicLibs(assetsDir, glob string) ([]string, error) {
157
158
159
160
161
162
	files, err := fs.Glob(libEmbed, glob)
	if err != nil || len(files) == 0 {
		return nil, payloadMissing
	}
	libs := []string{}

163
	g := new(errgroup.Group)
164
165
166
	for _, file := range files {
		pathComps := strings.Split(file, "/")
		if len(pathComps) != pathComponentCount {
167
			slog.Error(fmt.Sprintf("unexpected payload components: %v", pathComps))
168
169
170
			continue
		}

171
172
173
174
		file := file
		g.Go(func() error {
			// llama.cpp/build/$OS/$GOARCH/$VARIANT/lib/$LIBRARY
			// Include the variant in the path to avoid conflicts between multiple server libs
Daniel Hiltgen's avatar
Daniel Hiltgen committed
175
			targetDir := filepath.Join(assetsDir, pathComps[pathComponentCount-3])
176
			srcFile, err := libEmbed.Open(file)
177
			if err != nil {
178
				return fmt.Errorf("read payload %s: %v", file, err)
179
			}
180
181
			defer srcFile.Close()
			if err := os.MkdirAll(targetDir, 0o755); err != nil {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
182
				return fmt.Errorf("create payload lib dir %s: %v", assetsDir, err)
183
			}
184
185
			src := io.Reader(srcFile)
			filename := file
186
187
188
189
190
191
			if strings.HasSuffix(file, ".gz") {
				src, err = gzip.NewReader(src)
				if err != nil {
					return fmt.Errorf("decompress payload %s: %v", file, err)
				}
				filename = strings.TrimSuffix(filename, ".gz")
192
193
194
195
196
197
198
			}

			destFile := filepath.Join(targetDir, filepath.Base(filename))
			if strings.Contains(destFile, "server") {
				libs = append(libs, destFile)
			}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
199
200
201
202
203
204
205
			destFp, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
			if err != nil {
				return fmt.Errorf("write payload %s: %v", file, err)
			}
			defer destFp.Close()
			if _, err := io.Copy(destFp, src); err != nil {
				return fmt.Errorf("copy payload %s: %v", file, err)
206
207
208
			}
			return nil
		})
209
	}
210
	return libs, g.Wait()
211
212
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
213
func extractPayloadFiles(assetsDir, glob string) error {
214
215
216
217
218
219
220
221
222
223
224
	files, err := fs.Glob(libEmbed, glob)
	if err != nil || len(files) == 0 {
		return payloadMissing
	}

	for _, file := range files {
		srcFile, err := libEmbed.Open(file)
		if err != nil {
			return fmt.Errorf("read payload %s: %v", file, err)
		}
		defer srcFile.Close()
Daniel Hiltgen's avatar
Daniel Hiltgen committed
225
226
		if err := os.MkdirAll(assetsDir, 0o755); err != nil {
			return fmt.Errorf("create payload lib dir %s: %v", assetsDir, err)
227
		}
228
229
		src := io.Reader(srcFile)
		filename := file
230
231
232
233
234
235
		if strings.HasSuffix(file, ".gz") {
			src, err = gzip.NewReader(src)
			if err != nil {
				return fmt.Errorf("decompress payload %s: %v", file, err)
			}
			filename = strings.TrimSuffix(filename, ".gz")
236
		}
237

Daniel Hiltgen's avatar
Daniel Hiltgen committed
238
		destFile := filepath.Join(assetsDir, filepath.Base(filename))
239
240
241
		_, err = os.Stat(destFile)
		switch {
		case errors.Is(err, os.ErrNotExist):
Daniel Hiltgen's avatar
Daniel Hiltgen committed
242
			destFp, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
243
244
245
			if err != nil {
				return fmt.Errorf("write payload %s: %v", file, err)
			}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
246
247
			defer destFp.Close()
			if _, err := io.Copy(destFp, src); err != nil {
248
249
250
251
				return fmt.Errorf("copy payload %s: %v", file, err)
			}
		case err != nil:
			return fmt.Errorf("stat payload %s: %v", file, err)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
252
253
		case err == nil:
			slog.Debug("payload already exists: " + destFile)
254
255
256
257
258
259
260
261
262
263
		}
	}
	return nil
}

func verifyDriverAccess() error {
	if runtime.GOOS != "linux" {
		return nil
	}
	// Only check ROCm access if we have the dynamic lib loaded
264
	if rocmDynLibPresent() {
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
		// Verify we have permissions - either running as root, or we have group access to the driver
		fd, err := os.OpenFile("/dev/kfd", os.O_RDWR, 0666)
		if err != nil {
			if errors.Is(err, fs.ErrPermission) {
				return fmt.Errorf("Radeon card detected, but permissions not set up properly.  Either run ollama as root, or add you user account to the render group.")
			} else if errors.Is(err, fs.ErrNotExist) {
				// expected behavior without a radeon card
				return nil
			}

			return fmt.Errorf("failed to check permission on /dev/kfd: %w", err)
		}
		fd.Close()
	}
	return nil
}