config.go 8.8 KB
Newer Older
wangkx1's avatar
init  
wangkx1 committed
1
2
3
4
5
package envconfig

import (
	"fmt"
	"log/slog"
	"math"
	"net"
	"net/url"
	"os"
	"path/filepath"
	"runtime"
	"strconv"
	"strings"
	"time"
)

wangkx1's avatar
wangkx1 committed
17
18
19
20
// Host returns the scheme and host. Host can be configured via the OLLAMA_HOST environment variable.
// Default is scheme "http" and host "127.0.0.1:11434"
func Host() *url.URL {
	defaultPort := "11434"
wangkx1's avatar
init  
wangkx1 committed
21

wangkx1's avatar
wangkx1 committed
22
23
24
25
26
27
28
29
30
31
	s := strings.TrimSpace(Var("OLLAMA_HOST"))
	scheme, hostport, ok := strings.Cut(s, "://")
	switch {
	case !ok:
		scheme, hostport = "http", s
	case scheme == "http":
		defaultPort = "80"
	case scheme == "https":
		defaultPort = "443"
	}
wangkx1's avatar
init  
wangkx1 committed
32

wangkx1's avatar
wangkx1 committed
33
34
	// trim trailing slashes
	hostport = strings.TrimRight(hostport, "/")
wangkx1's avatar
init  
wangkx1 committed
35

wangkx1's avatar
wangkx1 committed
36
37
38
39
40
41
42
43
44
	host, port, err := net.SplitHostPort(hostport)
	if err != nil {
		host, port = "127.0.0.1", defaultPort
		if ip := net.ParseIP(strings.Trim(hostport, "[]")); ip != nil {
			host = ip.String()
		} else if hostport != "" {
			host = hostport
		}
	}
wangkx1's avatar
init  
wangkx1 committed
45

wangkx1's avatar
wangkx1 committed
46
47
48
49
50
51
	if n, err := strconv.ParseInt(port, 10, 32); err != nil || n > 65535 || n < 0 {
		slog.Warn("invalid port, using default", "port", port, "default", defaultPort)
		return &url.URL{
			Scheme: scheme,
			Host:   net.JoinHostPort(host, defaultPort),
		}
wangkx1's avatar
init  
wangkx1 committed
52
	}
wangkx1's avatar
wangkx1 committed
53
54
55
56

	return &url.URL{
		Scheme: scheme,
		Host:   net.JoinHostPort(host, port),
wangkx1's avatar
init  
wangkx1 committed
57
58
59
	}
}

wangkx1's avatar
wangkx1 committed
60
61
62
63
// Origins returns a list of allowed origins. Origins can be configured via the OLLAMA_ORIGINS environment variable.
func Origins() (origins []string) {
	if s := Var("OLLAMA_ORIGINS"); s != "" {
		origins = strings.Split(s, ",")
wangkx1's avatar
init  
wangkx1 committed
64
65
	}

wangkx1's avatar
wangkx1 committed
66
67
68
69
70
71
72
73
74
75
76
77
78
79
	for _, origin := range []string{"localhost", "127.0.0.1", "0.0.0.0"} {
		origins = append(origins,
			fmt.Sprintf("http://%s", origin),
			fmt.Sprintf("https://%s", origin),
			fmt.Sprintf("http://%s", net.JoinHostPort(origin, "*")),
			fmt.Sprintf("https://%s", net.JoinHostPort(origin, "*")),
		)
	}

	origins = append(origins,
		"app://*",
		"file://*",
		"tauri://*",
	)
wangkx1's avatar
init  
wangkx1 committed
80

wangkx1's avatar
wangkx1 committed
81
	return origins
wangkx1's avatar
init  
wangkx1 committed
82
83
}

wangkx1's avatar
wangkx1 committed
84
85
86
87
88
89
90
91
92
93
94
// Models returns the path to the models directory. Models directory can be configured via the OLLAMA_MODELS environment variable.
// Default is $HOME/.ollama/models
func Models() string {
	if s := Var("OLLAMA_MODELS"); s != "" {
		return s
	}

	home, err := os.UserHomeDir()
	if err != nil {
		panic(err)
	}
wangkx1's avatar
init  
wangkx1 committed
95

wangkx1's avatar
wangkx1 committed
96
	return filepath.Join(home, ".ollama", "models")
wangkx1's avatar
init  
wangkx1 committed
97
98
}

wangkx1's avatar
wangkx1 committed
99
100
101
102
103
104
105
106
107
108
// KeepAlive returns the duration that models stay loaded in memory. KeepAlive can be configured via the OLLAMA_KEEP_ALIVE environment variable.
// Negative values are treated as infinite. Zero is treated as no keep alive.
// Default is 5 minutes.
func KeepAlive() (keepAlive time.Duration) {
	keepAlive = 5 * time.Minute
	if s := Var("OLLAMA_KEEP_ALIVE"); s != "" {
		if d, err := time.ParseDuration(s); err == nil {
			keepAlive = d
		} else if n, err := strconv.ParseInt(s, 10, 64); err == nil {
			keepAlive = time.Duration(n) * time.Second
wangkx1's avatar
init  
wangkx1 committed
109
110
111
		}
	}

wangkx1's avatar
wangkx1 committed
112
113
	if keepAlive < 0 {
		return time.Duration(math.MaxInt64)
wangkx1's avatar
init  
wangkx1 committed
114
115
	}

wangkx1's avatar
wangkx1 committed
116
117
	return keepAlive
}
wangkx1's avatar
init  
wangkx1 committed
118

wangkx1's avatar
wangkx1 committed
119
120
121
122
123
124
125
func Bool(k string) func() bool {
	return func() bool {
		if s := Var(k); s != "" {
			b, err := strconv.ParseBool(s)
			if err != nil {
				return true
			}
wangkx1's avatar
init  
wangkx1 committed
126

wangkx1's avatar
wangkx1 committed
127
			return b
wangkx1's avatar
init  
wangkx1 committed
128
129
		}

wangkx1's avatar
wangkx1 committed
130
		return false
wangkx1's avatar
init  
wangkx1 committed
131
	}
wangkx1's avatar
wangkx1 committed
132
}
wangkx1's avatar
init  
wangkx1 committed
133

wangkx1's avatar
wangkx1 committed
134
135
136
137
138
139
140
141
142
143
144
145
146
147
var (
	// Debug enables additional debug information.
	Debug = Bool("OLLAMA_DEBUG")
	// FlashAttention enables the experimental flash attention feature.
	FlashAttention = Bool("OLLAMA_FLASH_ATTENTION")
	// NoHistory disables readline history.
	NoHistory = Bool("OLLAMA_NOHISTORY")
	// NoPrune disables pruning of model blobs on startup.
	NoPrune = Bool("OLLAMA_NOPRUNE")
	// SchedSpread allows scheduling models across all GPUs.
	SchedSpread = Bool("OLLAMA_SCHED_SPREAD")
	// IntelGPU enables experimental Intel GPU detection.
	IntelGPU = Bool("OLLAMA_INTEL_GPU")
)
wangkx1's avatar
init  
wangkx1 committed
148

wangkx1's avatar
wangkx1 committed
149
150
151
func String(s string) func() string {
	return func() string {
		return Var(s)
wangkx1's avatar
init  
wangkx1 committed
152
	}
wangkx1's avatar
wangkx1 committed
153
}
wangkx1's avatar
init  
wangkx1 committed
154

wangkx1's avatar
wangkx1 committed
155
156
157
158
159
160
161
162
163
164
// LLMLibrary bypasses LLM library autodetection and TmpDir overrides the
// temporary file location; the remaining variables select or override which
// GPU devices the NVIDIA/AMD runtimes use (see AsMap for descriptions).
var (
	LLMLibrary = String("OLLAMA_LLM_LIBRARY")
	TmpDir     = String("OLLAMA_TMPDIR")

	CudaVisibleDevices    = String("CUDA_VISIBLE_DEVICES")
	HipVisibleDevices     = String("HIP_VISIBLE_DEVICES")
	RocrVisibleDevices    = String("ROCR_VISIBLE_DEVICES")
	GpuDeviceOrdinal      = String("GPU_DEVICE_ORDINAL")
	HsaOverrideGfxVersion = String("HSA_OVERRIDE_GFX_VERSION")
)
wangkx1's avatar
init  
wangkx1 committed
165

wangkx1's avatar
wangkx1 committed
166
167
168
func RunnersDir() (p string) {
	if p := Var("OLLAMA_RUNNERS_DIR"); p != "" {
		return p
wangkx1's avatar
init  
wangkx1 committed
169
170
	}

wangkx1's avatar
wangkx1 committed
171
172
	if runtime.GOOS != "windows" {
		return
wangkx1's avatar
init  
wangkx1 committed
173
174
	}

wangkx1's avatar
wangkx1 committed
175
176
177
	defer func() {
		if p == "" {
			slog.Error("unable to locate llm runner directory. Set OLLAMA_RUNNERS_DIR to the location of 'ollama_runners'")
wangkx1's avatar
init  
wangkx1 committed
178
		}
wangkx1's avatar
wangkx1 committed
179
	}()
wangkx1's avatar
init  
wangkx1 committed
180

wangkx1's avatar
wangkx1 committed
181
182
183
184
	// On Windows we do not carry the payloads inside the main executable
	exe, err := os.Executable()
	if err != nil {
		return
wangkx1's avatar
init  
wangkx1 committed
185
186
	}

wangkx1's avatar
wangkx1 committed
187
188
189
	cwd, err := os.Getwd()
	if err != nil {
		return
wangkx1's avatar
init  
wangkx1 committed
190
191
	}

wangkx1's avatar
wangkx1 committed
192
193
194
195
196
197
198
	var paths []string
	for _, root := range []string{filepath.Dir(exe), cwd} {
		paths = append(paths,
			root,
			filepath.Join(root, "windows-"+runtime.GOARCH),
			filepath.Join(root, "dist", "windows-"+runtime.GOARCH),
		)
wangkx1's avatar
init  
wangkx1 committed
199
200
	}

wangkx1's avatar
wangkx1 committed
201
202
203
204
205
206
	// Try a few variations to improve developer experience when building from source in the local tree
	for _, path := range paths {
		candidate := filepath.Join(path, "ollama_runners")
		if _, err := os.Stat(candidate); err == nil {
			p = candidate
			break
wangkx1's avatar
init  
wangkx1 committed
207
208
209
		}
	}

wangkx1's avatar
wangkx1 committed
210
211
	return p
}
wangkx1's avatar
init  
wangkx1 committed
212

wangkx1's avatar
wangkx1 committed
213
214
215
216
217
218
219
220
221
func Uint(key string, defaultValue uint) func() uint {
	return func() uint {
		if s := Var(key); s != "" {
			if n, err := strconv.ParseUint(s, 10, 64); err != nil {
				slog.Warn("invalid environment variable, using default", "key", key, "value", s, "default", defaultValue)
			} else {
				return uint(n)
			}
		}
wangkx1's avatar
init  
wangkx1 committed
222

wangkx1's avatar
wangkx1 committed
223
		return defaultValue
wangkx1's avatar
init  
wangkx1 committed
224
	}
wangkx1's avatar
wangkx1 committed
225
}
wangkx1's avatar
init  
wangkx1 committed
226

wangkx1's avatar
wangkx1 committed
227
228
229
230
231
232
233
234
235
236
var (
	// NumParallel sets the number of parallel model requests. NumParallel can be configured via the OLLAMA_NUM_PARALLEL environment variable.
	// Default is 0.
	NumParallel = Uint("OLLAMA_NUM_PARALLEL", 0)
	// MaxRunners sets the maximum number of loaded models. MaxRunners can be configured via the OLLAMA_MAX_LOADED_MODELS environment variable.
	// Default is 0.
	MaxRunners = Uint("OLLAMA_MAX_LOADED_MODELS", 0)
	// MaxQueue sets the maximum number of queued requests. MaxQueue can be configured via the OLLAMA_MAX_QUEUE environment variable.
	// Default is 512.
	MaxQueue = Uint("OLLAMA_MAX_QUEUE", 512)
	// MaxVRAM sets a maximum VRAM override in bytes. MaxVRAM can be configured via the OLLAMA_MAX_VRAM environment variable.
	// Default is 0.
	MaxVRAM = Uint("OLLAMA_MAX_VRAM", 0)
)
wangkx1's avatar
init  
wangkx1 committed
237

wangkx1's avatar
wangkx1 committed
238
239
240
241
// EnvVar describes a single configuration environment variable: its name,
// its current parsed value, and a human-readable description (see AsMap).
type EnvVar struct {
	Name        string
	Value       any
	Description string
}

wangkx1's avatar
wangkx1 committed
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
// AsMap returns all supported configuration variables keyed by name, each
// with its current parsed value and a description. GPU selection/override
// variables are only included on non-darwin platforms.
func AsMap() map[string]EnvVar {
	ret := map[string]EnvVar{
		"OLLAMA_DEBUG":             {"OLLAMA_DEBUG", Debug(), "Show additional debug information (e.g. OLLAMA_DEBUG=1)"},
		"OLLAMA_FLASH_ATTENTION":   {"OLLAMA_FLASH_ATTENTION", FlashAttention(), "Enabled flash attention"},
		"OLLAMA_HOST":              {"OLLAMA_HOST", Host(), "IP Address for the ollama server (default 127.0.0.1:11434)"},
		"OLLAMA_KEEP_ALIVE":        {"OLLAMA_KEEP_ALIVE", KeepAlive(), "The duration that models stay loaded in memory (default \"5m\")"},
		"OLLAMA_LLM_LIBRARY":       {"OLLAMA_LLM_LIBRARY", LLMLibrary(), "Set LLM library to bypass autodetection"},
		"OLLAMA_MAX_LOADED_MODELS": {"OLLAMA_MAX_LOADED_MODELS", MaxRunners(), "Maximum number of loaded models per GPU"},
		"OLLAMA_MAX_QUEUE":         {"OLLAMA_MAX_QUEUE", MaxQueue(), "Maximum number of queued requests"},
		"OLLAMA_MODELS":            {"OLLAMA_MODELS", Models(), "The path to the models directory"},
		"OLLAMA_NOHISTORY":         {"OLLAMA_NOHISTORY", NoHistory(), "Do not preserve readline history"},
		"OLLAMA_NOPRUNE":           {"OLLAMA_NOPRUNE", NoPrune(), "Do not prune model blobs on startup"},
		"OLLAMA_NUM_PARALLEL":      {"OLLAMA_NUM_PARALLEL", NumParallel(), "Maximum number of parallel requests"},
		"OLLAMA_ORIGINS":           {"OLLAMA_ORIGINS", Origins(), "A comma separated list of allowed origins"},
		"OLLAMA_RUNNERS_DIR":       {"OLLAMA_RUNNERS_DIR", RunnersDir(), "Location for runners"},
		"OLLAMA_SCHED_SPREAD":      {"OLLAMA_SCHED_SPREAD", SchedSpread(), "Always schedule model across all GPUs"},
		"OLLAMA_TMPDIR":            {"OLLAMA_TMPDIR", TmpDir(), "Location for temporary files"},
	}
	// GPU device selection variables do not apply on macOS.
	if runtime.GOOS != "darwin" {
		ret["CUDA_VISIBLE_DEVICES"] = EnvVar{"CUDA_VISIBLE_DEVICES", CudaVisibleDevices(), "Set which NVIDIA devices are visible"}
		ret["HIP_VISIBLE_DEVICES"] = EnvVar{"HIP_VISIBLE_DEVICES", HipVisibleDevices(), "Set which AMD devices are visible"}
		ret["ROCR_VISIBLE_DEVICES"] = EnvVar{"ROCR_VISIBLE_DEVICES", RocrVisibleDevices(), "Set which AMD devices are visible"}
		ret["GPU_DEVICE_ORDINAL"] = EnvVar{"GPU_DEVICE_ORDINAL", GpuDeviceOrdinal(), "Set which AMD devices are visible"}
		ret["HSA_OVERRIDE_GFX_VERSION"] = EnvVar{"HSA_OVERRIDE_GFX_VERSION", HsaOverrideGfxVersion(), "Override the gfx used for all detected AMD GPUs"}
		ret["OLLAMA_INTEL_GPU"] = EnvVar{"OLLAMA_INTEL_GPU", IntelGPU(), "Enable experimental Intel GPU detection"}
	}
	return ret
}

wangkx1's avatar
wangkx1 committed
273
274
275
276
func Values() map[string]string {
	vals := make(map[string]string)
	for k, v := range AsMap() {
		vals[k] = fmt.Sprintf("%v", v.Value)
wangkx1's avatar
init  
wangkx1 committed
277
	}
wangkx1's avatar
wangkx1 committed
278
279
	return vals
}
wangkx1's avatar
init  
wangkx1 committed
280

wangkx1's avatar
wangkx1 committed
281
282
283
// Var returns an environment variable stripped of leading and trailing quotes or spaces
func Var(key string) string {
	return strings.Trim(strings.TrimSpace(os.Getenv(key)), "\"'")
wangkx1's avatar
init  
wangkx1 committed
284
}