config.go 8.8 KB
Newer Older
mashun1's avatar
v1  
mashun1 committed
1
2
3
4
5
package envconfig

import (
	"fmt"
	"log/slog"
xuxzh1's avatar
init  
xuxzh1 committed
6
7
8
	"math"
	"net"
	"net/url"
mashun1's avatar
v1  
mashun1 committed
9
10
11
12
13
	"os"
	"path/filepath"
	"runtime"
	"strconv"
	"strings"
xuxzh1's avatar
init  
xuxzh1 committed
14
	"time"
mashun1's avatar
v1  
mashun1 committed
15
16
)

xuxzh1's avatar
init  
xuxzh1 committed
17
18
19
20
// Host returns the scheme and host. Host can be configured via the OLLAMA_HOST environment variable.
// Default is scheme "http" and host "127.0.0.1:11434"
func Host() *url.URL {
	defaultPort := "11434"
mashun1's avatar
v1  
mashun1 committed
21

xuxzh1's avatar
init  
xuxzh1 committed
22
23
24
25
26
27
28
29
30
31
	s := strings.TrimSpace(Var("OLLAMA_HOST"))
	scheme, hostport, ok := strings.Cut(s, "://")
	switch {
	case !ok:
		scheme, hostport = "http", s
	case scheme == "http":
		defaultPort = "80"
	case scheme == "https":
		defaultPort = "443"
	}
mashun1's avatar
v1  
mashun1 committed
32

xuxzh1's avatar
init  
xuxzh1 committed
33
34
35
36
37
38
39
40
41
42
43
	// trim trailing slashes
	hostport = strings.TrimRight(hostport, "/")

	host, port, err := net.SplitHostPort(hostport)
	if err != nil {
		host, port = "127.0.0.1", defaultPort
		if ip := net.ParseIP(strings.Trim(hostport, "[]")); ip != nil {
			host = ip.String()
		} else if hostport != "" {
			host = hostport
		}
mashun1's avatar
v1  
mashun1 committed
44
45
	}

xuxzh1's avatar
init  
xuxzh1 committed
46
47
48
49
50
51
	if n, err := strconv.ParseInt(port, 10, 32); err != nil || n > 65535 || n < 0 {
		slog.Warn("invalid port, using default", "port", port, "default", defaultPort)
		return &url.URL{
			Scheme: scheme,
			Host:   net.JoinHostPort(host, defaultPort),
		}
mashun1's avatar
v1  
mashun1 committed
52
53
	}

xuxzh1's avatar
init  
xuxzh1 committed
54
55
56
57
	return &url.URL{
		Scheme: scheme,
		Host:   net.JoinHostPort(host, port),
	}
mashun1's avatar
v1  
mashun1 committed
58
59
}

xuxzh1's avatar
init  
xuxzh1 committed
60
61
62
63
64
65
66
67
68
69
70
71
72
73
// Origins returns a list of allowed origins. Origins can be configured via the OLLAMA_ORIGINS environment variable.
func Origins() (origins []string) {
	if s := Var("OLLAMA_ORIGINS"); s != "" {
		origins = strings.Split(s, ",")
	}

	for _, origin := range []string{"localhost", "127.0.0.1", "0.0.0.0"} {
		origins = append(origins,
			fmt.Sprintf("http://%s", origin),
			fmt.Sprintf("https://%s", origin),
			fmt.Sprintf("http://%s", net.JoinHostPort(origin, "*")),
			fmt.Sprintf("https://%s", net.JoinHostPort(origin, "*")),
		)
	}
mashun1's avatar
v1  
mashun1 committed
74

xuxzh1's avatar
init  
xuxzh1 committed
75
76
77
78
79
	origins = append(origins,
		"app://*",
		"file://*",
		"tauri://*",
	)
mashun1's avatar
v1  
mashun1 committed
80

xuxzh1's avatar
init  
xuxzh1 committed
81
	return origins
mashun1's avatar
v1  
mashun1 committed
82
83
}

xuxzh1's avatar
init  
xuxzh1 committed
84
85
86
87
88
// Models returns the path to the models directory. Models directory can be configured via the OLLAMA_MODELS environment variable.
// Default is $HOME/.ollama/models
func Models() string {
	if s := Var("OLLAMA_MODELS"); s != "" {
		return s
mashun1's avatar
v1  
mashun1 committed
89
90
	}

xuxzh1's avatar
init  
xuxzh1 committed
91
92
93
	home, err := os.UserHomeDir()
	if err != nil {
		panic(err)
mashun1's avatar
v1  
mashun1 committed
94
95
	}

xuxzh1's avatar
init  
xuxzh1 committed
96
97
	return filepath.Join(home, ".ollama", "models")
}
mashun1's avatar
v1  
mashun1 committed
98

xuxzh1's avatar
init  
xuxzh1 committed
99
100
101
102
103
104
105
106
107
108
// KeepAlive returns the duration that models stay loaded in memory. KeepAlive can be configured via the OLLAMA_KEEP_ALIVE environment variable.
// Negative values are treated as infinite. Zero is treated as no keep alive.
// Default is 5 minutes.
func KeepAlive() (keepAlive time.Duration) {
	keepAlive = 5 * time.Minute
	if s := Var("OLLAMA_KEEP_ALIVE"); s != "" {
		if d, err := time.ParseDuration(s); err == nil {
			keepAlive = d
		} else if n, err := strconv.ParseInt(s, 10, 64); err == nil {
			keepAlive = time.Duration(n) * time.Second
mashun1's avatar
v1  
mashun1 committed
109
		}
xuxzh1's avatar
init  
xuxzh1 committed
110
	}
mashun1's avatar
v1  
mashun1 committed
111

xuxzh1's avatar
init  
xuxzh1 committed
112
113
114
115
116
117
	if keepAlive < 0 {
		return time.Duration(math.MaxInt64)
	}

	return keepAlive
}
mashun1's avatar
v1  
mashun1 committed
118

xuxzh1's avatar
init  
xuxzh1 committed
119
120
121
122
123
124
func Bool(k string) func() bool {
	return func() bool {
		if s := Var(k); s != "" {
			b, err := strconv.ParseBool(s)
			if err != nil {
				return true
mashun1's avatar
v1  
mashun1 committed
125
			}
xuxzh1's avatar
init  
xuxzh1 committed
126
127

			return b
mashun1's avatar
v1  
mashun1 committed
128
		}
xuxzh1's avatar
init  
xuxzh1 committed
129
130

		return false
mashun1's avatar
v1  
mashun1 committed
131
	}
xuxzh1's avatar
init  
xuxzh1 committed
132
}
mashun1's avatar
v1  
mashun1 committed
133

xuxzh1's avatar
init  
xuxzh1 committed
134
135
136
137
138
139
140
141
142
143
144
145
146
147
var (
	// Debug enables additional debug information. Debug can be configured via the OLLAMA_DEBUG environment variable.
	Debug = Bool("OLLAMA_DEBUG")
	// FlashAttention enables the experimental flash attention feature. FlashAttention can be configured via the OLLAMA_FLASH_ATTENTION environment variable.
	FlashAttention = Bool("OLLAMA_FLASH_ATTENTION")
	// NoHistory disables readline history. NoHistory can be configured via the OLLAMA_NOHISTORY environment variable.
	NoHistory = Bool("OLLAMA_NOHISTORY")
	// NoPrune disables pruning of model blobs on startup. NoPrune can be configured via the OLLAMA_NOPRUNE environment variable.
	NoPrune = Bool("OLLAMA_NOPRUNE")
	// SchedSpread allows scheduling models across all GPUs. SchedSpread can be configured via the OLLAMA_SCHED_SPREAD environment variable.
	SchedSpread = Bool("OLLAMA_SCHED_SPREAD")
	// IntelGPU enables experimental Intel GPU detection. IntelGPU can be configured via the OLLAMA_INTEL_GPU environment variable.
	IntelGPU = Bool("OLLAMA_INTEL_GPU")
)
mashun1's avatar
v1  
mashun1 committed
148

xuxzh1's avatar
init  
xuxzh1 committed
149
150
151
func String(s string) func() string {
	return func() string {
		return Var(s)
mashun1's avatar
v1  
mashun1 committed
152
	}
xuxzh1's avatar
init  
xuxzh1 committed
153
154
155
156
157
}

var (
	LLMLibrary = String("OLLAMA_LLM_LIBRARY")
	TmpDir     = String("OLLAMA_TMPDIR")
mashun1's avatar
v1  
mashun1 committed
158

xuxzh1's avatar
init  
xuxzh1 committed
159
160
161
162
163
164
	CudaVisibleDevices    = String("CUDA_VISIBLE_DEVICES")
	HipVisibleDevices     = String("HIP_VISIBLE_DEVICES")
	RocrVisibleDevices    = String("ROCR_VISIBLE_DEVICES")
	GpuDeviceOrdinal      = String("GPU_DEVICE_ORDINAL")
	HsaOverrideGfxVersion = String("HSA_OVERRIDE_GFX_VERSION")
)
mashun1's avatar
v1  
mashun1 committed
165

xuxzh1's avatar
init  
xuxzh1 committed
166
167
168
func RunnersDir() (p string) {
	if p := Var("OLLAMA_RUNNERS_DIR"); p != "" {
		return p
mashun1's avatar
v1  
mashun1 committed
169
170
	}

xuxzh1's avatar
init  
xuxzh1 committed
171
172
	if runtime.GOOS != "windows" {
		return
mashun1's avatar
v1  
mashun1 committed
173
174
	}

xuxzh1's avatar
init  
xuxzh1 committed
175
176
177
178
179
180
181
182
183
184
	defer func() {
		if p == "" {
			slog.Error("unable to locate llm runner directory. Set OLLAMA_RUNNERS_DIR to the location of 'ollama_runners'")
		}
	}()

	// On Windows we do not carry the payloads inside the main executable
	exe, err := os.Executable()
	if err != nil {
		return
mashun1's avatar
v1  
mashun1 committed
185
186
	}

xuxzh1's avatar
init  
xuxzh1 committed
187
188
189
	cwd, err := os.Getwd()
	if err != nil {
		return
mashun1's avatar
v1  
mashun1 committed
190
	}
xuxzh1's avatar
init  
xuxzh1 committed
191
192
193
194
195
196
197

	var paths []string
	for _, root := range []string{filepath.Dir(exe), cwd} {
		paths = append(paths,
			root,
			filepath.Join(root, "windows-"+runtime.GOARCH),
			filepath.Join(root, "dist", "windows-"+runtime.GOARCH),
mashun1's avatar
v1  
mashun1 committed
198
199
200
		)
	}

xuxzh1's avatar
init  
xuxzh1 committed
201
202
203
204
205
206
	// Try a few variations to improve developer experience when building from source in the local tree
	for _, path := range paths {
		candidate := filepath.Join(path, "ollama_runners")
		if _, err := os.Stat(candidate); err == nil {
			p = candidate
			break
mashun1's avatar
v1  
mashun1 committed
207
208
209
		}
	}

xuxzh1's avatar
init  
xuxzh1 committed
210
211
212
213
214
215
216
217
218
219
220
	return p
}

func Uint(key string, defaultValue uint) func() uint {
	return func() uint {
		if s := Var(key); s != "" {
			if n, err := strconv.ParseUint(s, 10, 64); err != nil {
				slog.Warn("invalid environment variable, using default", "key", key, "value", s, "default", defaultValue)
			} else {
				return uint(n)
			}
mashun1's avatar
v1  
mashun1 committed
221
		}
xuxzh1's avatar
init  
xuxzh1 committed
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268

		return defaultValue
	}
}

var (
	// NumParallel sets the number of parallel model requests. NumParallel can be configured via the OLLAMA_NUM_PARALLEL environment variable.
	// Default is 0.
	NumParallel = Uint("OLLAMA_NUM_PARALLEL", 0)
	// MaxRunners sets the maximum number of loaded models. MaxRunners can be configured via the OLLAMA_MAX_LOADED_MODELS environment variable.
	// Default is 0.
	MaxRunners = Uint("OLLAMA_MAX_LOADED_MODELS", 0)
	// MaxQueue sets the maximum number of queued requests. MaxQueue can be configured via the OLLAMA_MAX_QUEUE environment variable.
	// Default is 512.
	MaxQueue = Uint("OLLAMA_MAX_QUEUE", 512)
	// MaxVRAM sets a maximum VRAM override in bytes. MaxVRAM can be configured via the OLLAMA_MAX_VRAM environment variable.
	// Default is 0.
	MaxVRAM = Uint("OLLAMA_MAX_VRAM", 0)
)

// EnvVar describes one configuration environment variable together with its
// current effective value, as reported by AsMap.
type EnvVar struct {
	// Name is the name of the environment variable.
	Name string
	// Value is the current value as returned by the matching accessor; its
	// dynamic type depends on the variable (bool, uint, string, ...).
	Value any
	// Description is a short human-readable explanation of the variable.
	Description string
}

func AsMap() map[string]EnvVar {
	ret := map[string]EnvVar{
		"OLLAMA_DEBUG":             {"OLLAMA_DEBUG", Debug(), "Show additional debug information (e.g. OLLAMA_DEBUG=1)"},
		"OLLAMA_FLASH_ATTENTION":   {"OLLAMA_FLASH_ATTENTION", FlashAttention(), "Enabled flash attention"},
		"OLLAMA_HOST":              {"OLLAMA_HOST", Host(), "IP Address for the ollama server (default 127.0.0.1:11434)"},
		"OLLAMA_KEEP_ALIVE":        {"OLLAMA_KEEP_ALIVE", KeepAlive(), "The duration that models stay loaded in memory (default \"5m\")"},
		"OLLAMA_LLM_LIBRARY":       {"OLLAMA_LLM_LIBRARY", LLMLibrary(), "Set LLM library to bypass autodetection"},
		"OLLAMA_MAX_LOADED_MODELS": {"OLLAMA_MAX_LOADED_MODELS", MaxRunners(), "Maximum number of loaded models per GPU"},
		"OLLAMA_MAX_QUEUE":         {"OLLAMA_MAX_QUEUE", MaxQueue(), "Maximum number of queued requests"},
		"OLLAMA_MODELS":            {"OLLAMA_MODELS", Models(), "The path to the models directory"},
		"OLLAMA_NOHISTORY":         {"OLLAMA_NOHISTORY", NoHistory(), "Do not preserve readline history"},
		"OLLAMA_NOPRUNE":           {"OLLAMA_NOPRUNE", NoPrune(), "Do not prune model blobs on startup"},
		"OLLAMA_NUM_PARALLEL":      {"OLLAMA_NUM_PARALLEL", NumParallel(), "Maximum number of parallel requests"},
		"OLLAMA_ORIGINS":           {"OLLAMA_ORIGINS", Origins(), "A comma separated list of allowed origins"},
		"OLLAMA_RUNNERS_DIR":       {"OLLAMA_RUNNERS_DIR", RunnersDir(), "Location for runners"},
		"OLLAMA_SCHED_SPREAD":      {"OLLAMA_SCHED_SPREAD", SchedSpread(), "Always schedule model across all GPUs"},
		"OLLAMA_TMPDIR":            {"OLLAMA_TMPDIR", TmpDir(), "Location for temporary files"},
	}
	if runtime.GOOS != "darwin" {
		ret["CUDA_VISIBLE_DEVICES"] = EnvVar{"CUDA_VISIBLE_DEVICES", CudaVisibleDevices(), "Set which NVIDIA devices are visible"}
		ret["HIP_VISIBLE_DEVICES"] = EnvVar{"HIP_VISIBLE_DEVICES", HipVisibleDevices(), "Set which AMD devices are visible"}
		ret["ROCR_VISIBLE_DEVICES"] = EnvVar{"ROCR_VISIBLE_DEVICES", RocrVisibleDevices(), "Set which AMD devices are visible"}
		ret["GPU_DEVICE_ORDINAL"] = EnvVar{"GPU_DEVICE_ORDINAL", GpuDeviceOrdinal(), "Set which AMD devices are visible"}
		ret["HSA_OVERRIDE_GFX_VERSION"] = EnvVar{"HSA_OVERRIDE_GFX_VERSION", HsaOverrideGfxVersion(), "Override the gfx used for all detected AMD GPUs"}
		ret["OLLAMA_INTEL_GPU"] = EnvVar{"OLLAMA_INTEL_GPU", IntelGPU(), "Enable experimental Intel GPU detection"}
mashun1's avatar
v1  
mashun1 committed
269
	}
xuxzh1's avatar
init  
xuxzh1 committed
270
271
272
273
274
275
276
277
278
279
	return ret
}

// Values returns every environment variable reported by AsMap, keyed by name,
// with its current value rendered as a string using the default %v format.
func Values() map[string]string {
	envVars := AsMap()

	rendered := make(map[string]string, len(envVars))
	for name, envVar := range envVars {
		rendered[name] = fmt.Sprintf("%v", envVar.Value)
	}

	return rendered
}
mashun1's avatar
v1  
mashun1 committed
280

xuxzh1's avatar
init  
xuxzh1 committed
281
282
283
// Var returns an environment variable stripped of leading and trailing quotes or spaces.
// Surrounding whitespace is removed first, then any run of single or double quote
// characters at either end. An unset variable yields the empty string.
func Var(key string) string {
	value := strings.TrimSpace(os.Getenv(key))
	return strings.Trim(value, "\"'")
}