Commit 0ce8bcfd authored by xuxzh1

init

parent b0135f4b
@@ -15,7 +15,7 @@ import { Ollama } from "@langchain/community/llms/ollama";
const ollama = new Ollama({
  baseUrl: "http://localhost:11434",
-  model: "llama3",
+  model: "llama3.1",
});
const answer = await ollama.invoke(`why is the sky blue?`);
@@ -23,7 +23,7 @@ const answer = await ollama.invoke(`why is the sky blue?`);
console.log(answer);
```
-That will get us the same thing as if we ran `ollama run llama3 "why is the sky blue"` in the terminal. But we want to load a document from the web to ask a question against. **Cheerio** is a great library for ingesting a webpage, and **LangChain** uses it in their **CheerioWebBaseLoader**. So let's install **Cheerio** and build that part of the app.
+That will get us the same thing as if we ran `ollama run llama3.1 "why is the sky blue"` in the terminal. But we want to load a document from the web to ask a question against. **Cheerio** is a great library for ingesting a webpage, and **LangChain** uses it in their **CheerioWebBaseLoader**. So let's install **Cheerio** and build that part of the app.
```bash
npm install cheerio
...
@@ -45,7 +45,7 @@ all_splits = text_splitter.split_documents(data)
```
It's split up, but we have to find the relevant splits and then submit those to the model. We can do this by creating embeddings and storing them in a vector database. We can use Ollama directly to instantiate an embedding model. We will use ChromaDB in this example for a vector database. `pip install chromadb`
+We also need to pull the embedding model: `ollama pull nomic-embed-text`
```python
from langchain.embeddings import OllamaEmbeddings
from langchain.vectorstores import Chroma
@@ -68,7 +68,8 @@ The next thing is to send the question and the relevant parts of the docs to the
```python
from langchain.chains import RetrievalQA
qachain=RetrievalQA.from_chain_type(ollama, retriever=vectorstore.as_retriever())
-qachain.invoke({"query": question})
+res = qachain.invoke({"query": question})
+print(res['result'])
```
The answer received from this chain was:
...
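Putting the pieces of this tutorial together, a minimal end-to-end sketch of the retrieval flow described above could look like the following. It assumes `llama3.1` and `nomic-embed-text` have already been pulled, `chromadb` is installed, and the older `langchain` import paths used in this tutorial are still available; the URL and the question are placeholders.

```python
from langchain.llms import Ollama
from langchain.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OllamaEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA

# Load a web page and split it into overlapping chunks.
loader = WebBaseLoader("https://ollama.com/blog/run-llama2-uncensored-locally")
data = loader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)
all_splits = text_splitter.split_documents(data)

# Embed the chunks with a local Ollama embedding model and index them in Chroma.
embeddings = OllamaEmbeddings(model="nomic-embed-text")
vectorstore = Chroma.from_documents(documents=all_splits, embedding=embeddings)

# The retriever finds the chunks most similar to the question, and the chain
# stuffs only those chunks into the prompt sent to the model.
ollama = Ollama(base_url="http://localhost:11434", model="llama3.1")
qachain = RetrievalQA.from_chain_type(ollama, retriever=vectorstore.as_retriever())
res = qachain.invoke({"query": "What is this article about?"})
print(res["result"])
```

Only the handful of retrieved splits reach the model, which is what keeps the prompt within the context window even for long documents.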
@@ -19,10 +19,12 @@ Logs will often be helpful in diagnosing the problem (see
## System Requirements
-* Windows 10 or newer, Home or Pro
+* Windows 10 22H2 or newer, Home or Pro
* NVIDIA 452.39 or newer Drivers if you have an NVIDIA card
* AMD Radeon Driver https://www.amd.com/en/support if you have a Radeon card
+Ollama uses unicode characters for progress indication, which may render as unknown squares in some older terminal fonts in Windows 10. If you see this, try changing your terminal font settings.
## API Access
Here's a quick example showing API access from `powershell`
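The PowerShell snippet itself is collapsed in this view; the endpoint it calls is the standard Ollama REST API, which any HTTP client can reach. A minimal Python sketch, assuming the server is listening on the default `127.0.0.1:11434` and a model such as `llama3.1` has been pulled, would be:

```python
import json
import urllib.request

# Ask the local Ollama server for a single, non-streaming completion.
payload = json.dumps({
    "model": "llama3.1",            # assumes this model has been pulled locally
    "prompt": "Why is the sky blue?",
    "stream": False,                # return one JSON object instead of a stream
}).encode("utf-8")

req = urllib.request.Request(
    "http://127.0.0.1:11434/api/generate",
    data=payload,
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req) as resp:
    body = json.load(resp)

print(body["response"])
```

With `"stream": false` the server returns one JSON object whose `response` field holds the full completion; leave streaming on to receive a JSON object per chunk of output instead.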
@@ -39,8 +41,8 @@ server.
Ollama on Windows stores files in a few different locations. You can view them in
the explorer window by hitting `<cmd>+R` and type in:
- `explorer %LOCALAPPDATA%\Ollama` contains logs, and downloaded updates
-    - *app.log* contains logs from the GUI application
+    - *app.log* contains the most recent logs from the GUI application
-    - *server.log* contains the server logs
+    - *server.log* contains the most recent server logs
    - *upgrade.log* contains log output for upgrades
- `explorer %LOCALAPPDATA%\Programs\Ollama` contains the binaries (The installer adds this to your user PATH)
- `explorer %HOMEPATH%\.ollama` contains models and configuration
...
@@ -3,210 +3,282 @@ package envconfig
import (
    "fmt"
    "log/slog"
+   "math"
+   "net"
+   "net/url"
    "os"
    "path/filepath"
    "runtime"
    "strconv"
    "strings"
+   "time"
)
-var (
-    // Set via OLLAMA_ORIGINS in the environment
-    AllowOrigins []string
-    // Set via OLLAMA_DEBUG in the environment
-    Debug bool
-    // Experimental flash attention
-    FlashAttention bool
-    // Set via OLLAMA_KEEP_ALIVE in the environment
-    KeepAlive string
-    // Set via OLLAMA_LLM_LIBRARY in the environment
-    LLMLibrary string
-    // Set via OLLAMA_MAX_LOADED_MODELS in the environment
-    MaxRunners int
-    // Set via OLLAMA_MAX_QUEUE in the environment
-    MaxQueuedRequests int
-    // Set via OLLAMA_MAX_VRAM in the environment
-    MaxVRAM uint64
-    // Set via OLLAMA_NOHISTORY in the environment
-    NoHistory bool
-    // Set via OLLAMA_NOPRUNE in the environment
-    NoPrune bool
-    // Set via OLLAMA_NUM_PARALLEL in the environment
-    NumParallel int
-    // Set via OLLAMA_RUNNERS_DIR in the environment
-    RunnersDir string
-    // Set via OLLAMA_TMPDIR in the environment
-    TmpDir string
-)
-
-type EnvVar struct {
-    Name        string
-    Value       any
-    Description string
-}
-
-func AsMap() map[string]EnvVar {
-    return map[string]EnvVar{
-        "OLLAMA_DEBUG":             {"OLLAMA_DEBUG", Debug, "Show additional debug information (e.g. OLLAMA_DEBUG=1)"},
-        "OLLAMA_FLASH_ATTENTION":   {"OLLAMA_FLASH_ATTENTION", FlashAttention, "Enabled flash attention"},
-        "OLLAMA_HOST":              {"OLLAMA_HOST", "", "IP Address for the ollama server (default 127.0.0.1:11434)"},
-        "OLLAMA_KEEP_ALIVE":        {"OLLAMA_KEEP_ALIVE", KeepAlive, "The duration that models stay loaded in memory (default \"5m\")"},
-        "OLLAMA_LLM_LIBRARY":       {"OLLAMA_LLM_LIBRARY", LLMLibrary, "Set LLM library to bypass autodetection"},
-        "OLLAMA_MAX_LOADED_MODELS": {"OLLAMA_MAX_LOADED_MODELS", MaxRunners, "Maximum number of loaded models (default 1)"},
-        "OLLAMA_MAX_QUEUE":         {"OLLAMA_MAX_QUEUE", MaxQueuedRequests, "Maximum number of queued requests"},
-        "OLLAMA_MAX_VRAM":          {"OLLAMA_MAX_VRAM", MaxVRAM, "Maximum VRAM"},
-        "OLLAMA_MODELS":            {"OLLAMA_MODELS", "", "The path to the models directory"},
-        "OLLAMA_NOHISTORY":         {"OLLAMA_NOHISTORY", NoHistory, "Do not preserve readline history"},
-        "OLLAMA_NOPRUNE":           {"OLLAMA_NOPRUNE", NoPrune, "Do not prune model blobs on startup"},
-        "OLLAMA_NUM_PARALLEL":      {"OLLAMA_NUM_PARALLEL", NumParallel, "Maximum number of parallel requests (default 1)"},
-        "OLLAMA_ORIGINS":           {"OLLAMA_ORIGINS", AllowOrigins, "A comma separated list of allowed origins"},
-        "OLLAMA_RUNNERS_DIR":       {"OLLAMA_RUNNERS_DIR", RunnersDir, "Location for runners"},
-        "OLLAMA_TMPDIR":            {"OLLAMA_TMPDIR", TmpDir, "Location for temporary files"},
-    }
-}
-
-func Values() map[string]string {
-    vals := make(map[string]string)
-    for k, v := range AsMap() {
-        vals[k] = fmt.Sprintf("%v", v.Value)
-    }
-    return vals
-}
-
-var defaultAllowOrigins = []string{
-    "localhost",
-    "127.0.0.1",
-    "0.0.0.0",
-}
-
-// Clean quotes and spaces from the value
-func clean(key string) string {
-    return strings.Trim(os.Getenv(key), "\"' ")
-}
-
-func init() {
-    // default values
-    NumParallel = 1
-    MaxRunners = 1
-    MaxQueuedRequests = 512
-    LoadConfig()
-}
-
-func LoadConfig() {
-    if debug := clean("OLLAMA_DEBUG"); debug != "" {
-        d, err := strconv.ParseBool(debug)
-        if err == nil {
-            Debug = d
-        } else {
-            Debug = true
-        }
-    }
-    if fa := clean("OLLAMA_FLASH_ATTENTION"); fa != "" {
-        d, err := strconv.ParseBool(fa)
-        if err == nil {
-            FlashAttention = d
-        }
-    }
-    RunnersDir = clean("OLLAMA_RUNNERS_DIR")
-    if runtime.GOOS == "windows" && RunnersDir == "" {
-        // On Windows we do not carry the payloads inside the main executable
-        appExe, err := os.Executable()
-        if err != nil {
-            slog.Error("failed to lookup executable path", "error", err)
-        }
-        cwd, err := os.Getwd()
-        if err != nil {
-            slog.Error("failed to lookup working directory", "error", err)
-        }
-        var paths []string
-        for _, root := range []string{filepath.Dir(appExe), cwd} {
-            paths = append(paths,
-                filepath.Join(root),
-                filepath.Join(root, "windows-"+runtime.GOARCH),
-                filepath.Join(root, "dist", "windows-"+runtime.GOARCH),
-            )
-        }
-        // Try a few variations to improve developer experience when building from source in the local tree
-        for _, p := range paths {
-            candidate := filepath.Join(p, "ollama_runners")
-            _, err := os.Stat(candidate)
-            if err == nil {
-                RunnersDir = candidate
-                break
-            }
-        }
-        if RunnersDir == "" {
-            slog.Error("unable to locate llm runner directory. Set OLLAMA_RUNNERS_DIR to the location of 'ollama_runners'")
-        }
-    }
-    TmpDir = clean("OLLAMA_TMPDIR")
-    userLimit := clean("OLLAMA_MAX_VRAM")
-    if userLimit != "" {
-        avail, err := strconv.ParseUint(userLimit, 10, 64)
-        if err != nil {
-            slog.Error("invalid setting, ignoring", "OLLAMA_MAX_VRAM", userLimit, "error", err)
-        } else {
-            MaxVRAM = avail
-        }
-    }
-    LLMLibrary = clean("OLLAMA_LLM_LIBRARY")
-    if onp := clean("OLLAMA_NUM_PARALLEL"); onp != "" {
-        val, err := strconv.Atoi(onp)
-        if err != nil || val <= 0 {
-            slog.Error("invalid setting must be greater than zero", "OLLAMA_NUM_PARALLEL", onp, "error", err)
-        } else {
-            NumParallel = val
-        }
-    }
-    if nohistory := clean("OLLAMA_NOHISTORY"); nohistory != "" {
-        NoHistory = true
-    }
-    if noprune := clean("OLLAMA_NOPRUNE"); noprune != "" {
-        NoPrune = true
-    }
-    if origins := clean("OLLAMA_ORIGINS"); origins != "" {
-        AllowOrigins = strings.Split(origins, ",")
-    }
-    for _, allowOrigin := range defaultAllowOrigins {
-        AllowOrigins = append(AllowOrigins,
-            fmt.Sprintf("http://%s", allowOrigin),
-            fmt.Sprintf("https://%s", allowOrigin),
-            fmt.Sprintf("http://%s:*", allowOrigin),
-            fmt.Sprintf("https://%s:*", allowOrigin),
-        )
-    }
-    maxRunners := clean("OLLAMA_MAX_LOADED_MODELS")
-    if maxRunners != "" {
-        m, err := strconv.Atoi(maxRunners)
-        if err != nil {
-            slog.Error("invalid setting", "OLLAMA_MAX_LOADED_MODELS", maxRunners, "error", err)
-        } else {
-            MaxRunners = m
-        }
-    }
-    if onp := os.Getenv("OLLAMA_MAX_QUEUE"); onp != "" {
-        p, err := strconv.Atoi(onp)
-        if err != nil || p <= 0 {
-            slog.Error("invalid setting", "OLLAMA_MAX_QUEUE", onp, "error", err)
-        } else {
-            MaxQueuedRequests = p
-        }
-    }
-    KeepAlive = clean("OLLAMA_KEEP_ALIVE")
-}
+// Host returns the scheme and host. Host can be configured via the OLLAMA_HOST environment variable.
+// Default is scheme "http" and host "127.0.0.1:11434"
+func Host() *url.URL {
+    defaultPort := "11434"
+    s := strings.TrimSpace(Var("OLLAMA_HOST"))
+    scheme, hostport, ok := strings.Cut(s, "://")
+    switch {
+    case !ok:
+        scheme, hostport = "http", s
+    case scheme == "http":
+        defaultPort = "80"
+    case scheme == "https":
+        defaultPort = "443"
+    }
+    // trim trailing slashes
+    hostport = strings.TrimRight(hostport, "/")
+    host, port, err := net.SplitHostPort(hostport)
+    if err != nil {
+        host, port = "127.0.0.1", defaultPort
+        if ip := net.ParseIP(strings.Trim(hostport, "[]")); ip != nil {
+            host = ip.String()
+        } else if hostport != "" {
+            host = hostport
+        }
+    }
+    if n, err := strconv.ParseInt(port, 10, 32); err != nil || n > 65535 || n < 0 {
+        slog.Warn("invalid port, using default", "port", port, "default", defaultPort)
+        return &url.URL{
+            Scheme: scheme,
+            Host:   net.JoinHostPort(host, defaultPort),
+        }
+    }
+    return &url.URL{
+        Scheme: scheme,
+        Host:   net.JoinHostPort(host, port),
+    }
+}
+
+// Origins returns a list of allowed origins. Origins can be configured via the OLLAMA_ORIGINS environment variable.
+func Origins() (origins []string) {
+    if s := Var("OLLAMA_ORIGINS"); s != "" {
+        origins = strings.Split(s, ",")
+    }
+    for _, origin := range []string{"localhost", "127.0.0.1", "0.0.0.0"} {
+        origins = append(origins,
+            fmt.Sprintf("http://%s", origin),
+            fmt.Sprintf("https://%s", origin),
+            fmt.Sprintf("http://%s", net.JoinHostPort(origin, "*")),
+            fmt.Sprintf("https://%s", net.JoinHostPort(origin, "*")),
+        )
+    }
+    origins = append(origins,
+        "app://*",
+        "file://*",
+        "tauri://*",
+    )
+    return origins
+}
+
+// Models returns the path to the models directory. Models directory can be configured via the OLLAMA_MODELS environment variable.
+// Default is $HOME/.ollama/models
+func Models() string {
+    if s := Var("OLLAMA_MODELS"); s != "" {
+        return s
+    }
+    home, err := os.UserHomeDir()
+    if err != nil {
+        panic(err)
+    }
+    return filepath.Join(home, ".ollama", "models")
+}
+
+// KeepAlive returns the duration that models stay loaded in memory. KeepAlive can be configured via the OLLAMA_KEEP_ALIVE environment variable.
+// Negative values are treated as infinite. Zero is treated as no keep alive.
+// Default is 5 minutes.
+func KeepAlive() (keepAlive time.Duration) {
+    keepAlive = 5 * time.Minute
+    if s := Var("OLLAMA_KEEP_ALIVE"); s != "" {
+        if d, err := time.ParseDuration(s); err == nil {
+            keepAlive = d
+        } else if n, err := strconv.ParseInt(s, 10, 64); err == nil {
+            keepAlive = time.Duration(n) * time.Second
+        }
+    }
+    if keepAlive < 0 {
+        return time.Duration(math.MaxInt64)
+    }
+    return keepAlive
+}
+
+func Bool(k string) func() bool {
+    return func() bool {
+        if s := Var(k); s != "" {
+            b, err := strconv.ParseBool(s)
+            if err != nil {
+                return true
+            }
+            return b
+        }
+        return false
+    }
+}
+
+var (
+    // Debug enabled additional debug information.
+    Debug = Bool("OLLAMA_DEBUG")
+    // FlashAttention enables the experimental flash attention feature.
+    FlashAttention = Bool("OLLAMA_FLASH_ATTENTION")
+    // NoHistory disables readline history.
+    NoHistory = Bool("OLLAMA_NOHISTORY")
+    // NoPrune disables pruning of model blobs on startup.
+    NoPrune = Bool("OLLAMA_NOPRUNE")
+    // SchedSpread allows scheduling models across all GPUs.
+    SchedSpread = Bool("OLLAMA_SCHED_SPREAD")
+    // IntelGPU enables experimental Intel GPU detection.
+    IntelGPU = Bool("OLLAMA_INTEL_GPU")
+)
+
+func String(s string) func() string {
+    return func() string {
+        return Var(s)
+    }
+}
+
+var (
+    LLMLibrary = String("OLLAMA_LLM_LIBRARY")
+    TmpDir     = String("OLLAMA_TMPDIR")
+    CudaVisibleDevices    = String("CUDA_VISIBLE_DEVICES")
+    HipVisibleDevices     = String("HIP_VISIBLE_DEVICES")
+    RocrVisibleDevices    = String("ROCR_VISIBLE_DEVICES")
+    GpuDeviceOrdinal      = String("GPU_DEVICE_ORDINAL")
+    HsaOverrideGfxVersion = String("HSA_OVERRIDE_GFX_VERSION")
+)
+
+func RunnersDir() (p string) {
+    if p := Var("OLLAMA_RUNNERS_DIR"); p != "" {
+        return p
+    }
+    if runtime.GOOS != "windows" {
+        return
+    }
+    defer func() {
+        if p == "" {
+            slog.Error("unable to locate llm runner directory. Set OLLAMA_RUNNERS_DIR to the location of 'ollama_runners'")
+        }
+    }()
+    // On Windows we do not carry the payloads inside the main executable
+    exe, err := os.Executable()
+    if err != nil {
+        return
+    }
+    cwd, err := os.Getwd()
+    if err != nil {
+        return
+    }
+    var paths []string
+    for _, root := range []string{filepath.Dir(exe), cwd} {
+        paths = append(paths,
+            root,
+            filepath.Join(root, "windows-"+runtime.GOARCH),
+            filepath.Join(root, "dist", "windows-"+runtime.GOARCH),
+        )
+    }
+    // Try a few variations to improve developer experience when building from source in the local tree
+    for _, path := range paths {
+        candidate := filepath.Join(path, "ollama_runners")
+        if _, err := os.Stat(candidate); err == nil {
+            p = candidate
+            break
+        }
+    }
+    return p
+}
+
+func Uint(key string, defaultValue uint) func() uint {
+    return func() uint {
+        if s := Var(key); s != "" {
+            if n, err := strconv.ParseUint(s, 10, 64); err != nil {
+                slog.Warn("invalid environment variable, using default", "key", key, "value", s, "default", defaultValue)
+            } else {
+                return uint(n)
+            }
+        }
+        return defaultValue
+    }
+}
+
+var (
+    // NumParallel sets the number of parallel model requests. NumParallel can be configured via the OLLAMA_NUM_PARALLEL environment variable.
+    NumParallel = Uint("OLLAMA_NUM_PARALLEL", 0)
+    // MaxRunners sets the maximum number of loaded models. MaxRunners can be configured via the OLLAMA_MAX_LOADED_MODELS environment variable.
+    MaxRunners = Uint("OLLAMA_MAX_LOADED_MODELS", 0)
+    // MaxQueue sets the maximum number of queued requests. MaxQueue can be configured via the OLLAMA_MAX_QUEUE environment variable.
+    MaxQueue = Uint("OLLAMA_MAX_QUEUE", 512)
+    // MaxVRAM sets a maximum VRAM override in bytes. MaxVRAM can be configured via the OLLAMA_MAX_VRAM environment variable.
+    MaxVRAM = Uint("OLLAMA_MAX_VRAM", 0)
+)
+
+type EnvVar struct {
+    Name        string
+    Value       any
+    Description string
+}
+
+func AsMap() map[string]EnvVar {
+    ret := map[string]EnvVar{
+        "OLLAMA_DEBUG":             {"OLLAMA_DEBUG", Debug(), "Show additional debug information (e.g. OLLAMA_DEBUG=1)"},
+        "OLLAMA_FLASH_ATTENTION":   {"OLLAMA_FLASH_ATTENTION", FlashAttention(), "Enabled flash attention"},
+        "OLLAMA_HOST":              {"OLLAMA_HOST", Host(), "IP Address for the ollama server (default 127.0.0.1:11434)"},
+        "OLLAMA_KEEP_ALIVE":        {"OLLAMA_KEEP_ALIVE", KeepAlive(), "The duration that models stay loaded in memory (default \"5m\")"},
+        "OLLAMA_LLM_LIBRARY":       {"OLLAMA_LLM_LIBRARY", LLMLibrary(), "Set LLM library to bypass autodetection"},
+        "OLLAMA_MAX_LOADED_MODELS": {"OLLAMA_MAX_LOADED_MODELS", MaxRunners(), "Maximum number of loaded models per GPU"},
+        "OLLAMA_MAX_QUEUE":         {"OLLAMA_MAX_QUEUE", MaxQueue(), "Maximum number of queued requests"},
+        "OLLAMA_MODELS":            {"OLLAMA_MODELS", Models(), "The path to the models directory"},
+        "OLLAMA_NOHISTORY":         {"OLLAMA_NOHISTORY", NoHistory(), "Do not preserve readline history"},
+        "OLLAMA_NOPRUNE":           {"OLLAMA_NOPRUNE", NoPrune(), "Do not prune model blobs on startup"},
+        "OLLAMA_NUM_PARALLEL":      {"OLLAMA_NUM_PARALLEL", NumParallel(), "Maximum number of parallel requests"},
+        "OLLAMA_ORIGINS":           {"OLLAMA_ORIGINS", Origins(), "A comma separated list of allowed origins"},
+        "OLLAMA_RUNNERS_DIR":       {"OLLAMA_RUNNERS_DIR", RunnersDir(), "Location for runners"},
+        "OLLAMA_SCHED_SPREAD":      {"OLLAMA_SCHED_SPREAD", SchedSpread(), "Always schedule model across all GPUs"},
+        "OLLAMA_TMPDIR":            {"OLLAMA_TMPDIR", TmpDir(), "Location for temporary files"},
+    }
+    if runtime.GOOS != "darwin" {
+        ret["CUDA_VISIBLE_DEVICES"] = EnvVar{"CUDA_VISIBLE_DEVICES", CudaVisibleDevices(), "Set which NVIDIA devices are visible"}
+        ret["HIP_VISIBLE_DEVICES"] = EnvVar{"HIP_VISIBLE_DEVICES", HipVisibleDevices(), "Set which AMD devices are visible"}
+        ret["ROCR_VISIBLE_DEVICES"] = EnvVar{"ROCR_VISIBLE_DEVICES", RocrVisibleDevices(), "Set which AMD devices are visible"}
+        ret["GPU_DEVICE_ORDINAL"] = EnvVar{"GPU_DEVICE_ORDINAL", GpuDeviceOrdinal(), "Set which AMD devices are visible"}
+        ret["HSA_OVERRIDE_GFX_VERSION"] = EnvVar{"HSA_OVERRIDE_GFX_VERSION", HsaOverrideGfxVersion(), "Override the gfx used for all detected AMD GPUs"}
+        ret["OLLAMA_INTEL_GPU"] = EnvVar{"OLLAMA_INTEL_GPU", IntelGPU(), "Enable experimental Intel GPU detection"}
+    }
+    return ret
+}
+
+func Values() map[string]string {
+    vals := make(map[string]string)
+    for k, v := range AsMap() {
+        vals[k] = fmt.Sprintf("%v", v.Value)
+    }
+    return vals
+}
+
+// Var returns an environment variable stripped of leading and trailing quotes or spaces
+func Var(key string) string {
+    return strings.Trim(strings.TrimSpace(os.Getenv(key)), "\"'")
+}
package envconfig
import (
+   "math"
    "testing"
+   "time"
-   "github.com/stretchr/testify/require"
+   "github.com/google/go-cmp/cmp"
)
-func TestConfig(t *testing.T) {
-    Debug = false // Reset whatever was loaded in init()
-    t.Setenv("OLLAMA_DEBUG", "")
-    LoadConfig()
-    require.False(t, Debug)
-    t.Setenv("OLLAMA_DEBUG", "false")
-    LoadConfig()
-    require.False(t, Debug)
-    t.Setenv("OLLAMA_DEBUG", "1")
-    LoadConfig()
-    require.True(t, Debug)
-    t.Setenv("OLLAMA_FLASH_ATTENTION", "1")
-    LoadConfig()
-    require.True(t, FlashAttention)
-}
+func TestHost(t *testing.T) {
+    cases := map[string]struct {
+        value  string
+        expect string
+    }{
+        "empty":               {"", "127.0.0.1:11434"},
+        "only address":        {"1.2.3.4", "1.2.3.4:11434"},
+        "only port":           {":1234", ":1234"},
+        "address and port":    {"1.2.3.4:1234", "1.2.3.4:1234"},
+        "hostname":            {"example.com", "example.com:11434"},
+        "hostname and port":   {"example.com:1234", "example.com:1234"},
+        "zero port":           {":0", ":0"},
+        "too large port":      {":66000", ":11434"},
+        "too small port":      {":-1", ":11434"},
+        "ipv6 localhost":      {"[::1]", "[::1]:11434"},
+        "ipv6 world open":     {"[::]", "[::]:11434"},
+        "ipv6 no brackets":    {"::1", "[::1]:11434"},
+        "ipv6 + port":         {"[::1]:1337", "[::1]:1337"},
+        "extra space":         {" 1.2.3.4 ", "1.2.3.4:11434"},
+        "extra quotes":        {"\"1.2.3.4\"", "1.2.3.4:11434"},
+        "extra space+quotes":  {" \" 1.2.3.4 \" ", "1.2.3.4:11434"},
+        "extra single quotes": {"'1.2.3.4'", "1.2.3.4:11434"},
+        "http":                {"http://1.2.3.4", "1.2.3.4:80"},
+        "http port":           {"http://1.2.3.4:4321", "1.2.3.4:4321"},
+        "https":               {"https://1.2.3.4", "1.2.3.4:443"},
+        "https port":          {"https://1.2.3.4:4321", "1.2.3.4:4321"},
+    }
+    for name, tt := range cases {
+        t.Run(name, func(t *testing.T) {
+            t.Setenv("OLLAMA_HOST", tt.value)
+            if host := Host(); host.Host != tt.expect {
+                t.Errorf("%s: expected %s, got %s", name, tt.expect, host.Host)
+            }
+        })
+    }
+}
+
+func TestOrigins(t *testing.T) {
+    cases := []struct {
+        value  string
+        expect []string
+    }{
+        {"", []string{
+            "http://localhost",
+            "https://localhost",
+            "http://localhost:*",
+            "https://localhost:*",
+            "http://127.0.0.1",
+            "https://127.0.0.1",
+            "http://127.0.0.1:*",
+            "https://127.0.0.1:*",
+            "http://0.0.0.0",
+            "https://0.0.0.0",
+            "http://0.0.0.0:*",
+            "https://0.0.0.0:*",
+            "app://*",
+            "file://*",
+            "tauri://*",
+        }},
+        {"http://10.0.0.1", []string{
+            "http://10.0.0.1",
+            "http://localhost",
+            "https://localhost",
+            "http://localhost:*",
+            "https://localhost:*",
+            "http://127.0.0.1",
+            "https://127.0.0.1",
+            "http://127.0.0.1:*",
+            "https://127.0.0.1:*",
+            "http://0.0.0.0",
+            "https://0.0.0.0",
+            "http://0.0.0.0:*",
+            "https://0.0.0.0:*",
+            "app://*",
+            "file://*",
+            "tauri://*",
+        }},
+        {"http://172.16.0.1,https://192.168.0.1", []string{
+            "http://172.16.0.1",
+            "https://192.168.0.1",
+            "http://localhost",
+            "https://localhost",
+            "http://localhost:*",
+            "https://localhost:*",
+            "http://127.0.0.1",
+            "https://127.0.0.1",
+            "http://127.0.0.1:*",
+            "https://127.0.0.1:*",
+            "http://0.0.0.0",
+            "https://0.0.0.0",
+            "http://0.0.0.0:*",
+            "https://0.0.0.0:*",
+            "app://*",
+            "file://*",
+            "tauri://*",
+        }},
+        {"http://totally.safe,http://definitely.legit", []string{
+            "http://totally.safe",
+            "http://definitely.legit",
+            "http://localhost",
+            "https://localhost",
+            "http://localhost:*",
+            "https://localhost:*",
+            "http://127.0.0.1",
+            "https://127.0.0.1",
+            "http://127.0.0.1:*",
+            "https://127.0.0.1:*",
+            "http://0.0.0.0",
+            "https://0.0.0.0",
+            "http://0.0.0.0:*",
+            "https://0.0.0.0:*",
+            "app://*",
+            "file://*",
+            "tauri://*",
+        }},
+    }
+    for _, tt := range cases {
+        t.Run(tt.value, func(t *testing.T) {
+            t.Setenv("OLLAMA_ORIGINS", tt.value)
+            if diff := cmp.Diff(Origins(), tt.expect); diff != "" {
+                t.Errorf("%s: mismatch (-want +got):\n%s", tt.value, diff)
+            }
+        })
+    }
+}
+
+func TestBool(t *testing.T) {
+    cases := map[string]bool{
+        "":      false,
+        "true":  true,
+        "false": false,
+        "1":     true,
+        "0":     false,
+        // invalid values
+        "random":    true,
+        "something": true,
+    }
+    for k, v := range cases {
+        t.Run(k, func(t *testing.T) {
+            t.Setenv("OLLAMA_BOOL", k)
+            if b := Bool("OLLAMA_BOOL")(); b != v {
+                t.Errorf("%s: expected %t, got %t", k, v, b)
+            }
+        })
+    }
+}
+
+func TestUint(t *testing.T) {
+    cases := map[string]uint{
+        "0":    0,
+        "1":    1,
+        "1337": 1337,
+        // default values
+        "":       11434,
+        "-1":     11434,
+        "0o10":   11434,
+        "0x10":   11434,
+        "string": 11434,
+    }
+    for k, v := range cases {
+        t.Run(k, func(t *testing.T) {
+            t.Setenv("OLLAMA_UINT", k)
+            if i := Uint("OLLAMA_UINT", 11434)(); i != v {
+                t.Errorf("%s: expected %d, got %d", k, v, i)
+            }
+        })
+    }
+}
+
+func TestKeepAlive(t *testing.T) {
+    cases := map[string]time.Duration{
+        "":       5 * time.Minute,
+        "1s":     time.Second,
+        "1m":     time.Minute,
+        "1h":     time.Hour,
+        "5m0s":   5 * time.Minute,
+        "1h2m3s": 1*time.Hour + 2*time.Minute + 3*time.Second,
+        "0":      time.Duration(0),
+        "60":     60 * time.Second,
+        "120":    2 * time.Minute,
+        "3600":   time.Hour,
+        "-0":     time.Duration(0),
+        "-1":     time.Duration(math.MaxInt64),
+        "-1m":    time.Duration(math.MaxInt64),
+        // invalid values
+        " ":   5 * time.Minute,
+        "???": 5 * time.Minute,
+        "1d":  5 * time.Minute,
+        "1y":  5 * time.Minute,
+        "1w":  5 * time.Minute,
+    }
+    for tt, expect := range cases {
+        t.Run(tt, func(t *testing.T) {
+            t.Setenv("OLLAMA_KEEP_ALIVE", tt)
+            if actual := KeepAlive(); actual != expect {
+                t.Errorf("%s: expected %s, got %s", tt, expect, actual)
+            }
+        })
+    }
+}
+
+func TestVar(t *testing.T) {
+    cases := map[string]string{
+        "value":       "value",
+        " value ":     "value",
+        " 'value' ":   "value",
+        ` "value" `:   "value",
+        " ' value ' ": " value ",
+        ` " value " `: " value ",
+    }
+    for k, v := range cases {
+        t.Run(k, func(t *testing.T) {
+            t.Setenv("OLLAMA_VAR", k)
+            if s := Var("OLLAMA_VAR"); s != v {
+                t.Errorf("%s: expected %q, got %q", k, v, s)
+            }
+        })
+    }
+}
@@ -35,7 +35,7 @@ func main() {
    ctx := context.Background()
    req := &api.ChatRequest{
-       Model:    "llama3",
+       Model:    "llama3.1",
        Messages: messages,
    }
...
@@ -16,7 +16,7 @@ func main() {
    // By default, GenerateRequest is streaming.
    req := &api.GenerateRequest{
-       Model:  "gemma",
+       Model:  "gemma2",
        Prompt: "how many planets are there?",
    }
...
@@ -15,7 +15,7 @@ func main() {
    }
    req := &api.GenerateRequest{
-       Model:  "gemma",
+       Model:  "gemma2",
        Prompt: "how many planets are there?",
        // set streaming to false
...
@@ -4,6 +4,14 @@ This example provides an interface for asking questions to a PDF document.
## Setup
+1. Ensure you have the `llama3.1` model installed:
+```
+ollama pull llama3.1
+```
+2. Install the Python Requirements.
```
pip install -r requirements.txt
```
...
@@ -51,7 +51,7 @@ while True:
        template=template,
    )
-   llm = Ollama(model="llama3:8b", callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]))
+   llm = Ollama(model="llama3.1", callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]))
    qa_chain = RetrievalQA.from_chain_type(
        llm,
        retriever=vectorstore.as_retriever(),
...
@@ -77,13 +77,21 @@ LOADER_MAPPING = {
def load_single_document(file_path: str) -> List[Document]:
-    ext = "." + file_path.rsplit(".", 1)[-1]
-    if ext in LOADER_MAPPING:
-        loader_class, loader_args = LOADER_MAPPING[ext]
-        loader = loader_class(file_path, **loader_args)
-        return loader.load()
-
-    raise ValueError(f"Unsupported file extension '{ext}'")
+    if os.path.getsize(file_path) != 0:
+        filename, ext = os.path.splitext(file_path)
+        if ext in LOADER_MAPPING:
+            loader_class, loader_args = LOADER_MAPPING[ext]
+            try:
+                loader = loader_class(file_path, **loader_args)
+                if loader:
+                    return loader.load()
+            except:
+                print(f"Corrupted file {file_path}. Ignoring it.")
+        else:
+            print(f"Unsupported file {file_path}. Ignoring it.")
+    else:
+        print(f"Empty file {file_path}. Ignoring it.")
def load_documents(source_dir: str, ignored_files: List[str] = []) -> List[Document]:
    """
@@ -100,7 +108,8 @@ def load_documents(source_dir: str, ignored_files: List[str] = []) -> List[Docum
    results = []
    with tqdm(total=len(filtered_files), desc='Loading new documents', ncols=80) as pbar:
        for i, docs in enumerate(pool.imap_unordered(load_single_document, filtered_files)):
-            results.extend(docs)
+            if docs:
+                results.extend(docs)
            pbar.update()
    return results
...
@@ -11,4 +11,5 @@ tabulate==0.9.0
pandoc==2.3
pypandoc==1.11
tqdm==4.66.1
sentence_transformers==2.2.2
+numpy>=1.22.2 # not directly required, pinned by Snyk to avoid a vulnerability
\ No newline at end of file
@@ -4,10 +4,10 @@ This example summarizes the website, [https://ollama.com/blog/run-llama2-uncenso
## Running the Example
-1. Ensure you have the `llama2` model installed:
+1. Ensure you have the `llama3.1` model installed:
```bash
-ollama pull llama2
+ollama pull llama3.1
```
2. Install the Python Requirements.
...
@@ -5,8 +5,8 @@ from langchain.chains.summarize import load_summarize_chain
loader = WebBaseLoader("https://ollama.com/blog/run-llama2-uncensored-locally")
docs = loader.load()
-llm = Ollama(model="llama3")
+llm = Ollama(model="llama3.1")
chain = load_summarize_chain(llm, chain_type="stuff")
result = chain.invoke(docs)
print(result)
@@ -4,10 +4,10 @@ This example is a basic "hello world" of using LangChain with Ollama.
## Running the Example
-1. Ensure you have the `llama3` model installed:
+1. Ensure you have the `llama3.1` model installed:
```bash
-ollama pull llama3
+ollama pull llama3.1
```
2. Install the Python Requirements.
...
from langchain.llms import Ollama
input = input("What is your question?")
-llm = Ollama(model="llama3")
+llm = Ollama(model="llama3.1")
res = llm.predict(input)
print (res)
-FROM llama3
+FROM llama3.1
PARAMETER temperature 1
SYSTEM """
You are Mario from super mario bros, acting as an assistant.
...
@@ -2,12 +2,12 @@
# Example character: Mario
-This example shows how to create a basic character using Llama3 as the base model.
+This example shows how to create a basic character using Llama3.1 as the base model.
To run this example:
1. Download the Modelfile
-2. `ollama pull llama3` to get the base model used in the model file.
+2. `ollama pull llama3.1` to get the base model used in the model file.
3. `ollama create NAME -f ./Modelfile`
4. `ollama run NAME`
@@ -18,7 +18,7 @@ Ask it some questions like "Who are you?" or "Is Peach in trouble again?"
What the model file looks like:
```
-FROM llama3
+FROM llama3.1
PARAMETER temperature 1
SYSTEM """
You are Mario from Super Mario Bros, acting as an assistant.
...
@@ -4,7 +4,7 @@ imageName = input("Enter the name of the image: ")
client = docker.from_env()
s = requests.Session()
output=""
-with s.post('http://localhost:11434/api/generate', json={'model': 'dockerit', 'prompt': inputDescription}, stream=True) as r:
+with s.post('http://localhost:11434/api/generate', json={'model': 'mattw/dockerit', 'prompt': inputDescription}, stream=True) as r:
    for line in r.iter_lines():
        if line:
            j = json.loads(line)
...
@@ -2,7 +2,7 @@ import requests
import json
import random
-model = "llama3"
+model = "llama3.1"
template = {
    "firstName": "",
    "lastName": "",
...