Unverified commit 6ec71d8f authored by Devon Rifkin, committed by GitHub

Merge pull request #10452 from ollama/drifkin/4096-context-length

config: update default context length to 4096
parents a25f3f82 44b466ee
@@ -20,7 +20,7 @@ Please refer to the [GPU docs](./gpu.md).

 ## How can I specify the context window size?

-By default, Ollama uses a context window size of 2048 tokens.
+By default, Ollama uses a context window size of 4096 tokens.

 This can be overridden with the `OLLAMA_CONTEXT_LENGTH` environment variable. For example, to set the default context window to 8K, use:
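(The example command itself is truncated in this excerpt; presumably it sets the variable on the server process, along the lines of `OLLAMA_CONTEXT_LENGTH=8192 ollama serve`. The exact command is an assumption and is not shown in the hunk.)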
...
@@ -169,7 +169,7 @@ var (
 	// Enable the new Ollama engine
 	NewEngine = Bool("OLLAMA_NEW_ENGINE")
 	// ContextLength sets the default context length
-	ContextLength = Uint("OLLAMA_CONTEXT_LENGTH", 2048)
+	ContextLength = Uint("OLLAMA_CONTEXT_LENGTH", 4096)
 )

 func String(s string) func() string {
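For illustration, here is a minimal, self-contained sketch of how a `Uint`-style getter can resolve this setting, with the hard-coded fallback (now 4096) used only when `OLLAMA_CONTEXT_LENGTH` is unset or fails to parse. This is an assumed reimplementation based on the hunk above, not the actual envconfig code:

```go
package main

import (
	"fmt"
	"os"
	"strconv"
)

// Uint returns a getter that reads the named environment variable and
// falls back to defaultValue when the variable is unset or is not a
// valid unsigned integer. (Sketch only; the real helper may differ.)
func Uint(key string, defaultValue uint) func() uint {
	return func() uint {
		if s := os.Getenv(key); s != "" {
			if n, err := strconv.ParseUint(s, 10, 64); err == nil {
				return uint(n)
			}
		}
		return defaultValue
	}
}

// ContextLength mirrors the declaration in the diff: default 4096.
var ContextLength = Uint("OLLAMA_CONTEXT_LENGTH", 4096)

func main() {
	fmt.Println(ContextLength()) // prints 4096 unless the env var overrides it
}
```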
...
@@ -255,7 +255,7 @@ func AsMap() map[string]EnvVar {
 		"OLLAMA_ORIGINS":         {"OLLAMA_ORIGINS", AllowedOrigins(), "A comma separated list of allowed origins"},
 		"OLLAMA_SCHED_SPREAD":    {"OLLAMA_SCHED_SPREAD", SchedSpread(), "Always schedule model across all GPUs"},
 		"OLLAMA_MULTIUSER_CACHE": {"OLLAMA_MULTIUSER_CACHE", MultiUserCache(), "Optimize prompt caching for multi-user scenarios"},
-		"OLLAMA_CONTEXT_LENGTH":  {"OLLAMA_CONTEXT_LENGTH", ContextLength(), "Context length to use unless otherwise specified (default: 2048)"},
+		"OLLAMA_CONTEXT_LENGTH":  {"OLLAMA_CONTEXT_LENGTH", ContextLength(), "Context length to use unless otherwise specified (default: 4096)"},
 		"OLLAMA_NEW_ENGINE":      {"OLLAMA_NEW_ENGINE", NewEngine(), "Enable the new Ollama engine"},

 		// Informational
...
@@ -279,8 +279,8 @@ func TestVar(t *testing.T) {

 func TestContextLength(t *testing.T) {
 	cases := map[string]uint{
-		"":     2048,
-		"4096": 4096,
+		"":     4096,
+		"2048": 2048,
 	}

 	for k, v := range cases {
...
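The loop body is elided in the hunk above; here is a plausible completion of the table test, assuming the `ContextLength` getter from the config changes earlier in this diff and the standard library's `t.Setenv` (the package name and assertion style are assumptions):

```go
package envconfig

import "testing"

func TestContextLength(t *testing.T) {
	cases := map[string]uint{
		"":     4096, // unset/empty falls back to the new default
		"2048": 2048, // explicit override wins
	}

	for k, v := range cases {
		t.Run(k, func(t *testing.T) {
			// t.Setenv restores the previous value when the test ends.
			t.Setenv("OLLAMA_CONTEXT_LENGTH", k)
			if got := ContextLength(); got != v {
				t.Errorf("expected %d, got %d", v, got)
			}
		})
	}
}
```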