Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
8570c1c0
Commit
8570c1c0
authored
Jul 03, 2024
by
Michael Yang
Browse files
keepalive
parent
55cd3ddc
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
55 additions
and
47 deletions
+55
-47
envconfig/config.go
envconfig/config.go
+21
-30
envconfig/config_test.go
envconfig/config_test.go
+33
-16
server/sched.go
server/sched.go
+1
-1
No files found.
envconfig/config.go
View file @
8570c1c0
...
@@ -99,6 +99,26 @@ func Models() string {
...
@@ -99,6 +99,26 @@ func Models() string {
return
filepath
.
Join
(
home
,
".ollama"
,
"models"
)
return
filepath
.
Join
(
home
,
".ollama"
,
"models"
)
}
}
// KeepAlive returns the duration that models stay loaded in memory. KeepAlive can be configured via the OLLAMA_KEEP_ALIVE environment variable.
// Negative values are treated as infinite. Zero is treated as no keep alive.
// Default is 5 minutes.
func
KeepAlive
()
(
keepAlive
time
.
Duration
)
{
keepAlive
=
5
*
time
.
Minute
if
s
:=
os
.
Getenv
(
"OLLAMA_KEEP_ALIVE"
);
s
!=
""
{
if
d
,
err
:=
time
.
ParseDuration
(
s
);
err
==
nil
{
keepAlive
=
d
}
else
if
n
,
err
:=
strconv
.
ParseInt
(
s
,
10
,
64
);
err
==
nil
{
keepAlive
=
time
.
Duration
(
n
)
*
time
.
Second
}
}
if
keepAlive
<
0
{
return
time
.
Duration
(
math
.
MaxInt64
)
}
return
keepAlive
}
func
Bool
(
k
string
)
func
()
bool
{
func
Bool
(
k
string
)
func
()
bool
{
return
func
()
bool
{
return
func
()
bool
{
if
s
:=
getenv
(
k
);
s
!=
""
{
if
s
:=
getenv
(
k
);
s
!=
""
{
...
@@ -130,8 +150,6 @@ var (
...
@@ -130,8 +150,6 @@ var (
)
)
var
(
var
(
// Set via OLLAMA_KEEP_ALIVE in the environment
KeepAlive
time
.
Duration
// Set via OLLAMA_LLM_LIBRARY in the environment
// Set via OLLAMA_LLM_LIBRARY in the environment
LLMLibrary
string
LLMLibrary
string
// Set via OLLAMA_MAX_LOADED_MODELS in the environment
// Set via OLLAMA_MAX_LOADED_MODELS in the environment
...
@@ -168,7 +186,7 @@ func AsMap() map[string]EnvVar {
...
@@ -168,7 +186,7 @@ func AsMap() map[string]EnvVar {
"OLLAMA_DEBUG"
:
{
"OLLAMA_DEBUG"
,
Debug
(),
"Show additional debug information (e.g. OLLAMA_DEBUG=1)"
},
"OLLAMA_DEBUG"
:
{
"OLLAMA_DEBUG"
,
Debug
(),
"Show additional debug information (e.g. OLLAMA_DEBUG=1)"
},
"OLLAMA_FLASH_ATTENTION"
:
{
"OLLAMA_FLASH_ATTENTION"
,
FlashAttention
(),
"Enabled flash attention"
},
"OLLAMA_FLASH_ATTENTION"
:
{
"OLLAMA_FLASH_ATTENTION"
,
FlashAttention
(),
"Enabled flash attention"
},
"OLLAMA_HOST"
:
{
"OLLAMA_HOST"
,
Host
(),
"IP Address for the ollama server (default 127.0.0.1:11434)"
},
"OLLAMA_HOST"
:
{
"OLLAMA_HOST"
,
Host
(),
"IP Address for the ollama server (default 127.0.0.1:11434)"
},
"OLLAMA_KEEP_ALIVE"
:
{
"OLLAMA_KEEP_ALIVE"
,
KeepAlive
,
"The duration that models stay loaded in memory (default
\"
5m
\"
)"
},
"OLLAMA_KEEP_ALIVE"
:
{
"OLLAMA_KEEP_ALIVE"
,
KeepAlive
()
,
"The duration that models stay loaded in memory (default
\"
5m
\"
)"
},
"OLLAMA_LLM_LIBRARY"
:
{
"OLLAMA_LLM_LIBRARY"
,
LLMLibrary
,
"Set LLM library to bypass autodetection"
},
"OLLAMA_LLM_LIBRARY"
:
{
"OLLAMA_LLM_LIBRARY"
,
LLMLibrary
,
"Set LLM library to bypass autodetection"
},
"OLLAMA_MAX_LOADED_MODELS"
:
{
"OLLAMA_MAX_LOADED_MODELS"
,
MaxRunners
,
"Maximum number of loaded models per GPU"
},
"OLLAMA_MAX_LOADED_MODELS"
:
{
"OLLAMA_MAX_LOADED_MODELS"
,
MaxRunners
,
"Maximum number of loaded models per GPU"
},
"OLLAMA_MAX_QUEUE"
:
{
"OLLAMA_MAX_QUEUE"
,
MaxQueuedRequests
,
"Maximum number of queued requests"
},
"OLLAMA_MAX_QUEUE"
:
{
"OLLAMA_MAX_QUEUE"
,
MaxQueuedRequests
,
"Maximum number of queued requests"
},
...
@@ -210,7 +228,6 @@ func init() {
...
@@ -210,7 +228,6 @@ func init() {
NumParallel
=
0
// Autoselect
NumParallel
=
0
// Autoselect
MaxRunners
=
0
// Autoselect
MaxRunners
=
0
// Autoselect
MaxQueuedRequests
=
512
MaxQueuedRequests
=
512
KeepAlive
=
5
*
time
.
Minute
LoadConfig
()
LoadConfig
()
}
}
...
@@ -284,35 +301,9 @@ func LoadConfig() {
...
@@ -284,35 +301,9 @@ func LoadConfig() {
}
}
}
}
ka
:=
getenv
(
"OLLAMA_KEEP_ALIVE"
)
if
ka
!=
""
{
loadKeepAlive
(
ka
)
}
CudaVisibleDevices
=
getenv
(
"CUDA_VISIBLE_DEVICES"
)
CudaVisibleDevices
=
getenv
(
"CUDA_VISIBLE_DEVICES"
)
HipVisibleDevices
=
getenv
(
"HIP_VISIBLE_DEVICES"
)
HipVisibleDevices
=
getenv
(
"HIP_VISIBLE_DEVICES"
)
RocrVisibleDevices
=
getenv
(
"ROCR_VISIBLE_DEVICES"
)
RocrVisibleDevices
=
getenv
(
"ROCR_VISIBLE_DEVICES"
)
GpuDeviceOrdinal
=
getenv
(
"GPU_DEVICE_ORDINAL"
)
GpuDeviceOrdinal
=
getenv
(
"GPU_DEVICE_ORDINAL"
)
HsaOverrideGfxVersion
=
getenv
(
"HSA_OVERRIDE_GFX_VERSION"
)
HsaOverrideGfxVersion
=
getenv
(
"HSA_OVERRIDE_GFX_VERSION"
)
}
}
func
loadKeepAlive
(
ka
string
)
{
v
,
err
:=
strconv
.
Atoi
(
ka
)
if
err
!=
nil
{
d
,
err
:=
time
.
ParseDuration
(
ka
)
if
err
==
nil
{
if
d
<
0
{
KeepAlive
=
time
.
Duration
(
math
.
MaxInt64
)
}
else
{
KeepAlive
=
d
}
}
}
else
{
d
:=
time
.
Duration
(
v
)
*
time
.
Second
if
d
<
0
{
KeepAlive
=
time
.
Duration
(
math
.
MaxInt64
)
}
else
{
KeepAlive
=
d
}
}
}
envconfig/config_test.go
View file @
8570c1c0
...
@@ -21,22 +21,6 @@ func TestSmoke(t *testing.T) {
...
@@ -21,22 +21,6 @@ func TestSmoke(t *testing.T) {
t
.
Setenv
(
"OLLAMA_FLASH_ATTENTION"
,
"1"
)
t
.
Setenv
(
"OLLAMA_FLASH_ATTENTION"
,
"1"
)
require
.
True
(
t
,
FlashAttention
())
require
.
True
(
t
,
FlashAttention
())
t
.
Setenv
(
"OLLAMA_KEEP_ALIVE"
,
""
)
LoadConfig
()
require
.
Equal
(
t
,
5
*
time
.
Minute
,
KeepAlive
)
t
.
Setenv
(
"OLLAMA_KEEP_ALIVE"
,
"3"
)
LoadConfig
()
require
.
Equal
(
t
,
3
*
time
.
Second
,
KeepAlive
)
t
.
Setenv
(
"OLLAMA_KEEP_ALIVE"
,
"1h"
)
LoadConfig
()
require
.
Equal
(
t
,
1
*
time
.
Hour
,
KeepAlive
)
t
.
Setenv
(
"OLLAMA_KEEP_ALIVE"
,
"-1s"
)
LoadConfig
()
require
.
Equal
(
t
,
time
.
Duration
(
math
.
MaxInt64
),
KeepAlive
)
t
.
Setenv
(
"OLLAMA_KEEP_ALIVE"
,
"-1"
)
LoadConfig
()
require
.
Equal
(
t
,
time
.
Duration
(
math
.
MaxInt64
),
KeepAlive
)
}
}
func
TestHost
(
t
*
testing
.
T
)
{
func
TestHost
(
t
*
testing
.
T
)
{
...
@@ -186,3 +170,36 @@ func TestBool(t *testing.T) {
...
@@ -186,3 +170,36 @@ func TestBool(t *testing.T) {
})
})
}
}
}
}
func
TestKeepAlive
(
t
*
testing
.
T
)
{
cases
:=
map
[
string
]
time
.
Duration
{
""
:
5
*
time
.
Minute
,
"1s"
:
time
.
Second
,
"1m"
:
time
.
Minute
,
"1h"
:
time
.
Hour
,
"5m0s"
:
5
*
time
.
Minute
,
"1h2m3s"
:
1
*
time
.
Hour
+
2
*
time
.
Minute
+
3
*
time
.
Second
,
"0"
:
time
.
Duration
(
0
),
"60"
:
60
*
time
.
Second
,
"120"
:
2
*
time
.
Minute
,
"3600"
:
time
.
Hour
,
"-0"
:
time
.
Duration
(
0
),
"-1"
:
time
.
Duration
(
math
.
MaxInt64
),
"-1m"
:
time
.
Duration
(
math
.
MaxInt64
),
// invalid values
" "
:
5
*
time
.
Minute
,
"???"
:
5
*
time
.
Minute
,
"1d"
:
5
*
time
.
Minute
,
"1y"
:
5
*
time
.
Minute
,
"1w"
:
5
*
time
.
Minute
,
}
for
tt
,
expect
:=
range
cases
{
t
.
Run
(
tt
,
func
(
t
*
testing
.
T
)
{
t
.
Setenv
(
"OLLAMA_KEEP_ALIVE"
,
tt
)
if
actual
:=
KeepAlive
();
actual
!=
expect
{
t
.
Errorf
(
"%s: expected %s, got %s"
,
tt
,
expect
,
actual
)
}
})
}
}
server/sched.go
View file @
8570c1c0
...
@@ -401,7 +401,7 @@ func (s *Scheduler) load(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoList,
...
@@ -401,7 +401,7 @@ func (s *Scheduler) load(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoList,
if
numParallel
<
1
{
if
numParallel
<
1
{
numParallel
=
1
numParallel
=
1
}
}
sessionDuration
:=
envconfig
.
KeepAlive
sessionDuration
:=
envconfig
.
KeepAlive
()
if
req
.
sessionDuration
!=
nil
{
if
req
.
sessionDuration
!=
nil
{
sessionDuration
=
req
.
sessionDuration
.
Duration
sessionDuration
=
req
.
sessionDuration
.
Duration
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment