Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
85d9d73a
Commit
85d9d73a
authored
Jul 08, 2024
by
Michael Yang
Browse files
comments
parent
78140a71
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
90 additions
and
41 deletions
+90
-41
envconfig/config.go
envconfig/config.go
+27
-23
envconfig/config_test.go
envconfig/config_test.go
+61
-16
server/sched.go
server/sched.go
+2
-2
No files found.
envconfig/config.go
View file @
85d9d73a
package
envconfig
package
envconfig
import
(
import
(
"errors"
"fmt"
"fmt"
"log/slog"
"log/slog"
"math"
"math"
...
@@ -15,15 +14,12 @@ import (
...
@@ -15,15 +14,12 @@ import (
"time"
"time"
)
)
var
ErrInvalidHostPort
=
errors
.
New
(
"invalid port specified in OLLAMA_HOST"
)
// Host returns the scheme and host. Host can be configured via the OLLAMA_HOST environment variable.
// Host returns the scheme and host. Host can be configured via the OLLAMA_HOST environment variable.
// Default is scheme "http" and host "127.0.0.1:11434"
// Default is scheme "http" and host "127.0.0.1:11434"
func
Host
()
*
url
.
URL
{
func
Host
()
*
url
.
URL
{
defaultPort
:=
"11434"
defaultPort
:=
"11434"
s
:=
os
.
Getenv
(
"OLLAMA_HOST"
)
s
:=
strings
.
TrimSpace
(
Var
(
"OLLAMA_HOST"
))
s
=
strings
.
TrimSpace
(
strings
.
Trim
(
strings
.
TrimSpace
(
s
),
"
\"
'"
))
scheme
,
hostport
,
ok
:=
strings
.
Cut
(
s
,
"://"
)
scheme
,
hostport
,
ok
:=
strings
.
Cut
(
s
,
"://"
)
switch
{
switch
{
case
!
ok
:
case
!
ok
:
...
@@ -48,6 +44,7 @@ func Host() *url.URL {
...
@@ -48,6 +44,7 @@ func Host() *url.URL {
}
}
if
n
,
err
:=
strconv
.
ParseInt
(
port
,
10
,
32
);
err
!=
nil
||
n
>
65535
||
n
<
0
{
if
n
,
err
:=
strconv
.
ParseInt
(
port
,
10
,
32
);
err
!=
nil
||
n
>
65535
||
n
<
0
{
slog
.
Warn
(
"invalid port, using default"
,
"port"
,
port
,
"default"
,
defaultPort
)
return
&
url
.
URL
{
return
&
url
.
URL
{
Scheme
:
scheme
,
Scheme
:
scheme
,
Host
:
net
.
JoinHostPort
(
host
,
defaultPort
),
Host
:
net
.
JoinHostPort
(
host
,
defaultPort
),
...
@@ -62,7 +59,7 @@ func Host() *url.URL {
...
@@ -62,7 +59,7 @@ func Host() *url.URL {
// Origins returns a list of allowed origins. Origins can be configured via the OLLAMA_ORIGINS environment variable.
// Origins returns a list of allowed origins. Origins can be configured via the OLLAMA_ORIGINS environment variable.
func
Origins
()
(
origins
[]
string
)
{
func
Origins
()
(
origins
[]
string
)
{
if
s
:=
getenv
(
"OLLAMA_ORIGINS"
);
s
!=
""
{
if
s
:=
Var
(
"OLLAMA_ORIGINS"
);
s
!=
""
{
origins
=
strings
.
Split
(
s
,
","
)
origins
=
strings
.
Split
(
s
,
","
)
}
}
...
@@ -87,7 +84,7 @@ func Origins() (origins []string) {
...
@@ -87,7 +84,7 @@ func Origins() (origins []string) {
// Models returns the path to the models directory. Models directory can be configured via the OLLAMA_MODELS environment variable.
// Models returns the path to the models directory. Models directory can be configured via the OLLAMA_MODELS environment variable.
// Default is $HOME/.ollama/models
// Default is $HOME/.ollama/models
func
Models
()
string
{
func
Models
()
string
{
if
s
,
ok
:=
os
.
LookupEnv
(
"OLLAMA_MODELS"
);
ok
{
if
s
:=
Var
(
"OLLAMA_MODELS"
);
s
!=
""
{
return
s
return
s
}
}
...
@@ -104,7 +101,7 @@ func Models() string {
...
@@ -104,7 +101,7 @@ func Models() string {
// Default is 5 minutes.
// Default is 5 minutes.
func
KeepAlive
()
(
keepAlive
time
.
Duration
)
{
func
KeepAlive
()
(
keepAlive
time
.
Duration
)
{
keepAlive
=
5
*
time
.
Minute
keepAlive
=
5
*
time
.
Minute
if
s
:=
os
.
Getenv
(
"OLLAMA_KEEP_ALIVE"
);
s
!=
""
{
if
s
:=
Var
(
"OLLAMA_KEEP_ALIVE"
);
s
!=
""
{
if
d
,
err
:=
time
.
ParseDuration
(
s
);
err
==
nil
{
if
d
,
err
:=
time
.
ParseDuration
(
s
);
err
==
nil
{
keepAlive
=
d
keepAlive
=
d
}
else
if
n
,
err
:=
strconv
.
ParseInt
(
s
,
10
,
64
);
err
==
nil
{
}
else
if
n
,
err
:=
strconv
.
ParseInt
(
s
,
10
,
64
);
err
==
nil
{
...
@@ -121,7 +118,7 @@ func KeepAlive() (keepAlive time.Duration) {
...
@@ -121,7 +118,7 @@ func KeepAlive() (keepAlive time.Duration) {
func
Bool
(
k
string
)
func
()
bool
{
func
Bool
(
k
string
)
func
()
bool
{
return
func
()
bool
{
return
func
()
bool
{
if
s
:=
getenv
(
k
);
s
!=
""
{
if
s
:=
Var
(
k
);
s
!=
""
{
b
,
err
:=
strconv
.
ParseBool
(
s
)
b
,
err
:=
strconv
.
ParseBool
(
s
)
if
err
!=
nil
{
if
err
!=
nil
{
return
true
return
true
...
@@ -151,7 +148,7 @@ var (
...
@@ -151,7 +148,7 @@ var (
func
String
(
s
string
)
func
()
string
{
func
String
(
s
string
)
func
()
string
{
return
func
()
string
{
return
func
()
string
{
return
getenv
(
s
)
return
Var
(
s
)
}
}
}
}
...
@@ -167,7 +164,7 @@ var (
...
@@ -167,7 +164,7 @@ var (
)
)
func
RunnersDir
()
(
p
string
)
{
func
RunnersDir
()
(
p
string
)
{
if
p
:=
getenv
(
"OLLAMA_RUNNERS_DIR"
);
p
!=
""
{
if
p
:=
Var
(
"OLLAMA_RUNNERS_DIR"
);
p
!=
""
{
return
p
return
p
}
}
...
@@ -213,22 +210,29 @@ func RunnersDir() (p string) {
...
@@ -213,22 +210,29 @@ func RunnersDir() (p string) {
return
p
return
p
}
}
func
Int
(
k
string
,
n
int
)
func
()
int
{
func
Uint
(
key
string
,
defaultValue
uint
)
func
()
uint
{
return
func
()
int
{
return
func
()
uint
{
if
s
:=
getenv
(
k
);
s
!=
""
{
if
s
:=
Var
(
key
);
s
!=
""
{
if
n
,
err
:=
strconv
.
ParseInt
(
s
,
10
,
64
);
err
==
nil
&&
n
>=
0
{
if
n
,
err
:=
strconv
.
ParseUint
(
s
,
10
,
64
);
err
!=
nil
{
return
int
(
n
)
slog
.
Warn
(
"invalid environment variable, using default"
,
"key"
,
key
,
"value"
,
s
,
"default"
,
defaultValue
)
}
else
{
return
uint
(
n
)
}
}
}
}
return
n
return
defaultValue
}
}
}
}
var
(
var
(
NumParallel
=
Int
(
"OLLAMA_NUM_PARALLEL"
,
0
)
// NumParallel sets the number of parallel model requests. NumParallel can be configured via the OLLAMA_NUM_PARALLEL environment variable.
MaxRunners
=
Int
(
"OLLAMA_MAX_LOADED_MODELS"
,
0
)
NumParallel
=
Uint
(
"OLLAMA_NUM_PARALLEL"
,
0
)
MaxQueue
=
Int
(
"OLLAMA_MAX_QUEUE"
,
512
)
// MaxRunners sets the maximum number of loaded models. MaxRunners can be configured via the OLLAMA_MAX_LOADED_MODELS environment variable.
MaxRunners
=
Uint
(
"OLLAMA_MAX_LOADED_MODELS"
,
0
)
// MaxQueue sets the maximum number of queued requests. MaxQueue can be configured via the OLLAMA_MAX_QUEUE environment variable.
MaxQueue
=
Uint
(
"OLLAMA_MAX_QUEUE"
,
512
)
// MaxVRAM sets a maximum VRAM override in bytes. MaxVRAM can be configured via the OLLAMA_MAX_VRAM environment variable.
MaxVRAM
=
Uint
(
"OLLAMA_MAX_VRAM"
,
0
)
)
)
type
EnvVar
struct
{
type
EnvVar
struct
{
...
@@ -274,7 +278,7 @@ func Values() map[string]string {
...
@@ -274,7 +278,7 @@ func Values() map[string]string {
return
vals
return
vals
}
}
//
getenv
returns an environment variable stripped of leading and trailing quotes or spaces
//
Var
returns an environment variable stripped of leading and trailing quotes or spaces
func
getenv
(
key
string
)
string
{
func
Var
(
key
string
)
string
{
return
strings
.
Trim
(
os
.
Getenv
(
key
),
"
\"
'
"
)
return
strings
.
Trim
(
strings
.
TrimSpace
(
os
.
Getenv
(
key
)
)
,
"
\"
'"
)
}
}
envconfig/config_test.go
View file @
85d9d73a
...
@@ -30,6 +30,10 @@ func TestHost(t *testing.T) {
...
@@ -30,6 +30,10 @@ func TestHost(t *testing.T) {
"extra quotes"
:
{
"
\"
1.2.3.4
\"
"
,
"1.2.3.4:11434"
},
"extra quotes"
:
{
"
\"
1.2.3.4
\"
"
,
"1.2.3.4:11434"
},
"extra space+quotes"
:
{
"
\"
1.2.3.4
\"
"
,
"1.2.3.4:11434"
},
"extra space+quotes"
:
{
"
\"
1.2.3.4
\"
"
,
"1.2.3.4:11434"
},
"extra single quotes"
:
{
"'1.2.3.4'"
,
"1.2.3.4:11434"
},
"extra single quotes"
:
{
"'1.2.3.4'"
,
"1.2.3.4:11434"
},
"http"
:
{
"http://1.2.3.4"
,
"1.2.3.4:80"
},
"http port"
:
{
"http://1.2.3.4:4321"
,
"1.2.3.4:4321"
},
"https"
:
{
"https://1.2.3.4"
,
"1.2.3.4:443"
},
"https port"
:
{
"https://1.2.3.4:4321"
,
"1.2.3.4:4321"
},
}
}
for
name
,
tt
:=
range
cases
{
for
name
,
tt
:=
range
cases
{
...
@@ -133,24 +137,45 @@ func TestOrigins(t *testing.T) {
...
@@ -133,24 +137,45 @@ func TestOrigins(t *testing.T) {
}
}
func
TestBool
(
t
*
testing
.
T
)
{
func
TestBool
(
t
*
testing
.
T
)
{
cases
:=
map
[
string
]
struct
{
cases
:=
map
[
string
]
bool
{
value
string
""
:
false
,
expect
bool
"true"
:
true
,
}{
"false"
:
false
,
"empty"
:
{
""
,
false
},
"1"
:
true
,
"true"
:
{
"true"
,
true
},
"0"
:
false
,
"false"
:
{
"false"
,
false
},
// invalid values
"1"
:
{
"1"
,
true
},
"random"
:
true
,
"0"
:
{
"0"
,
false
},
"something"
:
true
,
"random"
:
{
"random"
,
true
},
"something"
:
{
"something"
,
true
},
}
}
for
name
,
tt
:=
range
cases
{
for
k
,
v
:=
range
cases
{
t
.
Run
(
name
,
func
(
t
*
testing
.
T
)
{
t
.
Run
(
k
,
func
(
t
*
testing
.
T
)
{
t
.
Setenv
(
"OLLAMA_BOOL"
,
tt
.
value
)
t
.
Setenv
(
"OLLAMA_BOOL"
,
k
)
if
b
:=
Bool
(
"OLLAMA_BOOL"
);
b
()
!=
tt
.
expect
{
if
b
:=
Bool
(
"OLLAMA_BOOL"
)();
b
!=
v
{
t
.
Errorf
(
"%s: expected %t, got %t"
,
name
,
tt
.
expect
,
b
())
t
.
Errorf
(
"%s: expected %t, got %t"
,
k
,
v
,
b
)
}
})
}
}
func
TestUint
(
t
*
testing
.
T
)
{
cases
:=
map
[
string
]
uint
{
"0"
:
0
,
"1"
:
1
,
"1337"
:
1337
,
// default values
""
:
11434
,
"-1"
:
11434
,
"0o10"
:
11434
,
"0x10"
:
11434
,
"string"
:
11434
,
}
for
k
,
v
:=
range
cases
{
t
.
Run
(
k
,
func
(
t
*
testing
.
T
)
{
t
.
Setenv
(
"OLLAMA_UINT"
,
k
)
if
i
:=
Uint
(
"OLLAMA_UINT"
,
11434
)();
i
!=
v
{
t
.
Errorf
(
"%s: expected %d, got %d"
,
k
,
v
,
i
)
}
}
})
})
}
}
...
@@ -188,3 +213,23 @@ func TestKeepAlive(t *testing.T) {
...
@@ -188,3 +213,23 @@ func TestKeepAlive(t *testing.T) {
})
})
}
}
}
}
func
TestVar
(
t
*
testing
.
T
)
{
cases
:=
map
[
string
]
string
{
"value"
:
"value"
,
" value "
:
"value"
,
" 'value' "
:
"value"
,
` "value" `
:
"value"
,
" ' value ' "
:
" value "
,
` " value " `
:
" value "
,
}
for
k
,
v
:=
range
cases
{
t
.
Run
(
k
,
func
(
t
*
testing
.
T
)
{
t
.
Setenv
(
"OLLAMA_VAR"
,
k
)
if
s
:=
Var
(
"OLLAMA_VAR"
);
s
!=
v
{
t
.
Errorf
(
"%s: expected %q, got %q"
,
k
,
v
,
s
)
}
})
}
}
server/sched.go
View file @
85d9d73a
...
@@ -129,7 +129,7 @@ func (s *Scheduler) processPending(ctx context.Context) {
...
@@ -129,7 +129,7 @@ func (s *Scheduler) processPending(ctx context.Context) {
slog
.
Debug
(
"pending request cancelled or timed out, skipping scheduling"
)
slog
.
Debug
(
"pending request cancelled or timed out, skipping scheduling"
)
continue
continue
}
}
numParallel
:=
envconfig
.
NumParallel
()
numParallel
:=
int
(
envconfig
.
NumParallel
()
)
// TODO (jmorganca): multimodal models don't support parallel yet
// TODO (jmorganca): multimodal models don't support parallel yet
// see https://github.com/ollama/ollama/issues/4165
// see https://github.com/ollama/ollama/issues/4165
if
len
(
pending
.
model
.
ProjectorPaths
)
>
0
&&
numParallel
!=
1
{
if
len
(
pending
.
model
.
ProjectorPaths
)
>
0
&&
numParallel
!=
1
{
...
@@ -151,7 +151,7 @@ func (s *Scheduler) processPending(ctx context.Context) {
...
@@ -151,7 +151,7 @@ func (s *Scheduler) processPending(ctx context.Context) {
pending
.
useLoadedRunner
(
runner
,
s
.
finishedReqCh
)
pending
.
useLoadedRunner
(
runner
,
s
.
finishedReqCh
)
break
break
}
}
}
else
if
envconfig
.
MaxRunners
()
>
0
&&
loadedCount
>=
envconfig
.
MaxRunners
()
{
}
else
if
envconfig
.
MaxRunners
()
>
0
&&
loadedCount
>=
int
(
envconfig
.
MaxRunners
()
)
{
slog
.
Debug
(
"max runners achieved, unloading one to make room"
,
"runner_count"
,
loadedCount
)
slog
.
Debug
(
"max runners achieved, unloading one to make room"
,
"runner_count"
,
loadedCount
)
runnerToExpire
=
s
.
findRunnerToUnload
()
runnerToExpire
=
s
.
findRunnerToUnload
()
}
else
{
}
else
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment