Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
7ed36741
Commit
7ed36741
authored
Aug 05, 2024
by
Michael Yang
Browse files
fix concurrency test
parent
50ee8b5f
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
17 additions
and
18 deletions
+17
-18
integration/concurrency_test.go
integration/concurrency_test.go
+9
-10
integration/llm_test.go
integration/llm_test.go
+2
-2
integration/max_queue_test.go
integration/max_queue_test.go
+1
-1
integration/utils_test.go
integration/utils_test.go
+5
-5
No files found.
integration/concurrency_test.go
View file @
7ed36741
...
@@ -5,6 +5,7 @@ package integration
...
@@ -5,6 +5,7 @@ package integration
import
(
import
(
"context"
"context"
"log/slog"
"log/slog"
"os"
"strconv"
"strconv"
"sync"
"sync"
"testing"
"testing"
...
@@ -13,7 +14,6 @@ import (
...
@@ -13,7 +14,6 @@ import (
"github.com/stretchr/testify/require"
"github.com/stretchr/testify/require"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/envconfig"
"github.com/ollama/ollama/format"
"github.com/ollama/ollama/format"
)
)
...
@@ -41,8 +41,8 @@ func TestMultiModelConcurrency(t *testing.T) {
...
@@ -41,8 +41,8 @@ func TestMultiModelConcurrency(t *testing.T) {
},
},
}
}
resp
=
[
2
][]
string
{
resp
=
[
2
][]
string
{
[]
string
{
"sunlight"
},
{
"sunlight"
},
[]
string
{
"england"
,
"english"
,
"massachusetts"
,
"pilgrims"
,
"british"
},
{
"england"
,
"english"
,
"massachusetts"
,
"pilgrims"
,
"british"
},
}
}
)
)
var
wg
sync
.
WaitGroup
var
wg
sync
.
WaitGroup
...
@@ -71,12 +71,11 @@ func TestIntegrationConcurrentPredictOrcaMini(t *testing.T) {
...
@@ -71,12 +71,11 @@ func TestIntegrationConcurrentPredictOrcaMini(t *testing.T) {
reqLimit
:=
len
(
req
)
reqLimit
:=
len
(
req
)
iterLimit
:=
5
iterLimit
:=
5
vram
:=
os
.
Getenv
(
"OLLAMA_MAX_VRAM"
)
// TODO - discover actual VRAM
if
s
:=
os
.
Getenv
(
"OLLAMA_MAX_VRAM"
);
s
!=
""
{
if
vram
!=
""
{
maxVram
,
err
:=
strconv
.
ParseUint
(
s
,
10
,
64
)
max
,
err
:=
strconv
.
ParseUint
(
vram
,
10
,
64
)
require
.
NoError
(
t
,
err
)
require
.
NoError
(
t
,
err
)
// Don't hammer on small VRAM cards...
// Don't hammer on small VRAM cards...
if
max
<
4
*
1024
*
1024
*
1024
{
if
max
Vram
<
4
*
format
.
GibiByte
{
reqLimit
=
min
(
reqLimit
,
2
)
reqLimit
=
min
(
reqLimit
,
2
)
iterLimit
=
2
iterLimit
=
2
}
}
...
@@ -233,12 +232,12 @@ func TestMultiModelStress(t *testing.T) {
...
@@ -233,12 +232,12 @@ func TestMultiModelStress(t *testing.T) {
consumed
:=
uint64
(
256
*
format
.
MebiByte
)
// Assume some baseline usage
consumed
:=
uint64
(
256
*
format
.
MebiByte
)
// Assume some baseline usage
for
i
:=
0
;
i
<
len
(
req
);
i
++
{
for
i
:=
0
;
i
<
len
(
req
);
i
++
{
// Always get at least 2 models, but dont' overshoot VRAM too much or we'll take too long
// Always get at least 2 models, but dont' overshoot VRAM too much or we'll take too long
if
i
>
1
&&
consumed
>
v
ram
{
if
i
>
1
&&
consumed
>
maxV
ram
{
slog
.
Info
(
"achieved target vram exhaustion"
,
"count"
,
i
,
"vram"
,
format
.
HumanBytes2
(
v
ram
),
"models"
,
format
.
HumanBytes2
(
consumed
))
slog
.
Info
(
"achieved target vram exhaustion"
,
"count"
,
i
,
"vram"
,
format
.
HumanBytes2
(
maxV
ram
),
"models"
,
format
.
HumanBytes2
(
consumed
))
break
break
}
}
consumed
+=
chosenModels
[
i
]
.
size
consumed
+=
chosenModels
[
i
]
.
size
slog
.
Info
(
"target vram"
,
"count"
,
i
,
"vram"
,
format
.
HumanBytes2
(
v
ram
),
"models"
,
format
.
HumanBytes2
(
consumed
))
slog
.
Info
(
"target vram"
,
"count"
,
i
,
"vram"
,
format
.
HumanBytes2
(
maxV
ram
),
"models"
,
format
.
HumanBytes2
(
consumed
))
wg
.
Add
(
1
)
wg
.
Add
(
1
)
go
func
(
i
int
)
{
go
func
(
i
int
)
{
...
...
integration/llm_test.go
View file @
7ed36741
...
@@ -35,8 +35,8 @@ var (
...
@@ -35,8 +35,8 @@ var (
},
},
}
}
resp
=
[
2
][]
string
{
resp
=
[
2
][]
string
{
[]
string
{
"sunlight"
},
{
"sunlight"
},
[]
string
{
"england"
,
"english"
,
"massachusetts"
,
"pilgrims"
},
{
"england"
,
"english"
,
"massachusetts"
,
"pilgrims"
},
}
}
)
)
...
...
integration/max_queue_test.go
View file @
7ed36741
...
@@ -29,7 +29,7 @@ func TestMaxQueue(t *testing.T) {
...
@@ -29,7 +29,7 @@ func TestMaxQueue(t *testing.T) {
// Also note that by default Darwin can't sustain > ~128 connections without adjusting limits
// Also note that by default Darwin can't sustain > ~128 connections without adjusting limits
threadCount
:=
32
threadCount
:=
32
if
maxQueue
:=
envconfig
.
MaxQueue
();
maxQueue
!=
0
{
if
maxQueue
:=
envconfig
.
MaxQueue
();
maxQueue
!=
0
{
threadCount
=
maxQueue
threadCount
=
int
(
maxQueue
)
}
else
{
}
else
{
t
.
Setenv
(
"OLLAMA_MAX_QUEUE"
,
strconv
.
Itoa
(
threadCount
))
t
.
Setenv
(
"OLLAMA_MAX_QUEUE"
,
strconv
.
Itoa
(
threadCount
))
}
}
...
...
integration/utils_test.go
View file @
7ed36741
...
@@ -334,10 +334,10 @@ func GenerateRequests() ([]api.GenerateRequest, [][]string) {
...
@@ -334,10 +334,10 @@ func GenerateRequests() ([]api.GenerateRequest, [][]string) {
},
},
},
},
[][]
string
{
[][]
string
{
[]
string
{
"sunlight"
},
{
"sunlight"
},
[]
string
{
"soil"
,
"organic"
,
"earth"
,
"black"
,
"tan"
},
{
"soil"
,
"organic"
,
"earth"
,
"black"
,
"tan"
},
[]
string
{
"england"
,
"english"
,
"massachusetts"
,
"pilgrims"
,
"british"
},
{
"england"
,
"english"
,
"massachusetts"
,
"pilgrims"
,
"british"
},
[]
string
{
"fourth"
,
"july"
,
"declaration"
,
"independence"
},
{
"fourth"
,
"july"
,
"declaration"
,
"independence"
},
[]
string
{
"nitrogen"
,
"oxygen"
,
"carbon"
,
"dioxide"
},
{
"nitrogen"
,
"oxygen"
,
"carbon"
,
"dioxide"
},
}
}
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment