Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
10d49bce
Unverified
Commit
10d49bce
authored
Aug 05, 2024
by
Michael Yang
Committed by
GitHub
Aug 05, 2024
Browse files
Merge pull request #6190 from ollama/mxyng/fix-integration
fix concurrency test
parents
50ee8b5f
7ed36741
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
17 additions
and
18 deletions
+17
-18
integration/concurrency_test.go
integration/concurrency_test.go
+9
-10
integration/llm_test.go
integration/llm_test.go
+2
-2
integration/max_queue_test.go
integration/max_queue_test.go
+1
-1
integration/utils_test.go
integration/utils_test.go
+5
-5
No files found.
integration/concurrency_test.go
View file @
10d49bce
...
@@ -5,6 +5,7 @@ package integration
...
@@ -5,6 +5,7 @@ package integration
import
(
import
(
"context"
"context"
"log/slog"
"log/slog"
"os"
"strconv"
"strconv"
"sync"
"sync"
"testing"
"testing"
...
@@ -13,7 +14,6 @@ import (
...
@@ -13,7 +14,6 @@ import (
"github.com/stretchr/testify/require"
"github.com/stretchr/testify/require"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/envconfig"
"github.com/ollama/ollama/format"
"github.com/ollama/ollama/format"
)
)
...
@@ -41,8 +41,8 @@ func TestMultiModelConcurrency(t *testing.T) {
...
@@ -41,8 +41,8 @@ func TestMultiModelConcurrency(t *testing.T) {
},
},
}
}
resp
=
[
2
][]
string
{
resp
=
[
2
][]
string
{
[]
string
{
"sunlight"
},
{
"sunlight"
},
[]
string
{
"england"
,
"english"
,
"massachusetts"
,
"pilgrims"
,
"british"
},
{
"england"
,
"english"
,
"massachusetts"
,
"pilgrims"
,
"british"
},
}
}
)
)
var
wg
sync
.
WaitGroup
var
wg
sync
.
WaitGroup
...
@@ -71,12 +71,11 @@ func TestIntegrationConcurrentPredictOrcaMini(t *testing.T) {
...
@@ -71,12 +71,11 @@ func TestIntegrationConcurrentPredictOrcaMini(t *testing.T) {
reqLimit
:=
len
(
req
)
reqLimit
:=
len
(
req
)
iterLimit
:=
5
iterLimit
:=
5
vram
:=
os
.
Getenv
(
"OLLAMA_MAX_VRAM"
)
// TODO - discover actual VRAM
if
s
:=
os
.
Getenv
(
"OLLAMA_MAX_VRAM"
);
s
!=
""
{
if
vram
!=
""
{
maxVram
,
err
:=
strconv
.
ParseUint
(
s
,
10
,
64
)
max
,
err
:=
strconv
.
ParseUint
(
vram
,
10
,
64
)
require
.
NoError
(
t
,
err
)
require
.
NoError
(
t
,
err
)
// Don't hammer on small VRAM cards...
// Don't hammer on small VRAM cards...
if
max
<
4
*
1024
*
1024
*
1024
{
if
max
Vram
<
4
*
format
.
GibiByte
{
reqLimit
=
min
(
reqLimit
,
2
)
reqLimit
=
min
(
reqLimit
,
2
)
iterLimit
=
2
iterLimit
=
2
}
}
...
@@ -233,12 +232,12 @@ func TestMultiModelStress(t *testing.T) {
...
@@ -233,12 +232,12 @@ func TestMultiModelStress(t *testing.T) {
consumed
:=
uint64
(
256
*
format
.
MebiByte
)
// Assume some baseline usage
consumed
:=
uint64
(
256
*
format
.
MebiByte
)
// Assume some baseline usage
for
i
:=
0
;
i
<
len
(
req
);
i
++
{
for
i
:=
0
;
i
<
len
(
req
);
i
++
{
// Always get at least 2 models, but don't overshoot VRAM too much or we'll take too long
// Always get at least 2 models, but don't overshoot VRAM too much or we'll take too long
if
i
>
1
&&
consumed
>
v
ram
{
if
i
>
1
&&
consumed
>
maxV
ram
{
slog
.
Info
(
"achieved target vram exhaustion"
,
"count"
,
i
,
"vram"
,
format
.
HumanBytes2
(
v
ram
),
"models"
,
format
.
HumanBytes2
(
consumed
))
slog
.
Info
(
"achieved target vram exhaustion"
,
"count"
,
i
,
"vram"
,
format
.
HumanBytes2
(
maxV
ram
),
"models"
,
format
.
HumanBytes2
(
consumed
))
break
break
}
}
consumed
+=
chosenModels
[
i
]
.
size
consumed
+=
chosenModels
[
i
]
.
size
slog
.
Info
(
"target vram"
,
"count"
,
i
,
"vram"
,
format
.
HumanBytes2
(
v
ram
),
"models"
,
format
.
HumanBytes2
(
consumed
))
slog
.
Info
(
"target vram"
,
"count"
,
i
,
"vram"
,
format
.
HumanBytes2
(
maxV
ram
),
"models"
,
format
.
HumanBytes2
(
consumed
))
wg
.
Add
(
1
)
wg
.
Add
(
1
)
go
func
(
i
int
)
{
go
func
(
i
int
)
{
...
...
integration/llm_test.go
View file @
10d49bce
...
@@ -35,8 +35,8 @@ var (
...
@@ -35,8 +35,8 @@ var (
},
},
}
}
resp
=
[
2
][]
string
{
resp
=
[
2
][]
string
{
[]
string
{
"sunlight"
},
{
"sunlight"
},
[]
string
{
"england"
,
"english"
,
"massachusetts"
,
"pilgrims"
},
{
"england"
,
"english"
,
"massachusetts"
,
"pilgrims"
},
}
}
)
)
...
...
integration/max_queue_test.go
View file @
10d49bce
...
@@ -29,7 +29,7 @@ func TestMaxQueue(t *testing.T) {
...
@@ -29,7 +29,7 @@ func TestMaxQueue(t *testing.T) {
// Also note that by default Darwin can't sustain > ~128 connections without adjusting limits
// Also note that by default Darwin can't sustain > ~128 connections without adjusting limits
threadCount
:=
32
threadCount
:=
32
if
maxQueue
:=
envconfig
.
MaxQueue
();
maxQueue
!=
0
{
if
maxQueue
:=
envconfig
.
MaxQueue
();
maxQueue
!=
0
{
threadCount
=
maxQueue
threadCount
=
int
(
maxQueue
)
}
else
{
}
else
{
t
.
Setenv
(
"OLLAMA_MAX_QUEUE"
,
strconv
.
Itoa
(
threadCount
))
t
.
Setenv
(
"OLLAMA_MAX_QUEUE"
,
strconv
.
Itoa
(
threadCount
))
}
}
...
...
integration/utils_test.go
View file @
10d49bce
...
@@ -334,10 +334,10 @@ func GenerateRequests() ([]api.GenerateRequest, [][]string) {
...
@@ -334,10 +334,10 @@ func GenerateRequests() ([]api.GenerateRequest, [][]string) {
},
},
},
},
[][]
string
{
[][]
string
{
[]
string
{
"sunlight"
},
{
"sunlight"
},
[]
string
{
"soil"
,
"organic"
,
"earth"
,
"black"
,
"tan"
},
{
"soil"
,
"organic"
,
"earth"
,
"black"
,
"tan"
},
[]
string
{
"england"
,
"english"
,
"massachusetts"
,
"pilgrims"
,
"british"
},
{
"england"
,
"english"
,
"massachusetts"
,
"pilgrims"
,
"british"
},
[]
string
{
"fourth"
,
"july"
,
"declaration"
,
"independence"
},
{
"fourth"
,
"july"
,
"declaration"
,
"independence"
},
[]
string
{
"nitrogen"
,
"oxygen"
,
"carbon"
,
"dioxide"
},
{
"nitrogen"
,
"oxygen"
,
"carbon"
,
"dioxide"
},
}
}
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment