Merge pull request #6190 from ollama/mxyng/fix-integration

fix concurrency test

Merge pull request #6190 from ollama/mxyng/fix-integration
fix concurrency test
10d49bce · Michael Yang · GitHub · 50ee8b5f · 7ed36741 · 10d49bce
Commit 10d49bce (unverified), authored by Michael Yang on Aug 05, 2024; committed by GitHub on Aug 05, 2024.
4 changed files
--- a/integration/concurrency_test.go
+++ b/integration/concurrency_test.go
@@ -5,6 +5,7 @@ package integration
 import (
 	"context"
 	"log/slog"
+	"os"
 	"strconv"
 	"sync"
 	"testing"
@@ -13,7 +14,6 @@ import (
 	"github.com/stretchr/testify/require"
 	"github.com/ollama/ollama/api"
-	"github.com/ollama/ollama/envconfig"
 	"github.com/ollama/ollama/format"
 )
@@ -41,8 +41,8 @@ func TestMultiModelConcurrency(t *testing.T) {
 			},
 		}
 		resp = [2][]string{
-			[]string{"sunlight"},
+			{"sunlight"},
-			[]string{"england", "english", "massachusetts", "pilgrims", "british"},
+			{"england", "english", "massachusetts", "pilgrims", "british"},
 		}
 	)
 	var wg sync.WaitGroup
@@ -71,12 +71,11 @@ func TestIntegrationConcurrentPredictOrcaMini(t *testing.T) {
 	reqLimit := len(req)
 	iterLimit := 5
-	vram := os.Getenv("OLLAMA_MAX_VRAM") // TODO - discover actual VRAM
+	if s := os.Getenv("OLLAMA_MAX_VRAM"); s != "" {
-	if vram != "" {
+		maxVram, err := strconv.ParseUint(s, 10, 64)
-		max, err := strconv.ParseUint(vram, 10, 64)
 		require.NoError(t, err)
 		// Don't hammer on small VRAM cards...
-		if max < 4*1024*1024*1024 {
+		if maxVram < 4*format.GibiByte {
 			reqLimit = min(reqLimit, 2)
 			iterLimit = 2
 		}
@@ -233,12 +232,12 @@ func TestMultiModelStress(t *testing.T) {
 	consumed := uint64(256 * format.MebiByte) // Assume some baseline usage
 	for i := 0; i < len(req); i++ {
 		// Always get at least 2 models, but don't overshoot VRAM too much or we'll take too long
-		if i > 1 && consumed > vram {
+		if i > 1 && consumed > maxVram {
-			slog.Info("achieved target vram exhaustion", "count", i, "vram", format.HumanBytes2(vram), "models", format.HumanBytes2(consumed))
+			slog.Info("achieved target vram exhaustion", "count", i, "vram", format.HumanBytes2(maxVram), "models", format.HumanBytes2(consumed))
 			break
 		}
 		consumed += chosenModels[i].size
-		slog.Info("target vram", "count", i, "vram", format.HumanBytes2(vram), "models", format.HumanBytes2(consumed))
+		slog.Info("target vram", "count", i, "vram", format.HumanBytes2(maxVram), "models", format.HumanBytes2(consumed))
 		wg.Add(1)
 		go func(i int) {

--- a/integration/llm_test.go
+++ b/integration/llm_test.go
@@ -35,8 +35,8 @@ var (
 		},
 	}
 	resp = [2][]string{
-		[]string{"sunlight"},
+		{"sunlight"},
-		[]string{"england", "english", "massachusetts", "pilgrims"},
+		{"england", "english", "massachusetts", "pilgrims"},
 	}
 )

--- a/integration/max_queue_test.go
+++ b/integration/max_queue_test.go
@@ -29,7 +29,7 @@ func TestMaxQueue(t *testing.T) {
 	// Also note that by default Darwin can't sustain > ~128 connections without adjusting limits
 	threadCount := 32
 	if maxQueue := envconfig.MaxQueue(); maxQueue != 0 {
-		threadCount = maxQueue
+		threadCount = int(maxQueue)
 	} else {
 		t.Setenv("OLLAMA_MAX_QUEUE", strconv.Itoa(threadCount))
 	}

--- a/integration/utils_test.go
+++ b/integration/utils_test.go
@@ -334,10 +334,10 @@ func GenerateRequests() ([]api.GenerateRequest, [][]string) {
 			},
 		},
 		[][]string{
-			[]string{"sunlight"},
+			{"sunlight"},
-			[]string{"soil", "organic", "earth", "black", "tan"},
+			{"soil", "organic", "earth", "black", "tan"},
-			[]string{"england", "english", "massachusetts", "pilgrims", "british"},
+			{"england", "english", "massachusetts", "pilgrims", "british"},
-			[]string{"fourth", "july", "declaration", "independence"},
+			{"fourth", "july", "declaration", "independence"},
-			[]string{"nitrogen", "oxygen", "carbon", "dioxide"},
+			{"nitrogen", "oxygen", "carbon", "dioxide"},
 		}
 }