OpenDAS / ollama · Commit 68dfc623

refined test timing

adjust timing on some tests so they don't timeout on small/slow GPUs

Authored May 31, 2024 by Daniel Hiltgen
Parent: 5e8ff556
Showing 4 changed files with 59 additions and 34 deletions
integration/concurrency_test.go   +32  -16
integration/context_test.go        +1   -1
integration/llm_image_test.go      +5   -1
integration/utils_test.go         +21  -16
integration/concurrency_test.go

@@ -22,6 +22,7 @@ func TestMultiModelConcurrency(t *testing.T) {
 			Model:     "orca-mini",
 			Prompt:    "why is the ocean blue?",
 			Stream:    &stream,
+			KeepAlive: &api.Duration{Duration: 10 * time.Second},
 			Options: map[string]interface{}{
 				"seed":        42,
 				"temperature": 0.0,
@@ -30,6 +31,7 @@ func TestMultiModelConcurrency(t *testing.T) {
 			Model:     "tinydolphin",
 			Prompt:    "what is the origin of the us thanksgiving holiday?",
 			Stream:    &stream,
+			KeepAlive: &api.Duration{Duration: 10 * time.Second},
 			Options: map[string]interface{}{
 				"seed":        42,
 				"temperature": 0.0,
@@ -43,7 +45,7 @@ func TestMultiModelConcurrency(t *testing.T) {
 	)
 	var wg sync.WaitGroup
 	wg.Add(len(req))
-	ctx, cancel := context.WithTimeout(context.Background(), time.Second*120)
+	ctx, cancel := context.WithTimeout(context.Background(), time.Second*240)
 	defer cancel()
 	client, _, cleanup := InitServerConnection(ctx, t)
@@ -56,32 +58,46 @@ func TestMultiModelConcurrency(t *testing.T) {
 	for i := 0; i < len(req); i++ {
 		go func(i int) {
 			defer wg.Done()
-			DoGenerate(ctx, t, client, req[i], resp[i], 30*time.Second, 10*time.Second)
+			DoGenerate(ctx, t, client, req[i], resp[i], 60*time.Second, 10*time.Second)
 		}(i)
 	}
 	wg.Wait()
 }
 
 func TestIntegrationConcurrentPredictOrcaMini(t *testing.T) {
-	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute) // GTX 750 2G card takes ~9 minutes
+	req, resp := GenerateRequests()
+	reqLimit := len(req)
+	iterLimit := 5
+
+	vram := os.Getenv("OLLAMA_MAX_VRAM")
+	if vram != "" {
+		max, err := strconv.ParseUint(vram, 10, 64)
+		require.NoError(t, err)
+		// Don't hammer on small VRAM cards...
+		if max < 4*1024*1024*1024 {
+			reqLimit = min(reqLimit, 2)
+			iterLimit = 2
+		}
+	}
+
+	ctx, cancel := context.WithTimeout(context.Background(), 9*time.Minute)
 	defer cancel()
 	client, _, cleanup := InitServerConnection(ctx, t)
 	defer cleanup()
 
-	req, resp := GenerateRequests()
 	// Get the server running (if applicable) warm the model up with a single initial request
-	DoGenerate(ctx, t, client, req[0], resp[0], 60*time.Second, 5*time.Second)
+	DoGenerate(ctx, t, client, req[0], resp[0], 60*time.Second, 10*time.Second)
 
 	var wg sync.WaitGroup
-	wg.Add(len(req))
-	for i := 0; i < len(req); i++ {
+	wg.Add(reqLimit)
+	for i := 0; i < reqLimit; i++ {
 		go func(i int) {
 			defer wg.Done()
-			for j := 0; j < 5; j++ {
+			for j := 0; j < iterLimit; j++ {
 				slog.Info("Starting", "req", i, "iter", j)
-				// On slower GPUs it can take a while to process the 4 concurrent requests
+				// On slower GPUs it can take a while to process the concurrent requests
 				// so we allow a much longer initial timeout
-				DoGenerate(ctx, t, client, req[i], resp[i], 90*time.Second, 5*time.Second)
+				DoGenerate(ctx, t, client, req[i], resp[i], 120*time.Second, 20*time.Second)
 			}
 		}(i)
 	}
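The most substantive change in this file is the new OLLAMA_MAX_VRAM gate: rather than always firing the full set of concurrent requests, the test clamps both the request count and the per-goroutine iteration count when the reported card has under 4 GiB. A minimal, self-contained sketch of that pattern, using the same env variable and cutoff as the diff (the helper name and error wrapping are illustrative, not part of the test):

package main

import (
	"fmt"
	"os"
	"strconv"
)

// chooseLimits mirrors the gating logic added to
// TestIntegrationConcurrentPredictOrcaMini: start from full concurrency,
// then clamp when OLLAMA_MAX_VRAM reports a card under 4 GiB.
func chooseLimits(numReqs int) (int, int, error) {
	reqLimit, iterLimit := numReqs, 5
	if vram := os.Getenv("OLLAMA_MAX_VRAM"); vram != "" {
		maxVRAM, err := strconv.ParseUint(vram, 10, 64)
		if err != nil {
			return 0, 0, fmt.Errorf("parsing OLLAMA_MAX_VRAM: %w", err)
		}
		// Don't hammer on small VRAM cards (same 4 GiB cutoff as the test).
		if maxVRAM < 4*1024*1024*1024 {
			reqLimit = min(reqLimit, 2)
			iterLimit = 2
		}
	}
	return reqLimit, iterLimit, nil
}

func main() {
	reqs, iters, err := chooseLimits(4)
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	fmt.Printf("running %d concurrent requests, %d iterations each\n", reqs, iters)
}

Since the integration suite is gated behind a build tag, a run against a small card would presumably look like OLLAMA_MAX_VRAM=$((2*1024*1024*1024)) go test -tags integration ./integration/ -run TestIntegrationConcurrentPredictOrcaMini.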
integration/context_test.go

@@ -11,7 +11,7 @@ import (
 )
 
 func TestContextExhaustion(t *testing.T) {
-	ctx, cancel := context.WithTimeout(context.Background(), 4*time.Minute) // Longer needed for small footprint GPUs
+	ctx, cancel := context.WithTimeout(context.Background(), 6*time.Minute) // Longer needed for small footprint GPUs
 	defer cancel()
 	// Set up the test data
 	req := api.GenerateRequest{
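Only the overall deadline changes here, from 4 to 6 minutes. For context on the pattern every test in this commit relies on: once the deadline expires, any call made with ctx fails promptly, so the test aborts with context.DeadlineExceeded instead of hanging. A compressed illustration:

package main

import (
	"context"
	"errors"
	"fmt"
	"time"
)

func main() {
	// Same shape as the tests: one deadline bounds the whole scenario.
	ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond)
	defer cancel()

	// Stand-in for a slow generation call that respects the context.
	select {
	case <-time.After(1 * time.Second):
		fmt.Println("finished")
	case <-ctx.Done():
		fmt.Println("aborted:", ctx.Err(),
			"deadline exceeded:", errors.Is(ctx.Err(), context.DeadlineExceeded))
	}
}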
integration/llm_image_test.go

@@ -32,7 +32,11 @@ func TestIntegrationMultimodal(t *testing.T) {
 	resp := "the ollam"
 	ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute)
 	defer cancel()
-	GenerateTestHelper(ctx, t, req, []string{resp})
+	client, _, cleanup := InitServerConnection(ctx, t)
+	defer cleanup()
+	require.NoError(t, PullIfMissing(ctx, client, req.Model))
+	// llava models on CPU can be quite slow to start,
+	DoGenerate(ctx, t, client, req, []string{resp}, 120*time.Second, 30*time.Second)
 }
 
 const imageEncoding = `iVBORw0KGgoAAAANSUhEUgAAANIAAAB4CAYAAACHHqzKAAAAAXNSR0IArs4c6QAAAIRlWElmTU0AKgAAAAgABQESAAMAAAABAAEAAAEaAAUAAAABAAAASgEb
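The rewrite trades the one-shot GenerateTestHelper for explicit DoGenerate with two durations. Judging from the comments in concurrency_test.go, the first bounds the wait for initial output (model load plus prompt processing) and the second presumably bounds the gap between subsequent chunks. A generic sketch of that resettable-watchdog idea; every name here is illustrative, not ollama's test API:

package main

import (
	"errors"
	"fmt"
	"time"
)

// watchStream enforces a long budget for the first token and a shorter one
// for each token after it — the two-duration pattern the DoGenerate calls
// appear to use.
func watchStream(tokens <-chan string, initial, between time.Duration) error {
	timer := time.NewTimer(initial)
	defer timer.Stop()
	for {
		select {
		case _, ok := <-tokens:
			if !ok {
				return nil // stream finished cleanly
			}
			// Drain a fire that may have raced with the token, then rearm
			// with the shorter inter-token budget.
			if !timer.Stop() {
				select {
				case <-timer.C:
				default:
				}
			}
			timer.Reset(between)
		case <-timer.C:
			return errors.New("stream stalled past its timeout")
		}
	}
}

func main() {
	tokens := make(chan string)
	go func() {
		defer close(tokens)
		for i := 0; i < 3; i++ {
			time.Sleep(10 * time.Millisecond)
			tokens <- "tok"
		}
	}()
	fmt.Println("err:", watchStream(tokens, 120*time.Millisecond, 30*time.Millisecond))
}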
integration/utils_test.go

@@ -140,7 +140,7 @@ func PullIfMissing(ctx context.Context, client *api.Client, modelName string) er
 	showCtx, cancel := context.WithDeadlineCause(
 		ctx,
-		time.Now().Add(5*time.Second),
+		time.Now().Add(10*time.Second),
 		fmt.Errorf("show for existing model %s took too long", modelName),
 	)
 	defer cancel()
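The widened deadline above rides on context.WithDeadlineCause (added in Go 1.21), which is what makes the timeout actionable: when the deadline expires, ctx.Err() still reports context.DeadlineExceeded, but context.Cause(ctx) returns the descriptive error supplied at construction. A small standalone demonstration:

package main

import (
	"context"
	"fmt"
	"time"
)

func main() {
	cause := fmt.Errorf("show for existing model %s took too long", "orca-mini")
	ctx, cancel := context.WithDeadlineCause(
		context.Background(),
		time.Now().Add(10*time.Millisecond),
		cause,
	)
	defer cancel()

	<-ctx.Done()
	fmt.Println("Err:  ", ctx.Err())          // context deadline exceeded
	fmt.Println("Cause:", context.Cause(ctx)) // the descriptive error above
}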
@@ -290,6 +290,7 @@ func GenerateRequests() ([]api.GenerateRequest, [][]string) {
 			Model:     "orca-mini",
 			Prompt:    "why is the ocean blue?",
 			Stream:    &stream,
+			KeepAlive: &api.Duration{Duration: 10 * time.Second},
 			Options: map[string]interface{}{
 				"seed":        42,
 				"temperature": 0.0,
@@ -298,6 +299,7 @@ func GenerateRequests() ([]api.GenerateRequest, [][]string) {
 			Model:     "orca-mini",
 			Prompt:    "why is the color of dirt brown?",
 			Stream:    &stream,
+			KeepAlive: &api.Duration{Duration: 10 * time.Second},
 			Options: map[string]interface{}{
 				"seed":        42,
 				"temperature": 0.0,
@@ -306,6 +308,7 @@ func GenerateRequests() ([]api.GenerateRequest, [][]string) {
 			Model:     "orca-mini",
 			Prompt:    "what is the origin of the us thanksgiving holiday?",
 			Stream:    &stream,
+			KeepAlive: &api.Duration{Duration: 10 * time.Second},
 			Options: map[string]interface{}{
 				"seed":        42,
 				"temperature": 0.0,
@@ -314,6 +317,7 @@ func GenerateRequests() ([]api.GenerateRequest, [][]string) {
 			Model:     "orca-mini",
 			Prompt:    "what is the origin of independence day?",
 			Stream:    &stream,
+			KeepAlive: &api.Duration{Duration: 10 * time.Second},
 			Options: map[string]interface{}{
 				"seed":        42,
 				"temperature": 0.0,
@@ -322,6 +326,7 @@ func GenerateRequests() ([]api.GenerateRequest, [][]string) {
 			Model:     "orca-mini",
 			Prompt:    "what is the composition of air?",
 			Stream:    &stream,
+			KeepAlive: &api.Duration{Duration: 10 * time.Second},
 			Options: map[string]interface{}{
 				"seed":        42,
 				"temperature": 0.0,
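Each of the five request fixtures gains the same KeepAlive field. In the ollama API, KeepAlive bounds how long the server keeps the model resident after a request completes, so a 10-second value lets small-VRAM cards release memory promptly between tests instead of relying on the server default. A sketch of the same field in a standalone client call; the module path follows the upstream ollama repo, and the surrounding program is illustrative:

package main

import (
	"context"
	"fmt"
	"log"
	"time"

	"github.com/ollama/ollama/api"
)

func main() {
	client, err := api.ClientFromEnvironment()
	if err != nil {
		log.Fatal(err)
	}

	stream := false
	req := &api.GenerateRequest{
		Model:  "orca-mini",
		Prompt: "why is the ocean blue?",
		Stream: &stream,
		// Unload the model 10s after the response, matching the fixtures above.
		KeepAlive: &api.Duration{Duration: 10 * time.Second},
		Options: map[string]interface{}{
			"seed":        42,
			"temperature": 0.0,
		},
	}

	err = client.Generate(context.Background(), req, func(r api.GenerateResponse) error {
		fmt.Print(r.Response)
		return nil
	})
	if err != nil {
		log.Fatal(err)
	}
}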