Fix context exhaustion integration test for small GPUs

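On smaller GPUs, the initial model load of llama2 took over 30s (the default timeout for the DoGenerate helper), so the test now sets up the server connection itself, pulls the model if it is missing, and calls DoGenerate directly with explicit 120s and 10s timeouts.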

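Fix context exhaustion integration test for small GPUs
On smaller GPUs, the initial model load of llama2 took over 30s (the default timeout for the DoGenerate helper), so the test now sets up the server connection itself, pulls the model if it is missing, and calls DoGenerate directly with explicit 120s and 10s timeouts.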
73e2c8f6 · Daniel Hiltgen · 2d1e3c32 · 73e2c8f6
Commit 73e2c8f6 authored Jul 09, 2024 by Daniel Hiltgen
Hide whitespace changes
Inline Side-by-side

Showing with 7 additions and 2 deletions

integration/context_test.go integration/context_test.go +7 -2

No files found.
--- a/integration/context_test.go
+++ b/integration/context_test.go
@@ -12,7 +12,7 @@ import (
 func TestContextExhaustion(t *testing.T) {
 	// Longer needed for small footprint GPUs
-	ctx, cancel := context.WithTimeout(context.Background(), 6*time.Minute)
+	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
 	defer cancel()
 	// Set up the test data
 	req := api.GenerateRequest{
@@ -25,5 +25,10 @@ func TestContextExhaustion(t *testing.T) {
 			"num_ctx":     128,
 		},
 	}
-	GenerateTestHelper(ctx, t, req, []string{"once", "upon", "lived"})
+	client, _, cleanup := InitServerConnection(ctx, t)
+	defer cleanup()
+	if err := PullIfMissing(ctx, client, req.Model); err != nil {
+		t.Fatalf("PullIfMissing failed: %v", err)
+	}
+	DoGenerate(ctx, t, client, req, []string{"once", "upon", "lived"}, 120*time.Second, 10*time.Second)
 }