"tests/git@developer.sourcefind.cn:OpenDAS/fairscale.git" did not exist on "40e7450f6fd1ba27a71c58da2b49cd19a46b7678"
Commit 73e2c8f6 authored by Daniel Hiltgen's avatar Daniel Hiltgen
Browse files

Fix context exhaustion integration test for small gpus

On the smaller GPUs, the initial model load of llama2 took over 30s (the
default timeout for the DoGenerate helper)
parent 2d1e3c32
...@@ -12,7 +12,7 @@ import ( ...@@ -12,7 +12,7 @@ import (
func TestContextExhaustion(t *testing.T) { func TestContextExhaustion(t *testing.T) {
// Longer needed for small footprint GPUs // Longer needed for small footprint GPUs
ctx, cancel := context.WithTimeout(context.Background(), 6*time.Minute) ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
defer cancel() defer cancel()
// Set up the test data // Set up the test data
req := api.GenerateRequest{ req := api.GenerateRequest{
...@@ -25,5 +25,10 @@ func TestContextExhaustion(t *testing.T) { ...@@ -25,5 +25,10 @@ func TestContextExhaustion(t *testing.T) {
"num_ctx": 128, "num_ctx": 128,
}, },
} }
GenerateTestHelper(ctx, t, req, []string{"once", "upon", "lived"}) client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()
if err := PullIfMissing(ctx, client, req.Model); err != nil {
t.Fatalf("PullIfMissing failed: %v", err)
}
DoGenerate(ctx, t, client, req, []string{"once", "upon", "lived"}, 120*time.Second, 10*time.Second)
} }
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment