"docs/git@developer.sourcefind.cn:renzhc/diffusers_dcu.git" did not exist on "5802c2e3f27c6b45290773691bbece4091b69ddc"
Unverified commit 4e5d862e, authored by Daniel Hiltgen and committed by GitHub

Integration test tuning (#12492)

Remove some flaky scenarios, and switch to chat for better reliability
parent 303be930
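
The change is largely mechanical: every test that used to build an api.GenerateRequest around a bare Prompt now builds an api.ChatRequest carrying the same text as a single user message, and the helpers (GenerateTestHelper/DoGenerate) are swapped for their chat counterparts (ChatTestHelper/DoChat). A minimal before/after sketch of the request shape, reusing the smol, stream, and blueSkyPrompt fixtures the tests below already reference:

// Before: completion-style request built around a bare prompt.
genReq := api.GenerateRequest{
	Model:  smol,
	Prompt: blueSkyPrompt,
	Stream: &stream,
	Options: map[string]any{"temperature": 0, "seed": 123},
}

// After: the same scenario expressed as a chat request, with the prompt
// wrapped in a single user message.
chatReq := api.ChatRequest{
	Model: smol,
	Messages: []api.Message{
		{Role: "user", Content: blueSkyPrompt},
	},
	Stream: &stream,
	Options: map[string]any{"temperature": 0, "seed": 123},
}
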
@@ -17,16 +17,21 @@ func TestBlueSky(t *testing.T) {
 	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
 	defer cancel()
 	// Set up the test data
-	req := api.GenerateRequest{
+	req := api.ChatRequest{
 		Model: smol,
-		Prompt: blueSkyPrompt,
+		Messages: []api.Message{
+			{
+				Role: "user",
+				Content: blueSkyPrompt,
+			},
+		},
 		Stream: &stream,
 		Options: map[string]any{
 			"temperature": 0,
 			"seed": 123,
 		},
 	}
-	GenerateTestHelper(ctx, t, req, blueSkyExpected)
+	ChatTestHelper(ctx, t, req, blueSkyExpected)
 }
 func TestUnicode(t *testing.T) {
@@ -34,10 +39,15 @@ func TestUnicode(t *testing.T) {
 	ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute)
 	defer cancel()
 	// Set up the test data
-	req := api.GenerateRequest{
+	req := api.ChatRequest{
 		// DeepSeek has a Unicode tokenizer regex, making it a unicode torture test
 		Model: "deepseek-coder-v2:16b-lite-instruct-q2_K", // TODO is there an ollama-engine model we can switch to and keep the coverage?
-		Prompt: "天空为什么是蓝色的?", // Why is the sky blue?
+		Messages: []api.Message{
+			{
+				Role: "user",
+				Content: "天空为什么是蓝色的?", // Why is the sky blue?
+			},
+		},
 		Stream: &stream,
 		Options: map[string]any{
 			"temperature": 0,
@@ -57,9 +67,14 @@ func TestUnicode(t *testing.T) {
 	if err != nil {
 		t.Fatalf("failed to load model %s: %s", req.Model, err)
 	}
+	defer func() {
+		// best effort unload once we're done with the model
+		client.Generate(ctx, &api.GenerateRequest{Model: req.Model, KeepAlive: &api.Duration{Duration: 0}}, func(rsp api.GenerateResponse) error { return nil })
+	}()
 	skipIfNotGPULoaded(ctx, t, client, req.Model, 100)
-	DoGenerate(ctx, t, client, req, []string{
+	DoChat(ctx, t, client, req, []string{
 		"散射", // scattering
 		"频率", // frequency
 	}, 120*time.Second, 120*time.Second)
@@ -69,9 +84,14 @@ func TestExtendedUnicodeOutput(t *testing.T) {
 	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
 	defer cancel()
 	// Set up the test data
-	req := api.GenerateRequest{
+	req := api.ChatRequest{
 		Model: "gemma2:2b",
-		Prompt: "Output some smily face emoji",
+		Messages: []api.Message{
+			{
+				Role: "user",
+				Content: "Output some smily face emoji",
+			},
+		},
 		Stream: &stream,
 		Options: map[string]any{
 			"temperature": 0,
@@ -83,7 +103,7 @@ func TestExtendedUnicodeOutput(t *testing.T) {
 	if err := PullIfMissing(ctx, client, req.Model); err != nil {
 		t.Fatal(err)
 	}
-	DoGenerate(ctx, t, client, req, []string{"😀", "😊", "😁", "😂", "😄", "😃"}, 120*time.Second, 120*time.Second)
+	DoChat(ctx, t, client, req, []string{"😀", "😊", "😁", "😂", "😄", "😃"}, 120*time.Second, 120*time.Second)
 }
 func TestUnicodeModelDir(t *testing.T) {
@@ -108,14 +128,19 @@ func TestUnicodeModelDir(t *testing.T) {
 	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
 	defer cancel()
-	req := api.GenerateRequest{
+	req := api.ChatRequest{
 		Model: smol,
-		Prompt: blueSkyPrompt,
+		Messages: []api.Message{
+			{
+				Role: "user",
+				Content: blueSkyPrompt,
+			},
+		},
 		Stream: &stream,
 		Options: map[string]any{
 			"temperature": 0,
 			"seed": 123,
 		},
 	}
-	GenerateTestHelper(ctx, t, req, blueSkyExpected)
+	ChatTestHelper(ctx, t, req, blueSkyExpected)
 }
@@ -20,9 +20,9 @@ import (
 )
 // Send multiple requests in parallel (concurrently) to a single model and ensure responses are expected
-func TestConcurrentGenerate(t *testing.T) {
+func TestConcurrentChat(t *testing.T) {
 	// Assumes all requests have the same model
-	req, resp := GenerateRequests()
+	req, resp := ChatRequests()
 	numParallel := int(envconfig.NumParallel() + 1)
 	iterLimit := 3
@@ -57,7 +57,7 @@ func TestConcurrentGenerate(t *testing.T) {
 				slog.Info("Starting", "thread", i, "iter", j)
 				// On slower GPUs it can take a while to process the concurrent requests
 				// so we allow a much longer initial timeout
-				DoGenerate(ctx, t, client, req[k], resp[k], 120*time.Second, 20*time.Second)
+				DoChat(ctx, t, client, req[k], resp[k], 120*time.Second, 20*time.Second)
 			}
 		}(i)
 	}
@@ -163,7 +163,7 @@ chooseModels:
 		wg.Add(1)
 		go func(i int) {
 			defer wg.Done()
-			reqs, resps := GenerateRequests()
+			reqs, resps := ChatRequests()
 			for j := 0; j < 3; j++ {
 				if time.Now().Sub(started) > softTimeout {
 					slog.Info("exceeded soft timeout, winding down test")
@@ -171,8 +171,8 @@ chooseModels:
 				}
 				k := r.Int() % len(reqs)
 				reqs[k].Model = chosenModels[i]
-				slog.Info("Starting", "model", reqs[k].Model, "iteration", j, "request", reqs[k].Prompt)
-				DoGenerate(ctx, t, client, reqs[k], resps[k],
+				slog.Info("Starting", "model", reqs[k].Model, "iteration", j, "request", reqs[k].Messages[0].Content)
+				DoChat(ctx, t, client, reqs[k], resps[k],
 					120*time.Second, // Be extra patient for the model to load initially
 					10*time.Second,  // Once results start streaming, fail if they stall
 				)
...
@@ -21,9 +21,14 @@ func TestLongInputContext(t *testing.T) {
 	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
 	defer cancel()
 	// Set up the test data
-	req := api.GenerateRequest{
+	req := api.ChatRequest{
 		Model: smol,
-		Prompt: "Oh, don’t speak to me of Austria. Perhaps I don’t understand things, but Austria never has wished, and does not wish, for war. She is betraying us! Russia alone must save Europe. Our gracious sovereign recognizes his high vocation and will be true to it. That is the one thing I have faith in! Our good and wonderful sovereign has to perform the noblest role on earth, and he is so virtuous and noble that God will not forsake him. He will fulfill his vocation and crush the hydra of revolution, which has become more terrible than ever in the person of this murderer and villain! We alone must avenge the blood of the just one.... Whom, I ask you, can we rely on?... England with her commercial spirit will not and cannot understand the Emperor Alexander’s loftiness of soul. She has refused to evacuate Malta. She wanted to find, and still seeks, some secret motive in our actions. What answer did Novosíltsev get? None. The English have not understood and cannot understand the self-abnegation of our Emperor who wants nothing for himself, but only desires the good of mankind. And what have they promised? Nothing! And what little they have promised they will not perform! Prussia has always declared that Buonaparte is invincible, and that all Europe is powerless before him.... And I don’t believe a word that Hardenburg says, or Haugwitz either. This famous Prussian neutrality is just a trap. I have faith only in God and the lofty destiny of our adored monarch. He will save Europe! What country is this referring to?",
+		Messages: []api.Message{
+			{
+				Role: "user",
+				Content: "Oh, don’t speak to me of Austria. Perhaps I don’t understand things, but Austria never has wished, and does not wish, for war. She is betraying us! Russia alone must save Europe. Our gracious sovereign recognizes his high vocation and will be true to it. That is the one thing I have faith in! Our good and wonderful sovereign has to perform the noblest role on earth, and he is so virtuous and noble that God will not forsake him. He will fulfill his vocation and crush the hydra of revolution, which has become more terrible than ever in the person of this murderer and villain! We alone must avenge the blood of the just one.... Whom, I ask you, can we rely on?... England with her commercial spirit will not and cannot understand the Emperor Alexander’s loftiness of soul. She has refused to evacuate Malta. She wanted to find, and still seeks, some secret motive in our actions. What answer did Novosíltsev get? None. The English have not understood and cannot understand the self-abnegation of our Emperor who wants nothing for himself, but only desires the good of mankind. And what have they promised? Nothing! And what little they have promised they will not perform! Prussia has always declared that Buonaparte is invincible, and that all Europe is powerless before him.... And I don’t believe a word that Hardenburg says, or Haugwitz either. This famous Prussian neutrality is just a trap. I have faith only in God and the lofty destiny of our adored monarch. He will save Europe! What country is this referring to?",
+			},
+		},
 		Stream: &stream,
 		Options: map[string]any{
 			"temperature": 0,
@@ -36,7 +41,7 @@ func TestLongInputContext(t *testing.T) {
 	if err := PullIfMissing(ctx, client, req.Model); err != nil {
 		t.Fatalf("PullIfMissing failed: %v", err)
 	}
-	DoGenerate(ctx, t, client, req, []string{"russia", "german", "france", "england", "austria", "prussia", "europe", "individuals", "coalition", "conflict"}, 120*time.Second, 10*time.Second)
+	DoChat(ctx, t, client, req, []string{"russia", "german", "france", "england", "austria", "prussia", "europe", "individuals", "coalition", "conflict"}, 120*time.Second, 10*time.Second)
 }
 func TestContextExhaustion(t *testing.T) {
@@ -48,9 +53,14 @@ func TestContextExhaustion(t *testing.T) {
 	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
 	defer cancel()
 	// Set up the test data
-	req := api.GenerateRequest{
+	req := api.ChatRequest{
 		Model: smol,
-		Prompt: "Write me a story in english with a lot of emojis",
+		Messages: []api.Message{
+			{
+				Role: "user",
+				Content: "Write me a story in english with a lot of emojis",
+			},
+		},
 		Stream: &stream,
 		Options: map[string]any{
 			"temperature": 0,
@@ -63,12 +73,12 @@ func TestContextExhaustion(t *testing.T) {
 	if err := PullIfMissing(ctx, client, req.Model); err != nil {
 		t.Fatalf("PullIfMissing failed: %v", err)
 	}
-	DoGenerate(ctx, t, client, req, []string{"once", "upon", "lived", "sunny", "cloudy", "clear", "water", "time", "travel", "world"}, 120*time.Second, 10*time.Second)
+	DoChat(ctx, t, client, req, []string{"once", "upon", "lived", "sunny", "cloudy", "clear", "water", "time", "travel", "world"}, 120*time.Second, 10*time.Second)
 }
 // Send multiple generate requests with prior context and ensure the response is coherant and expected
 func TestParallelGenerateWithHistory(t *testing.T) {
-	modelOverride := ollamaEngineChatModels[0] // Most recent ollama engine model
+	modelOverride := "gpt-oss:20b"
 	req, resp := GenerateRequests()
 	numParallel := 2
 	iterLimit := 2
@@ -155,7 +165,7 @@ func TestGenerateWithHistory(t *testing.T) {
 // Send multiple chat requests with prior context and ensure the response is coherant and expected
 func TestParallelChatWithHistory(t *testing.T) {
-	modelOverride := ollamaEngineChatModels[0] // Most recent ollama engine model
+	modelOverride := "gpt-oss:20b"
 	req, resp := ChatRequests()
 	numParallel := 2
 	iterLimit := 2
...
@@ -15,7 +15,7 @@ import (
 // First run of this scenario on a target system will take a long time to download
 // ~1.5TB of models. Set a sufficiently large -timeout for your network speed
-func TestLibraryModelsGenerate(t *testing.T) {
+func TestLibraryModelsChat(t *testing.T) {
 	softTimeout, hardTimeout := getTimeouts(t)
 	slog.Info("Setting timeouts", "soft", softTimeout, "hard", hardTimeout)
 	ctx, cancel := context.WithTimeout(context.Background(), hardTimeout)
@@ -43,9 +43,14 @@ func TestLibraryModelsGenerate(t *testing.T) {
 				t.Skip(fmt.Sprintf("Skipping %s architecture %s != %s", model, arch, targetArch))
 			}
 		}
-		req := api.GenerateRequest{
+		req := api.ChatRequest{
 			Model: model,
-			Prompt: blueSkyPrompt,
+			Messages: []api.Message{
+				{
+					Role: "user",
+					Content: blueSkyPrompt,
+				},
+			},
 			KeepAlive: &api.Duration{Duration: 10 * time.Second},
 			Options: map[string]interface{}{
 				"temperature": 0.1,
@@ -58,13 +63,13 @@ func TestLibraryModelsGenerate(t *testing.T) {
 			anyResp = []string{"select", "from"}
 		} else if model == "granite3-guardian" || model == "shieldgemma" || model == "llama-guard3" || model == "bespoke-minicheck" {
 			anyResp = []string{"yes", "no", "safe", "unsafe"}
-		} else if model == "openthinker" || model == "nexusraven" {
+		} else if model == "openthinker" {
 			anyResp = []string{"plugin", "im_sep", "components", "function call"}
 		} else if model == "starcoder" || model == "starcoder2" || model == "magicoder" || model == "deepseek-coder" {
-			req.Prompt = "def fibonacci():"
+			req.Messages[0].Content = "def fibonacci():"
 			anyResp = []string{"f(n)", "sequence", "n-1", "main()", "__main__", "while"}
 		}
-		DoGenerate(ctx, t, client, req, anyResp, 120*time.Second, 30*time.Second)
+		DoChat(ctx, t, client, req, anyResp, 120*time.Second, 30*time.Second)
 		})
 	}
 }
@@ -34,17 +34,22 @@ func TestVisionModels(t *testing.T) {
 			if err != nil {
 				t.Fatal(err)
 			}
-			req := api.GenerateRequest{
+			req := api.ChatRequest{
 				Model: v.model,
-				Prompt: "what does the text in this image say?",
+				Messages: []api.Message{
+					{
+						Role: "user",
+						Content: "what does the text in this image say?",
+						Images: []api.ImageData{
+							image,
+						},
+					},
+				},
 				Stream: &stream,
 				Options: map[string]any{
 					"seed": 42,
 					"temperature": 0.0,
 				},
-				Images: []api.ImageData{
-					image,
-				},
 			}
 			ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
 			defer cancel()
@@ -56,8 +61,15 @@ func TestVisionModels(t *testing.T) {
 			if err := PullIfMissing(ctx, client, req.Model); err != nil {
 				t.Fatal(err)
 			}
+			// Preload to skip if we're less than 80% on GPU to avoid extremely slow tests
+			err = client.Generate(ctx, &api.GenerateRequest{Model: req.Model}, func(response api.GenerateResponse) error { return nil })
+			if err != nil {
+				t.Fatalf("failed to load model %s: %s", req.Model, err)
+			}
+			skipIfNotGPULoaded(ctx, t, client, req.Model, 80)
 			// llava models on CPU can be quite slow to start
-			DoGenerate(ctx, t, client, req, []string{resp}, 240*time.Second, 30*time.Second)
+			DoChat(ctx, t, client, req, []string{resp}, 240*time.Second, 30*time.Second)
 		})
 	}
 }
...
@@ -19,7 +19,7 @@ import (
 	"github.com/ollama/ollama/format"
 )
-func TestModelsGenerate(t *testing.T) {
+func TestModelsChat(t *testing.T) {
 	softTimeout, hardTimeout := getTimeouts(t)
 	slog.Info("Setting timeouts", "soft", softTimeout, "hard", hardTimeout)
 	ctx, cancel := context.WithTimeout(context.Background(), hardTimeout)
@@ -66,15 +66,23 @@ func TestModelsGenerate(t *testing.T) {
 			}
 		}
 		// TODO - fiddle with context size
-		req := api.GenerateRequest{
+		req := api.ChatRequest{
 			Model: model,
-			Prompt: blueSkyPrompt,
+			Messages: []api.Message{
+				{
+					Role: "user",
+					Content: blueSkyPrompt,
+				},
+			},
+			KeepAlive: &api.Duration{Duration: 10 * time.Second},
 			Options: map[string]interface{}{
 				"temperature": 0,
 				"seed": 123,
 			},
 		}
-		DoGenerate(ctx, t, client, req, blueSkyExpected, 120*time.Second, 30*time.Second)
+		DoChat(ctx, t, client, req, blueSkyExpected, 120*time.Second, 30*time.Second)
+		// best effort unload once we're done with the model
+		client.Generate(ctx, &api.GenerateRequest{Model: req.Model, KeepAlive: &api.Duration{Duration: 0}}, func(rsp api.GenerateResponse) error { return nil })
 		})
 	}
 }
@@ -128,8 +136,9 @@ func TestModelsEmbed(t *testing.T) {
 			}
 		}
 		req := api.EmbeddingRequest{
 			Model: model,
 			Prompt: "why is the sky blue?",
+			KeepAlive: &api.Duration{Duration: 10 * time.Second},
 			Options: map[string]interface{}{
 				"temperature": 0,
 				"seed": 123,
@@ -139,6 +148,10 @@ func TestModelsEmbed(t *testing.T) {
 		if err != nil {
 			t.Fatalf("embeddings call failed %s", err)
 		}
+		defer func() {
+			// best effort unload once we're done with the model
+			client.Generate(ctx, &api.GenerateRequest{Model: req.Model, KeepAlive: &api.Duration{Duration: 0}}, func(rsp api.GenerateResponse) error { return nil })
+		}()
 		if len(resp.Embedding) == 0 {
 			t.Errorf("zero length embedding response")
 		}
...
@@ -173,9 +173,14 @@ func doModelPerfTest(t *testing.T, chatModels []string) {
 				slog.Info("skipping long prompt", "model", model, "num_ctx", numCtx, "gpu_percent", gpuPercent)
 				continue
 			}
-			req := api.GenerateRequest{
+			req := api.ChatRequest{
 				Model: model,
-				Prompt: tc.prompt,
+				Messages: []api.Message{
+					{
+						Role: "user",
+						Content: tc.prompt,
+					},
+				},
 				KeepAlive: &api.Duration{Duration: 20 * time.Second}, // long enough to ensure a ps returns
 				Options: map[string]interface{}{
 					"temperature": 0,
@@ -184,7 +189,7 @@ func doModelPerfTest(t *testing.T, chatModels []string) {
 				},
 			}
 			atLeastOne := false
-			var resp api.GenerateResponse
+			var resp api.ChatResponse
 			stream := false
 			req.Stream = &stream
@@ -198,7 +203,7 @@ func doModelPerfTest(t *testing.T, chatModels []string) {
 			)
 			defer cancel()
-			err = client.Generate(genCtx, &req, func(rsp api.GenerateResponse) error {
+			err = client.Chat(genCtx, &req, func(rsp api.ChatResponse) error {
 				resp = rsp
 				return nil
 			})
@@ -214,13 +219,13 @@ func doModelPerfTest(t *testing.T, chatModels []string) {
 			}
 			loaded = true
 			for _, expResp := range tc.anyResp {
-				if strings.Contains(strings.ToLower(resp.Response), expResp) {
+				if strings.Contains(strings.ToLower(resp.Message.Content), expResp) {
 					atLeastOne = true
 					break
 				}
 			}
 			if !atLeastOne {
-				t.Fatalf("response didn't contain expected values: ctx:%d expected:%v response:%s ", numCtx, tc.anyResp, resp.Response)
+				t.Fatalf("response didn't contain expected values: ctx:%d expected:%v response:%s ", numCtx, tc.anyResp, resp.Message.Content)
 			}
 			models, err := client.ListRunning(ctx)
 			if err != nil {
...
@@ -74,9 +74,14 @@ func TestQuantization(t *testing.T) {
 			}
 			stream := true
-			genReq := api.GenerateRequest{
+			chatReq := api.ChatRequest{
 				Model: newName,
-				Prompt: blueSkyPrompt,
+				Messages: []api.Message{
+					{
+						Role: "user",
+						Content: blueSkyPrompt,
+					},
+				},
 				KeepAlive: &api.Duration{Duration: 3 * time.Second},
 				Options: map[string]any{
 					"seed": 42,
@@ -91,8 +96,8 @@ func TestQuantization(t *testing.T) {
 			reqCtx, reqCancel := context.WithCancel(ctx)
 			atLeastOne := false
 			var buf bytes.Buffer
-			genfn := func(response api.GenerateResponse) error {
-				buf.Write([]byte(response.Response))
+			chatfn := func(response api.ChatResponse) error {
+				buf.Write([]byte(response.Message.Content))
 				fullResp := strings.ToLower(buf.String())
 				for _, resp := range blueSkyExpected {
 					if strings.Contains(fullResp, resp) {
@@ -108,14 +113,14 @@ func TestQuantization(t *testing.T) {
 			done := make(chan int)
 			var genErr error
 			go func() {
-				genErr = client.Generate(reqCtx, &genReq, genfn)
+				genErr = client.Chat(reqCtx, &chatReq, chatfn)
 				done <- 0
 			}()
 			select {
 			case <-done:
 				if genErr != nil && !atLeastOne {
-					t.Fatalf("failed with %s request prompt %s ", genReq.Model, genReq.Prompt)
+					t.Fatalf("failed with %s request prompt %s ", chatReq.Model, chatReq.Messages[0].Content)
 				}
 			case <-ctx.Done():
 				t.Error("outer test context done while waiting for generate")
...
@@ -15,6 +15,7 @@ import (
 	"net/http"
 	"net/url"
 	"os"
+	"os/exec"
 	"path/filepath"
 	"runtime"
 	"strconv"
@@ -24,7 +25,6 @@ import (
 	"time"
 	"github.com/ollama/ollama/api"
-	"github.com/ollama/ollama/app/lifecycle"
 	"github.com/ollama/ollama/format"
 )
@@ -38,6 +38,7 @@ var (
 	// Note: add newer models at the top of the list to test them first
 	ollamaEngineChatModels = []string{
+		"qwen3-coder:30b",
 		"gpt-oss:20b",
 		"gemma3n:e2b",
 		"mistral-small3.2:latest",
@@ -46,6 +47,7 @@ var (
 		"qwen2.5-coder:latest",
 		"qwen2.5vl:3b",
 		"qwen3:0.6b", // dense
+		"qwen3:1.7b", // dense
 		"qwen3:30b", // MOE
 		"gemma3:1b",
 		"llama3.1:latest",
@@ -265,16 +267,16 @@ var (
 		"Explain the physics involved in them. Be breif in your reply",
 		"Explain the chemistry involved in them. Be breif in your reply",
 		"What are common myths related to them? Be brief in your reply",
-		"What are common fairytales related to them? Be brief in your reply",
 		"Can they form if there is no rain? Be breif in your reply",
 		"Can they form if there are no clouds? Be breif in your reply",
 		"Do they happen on other planets? Be brief in your reply",
 	}
-	rainbowExpected = []string{"water", "droplet", "mist", "glow", "refract", "reflect", "scatter", "wave", "color", "spectrum", "raindrop", "atmosphere", "frequency", "end", "gold", "fortune", "blessing", "prosperity", "magic", "shower", "sky", "shimmer", "light", "storm", "sunny"}
+	rainbowExpected = []string{"water", "droplet", "mist", "glow", "refract", "reflect", "scatter", "particles", "wave", "color", "spectrum", "raindrop", "atmosphere", "frequency", "shower", "sky", "shimmer", "light", "storm", "sunny", "sunburst", "phenomenon", "mars", "venus", "jupiter"}
 )
 func init() {
-	lifecycle.InitLogging()
+	logger := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{Level: slog.LevelDebug}))
+	slog.SetDefault(logger)
 	custom := os.Getenv("OLLAMA_TEST_DEFAULT_MODEL")
 	if custom != "" {
 		slog.Info("setting default test model to " + custom)
@@ -335,6 +337,7 @@ func GetTestEndpoint() (*api.Client, string) {
 var serverMutex sync.Mutex
 var serverReady bool
+var serverLogFile string
 func startServer(t *testing.T, ctx context.Context, ollamaHost string) error {
 	// Make sure the server has been built
@@ -361,8 +364,9 @@ func startServer(t *testing.T, ctx context.Context, ollamaHost string) error {
 		t.Setenv("OLLAMA_HOST", ollamaHost)
 	}
+	logDir := t.TempDir()
 	slog.Info("starting server", "url", ollamaHost)
-	done, err := lifecycle.SpawnServer(ctx, "../ollama")
+	done, err := SpawnServer(ctx, "../ollama", logDir)
 	if err != nil {
 		return fmt.Errorf("failed to start server: %w", err)
 	}
@@ -385,6 +389,36 @@ func startServer(t *testing.T, ctx context.Context, ollamaHost string) error {
 	return nil
 }
+func SpawnServer(ctx context.Context, command, logDir string) (chan int, error) {
+	done := make(chan int)
+	fp, err := os.CreateTemp(logDir, "ollama-server-*.log")
+	if err != nil {
+		return nil, fmt.Errorf("failed to create log file: %w", err)
+	}
+	serverLogFile = fp.Name()
+	cmd := exec.CommandContext(ctx, command, "serve")
+	cmd.Stderr = fp
+	cmd.Stdout = fp
+	go func() {
+		slog.Info("starting server...")
+		if err := cmd.Run(); err != nil {
+			// "signal: killed" expected
+			if !strings.Contains(err.Error(), "signal") {
+				slog.Info("failed to run server", "error", err)
+			}
+		}
+		var code int
+		if cmd.ProcessState != nil {
+			code = cmd.ProcessState.ExitCode()
+		}
+		slog.Info("server exited")
+		done <- code
+	}()
+	return done, nil
+}
 func PullIfMissing(ctx context.Context, client *api.Client, modelName string) error {
 	slog.Info("checking status of model", "model", modelName)
 	showReq := &api.ShowRequest{Name: modelName}
@@ -445,12 +479,6 @@ func InitServerConnection(ctx context.Context, t *testing.T) (*api.Client, strin
 	client, testEndpoint := GetTestEndpoint()
 	if os.Getenv("OLLAMA_TEST_EXISTING") == "" {
 		serverProcMutex.Lock()
-		fp, err := os.CreateTemp("", "ollama-server-*.log")
-		if err != nil {
-			t.Fatalf("failed to generate log file: %s", err)
-		}
-		lifecycle.ServerLogFile = fp.Name()
-		fp.Close()
 		if err := startServer(t, ctx, testEndpoint); err != nil {
 			t.Fatal(err)
 		}
@@ -478,36 +506,32 @@ func InitServerConnection(ctx context.Context, t *testing.T) (*api.Client, strin
 		if os.Getenv("OLLAMA_TEST_EXISTING") == "" {
 			defer serverProcMutex.Unlock()
 			if t.Failed() {
-				fp, err := os.Open(lifecycle.ServerLogFile)
+				fp, err := os.Open(serverLogFile)
 				if err != nil {
-					slog.Error("failed to open server log", "logfile", lifecycle.ServerLogFile, "error", err)
+					slog.Error("failed to open server log", "logfile", serverLogFile, "error", err)
 					return
 				}
 				defer fp.Close()
 				data, err := io.ReadAll(fp)
 				if err != nil {
-					slog.Error("failed to read server log", "logfile", lifecycle.ServerLogFile, "error", err)
+					slog.Error("failed to read server log", "logfile", serverLogFile, "error", err)
 					return
 				}
 				slog.Warn("SERVER LOG FOLLOWS")
 				os.Stderr.Write(data)
 				slog.Warn("END OF SERVER")
 			}
-			err := os.Remove(lifecycle.ServerLogFile)
-			if err != nil && !os.IsNotExist(err) {
-				slog.Warn("failed to cleanup", "logfile", lifecycle.ServerLogFile, "error", err)
-			}
 		}
 	}
 }
-func GenerateTestHelper(ctx context.Context, t *testing.T, genReq api.GenerateRequest, anyResp []string) {
+func ChatTestHelper(ctx context.Context, t *testing.T, req api.ChatRequest, anyResp []string) {
 	client, _, cleanup := InitServerConnection(ctx, t)
 	defer cleanup()
-	if err := PullIfMissing(ctx, client, genReq.Model); err != nil {
+	if err := PullIfMissing(ctx, client, req.Model); err != nil {
 		t.Fatal(err)
 	}
-	DoGenerate(ctx, t, client, genReq, anyResp, 30*time.Second, 10*time.Second)
+	DoChat(ctx, t, client, req, anyResp, 30*time.Second, 10*time.Second)
 }
 func DoGenerate(ctx context.Context, t *testing.T, client *api.Client, genReq api.GenerateRequest, anyResp []string, initialTimeout, streamTimeout time.Duration) []int {
@@ -726,8 +750,14 @@ func skipIfNotGPULoaded(ctx context.Context, t *testing.T, client *api.Client, m
 	loaded := []string{}
 	for _, m := range models.Models {
 		loaded = append(loaded, m.Name)
-		if m.Name != model {
-			continue
+		if strings.Contains(model, ":") {
+			if m.Name != model {
+				continue
+			}
+		} else if strings.Contains(m.Name, ":") {
+			if !strings.HasPrefix(m.Name, model+":") {
+				continue
+			}
 		}
 		gpuPercent := 0
 		switch {
...
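
For context, the DoChat/ChatTestHelper helpers referenced throughout this diff follow the same pattern the quantization test shows inline: stream a Chat call, accumulate Message.Content, and pass if any expected substring appears. A rough, self-contained sketch of that check, assuming a connected api.Client; the helper name checkAnyExpected is illustrative and not part of this test suite:

package integration

import (
	"bytes"
	"context"
	"strings"

	"github.com/ollama/ollama/api"
)

// checkAnyExpected streams a chat request and reports whether any of the
// expected substrings appears in the accumulated assistant message.
// Illustrative only; the real helpers also enforce initial and stall timeouts.
func checkAnyExpected(ctx context.Context, client *api.Client, req *api.ChatRequest, expected []string) (bool, error) {
	var buf bytes.Buffer
	err := client.Chat(ctx, req, func(rsp api.ChatResponse) error {
		// Accumulate streamed chunks of the assistant message.
		buf.WriteString(rsp.Message.Content)
		return nil
	})
	if err != nil {
		return false, err
	}
	full := strings.ToLower(buf.String())
	for _, want := range expected {
		if strings.Contains(full, strings.ToLower(want)) {
			return true, nil
		}
	}
	return false, nil
}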