//go:build integration

package integration

import (
	"bytes"
	"context"
	"errors"
	"fmt"
	"io"
	"log/slog"
	"math/rand"
	"net"
	"net/http"
	"net/url"
	"os"
	"path/filepath"
	"runtime"
	"strconv"
	"strings"
	"sync"
	"testing"
	"time"

	"github.com/ollama/ollama/api"
	"github.com/ollama/ollama/app/lifecycle"
	"github.com/ollama/ollama/format"
	"github.com/stretchr/testify/require"
)

var (
	// smol is the default small chat model used by lightweight test
	// scenarios; override it via OLLAMA_TEST_SMOL_MODEL (see init).
	smol = "llama3.2:1b"
)

var (
	// started records when the test binary began, for elapsed-time reporting.
	started = time.Now()

	// Note: add newer models at the top of the list to test them first
	ollamaEngineChatModels = []string{
		"gpt-oss:20b",
		"gemma3n:e2b",
		"mistral-small3.2:latest",
		"deepseek-r1:1.5b",
		"llama3.2-vision:latest",
		"qwen2.5-coder:latest",
		"qwen2.5vl:3b",
		"qwen3:0.6b", // dense
		"qwen3:30b",  // MOE
		"gemma3:1b",
		"llama3.1:latest",
		"llama3.2:latest",
		"gemma2:latest",
		"minicpm-v:latest",    // arch=qwen2
		"granite-code:latest", // arch=llama
	}
	llamaRunnerChatModels = []string{
		"mistral:latest",
		"falcon3:latest",
		"granite3-moe:latest",
		"command-r:latest",
		"nemotron-mini:latest",
		"phi3.5:latest",
		"solar-pro:latest",
		"internlm2:latest",
		"codellama:latest", // arch=llama
		"phi3:latest",
		"falcon2:latest",
		"gemma:latest",
		"llama2:latest",
		"nous-hermes:latest",
		"orca-mini:latest",
		"qwen:latest",
		"stablelm2:latest", // Predictions are off, crashes on small VRAM GPUs
		"falcon:latest",
	}

	// Some library models are quite large - ensure large VRAM and sufficient disk space
	// before running scenarios based on this set
	libraryChatModels = []string{
		"alfred",
		"athene-v2",
		"aya-expanse",
		"aya",
		"bakllava",
		"bespoke-minicheck",
		"codebooga",
		"codegeex4",
		"codegemma",
		"codellama",
		"codeqwen",
		"codestral",
		"codeup",
		"cogito",
		"command-a",
		"command-r-plus",
		"command-r",
		"command-r7b-arabic",
		"command-r7b",
		"dbrx",
		"deepcoder",
		"deepscaler",
		"deepseek-coder-v2",
		"deepseek-coder",
		"deepseek-llm",
		"deepseek-r1",
		// "deepseek-v2.5", // requires 155 GB VRAM
		"deepseek-v2",
		// "deepseek-v3", // requires 482 GB VRAM
		"devstral",
		"dolphin-llama3",
		"dolphin-mistral",
		"dolphin-mixtral",
		"dolphin-phi",
		"dolphin3",
		"dolphincoder",
		"duckdb-nsql",
		"everythinglm",
		"exaone-deep",
		"exaone3.5",
		"falcon",
		"falcon2",
		"falcon3",
		"firefunction-v2",
		"gemma",
		"gemma2",
		"gemma3",
		"gemma3n",
		"glm4",
		"goliath",
		"gpt-oss:20b",
		"granite-code",
		"granite3-dense",
		"granite3-guardian",
		"granite3-moe",
		"granite3.1-dense",
		"granite3.1-moe",
		"granite3.2-vision",
		"granite3.2",
		"granite3.3",
		"hermes3",
		"internlm2",
		"llama-guard3",
		"llama-pro",
		"llama2-chinese",
		"llama2-uncensored",
		"llama2",
		"llama3-chatqa",
		"llama3-gradient",
		"llama3-groq-tool-use",
		"llama3.1",
		"llama3.2-vision",
		"llama3.2",
		"llama3.3",
		"llama3",
		"llama4",
		"llava-llama3",
		"llava-phi3",
		"llava",
		"magicoder",
		"magistral",
		"marco-o1",
		"mathstral",
		"meditron",
		"medllama2",
		"megadolphin",
		"minicpm-v",
		"mistral-large",
		"mistral-nemo",
		"mistral-openorca",
		"mistral-small",
		"mistral-small3.1",
		"mistral-small3.2",
		"mistral",
		"mistrallite",
		"mixtral",
		"moondream",
		"nemotron-mini",
		"nemotron",
		"neural-chat",
		"nexusraven",
		"notus",
		"nous-hermes",
		"nous-hermes2-mixtral",
		"nous-hermes2",
		"nuextract",
		"olmo2",
		"open-orca-platypus2",
		"openchat",
		"opencoder",
		"openhermes",
		"openthinker",
		"orca-mini",
		"orca2",
		// "phi", // unreliable
		"phi3.5",
		"phi3",
		"phi4-mini-reasoning",
		"phi4-mini",
		"phi4-reasoning",
		"phi4",
		"phind-codellama",
		"qwen",
		"qwen2-math",
		"qwen2.5-coder",
		"qwen2.5",
		"qwen2.5vl",
		"qwen2",
		"qwen3:0.6b", // dense
		"qwen3:30b",  // MOE
		"qwq",
		"r1-1776",
		"reader-lm",
		"reflection",
		"sailor2",
		"samantha-mistral",
		"shieldgemma",
		"smallthinker",
		"smollm",
		"smollm2",
		"solar-pro",
		"solar",
		"sqlcoder",
		"stable-beluga",
		"stable-code",
		"stablelm-zephyr",
		"stablelm2",
		"starcoder",
		"starcoder2",
		"starling-lm",
		"tinydolphin",
		"tinyllama",
		"tulu3",
		"vicuna",
		"wizard-math",
		"wizard-vicuna-uncensored",
		"wizard-vicuna",
		"wizardcoder",
		"wizardlm-uncensored",
		"wizardlm2",
		"xwinlm",
		"yarn-llama2",
		"yarn-mistral",
		"yi-coder",
		"yi",
		"zephyr",
	}
	libraryEmbedModels = []string{
		"all-minilm",
		"bge-large",
		"bge-m3",
		"granite-embedding",
		"mxbai-embed-large",
		"nomic-embed-text",
		"paraphrase-multilingual",
		"snowflake-arctic-embed",
		"snowflake-arctic-embed2",
	}
)

260
func init() {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
261
	lifecycle.InitLogging()
262
263
264
265
266
	custom := os.Getenv("OLLAMA_TEST_SMOL_MODEL")
	if custom != "" {
		slog.Info("setting smol test model to " + custom)
		smol = custom
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
267
268
}

// FindPort returns a free TCP port as a string. It binds localhost:0 to let
// the OS pick an ephemeral port, closes the listener, and returns that port.
// If binding fails it falls back to a random port from the IANA ephemeral
// range (49152-65535), which may race with other processes.
func FindPort() string {
	if addr, err := net.ResolveTCPAddr("tcp", "localhost:0"); err == nil {
		var l *net.TCPListener
		if l, err = net.ListenTCP("tcp", addr); err == nil {
			p := l.Addr().(*net.TCPAddr).Port
			l.Close()
			return strconv.Itoa(p)
		}
	}
	// Could not bind; best-effort random pick in the ephemeral range.
	return strconv.Itoa(rand.Intn(65535-49152) + 49152)
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
284
func GetTestEndpoint() (*api.Client, string) {
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
	defaultPort := "11434"
	ollamaHost := os.Getenv("OLLAMA_HOST")

	scheme, hostport, ok := strings.Cut(ollamaHost, "://")
	if !ok {
		scheme, hostport = "http", ollamaHost
	}

	// trim trailing slashes
	hostport = strings.TrimRight(hostport, "/")

	host, port, err := net.SplitHostPort(hostport)
	if err != nil {
		host, port = "127.0.0.1", defaultPort
		if ip := net.ParseIP(strings.Trim(hostport, "[]")); ip != nil {
			host = ip.String()
		} else if hostport != "" {
			host = hostport
		}
	}

	if os.Getenv("OLLAMA_TEST_EXISTING") == "" && port == defaultPort {
		port = FindPort()
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
310
311
312
313
314
315
316
317
	slog.Info("server connection", "host", host, "port", port)

	return api.NewClient(
		&url.URL{
			Scheme: scheme,
			Host:   net.JoinHostPort(host, port),
		},
		http.DefaultClient), fmt.Sprintf("%s:%s", host, port)
318
319
320
321
322
}

// serverMutex serializes startServer calls; serverReady records whether a
// test-owned server process is currently up.
var (
	serverMutex sync.Mutex
	serverReady bool
)

Daniel Hiltgen's avatar
Daniel Hiltgen committed
323
func startServer(t *testing.T, ctx context.Context, ollamaHost string) error {
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
	// Make sure the server has been built
	CLIName, err := filepath.Abs("../ollama")
	if err != nil {
		return err
	}

	if runtime.GOOS == "windows" {
		CLIName += ".exe"
	}
	_, err = os.Stat(CLIName)
	if err != nil {
		return fmt.Errorf("CLI missing, did you forget to build first?  %w", err)
	}
	serverMutex.Lock()
	defer serverMutex.Unlock()
	if serverReady {
		return nil
	}

	if tmp := os.Getenv("OLLAMA_HOST"); tmp != ollamaHost {
		slog.Info("setting env", "OLLAMA_HOST", ollamaHost)
Michael Yang's avatar
Michael Yang committed
345
		t.Setenv("OLLAMA_HOST", ollamaHost)
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
	}

	slog.Info("starting server", "url", ollamaHost)
	done, err := lifecycle.SpawnServer(ctx, "../ollama")
	if err != nil {
		return fmt.Errorf("failed to start server: %w", err)
	}

	go func() {
		<-ctx.Done()
		serverMutex.Lock()
		defer serverMutex.Unlock()
		exitCode := <-done
		if exitCode > 0 {
			slog.Warn("server failure", "exit", exitCode)
		}
		serverReady = false
	}()

	// TODO wait only long enough for the server to be responsive...
	time.Sleep(500 * time.Millisecond)

	serverReady = true
	return nil
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
372
func PullIfMissing(ctx context.Context, client *api.Client, modelName string) error {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
373
	slog.Info("checking status of model", "model", modelName)
374
375
	showReq := &api.ShowRequest{Name: modelName}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
376
377
	showCtx, cancel := context.WithDeadlineCause(
		ctx,
378
		time.Now().Add(20*time.Second),
Daniel Hiltgen's avatar
Daniel Hiltgen committed
379
380
381
382
383
384
385
386
387
		fmt.Errorf("show for existing model %s took too long", modelName),
	)
	defer cancel()
	_, err := client.Show(showCtx, showReq)
	var statusError api.StatusError
	switch {
	case errors.As(err, &statusError) && statusError.StatusCode == http.StatusNotFound:
		break
	case err != nil:
388
		return err
Daniel Hiltgen's avatar
Daniel Hiltgen committed
389
	default:
390
391
392
		slog.Info("model already present", "model", modelName)
		return nil
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
393
394
	slog.Info("model missing", "model", modelName)

395
	stallDuration := 60 * time.Second // This includes checksum verification, which can take a while on larger models, and slower systems
Daniel Hiltgen's avatar
Daniel Hiltgen committed
396
397
398
399
	stallTimer := time.NewTimer(stallDuration)
	fn := func(resp api.ProgressResponse) error {
		// fmt.Print(".")
		if !stallTimer.Reset(stallDuration) {
Michael Yang's avatar
lint  
Michael Yang committed
400
			return errors.New("stall was detected, aborting status reporting")
Daniel Hiltgen's avatar
Daniel Hiltgen committed
401
402
403
		}
		return nil
	}
404

Daniel Hiltgen's avatar
Daniel Hiltgen committed
405
	stream := true
406
407
	pullReq := &api.PullRequest{Name: modelName, Stream: &stream}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
408
	var pullError error
409

Daniel Hiltgen's avatar
Daniel Hiltgen committed
410
411
412
413
414
415
416
417
	done := make(chan int)
	go func() {
		pullError = client.Pull(ctx, pullReq, fn)
		done <- 0
	}()

	select {
	case <-stallTimer.C:
Michael Yang's avatar
lint  
Michael Yang committed
418
		return errors.New("download stalled")
Daniel Hiltgen's avatar
Daniel Hiltgen committed
419
420
	case <-done:
		return pullError
421
422
423
	}
}

// serverProcMutex guards the test-owned server process and its log file for
// the duration of an InitServerConnection/cleanup pair.
var serverProcMutex sync.Mutex

Daniel Hiltgen's avatar
Daniel Hiltgen committed
426
427
428
429
430
431
432
433
434
435
436
437
// Returns an Client, the testEndpoint, and a cleanup function, fails the test on errors
// Starts the server if needed
func InitServerConnection(ctx context.Context, t *testing.T) (*api.Client, string, func()) {
	client, testEndpoint := GetTestEndpoint()
	if os.Getenv("OLLAMA_TEST_EXISTING") == "" {
		serverProcMutex.Lock()
		fp, err := os.CreateTemp("", "ollama-server-*.log")
		if err != nil {
			t.Fatalf("failed to generate log file: %s", err)
		}
		lifecycle.ServerLogFile = fp.Name()
		fp.Close()
Daniel Hiltgen's avatar
Daniel Hiltgen committed
438
		require.NoError(t, startServer(t, ctx, testEndpoint))
439
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
440
441

	return client, testEndpoint, func() {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
442
443
444
445
446
447
448
449
		if os.Getenv("OLLAMA_TEST_EXISTING") == "" {
			defer serverProcMutex.Unlock()
			if t.Failed() {
				fp, err := os.Open(lifecycle.ServerLogFile)
				if err != nil {
					slog.Error("failed to open server log", "logfile", lifecycle.ServerLogFile, "error", err)
					return
				}
450
				defer fp.Close()
Daniel Hiltgen's avatar
Daniel Hiltgen committed
451
452
453
454
455
456
457
458
				data, err := io.ReadAll(fp)
				if err != nil {
					slog.Error("failed to read server log", "logfile", lifecycle.ServerLogFile, "error", err)
					return
				}
				slog.Warn("SERVER LOG FOLLOWS")
				os.Stderr.Write(data)
				slog.Warn("END OF SERVER")
459
			}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
460
			err := os.Remove(lifecycle.ServerLogFile)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
461
462
			if err != nil && !os.IsNotExist(err) {
				slog.Warn("failed to cleanup", "logfile", lifecycle.ServerLogFile, "error", err)
463
464
465
			}
		}
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
466
}
467

Daniel Hiltgen's avatar
Daniel Hiltgen committed
468
469
470
471
472
473
func GenerateTestHelper(ctx context.Context, t *testing.T, genReq api.GenerateRequest, anyResp []string) {
	client, _, cleanup := InitServerConnection(ctx, t)
	defer cleanup()
	require.NoError(t, PullIfMissing(ctx, client, genReq.Model))
	DoGenerate(ctx, t, client, genReq, anyResp, 30*time.Second, 10*time.Second)
}
474

475
func DoGenerate(ctx context.Context, t *testing.T, client *api.Client, genReq api.GenerateRequest, anyResp []string, initialTimeout, streamTimeout time.Duration) []int {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
476
477
	stallTimer := time.NewTimer(initialTimeout)
	var buf bytes.Buffer
478
	var context []int
Daniel Hiltgen's avatar
Daniel Hiltgen committed
479
480
481
482
	fn := func(response api.GenerateResponse) error {
		// fmt.Print(".")
		buf.Write([]byte(response.Response))
		if !stallTimer.Reset(streamTimeout) {
Michael Yang's avatar
lint  
Michael Yang committed
483
			return errors.New("stall was detected while streaming response, aborting")
Daniel Hiltgen's avatar
Daniel Hiltgen committed
484
		}
485
486
487
		if len(response.Context) > 0 {
			context = response.Context
		}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
488
		return nil
489
490
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
491
492
493
494
495
496
497
498
	stream := true
	genReq.Stream = &stream
	done := make(chan int)
	var genErr error
	go func() {
		genErr = client.Generate(ctx, &genReq, fn)
		done <- 0
	}()
499

Daniel Hiltgen's avatar
Daniel Hiltgen committed
500
501
502
503
504
505
506
507
	select {
	case <-stallTimer.C:
		if buf.Len() == 0 {
			t.Errorf("generate never started.  Timed out after :%s", initialTimeout.String())
		} else {
			t.Errorf("generate stalled.  Response so far:%s", buf.String())
		}
	case <-done:
508
509
		if genErr != nil && strings.Contains(genErr.Error(), "model requires more system memory") {
			slog.Warn("model is too large for the target test system", "model", genReq.Model, "error", genErr)
510
			return context
511
		}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
512
513
514
515
516
517
518
519
520
521
		require.NoError(t, genErr, "failed with %s request prompt %s ", genReq.Model, genReq.Prompt)
		// Verify the response contains the expected data
		response := buf.String()
		atLeastOne := false
		for _, resp := range anyResp {
			if strings.Contains(strings.ToLower(response), resp) {
				atLeastOne = true
				break
			}
		}
522
		require.True(t, atLeastOne, "%s: none of %v found in %s", genReq.Model, anyResp, response)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
523
524
525
		slog.Info("test pass", "model", genReq.Model, "prompt", genReq.Prompt, "contains", anyResp, "response", response)
	case <-ctx.Done():
		t.Error("outer test context done while waiting for generate")
526
	}
527
	return context
Daniel Hiltgen's avatar
Daniel Hiltgen committed
528
}
529

Daniel Hiltgen's avatar
Daniel Hiltgen committed
530
// Generate a set of requests
531
// By default each request uses llama3.2 as the model
Daniel Hiltgen's avatar
Daniel Hiltgen committed
532
533
534
func GenerateRequests() ([]api.GenerateRequest, [][]string) {
	return []api.GenerateRequest{
			{
535
				Model:     smol,
536
				Prompt:    "why is the ocean blue? Be brief but factual in your reply",
Daniel Hiltgen's avatar
Daniel Hiltgen committed
537
538
				Stream:    &stream,
				KeepAlive: &api.Duration{Duration: 10 * time.Second},
Daniel Hiltgen's avatar
Daniel Hiltgen committed
539
			}, {
540
				Model:     smol,
541
				Prompt:    "why is the color of dirt brown? Be brief but factual in your reply",
Daniel Hiltgen's avatar
Daniel Hiltgen committed
542
543
				Stream:    &stream,
				KeepAlive: &api.Duration{Duration: 10 * time.Second},
Daniel Hiltgen's avatar
Daniel Hiltgen committed
544
			}, {
545
				Model:     smol,
546
				Prompt:    "what is the origin of the US thanksgiving holiday? Be brief but factual in your reply",
Daniel Hiltgen's avatar
Daniel Hiltgen committed
547
548
				Stream:    &stream,
				KeepAlive: &api.Duration{Duration: 10 * time.Second},
Daniel Hiltgen's avatar
Daniel Hiltgen committed
549
			}, {
550
				Model:     smol,
551
				Prompt:    "what is the origin of independence day? Be brief but factual in your reply",
Daniel Hiltgen's avatar
Daniel Hiltgen committed
552
553
				Stream:    &stream,
				KeepAlive: &api.Duration{Duration: 10 * time.Second},
Daniel Hiltgen's avatar
Daniel Hiltgen committed
554
			}, {
555
				Model:     smol,
556
				Prompt:    "what is the composition of air? Be brief but factual in your reply",
Daniel Hiltgen's avatar
Daniel Hiltgen committed
557
558
				Stream:    &stream,
				KeepAlive: &api.Duration{Duration: 10 * time.Second},
Daniel Hiltgen's avatar
Daniel Hiltgen committed
559
560
561
			},
		},
		[][]string{
562
563
564
			{"sunlight", "scattering", "interact", "color", "surface", "depth", "red", "orange", "yellow", "absorbs", "wavelength"},
			{"soil", "organic", "earth", "black", "tan", "chemical", "processes", "pigments", "particles", "iron oxide", "rust", "air", "water", "mixture", "mixing"},
			{"england", "english", "massachusetts", "pilgrims", "colonists", "independence", "british", "feast", "family", "gatherings", "traditions", "turkey", "colonial", "period", "harvest", "agricultural", "european settlers", "american revolution", "civil war", "16th century", "17th century", "native american", "united states"},
Michael Yang's avatar
Michael Yang committed
565
566
			{"fourth", "july", "declaration", "independence"},
			{"nitrogen", "oxygen", "carbon", "dioxide"},
567
		}
568
}
569
570
571
572
573
574
575
576
577
578
579
580

func skipUnderMinVRAM(t *testing.T, gb uint64) {
	// TODO use info API in the future
	if s := os.Getenv("OLLAMA_MAX_VRAM"); s != "" {
		maxVram, err := strconv.ParseUint(s, 10, 64)
		require.NoError(t, err)
		// Don't hammer on small VRAM cards...
		if maxVram < gb*format.GibiByte {
			t.Skip("skipping with small VRAM to avoid timeouts")
		}
	}
}
// getTimeouts derives soft and hard time limits for long-running scenarios
// from the test binary's deadline. With no deadline it returns 8m/10m. With
// less than two minutes remaining the test is skipped. Otherwise soft leaves
// a two-minute margin before the deadline and hard a twenty-second margin.
func getTimeouts(t *testing.T) (soft time.Duration, hard time.Duration) {
	deadline, hasDeadline := t.Deadline()
	if !hasDeadline {
		return 8 * time.Minute, 10 * time.Minute
	}
	if deadline.Compare(time.Now().Add(2*time.Minute)) <= 0 {
		t.Skip("too little time")
		return 0, 0
	}
	return time.Until(deadline.Add(-2 * time.Minute)), time.Until(deadline.Add(-20 * time.Second))
}