utils_test.go 13.9 KB
Newer Older
1
2
3
4
5
6
7
//go:build integration

package integration

import (
	"bytes"
	"context"
Daniel Hiltgen's avatar
Daniel Hiltgen committed
8
	"errors"
9
10
11
12
13
14
	"fmt"
	"io"
	"log/slog"
	"math/rand"
	"net"
	"net/http"
Daniel Hiltgen's avatar
Daniel Hiltgen committed
15
	"net/url"
16
17
18
19
20
21
22
23
24
	"os"
	"path/filepath"
	"runtime"
	"strconv"
	"strings"
	"sync"
	"testing"
	"time"

25
26
	"github.com/ollama/ollama/api"
	"github.com/ollama/ollama/app/lifecycle"
27
	"github.com/ollama/ollama/format"
Daniel Hiltgen's avatar
Daniel Hiltgen committed
28
	"github.com/stretchr/testify/require"
29
30
)

31
32
33
34
const (
	// smol is the default small chat model used across tests to keep
	// download size and inference time low.
	smol = "llama3.2:1b"
)

35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
var (
	// started records when the test binary began running, for elapsed-time reporting.
	started = time.Now()

	// Note: add newer models at the top of the list to test them first
	// Chat models expected to run on the ollama engine (per the variable name).
	ollamaEngineChatModels = []string{
		"gemma3n:e2b",
		"mistral-small3.2:latest",
		"deepseek-r1:1.5b",
		"llama3.2-vision:latest",
		"qwen2.5-coder:latest",
		"qwen2.5vl:3b",
		"qwen3:0.6b", // dense
		"qwen3:30b",  // MOE
		"gemma3:1b",
		"llama3.1:latest",
		"llama3.2:latest",
		"gemma2:latest",
		"minicpm-v:latest",    // arch=qwen2
		"granite-code:latest", // arch=llama
	}
	// Chat models expected to run on the llama runner (per the variable name).
	llamaRunnerChatModels = []string{
		"mistral:latest",
		"falcon3:latest",
		"granite3-moe:latest",
		"command-r:latest",
		"nemotron-mini:latest",
		"phi3.5:latest",
		"solar-pro:latest",
		"internlm2:latest",
		"codellama:latest", // arch=llama
		"phi3:latest",
		"falcon2:latest",
		"gemma:latest",
		"llama2:latest",
		"nous-hermes:latest",
		"orca-mini:latest",
		"qwen:latest",
		"stablelm2:latest", // Predictions are off, crashes on small VRAM GPUs
		"falcon:latest",
	}

	// Some library models are quite large - ensure large VRAM and sufficient disk space
	// before running scenarios based on this set
	libraryChatModels = []string{
		"alfred",
		"athene-v2",
		"aya-expanse",
		"aya",
		"bakllava",
		"bespoke-minicheck",
		"codebooga",
		"codegeex4",
		"codegemma",
		"codellama",
		"codeqwen",
		"codestral",
		"codeup",
		"cogito",
		"command-a",
		"command-r-plus",
		"command-r",
		"command-r7b-arabic",
		"command-r7b",
		"dbrx",
		"deepcoder",
		"deepscaler",
		"deepseek-coder-v2",
		"deepseek-coder",
		"deepseek-llm",
		"deepseek-r1",
		// "deepseek-v2.5", // requires 155 GB VRAM
		"deepseek-v2",
		// "deepseek-v3", // requires 482 GB VRAM
		"devstral",
		"dolphin-llama3",
		"dolphin-mistral",
		"dolphin-mixtral",
		"dolphin-phi",
		"dolphin3",
		"dolphincoder",
		"duckdb-nsql",
		"everythinglm",
		"exaone-deep",
		"exaone3.5",
		"falcon",
		"falcon2",
		"falcon3",
		"firefunction-v2",
		"gemma",
		"gemma2",
		"gemma3",
		"gemma3n",
		"glm4",
		"goliath",
		"granite-code",
		"granite3-dense",
		"granite3-guardian",
		"granite3-moe",
		"granite3.1-dense",
		"granite3.1-moe",
		"granite3.2-vision",
		"granite3.2",
		"granite3.3",
		"hermes3",
		"internlm2",
		"llama-guard3",
		"llama-pro",
		"llama2-chinese",
		"llama2-uncensored",
		"llama2",
		"llama3-chatqa",
		"llama3-gradient",
		"llama3-groq-tool-use",
		"llama3.1",
		"llama3.2-vision",
		"llama3.2",
		"llama3.3",
		"llama3",
		"llama4",
		"llava-llama3",
		"llava-phi3",
		"llava",
		"magicoder",
		"magistral",
		"marco-o1",
		"mathstral",
		"meditron",
		"medllama2",
		"megadolphin",
		"minicpm-v",
		"mistral-large",
		"mistral-nemo",
		"mistral-openorca",
		"mistral-small",
		"mistral-small3.1",
		"mistral-small3.2",
		"mistral",
		"mistrallite",
		"mixtral",
		"moondream",
		"nemotron-mini",
		"nemotron",
		"neural-chat",
		"nexusraven",
		"notus",
		"nous-hermes",
		"nous-hermes2-mixtral",
		"nous-hermes2",
		"nuextract",
		"olmo2",
		"open-orca-platypus2",
		"openchat",
		"opencoder",
		"openhermes",
		"openthinker",
		"orca-mini",
		"orca2",
		// "phi", // unreliable
		"phi3.5",
		"phi3",
		"phi4-mini-reasoning",
		"phi4-mini",
		"phi4-reasoning",
		"phi4",
		"phind-codellama",
		"qwen",
		"qwen2-math",
		"qwen2.5-coder",
		"qwen2.5",
		"qwen2.5vl",
		"qwen2",
		"qwen3:0.6b", // dense
		"qwen3:30b",  // MOE
		"qwq",
		"r1-1776",
		"reader-lm",
		"reflection",
		"sailor2",
		"samantha-mistral",
		"shieldgemma",
		"smallthinker",
		"smollm",
		"smollm2",
		"solar-pro",
		"solar",
		"sqlcoder",
		"stable-beluga",
		"stable-code",
		"stablelm-zephyr",
		"stablelm2",
		"starcoder",
		"starcoder2",
		"starling-lm",
		"tinydolphin",
		"tinyllama",
		"tulu3",
		"vicuna",
		"wizard-math",
		"wizard-vicuna-uncensored",
		"wizard-vicuna",
		"wizardcoder",
		"wizardlm-uncensored",
		"wizardlm2",
		"xwinlm",
		"yarn-llama2",
		"yarn-mistral",
		"yi-coder",
		"yi",
		"zephyr",
	}
	// Embedding models from the model library, used by embedding scenarios.
	libraryEmbedModels = []string{
		"all-minilm",
		"bge-large",
		"bge-m3",
		"granite-embedding",
		"mxbai-embed-large",
		"nomic-embed-text",
		"paraphrase-multilingual",
		"snowflake-arctic-embed",
		"snowflake-arctic-embed2",
	}
)

Daniel Hiltgen's avatar
Daniel Hiltgen committed
258
259
260
261
// Init initializes logging for the integration test harness via the
// app lifecycle package.
func Init() {
	lifecycle.InitLogging()
}

262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
// FindPort returns a TCP port, as a decimal string, that was free at the
// moment of the check. It asks the OS for a free port by listening on
// localhost:0 and immediately closing the listener; if that fails, it falls
// back to a random port in the ephemeral range (49152-65535).
func FindPort() string {
	chosen := 0
	if addr, err := net.ResolveTCPAddr("tcp", "localhost:0"); err == nil {
		if ln, lerr := net.ListenTCP("tcp", addr); lerr == nil {
			chosen = ln.Addr().(*net.TCPAddr).Port
			ln.Close()
		}
	}
	if chosen == 0 {
		chosen = rand.Intn(65535-49152) + 49152 // get a random port in the ephemeral range
	}
	return strconv.Itoa(chosen)
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
277
func GetTestEndpoint() (*api.Client, string) {
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
	defaultPort := "11434"
	ollamaHost := os.Getenv("OLLAMA_HOST")

	scheme, hostport, ok := strings.Cut(ollamaHost, "://")
	if !ok {
		scheme, hostport = "http", ollamaHost
	}

	// trim trailing slashes
	hostport = strings.TrimRight(hostport, "/")

	host, port, err := net.SplitHostPort(hostport)
	if err != nil {
		host, port = "127.0.0.1", defaultPort
		if ip := net.ParseIP(strings.Trim(hostport, "[]")); ip != nil {
			host = ip.String()
		} else if hostport != "" {
			host = hostport
		}
	}

	if os.Getenv("OLLAMA_TEST_EXISTING") == "" && port == defaultPort {
		port = FindPort()
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
303
304
305
306
307
308
309
310
	slog.Info("server connection", "host", host, "port", port)

	return api.NewClient(
		&url.URL{
			Scheme: scheme,
			Host:   net.JoinHostPort(host, port),
		},
		http.DefaultClient), fmt.Sprintf("%s:%s", host, port)
311
312
313
314
315
}

// serverMutex serializes test-server startup; serverReady records whether
// startServer has already spawned a server for this process.
var serverMutex sync.Mutex
var serverReady bool

Daniel Hiltgen's avatar
Daniel Hiltgen committed
316
// startServer launches a local ollama server bound to ollamaHost, unless one
// has already been started by a previous call (tracked via serverReady).
// It verifies the CLI binary exists at ../ollama (with .exe on Windows),
// points OLLAMA_HOST at the requested address for the duration of the test,
// and spawns the server via the lifecycle package. A watcher goroutine
// resets serverReady once ctx is cancelled and the server process exits.
func startServer(t *testing.T, ctx context.Context, ollamaHost string) error {
	// Make sure the server has been built
	CLIName, err := filepath.Abs("../ollama")
	if err != nil {
		return err
	}

	if runtime.GOOS == "windows" {
		CLIName += ".exe"
	}
	_, err = os.Stat(CLIName)
	if err != nil {
		return fmt.Errorf("CLI missing, did you forget to build first?  %w", err)
	}
	serverMutex.Lock()
	defer serverMutex.Unlock()
	if serverReady {
		// A server is already running for this process; reuse it.
		return nil
	}

	if tmp := os.Getenv("OLLAMA_HOST"); tmp != ollamaHost {
		slog.Info("setting env", "OLLAMA_HOST", ollamaHost)
		t.Setenv("OLLAMA_HOST", ollamaHost)
	}

	slog.Info("starting server", "url", ollamaHost)
	// NOTE(review): the relative path "../ollama" is passed here rather than
	// the absolute CLIName computed above — presumably SpawnServer resolves
	// it the same way; confirm, particularly the .exe suffix on Windows.
	done, err := lifecycle.SpawnServer(ctx, "../ollama")
	if err != nil {
		return fmt.Errorf("failed to start server: %w", err)
	}

	// When the test context is cancelled, wait for the server process to
	// exit and mark the shared state as not-ready so a later test can
	// start a fresh server.
	go func() {
		<-ctx.Done()
		serverMutex.Lock()
		defer serverMutex.Unlock()
		exitCode := <-done
		if exitCode > 0 {
			slog.Warn("server failure", "exit", exitCode)
		}
		serverReady = false
	}()

	// TODO wait only long enough for the server to be responsive...
	time.Sleep(500 * time.Millisecond)

	serverReady = true
	return nil
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
365
func PullIfMissing(ctx context.Context, client *api.Client, modelName string) error {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
366
	slog.Info("checking status of model", "model", modelName)
367
368
	showReq := &api.ShowRequest{Name: modelName}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
369
370
	showCtx, cancel := context.WithDeadlineCause(
		ctx,
371
		time.Now().Add(20*time.Second),
Daniel Hiltgen's avatar
Daniel Hiltgen committed
372
373
374
375
376
377
378
379
380
		fmt.Errorf("show for existing model %s took too long", modelName),
	)
	defer cancel()
	_, err := client.Show(showCtx, showReq)
	var statusError api.StatusError
	switch {
	case errors.As(err, &statusError) && statusError.StatusCode == http.StatusNotFound:
		break
	case err != nil:
381
		return err
Daniel Hiltgen's avatar
Daniel Hiltgen committed
382
	default:
383
384
385
		slog.Info("model already present", "model", modelName)
		return nil
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
386
387
	slog.Info("model missing", "model", modelName)

388
	stallDuration := 60 * time.Second // This includes checksum verification, which can take a while on larger models, and slower systems
Daniel Hiltgen's avatar
Daniel Hiltgen committed
389
390
391
392
	stallTimer := time.NewTimer(stallDuration)
	fn := func(resp api.ProgressResponse) error {
		// fmt.Print(".")
		if !stallTimer.Reset(stallDuration) {
Michael Yang's avatar
lint  
Michael Yang committed
393
			return errors.New("stall was detected, aborting status reporting")
Daniel Hiltgen's avatar
Daniel Hiltgen committed
394
395
396
		}
		return nil
	}
397

Daniel Hiltgen's avatar
Daniel Hiltgen committed
398
	stream := true
399
400
	pullReq := &api.PullRequest{Name: modelName, Stream: &stream}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
401
	var pullError error
402

Daniel Hiltgen's avatar
Daniel Hiltgen committed
403
404
405
406
407
408
409
410
	done := make(chan int)
	go func() {
		pullError = client.Pull(ctx, pullReq, fn)
		done <- 0
	}()

	select {
	case <-stallTimer.C:
Michael Yang's avatar
lint  
Michael Yang committed
411
		return errors.New("download stalled")
Daniel Hiltgen's avatar
Daniel Hiltgen committed
412
413
	case <-done:
		return pullError
414
415
416
	}
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
417
418
var serverProcMutex sync.Mutex

Daniel Hiltgen's avatar
Daniel Hiltgen committed
419
420
421
422
423
424
425
426
427
428
429
430
// InitServerConnection returns an API client, the test endpoint ("host:port"),
// and a cleanup function; it fails the test on errors. Unless
// OLLAMA_TEST_EXISTING is set, it starts a local server, capturing its output
// in a temp log file. The cleanup function dumps that log to stderr when the
// test failed, removes the log file, and releases serverProcMutex.
//
// NOTE(review): if CreateTemp or startServer fails, t.Fatalf/require.NoError
// end the test with serverProcMutex still locked, which would block later
// tests in the same process — confirm whether that path is acceptable.
func InitServerConnection(ctx context.Context, t *testing.T) (*api.Client, string, func()) {
	client, testEndpoint := GetTestEndpoint()
	if os.Getenv("OLLAMA_TEST_EXISTING") == "" {
		serverProcMutex.Lock()
		fp, err := os.CreateTemp("", "ollama-server-*.log")
		if err != nil {
			t.Fatalf("failed to generate log file: %s", err)
		}
		// Only the file name is needed; the server process writes to it.
		lifecycle.ServerLogFile = fp.Name()
		fp.Close()
		require.NoError(t, startServer(t, ctx, testEndpoint))
	}

	return client, testEndpoint, func() {
		if os.Getenv("OLLAMA_TEST_EXISTING") == "" {
			defer serverProcMutex.Unlock()
			// On failure, surface the server's log to aid debugging.
			if t.Failed() {
				fp, err := os.Open(lifecycle.ServerLogFile)
				if err != nil {
					slog.Error("failed to open server log", "logfile", lifecycle.ServerLogFile, "error", err)
					return
				}
				defer fp.Close()
				data, err := io.ReadAll(fp)
				if err != nil {
					slog.Error("failed to read server log", "logfile", lifecycle.ServerLogFile, "error", err)
					return
				}
				slog.Warn("SERVER LOG FOLLOWS")
				os.Stderr.Write(data)
				slog.Warn("END OF SERVER")
			}
			err := os.Remove(lifecycle.ServerLogFile)
			if err != nil && !os.IsNotExist(err) {
				slog.Warn("failed to cleanup", "logfile", lifecycle.ServerLogFile, "error", err)
			}
		}
	}
}
460

Daniel Hiltgen's avatar
Daniel Hiltgen committed
461
462
463
464
465
466
// GenerateTestHelper connects to (or starts) a test server, pulls the
// requested model if missing, and runs a single generate request, requiring
// that the response contains at least one of the anyResp substrings.
// Uses a 30s initial timeout and 10s per-token stream timeout.
func GenerateTestHelper(ctx context.Context, t *testing.T, genReq api.GenerateRequest, anyResp []string) {
	client, _, cleanup := InitServerConnection(ctx, t)
	defer cleanup()
	require.NoError(t, PullIfMissing(ctx, client, genReq.Model))
	DoGenerate(ctx, t, client, genReq, anyResp, 30*time.Second, 10*time.Second)
}
467

Daniel Hiltgen's avatar
Daniel Hiltgen committed
468
469
470
471
472
473
474
func DoGenerate(ctx context.Context, t *testing.T, client *api.Client, genReq api.GenerateRequest, anyResp []string, initialTimeout, streamTimeout time.Duration) {
	stallTimer := time.NewTimer(initialTimeout)
	var buf bytes.Buffer
	fn := func(response api.GenerateResponse) error {
		// fmt.Print(".")
		buf.Write([]byte(response.Response))
		if !stallTimer.Reset(streamTimeout) {
Michael Yang's avatar
lint  
Michael Yang committed
475
			return errors.New("stall was detected while streaming response, aborting")
Daniel Hiltgen's avatar
Daniel Hiltgen committed
476
477
		}
		return nil
478
479
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
480
481
482
483
484
485
486
487
	stream := true
	genReq.Stream = &stream
	done := make(chan int)
	var genErr error
	go func() {
		genErr = client.Generate(ctx, &genReq, fn)
		done <- 0
	}()
488

Daniel Hiltgen's avatar
Daniel Hiltgen committed
489
490
491
492
493
494
495
496
	select {
	case <-stallTimer.C:
		if buf.Len() == 0 {
			t.Errorf("generate never started.  Timed out after :%s", initialTimeout.String())
		} else {
			t.Errorf("generate stalled.  Response so far:%s", buf.String())
		}
	case <-done:
497
498
499
500
		if genErr != nil && strings.Contains(genErr.Error(), "model requires more system memory") {
			slog.Warn("model is too large for the target test system", "model", genReq.Model, "error", genErr)
			return
		}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
501
502
503
504
505
506
507
508
509
510
		require.NoError(t, genErr, "failed with %s request prompt %s ", genReq.Model, genReq.Prompt)
		// Verify the response contains the expected data
		response := buf.String()
		atLeastOne := false
		for _, resp := range anyResp {
			if strings.Contains(strings.ToLower(response), resp) {
				atLeastOne = true
				break
			}
		}
511
		require.True(t, atLeastOne, "%s: none of %v found in %s", genReq.Model, anyResp, response)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
512
513
514
		slog.Info("test pass", "model", genReq.Model, "prompt", genReq.Prompt, "contains", anyResp, "response", response)
	case <-ctx.Done():
		t.Error("outer test context done while waiting for generate")
515
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
516
}
517

Daniel Hiltgen's avatar
Daniel Hiltgen committed
518
// Generate a set of requests
519
// By default each request uses llama3.2 as the model
Daniel Hiltgen's avatar
Daniel Hiltgen committed
520
521
522
func GenerateRequests() ([]api.GenerateRequest, [][]string) {
	return []api.GenerateRequest{
			{
523
				Model:     smol,
Daniel Hiltgen's avatar
Daniel Hiltgen committed
524
525
526
				Prompt:    "why is the ocean blue?",
				Stream:    &stream,
				KeepAlive: &api.Duration{Duration: 10 * time.Second},
527
				Options: map[string]any{
Daniel Hiltgen's avatar
Daniel Hiltgen committed
528
529
530
531
					"seed":        42,
					"temperature": 0.0,
				},
			}, {
532
				Model:     smol,
Daniel Hiltgen's avatar
Daniel Hiltgen committed
533
534
535
				Prompt:    "why is the color of dirt brown?",
				Stream:    &stream,
				KeepAlive: &api.Duration{Duration: 10 * time.Second},
536
				Options: map[string]any{
Daniel Hiltgen's avatar
Daniel Hiltgen committed
537
538
539
540
					"seed":        42,
					"temperature": 0.0,
				},
			}, {
541
				Model:     smol,
Daniel Hiltgen's avatar
Daniel Hiltgen committed
542
543
544
				Prompt:    "what is the origin of the us thanksgiving holiday?",
				Stream:    &stream,
				KeepAlive: &api.Duration{Duration: 10 * time.Second},
545
				Options: map[string]any{
Daniel Hiltgen's avatar
Daniel Hiltgen committed
546
547
548
549
					"seed":        42,
					"temperature": 0.0,
				},
			}, {
550
				Model:     smol,
Daniel Hiltgen's avatar
Daniel Hiltgen committed
551
552
553
				Prompt:    "what is the origin of independence day?",
				Stream:    &stream,
				KeepAlive: &api.Duration{Duration: 10 * time.Second},
554
				Options: map[string]any{
Daniel Hiltgen's avatar
Daniel Hiltgen committed
555
556
557
558
					"seed":        42,
					"temperature": 0.0,
				},
			}, {
559
				Model:     smol,
Daniel Hiltgen's avatar
Daniel Hiltgen committed
560
561
562
				Prompt:    "what is the composition of air?",
				Stream:    &stream,
				KeepAlive: &api.Duration{Duration: 10 * time.Second},
563
				Options: map[string]any{
Daniel Hiltgen's avatar
Daniel Hiltgen committed
564
565
566
567
568
569
					"seed":        42,
					"temperature": 0.0,
				},
			},
		},
		[][]string{
Michael Yang's avatar
Michael Yang committed
570
571
572
573
574
			{"sunlight"},
			{"soil", "organic", "earth", "black", "tan"},
			{"england", "english", "massachusetts", "pilgrims", "british"},
			{"fourth", "july", "declaration", "independence"},
			{"nitrogen", "oxygen", "carbon", "dioxide"},
575
		}
576
}
577
578
579
580
581
582
583
584
585
586
587
588

// skipUnderMinVRAM skips the current test when the OLLAMA_MAX_VRAM
// environment variable is set and reports less than gb gibibytes of VRAM.
// When the variable is unset, the test runs unconditionally.
func skipUnderMinVRAM(t *testing.T, gb uint64) {
	// TODO use info API in the future
	raw := os.Getenv("OLLAMA_MAX_VRAM")
	if raw == "" {
		return
	}
	limit, err := strconv.ParseUint(raw, 10, 64)
	require.NoError(t, err)
	// Don't hammer on small VRAM cards...
	if limit < gb*format.GibiByte {
		t.Skip("skipping with small VRAM to avoid timeouts")
	}
}
589
590
591
592
593
594
595
596
597
598
599

// getTimeouts derives soft and hard time limits for a test from its
// remaining deadline. Without a -timeout deadline it returns 8 and 10
// minutes; with less than 2 minutes remaining the test is skipped.
// Otherwise the soft limit leaves 2 minutes of headroom before the deadline
// and the hard limit leaves 20 seconds.
func getTimeouts(t *testing.T) (soft time.Duration, hard time.Duration) {
	deadline, hasDeadline := t.Deadline()
	if !hasDeadline {
		return 8 * time.Minute, 10 * time.Minute
	}
	if time.Until(deadline) <= 2*time.Minute {
		t.Skip("too little time")
		return time.Duration(0), time.Duration(0) // unreachable after Skip; satisfies the compiler
	}
	return time.Until(deadline.Add(-2 * time.Minute)), time.Until(deadline.Add(-20 * time.Second))
}