server_test.go 8.63 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
package llm

import (
	"context"
	"errors"
	"fmt"
	"strings"
	"testing"

	"github.com/ollama/ollama/api"
Jesse Gross's avatar
Jesse Gross committed
11
12
	"github.com/ollama/ollama/format"
	"github.com/ollama/ollama/ml"
13
14
15
	"golang.org/x/sync/semaphore"
)

Jesse Gross's avatar
Jesse Gross committed
16
17
func TestLLMServerFitGPU(t *testing.T) {
	type gpu struct {
18
19
		id   ml.DeviceID
		free int
Jesse Gross's avatar
Jesse Gross committed
20
21
	}

22
23
	minMemory := 457 * format.MebiByte

Jesse Gross's avatar
Jesse Gross committed
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
	tests := []struct {
		name        string
		gpus        []gpu
		layers      []int
		numGPU      int
		requireFull bool
		expected    ml.GPULayersList
		expectedErr error
	}{
		{
			name:     "No GPU",
			layers:   []int{50 * format.MebiByte, 50 * format.MebiByte, 50 * format.MebiByte},
			numGPU:   -1,
			expected: ml.GPULayersList{},
		},
		{
			name:     "Full single GPU",
41
			gpus:     []gpu{{id: ml.DeviceID{ID: "gpu0"}, free: 256*format.MebiByte + minMemory}},
Jesse Gross's avatar
Jesse Gross committed
42
43
			layers:   []int{50 * format.MebiByte, 50 * format.MebiByte, 50 * format.MebiByte},
			numGPU:   -1,
44
			expected: ml.GPULayersList{{DeviceID: ml.DeviceID{ID: "gpu0"}, Layers: []int{0, 1, 2}}},
Jesse Gross's avatar
Jesse Gross committed
45
46
47
		},
		{
			name:     "Partial single GPU",
48
			gpus:     []gpu{{id: ml.DeviceID{ID: "gpu0"}, free: 256*format.MebiByte + minMemory}},
Jesse Gross's avatar
Jesse Gross committed
49
50
			layers:   []int{100 * format.MebiByte, 100 * format.MebiByte, 100 * format.MebiByte, 100 * format.MebiByte},
			numGPU:   -1,
51
			expected: ml.GPULayersList{{DeviceID: ml.DeviceID{ID: "gpu0"}, Layers: []int{1, 2}}},
Jesse Gross's avatar
Jesse Gross committed
52
53
54
		},
		{
			name:     "Single GPU with numGPU 1",
55
			gpus:     []gpu{{id: ml.DeviceID{ID: "gpu0"}, free: 256*format.MebiByte + minMemory}},
Jesse Gross's avatar
Jesse Gross committed
56
57
			layers:   []int{50 * format.MebiByte, 50 * format.MebiByte, 50 * format.MebiByte},
			numGPU:   1,
58
			expected: ml.GPULayersList{{DeviceID: ml.DeviceID{ID: "gpu0"}, Layers: []int{1}}},
Jesse Gross's avatar
Jesse Gross committed
59
60
61
		},
		{
			name:     "Single GPU with numGPU 0",
62
			gpus:     []gpu{{id: ml.DeviceID{ID: "gpu0"}, free: 256*format.MebiByte + minMemory}},
Jesse Gross's avatar
Jesse Gross committed
63
64
65
66
67
68
			layers:   []int{50 * format.MebiByte, 50 * format.MebiByte, 50 * format.MebiByte},
			numGPU:   0,
			expected: ml.GPULayersList{},
		},
		{
			name:     "Single GPU with numGPU 999",
69
			gpus:     []gpu{{id: ml.DeviceID{ID: "gpu0"}, free: 256*format.MebiByte + minMemory}},
Jesse Gross's avatar
Jesse Gross committed
70
71
			layers:   []int{100 * format.MebiByte, 100 * format.MebiByte, 100 * format.MebiByte, 100 * format.MebiByte},
			numGPU:   999,
72
			expected: ml.GPULayersList{{DeviceID: ml.DeviceID{ID: "gpu0"}, Layers: []int{0, 1, 2, 3}}},
Jesse Gross's avatar
Jesse Gross committed
73
74
75
		},
		{
			name:     "Multi GPU fits on one",
76
			gpus:     []gpu{{id: ml.DeviceID{ID: "gpu0"}, free: 128*format.MebiByte + minMemory}, {id: ml.DeviceID{ID: "gpu1"}, free: 256*format.MebiByte + minMemory}},
Jesse Gross's avatar
Jesse Gross committed
77
78
			layers:   []int{50 * format.MebiByte, 50 * format.MebiByte, 50 * format.MebiByte},
			numGPU:   -1,
79
			expected: ml.GPULayersList{{DeviceID: ml.DeviceID{ID: "gpu1"}, Layers: []int{0, 1, 2}}},
Jesse Gross's avatar
Jesse Gross committed
80
81
82
		},
		{
			name:     "Multi GPU split",
83
			gpus:     []gpu{{id: ml.DeviceID{ID: "gpu0"}, free: 128*format.MebiByte + minMemory}, {id: ml.DeviceID{ID: "gpu1"}, free: 256*format.MebiByte + minMemory}},
Jesse Gross's avatar
Jesse Gross committed
84
85
			layers:   []int{256 * format.MebiByte, 50 * format.MebiByte, 50 * format.MebiByte},
			numGPU:   -1,
86
			expected: ml.GPULayersList{{DeviceID: ml.DeviceID{ID: "gpu1"}, Layers: []int{0}}, {DeviceID: ml.DeviceID{ID: "gpu0"}, Layers: []int{1, 2}}},
Jesse Gross's avatar
Jesse Gross committed
87
88
89
		},
		{
			name:     "Multi GPU partial",
90
			gpus:     []gpu{{id: ml.DeviceID{ID: "gpu0"}, free: 128*format.MebiByte + minMemory}, {id: ml.DeviceID{ID: "gpu1"}, free: 256*format.MebiByte + minMemory}},
Jesse Gross's avatar
Jesse Gross committed
91
92
			layers:   []int{256 * format.MebiByte, 256 * format.MebiByte, 50 * format.MebiByte},
			numGPU:   -1,
93
			expected: ml.GPULayersList{{DeviceID: ml.DeviceID{ID: "gpu1"}, Layers: []int{1}}},
Jesse Gross's avatar
Jesse Gross committed
94
95
96
		},
		{
			name:     "Multi GPU numGPU 1",
97
			gpus:     []gpu{{id: ml.DeviceID{ID: "gpu0"}, free: 128*format.MebiByte + minMemory}, {id: ml.DeviceID{ID: "gpu1"}, free: 256*format.MebiByte + minMemory}},
Jesse Gross's avatar
Jesse Gross committed
98
99
			layers:   []int{50 * format.MebiByte, 50 * format.MebiByte, 50 * format.MebiByte},
			numGPU:   1,
100
			expected: ml.GPULayersList{{DeviceID: ml.DeviceID{ID: "gpu1"}, Layers: []int{1}}},
Jesse Gross's avatar
Jesse Gross committed
101
102
103
		},
		{
			name:     "Multi GPU numGPU 2",
104
			gpus:     []gpu{{id: ml.DeviceID{ID: "gpu0"}, free: 128*format.MebiByte + minMemory}, {id: ml.DeviceID{ID: "gpu1"}, free: 256*format.MebiByte + minMemory}},
Jesse Gross's avatar
Jesse Gross committed
105
106
			layers:   []int{256 * format.MebiByte, 50 * format.MebiByte, 50 * format.MebiByte},
			numGPU:   2,
107
			expected: ml.GPULayersList{{DeviceID: ml.DeviceID{ID: "gpu1"}, Layers: []int{0}}, {DeviceID: ml.DeviceID{ID: "gpu0"}, Layers: []int{1}}},
Jesse Gross's avatar
Jesse Gross committed
108
109
110
		},
		{
			name:     "Multi GPU numGPU 999",
111
			gpus:     []gpu{{id: ml.DeviceID{ID: "gpu0"}, free: 128*format.MebiByte + minMemory}, {id: ml.DeviceID{ID: "gpu1"}, free: 256*format.MebiByte + minMemory}},
Jesse Gross's avatar
Jesse Gross committed
112
113
			layers:   []int{256 * format.MebiByte, 256 * format.MebiByte, 50 * format.MebiByte},
			numGPU:   999,
114
			expected: ml.GPULayersList{{DeviceID: ml.DeviceID{ID: "gpu1"}, Layers: []int{0, 1}}, {DeviceID: ml.DeviceID{ID: "gpu0"}, Layers: []int{2}}},
Jesse Gross's avatar
Jesse Gross committed
115
116
117
		},
		{
			name:     "Multi GPU different libraries",
118
			gpus:     []gpu{{id: ml.DeviceID{Library: "CUDA", ID: "gpu0"}, free: 128*format.MebiByte + minMemory}, {id: ml.DeviceID{Library: "ROCm", ID: "gpu1"}, free: 256*format.MebiByte + minMemory}},
Jesse Gross's avatar
Jesse Gross committed
119
120
			layers:   []int{128 * format.MebiByte, 128 * format.MebiByte, 50 * format.MebiByte},
			numGPU:   -1,
121
			expected: ml.GPULayersList{{DeviceID: ml.DeviceID{ID: "gpu1", Library: "ROCm"}, Layers: []int{0, 1}}},
Jesse Gross's avatar
Jesse Gross committed
122
123
124
		},
		{
			name:        "requireFull",
125
			gpus:        []gpu{{id: ml.DeviceID{ID: "gpu0"}, free: 256*format.MebiByte + minMemory}},
Jesse Gross's avatar
Jesse Gross committed
126
127
128
129
130
			layers:      []int{100 * format.MebiByte, 100 * format.MebiByte, 100 * format.MebiByte, 100 * format.MebiByte},
			numGPU:      -1,
			requireFull: true,
			expectedErr: ErrLoadRequiredFull,
		},
131
132
133
134
135
136
137
138
		{
			name:        "requireFull numGPU",
			gpus:        []gpu{{id: ml.DeviceID{ID: "gpu0"}, free: 256 * format.MebiByte}},
			layers:      []int{100 * format.MebiByte, 100 * format.MebiByte, 100 * format.MebiByte, 100 * format.MebiByte},
			numGPU:      4,
			requireFull: true,
			expectedErr: ErrLoadRequiredFull,
		},
Jesse Gross's avatar
Jesse Gross committed
139
140
141
142
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
143
144
145
146
			var systemInfo ml.SystemInfo
			systemInfo.TotalMemory = format.GibiByte
			systemInfo.FreeMemory = 512 * format.MebiByte
			systemInfo.FreeSwap = 256 * format.MebiByte
Jesse Gross's avatar
Jesse Gross committed
147

148
			gpus := make([]ml.DeviceInfo, len(tt.gpus))
Jesse Gross's avatar
Jesse Gross committed
149
			for i := range tt.gpus {
150
				gpus[i].DeviceID = tt.gpus[i].id
Jesse Gross's avatar
Jesse Gross committed
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
				gpus[i].FreeMemory = uint64(tt.gpus[i].free)
			}

			s := &ollamaServer{
				llmServer: llmServer{
					totalLayers: uint64(len(tt.layers)),
					options: api.Options{
						Runner: api.Runner{
							NumGPU: tt.numGPU,
						},
					},
				},
			}

			s.mem = &ml.BackendMemory{CPU: ml.DeviceMemory{
166
167
				Weights: make([]uint64, s.totalLayers),
				Cache:   make([]uint64, s.totalLayers),
Jesse Gross's avatar
Jesse Gross committed
168
169
170
			}, GPUs: make([]ml.DeviceMemory, len(gpus))}

			for i := range tt.layers {
171
				s.mem.CPU.Weights[i] = uint64(tt.layers[i])
Jesse Gross's avatar
Jesse Gross committed
172
173
174
			}

			for i := range s.mem.GPUs {
175
				s.mem.GPUs[i].DeviceID = gpus[i].DeviceID
176
177
				s.mem.GPUs[i].Weights = make([]uint64, s.totalLayers)
				s.mem.GPUs[i].Cache = make([]uint64, s.totalLayers)
Jesse Gross's avatar
Jesse Gross committed
178
179
180
181
182
183
184
185
186
187
188
189
190
			}

			gpuLayers, err := s.createLayout(systemInfo, gpus, s.mem, tt.requireFull, 0)
			if err != tt.expectedErr {
				t.Fatalf("fitGPU returned error: %v", err)
			}
			if gpuLayers.Hash() != tt.expected.Hash() {
				t.Errorf("fitGPU assigned %v, want %v", gpuLayers, tt.expected)
			}
		})
	}
}

191
192
193
194
195
func TestLLMServerCompletionFormat(t *testing.T) {
	// This test was written to fix an already deployed issue. It is a bit
	// of a mess, and but it's good enough, until we can refactoring the
	// Completion method to be more testable.

196
	ctx, cancel := context.WithCancel(t.Context())
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
	s := &llmServer{
		sem: semaphore.NewWeighted(1), // required to prevent nil panic
	}

	checkInvalid := func(format string) {
		t.Helper()
		err := s.Completion(ctx, CompletionRequest{
			Options: new(api.Options),
			Format:  []byte(format),
		}, nil)

		want := fmt.Sprintf("invalid format: %q; expected \"json\" or a valid JSON Schema", format)
		if err == nil || !strings.Contains(err.Error(), want) {
			t.Fatalf("err = %v; want %q", err, want)
		}
	}

	checkInvalid("X")   // invalid format
	checkInvalid(`"X"`) // invalid JSON Schema

	cancel() // prevent further processing if request makes it past the format check

219
	checkValid := func(err error) {
220
221
222
223
224
225
		t.Helper()
		if !errors.Is(err, context.Canceled) {
			t.Fatalf("Completion: err = %v; expected context.Canceled", err)
		}
	}

226
227
228
229
230
231
232
233
234
235
	valids := []string{
		// "missing"
		``,
		`""`,
		`null`,

		// JSON
		`"json"`,
		`{"type":"object"}`,
	}
236
237
238
239
240
	for _, valid := range valids {
		err := s.Completion(ctx, CompletionRequest{
			Options: new(api.Options),
			Format:  []byte(valid),
		}, nil)
241
		checkValid(err)
242
243
244
245
246
247
	}

	err := s.Completion(ctx, CompletionRequest{
		Options: new(api.Options),
		Format:  nil, // missing format
	}, nil)
248
	checkValid(err)
249
}