server_test.go 8.46 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
package llm

import (
	"context"
	"errors"
	"fmt"
	"strings"
	"testing"

	"github.com/ollama/ollama/api"
Jesse Gross's avatar
Jesse Gross committed
11
12
13
	"github.com/ollama/ollama/discover"
	"github.com/ollama/ollama/format"
	"github.com/ollama/ollama/ml"
14
15
16
	"golang.org/x/sync/semaphore"
)

Jesse Gross's avatar
Jesse Gross committed
17
18
func TestLLMServerFitGPU(t *testing.T) {
	type gpu struct {
19
20
		id   ml.DeviceID
		free int
Jesse Gross's avatar
Jesse Gross committed
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
	}

	tests := []struct {
		name        string
		gpus        []gpu
		layers      []int
		numGPU      int
		requireFull bool
		expected    ml.GPULayersList
		expectedErr error
	}{
		{
			name:     "No GPU",
			layers:   []int{50 * format.MebiByte, 50 * format.MebiByte, 50 * format.MebiByte},
			numGPU:   -1,
			expected: ml.GPULayersList{},
		},
		{
			name:     "Full single GPU",
40
			gpus:     []gpu{{id: ml.DeviceID{ID: "gpu0"}, free: 256 * format.MebiByte}},
Jesse Gross's avatar
Jesse Gross committed
41
42
			layers:   []int{50 * format.MebiByte, 50 * format.MebiByte, 50 * format.MebiByte},
			numGPU:   -1,
43
			expected: ml.GPULayersList{{DeviceID: ml.DeviceID{ID: "gpu0"}, Layers: []int{0, 1, 2}}},
Jesse Gross's avatar
Jesse Gross committed
44
45
46
		},
		{
			name:     "Partial single GPU",
47
			gpus:     []gpu{{id: ml.DeviceID{ID: "gpu0"}, free: 256 * format.MebiByte}},
Jesse Gross's avatar
Jesse Gross committed
48
49
			layers:   []int{100 * format.MebiByte, 100 * format.MebiByte, 100 * format.MebiByte, 100 * format.MebiByte},
			numGPU:   -1,
50
			expected: ml.GPULayersList{{DeviceID: ml.DeviceID{ID: "gpu0"}, Layers: []int{1, 2}}},
Jesse Gross's avatar
Jesse Gross committed
51
52
53
		},
		{
			name:     "Single GPU with numGPU 1",
54
			gpus:     []gpu{{id: ml.DeviceID{ID: "gpu0"}, free: 256 * format.MebiByte}},
Jesse Gross's avatar
Jesse Gross committed
55
56
			layers:   []int{50 * format.MebiByte, 50 * format.MebiByte, 50 * format.MebiByte},
			numGPU:   1,
57
			expected: ml.GPULayersList{{DeviceID: ml.DeviceID{ID: "gpu0"}, Layers: []int{1}}},
Jesse Gross's avatar
Jesse Gross committed
58
59
60
		},
		{
			name:     "Single GPU with numGPU 0",
61
			gpus:     []gpu{{id: ml.DeviceID{ID: "gpu0"}, free: 256 * format.MebiByte}},
Jesse Gross's avatar
Jesse Gross committed
62
63
64
65
66
67
			layers:   []int{50 * format.MebiByte, 50 * format.MebiByte, 50 * format.MebiByte},
			numGPU:   0,
			expected: ml.GPULayersList{},
		},
		{
			name:     "Single GPU with numGPU 999",
68
			gpus:     []gpu{{id: ml.DeviceID{ID: "gpu0"}, free: 256 * format.MebiByte}},
Jesse Gross's avatar
Jesse Gross committed
69
70
			layers:   []int{100 * format.MebiByte, 100 * format.MebiByte, 100 * format.MebiByte, 100 * format.MebiByte},
			numGPU:   999,
71
			expected: ml.GPULayersList{{DeviceID: ml.DeviceID{ID: "gpu0"}, Layers: []int{0, 1, 2, 3}}},
Jesse Gross's avatar
Jesse Gross committed
72
73
74
		},
		{
			name:     "Multi GPU fits on one",
75
			gpus:     []gpu{{id: ml.DeviceID{ID: "gpu0"}, free: 128 * format.MebiByte}, {id: ml.DeviceID{ID: "gpu1"}, free: 256 * format.MebiByte}},
Jesse Gross's avatar
Jesse Gross committed
76
77
			layers:   []int{50 * format.MebiByte, 50 * format.MebiByte, 50 * format.MebiByte},
			numGPU:   -1,
78
			expected: ml.GPULayersList{{DeviceID: ml.DeviceID{ID: "gpu1"}, Layers: []int{0, 1, 2}}},
Jesse Gross's avatar
Jesse Gross committed
79
80
81
		},
		{
			name:     "Multi GPU split",
82
			gpus:     []gpu{{id: ml.DeviceID{ID: "gpu0"}, free: 128 * format.MebiByte}, {id: ml.DeviceID{ID: "gpu1"}, free: 256 * format.MebiByte}},
Jesse Gross's avatar
Jesse Gross committed
83
84
			layers:   []int{256 * format.MebiByte, 50 * format.MebiByte, 50 * format.MebiByte},
			numGPU:   -1,
85
			expected: ml.GPULayersList{{DeviceID: ml.DeviceID{ID: "gpu1"}, Layers: []int{0}}, {DeviceID: ml.DeviceID{ID: "gpu0"}, Layers: []int{1, 2}}},
Jesse Gross's avatar
Jesse Gross committed
86
87
88
		},
		{
			name:     "Multi GPU partial",
89
			gpus:     []gpu{{id: ml.DeviceID{ID: "gpu0"}, free: 128 * format.MebiByte}, {id: ml.DeviceID{ID: "gpu1"}, free: 256 * format.MebiByte}},
Jesse Gross's avatar
Jesse Gross committed
90
91
			layers:   []int{256 * format.MebiByte, 256 * format.MebiByte, 50 * format.MebiByte},
			numGPU:   -1,
92
			expected: ml.GPULayersList{{DeviceID: ml.DeviceID{ID: "gpu1"}, Layers: []int{1}}},
Jesse Gross's avatar
Jesse Gross committed
93
94
95
		},
		{
			name:     "Multi GPU numGPU 1",
96
			gpus:     []gpu{{id: ml.DeviceID{ID: "gpu0"}, free: 128 * format.MebiByte}, {id: ml.DeviceID{ID: "gpu1"}, free: 256 * format.MebiByte}},
Jesse Gross's avatar
Jesse Gross committed
97
98
			layers:   []int{50 * format.MebiByte, 50 * format.MebiByte, 50 * format.MebiByte},
			numGPU:   1,
99
			expected: ml.GPULayersList{{DeviceID: ml.DeviceID{ID: "gpu1"}, Layers: []int{1}}},
Jesse Gross's avatar
Jesse Gross committed
100
101
102
		},
		{
			name:     "Multi GPU numGPU 2",
103
			gpus:     []gpu{{id: ml.DeviceID{ID: "gpu0"}, free: 128 * format.MebiByte}, {id: ml.DeviceID{ID: "gpu1"}, free: 256 * format.MebiByte}},
Jesse Gross's avatar
Jesse Gross committed
104
105
			layers:   []int{256 * format.MebiByte, 50 * format.MebiByte, 50 * format.MebiByte},
			numGPU:   2,
106
			expected: ml.GPULayersList{{DeviceID: ml.DeviceID{ID: "gpu1"}, Layers: []int{0}}, {DeviceID: ml.DeviceID{ID: "gpu0"}, Layers: []int{1}}},
Jesse Gross's avatar
Jesse Gross committed
107
108
109
		},
		{
			name:     "Multi GPU numGPU 999",
110
			gpus:     []gpu{{id: ml.DeviceID{ID: "gpu0"}, free: 128 * format.MebiByte}, {id: ml.DeviceID{ID: "gpu1"}, free: 256 * format.MebiByte}},
Jesse Gross's avatar
Jesse Gross committed
111
112
			layers:   []int{256 * format.MebiByte, 256 * format.MebiByte, 50 * format.MebiByte},
			numGPU:   999,
113
			expected: ml.GPULayersList{{DeviceID: ml.DeviceID{ID: "gpu1"}, Layers: []int{0, 1}}, {DeviceID: ml.DeviceID{ID: "gpu0"}, Layers: []int{2}}},
Jesse Gross's avatar
Jesse Gross committed
114
115
116
		},
		{
			name:     "Multi GPU different libraries",
117
			gpus:     []gpu{{id: ml.DeviceID{Library: "CUDA", ID: "gpu0"}, free: 128 * format.MebiByte}, {id: ml.DeviceID{Library: "ROCm", ID: "gpu1"}, free: 256 * format.MebiByte}},
Jesse Gross's avatar
Jesse Gross committed
118
119
			layers:   []int{128 * format.MebiByte, 128 * format.MebiByte, 50 * format.MebiByte},
			numGPU:   -1,
120
			expected: ml.GPULayersList{{DeviceID: ml.DeviceID{ID: "gpu1", Library: "ROCm"}, Layers: []int{0, 1}}},
Jesse Gross's avatar
Jesse Gross committed
121
122
123
		},
		{
			name:        "requireFull",
124
			gpus:        []gpu{{id: ml.DeviceID{ID: "gpu0"}, free: 256 * format.MebiByte}},
Jesse Gross's avatar
Jesse Gross committed
125
126
127
128
129
			layers:      []int{100 * format.MebiByte, 100 * format.MebiByte, 100 * format.MebiByte, 100 * format.MebiByte},
			numGPU:      -1,
			requireFull: true,
			expectedErr: ErrLoadRequiredFull,
		},
130
131
132
133
134
135
136
137
		{
			name:        "requireFull numGPU",
			gpus:        []gpu{{id: ml.DeviceID{ID: "gpu0"}, free: 256 * format.MebiByte}},
			layers:      []int{100 * format.MebiByte, 100 * format.MebiByte, 100 * format.MebiByte, 100 * format.MebiByte},
			numGPU:      4,
			requireFull: true,
			expectedErr: ErrLoadRequiredFull,
		},
Jesse Gross's avatar
Jesse Gross committed
138
139
140
141
142
143
144
145
146
147
148
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			var systemInfo discover.SystemInfo
			systemInfo.System.TotalMemory = format.GibiByte
			systemInfo.System.FreeMemory = 512 * format.MebiByte
			systemInfo.System.FreeSwap = 256 * format.MebiByte

			gpus := make(discover.GpuInfoList, len(tt.gpus))
			for i := range tt.gpus {
149
				gpus[i].DeviceID = tt.gpus[i].id
Jesse Gross's avatar
Jesse Gross committed
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
				gpus[i].FreeMemory = uint64(tt.gpus[i].free)
			}

			s := &ollamaServer{
				llmServer: llmServer{
					totalLayers: uint64(len(tt.layers)),
					options: api.Options{
						Runner: api.Runner{
							NumGPU: tt.numGPU,
						},
					},
				},
			}

			s.mem = &ml.BackendMemory{CPU: ml.DeviceMemory{
165
166
				Weights: make([]uint64, s.totalLayers),
				Cache:   make([]uint64, s.totalLayers),
Jesse Gross's avatar
Jesse Gross committed
167
168
169
			}, GPUs: make([]ml.DeviceMemory, len(gpus))}

			for i := range tt.layers {
170
				s.mem.CPU.Weights[i] = uint64(tt.layers[i])
Jesse Gross's avatar
Jesse Gross committed
171
172
173
			}

			for i := range s.mem.GPUs {
174
				s.mem.GPUs[i].DeviceID = gpus[i].DeviceID
175
176
				s.mem.GPUs[i].Weights = make([]uint64, s.totalLayers)
				s.mem.GPUs[i].Cache = make([]uint64, s.totalLayers)
Jesse Gross's avatar
Jesse Gross committed
177
178
179
180
181
182
183
184
185
186
187
188
189
			}

			gpuLayers, err := s.createLayout(systemInfo, gpus, s.mem, tt.requireFull, 0)
			if err != tt.expectedErr {
				t.Fatalf("fitGPU returned error: %v", err)
			}
			if gpuLayers.Hash() != tt.expected.Hash() {
				t.Errorf("fitGPU assigned %v, want %v", gpuLayers, tt.expected)
			}
		})
	}
}

190
191
192
193
194
func TestLLMServerCompletionFormat(t *testing.T) {
	// This test was written to fix an already deployed issue. It is a bit
	// of a mess, and but it's good enough, until we can refactoring the
	// Completion method to be more testable.

195
	ctx, cancel := context.WithCancel(t.Context())
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
	s := &llmServer{
		sem: semaphore.NewWeighted(1), // required to prevent nil panic
	}

	checkInvalid := func(format string) {
		t.Helper()
		err := s.Completion(ctx, CompletionRequest{
			Options: new(api.Options),
			Format:  []byte(format),
		}, nil)

		want := fmt.Sprintf("invalid format: %q; expected \"json\" or a valid JSON Schema", format)
		if err == nil || !strings.Contains(err.Error(), want) {
			t.Fatalf("err = %v; want %q", err, want)
		}
	}

	checkInvalid("X")   // invalid format
	checkInvalid(`"X"`) // invalid JSON Schema

	cancel() // prevent further processing if request makes it past the format check

218
	checkValid := func(err error) {
219
220
221
222
223
224
		t.Helper()
		if !errors.Is(err, context.Canceled) {
			t.Fatalf("Completion: err = %v; expected context.Canceled", err)
		}
	}

225
226
227
228
229
230
231
232
233
234
	valids := []string{
		// "missing"
		``,
		`""`,
		`null`,

		// JSON
		`"json"`,
		`{"type":"object"}`,
	}
235
236
237
238
239
	for _, valid := range valids {
		err := s.Completion(ctx, CompletionRequest{
			Options: new(api.Options),
			Format:  []byte(valid),
		}, nil)
240
		checkValid(err)
241
242
243
244
245
246
	}

	err := s.Completion(ctx, CompletionRequest{
		Options: new(api.Options),
		Format:  nil, // missing format
	}, nil)
247
	checkValid(err)
248
}