"git@developer.sourcefind.cn:zhaoyu6/sglang.git" did not exist on "0ce091a82d29bd6c0ea6564bc372311d14b6f5eb"
types.go 13.7 KB
Newer Older
Jeffrey Morgan's avatar
Jeffrey Morgan committed
1
2
package api

3
import (
Michael Yang's avatar
Michael Yang committed
4
	"encoding/json"
5
	"fmt"
Michael Yang's avatar
Michael Yang committed
6
	"math"
7
	"os"
8
	"reflect"
9
	"strconv"
10
	"strings"
11
12
	"time"
)
Michael Yang's avatar
Michael Yang committed
13

Patrick Devine's avatar
Patrick Devine committed
14
// StatusError is an error value that carries a status code, a status string
// and an error message. ErrorMessage is the only field with a json tag and is
// encoded under the "error" key.
type StatusError struct {
	StatusCode   int
	Status       string
	ErrorMessage string `json:"error"`
}

// Error implements the error interface. It returns "Status: ErrorMessage" when
// both are set, whichever of the two is set when only one is, and a generic
// fallback message when neither is.
func (e StatusError) Error() string {
	switch {
	case e.Status != "" && e.ErrorMessage != "":
		return fmt.Sprintf("%s: %s", e.Status, e.ErrorMessage)
	case e.Status != "":
		return e.Status
	case e.ErrorMessage != "":
		return e.ErrorMessage
	default:
		// this should not happen
		return "something went wrong, please see the ollama server logs for details"
	}
}

Patrick Devine's avatar
Patrick Devine committed
34
35
// ImageData is the raw byte content of one image. Being a []byte, it is
// encoded by encoding/json as a base64 string.
type ImageData []byte

36
type GenerateRequest struct {
37
38
39
40
41
42
43
44
45
46
	Model     string      `json:"model"`
	Prompt    string      `json:"prompt"`
	System    string      `json:"system"`
	Template  string      `json:"template"`
	Context   []int       `json:"context,omitempty"`
	Stream    *bool       `json:"stream,omitempty"`
	Raw       bool        `json:"raw,omitempty"`
	Format    string      `json:"format"`
	KeepAlive *Duration   `json:"keep_alive,omitempty"`
	Images    []ImageData `json:"images,omitempty"`
47

48
	Options map[string]interface{} `json:"options"`
49
50
}

Bruce MacDonald's avatar
Bruce MacDonald committed
51
type ChatRequest struct {
52
53
54
55
56
	Model     string    `json:"model"`
	Messages  []Message `json:"messages"`
	Stream    *bool     `json:"stream,omitempty"`
	Format    string    `json:"format"`
	KeepAlive *Duration `json:"keep_alive,omitempty"`
Bruce MacDonald's avatar
Bruce MacDonald committed
57
58
59
60
61

	Options map[string]interface{} `json:"options"`
}

type Message struct {
62
63
	Role    string      `json:"role"` // one of ["system", "user", "assistant"]
	Content string      `json:"content"`
64
	Images  []ImageData `json:"images,omitempty"`
Bruce MacDonald's avatar
Bruce MacDonald committed
65
66
67
68
69
}

type ChatResponse struct {
	Model     string    `json:"model"`
	CreatedAt time.Time `json:"created_at"`
70
	Message   Message   `json:"message"`
Bruce MacDonald's avatar
Bruce MacDonald committed
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85

	Done bool `json:"done"`

	Metrics
}

// Metrics groups the duration and token-count fields that ChatResponse and
// GenerateResponse embed. Every field is tagged omitempty and is therefore
// left out of the encoded JSON when zero. The duration fields are
// time.Duration values, which encoding/json encodes as integer nanoseconds.
type Metrics struct {
	TotalDuration      time.Duration `json:"total_duration,omitempty"`
	LoadDuration       time.Duration `json:"load_duration,omitempty"`
	PromptEvalCount    int           `json:"prompt_eval_count,omitempty"`
	PromptEvalDuration time.Duration `json:"prompt_eval_duration,omitempty"`
	EvalCount          int           `json:"eval_count,omitempty"`
	EvalDuration       time.Duration `json:"eval_duration,omitempty"`
}

86
// Options specified in GenerateRequest, if you add a new option here add it to the API docs also
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
type Options struct {
	Runner

	// Predict options used at runtime
	NumKeep          int      `json:"num_keep,omitempty"`
	Seed             int      `json:"seed,omitempty"`
	NumPredict       int      `json:"num_predict,omitempty"`
	TopK             int      `json:"top_k,omitempty"`
	TopP             float32  `json:"top_p,omitempty"`
	TFSZ             float32  `json:"tfs_z,omitempty"`
	TypicalP         float32  `json:"typical_p,omitempty"`
	RepeatLastN      int      `json:"repeat_last_n,omitempty"`
	Temperature      float32  `json:"temperature,omitempty"`
	RepeatPenalty    float32  `json:"repeat_penalty,omitempty"`
	PresencePenalty  float32  `json:"presence_penalty,omitempty"`
	FrequencyPenalty float32  `json:"frequency_penalty,omitempty"`
	Mirostat         int      `json:"mirostat,omitempty"`
	MirostatTau      float32  `json:"mirostat_tau,omitempty"`
	MirostatEta      float32  `json:"mirostat_eta,omitempty"`
	PenalizeNewline  bool     `json:"penalize_newline,omitempty"`
	Stop             []string `json:"stop,omitempty"`
}

// Runner options which must be set when the model is loaded into memory.
// Every field is tagged omitempty, so zero values are left out of the encoded
// JSON.
type Runner struct {
	UseNUMA   bool `json:"numa,omitempty"`
	NumCtx    int  `json:"num_ctx,omitempty"`
	NumBatch  int  `json:"num_batch,omitempty"`
	NumGQA    int  `json:"num_gqa,omitempty"`
	NumGPU    int  `json:"num_gpu,omitempty"`
	MainGPU   int  `json:"main_gpu,omitempty"`
	LowVRAM   bool `json:"low_vram,omitempty"`
	F16KV     bool `json:"f16_kv,omitempty"`
	LogitsAll bool `json:"logits_all,omitempty"`
	VocabOnly bool `json:"vocab_only,omitempty"`
	UseMMap   bool `json:"use_mmap,omitempty"`
	UseMLock  bool `json:"use_mlock,omitempty"`
	NumThread int  `json:"num_thread,omitempty"`

	// Unused: RopeFrequencyBase is ignored. Instead the value in the model will be used
	RopeFrequencyBase float32 `json:"rope_frequency_base,omitempty"`
	// Unused: RopeFrequencyScale is ignored. Instead the value in the model will be used
	RopeFrequencyScale float32 `json:"rope_frequency_scale,omitempty"`
}

Bruce MacDonald's avatar
Bruce MacDonald committed
132
type EmbeddingRequest struct {
133
134
135
	Model     string    `json:"model"`
	Prompt    string    `json:"prompt"`
	KeepAlive *Duration `json:"keep_alive,omitempty"`
Bruce MacDonald's avatar
Bruce MacDonald committed
136
137
138
139
140
141
142
143

	Options map[string]interface{} `json:"options"`
}

// EmbeddingResponse is the JSON shape of an embedding response; it carries a
// single slice of float64 values under the "embedding" key.
type EmbeddingResponse struct {
	Embedding []float64 `json:"embedding"`
}

144
// CreateRequest holds the JSON-tagged parameters of a create request. Stream
// and Quantization are tagged omitempty and are left out of the encoded JSON
// when unset.
type CreateRequest struct {
	Model        string `json:"model"`
	Path         string `json:"path"`
	Modelfile    string `json:"modelfile"`
	Stream       *bool  `json:"stream,omitempty"`
	Quantization string `json:"quantization,omitempty"`

	// Name is deprecated, see Model
	Name string `json:"name"`
}

155
// DeleteRequest holds the JSON-tagged parameters of a delete request.
type DeleteRequest struct {
	Model string `json:"model"`

	// Name is deprecated, see Model
	Name string `json:"name"`
}

Patrick Devine's avatar
Patrick Devine committed
162
// ShowRequest holds the JSON-tagged parameters of a show request: the model
// name, system and template strings, and an untyped map from option name to
// value.
type ShowRequest struct {
	Model    string `json:"model"`
	System   string `json:"system"`
	Template string `json:"template"`

	Options map[string]interface{} `json:"options"`

	// Name is deprecated, see Model
	Name string `json:"name"`
}

type ShowResponse struct {
Patrick Devine's avatar
Patrick Devine committed
174
175
176
177
178
179
	License    string       `json:"license,omitempty"`
	Modelfile  string       `json:"modelfile,omitempty"`
	Parameters string       `json:"parameters,omitempty"`
	Template   string       `json:"template,omitempty"`
	System     string       `json:"system,omitempty"`
	Details    ModelDetails `json:"details,omitempty"`
180
	Messages   []Message    `json:"messages,omitempty"`
Patrick Devine's avatar
Patrick Devine committed
181
182
}

Patrick Devine's avatar
Patrick Devine committed
183
184
185
186
187
// CopyRequest holds the JSON-tagged parameters of a copy request: a source
// and a destination name.
type CopyRequest struct {
	Source      string `json:"source"`
	Destination string `json:"destination"`
}

Bruce MacDonald's avatar
Bruce MacDonald committed
188
// PullRequest holds the JSON-tagged parameters of a pull request. Insecure
// and Stream are tagged omitempty and are left out of the encoded JSON when
// unset.
type PullRequest struct {
	Model    string `json:"model"`
	Insecure bool   `json:"insecure,omitempty"`
	Username string `json:"username"`
	Password string `json:"password"`
	Stream   *bool  `json:"stream,omitempty"`

	// Name is deprecated, see Model
	Name string `json:"name"`
}

199
// ProgressResponse is the JSON shape of a progress update: a status string
// plus an optional digest and total/completed counters, each of which is
// omitted from the encoded JSON when zero.
type ProgressResponse struct {
	Status    string `json:"status"`
	Digest    string `json:"digest,omitempty"`
	Total     int64  `json:"total,omitempty"`
	Completed int64  `json:"completed,omitempty"`
}

206
// PushRequest holds the JSON-tagged parameters of a push request. Insecure
// and Stream are tagged omitempty and are left out of the encoded JSON when
// unset.
type PushRequest struct {
	Model    string `json:"model"`
	Insecure bool   `json:"insecure,omitempty"`
	Username string `json:"username"`
	Password string `json:"password"`
	Stream   *bool  `json:"stream,omitempty"`

	// Name is deprecated, see Model
	Name string `json:"name"`
}
216

Patrick Devine's avatar
Patrick Devine committed
217
type ListResponse struct {
218
	Models []ModelResponse `json:"models"`
Patrick Devine's avatar
Patrick Devine committed
219
220
}

221
type ModelResponse struct {
Patrick Devine's avatar
Patrick Devine committed
222
	Name       string       `json:"name"`
Michael Yang's avatar
Michael Yang committed
223
	Model      string       `json:"model"`
Patrick Devine's avatar
Patrick Devine committed
224
225
226
227
	ModifiedAt time.Time    `json:"modified_at"`
	Size       int64        `json:"size"`
	Digest     string       `json:"digest"`
	Details    ModelDetails `json:"details,omitempty"`
Patrick Devine's avatar
Patrick Devine committed
228
229
}

Patrick Devine's avatar
Patrick Devine committed
230
231
232
233
// TokenResponse is the JSON shape of a response carrying a single token
// string under the "token" key.
type TokenResponse struct {
	Token string `json:"token"`
}

Michael Yang's avatar
Michael Yang committed
234
type GenerateResponse struct {
235
236
	Model     string    `json:"model"`
	CreatedAt time.Time `json:"created_at"`
237
	Response  string    `json:"response"`
238

Michael Yang's avatar
Michael Yang committed
239
240
	Done    bool  `json:"done"`
	Context []int `json:"context,omitempty"`
241

Bruce MacDonald's avatar
Bruce MacDonald committed
242
	Metrics
243
244
}

Patrick Devine's avatar
Patrick Devine committed
245
// ModelDetails groups descriptive string attributes of a model. No field is
// tagged omitempty, so every key is present in the encoded JSON.
type ModelDetails struct {
	ParentModel       string   `json:"parent_model"`
	Format            string   `json:"format"`
	Family            string   `json:"family"`
	Families          []string `json:"families"`
	ParameterSize     string   `json:"parameter_size"`
	QuantizationLevel string   `json:"quantization_level"`
}

Bruce MacDonald's avatar
Bruce MacDonald committed
254
255
256
func (m *Metrics) Summary() {
	if m.TotalDuration > 0 {
		fmt.Fprintf(os.Stderr, "total duration:       %v\n", m.TotalDuration)
257
258
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
259
260
	if m.LoadDuration > 0 {
		fmt.Fprintf(os.Stderr, "load duration:        %v\n", m.LoadDuration)
Michael Yang's avatar
Michael Yang committed
261
262
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
263
264
	if m.PromptEvalCount > 0 {
		fmt.Fprintf(os.Stderr, "prompt eval count:    %d token(s)\n", m.PromptEvalCount)
265
266
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
267
268
269
	if m.PromptEvalDuration > 0 {
		fmt.Fprintf(os.Stderr, "prompt eval duration: %s\n", m.PromptEvalDuration)
		fmt.Fprintf(os.Stderr, "prompt eval rate:     %.2f tokens/s\n", float64(m.PromptEvalCount)/m.PromptEvalDuration.Seconds())
270
271
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
272
273
	if m.EvalCount > 0 {
		fmt.Fprintf(os.Stderr, "eval count:           %d token(s)\n", m.EvalCount)
274
275
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
276
277
278
	if m.EvalDuration > 0 {
		fmt.Fprintf(os.Stderr, "eval duration:        %s\n", m.EvalDuration)
		fmt.Fprintf(os.Stderr, "eval rate:            %.2f tokens/s\n", float64(m.EvalCount)/m.EvalDuration.Seconds())
279
	}
280
281
}

282
283
// ErrInvalidOpts is wrapped by the error Options.FromMap returns when the
// input map contains keys that match no option; test for it with errors.Is.
var ErrInvalidOpts = fmt.Errorf("invalid options")

284
285
286
287
288
289
290
291
292
293
294
295
296
func (opts *Options) FromMap(m map[string]interface{}) error {
	valueOpts := reflect.ValueOf(opts).Elem() // names of the fields in the options struct
	typeOpts := reflect.TypeOf(opts).Elem()   // types of the fields in the options struct

	// build map of json struct tags to their types
	jsonOpts := make(map[string]reflect.StructField)
	for _, field := range reflect.VisibleFields(typeOpts) {
		jsonTag := strings.Split(field.Tag.Get("json"), ",")[0]
		if jsonTag != "" {
			jsonOpts[jsonTag] = field
		}
	}

297
	invalidOpts := []string{}
298
299
300
301
	for key, val := range m {
		if opt, ok := jsonOpts[key]; ok {
			field := valueOpts.FieldByName(opt.Name)
			if field.IsValid() && field.CanSet() {
Michael Yang's avatar
Michael Yang committed
302
303
304
305
				if val == nil {
					continue
				}

306
307
				switch field.Kind() {
				case reflect.Int:
Michael Yang's avatar
Michael Yang committed
308
309
310
311
312
313
314
					switch t := val.(type) {
					case int64:
						field.SetInt(t)
					case float64:
						// when JSON unmarshals numbers, it uses float64, not int
						field.SetInt(int64(t))
					default:
315
						return fmt.Errorf("option %q must be of type integer", key)
316
317
318
319
					}
				case reflect.Bool:
					val, ok := val.(bool)
					if !ok {
320
						return fmt.Errorf("option %q must be of type boolean", key)
321
322
323
324
325
326
					}
					field.SetBool(val)
				case reflect.Float32:
					// JSON unmarshals to float64
					val, ok := val.(float64)
					if !ok {
327
						return fmt.Errorf("option %q must be of type float32", key)
328
329
330
331
332
					}
					field.SetFloat(val)
				case reflect.String:
					val, ok := val.(string)
					if !ok {
333
						return fmt.Errorf("option %q must be of type string", key)
334
335
336
337
338
339
					}
					field.SetString(val)
				case reflect.Slice:
					// JSON unmarshals to []interface{}, not []string
					val, ok := val.([]interface{})
					if !ok {
340
						return fmt.Errorf("option %q must be of type array", key)
341
342
343
344
345
346
					}
					// convert []interface{} to []string
					slice := make([]string, len(val))
					for i, item := range val {
						str, ok := item.(string)
						if !ok {
347
							return fmt.Errorf("option %q must be of an array of strings", key)
348
349
350
351
352
353
354
355
						}
						slice[i] = str
					}
					field.Set(reflect.ValueOf(slice))
				default:
					return fmt.Errorf("unknown type loading config params: %v", field.Kind())
				}
			}
356
357
		} else {
			invalidOpts = append(invalidOpts, key)
358
359
		}
	}
360
361
362
363

	if len(invalidOpts) > 0 {
		return fmt.Errorf("%w: %v", ErrInvalidOpts, strings.Join(invalidOpts, ", "))
	}
364
365
366
	return nil
}

Michael Yang's avatar
Michael Yang committed
367
368
func DefaultOptions() Options {
	return Options{
369
370
		// options set on request to runner
		NumPredict:       -1,
371
		NumKeep:          0,
Michael Yang's avatar
Michael Yang committed
372
373
374
375
376
		Temperature:      0.8,
		TopK:             40,
		TopP:             0.9,
		TFSZ:             1.0,
		TypicalP:         1.0,
377
378
379
380
		RepeatLastN:      64,
		RepeatPenalty:    1.1,
		PresencePenalty:  0.0,
		FrequencyPenalty: 0.0,
Michael Yang's avatar
Michael Yang committed
381
382
383
		Mirostat:         0,
		MirostatTau:      5.0,
		MirostatEta:      0.1,
Michael Yang's avatar
Michael Yang committed
384
		PenalizeNewline:  true,
385
		Seed:             -1,
Michael Yang's avatar
Michael Yang committed
386

387
388
		Runner: Runner{
			// options set when the model is loaded
Michael Yang's avatar
Michael Yang committed
389
390
391
392
393
394
395
396
397
398
			NumCtx:    2048,
			NumBatch:  512,
			NumGPU:    -1, // -1 here indicates that NumGPU should be set dynamically
			NumGQA:    1,
			NumThread: 0, // let the runtime decide
			LowVRAM:   false,
			F16KV:     true,
			UseMLock:  false,
			UseMMap:   true,
			UseNUMA:   false,
399
		},
Michael Yang's avatar
Michael Yang committed
400
	}
Jeffrey Morgan's avatar
Jeffrey Morgan committed
401
}
Michael Yang's avatar
Michael Yang committed
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417

// Duration wraps time.Duration so that it can be decoded from JSON given
// either as a number of seconds or as a Go duration string.
type Duration struct {
	time.Duration
}

// UnmarshalJSON decodes b into d.
//
// A JSON number is read as seconds (fractions allowed) and a JSON string is
// parsed with time.ParseDuration. A negative value in either form is mapped to
// the largest representable duration, and a number too large to fit is clamped
// to that same maximum instead of overflowing. Any other JSON value (null,
// bool, array, object) yields a default of five minutes.
//
// An error is returned when b is not valid JSON or when a string cannot be
// parsed as a duration; d is left unmodified in both cases.
func (d *Duration) UnmarshalJSON(b []byte) (err error) {
	var v any
	if err := json.Unmarshal(b, &v); err != nil {
		return err
	}

	const forever = time.Duration(math.MaxInt64)

	// default used for JSON values that are neither a number nor a string
	result := 5 * time.Minute

	switch t := v.(type) {
	case float64:
		ns := t * float64(time.Second)
		switch {
		case t < 0:
			result = forever
		case ns >= float64(math.MaxInt64):
			// converting an out-of-range float to int64 is implementation
			// defined and may come out negative, so clamp explicitly
			result = forever
		default:
			result = time.Duration(ns)
		}
	case string:
		parsed, err := time.ParseDuration(t)
		if err != nil {
			return err
		}
		if parsed < 0 {
			parsed = forever
		}
		result = parsed
	}

	d.Duration = result
	return nil
}
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493

// FormatParams converts specified parameter options to their correct types.
//
// Each key of params must equal the json tag name of a field of Options
// (including fields promoted from Runner); an unknown key yields an error. The
// field's kind selects the conversion: float32 fields produce a float32, int
// fields an int64, bool fields a bool and string fields a string, each taken
// from the first element of the value list. Slice fields receive the whole
// value list unchanged.
//
// An error is returned when a non-slice parameter has an empty value list or
// when its first value cannot be parsed as the required type. On error the
// returned map is nil.
func FormatParams(params map[string][]string) (map[string]interface{}, error) {
	opts := Options{}
	valueOpts := reflect.ValueOf(&opts).Elem() // names of the fields in the options struct
	typeOpts := reflect.TypeOf(opts)           // types of the fields in the options struct

	// build map of json struct tags to their types
	jsonOpts := make(map[string]reflect.StructField)
	for _, field := range reflect.VisibleFields(typeOpts) {
		jsonTag := strings.Split(field.Tag.Get("json"), ",")[0]
		if jsonTag != "" {
			jsonOpts[jsonTag] = field
		}
	}

	out := make(map[string]interface{})
	// iterate params and set values based on json struct tags
	for key, vals := range params {
		opt, ok := jsonOpts[key]
		if !ok {
			return nil, fmt.Errorf("unknown parameter '%s'", key)
		}

		field := valueOpts.FieldByName(opt.Name)
		if !field.IsValid() || !field.CanSet() {
			continue
		}

		kind := field.Kind()

		// the scalar conversions below read vals[0]; reject an empty list
		// here instead of panicking on the index
		switch kind {
		case reflect.Float32, reflect.Int, reflect.Bool, reflect.String:
			if len(vals) == 0 {
				return nil, fmt.Errorf("missing value for parameter '%s'", key)
			}
		}

		switch kind {
		case reflect.Float32:
			floatVal, err := strconv.ParseFloat(vals[0], 32)
			if err != nil {
				return nil, fmt.Errorf("invalid float value %s", vals)
			}

			out[key] = float32(floatVal)
		case reflect.Int:
			intVal, err := strconv.ParseInt(vals[0], 10, 64)
			if err != nil {
				return nil, fmt.Errorf("invalid int value %s", vals)
			}

			out[key] = intVal
		case reflect.Bool:
			boolVal, err := strconv.ParseBool(vals[0])
			if err != nil {
				return nil, fmt.Errorf("invalid bool value %s", vals)
			}

			out[key] = boolVal
		case reflect.String:
			out[key] = vals[0]
		case reflect.Slice:
			// TODO: only string slices are supported right now
			out[key] = vals
		default:
			return nil, fmt.Errorf("unknown type %s for %s", field.Kind(), key)
		}
	}

	return out, nil
}