types.go 20.6 KB
Newer Older
Jeffrey Morgan's avatar
Jeffrey Morgan committed
1
2
package api

3
import (
Michael Yang's avatar
Michael Yang committed
4
	"encoding/json"
5
	"fmt"
6
	"log/slog"
Michael Yang's avatar
Michael Yang committed
7
	"math"
8
	"os"
9
	"reflect"
10
	"strconv"
11
	"strings"
12
13
	"time"
)
Michael Yang's avatar
Michael Yang committed
14

15
// StatusError is an error with an HTTP status code and an optional error
// message. The message is encoded as "error" in JSON.
type StatusError struct {
	StatusCode   int
	Status       string
	ErrorMessage string `json:"error"`
}

// Error implements the error interface. It returns "Status: ErrorMessage"
// when both fields are set, whichever of the two is set when only one is, and
// a generic fallback message when neither is.
func (e StatusError) Error() string {
	switch {
	case e.Status != "" && e.ErrorMessage != "":
		return fmt.Sprintf("%s: %s", e.Status, e.ErrorMessage)
	case e.Status != "":
		return e.Status
	case e.ErrorMessage != "":
		return e.ErrorMessage
	default:
		// this should not happen
		return "something went wrong, please see the ollama server logs for details"
	}
}

36
// ImageData represents the raw binary data of an image file.
type ImageData []byte

39
40
41
// GenerateRequest describes a request sent by [Client.Generate]. While you
// have to specify the Model and Prompt fields, all the other fields have
// reasonable defaults for basic uses.
42
type GenerateRequest struct {
43
44
45
	// Model is the model name; it should be a name familiar to Ollama from
	// the library at https://ollama.com/library
	Model string `json:"model"`
46

47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
	// Prompt is the textual prompt to send to the model.
	Prompt string `json:"prompt"`

	// System overrides the model's default system message/prompt.
	System string `json:"system"`

	// Template overrides the model's default prompt template.
	Template string `json:"template"`

	// Context is the context parameter returned from a previous call to
	// Generate call. It can be used to keep a short conversational memory.
	Context []int `json:"context,omitempty"`

	// Stream specifies whether the response is streaming; it is true by default.
	Stream *bool `json:"stream,omitempty"`

	// Raw set to true means that no formatting will be applied to the prompt.
	Raw bool `json:"raw,omitempty"`

	// Format specifies the format to return a response in.
	Format string `json:"format"`

	// KeepAlive controls how long the model will stay loaded in memory following
	// this request.
	KeepAlive *Duration `json:"keep_alive,omitempty"`

	// Images is an optional list of base64-encoded images accompanying this
	// request, for multimodal models.
	Images []ImageData `json:"images,omitempty"`

	// Options lists model-specific options. For example, temperature can be
	// set through this field, if the model supports it.
79
	Options map[string]interface{} `json:"options"`
80
81
}

82
// ChatRequest describes a request sent by [Client.Chat].
Bruce MacDonald's avatar
Bruce MacDonald committed
83
type ChatRequest struct {
84
85
86
87
88
89
90
91
92
93
94
95
96
97
	// Model is the model name, as in [GenerateRequest].
	Model string `json:"model"`

	// Messages is the messages of the chat - can be used to keep a chat memory.
	Messages []Message `json:"messages"`

	// Stream enable streaming of returned response; true by default.
	Stream *bool `json:"stream,omitempty"`

	// Format is the format to return the response in (e.g. "json").
	Format string `json:"format"`

	// KeepAlive controls how long the model will stay loaded into memory
	// followin the request.
98
	KeepAlive *Duration `json:"keep_alive,omitempty"`
Bruce MacDonald's avatar
Bruce MacDonald committed
99

Michael Yang's avatar
tools  
Michael Yang committed
100
101
102
	// Tools is an optional list of tools the model has access to.
	Tools []Tool `json:"tools,omitempty"`

103
	// Options lists model-specific options.
Bruce MacDonald's avatar
Bruce MacDonald committed
104
105
106
	Options map[string]interface{} `json:"options"`
}

107
108
109
// Message is a single message in a chat sequence. The message contains the
// role ("system", "user", or "assistant"), the content and an optional list
// of images.
Bruce MacDonald's avatar
Bruce MacDonald committed
110
type Message struct {
Michael Yang's avatar
tools  
Michael Yang committed
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
	Role      string      `json:"role"`
	Content   string      `json:"content,omitempty"`
	Images    []ImageData `json:"images,omitempty"`
	ToolCalls []ToolCall  `json:"tool_calls,omitempty"`
}

// ToolCall is a single tool invocation as carried in the ToolCalls field of
// [Message] and [GenerateResponse]. Function names the function to call and
// holds its arguments as a decoded JSON object.
type ToolCall struct {
	ID       string `json:"id"`
	Type     string `json:"type"`
	Function struct {
		Name      string         `json:"name"`
		Arguments map[string]any `json:"arguments"`
	} `json:"function"`
}

// Tool describes a tool that can be offered to the model through the Tools
// field of [ChatRequest]. Function carries the function's name, a
// description, and a parameter schema: the schema's type, the names of the
// required properties, and a type, description and optional list of allowed
// values for each property.
type Tool struct {
	Type     string `json:"type"`
	Function struct {
		Name        string `json:"name"`
		Description string `json:"description"`
		Parameters  struct {
			Type       string   `json:"type"`
			Required   []string `json:"required"`
			Properties map[string]struct {
				Type        string   `json:"type"`
				Description string   `json:"description"`
				Enum        []string `json:"enum,omitempty"`
			} `json:"properties"`
		} `json:"parameters"`
	} `json:"function"`
}

143
144
145
146
147
148
149
150
151
152
153
154
// UnmarshalJSON decodes a Message from JSON and lower-cases its Role. On a
// decoding error m is left untouched.
func (m *Message) UnmarshalJSON(b []byte) error {
	// plain has Message's fields but none of its methods, so decoding into it
	// does not call back into this method.
	type plain Message

	var decoded plain
	err := json.Unmarshal(b, &decoded)
	if err != nil {
		return err
	}

	decoded.Role = strings.ToLower(decoded.Role)
	*m = Message(decoded)
	return nil
}

155
156
// ChatResponse is the response returned by [Client.Chat]. Its fields are
// similar to [GenerateResponse].
Bruce MacDonald's avatar
Bruce MacDonald committed
157
type ChatResponse struct {
158
159
160
	Model      string    `json:"model"`
	CreatedAt  time.Time `json:"created_at"`
	Message    Message   `json:"message"`
Bruce MacDonald's avatar
Bruce MacDonald committed
161
	DoneReason string    `json:"done_reason,omitempty"`
Bruce MacDonald's avatar
Bruce MacDonald committed
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176

	Done bool `json:"done"`

	Metrics
}

// Metrics holds the durations and token counts reported with a response. It
// is embedded in [ChatResponse] and [GenerateResponse], and every field is
// omitted from the JSON encoding when it is zero.
type Metrics struct {
	TotalDuration      time.Duration `json:"total_duration,omitempty"`
	LoadDuration       time.Duration `json:"load_duration,omitempty"`
	PromptEvalCount    int           `json:"prompt_eval_count,omitempty"`
	PromptEvalDuration time.Duration `json:"prompt_eval_duration,omitempty"`
	EvalCount          int           `json:"eval_count,omitempty"`
	EvalDuration       time.Duration `json:"eval_duration,omitempty"`
}

177
178
// Options specified in [GenerateRequest], if you add a new option here add it
// to the API docs also.
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
type Options struct {
	Runner

	// Predict options used at runtime
	NumKeep          int      `json:"num_keep,omitempty"`
	Seed             int      `json:"seed,omitempty"`
	NumPredict       int      `json:"num_predict,omitempty"`
	TopK             int      `json:"top_k,omitempty"`
	TopP             float32  `json:"top_p,omitempty"`
	TFSZ             float32  `json:"tfs_z,omitempty"`
	TypicalP         float32  `json:"typical_p,omitempty"`
	RepeatLastN      int      `json:"repeat_last_n,omitempty"`
	Temperature      float32  `json:"temperature,omitempty"`
	RepeatPenalty    float32  `json:"repeat_penalty,omitempty"`
	PresencePenalty  float32  `json:"presence_penalty,omitempty"`
	FrequencyPenalty float32  `json:"frequency_penalty,omitempty"`
	Mirostat         int      `json:"mirostat,omitempty"`
	MirostatTau      float32  `json:"mirostat_tau,omitempty"`
	MirostatEta      float32  `json:"mirostat_eta,omitempty"`
	PenalizeNewline  bool     `json:"penalize_newline,omitempty"`
	Stop             []string `json:"stop,omitempty"`
}

// Runner holds the options which must be set when the model is loaded into
// memory. UseMMap is a pointer so that an unset value (nil) can be told apart
// from an explicit false.
type Runner struct {
	UseNUMA   bool  `json:"numa,omitempty"`
	NumCtx    int   `json:"num_ctx,omitempty"`
	NumBatch  int   `json:"num_batch,omitempty"`
	NumGPU    int   `json:"num_gpu,omitempty"`
	MainGPU   int   `json:"main_gpu,omitempty"`
	LowVRAM   bool  `json:"low_vram,omitempty"`
	F16KV     bool  `json:"f16_kv,omitempty"`
	LogitsAll bool  `json:"logits_all,omitempty"`
	VocabOnly bool  `json:"vocab_only,omitempty"`
	UseMMap   *bool `json:"use_mmap,omitempty"`
	UseMLock  bool  `json:"use_mlock,omitempty"`
	NumThread int   `json:"num_thread,omitempty"`
}

218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
// EmbedRequest is the request passed to [Client.Embed].
type EmbedRequest struct {
	// Model is the model name.
	Model string `json:"model"`

	// Input is the input to embed.
	Input any `json:"input"`

	// KeepAlive controls how long the model will stay loaded in memory following
	// this request.
	KeepAlive *Duration `json:"keep_alive,omitempty"`

	// Truncate is optional and omitted from the request when nil.
	// NOTE(review): presumably controls truncation of over-long input;
	// confirm against the server handler.
	Truncate *bool `json:"truncate,omitempty"`

	// Options lists model-specific options.
	Options map[string]interface{} `json:"options"`
}

// EmbedResponse is the response from [Client.Embed].
type EmbedResponse struct {
	// Model is the model name.
	Model string `json:"model"`
	// Embeddings holds the embedding vectors; omitted from JSON when empty.
	// NOTE(review): presumably one vector per input; confirm against the
	// server handler.
	Embeddings [][]float32 `json:"embeddings,omitempty"`
}

242
// EmbeddingRequest is the request passed to [Client.Embeddings].
Bruce MacDonald's avatar
Bruce MacDonald committed
243
type EmbeddingRequest struct {
244
245
246
247
248
249
250
251
	// Model is the model name.
	Model string `json:"model"`

	// Prompt is the textual prompt to embed.
	Prompt string `json:"prompt"`

	// KeepAlive controls how long the model will stay loaded in memory following
	// this request.
252
	KeepAlive *Duration `json:"keep_alive,omitempty"`
Bruce MacDonald's avatar
Bruce MacDonald committed
253

254
	// Options lists model-specific options.
Bruce MacDonald's avatar
Bruce MacDonald committed
255
256
257
	Options map[string]interface{} `json:"options"`
}

258
// EmbeddingResponse is the response from [Client.Embeddings].
type EmbeddingResponse struct {
	Embedding []float64 `json:"embedding"`
}

263
// CreateRequest is the request passed to [Client.Create].
type CreateRequest struct {
	Model     string `json:"model"`
	Path      string `json:"path"`
	Modelfile string `json:"modelfile"`
	Stream    *bool  `json:"stream,omitempty"`
	Quantize  string `json:"quantize,omitempty"`

	// Deprecated: set the model name using Model instead.
	Name string `json:"name"`

	// Deprecated: set the quantization level using Quantize instead.
	Quantization string `json:"quantization,omitempty"`
}

278
// DeleteRequest is the request passed to [Client.Delete].
type DeleteRequest struct {
	Model string `json:"model"`

	// Deprecated: set the model name using Model instead.
	Name string `json:"name"`
}

286
// ShowRequest is the request passed to [Client.Show].
type ShowRequest struct {
	Model  string `json:"model"`
	System string `json:"system"`

	// Deprecated: Template is deprecated.
	Template string `json:"template"`
	Verbose  bool   `json:"verbose"`

	Options map[string]interface{} `json:"options"`

	// Deprecated: set the model name using Model instead.
	Name string `json:"name"`
}

301
// ShowResponse is the response returned from [Client.Show].
Patrick Devine's avatar
Patrick Devine committed
302
type ShowResponse struct {
303
304
305
306
307
308
309
310
311
312
	License       string         `json:"license,omitempty"`
	Modelfile     string         `json:"modelfile,omitempty"`
	Parameters    string         `json:"parameters,omitempty"`
	Template      string         `json:"template,omitempty"`
	System        string         `json:"system,omitempty"`
	Details       ModelDetails   `json:"details,omitempty"`
	Messages      []Message      `json:"messages,omitempty"`
	ModelInfo     map[string]any `json:"model_info,omitempty"`
	ProjectorInfo map[string]any `json:"projector_info,omitempty"`
	ModifiedAt    time.Time      `json:"modified_at,omitempty"`
Patrick Devine's avatar
Patrick Devine committed
313
314
}

315
// CopyRequest is the request passed to [Client.Copy].
type CopyRequest struct {
	Source      string `json:"source"`
	Destination string `json:"destination"`
}

321
// PullRequest is the request passed to [Client.Pull].
type PullRequest struct {
	Model    string `json:"model"`
	Insecure bool   `json:"insecure,omitempty"`
	Username string `json:"username"`
	Password string `json:"password"`
	Stream   *bool  `json:"stream,omitempty"`

	// Deprecated: set the model name using Model instead.
	Name string `json:"name"`
}

333
334
// ProgressResponse is the response passed to progress functions like
// [PullProgressFunc] and [PushProgressFunc].
type ProgressResponse struct {
	Status    string `json:"status"`
	Digest    string `json:"digest,omitempty"`
	Total     int64  `json:"total,omitempty"`
	Completed int64  `json:"completed,omitempty"`
}

342
// PushRequest is the request passed to [Client.Push].
type PushRequest struct {
	Model    string `json:"model"`
	Insecure bool   `json:"insecure,omitempty"`
	Username string `json:"username"`
	Password string `json:"password"`
	Stream   *bool  `json:"stream,omitempty"`

	// Deprecated: set the model name using Model instead.
	Name string `json:"name"`
}
353

354
// ListResponse is the response from [Client.List].
Patrick Devine's avatar
Patrick Devine committed
355
type ListResponse struct {
356
	Models []ListModelResponse `json:"models"`
Patrick Devine's avatar
Patrick Devine committed
357
358
}

359
360
361
362
363
364
365
// ProcessResponse is the response from [Client.Process].
type ProcessResponse struct {
	// Models holds one [ProcessModelResponse] description per model.
	Models []ProcessModelResponse `json:"models"`
}

// ListModelResponse is a single model description in [ListResponse].
type ListModelResponse struct {
Patrick Devine's avatar
Patrick Devine committed
366
	Name       string       `json:"name"`
Michael Yang's avatar
Michael Yang committed
367
	Model      string       `json:"model"`
368
	ModifiedAt time.Time    `json:"modified_at"`
Patrick Devine's avatar
Patrick Devine committed
369
370
371
	Size       int64        `json:"size"`
	Digest     string       `json:"digest"`
	Details    ModelDetails `json:"details,omitempty"`
372
373
374
375
376
377
378
379
380
381
382
}

// ProcessModelResponse is a single model description in [ProcessResponse].
type ProcessModelResponse struct {
	Name      string       `json:"name"`
	Model     string       `json:"model"`
	Size      int64        `json:"size"`
	Digest    string       `json:"digest"`
	Details   ModelDetails `json:"details,omitempty"`
	ExpiresAt time.Time    `json:"expires_at"`
	SizeVRAM  int64        `json:"size_vram"`
Patrick Devine's avatar
Patrick Devine committed
383
384
}

385
386
387
388
389
390
391
// RetrieveModelResponse describes a single model by id, object kind,
// creation time and owner.
// NOTE(review): the field names mirror an OpenAI-style model object and
// Created looks like a Unix timestamp; confirm against the handler that
// builds this value.
type RetrieveModelResponse struct {
	Id      string `json:"id"`
	Object  string `json:"object"`
	Created int64  `json:"created"`
	OwnedBy string `json:"owned_by"`
}

Patrick Devine's avatar
Patrick Devine committed
392
393
394
395
// TokenResponse carries a single token string, encoded as "token" in JSON.
type TokenResponse struct {
	Token string `json:"token"`
}

396
// GenerateResponse is the response passed into [GenerateResponseFunc].
Michael Yang's avatar
Michael Yang committed
397
type GenerateResponse struct {
398
399
400
	// Model is the model name that generated the response.
	Model string `json:"model"`

Michael Yang's avatar
Michael Yang committed
401
	// CreatedAt is the timestamp of the response.
402
403
	CreatedAt time.Time `json:"created_at"`

404
405
406
	// Response is the textual response itself.
	Response string `json:"response"`

Michael Yang's avatar
tools  
Michael Yang committed
407
408
409
	// ToolCalls is the list of tools the model wants to call
	ToolCalls []ToolCall `json:"tool_calls,omitempty"`

410
411
412
	// Done specifies if the response is complete.
	Done bool `json:"done"`

413
	// DoneReason is the reason the model stopped generating text.
Bruce MacDonald's avatar
Bruce MacDonald committed
414
	DoneReason string `json:"done_reason,omitempty"`
415

416
417
	// Context is an encoding of the conversation used in this response; this
	// can be sent in the next request to keep a conversational memory.
Michael Yang's avatar
Michael Yang committed
418
	Context []int `json:"context,omitempty"`
419

Bruce MacDonald's avatar
Bruce MacDonald committed
420
	Metrics
421
422
}

423
// ModelDetails provides details about a model.
type ModelDetails struct {
	ParentModel       string   `json:"parent_model"`
	Format            string   `json:"format"`
	Family            string   `json:"family"`
	Families          []string `json:"families"`
	ParameterSize     string   `json:"parameter_size"`
	QuantizationLevel string   `json:"quantization_level"`
}

Bruce MacDonald's avatar
Bruce MacDonald committed
433
434
435
func (m *Metrics) Summary() {
	if m.TotalDuration > 0 {
		fmt.Fprintf(os.Stderr, "total duration:       %v\n", m.TotalDuration)
436
437
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
438
439
	if m.LoadDuration > 0 {
		fmt.Fprintf(os.Stderr, "load duration:        %v\n", m.LoadDuration)
Michael Yang's avatar
Michael Yang committed
440
441
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
442
443
	if m.PromptEvalCount > 0 {
		fmt.Fprintf(os.Stderr, "prompt eval count:    %d token(s)\n", m.PromptEvalCount)
444
445
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
446
447
448
	if m.PromptEvalDuration > 0 {
		fmt.Fprintf(os.Stderr, "prompt eval duration: %s\n", m.PromptEvalDuration)
		fmt.Fprintf(os.Stderr, "prompt eval rate:     %.2f tokens/s\n", float64(m.PromptEvalCount)/m.PromptEvalDuration.Seconds())
449
450
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
451
452
	if m.EvalCount > 0 {
		fmt.Fprintf(os.Stderr, "eval count:           %d token(s)\n", m.EvalCount)
453
454
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
455
456
457
	if m.EvalDuration > 0 {
		fmt.Fprintf(os.Stderr, "eval duration:        %s\n", m.EvalDuration)
		fmt.Fprintf(os.Stderr, "eval rate:            %.2f tokens/s\n", float64(m.EvalCount)/m.EvalDuration.Seconds())
458
	}
459
460
}

461
462
463
464
465
466
467
468
469
470
471
472
473
474
func (opts *Options) FromMap(m map[string]interface{}) error {
	valueOpts := reflect.ValueOf(opts).Elem() // names of the fields in the options struct
	typeOpts := reflect.TypeOf(opts).Elem()   // types of the fields in the options struct

	// build map of json struct tags to their types
	jsonOpts := make(map[string]reflect.StructField)
	for _, field := range reflect.VisibleFields(typeOpts) {
		jsonTag := strings.Split(field.Tag.Get("json"), ",")[0]
		if jsonTag != "" {
			jsonOpts[jsonTag] = field
		}
	}

	for key, val := range m {
475
476
477
478
479
		opt, ok := jsonOpts[key]
		if !ok {
			slog.Warn("invalid option provided", "option", opt.Name)
			continue
		}
Michael Yang's avatar
Michael Yang committed
480

481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
		field := valueOpts.FieldByName(opt.Name)
		if field.IsValid() && field.CanSet() {
			if val == nil {
				continue
			}

			switch field.Kind() {
			case reflect.Int:
				switch t := val.(type) {
				case int64:
					field.SetInt(t)
				case float64:
					// when JSON unmarshals numbers, it uses float64, not int
					field.SetInt(int64(t))
				default:
					return fmt.Errorf("option %q must be of type integer", key)
				}
			case reflect.Bool:
				val, ok := val.(bool)
				if !ok {
					return fmt.Errorf("option %q must be of type boolean", key)
				}
				field.SetBool(val)
			case reflect.Float32:
				// JSON unmarshals to float64
				val, ok := val.(float64)
				if !ok {
					return fmt.Errorf("option %q must be of type float32", key)
				}
				field.SetFloat(val)
			case reflect.String:
				val, ok := val.(string)
				if !ok {
					return fmt.Errorf("option %q must be of type string", key)
				}
				field.SetString(val)
			case reflect.Slice:
				// JSON unmarshals to []interface{}, not []string
				val, ok := val.([]interface{})
				if !ok {
					return fmt.Errorf("option %q must be of type array", key)
				}
				// convert []interface{} to []string
				slice := make([]string, len(val))
				for i, item := range val {
					str, ok := item.(string)
527
					if !ok {
528
						return fmt.Errorf("option %q must be of an array of strings", key)
529
					}
530
					slice[i] = str
531
				}
532
				field.Set(reflect.ValueOf(slice))
533
534
535
536
537
538
539
540
541
542
543
			case reflect.Pointer:
				var b bool
				if field.Type() == reflect.TypeOf(&b) {
					val, ok := val.(bool)
					if !ok {
						return fmt.Errorf("option %q must be of type boolean", key)
					}
					field.Set(reflect.ValueOf(&val))
				} else {
					return fmt.Errorf("unknown type loading config params: %v %v", field.Kind(), field.Type())
				}
544
545
			default:
				return fmt.Errorf("unknown type loading config params: %v", field.Kind())
546
547
548
			}
		}
	}
549

550
551
552
	return nil
}

553
554
// DefaultOptions is the default set of options for [GenerateRequest]; these
// values are used unless the user specifies other values explicitly.
Michael Yang's avatar
Michael Yang committed
555
556
func DefaultOptions() Options {
	return Options{
557
		// options set on request to runner
558
559
560
561
		NumPredict: -1,

		// set a minimal num_keep to avoid issues on context shifts
		NumKeep:          4,
Michael Yang's avatar
Michael Yang committed
562
563
564
565
566
		Temperature:      0.8,
		TopK:             40,
		TopP:             0.9,
		TFSZ:             1.0,
		TypicalP:         1.0,
567
568
569
570
		RepeatLastN:      64,
		RepeatPenalty:    1.1,
		PresencePenalty:  0.0,
		FrequencyPenalty: 0.0,
Michael Yang's avatar
Michael Yang committed
571
572
573
		Mirostat:         0,
		MirostatTau:      5.0,
		MirostatEta:      0.1,
Michael Yang's avatar
Michael Yang committed
574
		PenalizeNewline:  true,
575
		Seed:             -1,
Michael Yang's avatar
Michael Yang committed
576

577
578
		Runner: Runner{
			// options set when the model is loaded
Michael Yang's avatar
Michael Yang committed
579
580
581
			NumCtx:    2048,
			NumBatch:  512,
			NumGPU:    -1, // -1 here indicates that NumGPU should be set dynamically
582
			NumThread: 0,  // let the runtime decide
Michael Yang's avatar
Michael Yang committed
583
584
585
			LowVRAM:   false,
			F16KV:     true,
			UseMLock:  false,
586
			UseMMap:   nil,
Michael Yang's avatar
Michael Yang committed
587
			UseNUMA:   false,
588
		},
Michael Yang's avatar
Michael Yang committed
589
	}
Jeffrey Morgan's avatar
Jeffrey Morgan committed
590
}
Michael Yang's avatar
Michael Yang committed
591
592
593
594
595

// Duration wraps time.Duration with a custom JSON encoding. It is the type of
// the KeepAlive request fields.
type Duration struct {
	time.Duration
}

// MarshalJSON encodes a negative duration as the bare number -1 and any other
// duration as a quoted time.Duration string such as "5m0s".
func (d Duration) MarshalJSON() ([]byte, error) {
	if d.Duration < 0 {
		return []byte("-1"), nil
	}
	return []byte("\"" + d.Duration.String() + "\""), nil
}

// UnmarshalJSON decodes a duration from either a JSON number or a JSON
// string. A number is a count of seconds and may be fractional; a string is
// parsed with time.ParseDuration. A negative value in either form is stored
// as the largest representable duration. Any other JSON type is an error.
func (d *Duration) UnmarshalJSON(b []byte) (err error) {
	var v any
	if err := json.Unmarshal(b, &v); err != nil {
		return err
	}

	d.Duration = 5 * time.Minute

	switch t := v.(type) {
	case float64:
		// Scale in float64 so fractional seconds survive and the product
		// cannot overflow int on 32-bit platforms. Values too large for a
		// time.Duration are clamped, because converting an out-of-range
		// float to an integer is not well defined.
		ns := t * float64(time.Second)
		switch {
		case t < 0, ns >= math.MaxInt64:
			d.Duration = time.Duration(math.MaxInt64)
		default:
			d.Duration = time.Duration(ns)
		}
	case string:
		d.Duration, err = time.ParseDuration(t)
		if err != nil {
			return err
		}
		if d.Duration < 0 {
			d.Duration = time.Duration(math.MaxInt64)
		}
	default:
		return fmt.Errorf("Unsupported type: '%s'", reflect.TypeOf(v))
	}

	return nil
}
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682

// FormatParams converts specified parameter options to their correct types.
//
// Each key of params must be the json tag of a field of Options; an unknown
// key is an error. The first value of each parameter is parsed according to
// the kind of the matching field: float32 for float options, int64 for
// integer options, bool for boolean options, *bool for optional boolean
// options, and the unparsed string for string options. Slice options receive
// the whole value list. A non-slice parameter with no values is an error.
func FormatParams(params map[string][]string) (map[string]interface{}, error) {
	opts := Options{}
	valueOpts := reflect.ValueOf(&opts).Elem() // settable view of the options struct
	typeOpts := reflect.TypeOf(opts)           // type of the options struct

	// build map of json struct tags to their fields
	jsonOpts := make(map[string]reflect.StructField)
	for _, field := range reflect.VisibleFields(typeOpts) {
		jsonTag := strings.Split(field.Tag.Get("json"), ",")[0]
		if jsonTag != "" {
			jsonOpts[jsonTag] = field
		}
	}

	out := make(map[string]interface{})
	// iterate params and set values based on json struct tags
	for key, vals := range params {
		opt, ok := jsonOpts[key]
		if !ok {
			return nil, fmt.Errorf("unknown parameter '%s'", key)
		}

		field := valueOpts.FieldByName(opt.Name)
		if !field.IsValid() || !field.CanSet() {
			continue
		}

		// every kind except a slice reads vals[0]; reject an empty value
		// list up front instead of panicking on the index
		if field.Kind() != reflect.Slice && len(vals) == 0 {
			return nil, fmt.Errorf("missing value for parameter '%s'", key)
		}

		switch field.Kind() {
		case reflect.Float32:
			floatVal, err := strconv.ParseFloat(vals[0], 32)
			if err != nil {
				return nil, fmt.Errorf("invalid float value %s", vals)
			}

			out[key] = float32(floatVal)
		case reflect.Int:
			intVal, err := strconv.ParseInt(vals[0], 10, 64)
			if err != nil {
				return nil, fmt.Errorf("invalid int value %s", vals)
			}

			out[key] = intVal
		case reflect.Bool:
			boolVal, err := strconv.ParseBool(vals[0])
			if err != nil {
				return nil, fmt.Errorf("invalid bool value %s", vals)
			}

			out[key] = boolVal
		case reflect.String:
			out[key] = vals[0]
		case reflect.Slice:
			// TODO: only string slices are supported right now
			out[key] = vals
		case reflect.Pointer:
			// *bool is the only pointer option supported
			var b bool
			if field.Type() == reflect.TypeOf(&b) {
				boolVal, err := strconv.ParseBool(vals[0])
				if err != nil {
					return nil, fmt.Errorf("invalid bool value %s", vals)
				}
				out[key] = &boolVal
			} else {
				return nil, fmt.Errorf("unknown type %s for %s", field.Kind(), key)
			}
		default:
			return nil, fmt.Errorf("unknown type %s for %s", field.Kind(), key)
		}
	}

	return out, nil
}