types.go 23.4 KB
Newer Older
Jeffrey Morgan's avatar
Jeffrey Morgan committed
1
2
package api

3
import (
Michael Yang's avatar
Michael Yang committed
4
	"encoding/json"
5
	"fmt"
6
	"log/slog"
Michael Yang's avatar
Michael Yang committed
7
	"math"
8
	"os"
9
	"reflect"
10
	"strconv"
11
	"strings"
12
	"time"
13
14

	"github.com/ollama/ollama/envconfig"
15
	"github.com/ollama/ollama/types/model"
16
)
Michael Yang's avatar
Michael Yang committed
17

18
// StatusError is an error with an HTTP status code and message.
//
// Only ErrorMessage carries a JSON tag ("error"); StatusCode and Status are
// encoded under their Go field names.
type StatusError struct {
	StatusCode   int
	Status       string
	ErrorMessage string `json:"error"`
}

// Error implements the error interface. It returns "Status: ErrorMessage"
// when both are set, whichever one is set when only one is, and a generic
// fallback message when neither is. StatusCode is not part of the text.
func (e StatusError) Error() string {
	switch {
	case e.Status != "" && e.ErrorMessage != "":
		return fmt.Sprintf("%s: %s", e.Status, e.ErrorMessage)
	case e.Status != "":
		return e.Status
	case e.ErrorMessage != "":
		return e.ErrorMessage
	default:
		// this should not happen
		return "something went wrong, please see the ollama server logs for details"
	}
}

39
// ImageData represents the raw binary data of an image file.
type ImageData []byte

42
43
44
// GenerateRequest describes a request sent by [Client.Generate]. While you
// have to specify the Model and Prompt fields, all the other fields have
// reasonable defaults for basic uses.
45
type GenerateRequest struct {
46
47
48
	// Model is the model name; it should be a name familiar to Ollama from
	// the library at https://ollama.com/library
	Model string `json:"model"`
49

50
51
52
	// Prompt is the textual prompt to send to the model.
	Prompt string `json:"prompt"`

53
54
55
	// Suffix is the text that comes after the inserted text.
	Suffix string `json:"suffix"`

56
57
58
59
60
61
62
	// System overrides the model's default system message/prompt.
	System string `json:"system"`

	// Template overrides the model's default prompt template.
	Template string `json:"template"`

	// Context is the context parameter returned from a previous call to
63
	// [Client.Generate]. It can be used to keep a short conversational memory.
64
65
66
67
68
69
70
71
72
	Context []int `json:"context,omitempty"`

	// Stream specifies whether the response is streaming; it is true by default.
	Stream *bool `json:"stream,omitempty"`

	// Raw set to true means that no formatting will be applied to the prompt.
	Raw bool `json:"raw,omitempty"`

	// Format specifies the format to return a response in.
73
	Format json.RawMessage `json:"format,omitempty"`
74
75
76
77
78

	// KeepAlive controls how long the model will stay loaded in memory following
	// this request.
	KeepAlive *Duration `json:"keep_alive,omitempty"`

79
	// Images is an optional list of raw image bytes accompanying this
80
81
82
83
84
	// request, for multimodal models.
	Images []ImageData `json:"images,omitempty"`

	// Options lists model-specific options. For example, temperature can be
	// set through this field, if the model supports it.
85
	Options map[string]any `json:"options"`
86
87
88
89
90
91

	// Think controls whether thinking/reasoning models will think before
	// responding. Needs to be a pointer so we can distinguish between false
	// (request that thinking _not_ be used) and unset (use the old behavior
	// before this option was introduced)
	Think *bool `json:"think,omitempty"`
92
93
}

94
// ChatRequest describes a request sent by [Client.Chat].
Bruce MacDonald's avatar
Bruce MacDonald committed
95
type ChatRequest struct {
96
97
98
99
100
101
	// Model is the model name, as in [GenerateRequest].
	Model string `json:"model"`

	// Messages is the messages of the chat - can be used to keep a chat memory.
	Messages []Message `json:"messages"`

102
	// Stream enables streaming of returned responses; true by default.
103
104
105
	Stream *bool `json:"stream,omitempty"`

	// Format is the format to return the response in (e.g. "json").
106
	Format json.RawMessage `json:"format,omitempty"`
107
108

	// KeepAlive controls how long the model will stay loaded into memory
109
	// following the request.
110
	KeepAlive *Duration `json:"keep_alive,omitempty"`
Bruce MacDonald's avatar
Bruce MacDonald committed
111

Michael Yang's avatar
tools  
Michael Yang committed
112
	// Tools is an optional list of tools the model has access to.
113
	Tools `json:"tools,omitempty"`
Michael Yang's avatar
tools  
Michael Yang committed
114

115
	// Options lists model-specific options.
116
	Options map[string]any `json:"options"`
117
118
119
120

	// Think controls whether thinking/reasoning models will think before
	// responding
	Think *bool `json:"think,omitempty"`
Bruce MacDonald's avatar
Bruce MacDonald committed
121
122
}

123
124
125
126
127
128
129
// Tools is a list of [Tool] values.
type Tools []Tool

// String returns the JSON encoding of the list. The marshal error is
// deliberately discarded because String has no way to report it; in that
// case the result is the empty string.
func (t Tools) String() string {
	encoded, _ := json.Marshal(t)
	return string(encoded)
}

130
131
132
133
134
// String returns the JSON encoding of the tool. The marshal error is
// deliberately discarded because String has no way to report it; in that
// case the result is the empty string.
func (t Tool) String() string {
	encoded, _ := json.Marshal(t)
	return string(encoded)
}

135
136
137
// Message is a single message in a chat sequence. The message contains the
// role ("system", "user", or "assistant"), the content and an optional list
// of images.
Bruce MacDonald's avatar
Bruce MacDonald committed
138
type Message struct {
139
140
141
142
143
	Role    string `json:"role"`
	Content string `json:"content"`
	// Thinking contains the text that was inside thinking tags in the
	// original model output when ChatRequest.Think is enabled.
	Thinking  string      `json:"thinking,omitempty"`
Michael Yang's avatar
tools  
Michael Yang committed
144
145
	Images    []ImageData `json:"images,omitempty"`
	ToolCalls []ToolCall  `json:"tool_calls,omitempty"`
146
	ToolName  string      `json:"tool_name,omitempty"`
Michael Yang's avatar
tools  
Michael Yang committed
147
148
}

149
150
151
152
153
154
155
156
157
158
159
160
// UnmarshalJSON implements the json.Unmarshaler interface. It decodes the
// message as usual and then lower-cases Role. On a decode error m is left
// untouched.
func (m *Message) UnmarshalJSON(b []byte) error {
	// plain has Message's fields but none of its methods, so decoding into
	// it does not recurse back into this function.
	type plain Message

	var decoded plain
	err := json.Unmarshal(b, &decoded)
	if err != nil {
		return err
	}

	decoded.Role = strings.ToLower(decoded.Role)
	*m = Message(decoded)
	return nil
}

161
162
163
164
165
// ToolCall is one entry of [Message.ToolCalls]; it wraps the function
// invocation under the "function" key.
type ToolCall struct {
	Function ToolCallFunction `json:"function"`
}

// ToolCallFunction names a function and carries its arguments. Index is
// omitted from the JSON encoding when it is zero.
type ToolCallFunction struct {
	Index     int                       `json:"index,omitempty"`
	Name      string                    `json:"name"`
	Arguments ToolCallFunctionArguments `json:"arguments"`
}

// ToolCallFunctionArguments maps argument names to arbitrary JSON values.
type ToolCallFunctionArguments map[string]any

// String returns the JSON encoding of the arguments. The marshal error is
// deliberately discarded because String has no way to report it; in that
// case the result is the empty string.
func (t *ToolCallFunctionArguments) String() string {
	bts, _ := json.Marshal(t)
	return string(bts)
}

type Tool struct {
	Type     string       `json:"type"`
180
	Items    any          `json:"items,omitempty"`
181
182
183
	Function ToolFunction `json:"function"`
}

184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
// PropertyType can be either a string or an array of strings
type PropertyType []string

// UnmarshalJSON implements the json.Unmarshaler interface. A JSON string
// becomes a one-element list, a JSON array of strings is taken as is, and
// JSON null leaves the receiver unchanged.
func (pt *PropertyType) UnmarshalJSON(data []byte) error {
	// Unmarshalling null into a string succeeds and yields "", which would
	// otherwise turn null into the bogus one-element list [""]. Follow the
	// encoding/json convention and treat null as a no-op instead.
	if string(data) == "null" {
		return nil
	}

	// Try to unmarshal as a string first
	var s string
	if err := json.Unmarshal(data, &s); err == nil {
		*pt = []string{s}
		return nil
	}

	// If that fails, try to unmarshal as an array of strings
	var a []string
	if err := json.Unmarshal(data, &a); err != nil {
		return err
	}
	*pt = a
	return nil
}

// MarshalJSON implements the json.Marshaler interface. A single type is
// written as a plain JSON string; any other length is written as an array.
func (pt PropertyType) MarshalJSON() ([]byte, error) {
	if len(pt) == 1 {
		// If there's only one type, marshal as a string
		return json.Marshal(pt[0])
	}
	// Otherwise marshal as an array
	return json.Marshal([]string(pt))
}

// String returns a string representation of the PropertyType: "" when empty,
// the sole element when there is exactly one, and the fmt "%v" rendering of
// the slice otherwise.
func (pt PropertyType) String() string {
	switch len(pt) {
	case 0:
		return ""
	case 1:
		return pt[0]
	default:
		return fmt.Sprintf("%v", []string(pt))
	}
}

226
227
228
229
230
type ToolFunction struct {
	Name        string `json:"name"`
	Description string `json:"description"`
	Parameters  struct {
		Type       string   `json:"type"`
231
232
		Defs       any      `json:"$defs,omitempty"`
		Items      any      `json:"items,omitempty"`
233
234
		Required   []string `json:"required"`
		Properties map[string]struct {
235
			Type        PropertyType `json:"type"`
236
			Items       any          `json:"items,omitempty"`
237
			Description string       `json:"description"`
238
			Enum        []any        `json:"enum,omitempty"`
239
240
241
242
243
244
245
246
247
		} `json:"properties"`
	} `json:"parameters"`
}

func (t *ToolFunction) String() string {
	bts, _ := json.Marshal(t)
	return string(bts)
}

248
249
// ChatResponse is the response returned by [Client.Chat]. Its fields are
// similar to [GenerateResponse].
Bruce MacDonald's avatar
Bruce MacDonald committed
250
type ChatResponse struct {
251
252
253
	Model      string    `json:"model"`
	CreatedAt  time.Time `json:"created_at"`
	Message    Message   `json:"message"`
Bruce MacDonald's avatar
Bruce MacDonald committed
254
	DoneReason string    `json:"done_reason,omitempty"`
Bruce MacDonald's avatar
Bruce MacDonald committed
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269

	Done bool `json:"done"`

	Metrics
}

// Metrics holds the duration and token-count figures that are embedded in
// [ChatResponse] and [GenerateResponse]. Every field is omitted from the JSON
// encoding when it is zero.
type Metrics struct {
	TotalDuration      time.Duration `json:"total_duration,omitempty"`
	LoadDuration       time.Duration `json:"load_duration,omitempty"`
	PromptEvalCount    int           `json:"prompt_eval_count,omitempty"`
	PromptEvalDuration time.Duration `json:"prompt_eval_duration,omitempty"`
	EvalCount          int           `json:"eval_count,omitempty"`
	EvalDuration       time.Duration `json:"eval_duration,omitempty"`
}

270
271
// Options specified in [GenerateRequest].  If you add a new option here, also
// add it to the API docs.
//
// Runner is embedded, so its fields are encoded alongside the predict
// options in one flat JSON object.
type Options struct {
	Runner

	// Predict options used at runtime
	NumKeep          int      `json:"num_keep,omitempty"`
	Seed             int      `json:"seed,omitempty"`
	NumPredict       int      `json:"num_predict,omitempty"`
	TopK             int      `json:"top_k,omitempty"`
	TopP             float32  `json:"top_p,omitempty"`
	MinP             float32  `json:"min_p,omitempty"`
	TypicalP         float32  `json:"typical_p,omitempty"`
	RepeatLastN      int      `json:"repeat_last_n,omitempty"`
	Temperature      float32  `json:"temperature,omitempty"`
	RepeatPenalty    float32  `json:"repeat_penalty,omitempty"`
	PresencePenalty  float32  `json:"presence_penalty,omitempty"`
	FrequencyPenalty float32  `json:"frequency_penalty,omitempty"`
	Stop             []string `json:"stop,omitempty"`
}

// Runner options which must be set when the model is loaded into memory
type Runner struct {
	NumCtx    int   `json:"num_ctx,omitempty"`
	NumBatch  int   `json:"num_batch,omitempty"`
	NumGPU    int   `json:"num_gpu,omitempty"`
	MainGPU   int   `json:"main_gpu,omitempty"`
	UseMMap   *bool `json:"use_mmap,omitempty"`
	NumThread int   `json:"num_thread,omitempty"`
}

301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
// EmbedRequest is the request passed to [Client.Embed].
type EmbedRequest struct {
	// Model is the model name.
	Model string `json:"model"`

	// Input is the input to embed.
	Input any `json:"input"`

	// KeepAlive controls how long the model will stay loaded in memory following
	// this request.
	KeepAlive *Duration `json:"keep_alive,omitempty"`

	Truncate *bool `json:"truncate,omitempty"`

	// Options lists model-specific options.
316
	Options map[string]any `json:"options"`
317
318
319
320
321
}

// EmbedResponse is the response from [Client.Embed].
type EmbedResponse struct {
	Model      string      `json:"model"`
	Embeddings [][]float32 `json:"embeddings"`

	TotalDuration   time.Duration `json:"total_duration,omitempty"`
	LoadDuration    time.Duration `json:"load_duration,omitempty"`
	PromptEvalCount int           `json:"prompt_eval_count,omitempty"`
}

329
// EmbeddingRequest is the request passed to [Client.Embeddings].
Bruce MacDonald's avatar
Bruce MacDonald committed
330
type EmbeddingRequest struct {
331
332
333
334
335
336
337
338
	// Model is the model name.
	Model string `json:"model"`

	// Prompt is the textual prompt to embed.
	Prompt string `json:"prompt"`

	// KeepAlive controls how long the model will stay loaded in memory following
	// this request.
339
	KeepAlive *Duration `json:"keep_alive,omitempty"`
Bruce MacDonald's avatar
Bruce MacDonald committed
340

341
	// Options lists model-specific options.
342
	Options map[string]any `json:"options"`
Bruce MacDonald's avatar
Bruce MacDonald committed
343
344
}

345
// EmbeddingResponse is the response from [Client.Embeddings].
type EmbeddingResponse struct {
	Embedding []float64 `json:"embedding"`
}

350
// CreateRequest is the request passed to [Client.Create].
351
type CreateRequest struct {
352
353
354
355
356
357
358
359
360
361
362
363
364
	Model    string `json:"model"`
	Stream   *bool  `json:"stream,omitempty"`
	Quantize string `json:"quantize,omitempty"`

	From       string            `json:"from,omitempty"`
	Files      map[string]string `json:"files,omitempty"`
	Adapters   map[string]string `json:"adapters,omitempty"`
	Template   string            `json:"template,omitempty"`
	License    any               `json:"license,omitempty"`
	System     string            `json:"system,omitempty"`
	Parameters map[string]any    `json:"parameters,omitempty"`
	Messages   []Message         `json:"messages,omitempty"`

Michael Yang's avatar
Michael Yang committed
365
	// Deprecated: set the model name with Model instead
Michael Yang's avatar
Michael Yang committed
366
	Name string `json:"name"`
Michael Yang's avatar
Michael Yang committed
367
	// Deprecated: use Quantize instead
368
	Quantization string `json:"quantization,omitempty"`
369
370
}

371
// DeleteRequest is the request passed to [Client.Delete].
type DeleteRequest struct {
	Model string `json:"model"`

	// Deprecated: set the model name with Model instead
	Name string `json:"name"`
}

379
// ShowRequest is the request passed to [Client.Show].
type ShowRequest struct {
	Model  string `json:"model"`
	System string `json:"system"`

	// Template is deprecated
	Template string `json:"template"`
	Verbose  bool   `json:"verbose"`

	Options map[string]any `json:"options"`

	// Deprecated: set the model name with Model instead
	Name string `json:"name"`
}

394
// ShowResponse is the response returned from [Client.Show].
Patrick Devine's avatar
Patrick Devine committed
395
type ShowResponse struct {
396
397
398
399
400
401
402
403
404
405
406
407
	License       string             `json:"license,omitempty"`
	Modelfile     string             `json:"modelfile,omitempty"`
	Parameters    string             `json:"parameters,omitempty"`
	Template      string             `json:"template,omitempty"`
	System        string             `json:"system,omitempty"`
	Details       ModelDetails       `json:"details,omitempty"`
	Messages      []Message          `json:"messages,omitempty"`
	ModelInfo     map[string]any     `json:"model_info,omitempty"`
	ProjectorInfo map[string]any     `json:"projector_info,omitempty"`
	Tensors       []Tensor           `json:"tensors,omitempty"`
	Capabilities  []model.Capability `json:"capabilities,omitempty"`
	ModifiedAt    time.Time          `json:"modified_at,omitempty"`
Patrick Devine's avatar
Patrick Devine committed
408
409
}

410
// CopyRequest is the request passed to [Client.Copy].
type CopyRequest struct {
	Source      string `json:"source"`
	Destination string `json:"destination"`
}

416
// PullRequest is the request passed to [Client.Pull].
type PullRequest struct {
	Model    string `json:"model"`
	Insecure bool   `json:"insecure,omitempty"` // Deprecated: ignored
	Username string `json:"username"`           // Deprecated: ignored
	Password string `json:"password"`           // Deprecated: ignored
	Stream   *bool  `json:"stream,omitempty"`

	// Deprecated: set the model name with Model instead
	Name string `json:"name"`
}

428
429
// ProgressResponse is the response passed to progress functions like
// [PullProgressFunc] and [PushProgressFunc].
type ProgressResponse struct {
	Status    string `json:"status"`
	Digest    string `json:"digest,omitempty"`
	Total     int64  `json:"total,omitempty"`
	Completed int64  `json:"completed,omitempty"`
}

437
// PushRequest is the request passed to [Client.Push].
type PushRequest struct {
	Model    string `json:"model"`
	Insecure bool   `json:"insecure,omitempty"`
	Username string `json:"username"`
	Password string `json:"password"`
	Stream   *bool  `json:"stream,omitempty"`

	// Deprecated: set the model name with Model instead
	Name string `json:"name"`
}
448

449
// ListResponse is the response from [Client.List].
Patrick Devine's avatar
Patrick Devine committed
450
type ListResponse struct {
451
	Models []ListModelResponse `json:"models"`
Patrick Devine's avatar
Patrick Devine committed
452
453
}

454
455
456
457
458
459
460
// ProcessResponse is the response from [Client.Process].
type ProcessResponse struct {
	// Models holds one [ProcessModelResponse] per reported model.
	Models []ProcessModelResponse `json:"models"`
}

// ListModelResponse is a single model description in [ListResponse].
type ListModelResponse struct {
461
462
463
464
465
466
	Name       string       `json:"name"`
	Model      string       `json:"model"`
	ModifiedAt time.Time    `json:"modified_at"`
	Size       int64        `json:"size"`
	Digest     string       `json:"digest"`
	Details    ModelDetails `json:"details,omitempty"`
467
468
469
470
}

// ProcessModelResponse is a single model description in [ProcessResponse].
type ProcessModelResponse struct {
471
472
473
474
475
476
477
478
	Name          string       `json:"name"`
	Model         string       `json:"model"`
	Size          int64        `json:"size"`
	Digest        string       `json:"digest"`
	Details       ModelDetails `json:"details,omitempty"`
	ExpiresAt     time.Time    `json:"expires_at"`
	SizeVRAM      int64        `json:"size_vram"`
	ContextLength int          `json:"context_length"`
Patrick Devine's avatar
Patrick Devine committed
479
480
}

Patrick Devine's avatar
Patrick Devine committed
481
482
483
484
// TokenResponse is a response body that carries a single token string under
// the "token" key.
type TokenResponse struct {
	Token string `json:"token"`
}

485
// GenerateResponse is the response passed into [GenerateResponseFunc].
Michael Yang's avatar
Michael Yang committed
486
type GenerateResponse struct {
487
488
489
	// Model is the model name that generated the response.
	Model string `json:"model"`

Michael Yang's avatar
Michael Yang committed
490
	// CreatedAt is the timestamp of the response.
491
492
	CreatedAt time.Time `json:"created_at"`

493
494
495
	// Response is the textual response itself.
	Response string `json:"response"`

496
497
498
499
	// Thinking contains the text that was inside thinking tags in the
	// original model output when ChatRequest.Think is enabled.
	Thinking string `json:"thinking,omitempty"`

500
501
502
	// Done specifies if the response is complete.
	Done bool `json:"done"`

503
	// DoneReason is the reason the model stopped generating text.
Bruce MacDonald's avatar
Bruce MacDonald committed
504
	DoneReason string `json:"done_reason,omitempty"`
505

506
507
	// Context is an encoding of the conversation used in this response; this
	// can be sent in the next request to keep a conversational memory.
Michael Yang's avatar
Michael Yang committed
508
	Context []int `json:"context,omitempty"`
509

Bruce MacDonald's avatar
Bruce MacDonald committed
510
	Metrics
511
512
}

513
// ModelDetails provides details about a model.
type ModelDetails struct {
	ParentModel       string   `json:"parent_model"`
	Format            string   `json:"format"`
	Family            string   `json:"family"`
	Families          []string `json:"families"`
	ParameterSize     string   `json:"parameter_size"`
	QuantizationLevel string   `json:"quantization_level"`
}

523
524
525
526
527
528
529
// Tensor describes the metadata for a given tensor: its name, a type string
// and its shape. It appears in [ShowResponse.Tensors].
type Tensor struct {
	Name  string   `json:"name"`
	Type  string   `json:"type"`
	Shape []uint64 `json:"shape"`
}

Bruce MacDonald's avatar
Bruce MacDonald committed
530
531
532
func (m *Metrics) Summary() {
	if m.TotalDuration > 0 {
		fmt.Fprintf(os.Stderr, "total duration:       %v\n", m.TotalDuration)
533
534
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
535
536
	if m.LoadDuration > 0 {
		fmt.Fprintf(os.Stderr, "load duration:        %v\n", m.LoadDuration)
Michael Yang's avatar
Michael Yang committed
537
538
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
539
540
	if m.PromptEvalCount > 0 {
		fmt.Fprintf(os.Stderr, "prompt eval count:    %d token(s)\n", m.PromptEvalCount)
541
542
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
543
544
545
	if m.PromptEvalDuration > 0 {
		fmt.Fprintf(os.Stderr, "prompt eval duration: %s\n", m.PromptEvalDuration)
		fmt.Fprintf(os.Stderr, "prompt eval rate:     %.2f tokens/s\n", float64(m.PromptEvalCount)/m.PromptEvalDuration.Seconds())
546
547
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
548
549
	if m.EvalCount > 0 {
		fmt.Fprintf(os.Stderr, "eval count:           %d token(s)\n", m.EvalCount)
550
551
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
552
553
554
	if m.EvalDuration > 0 {
		fmt.Fprintf(os.Stderr, "eval duration:        %s\n", m.EvalDuration)
		fmt.Fprintf(os.Stderr, "eval rate:            %.2f tokens/s\n", float64(m.EvalCount)/m.EvalDuration.Seconds())
555
	}
556
557
}

558
func (opts *Options) FromMap(m map[string]any) error {
559
560
561
562
563
564
565
566
567
568
569
570
571
	valueOpts := reflect.ValueOf(opts).Elem() // names of the fields in the options struct
	typeOpts := reflect.TypeOf(opts).Elem()   // types of the fields in the options struct

	// build map of json struct tags to their types
	jsonOpts := make(map[string]reflect.StructField)
	for _, field := range reflect.VisibleFields(typeOpts) {
		jsonTag := strings.Split(field.Tag.Get("json"), ",")[0]
		if jsonTag != "" {
			jsonOpts[jsonTag] = field
		}
	}

	for key, val := range m {
572
573
		opt, ok := jsonOpts[key]
		if !ok {
574
			slog.Warn("invalid option provided", "option", key)
575
576
			continue
		}
Michael Yang's avatar
Michael Yang committed
577

578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
		field := valueOpts.FieldByName(opt.Name)
		if field.IsValid() && field.CanSet() {
			if val == nil {
				continue
			}

			switch field.Kind() {
			case reflect.Int:
				switch t := val.(type) {
				case int64:
					field.SetInt(t)
				case float64:
					// when JSON unmarshals numbers, it uses float64, not int
					field.SetInt(int64(t))
				default:
					return fmt.Errorf("option %q must be of type integer", key)
				}
			case reflect.Bool:
				val, ok := val.(bool)
				if !ok {
					return fmt.Errorf("option %q must be of type boolean", key)
				}
				field.SetBool(val)
			case reflect.Float32:
				// JSON unmarshals to float64
				val, ok := val.(float64)
				if !ok {
					return fmt.Errorf("option %q must be of type float32", key)
				}
				field.SetFloat(val)
			case reflect.String:
				val, ok := val.(string)
				if !ok {
					return fmt.Errorf("option %q must be of type string", key)
				}
				field.SetString(val)
			case reflect.Slice:
615
616
				// JSON unmarshals to []any, not []string
				val, ok := val.([]any)
617
618
619
				if !ok {
					return fmt.Errorf("option %q must be of type array", key)
				}
620
				// convert []any to []string
621
622
623
				slice := make([]string, len(val))
				for i, item := range val {
					str, ok := item.(string)
624
					if !ok {
625
						return fmt.Errorf("option %q must be of an array of strings", key)
626
					}
627
					slice[i] = str
628
				}
629
				field.Set(reflect.ValueOf(slice))
630
631
632
633
634
635
636
637
638
639
640
			case reflect.Pointer:
				var b bool
				if field.Type() == reflect.TypeOf(&b) {
					val, ok := val.(bool)
					if !ok {
						return fmt.Errorf("option %q must be of type boolean", key)
					}
					field.Set(reflect.ValueOf(&val))
				} else {
					return fmt.Errorf("unknown type loading config params: %v %v", field.Kind(), field.Type())
				}
641
642
			default:
				return fmt.Errorf("unknown type loading config params: %v", field.Kind())
643
644
645
			}
		}
	}
646

647
648
649
	return nil
}

650
651
// DefaultOptions is the default set of options for [GenerateRequest]; these
// values are used unless the user specifies other values explicitly.
Michael Yang's avatar
Michael Yang committed
652
653
func DefaultOptions() Options {
	return Options{
654
		// options set on request to runner
655
656
657
658
		NumPredict: -1,

		// set a minimal num_keep to avoid issues on context shifts
		NumKeep:          4,
Michael Yang's avatar
Michael Yang committed
659
660
661
662
		Temperature:      0.8,
		TopK:             40,
		TopP:             0.9,
		TypicalP:         1.0,
663
664
665
666
667
		RepeatLastN:      64,
		RepeatPenalty:    1.1,
		PresencePenalty:  0.0,
		FrequencyPenalty: 0.0,
		Seed:             -1,
Michael Yang's avatar
Michael Yang committed
668

669
670
		Runner: Runner{
			// options set when the model is loaded
671
			NumCtx:    int(envconfig.ContextLength()),
Michael Yang's avatar
Michael Yang committed
672
673
			NumBatch:  512,
			NumGPU:    -1, // -1 here indicates that NumGPU should be set dynamically
674
			NumThread: 0,  // let the runtime decide
675
			UseMMap:   nil,
676
		},
Michael Yang's avatar
Michael Yang committed
677
	}
Jeffrey Morgan's avatar
Jeffrey Morgan committed
678
}
Michael Yang's avatar
Michael Yang committed
679
680
681
682
683

// Duration wraps time.Duration with a JSON encoding that accepts either a
// number of seconds or a Go duration string such as "5m".
type Duration struct {
	time.Duration
}

// MarshalJSON implements the json.Marshaler interface. A negative duration
// is written as the bare number -1; any other value is written as the quoted
// time.Duration string form (for example "5m0s").
func (d Duration) MarshalJSON() ([]byte, error) {
	if d.Duration < 0 {
		return []byte("-1"), nil
	}
	return []byte("\"" + d.Duration.String() + "\""), nil
}

// UnmarshalJSON implements the json.Unmarshaler interface.
//
// A JSON number is read as whole seconds (the fractional part is dropped). A
// JSON string is parsed with time.ParseDuration. In both forms a negative
// value is stored as the largest representable duration, and a number of
// seconds too large to represent is clamped to that same maximum instead of
// overflowing. Any other JSON type is an error.
func (d *Duration) UnmarshalJSON(b []byte) (err error) {
	var v any
	if err := json.Unmarshal(b, &v); err != nil {
		return err
	}

	// Value d is left with when the JSON type is unsupported.
	d.Duration = 5 * time.Minute

	// Largest whole number of seconds that still fits in a time.Duration.
	const maxSeconds = math.MaxInt64 / int64(time.Second)

	switch t := v.(type) {
	case float64:
		switch {
		case t < 0, t > float64(maxSeconds):
			d.Duration = time.Duration(math.MaxInt64)
		default:
			// int64 arithmetic: multiplying as platform int overflows for
			// large values (and on 32-bit platforms for anything above 2s).
			d.Duration = time.Duration(int64(t)) * time.Second
		}
	case string:
		d.Duration, err = time.ParseDuration(t)
		if err != nil {
			return err
		}
		if d.Duration < 0 {
			d.Duration = time.Duration(math.MaxInt64)
		}
	default:
		// %T also renders a nil value (JSON null) cleanly as <nil>.
		return fmt.Errorf("Unsupported type: '%T'", v)
	}

	return nil
}
720
721

// FormatParams converts specified parameter options to their correct types
722
func FormatParams(params map[string][]string) (map[string]any, error) {
723
724
725
726
727
728
729
730
731
732
733
734
735
	opts := Options{}
	valueOpts := reflect.ValueOf(&opts).Elem() // names of the fields in the options struct
	typeOpts := reflect.TypeOf(opts)           // types of the fields in the options struct

	// build map of json struct tags to their types
	jsonOpts := make(map[string]reflect.StructField)
	for _, field := range reflect.VisibleFields(typeOpts) {
		jsonTag := strings.Split(field.Tag.Get("json"), ",")[0]
		if jsonTag != "" {
			jsonOpts[jsonTag] = field
		}
	}

736
	out := make(map[string]any)
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
	// iterate params and set values based on json struct tags
	for key, vals := range params {
		if opt, ok := jsonOpts[key]; !ok {
			return nil, fmt.Errorf("unknown parameter '%s'", key)
		} else {
			field := valueOpts.FieldByName(opt.Name)
			if field.IsValid() && field.CanSet() {
				switch field.Kind() {
				case reflect.Float32:
					floatVal, err := strconv.ParseFloat(vals[0], 32)
					if err != nil {
						return nil, fmt.Errorf("invalid float value %s", vals)
					}

					out[key] = float32(floatVal)
				case reflect.Int:
					intVal, err := strconv.ParseInt(vals[0], 10, 64)
					if err != nil {
						return nil, fmt.Errorf("invalid int value %s", vals)
					}

					out[key] = intVal
				case reflect.Bool:
					boolVal, err := strconv.ParseBool(vals[0])
					if err != nil {
						return nil, fmt.Errorf("invalid bool value %s", vals)
					}

					out[key] = boolVal
				case reflect.String:
					out[key] = vals[0]
				case reflect.Slice:
					// TODO: only string slices are supported right now
					out[key] = vals
771
772
773
774
775
776
777
778
779
780
781
				case reflect.Pointer:
					var b bool
					if field.Type() == reflect.TypeOf(&b) {
						boolVal, err := strconv.ParseBool(vals[0])
						if err != nil {
							return nil, fmt.Errorf("invalid bool value %s", vals)
						}
						out[key] = &boolVal
					} else {
						return nil, fmt.Errorf("unknown type %s for %s", field.Kind(), key)
					}
782
783
784
785
786
787
788
789
790
				default:
					return nil, fmt.Errorf("unknown type %s for %s", field.Kind(), key)
				}
			}
		}
	}

	return out, nil
}