types.go 23 KB
Newer Older
Jeffrey Morgan's avatar
Jeffrey Morgan committed
1
2
package api

3
import (
Michael Yang's avatar
Michael Yang committed
4
	"encoding/json"
5
	"fmt"
6
	"log/slog"
Michael Yang's avatar
Michael Yang committed
7
	"math"
8
	"os"
9
	"reflect"
10
	"strconv"
11
	"strings"
12
	"time"
13
14

	"github.com/ollama/ollama/envconfig"
15
	"github.com/ollama/ollama/types/model"
16
)
Michael Yang's avatar
Michael Yang committed
17

18
// StatusError is an error with an HTTP status code and message.
type StatusError struct {
	StatusCode   int
	Status       string
	ErrorMessage string `json:"error"`
}

// Error implements the error interface. It returns "Status: ErrorMessage"
// when both are set, whichever one is set when only one is, and a generic
// fallback message when neither is.
func (e StatusError) Error() string {
	switch {
	case e.Status != "" && e.ErrorMessage != "":
		return fmt.Sprintf("%s: %s", e.Status, e.ErrorMessage)
	case e.Status != "":
		return e.Status
	case e.ErrorMessage != "":
		return e.ErrorMessage
	default:
		// this should not happen
		return "something went wrong, please see the ollama server logs for details"
	}
}

39
// ImageData represents the raw binary data of an image file.
type ImageData []byte

42
43
44
// GenerateRequest describes a request sent by [Client.Generate]. While you
// have to specify the Model and Prompt fields, all the other fields have
// reasonable defaults for basic uses.
45
type GenerateRequest struct {
46
47
48
	// Model is the model name; it should be a name familiar to Ollama from
	// the library at https://ollama.com/library
	Model string `json:"model"`
49

50
51
52
	// Prompt is the textual prompt to send to the model.
	Prompt string `json:"prompt"`

53
54
55
	// Suffix is the text that comes after the inserted text.
	Suffix string `json:"suffix"`

56
57
58
59
60
61
62
	// System overrides the model's default system message/prompt.
	System string `json:"system"`

	// Template overrides the model's default prompt template.
	Template string `json:"template"`

	// Context is the context parameter returned from a previous call to
63
	// [Client.Generate]. It can be used to keep a short conversational memory.
64
65
66
67
68
69
70
71
72
	Context []int `json:"context,omitempty"`

	// Stream specifies whether the response is streaming; it is true by default.
	Stream *bool `json:"stream,omitempty"`

	// Raw set to true means that no formatting will be applied to the prompt.
	Raw bool `json:"raw,omitempty"`

	// Format specifies the format to return a response in.
73
	Format json.RawMessage `json:"format,omitempty"`
74
75
76
77
78
79
80
81
82
83
84

	// KeepAlive controls how long the model will stay loaded in memory following
	// this request.
	KeepAlive *Duration `json:"keep_alive,omitempty"`

	// Images is an optional list of base64-encoded images accompanying this
	// request, for multimodal models.
	Images []ImageData `json:"images,omitempty"`

	// Options lists model-specific options. For example, temperature can be
	// set through this field, if the model supports it.
85
	Options map[string]any `json:"options"`
86
87
}

88
// ChatRequest describes a request sent by [Client.Chat].
Bruce MacDonald's avatar
Bruce MacDonald committed
89
type ChatRequest struct {
90
91
92
93
94
95
	// Model is the model name, as in [GenerateRequest].
	Model string `json:"model"`

	// Messages is the messages of the chat - can be used to keep a chat memory.
	Messages []Message `json:"messages"`

96
	// Stream enables streaming of returned responses; true by default.
97
98
99
	Stream *bool `json:"stream,omitempty"`

	// Format is the format to return the response in (e.g. "json").
100
	Format json.RawMessage `json:"format,omitempty"`
101
102

	// KeepAlive controls how long the model will stay loaded into memory
103
	// following the request.
104
	KeepAlive *Duration `json:"keep_alive,omitempty"`
Bruce MacDonald's avatar
Bruce MacDonald committed
105

Michael Yang's avatar
tools  
Michael Yang committed
106
	// Tools is an optional list of tools the model has access to.
107
	Tools `json:"tools,omitempty"`
Michael Yang's avatar
tools  
Michael Yang committed
108

109
	// Options lists model-specific options.
110
	Options map[string]any `json:"options"`
Bruce MacDonald's avatar
Bruce MacDonald committed
111
112
}

113
114
115
116
117
118
119
// Tools is a list of [Tool] values.
type Tools []Tool

// String renders the list as its JSON encoding. The marshal error is
// deliberately discarded, so a failed encoding yields an empty string.
func (t Tools) String() string {
	encoded, _ := json.Marshal(t)
	return string(encoded)
}

120
121
122
123
124
// String renders the tool as its JSON encoding. The marshal error is
// deliberately discarded, so a failed encoding yields an empty string.
func (t Tool) String() string {
	encoded, _ := json.Marshal(t)
	return string(encoded)
}

125
126
127
// Message is a single message in a chat sequence. The message contains the
// role ("system", "user", or "assistant"), the content and an optional list
// of images.
Bruce MacDonald's avatar
Bruce MacDonald committed
128
type Message struct {
Michael Yang's avatar
tools  
Michael Yang committed
129
	Role      string      `json:"role"`
130
	Content   string      `json:"content"`
Michael Yang's avatar
tools  
Michael Yang committed
131
132
133
134
	Images    []ImageData `json:"images,omitempty"`
	ToolCalls []ToolCall  `json:"tool_calls,omitempty"`
}

135
136
137
138
139
140
141
142
143
144
145
146
// UnmarshalJSON decodes b into m and normalizes Role to lower case. On a
// decode error m is left untouched.
func (m *Message) UnmarshalJSON(b []byte) error {
	// plain has Message's fields but none of its methods, so decoding into
	// it does not re-enter this UnmarshalJSON.
	type plain Message

	var decoded plain
	if err := json.Unmarshal(b, &decoded); err != nil {
		return err
	}

	decoded.Role = strings.ToLower(decoded.Role)
	*m = Message(decoded)
	return nil
}

147
148
149
150
151
// ToolCall is one entry of [Message.ToolCalls]; it wraps the function
// invocation described by [ToolCallFunction].
type ToolCall struct {
	Function ToolCallFunction `json:"function"`
}

type ToolCallFunction struct {
152
	Index     int                       `json:"index,omitempty"`
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
	Name      string                    `json:"name"`
	Arguments ToolCallFunctionArguments `json:"arguments"`
}

// ToolCallFunctionArguments maps argument names to their decoded JSON values.
type ToolCallFunctionArguments map[string]any

// String renders the arguments as their JSON encoding. The marshal error is
// deliberately discarded, so a failed encoding yields an empty string.
func (t *ToolCallFunctionArguments) String() string {
	encoded, _ := json.Marshal(t)
	return string(encoded)
}

// Tool is one entry of [Tools]. Type is serialized under the "type" key and
// Function holds the [ToolFunction] definition.
type Tool struct {
	Type     string       `json:"type"`
	Function ToolFunction `json:"function"`
}

169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
// PropertyType can be either a string or an array of strings
type PropertyType []string

// UnmarshalJSON implements the json.Unmarshaler interface. A JSON string
// becomes a one-element list, a JSON array of strings is taken as is, and
// JSON null is a no-op, following the encoding/json convention.
func (pt *PropertyType) UnmarshalJSON(data []byte) error {
	// json.Unmarshal accepts null for a string target without error, which
	// would otherwise turn null into the bogus one-element list {""}.
	if strings.TrimSpace(string(data)) == "null" {
		return nil
	}

	// Try to unmarshal as a string first
	var s string
	if err := json.Unmarshal(data, &s); err == nil {
		*pt = []string{s}
		return nil
	}

	// If that fails, try to unmarshal as an array of strings
	var a []string
	if err := json.Unmarshal(data, &a); err != nil {
		return err
	}
	*pt = a
	return nil
}

// MarshalJSON implements the json.Marshaler interface. A single type is
// written as a bare string; anything else is written as an array.
func (pt PropertyType) MarshalJSON() ([]byte, error) {
	if len(pt) == 1 {
		// If there's only one type, marshal as a string
		return json.Marshal(pt[0])
	}
	// Otherwise marshal as an array
	return json.Marshal([]string(pt))
}

// String returns a string representation of the PropertyType: empty for no
// types, the type itself for one, and fmt's %v slice form for several.
func (pt PropertyType) String() string {
	switch len(pt) {
	case 0:
		return ""
	case 1:
		return pt[0]
	default:
		return fmt.Sprintf("%v", []string(pt))
	}
}

211
212
213
214
215
216
217
type ToolFunction struct {
	Name        string `json:"name"`
	Description string `json:"description"`
	Parameters  struct {
		Type       string   `json:"type"`
		Required   []string `json:"required"`
		Properties map[string]struct {
218
219
220
			Type        PropertyType `json:"type"`
			Description string       `json:"description"`
			Enum        []string     `json:"enum,omitempty"`
221
222
223
224
225
226
227
228
229
		} `json:"properties"`
	} `json:"parameters"`
}

func (t *ToolFunction) String() string {
	bts, _ := json.Marshal(t)
	return string(bts)
}

230
231
// ChatResponse is the response returned by [Client.Chat]. Its fields are
// similar to [GenerateResponse].
Bruce MacDonald's avatar
Bruce MacDonald committed
232
type ChatResponse struct {
233
234
235
	Model      string    `json:"model"`
	CreatedAt  time.Time `json:"created_at"`
	Message    Message   `json:"message"`
Bruce MacDonald's avatar
Bruce MacDonald committed
236
	DoneReason string    `json:"done_reason,omitempty"`
Bruce MacDonald's avatar
Bruce MacDonald committed
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251

	Done bool `json:"done"`

	Metrics
}

// Metrics holds the duration and token-count figures embedded in
// [ChatResponse] and [GenerateResponse]. Every field is omitted from the
// JSON encoding when zero; [Metrics.Summary] prints the positive ones.
type Metrics struct {
	TotalDuration      time.Duration `json:"total_duration,omitempty"`
	LoadDuration       time.Duration `json:"load_duration,omitempty"`
	PromptEvalCount    int           `json:"prompt_eval_count,omitempty"`
	PromptEvalDuration time.Duration `json:"prompt_eval_duration,omitempty"`
	EvalCount          int           `json:"eval_count,omitempty"`
	EvalDuration       time.Duration `json:"eval_duration,omitempty"`
}

252
253
// Options specified in [GenerateRequest].  If you add a new option here, also
// add it to the API docs.
254
255
256
257
258
259
260
261
262
type Options struct {
	Runner

	// Predict options used at runtime
	NumKeep          int      `json:"num_keep,omitempty"`
	Seed             int      `json:"seed,omitempty"`
	NumPredict       int      `json:"num_predict,omitempty"`
	TopK             int      `json:"top_k,omitempty"`
	TopP             float32  `json:"top_p,omitempty"`
263
	MinP             float32  `json:"min_p,omitempty"`
264
265
266
267
268
269
270
271
272
273
274
275
276
277
	TypicalP         float32  `json:"typical_p,omitempty"`
	RepeatLastN      int      `json:"repeat_last_n,omitempty"`
	Temperature      float32  `json:"temperature,omitempty"`
	RepeatPenalty    float32  `json:"repeat_penalty,omitempty"`
	PresencePenalty  float32  `json:"presence_penalty,omitempty"`
	FrequencyPenalty float32  `json:"frequency_penalty,omitempty"`
	Mirostat         int      `json:"mirostat,omitempty"`
	MirostatTau      float32  `json:"mirostat_tau,omitempty"`
	MirostatEta      float32  `json:"mirostat_eta,omitempty"`
	Stop             []string `json:"stop,omitempty"`
}

// Runner options which must be set when the model is loaded into memory
type Runner struct {
	NumCtx    int   `json:"num_ctx,omitempty"`
	NumBatch  int   `json:"num_batch,omitempty"`
	NumGPU    int   `json:"num_gpu,omitempty"`
	MainGPU   int   `json:"main_gpu,omitempty"`
	LowVRAM   bool  `json:"low_vram,omitempty"`
	F16KV     bool  `json:"f16_kv,omitempty"` // Deprecated: This option is ignored
	LogitsAll bool  `json:"logits_all,omitempty"`
	VocabOnly bool  `json:"vocab_only,omitempty"`
	UseMMap   *bool `json:"use_mmap,omitempty"`
	UseMLock  bool  `json:"use_mlock,omitempty"`
	NumThread int   `json:"num_thread,omitempty"`
}

291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
// EmbedRequest is the request passed to [Client.Embed].
type EmbedRequest struct {
	// Model is the model name.
	Model string `json:"model"`

	// Input is the input to embed.
	Input any `json:"input"`

	// KeepAlive controls how long the model will stay loaded in memory following
	// this request.
	KeepAlive *Duration `json:"keep_alive,omitempty"`

	Truncate *bool `json:"truncate,omitempty"`

	// Options lists model-specific options.
306
	Options map[string]any `json:"options"`
307
308
309
310
311
}

// EmbedResponse is the response from [Client.Embed].
type EmbedResponse struct {
	Model      string      `json:"model"`
	Embeddings [][]float32 `json:"embeddings"`

	TotalDuration   time.Duration `json:"total_duration,omitempty"`
	LoadDuration    time.Duration `json:"load_duration,omitempty"`
	PromptEvalCount int           `json:"prompt_eval_count,omitempty"`
}

319
// EmbeddingRequest is the request passed to [Client.Embeddings].
Bruce MacDonald's avatar
Bruce MacDonald committed
320
type EmbeddingRequest struct {
321
322
323
324
325
326
327
328
	// Model is the model name.
	Model string `json:"model"`

	// Prompt is the textual prompt to embed.
	Prompt string `json:"prompt"`

	// KeepAlive controls how long the model will stay loaded in memory following
	// this request.
329
	KeepAlive *Duration `json:"keep_alive,omitempty"`
Bruce MacDonald's avatar
Bruce MacDonald committed
330

331
	// Options lists model-specific options.
332
	Options map[string]any `json:"options"`
Bruce MacDonald's avatar
Bruce MacDonald committed
333
334
}

335
// EmbeddingResponse is the response from [Client.Embeddings].
type EmbeddingResponse struct {
	Embedding []float64 `json:"embedding"`
}

340
// CreateRequest is the request passed to [Client.Create].
341
type CreateRequest struct {
342
343
344
345
346
347
348
349
350
351
352
353
354
	Model    string `json:"model"`
	Stream   *bool  `json:"stream,omitempty"`
	Quantize string `json:"quantize,omitempty"`

	From       string            `json:"from,omitempty"`
	Files      map[string]string `json:"files,omitempty"`
	Adapters   map[string]string `json:"adapters,omitempty"`
	Template   string            `json:"template,omitempty"`
	License    any               `json:"license,omitempty"`
	System     string            `json:"system,omitempty"`
	Parameters map[string]any    `json:"parameters,omitempty"`
	Messages   []Message         `json:"messages,omitempty"`

Michael Yang's avatar
Michael Yang committed
355
	// Deprecated: set the model name with Model instead
Michael Yang's avatar
Michael Yang committed
356
	Name string `json:"name"`
Michael Yang's avatar
Michael Yang committed
357
	// Deprecated: use Quantize instead
358
	Quantization string `json:"quantization,omitempty"`
359
360
}

361
// DeleteRequest is the request passed to [Client.Delete].
type DeleteRequest struct {
	Model string `json:"model"`

	// Deprecated: set the model name with Model instead
	Name string `json:"name"`
}

369
// ShowRequest is the request passed to [Client.Show].
type ShowRequest struct {
	Model  string `json:"model"`
	System string `json:"system"`

	// Template is deprecated
	Template string `json:"template"`
	Verbose  bool   `json:"verbose"`

	Options map[string]any `json:"options"`

	// Deprecated: set the model name with Model instead
	Name string `json:"name"`
}

384
// ShowResponse is the response returned from [Client.Show].
Patrick Devine's avatar
Patrick Devine committed
385
type ShowResponse struct {
386
387
388
389
390
391
392
393
394
395
396
397
	License       string             `json:"license,omitempty"`
	Modelfile     string             `json:"modelfile,omitempty"`
	Parameters    string             `json:"parameters,omitempty"`
	Template      string             `json:"template,omitempty"`
	System        string             `json:"system,omitempty"`
	Details       ModelDetails       `json:"details,omitempty"`
	Messages      []Message          `json:"messages,omitempty"`
	ModelInfo     map[string]any     `json:"model_info,omitempty"`
	ProjectorInfo map[string]any     `json:"projector_info,omitempty"`
	Tensors       []Tensor           `json:"tensors,omitempty"`
	Capabilities  []model.Capability `json:"capabilities,omitempty"`
	ModifiedAt    time.Time          `json:"modified_at,omitempty"`
Patrick Devine's avatar
Patrick Devine committed
398
399
}

400
// CopyRequest is the request passed to [Client.Copy].
type CopyRequest struct {
	Source      string `json:"source"`
	Destination string `json:"destination"`
}

406
// PullRequest is the request passed to [Client.Pull].
type PullRequest struct {
	Model    string `json:"model"`
	Insecure bool   `json:"insecure,omitempty"` // Deprecated: ignored
	Username string `json:"username"`           // Deprecated: ignored
	Password string `json:"password"`           // Deprecated: ignored
	Stream   *bool  `json:"stream,omitempty"`

	// Deprecated: set the model name with Model instead
	Name string `json:"name"`
}

418
419
// ProgressResponse is the response passed to progress functions like
// [PullProgressFunc] and [PushProgressFunc].
type ProgressResponse struct {
	Status    string `json:"status"`
	Digest    string `json:"digest,omitempty"`
	Total     int64  `json:"total,omitempty"`
	Completed int64  `json:"completed,omitempty"`
}

427
// PushRequest is the request passed to [Client.Push].
type PushRequest struct {
	Model    string `json:"model"`
	Insecure bool   `json:"insecure,omitempty"`
	Username string `json:"username"`
	Password string `json:"password"`
	Stream   *bool  `json:"stream,omitempty"`

	// Deprecated: set the model name with Model instead
	Name string `json:"name"`
}
438

439
// ListResponse is the response from [Client.List].
Patrick Devine's avatar
Patrick Devine committed
440
type ListResponse struct {
441
	Models []ListModelResponse `json:"models"`
Patrick Devine's avatar
Patrick Devine committed
442
443
}

444
445
446
447
448
449
450
// ProcessResponse is the response from [Client.Process]. Models holds one
// [ProcessModelResponse] per listed model.
type ProcessResponse struct {
	Models []ProcessModelResponse `json:"models"`
}

// ListModelResponse is a single model description in [ListResponse].
type ListModelResponse struct {
Patrick Devine's avatar
Patrick Devine committed
451
	Name       string       `json:"name"`
Michael Yang's avatar
Michael Yang committed
452
	Model      string       `json:"model"`
453
	ModifiedAt time.Time    `json:"modified_at"`
Patrick Devine's avatar
Patrick Devine committed
454
455
456
	Size       int64        `json:"size"`
	Digest     string       `json:"digest"`
	Details    ModelDetails `json:"details,omitempty"`
457
458
459
460
461
462
463
464
465
466
467
}

// ProcessModelResponse is a single model description in [ProcessResponse].
type ProcessModelResponse struct {
	Name      string       `json:"name"`
	Model     string       `json:"model"`
	Size      int64        `json:"size"`
	Digest    string       `json:"digest"`
	Details   ModelDetails `json:"details,omitempty"`
	ExpiresAt time.Time    `json:"expires_at"`
	SizeVRAM  int64        `json:"size_vram"`
Patrick Devine's avatar
Patrick Devine committed
468
469
}

470
471
472
473
474
475
476
// RetrieveModelResponse describes a single model by id, object kind,
// creation time, and owner.
// NOTE(review): the field names look like an OpenAI-compatible model object;
// Created is presumably a Unix timestamp — confirm against the handler.
type RetrieveModelResponse struct {
	Id      string `json:"id"`
	Object  string `json:"object"`
	Created int64  `json:"created"`
	OwnedBy string `json:"owned_by"`
}

Patrick Devine's avatar
Patrick Devine committed
477
478
479
480
// TokenResponse carries a single token string, serialized under the "token"
// key.
type TokenResponse struct {
	Token string `json:"token"`
}

481
// GenerateResponse is the response passed into [GenerateResponseFunc].
Michael Yang's avatar
Michael Yang committed
482
type GenerateResponse struct {
483
484
485
	// Model is the model name that generated the response.
	Model string `json:"model"`

Michael Yang's avatar
Michael Yang committed
486
	// CreatedAt is the timestamp of the response.
487
488
	CreatedAt time.Time `json:"created_at"`

489
490
491
492
493
494
	// Response is the textual response itself.
	Response string `json:"response"`

	// Done specifies if the response is complete.
	Done bool `json:"done"`

495
	// DoneReason is the reason the model stopped generating text.
Bruce MacDonald's avatar
Bruce MacDonald committed
496
	DoneReason string `json:"done_reason,omitempty"`
497

498
499
	// Context is an encoding of the conversation used in this response; this
	// can be sent in the next request to keep a conversational memory.
Michael Yang's avatar
Michael Yang committed
500
	Context []int `json:"context,omitempty"`
501

Bruce MacDonald's avatar
Bruce MacDonald committed
502
	Metrics
503
504
}

505
// ModelDetails provides details about a model.
type ModelDetails struct {
	ParentModel       string   `json:"parent_model"`
	Format            string   `json:"format"`
	Family            string   `json:"family"`
	Families          []string `json:"families"`
	ParameterSize     string   `json:"parameter_size"`
	QuantizationLevel string   `json:"quantization_level"`
}

515
516
517
518
519
520
521
// Tensor describes the metadata for a given tensor: its name, its type as a
// string, and its shape as a list of unsigned sizes. It appears in
// [ShowResponse] as the Tensors field.
type Tensor struct {
	Name  string   `json:"name"`
	Type  string   `json:"type"`
	Shape []uint64 `json:"shape"`
}

Bruce MacDonald's avatar
Bruce MacDonald committed
522
523
524
func (m *Metrics) Summary() {
	if m.TotalDuration > 0 {
		fmt.Fprintf(os.Stderr, "total duration:       %v\n", m.TotalDuration)
525
526
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
527
528
	if m.LoadDuration > 0 {
		fmt.Fprintf(os.Stderr, "load duration:        %v\n", m.LoadDuration)
Michael Yang's avatar
Michael Yang committed
529
530
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
531
532
	if m.PromptEvalCount > 0 {
		fmt.Fprintf(os.Stderr, "prompt eval count:    %d token(s)\n", m.PromptEvalCount)
533
534
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
535
536
537
	if m.PromptEvalDuration > 0 {
		fmt.Fprintf(os.Stderr, "prompt eval duration: %s\n", m.PromptEvalDuration)
		fmt.Fprintf(os.Stderr, "prompt eval rate:     %.2f tokens/s\n", float64(m.PromptEvalCount)/m.PromptEvalDuration.Seconds())
538
539
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
540
541
	if m.EvalCount > 0 {
		fmt.Fprintf(os.Stderr, "eval count:           %d token(s)\n", m.EvalCount)
542
543
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
544
545
546
	if m.EvalDuration > 0 {
		fmt.Fprintf(os.Stderr, "eval duration:        %s\n", m.EvalDuration)
		fmt.Fprintf(os.Stderr, "eval rate:            %.2f tokens/s\n", float64(m.EvalCount)/m.EvalDuration.Seconds())
547
	}
548
549
}

550
func (opts *Options) FromMap(m map[string]any) error {
551
552
553
554
555
556
557
558
559
560
561
562
563
	valueOpts := reflect.ValueOf(opts).Elem() // names of the fields in the options struct
	typeOpts := reflect.TypeOf(opts).Elem()   // types of the fields in the options struct

	// build map of json struct tags to their types
	jsonOpts := make(map[string]reflect.StructField)
	for _, field := range reflect.VisibleFields(typeOpts) {
		jsonTag := strings.Split(field.Tag.Get("json"), ",")[0]
		if jsonTag != "" {
			jsonOpts[jsonTag] = field
		}
	}

	for key, val := range m {
564
565
		opt, ok := jsonOpts[key]
		if !ok {
566
			slog.Warn("invalid option provided", "option", key)
567
568
			continue
		}
Michael Yang's avatar
Michael Yang committed
569

570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
		field := valueOpts.FieldByName(opt.Name)
		if field.IsValid() && field.CanSet() {
			if val == nil {
				continue
			}

			switch field.Kind() {
			case reflect.Int:
				switch t := val.(type) {
				case int64:
					field.SetInt(t)
				case float64:
					// when JSON unmarshals numbers, it uses float64, not int
					field.SetInt(int64(t))
				default:
					return fmt.Errorf("option %q must be of type integer", key)
				}
			case reflect.Bool:
				val, ok := val.(bool)
				if !ok {
					return fmt.Errorf("option %q must be of type boolean", key)
				}
				field.SetBool(val)
			case reflect.Float32:
				// JSON unmarshals to float64
				val, ok := val.(float64)
				if !ok {
					return fmt.Errorf("option %q must be of type float32", key)
				}
				field.SetFloat(val)
			case reflect.String:
				val, ok := val.(string)
				if !ok {
					return fmt.Errorf("option %q must be of type string", key)
				}
				field.SetString(val)
			case reflect.Slice:
607
608
				// JSON unmarshals to []any, not []string
				val, ok := val.([]any)
609
610
611
				if !ok {
					return fmt.Errorf("option %q must be of type array", key)
				}
612
				// convert []any to []string
613
614
615
				slice := make([]string, len(val))
				for i, item := range val {
					str, ok := item.(string)
616
					if !ok {
617
						return fmt.Errorf("option %q must be of an array of strings", key)
618
					}
619
					slice[i] = str
620
				}
621
				field.Set(reflect.ValueOf(slice))
622
623
624
625
626
627
628
629
630
631
632
			case reflect.Pointer:
				var b bool
				if field.Type() == reflect.TypeOf(&b) {
					val, ok := val.(bool)
					if !ok {
						return fmt.Errorf("option %q must be of type boolean", key)
					}
					field.Set(reflect.ValueOf(&val))
				} else {
					return fmt.Errorf("unknown type loading config params: %v %v", field.Kind(), field.Type())
				}
633
634
			default:
				return fmt.Errorf("unknown type loading config params: %v", field.Kind())
635
636
637
			}
		}
	}
638

639
640
641
	return nil
}

642
643
// DefaultOptions is the default set of options for [GenerateRequest]; these
// values are used unless the user specifies other values explicitly.
Michael Yang's avatar
Michael Yang committed
644
645
func DefaultOptions() Options {
	return Options{
646
		// options set on request to runner
647
648
649
650
		NumPredict: -1,

		// set a minimal num_keep to avoid issues on context shifts
		NumKeep:          4,
Michael Yang's avatar
Michael Yang committed
651
652
653
654
		Temperature:      0.8,
		TopK:             40,
		TopP:             0.9,
		TypicalP:         1.0,
655
656
657
658
		RepeatLastN:      64,
		RepeatPenalty:    1.1,
		PresencePenalty:  0.0,
		FrequencyPenalty: 0.0,
Michael Yang's avatar
Michael Yang committed
659
660
661
		Mirostat:         0,
		MirostatTau:      5.0,
		MirostatEta:      0.1,
662
		Seed:             -1,
Michael Yang's avatar
Michael Yang committed
663

664
665
		Runner: Runner{
			// options set when the model is loaded
666
			NumCtx:    int(envconfig.ContextLength()),
Michael Yang's avatar
Michael Yang committed
667
668
			NumBatch:  512,
			NumGPU:    -1, // -1 here indicates that NumGPU should be set dynamically
669
			NumThread: 0,  // let the runtime decide
Michael Yang's avatar
Michael Yang committed
670
671
			LowVRAM:   false,
			UseMLock:  false,
672
			UseMMap:   nil,
673
		},
Michael Yang's avatar
Michael Yang committed
674
	}
Jeffrey Morgan's avatar
Jeffrey Morgan committed
675
}
Michael Yang's avatar
Michael Yang committed
676
677
678
679
680

// Duration wraps time.Duration with the JSON encoding used by the keep_alive
// request fields: a negative duration is written as the number -1 and read
// back as the maximum representable duration.
type Duration struct {
	time.Duration
}

// MarshalJSON implements the json.Marshaler interface. Negative durations
// are written as the number -1; all others as a quoted duration string such
// as "5m0s".
func (d Duration) MarshalJSON() ([]byte, error) {
	if d.Duration < 0 {
		return []byte("-1"), nil
	}
	return []byte("\"" + d.Duration.String() + "\""), nil
}

// UnmarshalJSON implements the json.Unmarshaler interface. It accepts a JSON
// number, read as seconds with fractional seconds preserved, or a JSON
// string in time.ParseDuration syntax. Negative values, and values too large
// to represent, become the maximum duration. Any other JSON type is an
// error, and d is left unchanged whenever an error is returned.
func (d *Duration) UnmarshalJSON(b []byte) error {
	var v any
	if err := json.Unmarshal(b, &v); err != nil {
		return err
	}

	switch t := v.(type) {
	case float64:
		// Scale in floating point: converting to int first would drop
		// fractional seconds and overflow where int is 32 bits wide.
		nanos := t * float64(time.Second)
		if t < 0 || nanos >= float64(math.MaxInt64) {
			d.Duration = time.Duration(math.MaxInt64)
		} else {
			d.Duration = time.Duration(nanos)
		}
	case string:
		parsed, err := time.ParseDuration(t)
		if err != nil {
			return err
		}
		if parsed < 0 {
			parsed = time.Duration(math.MaxInt64)
		}
		d.Duration = parsed
	default:
		// %T prints "<nil>" for JSON null instead of a formatting error.
		return fmt.Errorf("Unsupported type: '%T'", v)
	}

	return nil
}
717
718

// FormatParams converts specified parameter options to their correct types
719
func FormatParams(params map[string][]string) (map[string]any, error) {
720
721
722
723
724
725
726
727
728
729
730
731
732
	opts := Options{}
	valueOpts := reflect.ValueOf(&opts).Elem() // names of the fields in the options struct
	typeOpts := reflect.TypeOf(opts)           // types of the fields in the options struct

	// build map of json struct tags to their types
	jsonOpts := make(map[string]reflect.StructField)
	for _, field := range reflect.VisibleFields(typeOpts) {
		jsonTag := strings.Split(field.Tag.Get("json"), ",")[0]
		if jsonTag != "" {
			jsonOpts[jsonTag] = field
		}
	}

733
	out := make(map[string]any)
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
	// iterate params and set values based on json struct tags
	for key, vals := range params {
		if opt, ok := jsonOpts[key]; !ok {
			return nil, fmt.Errorf("unknown parameter '%s'", key)
		} else {
			field := valueOpts.FieldByName(opt.Name)
			if field.IsValid() && field.CanSet() {
				switch field.Kind() {
				case reflect.Float32:
					floatVal, err := strconv.ParseFloat(vals[0], 32)
					if err != nil {
						return nil, fmt.Errorf("invalid float value %s", vals)
					}

					out[key] = float32(floatVal)
				case reflect.Int:
					intVal, err := strconv.ParseInt(vals[0], 10, 64)
					if err != nil {
						return nil, fmt.Errorf("invalid int value %s", vals)
					}

					out[key] = intVal
				case reflect.Bool:
					boolVal, err := strconv.ParseBool(vals[0])
					if err != nil {
						return nil, fmt.Errorf("invalid bool value %s", vals)
					}

					out[key] = boolVal
				case reflect.String:
					out[key] = vals[0]
				case reflect.Slice:
					// TODO: only string slices are supported right now
					out[key] = vals
768
769
770
771
772
773
774
775
776
777
778
				case reflect.Pointer:
					var b bool
					if field.Type() == reflect.TypeOf(&b) {
						boolVal, err := strconv.ParseBool(vals[0])
						if err != nil {
							return nil, fmt.Errorf("invalid bool value %s", vals)
						}
						out[key] = &boolVal
					} else {
						return nil, fmt.Errorf("unknown type %s for %s", field.Kind(), key)
					}
779
780
781
782
783
784
785
786
787
				default:
					return nil, fmt.Errorf("unknown type %s for %s", field.Kind(), key)
				}
			}
		}
	}

	return out, nil
}