types.go 3.14 KB
Newer Older
Jeffrey Morgan's avatar
Jeffrey Morgan committed
1
2
package api

Bruce MacDonald's avatar
Bruce MacDonald committed
3
4
5
6
// PullRequest is the JSON payload that names a model to pull.
//
// It carries a single field, Model, which is always encoded under the
// "model" key (there is no omitempty, so an empty name is still emitted).
type PullRequest struct {
	Model string `json:"model"`
}

Bruce MacDonald's avatar
Bruce MacDonald committed
7
// PullProgress is a JSON progress report with three numeric fields.
//
// All three keys ("total", "completed", "percent") are always emitted when
// the value is marshaled; none of them uses omitempty.
//
// NOTE(review): the unit of Total/Completed (presumably bytes) and the scale
// of Percent (0-1 vs 0-100) are not established by this file - confirm
// against the code that fills the struct in.
type PullProgress struct {
	Total     int64   `json:"total"`
	Completed int64   `json:"completed"`
	Percent   float64 `json:"percent"`
}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
13
14
15
type GenerateRequest struct {
	Model  string `json:"model"`
	Prompt string `json:"prompt"`
16

17
18
	ModelOptions   *ModelOptions   `json:"model_opts,omitempty"`
	PredictOptions *PredictOptions `json:"predict_opts,omitempty"`
19
20
21
}

// ModelOptions groups the model-level settings that GenerateRequest carries
// under its "model_opts" key.
//
// Every field is tagged with omitempty, so zero values (0, false, "") are
// left out of the encoded JSON. Note that the JSON keys do not always mirror
// the Go names: F16Memory is "memory_f16" and NGPULayers is "gpu_layers".
//
// NOTE(review): because the bool fields use omitempty, an explicit false is
// indistinguishable from "not set" once encoded. DefaultModelOptions sets
// some of these bools to true, so if defaults are applied underneath a
// decoded request, a client that encodes this struct cannot switch them off
// - confirm how callers merge defaults.
type ModelOptions struct {
	ContextSize int    `json:"context_size,omitempty"`
	Seed        int    `json:"seed,omitempty"`
	NBatch      int    `json:"n_batch,omitempty"`
	F16Memory   bool   `json:"memory_f16,omitempty"`
	MLock       bool   `json:"mlock,omitempty"`
	MMap        bool   `json:"mmap,omitempty"`
	VocabOnly   bool   `json:"vocab_only,omitempty"`
	LowVRAM     bool   `json:"low_vram,omitempty"`
	Embeddings  bool   `json:"embeddings,omitempty"`
	NUMA        bool   `json:"numa,omitempty"`
	NGPULayers  int    `json:"gpu_layers,omitempty"`
	MainGPU     string `json:"main_gpu,omitempty"`
	TensorSplit string `json:"tensor_split,omitempty"`
}

// PredictOptions groups the per-prediction settings that GenerateRequest
// carries under its "predict_opts" key.
//
// Tagged fields use omitempty, so their zero values are left out of the
// encoded JSON. Several JSON keys differ from the Go names: Temperature is
// "temp", FrequencyPenalty is "freq_penalty", PresencePenalty is
// "pres_penalty", MirostatETA is "mirostat_lr" and MirostatTAU is
// "mirostat_ent".
//
// NOTE(review): F16KV, DebugMode, StopPrompts, PathPromptCache,
// PromptCacheAll, PromptCacheRO, MainGPU and TensorSplit have no struct tag.
// encoding/json therefore emits them under their Go field names and never
// omits them, which is inconsistent with the snake_case keys used by every
// other field (and with the tagged main_gpu / tensor_split on ModelOptions).
// They are left untagged here because adding tags would change the wire
// format - decide deliberately before changing.
//
// NOTE(review): Seed, MLock, MMap, MainGPU and TensorSplit also exist on
// ModelOptions; which copy takes effect is not visible in this file.
type PredictOptions struct {
	Seed        int     `json:"seed,omitempty"`
	Threads     int     `json:"threads,omitempty"`
	Tokens      int     `json:"tokens,omitempty"`
	TopK        int     `json:"top_k,omitempty"`
	Repeat      int     `json:"repeat,omitempty"`
	Batch       int     `json:"batch,omitempty"`
	NKeep       int     `json:"nkeep,omitempty"`
	TopP        float64 `json:"top_p,omitempty"`
	Temperature float64 `json:"temp,omitempty"`
	Penalty     float64 `json:"penalty,omitempty"`
	F16KV       bool
	DebugMode   bool
	StopPrompts []string
	IgnoreEOS   bool `json:"ignore_eos,omitempty"`

	TailFreeSamplingZ float64 `json:"tfs_z,omitempty"`
	TypicalP          float64 `json:"typical_p,omitempty"`
	FrequencyPenalty  float64 `json:"freq_penalty,omitempty"`
	PresencePenalty   float64 `json:"pres_penalty,omitempty"`
	Mirostat          int     `json:"mirostat,omitempty"`
	MirostatETA       float64 `json:"mirostat_lr,omitempty"`
	MirostatTAU       float64 `json:"mirostat_ent,omitempty"`
	PenalizeNL        bool    `json:"penalize_nl,omitempty"`
	LogitBias         string  `json:"logit_bias,omitempty"`

	PathPromptCache string
	MLock           bool `json:"mlock,omitempty"`
	MMap            bool `json:"mmap,omitempty"`
	PromptCacheAll  bool
	PromptCacheRO   bool
	MainGPU         string
	TensorSplit     string
}

var DefaultModelOptions ModelOptions = ModelOptions{
73
	ContextSize: 512,
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
	Seed:        0,
	F16Memory:   true,
	MLock:       false,
	Embeddings:  true,
	MMap:        true,
	LowVRAM:     false,
}

var DefaultPredictOptions PredictOptions = PredictOptions{
	Seed:              -1,
	Threads:           -1,
	Tokens:            512,
	Penalty:           1.1,
	Repeat:            64,
	Batch:             512,
	NKeep:             64,
	TopK:              90,
	TopP:              0.86,
	TailFreeSamplingZ: 1.0,
	TypicalP:          1.0,
	Temperature:       0.8,
	FrequencyPenalty:  0.0,
	PresencePenalty:   0.0,
	Mirostat:          0,
	MirostatTAU:       5.0,
	MirostatETA:       0.1,
	MMap:              true,
	StopPrompts:       []string{"llama"},
Jeffrey Morgan's avatar
Jeffrey Morgan committed
102
103
104
105
106
}

// GenerateResponse is the JSON payload returned for a generation call.
//
// Its only field, Response, is always encoded under the "response" key
// (no omitempty, so an empty string is still emitted).
type GenerateResponse struct {
	Response string `json:"response"`
}