types.go 3.4 KB
Newer Older
Jeffrey Morgan's avatar
Jeffrey Morgan committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
package api

import (
	"fmt"
	"net/http"
	"strings"
)

// Error is an error value made of a numeric code and an optional message.
// Both fields are encoded to JSON, under "code" and "message" respectively.
// When formatting, Code is interpreted as an HTTP status code.
type Error struct {
	Code    int32  `json:"code"`
	Message string `json:"message"`
}

// Error implements the error interface.
//
// A non-empty Message is returned verbatim. Otherwise the result is the code
// followed by the lower-cased standard HTTP status text for that code, for
// example "404 not found". If the code has no standard status text (such as
// the zero value), only the number is returned so the string never ends in a
// dangling space.
func (e Error) Error() string {
	if e.Message != "" {
		return e.Message
	}

	text := strings.ToLower(http.StatusText(int(e.Code)))
	if text == "" {
		// http.StatusText returns "" for unknown codes.
		return fmt.Sprintf("%d", e.Code)
	}
	return fmt.Sprintf("%d %s", e.Code, text)
}

Bruce MacDonald's avatar
Bruce MacDonald committed
21
22
23
24
// PullRequest carries a single model identifier, encoded as "model" in JSON.
// NOTE(review): presumably the request body of a model pull call; confirm
// against the handler that decodes it.
type PullRequest struct {
	Model string `json:"model"`
}

Bruce MacDonald's avatar
Bruce MacDonald committed
25
// PullProgress reports progress as a total, a completed amount and a
// percentage, encoded to JSON as "total", "completed" and "percent".
// NOTE(review): the unit of Total/Completed and the scale of Percent
// (0-1 vs 0-100) are not visible here; confirm against the producer.
type PullProgress struct {
	Total     int64   `json:"total"`
	Completed int64   `json:"completed"`
	Percent   float64 `json:"percent"`
}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
31
32
33
type GenerateRequest struct {
	Model  string `json:"model"`
	Prompt string `json:"prompt"`
34

Michael Yang's avatar
Michael Yang committed
35
36
	ModelOptions   `json:"model_opts,omitempty"`
	PredictOptions `json:"predict_opts,omitempty"`
37
38
39
}

// ModelOptions is the option set embedded in GenerateRequest under the
// "model_opts" JSON key. Every field has an explicit snake_case JSON name and
// is omitted from the encoding when it holds its zero value.
//
// Note that the JSON names do not always mirror the Go names: F16Memory is
// encoded as "memory_f16" and NGPULayers as "gpu_layers".
// NOTE(review): the expected formats of MainGPU and TensorSplit (both strings)
// are not visible here; confirm against the consumer.
type ModelOptions struct {
	ContextSize int    `json:"context_size,omitempty"`
	Seed        int    `json:"seed,omitempty"`
	NBatch      int    `json:"n_batch,omitempty"`
	F16Memory   bool   `json:"memory_f16,omitempty"`
	MLock       bool   `json:"mlock,omitempty"`
	MMap        bool   `json:"mmap,omitempty"`
	VocabOnly   bool   `json:"vocab_only,omitempty"`
	LowVRAM     bool   `json:"low_vram,omitempty"`
	Embeddings  bool   `json:"embeddings,omitempty"`
	NUMA        bool   `json:"numa,omitempty"`
	NGPULayers  int    `json:"gpu_layers,omitempty"`
	MainGPU     string `json:"main_gpu,omitempty"`
	TensorSplit string `json:"tensor_split,omitempty"`
}

// PredictOptions is the option set embedded in GenerateRequest under the
// "predict_opts" JSON key.
//
// Fields with a JSON tag are encoded under that snake_case name and omitted
// when zero. Several JSON names differ from the Go names: Temperature is
// "temp", FrequencyPenalty is "freq_penalty", PresencePenalty is
// "pres_penalty", MirostatETA is "mirostat_lr" and MirostatTAU is
// "mirostat_ent".
//
// F16KV, DebugMode, StopPrompts, PathPromptCache, PromptCacheAll,
// PromptCacheRO, MainGPU and TensorSplit carry no tag, so encoding/json uses
// their Go field names as keys and always emits them.
// NOTE(review): the missing tags look like an oversight next to the tagged
// fields, but adding them would change the wire format; confirm with clients
// before changing.
type PredictOptions struct {
	Seed        int     `json:"seed,omitempty"`
	Threads     int     `json:"threads,omitempty"`
	Tokens      int     `json:"tokens,omitempty"`
	TopK        int     `json:"top_k,omitempty"`
	Repeat      int     `json:"repeat,omitempty"`
	Batch       int     `json:"batch,omitempty"`
	NKeep       int     `json:"nkeep,omitempty"`
	TopP        float64 `json:"top_p,omitempty"`
	Temperature float64 `json:"temp,omitempty"`
	Penalty     float64 `json:"penalty,omitempty"`
	F16KV       bool
	DebugMode   bool
	StopPrompts []string
	IgnoreEOS   bool `json:"ignore_eos,omitempty"`

	TailFreeSamplingZ float64 `json:"tfs_z,omitempty"`
	TypicalP          float64 `json:"typical_p,omitempty"`
	FrequencyPenalty  float64 `json:"freq_penalty,omitempty"`
	PresencePenalty   float64 `json:"pres_penalty,omitempty"`
	Mirostat          int     `json:"mirostat,omitempty"`
	MirostatETA       float64 `json:"mirostat_lr,omitempty"`
	MirostatTAU       float64 `json:"mirostat_ent,omitempty"`
	PenalizeNL        bool    `json:"penalize_nl,omitempty"`
	LogitBias         string  `json:"logit_bias,omitempty"`

	PathPromptCache string
	MLock           bool `json:"mlock,omitempty"`
	MMap            bool `json:"mmap,omitempty"`
	PromptCacheAll  bool
	PromptCacheRO   bool
	MainGPU         string
	TensorSplit     string
}

// DefaultModelOptions is the package-level baseline ModelOptions value:
// a context size of 128 with F16Memory, MMap and Embeddings switched on.
// Seed, MLock and LowVRAM are spelled out at their zero values to make the
// intended defaults explicit; all remaining fields are left at zero.
//
// This is an exported variable, so any write to it is visible to every user
// of the package.
var DefaultModelOptions = ModelOptions{
	ContextSize: 128,
	Seed:        0,
	F16Memory:   true,
	Embeddings:  true,
	MMap:        true,
	MLock:       false,
	LowVRAM:     false,
}

var DefaultPredictOptions PredictOptions = PredictOptions{
	Seed:              -1,
	Threads:           -1,
	Tokens:            512,
	Penalty:           1.1,
	Repeat:            64,
	Batch:             512,
	NKeep:             64,
	TopK:              90,
	TopP:              0.86,
	TailFreeSamplingZ: 1.0,
	TypicalP:          1.0,
	Temperature:       0.8,
	FrequencyPenalty:  0.0,
	PresencePenalty:   0.0,
	Mirostat:          0,
	MirostatTAU:       5.0,
	MirostatETA:       0.1,
	MMap:              true,
	StopPrompts:       []string{"llama"},
Jeffrey Morgan's avatar
Jeffrey Morgan committed
120
121
122
123
124
}

// GenerateResponse carries a single response string, encoded as "response"
// in JSON.
// NOTE(review): whether one value holds the full output or one streamed
// fragment is not visible here; confirm against the producer.
type GenerateResponse struct {
	Response string `json:"response"`
}