add NumGQA

ad3a7d0e · Michael Yang · 18ffeeec · ad3a7d0e · ad3a7d0e
Commit ad3a7d0e authored Jul 27, 2023 by Michael Yang
Hide whitespace changes
Inline Side-by-side

Showing 2 changed files with 3 additions and 0 deletions

api/types.go api/types.go +2 -0

llama/llama.go llama/llama.go +1 -0

No files found.
--- a/api/types.go
+++ b/api/types.go
@@ -153,6 +153,7 @@ type Options struct {
 	NumCtx        int  `json:"num_ctx,omitempty"`
 	NumKeep       int  `json:"num_keep,omitempty"`
 	NumBatch      int  `json:"num_batch,omitempty"`
+	NumGQA        int  `json:"num_gqa,omitempty"`
 	NumGPU        int  `json:"num_gpu,omitempty"`
 	MainGPU       int  `json:"main_gpu,omitempty"`
 	LowVRAM       bool `json:"low_vram,omitempty"`
@@ -190,6 +191,7 @@ func DefaultOptions() Options {
 		NumCtx:   2048,
 		NumBatch: 1024,
 		NumGPU:   1,
+		NumGQA:   1,
 		LowVRAM:  false,
 		F16KV:    true,
 		UseMMap:  true,

--- a/llama/llama.go
+++ b/llama/llama.go
@@ -127,6 +127,7 @@ func New(model string, opts api.Options) (*LLM, error) {
 	params.seed = C.uint(llm.Seed)
 	params.n_ctx = C.int(llm.NumCtx)
 	params.n_batch = C.int(llm.NumBatch)
+	params.n_gqa = C.int(llm.NumGQA)
 	params.n_gpu_layers = C.int(llm.NumGPU)
 	params.main_gpu = C.int(llm.MainGPU)
 	params.low_vram = C.bool(llm.LowVRAM)