You need to sign in or sign up before continuing.
Unverified commit 539be436, authored by Sam and committed by GitHub
Browse files

llm: normalise kvct parameter handling (#7926)

parent 1bdab9fd
...@@ -129,7 +129,7 @@ func EstimateGPULayers(gpus []discover.GpuInfo, ggml *GGML, projectors []string, ...@@ -129,7 +129,7 @@ func EstimateGPULayers(gpus []discover.GpuInfo, ggml *GGML, projectors []string,
var kvct string var kvct string
if fa { if fa {
requested := envconfig.KvCacheType() requested := strings.ToLower(envconfig.KvCacheType())
if requested != "" && ggml.SupportsKVCacheType(requested) { if requested != "" && ggml.SupportsKVCacheType(requested) {
kvct = requested kvct = requested
} }
......
...@@ -225,7 +225,7 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, ggml *GGML, adapter ...@@ -225,7 +225,7 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, ggml *GGML, adapter
fa = false fa = false
} }
kvct := envconfig.KvCacheType() kvct := strings.ToLower(envconfig.KvCacheType())
if fa { if fa {
slog.Info("enabling flash attention") slog.Info("enabling flash attention")
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment