llm: normalise kvct parameter handling (#7926)

539be436 · Sam · GitHub · 1bdab9fd · 539be436 · 539be436
Unverified · Commit 539be436, authored Dec 04, 2024 by Sam; committed by GitHub Dec 03, 2024
Hide whitespace changes
Inline Side-by-side

Showing 2 changed files with 2 additions and 2 deletions

llm/memory.go llm/memory.go +1 -1

llm/server.go llm/server.go +1 -1

No files found.
--- a/llm/memory.go
+++ b/llm/memory.go
@@ -129,7 +129,7 @@ func EstimateGPULayers(gpus []discover.GpuInfo, ggml *GGML, projectors []string,
 	var kvct string
 	if fa {
-		requested := envconfig.KvCacheType()
+		requested := strings.ToLower(envconfig.KvCacheType())
 		if requested != "" && ggml.SupportsKVCacheType(requested) {
 			kvct = requested
 		}

--- a/llm/server.go
+++ b/llm/server.go
@@ -225,7 +225,7 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, ggml *GGML, adapter
 		fa = false
 	}
-	kvct := envconfig.KvCacheType()
+	kvct := strings.ToLower(envconfig.KvCacheType())
 	if fa {
 		slog.Info("enabling flash attention")