recent llama.cpp update added kernels for fp32, q5_0, and q5_1

19b7a4d7 · Michael Yang · 8c4022b0 · 19b7a4d7
Commit 19b7a4d7 authored Nov 20, 2023 by Michael Yang
Hide whitespace changes
Inline Side-by-side

Showing with 1 addition and 8 deletions

llm/llm.go llm/llm.go +1 -8

No files found.
--- a/llm/llm.go
+++ b/llm/llm.go
@@ -41,20 +41,13 @@ func New(workDir, model string, adapters []string, opts api.Options) (LLM, error
 	if runtime.GOOS == "darwin" {
 		switch ggml.FileType() {
-		case "Q8_0":
+		case "F32", "Q5_0", "Q5_1", "Q8_0":
 			if ggml.Name() != "gguf" && opts.NumGPU != 0 {
 				// F32, Q5_0, Q5_1, and Q8_0 models in the GGML (non-GGUF) format do not
 				// support the Metal API and will cause the runner to segmentation fault,
 				// so disable the GPU
 				log.Printf("WARNING: GPU disabled for F32, Q5_0, Q5_1, and Q8_0")
 				opts.NumGPU = 0
 			}
-		case "F32", "Q5_0", "Q5_1":
-			if opts.NumGPU != 0 {
-				// F32, Q5_0, Q5_1, and Q8_0 do not support Metal API and will
-				// cause the runner to segmentation fault so disable GPU
-				log.Printf("WARNING: GPU disabled for F32, Q5_0, Q5_1, and Q8_0")
-				opts.NumGPU = 0
-			}
 		}
 		var requiredMemory int64