Unverified commit d06bc0cb, authored by Bruce MacDonald, committed by GitHub
Browse files

enable q8, q5, 5_1, and f32 for linux gpu (#699)

parent d104b7e9
...@@ -5,6 +5,7 @@ import ( ...@@ -5,6 +5,7 @@ import (
"fmt" "fmt"
"log" "log"
"os" "os"
"runtime"
"github.com/pbnjay/memory" "github.com/pbnjay/memory"
...@@ -37,20 +38,22 @@ func New(workDir, model string, adapters []string, opts api.Options) (LLM, error ...@@ -37,20 +38,22 @@ func New(workDir, model string, adapters []string, opts api.Options) (LLM, error
return nil, err return nil, err
} }
switch ggml.FileType() { if runtime.GOOS == "darwin" {
case "Q8_0": switch ggml.FileType() {
if ggml.Name() != "gguf" && opts.NumGPU != 0 { case "Q8_0":
// GGML Q8_0 do not support Metal API and will if ggml.Name() != "gguf" && opts.NumGPU != 0 {
// cause the runner to segmentation fault so disable GPU // GGML Q8_0 do not support Metal API and will
log.Printf("WARNING: GPU disabled for F32, Q5_0, Q5_1, and Q8_0") // cause the runner to segmentation fault so disable GPU
opts.NumGPU = 0 log.Printf("WARNING: GPU disabled for F32, Q5_0, Q5_1, and Q8_0")
} opts.NumGPU = 0
case "F32", "Q5_0", "Q5_1": }
if opts.NumGPU != 0 { case "F32", "Q5_0", "Q5_1":
// F32, Q5_0, Q5_1, and Q8_0 do not support Metal API and will if opts.NumGPU != 0 {
// cause the runner to segmentation fault so disable GPU // F32, Q5_0, Q5_1, and Q8_0 do not support Metal API and will
log.Printf("WARNING: GPU disabled for F32, Q5_0, Q5_1, and Q8_0") // cause the runner to segmentation fault so disable GPU
opts.NumGPU = 0 log.Printf("WARNING: GPU disabled for F32, Q5_0, Q5_1, and Q8_0")
opts.NumGPU = 0
}
} }
} }
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment