Unverified commit d06bc0cb, authored by Bruce MacDonald and committed by GitHub
Browse files

enable q8, q5, 5_1, and f32 for linux gpu (#699)

parent d104b7e9
......@@ -5,6 +5,7 @@ import (
"fmt"
"log"
"os"
"runtime"
"github.com/pbnjay/memory"
......@@ -37,6 +38,7 @@ func New(workDir, model string, adapters []string, opts api.Options) (LLM, error
return nil, err
}
if runtime.GOOS == "darwin" {
switch ggml.FileType() {
case "Q8_0":
if ggml.Name() != "gguf" && opts.NumGPU != 0 {
......@@ -53,6 +55,7 @@ func New(workDir, model string, adapters []string, opts api.Options) (LLM, error
opts.NumGPU = 0
}
}
}
totalResidentMemory := memory.TotalMemory()
switch ggml.ModelType() {
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment