Commit b25dd179 authored by Michael Yang's avatar Michael Yang
Browse files

allow F16 to use metal

warning F16 uses significantly more memory than quantized model so the
standard requires don't apply.
parent 304f2b6c
......@@ -36,11 +36,11 @@ func New(model string, adapters []string, opts api.Options) (LLM, error) {
}
switch ggml.FileType().String() {
case "F32", "F16", "Q5_0", "Q5_1", "Q8_0":
case "F32", "Q5_0", "Q5_1", "Q8_0":
if opts.NumGPU != 0 {
// F32, F16, Q5_0, Q5_1, and Q8_0 do not support Metal API and will
// cause the runner to segmentation fault so disable GPU
log.Printf("WARNING: GPU disabled for F32, F16, Q5_0, Q5_1, and Q8_0")
log.Printf("WARNING: GPU disabled for F32, Q5_0, Q5_1, and Q8_0")
opts.NumGPU = 0
}
}
......@@ -48,19 +48,27 @@ func New(model string, adapters []string, opts api.Options) (LLM, error) {
totalResidentMemory := memory.TotalMemory()
switch ggml.ModelType() {
case ModelType3B, ModelType7B:
if totalResidentMemory < 8*1024*1024 {
if ggml.FileType().String() == "F16" && totalResidentMemory < 16*1024*1024 {
return nil, fmt.Errorf("F16 model requires at least 16GB of memory")
} else if totalResidentMemory < 8*1024*1024 {
return nil, fmt.Errorf("model requires at least 8GB of memory")
}
case ModelType13B:
if totalResidentMemory < 16*1024*1024 {
if ggml.FileType().String() == "F16" && totalResidentMemory < 32*1024*1024 {
return nil, fmt.Errorf("F16 model requires at least 32GB of memory")
} else if totalResidentMemory < 16*1024*1024 {
return nil, fmt.Errorf("model requires at least 16GB of memory")
}
case ModelType30B, ModelType34B:
if totalResidentMemory < 32*1024*1024 {
if ggml.FileType().String() == "F16" && totalResidentMemory < 64*1024*1024 {
return nil, fmt.Errorf("F16 model requires at least 64GB of memory")
} else if totalResidentMemory < 32*1024*1024 {
return nil, fmt.Errorf("model requires at least 32GB of memory")
}
case ModelType65B:
if totalResidentMemory < 64*1024*1024 {
if ggml.FileType().String() == "F16" && totalResidentMemory < 128*1024*1024 {
return nil, fmt.Errorf("F16 model requires at least 128GB of memory")
} else if totalResidentMemory < 64*1024*1024 {
return nil, fmt.Errorf("model requires at least 64GB of memory")
}
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment