Commit cb4a80b6 authored by Michael Yang
Browse files

fix: regression unsupported metal types

Omitting `--n-gpu-layers` means Metal is used on macOS, which is not
correct: ollama uses `num_gpu=0` to explicitly disable the GPU for file
types that are not implemented in Metal, so the flag must always be passed.
parent 9ef2fce3
...@@ -292,13 +292,10 @@ func newLlama(model string, adapters []string, runners []ModelRunner, numLayers ...@@ -292,13 +292,10 @@ func newLlama(model string, adapters []string, runners []ModelRunner, numLayers
"--rope-freq-base", fmt.Sprintf("%f", opts.RopeFrequencyBase), "--rope-freq-base", fmt.Sprintf("%f", opts.RopeFrequencyBase),
"--rope-freq-scale", fmt.Sprintf("%f", opts.RopeFrequencyScale), "--rope-freq-scale", fmt.Sprintf("%f", opts.RopeFrequencyScale),
"--batch-size", fmt.Sprintf("%d", opts.NumBatch), "--batch-size", fmt.Sprintf("%d", opts.NumBatch),
"--n-gpu-layers", fmt.Sprintf("%d", numGPU),
"--embedding", "--embedding",
} }
if numGPU > 0 {
params = append(params, "--n-gpu-layers", fmt.Sprintf("%d", numGPU))
}
if opts.NumGQA > 0 { if opts.NumGQA > 0 {
params = append(params, "--gqa", fmt.Sprintf("%d", opts.NumGQA)) params = append(params, "--gqa", fmt.Sprintf("%d", opts.NumGQA))
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment