Unverified Commit be61a817 authored by Purinda Gunasekara's avatar Purinda Gunasekara Committed by GitHub
Browse files

Fix: the main-gpu argument was not being passed through to llama.cpp. (#1192)

parent 2fdf1b5f
...@@ -339,6 +339,7 @@ func newLlama(model string, adapters []string, runners []ModelRunner, numLayers ...@@ -339,6 +339,7 @@ func newLlama(model string, adapters []string, runners []ModelRunner, numLayers
"--model", model, "--model", model,
"--ctx-size", fmt.Sprintf("%d", opts.NumCtx), "--ctx-size", fmt.Sprintf("%d", opts.NumCtx),
"--batch-size", fmt.Sprintf("%d", opts.NumBatch), "--batch-size", fmt.Sprintf("%d", opts.NumBatch),
"--main-gpu", fmt.Sprintf("%d", opts.MainGPU),
"--n-gpu-layers", fmt.Sprintf("%d", numGPU), "--n-gpu-layers", fmt.Sprintf("%d", numGPU),
"--embedding", "--embedding",
} }
...@@ -544,6 +545,7 @@ func (llm *llama) Predict(ctx context.Context, prevContext []int, prompt string, ...@@ -544,6 +545,7 @@ func (llm *llama) Predict(ctx context.Context, prevContext []int, prompt string,
"stream": true, "stream": true,
"n_predict": llm.NumPredict, "n_predict": llm.NumPredict,
"n_keep": llm.NumKeep, "n_keep": llm.NumKeep,
"main_gpu": llm.MainGPU,
"temperature": llm.Temperature, "temperature": llm.Temperature,
"top_k": llm.TopK, "top_k": llm.TopK,
"top_p": llm.TopP, "top_p": llm.TopP,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment