Commit 2dd040d0 authored by Jeffrey Morgan
Browse files

do not use `--parallel 2` for old runners

parent bbe41ce4
...@@ -59,6 +59,7 @@ ws ::= ([ \t\n] ws)? ...@@ -59,6 +59,7 @@ ws ::= ([ \t\n] ws)?
var llamaCppEmbed embed.FS var llamaCppEmbed embed.FS
type ModelRunner struct { type ModelRunner struct {
Type string // "gguf" or "ggml"
Path string // path to the model runner executable Path string // path to the model runner executable
Accelerated bool Accelerated bool
} }
...@@ -72,25 +73,25 @@ func chooseRunners(workDir, runnerType string) []ModelRunner { ...@@ -72,25 +73,25 @@ func chooseRunners(workDir, runnerType string) []ModelRunner {
switch runtime.GOOS { switch runtime.GOOS {
case "darwin": case "darwin":
if runtime.GOARCH == "arm64" { if runtime.GOARCH == "arm64" {
runners = []ModelRunner{{Path: path.Join(buildPath, "metal", "bin", "ollama-runner")}} runners = []ModelRunner{{Type: runnerType, Path: path.Join(buildPath, "metal", "bin", "ollama-runner")}}
} else { } else {
runners = []ModelRunner{{Path: path.Join(buildPath, "cpu", "bin", "ollama-runner")}} runners = []ModelRunner{{Type: runnerType, Path: path.Join(buildPath, "cpu", "bin", "ollama-runner")}}
} }
case "linux": case "linux":
runners = []ModelRunner{ runners = []ModelRunner{
{Path: path.Join(buildPath, "cuda", "bin", "ollama-runner"), Accelerated: true}, {Type: runnerType, Path: path.Join(buildPath, "cuda", "bin", "ollama-runner"), Accelerated: true},
{Path: path.Join(buildPath, "cpu", "bin", "ollama-runner")}, {Type: runnerType, Path: path.Join(buildPath, "cpu", "bin", "ollama-runner")},
} }
case "windows": case "windows":
// TODO: select windows GPU runner here when available // TODO: select windows GPU runner here when available
runners = []ModelRunner{ runners = []ModelRunner{
{Path: path.Join(buildPath, "cuda", "bin", "Release", "ollama-runner.exe"), Accelerated: true}, {Type: runnerType, Path: path.Join(buildPath, "cuda", "bin", "Release", "ollama-runner.exe"), Accelerated: true},
{Path: path.Join(buildPath, "cpu", "bin", "Release", "ollama-runner.exe")}, {Type: runnerType, Path: path.Join(buildPath, "cpu", "bin", "Release", "ollama-runner.exe")},
} }
default: default:
log.Printf("unknown OS, running on CPU: %s", runtime.GOOS) log.Printf("unknown OS, running on CPU: %s", runtime.GOOS)
runners = []ModelRunner{ runners = []ModelRunner{
{Path: path.Join(buildPath, "cpu", "bin", "ollama-runner")}, {Type: runnerType, Path: path.Join(buildPath, "cpu", "bin", "ollama-runner")},
} }
} }
...@@ -148,6 +149,7 @@ func chooseRunners(workDir, runnerType string) []ModelRunner { ...@@ -148,6 +149,7 @@ func chooseRunners(workDir, runnerType string) []ModelRunner {
for _, r := range runners { for _, r := range runners {
// clean the ModelRunner paths so that they match the OS we are running on // clean the ModelRunner paths so that they match the OS we are running on
localRunnersByPriority = append(localRunnersByPriority, ModelRunner{ localRunnersByPriority = append(localRunnersByPriority, ModelRunner{
Type: r.Type,
Path: filepath.Clean(path.Join(workDir, r.Path)), Path: filepath.Clean(path.Join(workDir, r.Path)),
Accelerated: r.Accelerated, Accelerated: r.Accelerated,
}) })
...@@ -341,7 +343,6 @@ func newLlama(model string, adapters, projectors []string, runners []ModelRunner ...@@ -341,7 +343,6 @@ func newLlama(model string, adapters, projectors []string, runners []ModelRunner
"--ctx-size", fmt.Sprintf("%d", opts.NumCtx), "--ctx-size", fmt.Sprintf("%d", opts.NumCtx),
"--batch-size", fmt.Sprintf("%d", opts.NumBatch), "--batch-size", fmt.Sprintf("%d", opts.NumBatch),
"--n-gpu-layers", fmt.Sprintf("%d", numGPU), "--n-gpu-layers", fmt.Sprintf("%d", numGPU),
"--parallel", "2",
"--embedding", "--embedding",
} }
...@@ -403,11 +404,17 @@ func newLlama(model string, adapters, projectors []string, runners []ModelRunner ...@@ -403,11 +404,17 @@ func newLlama(model string, adapters, projectors []string, runners []ModelRunner
} }
port := rand.Intn(65535-49152) + 49152 // get a random port in the ephemeral range port := rand.Intn(65535-49152) + 49152 // get a random port in the ephemeral range
params := append(params, "--port", strconv.Itoa(port))
if runner.Type == "gguf" {
params = append(params, "--parallel", "2")
}
ctx, cancel := context.WithCancel(context.Background()) ctx, cancel := context.WithCancel(context.Background())
cmd := exec.CommandContext( cmd := exec.CommandContext(
ctx, ctx,
runner.Path, runner.Path,
append(params, "--port", strconv.Itoa(port))..., params...,
) )
var libraryPaths []string var libraryPaths []string
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment