Unverified commit bbe41ce4, authored by Bruce MacDonald, committed by GitHub
Browse files

fix: parallel queueing race condition caused silent failure (#1445)

* fix: queued request failures

- increase parallel requests to 2 so that a queued request can complete; queueing itself is managed in ollama

* log stream errors
parent 9e1406e4
...@@ -341,6 +341,7 @@ func newLlama(model string, adapters, projectors []string, runners []ModelRunner ...@@ -341,6 +341,7 @@ func newLlama(model string, adapters, projectors []string, runners []ModelRunner
"--ctx-size", fmt.Sprintf("%d", opts.NumCtx), "--ctx-size", fmt.Sprintf("%d", opts.NumCtx),
"--batch-size", fmt.Sprintf("%d", opts.NumBatch), "--batch-size", fmt.Sprintf("%d", opts.NumBatch),
"--n-gpu-layers", fmt.Sprintf("%d", numGPU), "--n-gpu-layers", fmt.Sprintf("%d", numGPU),
"--parallel", "2",
"--embedding", "--embedding",
} }
...@@ -631,7 +632,11 @@ func (llm *llama) Predict(ctx context.Context, predict PredictOpts, fn func(Pred ...@@ -631,7 +632,11 @@ func (llm *llama) Predict(ctx context.Context, predict PredictOpts, fn func(Pred
continue continue
} }
if evt, ok := bytes.CutPrefix(line, []byte("data: ")); ok { evt, ok := bytes.CutPrefix(line, []byte("data: "))
if !ok {
return fmt.Errorf("error parsing llm response stream: %s", line)
}
var p prediction var p prediction
if err := json.Unmarshal(evt, &p); err != nil { if err := json.Unmarshal(evt, &p); err != nil {
return fmt.Errorf("error unmarshaling llm prediction response: %v", err) return fmt.Errorf("error unmarshaling llm prediction response: %v", err)
...@@ -661,7 +666,6 @@ func (llm *llama) Predict(ctx context.Context, predict PredictOpts, fn func(Pred ...@@ -661,7 +666,6 @@ func (llm *llama) Predict(ctx context.Context, predict PredictOpts, fn func(Pred
} }
} }
} }
}
if err := scanner.Err(); err != nil { if err := scanner.Err(); err != nil {
if strings.Contains(err.Error(), "unexpected EOF") { if strings.Contains(err.Error(), "unexpected EOF") {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment