@@ -789,7 +797,8 @@ func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn fu
res,err:=http.DefaultClient.Do(serverReq)
iferr!=nil{
returnfmt.Errorf("POST predict: %v",err)
slog.Error("post predict","error",err)
returnerrors.New("model runner has unexpectedly stopped, this may be due to resource limitations or an internal error, check ollama server logs for details")
}
deferres.Body.Close()
...
...
@@ -884,6 +893,8 @@ type EmbeddingResponse struct {
returnnil,fmt.Errorf("unable to allocate %v from device %v for new tensor",format.HumanBytes2(uint64(size)),C.GoString(C.ggml_backend_buft_name(c.buft)))