Commit 9a43994c authored by Jesse Gross, committed by Jesse Gross
Browse files

ggml: Disable unused pipeline parallelism

We're not currently using it, even in cases where we could. Disabling
it improves generation performance by 10-30% with multiple GPUs.
parent f8a6e888
@@ -418,7 +418,7 @@ func New(modelPath string, params ml.BackendParams) (ml.Backend, error) {
 (*C.ggml_backend_buffer_type_t)(unsafe.Pointer(&schedBufts[0])),
 C.int(len(schedBackends)),
 C.size_t(maxGraphNodes),
-C._Bool(len(gpus) > 1 && slices.Contains(gpus, output.d)),
+C._Bool(false),
 C._Bool(false),
 ),
 schedBackends: schedBackends,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment