"vscode:/vscode.git/clone" did not exist on "049082e013fb71d78f3abf487916f3de2b674908"
Commit 8e8f2c6d authored by Jesse Gross's avatar Jesse Gross Committed by Jesse Gross
Browse files

ollamarunner: Fix memory leak when processing images

The context (and therefore associated input tensors) was not being
properly closed when images were being processed. We were trying to
close them but in reality we were closing over an empty list, preventing
anything from actually being freed.

Fixes #10434
parent 938e8447
...@@ -34,14 +34,10 @@ import ( ...@@ -34,14 +34,10 @@ import (
_ "github.com/ollama/ollama/model/models" _ "github.com/ollama/ollama/model/models"
) )
// contextList holds the ml.Contexts that back a sequence's multimodal
// tensors. The contexts are wrapped in a struct (rather than kept as a
// bare slice on Sequence) so a runtime.AddCleanup registered against
// the wrapper can close each context once the owning sequence is
// unreachable — NOTE(review): per the commit message, registering the
// cleanup over the slice value directly captured an empty list and
// freed nothing; confirm the cleanup site reads list after population.
type contextList struct {
	list []ml.Context
}
type Sequence struct { type Sequence struct {
// ctxs are used for allocating tensors that last the lifetime of the sequence, such as // ctxs are used for allocating tensors that last the lifetime of the sequence, such as
// multimodal embeddings // multimodal embeddings
ctxs *contextList ctxs []ml.Context
// batch index // batch index
iBatch int iBatch int
...@@ -177,8 +173,10 @@ func (s *Server) NewSequence(prompt string, images []llm.ImageData, params NewSe ...@@ -177,8 +173,10 @@ func (s *Server) NewSequence(prompt string, images []llm.ImageData, params NewSe
// inputs processes the prompt and images into a list of inputs // inputs processes the prompt and images into a list of inputs
// by splitting the prompt on [img-<n>] tags, tokenizing text and // by splitting the prompt on [img-<n>] tags, tokenizing text and
// decoding images // decoding images
func (s *Server) inputs(prompt string, images []llm.ImageData) ([]input.Input, *contextList, error) { func (s *Server) inputs(prompt string, images []llm.ImageData) ([]input.Input, []ml.Context, error) {
var inputs []input.Input var inputs []input.Input
var ctxs []ml.Context
var parts []string var parts []string
var matches [][]string var matches [][]string
...@@ -192,13 +190,6 @@ func (s *Server) inputs(prompt string, images []llm.ImageData) ([]input.Input, * ...@@ -192,13 +190,6 @@ func (s *Server) inputs(prompt string, images []llm.ImageData) ([]input.Input, *
parts = []string{prompt} parts = []string{prompt}
} }
var contexts contextList
runtime.AddCleanup(&contexts, func(ctxs []ml.Context) {
for _, ctx := range ctxs {
ctx.Close()
}
}, contexts.list)
postTokenize := false postTokenize := false
for i, part := range parts { for i, part := range parts {
// text - tokenize // text - tokenize
...@@ -228,7 +219,8 @@ func (s *Server) inputs(prompt string, images []llm.ImageData) ([]input.Input, * ...@@ -228,7 +219,8 @@ func (s *Server) inputs(prompt string, images []llm.ImageData) ([]input.Input, *
} }
ctx := s.model.Backend().NewContext() ctx := s.model.Backend().NewContext()
contexts.list = append(contexts.list, ctx) runtime.SetFinalizer(ctx, func(c ml.Context) { c.Close() })
ctxs = append(ctxs, ctx)
imageEmbeddings, err := multimodalProcessor.EncodeMultimodal(ctx, images[imageIndex].Data) imageEmbeddings, err := multimodalProcessor.EncodeMultimodal(ctx, images[imageIndex].Data)
if err != nil { if err != nil {
return nil, nil, err return nil, nil, err
...@@ -251,7 +243,7 @@ func (s *Server) inputs(prompt string, images []llm.ImageData) ([]input.Input, * ...@@ -251,7 +243,7 @@ func (s *Server) inputs(prompt string, images []llm.ImageData) ([]input.Input, *
} }
} }
return inputs, &contexts, nil return inputs, ctxs, nil
} }
type Server struct { type Server struct {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment